This file has been truncated.
diff --git a/.bazelrc b/.bazelrc
index dc0596733f..2e0f49554a 100644
--- a/.bazelrc
+++ b/.bazelrc
@@ -26,27 +26,18 @@ build:cross --stamp
build:cross --define cockroach_cross=y
# Cross-compilation configurations. Add e.g. --config=crosslinux to turn these on.
-# Generally these should be used for development builds. Each cross config has
-# a corresponding `base` config that is the same thing but without the
-# `--workspace_status_command`; if using these `base` configs, you need to
-# specify an appropriate `--workspace_status_command`. These `base` configs are
-# used by the release process which needs to have more control over stamping.
+build:crosslinux --platforms=//build/toolchains:cross_linux
build:crosslinux '--workspace_status_command=./build/bazelutil/stamp.sh x86_64-pc-linux-gnu'
-build:crosslinux --config=crosslinuxbase
-build:crosslinuxbase --platforms=//build/toolchains:cross_linux
-build:crosslinuxbase --config=cross
+build:crosslinux --config=cross
+build:crosswindows --platforms=//build/toolchains:cross_windows
build:crosswindows '--workspace_status_command=./build/bazelutil/stamp.sh x86_64-w64-mingw32'
-build:crosswindows --config=crosswindowsbase
-build:crosswindowsbase --platforms=//build/toolchains:cross_windows
-build:crosswindowsbase --config=cross
+build:crosswindows --config=cross
+build:crossmacos --platforms=//build/toolchains:cross_macos
build:crossmacos '--workspace_status_command=./build/bazelutil/stamp.sh x86_64-apple-darwin19'
-build:crossmacos --config=crossmacosbase
-build:crossmacosbase --platforms=//build/toolchains:cross_macos
-build:crossmacosbase --config=cross
+build:crossmacos --config=cross
+build:crosslinuxarm --platforms=//build/toolchains:cross_linux_arm
build:crosslinuxarm '--workspace_status_command=./build/bazelutil/stamp.sh aarch64-unknown-linux-gnu'
-build:crosslinuxarm --config=crosslinuxarmbase
-build:crosslinuxarmbase --platforms=//build/toolchains:cross_linux_arm
-build:crosslinuxarmbase --config=cross
+build:crosslinuxarm --config=cross
# Developer configurations. Add e.g. --config=devdarwinx86_64 to turn these on.
# NB: This is consumed in `BUILD` files (see build/toolchains/BUILD.bazel).
diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS
index dada0a3861..54de5e589c 100644
--- a/.github/CODEOWNERS
+++ b/.github/CODEOWNERS
@@ -38,11 +38,11 @@
/pkg/sql/show_create*.go @cockroachdb/sql-syntax-prs
/pkg/sql/types/ @cockroachdb/sql-syntax-prs
-/pkg/sql/crdb_internal.go @cockroachdb/sql-experience
-/pkg/sql/pg_catalog.go @cockroachdb/sql-experience
-/pkg/sql/pgwire/ @cockroachdb/sql-experience
-/pkg/sql/sem/builtins/ @cockroachdb/sql-experience
-/pkg/sql/vtable/ @cockroachdb/sql-experience
+/pkg/sql/crdb_internal.go @cockroachdb/sql-api-prs
+/pkg/sql/pg_catalog.go @cockroachdb/sql-api-prs
+/pkg/sql/pgwire/ @cockroachdb/sql-api-prs
+/pkg/sql/sem/builtins/ @cockroachdb/sql-api-prs
+/pkg/sql/vtable/ @cockroachdb/sql-api-prs
/pkg/sql/sessiondata/ @cockroachdb/sql-experience
/pkg/sql/tests/rsg_test.go @cockroachdb/sql-experience
@@ -220,14 +220,12 @@
/pkg/internal/team/ @cockroachdb/test-eng
/pkg/jobs/ @cockroachdb/cdc-prs
/pkg/keys/ @cockroachdb/kv-prs
-/pkg/keysbase/ @cockroachdb/kv-prs
# Don't ping KV on updates to reserved descriptor IDs and such.
/pkg/keys/constants.go @cockroachdb/kv-prs-noreview
/pkg/migration/ @cockroachdb/kv-prs-noreview @cockroachdb/sql-schema
/pkg/multitenant @cockroachdb/unowned
/pkg/release/ @cockroachdb/dev-inf
/pkg/roachpb/.gitattributes @cockroachdb/dev-inf
-/pkg/roachpb/ambiguous_* @cockroachdb/kv-prs
/pkg/roachpb/api* @cockroachdb/kv-prs
/pkg/roachpb/batch* @cockroachdb/kv-prs
/pkg/roachpb/BUILD.bazel @cockroachdb/kv-prs-noreview
@@ -249,9 +247,8 @@
/pkg/roachpb/span* @cockroachdb/kv-prs
/pkg/roachpb/string_test.go @cockroachdb/kv-prs
/pkg/roachpb/tenant* @cockroachdb/kv-prs
-/pkg/roachpb/testdata/ambi* @cockroachdb/kv-prs
/pkg/roachpb/testdata/repl* @cockroachdb/kv-prs
-/pkg/roachpb/version* @cockroachdb/unowned
+/pkg/roachpb/version* @cockroachdb/server
/pkg/roachprod/ @cockroachdb/dev-inf
/pkg/rpc/ @cockroachdb/server-prs
/pkg/scheduledjobs/ @cockroachdb/bulk-prs
diff --git a/.github/bors.toml b/.github/bors.toml
index a3aa263a50..d6a6da5a4b 100644
--- a/.github/bors.toml
+++ b/.github/bors.toml
@@ -8,7 +8,7 @@ status = ["GitHub CI (Cockroach)"]
# r+-ed. If it's still in progress (for e.g. if CI is still running), bors will
# construct the merge commit in parallel and simply wait for success right
# before merging.
-pr_status = ["license/cla", "blathers/release-justification-check"]
+pr_status = ["license/cla"]
# List of PR labels that may not be attached to a PR when it is r+-ed.
block_labels = ["do-not-merge"]
diff --git a/DEPS.bzl b/DEPS.bzl
index ddf4b85115..631bf477bb 100644
--- a/DEPS.bzl
+++ b/DEPS.bzl
@@ -244,20 +244,10 @@ def go_deps():
name = "com_github_apache_thrift",
build_file_proto_mode = "disable_global",
importpath = "github.com/apache/thrift",
- sha256 = "50d5c610df30fa2a6039394d5142382b7d9938870dfb12ef46bddfa3da250893",
- strip_prefix = "github.com/apache/[email protected]",
+ sha256 = "f9e5418fda5dff9f5e1a892a127472fc621d417b3ee1351e53141509233fb1d5",
+ strip_prefix = "github.com/apache/[email protected]",
urls = [
- "https://storage.googleapis.com/cockroach-godeps/gomod/github.com/apache/thrift/com_github_apache_thrift-v0.16.0.zip",
- ],
- )
- go_repository(
- name = "com_github_araddon_dateparse",
- build_file_proto_mode = "disable_global",
- importpath = "github.com/araddon/dateparse",
- sha256 = "8fe74d5b36a9aa66c1350657009296469f8aa5b6925acd178044cd248023fc5a",
- strip_prefix = "github.com/araddon/[email protected]",
- urls = [
- "https://storage.googleapis.com/cockroach-godeps/gomod/github.com/araddon/dateparse/com_github_araddon_dateparse-v0.0.0-20210429162001-6b43995a97de.zip",
+ "https://storage.googleapis.com/cockroach-godeps/gomod/github.com/apache/thrift/com_github_apache_thrift-v0.15.0.zip",
],
)
go_repository(
@@ -1297,10 +1287,10 @@ def go_deps():
patches = [
"@cockroach//build/patches:com_github_cockroachdb_pebble.patch",
],
- sha256 = "cc3201e4197273c3ddc0adf72ab1f800d7b09e2b9d50422cab619a854d8e4e80",
- strip_prefix = "github.com/cockroachdb/[email protected]",
+ sha256 = "71da6a69951ab9767aa51efd34b2a4040ab655f67a5b0be87578af5a85132d26",
+ strip_prefix = "github.com/cockroachdb/[email protected]",
urls = [
- "https://storage.googleapis.com/cockroach-godeps/gomod/github.com/cockroachdb/pebble/com_github_cockroachdb_pebble-v0.0.0-20220307192532-e2b7bb844759.zip",
+ "https://storage.googleapis.com/cockroach-godeps/gomod/github.com/cockroachdb/pebble/com_github_cockroachdb_pebble-v0.0.0-20220227235451-40d39da505a5.zip",
],
)
go_repository(
@@ -2458,10 +2448,10 @@ def go_deps():
name = "com_github_fraugster_parquet_go",
build_file_proto_mode = "disable_global",
importpath = "github.com/fraugster/parquet-go",
- sha256 = "a9d995d60a99dac8709c614f3d2795ef4d8d79c82991b40fa703006078babf75",
- strip_prefix = "github.com/fraugster/[email protected]",
+ sha256 = "66beb8f2218c31ca4ca3d3dbcc91c256f3379750ade924016f4179982446edd7",
+ strip_prefix = "github.com/fraugster/[email protected]",
urls = [
- "https://storage.googleapis.com/cockroach-godeps/gomod/github.com/fraugster/parquet-go/com_github_fraugster_parquet_go-v0.10.0.zip",
+ "https://storage.googleapis.com/cockroach-godeps/gomod/github.com/fraugster/parquet-go/com_github_fraugster_parquet_go-v0.6.1.zip",
],
)
go_repository(
@@ -5329,10 +5319,10 @@ def go_deps():
name = "com_github_mattn_go_runewidth",
build_file_proto_mode = "disable_global",
importpath = "github.com/mattn/go-runewidth",
- sha256 = "030dcf3ea6cb39b34269f119ee5fe5e9d5834daf409e1f3f48c065a45326f4a6",
- strip_prefix = "github.com/mattn/[email protected]",
+ sha256 = "dab6e7984b913f61d02ac53f5c1875c029e591f3b4d3262dea8e95d8c28bb657",
+ strip_prefix = "github.com/mattn/[email protected]",
urls = [
- "https://storage.googleapis.com/cockroach-godeps/gomod/github.com/mattn/go-runewidth/com_github_mattn_go_runewidth-v0.0.10.zip",
+ "https://storage.googleapis.com/cockroach-godeps/gomod/github.com/mattn/go-runewidth/com_github_mattn_go_runewidth-v0.0.7.zip",
],
)
go_repository(
@@ -6743,16 +6733,6 @@ def go_deps():
"https://storage.googleapis.com/cockroach-godeps/gomod/github.com/retailnext/hllpp/com_github_retailnext_hllpp-v1.0.1-0.20180308014038-101a6d2f8b52.zip",
],
)
- go_repository(
- name = "com_github_rivo_uniseg",
- build_file_proto_mode = "disable_global",
- importpath = "github.com/rivo/uniseg",
- sha256 = "cb701df81f36acfbb2627a78662fdcaa150ee1ac00d0796a7f3eafbdb6218128",
- strip_prefix = "github.com/rivo/[email protected]",
- urls = [
- "https://storage.googleapis.com/cockroach-godeps/gomod/github.com/rivo/uniseg/com_github_rivo_uniseg-v0.1.0.zip",
- ],
- )
go_repository(
name = "com_github_robertkrimen_godocdown",
build_file_proto_mode = "disable_global",
@@ -6943,16 +6923,6 @@ def go_deps():
"https://storage.googleapis.com/cockroach-godeps/gomod/github.com/sclevine/agouti/com_github_sclevine_agouti-v3.0.0+incompatible.zip",
],
)
- go_repository(
- name = "com_github_scylladb_termtables",
- build_file_proto_mode = "disable_global",
- importpath = "github.com/scylladb/termtables",
- sha256 = "0afd3a75417e6b708c62329cea7f2822162c8bf6ada0de094cdc032461b8f196",
- strip_prefix = "github.com/scylladb/[email protected]",
- urls = [
- "https://storage.googleapis.com/cockroach-godeps/gomod/github.com/scylladb/termtables/com_github_scylladb_termtables-v0.0.0-20191203121021-c4c0b6d42ff4.zip",
- ],
- )
go_repository(
name = "com_github_sean__seed",
build_file_proto_mode = "disable_global",
diff --git a/Makefile b/Makefile
index bf2dc0ae8d..95fce1608a 100644
--- a/Makefile
+++ b/Makefile
@@ -1721,6 +1721,7 @@ bins = \
bin/cockroach-oss \
bin/cockroach-short \
bin/cockroach-sql \
+ bin/compile-builds \
bin/docgen \
bin/execgen \
bin/fuzz \
diff --git a/WORKSPACE b/WORKSPACE
index e71ed27194..74a3198740 100644
--- a/WORKSPACE
+++ b/WORKSPACE
@@ -137,35 +137,16 @@ http_archive(
load(
"@io_bazel_rules_go//go:deps.bzl",
"go_download_sdk",
- "go_host_sdk",
- "go_local_sdk",
"go_register_toolchains",
"go_rules_dependencies",
)
-# To point to a mirrored artifact, use:
-#
go_download_sdk(
name = "go_sdk",
urls = ["https://storage.googleapis.com/public-bazel-artifacts/go/{}"],
version = "1.17.6",
)
-# To point to a local SDK path, use the following instead. We'll call the
-# directory into which you cloned the Go repository $GODIR[1]. You'll have to
-# first run ./make.bash from $GODIR/src to pick up any custom changes.
-#
-# [1]: https://go.dev/doc/contribute#testing
-#
-# go_local_sdk(
-# name = "go_sdk",
-# path = "<path to $GODIR>",
-# )
-
-# To use your whatever your local SDK is, use the following instead:
-#
-# go_host_sdk(name = "go_sdk")
-
go_rules_dependencies()
go_register_toolchains(nogo = "@cockroach//:crdb_nogo")
diff --git a/build/bazelutil/check.sh b/build/bazelutil/check.sh
index 2cbe67821b..3c6d31cbfe 100755
--- a/build/bazelutil/check.sh
+++ b/build/bazelutil/check.sh
@@ -11,7 +11,7 @@ pkg/roachprod/vm/aws/config.go://go:generate gofmt -s -w embedded.go
pkg/roachprod/vm/aws/config.go://go:generate goimports -w embedded.go
pkg/roachprod/vm/aws/config.go://go:generate terraformgen -o terraform/main.tf
pkg/cmd/roachtest/prometheus/prometheus.go://go:generate mockgen -package=prometheus -destination=mocks_generated_test.go . Cluster
-pkg/cmd/roachtest/tests/drt.go://go:generate mockgen -package tests -destination drt_generated_test.go github.com/cockroachdb/cockroach/pkg/cmd/roachtest/prometheus Client
+pkg/cmd/roachtest/tests/drt.go://go:generate mockgen -package tests -destination drt_generated_test.go . PromClient
pkg/kv/kvclient/kvcoord/transport.go://go:generate mockgen -package=kvcoord -destination=mocks_generated_test.go . Transport
pkg/kv/kvclient/rangecache/range_cache.go://go:generate mockgen -package=rangecachemock -destination=rangecachemock/mocks_generated.go . RangeDescriptorDB
pkg/kv/kvclient/rangefeed/rangefeed.go://go:generate mockgen -destination=mocks_generated_test.go --package=rangefeed . DB
diff --git a/build/bazelutil/stamp.sh b/build/bazelutil/stamp.sh
index a76b504fcb..55bbd4b492 100755
--- a/build/bazelutil/stamp.sh
+++ b/build/bazelutil/stamp.sh
@@ -3,15 +3,6 @@
# This command is used by bazel as the workspace_status_command
# to implement build stamping with git information.
-# Usage: stamp.sh [target-triple] [build-channel] [build-tag] [build-type]
-# All arguments are optional and have appropriate defaults. In this way,
-# stamp.sh with no arguments is appropriate as the `workplace_status_command`
-# for a development build.
-# target-triple: defaults to the value of `cc -dumpmachine`
-# build-channel: defaults to `unknown`, but can be `official-binary`
-# build-tag: defaults to a value that is gleaned from `git rev-parse`
-# build-type: defaults to `development`, but can be `release`
-
set -euo pipefail
# Do not use plumbing commands, like git diff-index, in this target. Our build
@@ -23,52 +14,21 @@ set -euo pipefail
# For details, see the "Possible timestamp problems with diff-files?" thread on
# the Git mailing list (http://marc.info/?l=git&m=131687596307197).
-# Handle target-triple.
+GIT_BUILD_TYPE="development"
+GIT_COMMIT=$(git rev-parse HEAD)
+GIT_TAG=$(git describe --tags --dirty --match=v[0-9]* 2> /dev/null || git rev-parse --short HEAD;)
+GIT_UTCTIME=$(date -u '+%Y/%m/%d %H:%M:%S')
+
if [ -z "${1+x}" ]
then
TARGET_TRIPLE=$(cc -dumpmachine)
else
TARGET_TRIPLE="$1"
- shift 1
-fi
-
-# Handle build-channel.
-if [ -z "${1+x}" ]
-then
- BUILD_CHANNEL="unknown"
-else
- BUILD_CHANNEL="$1"
- shift 1
-fi
-
-# Handle build-tag.
-if [ -z "${1+x}" ]
-then
- BUILD_TAG=$(git describe --tags --dirty --match=v[0-9]* 2> /dev/null || git rev-parse --short HEAD;)
-else
- BUILD_TAG="$1"
- shift 1
-fi
-
-# Handle build-type.
-if [ -z "${1+x}" ]
-then
- BUILD_TYPE="development"
-else
- BUILD_TYPE="$1"
- shift 1
fi
-if [ "$BUILD_TYPE" = "release" ]
-then
- CRASH_REPORT_ENV="$BUILD_TAG"
-else
- CRASH_REPORT_ENV="development"
-fi
-
-BUILD_REV=$(git rev-parse HEAD)
-BUILD_UTCTIME=$(date -u '+%Y/%m/%d %H:%M:%S')
-
+# TODO(ricky): Also provide a way to stamp the following variables:
+# - github.com/cockroachdb/cockroach/pkg/build.channel
+# - github.com/cockroachdb/cockroach/pkg/util/log/logcrash.crashReportEnv
# Variables beginning with "STABLE" will be written to stable-status.txt, and
# others will be written to volatile-status.txt.
@@ -78,11 +38,9 @@ BUILD_UTCTIME=$(date -u '+%Y/%m/%d %H:%M:%S')
# * https://docs.bazel.build/versions/main/user-manual.html#workspace_status
# * https://github.com/bazelbuild/rules_go/blob/master/go/core.rst#defines-and-stamping
cat <<EOF
-STABLE_BUILD_CHANNEL ${BUILD_CHANNEL-}
+STABLE_BUILD_GIT_BUILD_TYPE ${GIT_BUILD_TYPE-}
STABLE_BUILD_TARGET_TRIPLE ${TARGET_TRIPLE-}
-STABLE_BUILD_TYPE ${BUILD_TYPE-}
-STABLE_CRASH_REPORT_ENV ${CRASH_REPORT_ENV-}
-BUILD_REV ${BUILD_REV-}
-BUILD_TAG ${BUILD_TAG-}
-BUILD_UTCTIME ${BUILD_UTCTIME-}
+BUILD_GIT_COMMIT ${GIT_COMMIT-}
+BUILD_GIT_TAG ${GIT_TAG-}
+BUILD_GIT_UTCTIME ${GIT_UTCTIME-}
EOF
diff --git a/build/teamcity/internal/release/process/make-and-publish-build.sh b/build/release/teamcity-make-and-publish-build.sh
similarity index 81%
rename from build/teamcity/internal/release/process/make-and-publish-build.sh
rename to build/release/teamcity-make-and-publish-build.sh
index 6e74e45c1d..f4d24f5bea 100755
--- a/build/teamcity/internal/release/process/make-and-publish-build.sh
+++ b/build/release/teamcity-make-and-publish-build.sh
@@ -2,13 +2,13 @@
set -euo pipefail
-dir="$(dirname $(dirname $(dirname $(dirname $(dirname "${0}")))))"
-source "$dir/release/teamcity-support.sh"
-source "$dir/teamcity-bazel-support.sh" # for run_bazel
+source "$(dirname "${0}")/teamcity-support.sh"
tc_start_block "Variable Setup"
+export BUILDER_HIDE_GOPATH_SRC=1
-build_name=$(git describe --tags --dirty --match=v[0-9]* 2> /dev/null || git rev-parse --short HEAD;)
+build/builder.sh make .buildinfo/tag
+build_name="${TAG_NAME:-$(cat .buildinfo/tag)}"
# On no match, `grep -Eo` returns 1. `|| echo""` makes the script not error.
release_branch="$(echo "$build_name" | grep -Eo "^v[0-9]+\.[0-9]+" || echo"")"
@@ -44,14 +44,21 @@ tc_start_block "Tag the release"
git tag "${build_name}"
tc_end_block "Tag the release"
+
+tc_start_block "Compile publish-provisional-artifacts"
+build/builder.sh go install ./pkg/cmd/publish-provisional-artifacts
+tc_end_block "Compile publish-provisional-artifacts"
+
+
tc_start_block "Compile and publish S3 artifacts"
-BAZEL_SUPPORT_EXTRA_DOCKER_ARGS="-e AWS_ACCESS_KEY_ID -e AWS_SECRET_ACCESS_KEY -e TC_BUILD_BRANCH=$build_name -e bucket=$bucket" run_bazel << 'EOF'
-bazel build --config ci //pkg/cmd/publish-provisional-artifacts
-BAZEL_BIN=$(bazel info bazel-bin --config ci)
-$BAZEL_BIN/pkg/cmd/publish-provisional-artifacts/publish-provisional-artifacts_/publish-provisional-artifacts -provisional -release -bucket "$bucket"
-EOF
+build/builder.sh env \
+ AWS_ACCESS_KEY_ID="$AWS_ACCESS_KEY_ID" \
+ AWS_SECRET_ACCESS_KEY="$AWS_SECRET_ACCESS_KEY" \
+ TC_BUILD_BRANCH="$build_name" \
+ publish-provisional-artifacts -provisional -release -bucket "$bucket"
tc_end_block "Compile and publish S3 artifacts"
+
tc_start_block "Make and push docker image"
configure_docker_creds
docker_login_with_google
@@ -67,6 +74,7 @@ docker build --no-cache --tag="${gcr_repository}:${build_name}" build/deploy
docker push "${gcr_repository}:${build_name}"
tc_end_block "Make and push docker image"
+
tc_start_block "Push release tag to github.com/cockroachdb/cockroach"
github_ssh_key="${GITHUB_COCKROACH_TEAMCITY_PRIVATE_SSH_KEY}"
configure_git_ssh_key
diff --git a/build/teamcity/internal/release/process/publish-cockroach-release.sh b/build/release/teamcity-publish-release.sh
similarity index 87%
rename from build/teamcity/internal/release/process/publish-cockroach-release.sh
rename to build/release/teamcity-publish-release.sh
index 9df13c006c..d0f7754100 100755
--- a/build/teamcity/internal/release/process/publish-cockroach-release.sh
+++ b/build/release/teamcity-publish-release.sh
@@ -2,11 +2,10 @@
set -euxo pipefail
-dir="$(dirname $(dirname $(dirname $(dirname $(dirname "${0}")))))"
-source "$dir/release/teamcity-support.sh"
-source "$dir/teamcity-bazel-support.sh" # for run_bazel
+source "$(dirname "${0}")/teamcity-support.sh"
tc_start_block "Variable Setup"
+export BUILDER_HIDE_GOPATH_SRC=1
# Matching the version name regex from within the cockroach code except
# for the `metadata` part at the end because Docker tags don't support
@@ -74,14 +73,19 @@ git tag "${build_name}"
tc_end_block "Tag the release"
+tc_start_block "Compile publish-provisional-artifacts"
+build/builder.sh go install ./pkg/cmd/publish-provisional-artifacts
+tc_end_block "Compile publish-provisional-artifacts"
+
+
tc_start_block "Make and publish release S3 artifacts"
# Using publish-provisional-artifacts here is funky. We're directly publishing
# the official binaries, not provisional ones. Legacy naming. To clean up...
-BAZEL_SUPPORT_EXTRA_DOCKER_ARGS="-e AWS_ACCESS_KEY_ID -e AWS_SECRET_ACCESS_KEY -e TC_BUILD_BRANCH=$build_name -e bucket=$bucket" run_bazel << 'EOF'
-bazel build --config ci //pkg/cmd/publish-provisional-artifacts
-BAZEL_BIN=$(bazel info bazel-bin --config ci)
-$BAZEL_BIN/pkg/cmd/publish-provisional-artifacts/publish-provisional-artifacts_/publish-provisional-artifacts -provisional -release -bucket "$bucket"
-EOF
+build/builder.sh env \
+ AWS_ACCESS_KEY_ID="$AWS_ACCESS_KEY_ID" \
+ AWS_SECRET_ACCESS_KEY="$AWS_SECRET_ACCESS_KEY" \
+ TC_BUILD_BRANCH="$build_name" \
+ publish-provisional-artifacts -provisional -release -bucket "$bucket"
tc_end_block "Make and publish release S3 artifacts"
@@ -127,12 +131,11 @@ tc_start_block "Publish S3 binaries and archive as latest"
# Only push the "latest" for our most recent release branch.
# https://github.com/cockroachdb/cockroach/issues/41067
if [[ -n "${PUBLISH_LATEST}" && -z "${PRE_RELEASE}" ]]; then
- BAZEL_SUPPORT_EXTRA_DOCKER_ARGS="-e AWS_ACCESS_KEY_ID -e AWS_SECRET_ACCESS_KEY -e TC_BUILD_BRANCH=$build_name -e bucket=$bucket" run_bazel << 'EOF'
-bazel build --config ci //pkg/cmd/publish-provisional-artifacts
-BAZEL_BIN=$(bazel info bazel-bin --config ci)
-$BAZEL_BIN/pkg/cmd/publish-provisional-artifacts/publish-provisional-artifacts_/publish-provisional-artifacts -bless -release -bucket "$bucket"
-EOF
-
+ build/builder.sh env \
+ AWS_ACCESS_KEY_ID="$AWS_ACCESS_KEY_ID" \
+ AWS_SECRET_ACCESS_KEY="$AWS_SECRET_ACCESS_KEY" \
+ TC_BUILD_BRANCH="$build_name" \
+ publish-provisional-artifacts -bless -release -bucket "${bucket}"
else
echo "The latest S3 binaries and archive were _not_ updated."
fi
diff --git a/build/release/teamcity-support.sh b/build/release/teamcity-support.sh
index aeb3cb4b8d..0518b15adf 100644
--- a/build/release/teamcity-support.sh
+++ b/build/release/teamcity-support.sh
@@ -1,7 +1,7 @@
# Common helpers for teamcity-*.sh scripts.
# root is the absolute path to the root directory of the repository.
-root="$(dirname $(dirname $(cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )))"
+root=$(cd "$(dirname "$0")/../.." && pwd)
source "$root/build/teamcity-common-support.sh"
remove_files_on_exit() {
diff --git a/build/teamcity-bless-provisional-binaries.sh b/build/teamcity-bless-provisional-binaries.sh
new file mode 100755
index 0000000000..5a463ddcb5
--- /dev/null
+++ b/build/teamcity-bless-provisional-binaries.sh
@@ -0,0 +1,15 @@
+#!/usr/bin/env bash
+
+# Any arguments to this script are passed through unmodified to
+# ./pkg/cmd/publish-provisional-artifacts.
+
+set -euxo pipefail
+
+export BUILDER_HIDE_GOPATH_SRC=1
+
+build/builder.sh go install ./pkg/cmd/publish-provisional-artifacts
+build/builder.sh env \
+ AWS_ACCESS_KEY_ID="$AWS_ACCESS_KEY_ID" \
+ AWS_SECRET_ACCESS_KEY="$AWS_SECRET_ACCESS_KEY" \
+ TC_BUILD_BRANCH="$TC_BUILD_BRANCH" \
+ publish-provisional-artifacts -bless "$@"
diff --git a/build/teamcity-check.sh b/build/teamcity-check.sh
index 1031a2c486..66ad94333f 100755
--- a/build/teamcity-check.sh
+++ b/build/teamcity-check.sh
@@ -6,6 +6,8 @@ source "$(dirname "${0}")/teamcity-support.sh"
tc_prepare
+maybe_require_release_justification
+
tc_start_block "Lint"
# Disable ccache so that Go doesn't try to install dependencies into GOROOT,
# where it doesn't have write permissions. (Using ccache busts the Go package
diff --git a/build/teamcity-compile-build.sh b/build/teamcity-compile-build.sh
new file mode 100755
index 0000000000..e4945b0f40
--- /dev/null
+++ b/build/teamcity-compile-build.sh
@@ -0,0 +1,9 @@
+#!/usr/bin/env bash
+
+set -euxo pipefail
+
+export BUILDER_HIDE_GOPATH_SRC=1
+
+build/builder.sh go install ./pkg/cmd/compile-build
+build/builder.sh env \
+ compile-build
diff --git a/build/teamcity-compile-builds.sh b/build/teamcity-compile-builds.sh
index 15d46678f2..d1cbaf808d 100755
--- a/build/teamcity-compile-builds.sh
+++ b/build/teamcity-compile-builds.sh
@@ -4,7 +4,7 @@ set -euxo pipefail
export BUILDER_HIDE_GOPATH_SRC=1
-build/builder.sh mkrelease linux-gnu SUFFIX=.linux-2.6.32-gnu-amd64
-build/builder.sh mkrelease darwin SUFFIX=.darwin-10.9-amd64
-build/builder.sh mkrelease windows SUFFIX=.windows-6.2-amd64.exe
+build/builder.sh go install ./pkg/cmd/compile-build
+build/builder.sh env \
+ compile-build --all --buildtype=development
cp cockroach.* artifacts
diff --git a/build/teamcity-publish-artifacts.sh b/build/teamcity-publish-artifacts.sh
new file mode 100755
index 0000000000..7a53a07fd5
--- /dev/null
+++ b/build/teamcity-publish-artifacts.sh
@@ -0,0 +1,11 @@
+#!/usr/bin/env bash
+
+# Any arguments to this script are passed through unmodified to
+# ./build/teamcity-publish-s3-binaries.
+
+set -euxo pipefail
+
+export BUILDER_HIDE_GOPATH_SRC=1
+
+build/teamcity-publish-s3-binaries.sh "$@"
+
diff --git a/build/teamcity-publish-s3-binaries.sh b/build/teamcity-publish-s3-binaries.sh
new file mode 100755
index 0000000000..485895b847
--- /dev/null
+++ b/build/teamcity-publish-s3-binaries.sh
@@ -0,0 +1,15 @@
+#!/usr/bin/env bash
+
+# Any arguments to this script are passed through unmodified to
+# ./pkg/cmd/publish-artifacts.
+
+set -euxo pipefail
+
+export BUILDER_HIDE_GOPATH_SRC=1
+
+build/builder.sh go install ./pkg/cmd/publish-artifacts
+build/builder.sh env \
+ AWS_ACCESS_KEY_ID="$AWS_ACCESS_KEY_ID" \
+ AWS_SECRET_ACCESS_KEY="$AWS_SECRET_ACCESS_KEY" \
+ TC_BUILD_BRANCH="$TC_BUILD_BRANCH" \
+ publish-artifacts "$@"
diff --git a/build/teamcity-support.sh b/build/teamcity-support.sh
old mode 100755
new mode 100644
index 70b0afa031..260ece9f8d
--- a/build/teamcity-support.sh
+++ b/build/teamcity-support.sh
@@ -307,6 +307,23 @@ generate_ssh_key() {
fi
}
+maybe_require_release_justification() {
+ # Set this to 1 to require a "release justification" note in the commit message
+ # or the PR description.
+ require_justification=1
+ if [ "$require_justification" = 1 ]; then
+ tc_start_block "Ensure commit message contains a release justification"
+ # Ensure master branch commits have a release justification.
+ if [[ $(git log -n1 | grep -ci "Release justification: \S\+") == 0 ]]; then
+ echo "Build Failed. No Release justification in the commit message or in the PR description." >&2
+ echo "Commits must have a Release justification of the form:" >&2
+ echo "Release justification: <some description of why this commit is safe to add to the release branch.>" >&2
+ exit 1
+ fi
+ tc_end_block "Ensure commit message contains a release justification"
+ fi
+}
+
# Call this function with one argument, the error message to print if the
# workspace is dirty.
check_workspace_clean() {
diff --git a/build/teamcity/cockroach/ci/tests/lint.sh b/build/teamcity/cockroach/ci/tests/lint.sh
index 131984fe80..9255e3c10d 100755
--- a/build/teamcity/cockroach/ci/tests/lint.sh
+++ b/build/teamcity/cockroach/ci/tests/lint.sh
@@ -7,6 +7,8 @@ dir="$(dirname $(dirname $(dirname $(dirname $(dirname "${0}")))))"
source "$dir/teamcity-support.sh" # For $root
source "$dir/teamcity-bazel-support.sh" # For run_bazel
+maybe_require_release_justification
+
tc_start_block "Run lints"
run_bazel build/teamcity/cockroach/ci/tests/lint_impl.sh
tc_end_block "Run lints"
diff --git a/build/teamcity/cockroach/post-merge/publish-bleeding-edge.sh b/build/teamcity/cockroach/post-merge/publish-bleeding-edge.sh
deleted file mode 100755
index 0adeebdfb2..0000000000
--- a/build/teamcity/cockroach/post-merge/publish-bleeding-edge.sh
+++ /dev/null
@@ -1,16 +0,0 @@
-#!/usr/bin/env bash
-
-# This script is called by the build configuration
-# "Cockroach > Post Merge > Publish Bleeding Edge" in TeamCity.
-
-set -euxo pipefail
-
-dir="$(dirname $(dirname $(dirname $(dirname "${0}"))))"
-source "$dir/teamcity-support.sh"
-source "$dir/teamcity-bazel-support.sh"
-
-BAZEL_SUPPORT_EXTRA_DOCKER_ARGS="-e AWS_ACCESS_KEY_ID -e AWS_SECRET_ACCESS_KEY -e TC_BUILD_BRANCH" run_bazel << 'EOF'
-bazel build --config ci //pkg/cmd/publish-artifacts
-BAZEL_BIN=$(bazel info bazel-bin --config ci)
-$BAZEL_BIN/pkg/cmd/publish-artifacts/publish-artifacts_/publish-artifacts
-EOF
diff --git a/cloud/kubernetes/bring-your-own-certs/client.yaml b/cloud/kubernetes/bring-your-own-certs/client.yaml
index a53dbd6305..378e405c7f 100644
--- a/cloud/kubernetes/bring-your-own-certs/client.yaml
+++ b/cloud/kubernetes/bring-your-own-certs/client.yaml
@@ -1,4 +1,3 @@
-# Generated file, DO NOT EDIT. Source: cloud/kubernetes/templates/bring-your-own-certs/client.yaml
# This config file demonstrates how to connect to the CockroachDB StatefulSet
# defined in bring-your-own-certs-statefulset.yaml that uses certificates
# created outside of Kubernetes. See that file for why you may want to use it.
diff --git a/cloud/kubernetes/bring-your-own-certs/cockroachdb-statefulset.yaml b/cloud/kubernetes/bring-your-own-certs/cockroachdb-statefulset.yaml
index 51f62ba81d..86c1868de1 100644
--- a/cloud/kubernetes/bring-your-own-certs/cockroachdb-statefulset.yaml
+++ b/cloud/kubernetes/bring-your-own-certs/cockroachdb-statefulset.yaml
@@ -1,4 +1,3 @@
-# Generated file, DO NOT EDIT. Source: cloud/kubernetes/templates/bring-your-own-certs/cockroachdb-statefulset.yaml
# This config file defines a CockroachDB StatefulSet that uses certificates
# created outside of Kubernetes. You may want to use it if you want to use a
# different certificate authority from the one being used by Kubernetes or if
diff --git a/cloud/kubernetes/client-secure.yaml b/cloud/kubernetes/client-secure.yaml
index 577e8c1bfc..d0c7967dbd 100644
--- a/cloud/kubernetes/client-secure.yaml
+++ b/cloud/kubernetes/client-secure.yaml
@@ -1,4 +1,3 @@
-# Generated file, DO NOT EDIT. Source: cloud/kubernetes/templates/client-secure.yaml
apiVersion: v1
kind: Pod
metadata:
diff --git a/cloud/kubernetes/cluster-init-secure.yaml b/cloud/kubernetes/cluster-init-secure.yaml
index 840ee1f3fc..0154e8680e 100644
--- a/cloud/kubernetes/cluster-init-secure.yaml
+++ b/cloud/kubernetes/cluster-init-secure.yaml
@@ -1,4 +1,3 @@
-# Generated file, DO NOT EDIT. Source: cloud/kubernetes/templates/cluster-init-secure.yaml
apiVersion: batch/v1
kind: Job
metadata:
diff --git a/cloud/kubernetes/cluster-init.yaml b/cloud/kubernetes/cluster-init.yaml
index e9dd778dd5..d33a6fb7d0 100644
--- a/cloud/kubernetes/cluster-init.yaml
+++ b/cloud/kubernetes/cluster-init.yaml
@@ -1,4 +1,3 @@
-# Generated file, DO NOT EDIT. Source: cloud/kubernetes/templates/cluster-init.yaml
apiVersion: batch/v1
kind: Job
metadata:
diff --git a/cloud/kubernetes/cockroachdb-statefulset-secure.yaml b/cloud/kubernetes/cockroachdb-statefulset-secure.yaml
index 5056a8df5e..84e1979fd1 100644
--- a/cloud/kubernetes/cockroachdb-statefulset-secure.yaml
+++ b/cloud/kubernetes/cockroachdb-statefulset-secure.yaml
@@ -1,4 +1,3 @@
-# Generated file, DO NOT EDIT. Source: cloud/kubernetes/templates/cockroachdb-statefulset-secure.yaml
apiVersion: v1
kind: ServiceAccount
metadata:
diff --git a/cloud/kubernetes/cockroachdb-statefulset.yaml b/cloud/kubernetes/cockroachdb-statefulset.yaml
index 473617f0e7..10e1fd591e 100644
--- a/cloud/kubernetes/cockroachdb-statefulset.yaml
+++ b/cloud/kubernetes/cockroachdb-statefulset.yaml
@@ -1,4 +1,3 @@
-# Generated file, DO NOT EDIT. Source: cloud/kubernetes/templates/cockroachdb-statefulset.yaml
apiVersion: v1
kind: Service
metadata:
diff --git a/cloud/kubernetes/multiregion/client-secure.yaml b/cloud/kubernetes/multiregion/client-secure.yaml
index edf7fbfa64..cf57e92628 100644
--- a/cloud/kubernetes/multiregion/client-secure.yaml
+++ b/cloud/kubernetes/multiregion/client-secure.yaml
@@ -1,4 +1,3 @@
-# Generated file, DO NOT EDIT. Source: cloud/kubernetes/templates/multiregion/client-secure.yaml
apiVersion: v1
kind: Pod
metadata:
diff --git a/cloud/kubernetes/multiregion/cluster-init-secure.yaml b/cloud/kubernetes/multiregion/cluster-init-secure.yaml
index 3b44f3634e..c268d3f3e9 100644
--- a/cloud/kubernetes/multiregion/cluster-init-secure.yaml
+++ b/cloud/kubernetes/multiregion/cluster-init-secure.yaml
@@ -1,4 +1,3 @@
-# Generated file, DO NOT EDIT. Source: cloud/kubernetes/templates/multiregion/cluster-init-secure.yaml
apiVersion: batch/v1
kind: Job
metadata:
diff --git a/cloud/kubernetes/multiregion/cockroachdb-statefulset-secure.yaml b/cloud/kubernetes/multiregion/cockroachdb-statefulset-secure.yaml
index 3f32b0515e..891ea8ca9a 100644
--- a/cloud/kubernetes/multiregion/cockroachdb-statefulset-secure.yaml
+++ b/cloud/kubernetes/multiregion/cockroachdb-statefulset-secure.yaml
@@ -1,4 +1,3 @@
-# Generated file, DO NOT EDIT. Source: cloud/kubernetes/templates/multiregion/cockroachdb-statefulset-secure.yaml
apiVersion: v1
kind: ServiceAccount
metadata:
diff --git a/cloud/kubernetes/multiregion/eks/cockroachdb-statefulset-secure-eks.yaml b/cloud/kubernetes/multiregion/eks/cockroachdb-statefulset-secure-eks.yaml
index 24fea374d4..1513692774 100644
--- a/cloud/kubernetes/multiregion/eks/cockroachdb-statefulset-secure-eks.yaml
+++ b/cloud/kubernetes/multiregion/eks/cockroachdb-statefulset-secure-eks.yaml
@@ -1,4 +1,3 @@
-# Generated file, DO NOT EDIT. Source: cloud/kubernetes/templates/multiregion/eks/cockroachdb-statefulset-secure-eks.yaml
apiVersion: v1
kind: ServiceAccount
metadata:
diff --git a/cloud/kubernetes/performance/cockroachdb-daemonset-insecure.yaml b/cloud/kubernetes/performance/cockroachdb-daemonset-insecure.yaml
index e931f6ed8b..7e3add4cf9 100644
--- a/cloud/kubernetes/performance/cockroachdb-daemonset-insecure.yaml
+++ b/cloud/kubernetes/performance/cockroachdb-daemonset-insecure.yaml
@@ -1,4 +1,3 @@
-# Generated file, DO NOT EDIT. Source: cloud/kubernetes/templates/performance/cockroachdb-daemonset-insecure.yaml
# This configuration file sets up a DaemonSet running CockroachDB in insecure
# mode. For more information on why you might want to use a DaemonSet instead
# of a StatefulSet, see our docs:
diff --git a/cloud/kubernetes/performance/cockroachdb-daemonset-secure.yaml b/cloud/kubernetes/performance/cockroachdb-daemonset-secure.yaml
index 5174ccad09..4ba05cef70 100644
--- a/cloud/kubernetes/performance/cockroachdb-daemonset-secure.yaml
+++ b/cloud/kubernetes/performance/cockroachdb-daemonset-secure.yaml
@@ -1,4 +1,3 @@
-# Generated file, DO NOT EDIT. Source: cloud/kubernetes/templates/performance/cockroachdb-daemonset-secure.yaml
# This configuration file sets up a secure DaemonSet running CockroachDB.
# For more information on why you might want to use a DaemonSet instead
# of a StatefulSet, see our docs:
diff --git a/cloud/kubernetes/performance/cockroachdb-statefulset-insecure.yaml b/cloud/kubernetes/performance/cockroachdb-statefulset-insecure.yaml
index 79c20879e8..50a550c505 100644
--- a/cloud/kubernetes/performance/cockroachdb-statefulset-insecure.yaml
+++ b/cloud/kubernetes/performance/cockroachdb-statefulset-insecure.yaml
@@ -1,4 +1,3 @@
-# Generated file, DO NOT EDIT. Source: cloud/kubernetes/templates/performance/cockroachdb-statefulset-insecure.yaml
# This configuration file sets up an insecure StatefulSet running CockroachDB with
# tweaks to make it more performant than our default configuration files. All
# changes from the default insecure configuration have been marked with a comment
diff --git a/cloud/kubernetes/performance/cockroachdb-statefulset-secure.yaml b/cloud/kubernetes/performance/cockroachdb-statefulset-secure.yaml
index 2236ba9937..f7334e3add 100644
--- a/cloud/kubernetes/performance/cockroachdb-statefulset-secure.yaml
+++ b/cloud/kubernetes/performance/cockroachdb-statefulset-secure.yaml
@@ -1,4 +1,3 @@
-# Generated file, DO NOT EDIT. Source: cloud/kubernetes/templates/performance/cockroachdb-statefulset-secure.yaml
# This configuration file sets up a secure StatefulSet running CockroachDB with
# tweaks to make it more performant than our default configuration files. All
# changes from the default secure configuration have been marked with a comment
diff --git a/cloud/kubernetes/templates/bring-your-own-certs/client.yaml b/cloud/kubernetes/templates/bring-your-own-certs/client.yaml
deleted file mode 100644
index d486a5493b..0000000000
--- a/cloud/kubernetes/templates/bring-your-own-certs/client.yaml
+++ /dev/null
@@ -1,35 +0,0 @@
-# This config file demonstrates how to connect to the CockroachDB StatefulSet
-# defined in bring-your-own-certs-statefulset.yaml that uses certificates
-# created outside of Kubernetes. See that file for why you may want to use it.
-# You should be able to adapt the core ideas to deploy your own custom
-# applications and connect them to the database similarly.
-#
-# The pod that this file defines will sleep in the cluster not using any
-# resources. After creating the pod, you can use it to open up a SQL shell to
-# the database by running:
-#
-# kubectl exec -it cockroachdb-client-secure -- ./cockroach sql --url="postgres://root@cockroachdb-public:26257/?sslmode=verify-full&sslcert=/cockroach-certs/client.root.crt&sslkey=/cockroach-certs/client.root.key&sslrootcert=/cockroach-certs/ca.crt"
-apiVersion: v1
-kind: Pod
-metadata:
- name: cockroachdb-client-secure
- labels:
- app: cockroachdb-client
-spec:
- serviceAccountName: cockroachdb
- containers:
- - name: cockroachdb-client
- image: cockroachdb/cockroach:@VERSION@
- # Keep a pod open indefinitely so kubectl exec can be used to get a shell to it
- # and run cockroach client commands, such as cockroach sql, cockroach node status, etc.
- command:
- - sleep
- - "2147483648" # 2^31
- volumeMounts:
- - name: client-certs
- mountPath: /cockroach-certs
- volumes:
- - name: client-certs
- secret:
- secretName: cockroachdb.client.root
- defaultMode: 256
diff --git a/cloud/kubernetes/templates/bring-your-own-certs/cockroachdb-statefulset.yaml b/cloud/kubernetes/templates/bring-your-own-certs/cockroachdb-statefulset.yaml
deleted file mode 100644
index bbfd1c264c..0000000000
--- a/cloud/kubernetes/templates/bring-your-own-certs/cockroachdb-statefulset.yaml
+++ /dev/null
@@ -1,244 +0,0 @@
-# This config file defines a CockroachDB StatefulSet that uses certificates
-# created outside of Kubernetes. You may want to use it if you want to use a
-# different certificate authority from the one being used by Kubernetes or if
-# your Kubernetes cluster doesn't fully support certificate-signing requests
-# (e.g. as of July 2018, EKS doesn't work properly).
-#
-# To use this config file, first set up your certificates and load them into
-# your Kubernetes cluster as Secrets using the commands below:
-#
-# mkdir certs
-# mkdir my-safe-directory
-# cockroach cert create-ca --certs-dir=certs --ca-key=my-safe-directory/ca.key
-# cockroach cert create-client root --certs-dir=certs --ca-key=my-safe-directory/ca.key
-# kubectl create secret generic cockroachdb.client.root --from-file=certs
-# cockroach cert create-node --certs-dir=certs --ca-key=my-safe-directory/ca.key localhost 127.0.0.1 cockroachdb-public cockroachdb-public.default cockroachdb-public.default.svc.cluster.local *.cockroachdb *.cockroachdb.default *.cockroachdb.default.svc.cluster.local
-# kubectl create secret generic cockroachdb.node --from-file=certs
-# kubectl create -f bring-your-own-certs-statefulset.yaml
-# kubectl exec -it cockroachdb-0 -- /cockroach/cockroach init --certs-dir=/cockroach/cockroach-certs
-apiVersion: v1
-kind: ServiceAccount
-metadata:
- name: cockroachdb
- labels:
- app: cockroachdb
----
-apiVersion: rbac.authorization.k8s.io/v1
-kind: Role
-metadata:
- name: cockroachdb
- labels:
- app: cockroachdb
-rules:
-- apiGroups:
- - ""
- resources:
- - secrets
- verbs:
- - get
----
-apiVersion: rbac.authorization.k8s.io/v1
-kind: RoleBinding
-metadata:
- name: cockroachdb
- labels:
- app: cockroachdb
-roleRef:
- apiGroup: rbac.authorization.k8s.io
- kind: Role
- name: cockroachdb
-subjects:
-- kind: ServiceAccount
- name: cockroachdb
- namespace: default
----
-apiVersion: v1
-kind: Service
-metadata:
- # This service is meant to be used by clients of the database. It exposes a ClusterIP that will
- # automatically load balance connections to the different database pods.
- name: cockroachdb-public
- labels:
- app: cockroachdb
-spec:
- ports:
- # The main port, served by gRPC, serves Postgres-flavor SQL, internode
- # traffic and the cli.
- - port: 26257
- targetPort: 26257
- name: grpc
- # The secondary port serves the UI as well as health and debug endpoints.
- - port: 8080
- targetPort: 8080
- name: http
- selector:
- app: cockroachdb
----
-apiVersion: v1
-kind: Service
-metadata:
- # This service only exists to create DNS entries for each pod in the stateful
- # set such that they can resolve each other's IP addresses. It does not
- # create a load-balanced ClusterIP and should not be used directly by clients
- # in most circumstances.
- name: cockroachdb
- labels:
- app: cockroachdb
- annotations:
- # Use this annotation in addition to the actual publishNotReadyAddresses
- # field below because the annotation will stop being respected soon but the
- # field is broken in some versions of Kubernetes:
- # https://github.com/kubernetes/kubernetes/issues/58662
- service.alpha.kubernetes.io/tolerate-unready-endpoints: "true"
- # Enable automatic monitoring of all instances when Prometheus is running in the cluster.
- prometheus.io/scrape: "true"
- prometheus.io/path: "_status/vars"
- prometheus.io/port: "8080"
-spec:
- ports:
- - port: 26257
- targetPort: 26257
- name: grpc
- - port: 8080
- targetPort: 8080
- name: http
- # We want all pods in the StatefulSet to have their addresses published for
- # the sake of the other CockroachDB pods even before they're ready, since they
- # have to be able to talk to each other in order to become ready.
- publishNotReadyAddresses: true
- clusterIP: None
- selector:
- app: cockroachdb
----
-apiVersion: policy/v1beta1
-kind: PodDisruptionBudget
-metadata:
- name: cockroachdb-budget
- labels:
- app: cockroachdb
-spec:
- selector:
- matchLabels:
- app: cockroachdb
- maxUnavailable: 1
----
-apiVersion: apps/v1
-kind: StatefulSet
-metadata:
- name: cockroachdb
-spec:
- serviceName: "cockroachdb"
- replicas: 3
- selector:
- matchLabels:
- app: cockroachdb
- template:
- metadata:
- labels:
- app: cockroachdb
- spec:
- serviceAccountName: cockroachdb
- affinity:
- podAntiAffinity:
- preferredDuringSchedulingIgnoredDuringExecution:
- - weight: 100
- podAffinityTerm:
- labelSelector:
- matchExpressions:
- - key: app
- operator: In
- values:
- - cockroachdb
- topologyKey: kubernetes.io/hostname
- containers:
- - name: cockroachdb
- image: cockroachdb/cockroach:@VERSION@
- imagePullPolicy: IfNotPresent
- # TODO: Change these to appropriate values for the hardware that you're running. You can see
- # the resources that can be allocated on each of your Kubernetes nodes by running:
- # kubectl describe nodes
- # Note that requests and limits should have identical values.
- resources:
- requests:
- cpu: "2"
- memory: "8Gi"
- limits:
- cpu: "2"
- memory: "8Gi"
- ports:
- - containerPort: 26257
- name: grpc
- - containerPort: 8080
- name: http
-# We recommend that you do not configure a liveness probe on a production environment, as this can impact the availability of production databases.
-# livenessProbe:
-# httpGet:
-# path: "/health"
-# port: http
-# scheme: HTTPS
-# initialDelaySeconds: 30
-# periodSeconds: 5
- readinessProbe:
- httpGet:
- path: "/health?ready=1"
- port: http
- scheme: HTTPS
- initialDelaySeconds: 10
- periodSeconds: 5
- failureThreshold: 2
- volumeMounts:
- - name: datadir
- mountPath: /cockroach/cockroach-data
- - name: certs
- mountPath: /cockroach/cockroach-certs
- env:
- - name: COCKROACH_CHANNEL
- value: kubernetes-secure
- - name: GOMAXPROCS
- valueFrom:
- resourceFieldRef:
- resource: limits.cpu
- divisor: "1"
- - name: MEMORY_LIMIT_MIB
- valueFrom:
- resourceFieldRef:
- resource: limits.memory
- divisor: "1Mi"
- command:
- - "/bin/bash"
- - "-ecx"
- # The use of qualified `hostname -f` is crucial:
- # Other nodes aren't able to look up the unqualified hostname.
- - exec
- /cockroach/cockroach
- start
- --logtostderr
- --certs-dir /cockroach/cockroach-certs
- --advertise-host $(hostname -f)
- --http-addr 0.0.0.0
- --join cockroachdb-0.cockroachdb,cockroachdb-1.cockroachdb,cockroachdb-2.cockroachdb
- --cache $(expr $MEMORY_LIMIT_MIB / 4)MiB
- --max-sql-memory $(expr $MEMORY_LIMIT_MIB / 4)MiB
- # No pre-stop hook is required, a SIGTERM plus some time is all that's
- # needed for graceful shutdown of a node.
- terminationGracePeriodSeconds: 60
- volumes:
- - name: datadir
- persistentVolumeClaim:
- claimName: datadir
- - name: certs
- secret:
- secretName: cockroachdb.node
- defaultMode: 256
- podManagementPolicy: Parallel
- updateStrategy:
- type: RollingUpdate
- volumeClaimTemplates:
- - metadata:
- name: datadir
- spec:
- accessModes:
- - "ReadWriteOnce"
- resources:
- requests:
- storage: 100Gi
diff --git a/cloud/kubernetes/templates/client-secure.yaml b/cloud/kubernetes/templates/client-secure.yaml
deleted file mode 100644
index 0df772c510..0000000000
--- a/cloud/kubernetes/templates/client-secure.yaml
+++ /dev/null
@@ -1,48 +0,0 @@
-apiVersion: v1
-kind: Pod
-metadata:
- name: cockroachdb-client-secure
- labels:
- app: cockroachdb-client
-spec:
- serviceAccountName: cockroachdb
- initContainers:
- # The init-certs container sends a certificate signing request to the
- # kubernetes cluster.
- # You can see pending requests using: kubectl get csr
- # CSRs can be approved using: kubectl certificate approve <csr name>
- #
- # In addition to the client certificate and key, the init-certs entrypoint will symlink
- # the cluster CA to the certs directory.
- - name: init-certs
- image: cockroachdb/cockroach-k8s-request-cert:0.4
- imagePullPolicy: IfNotPresent
- command:
- - "/bin/ash"
- - "-ecx"
- - "/request-cert -namespace=${POD_NAMESPACE} -certs-dir=/cockroach-certs -type=client -user=root -symlink-ca-from=/var/run/secrets/kubernetes.io/serviceaccount/ca.crt"
- env:
- - name: POD_NAMESPACE
- valueFrom:
- fieldRef:
- fieldPath: metadata.namespace
- volumeMounts:
- - name: client-certs
- mountPath: /cockroach-certs
- containers:
- - name: cockroachdb-client
- image: cockroachdb/cockroach:@VERSION@
- imagePullPolicy: IfNotPresent
- volumeMounts:
- - name: client-certs
- mountPath: /cockroach-certs
- # Keep a pod open indefinitely so kubectl exec can be used to get a shell to it
- # and run cockroach client commands, such as cockroach sql, cockroach node status, etc.
- command:
- - sleep
- - "2147483648" # 2^31
- # This pod isn't doing anything important, so don't bother waiting to terminate it.
- terminationGracePeriodSeconds: 0
- volumes:
- - name: client-certs
- emptyDir: {}
diff --git a/cloud/kubernetes/templates/cluster-init-secure.yaml b/cloud/kubernetes/templates/cluster-init-secure.yaml
deleted file mode 100644
index 830f2928a9..0000000000
--- a/cloud/kubernetes/templates/cluster-init-secure.yaml
+++ /dev/null
@@ -1,49 +0,0 @@
-apiVersion: batch/v1
-kind: Job
-metadata:
- name: cluster-init-secure
- labels:
- app: cockroachdb
-spec:
- template:
- spec:
- serviceAccountName: cockroachdb
- initContainers:
- # The init-certs container sends a certificate signing request to the
- # kubernetes cluster.
- # You can see pending requests using: kubectl get csr
- # CSRs can be approved using: kubectl certificate approve <csr name>
- #
- # In addition to the client certificate and key, the init-certs entrypoint will symlink
- # the cluster CA to the certs directory.
- - name: init-certs
- image: cockroachdb/cockroach-k8s-request-cert:0.4
- imagePullPolicy: IfNotPresent
- command:
- - "/bin/ash"
- - "-ecx"
- - "/request-cert -namespace=${POD_NAMESPACE} -certs-dir=/cockroach-certs -type=client -user=root -symlink-ca-from=/var/run/secrets/kubernetes.io/serviceaccount/ca.crt"
- env:
- - name: POD_NAMESPACE
- valueFrom:
- fieldRef:
- fieldPath: metadata.namespace
- volumeMounts:
- - name: client-certs
- mountPath: /cockroach-certs
- containers:
- - name: cluster-init
- image: cockroachdb/cockroach:@VERSION@
- imagePullPolicy: IfNotPresent
- volumeMounts:
- - name: client-certs
- mountPath: /cockroach-certs
- command:
- - "/cockroach/cockroach"
- - "init"
- - "--certs-dir=/cockroach-certs"
- - "--host=cockroachdb-0.cockroachdb"
- restartPolicy: OnFailure
- volumes:
- - name: client-certs
- emptyDir: {}
diff --git a/cloud/kubernetes/templates/cluster-init.yaml b/cloud/kubernetes/templates/cluster-init.yaml
deleted file mode 100644
index 07a43d8456..0000000000
--- a/cloud/kubernetes/templates/cluster-init.yaml
+++ /dev/null
@@ -1,19 +0,0 @@
-apiVersion: batch/v1
-kind: Job
-metadata:
- name: cluster-init
- labels:
- app: cockroachdb
-spec:
- template:
- spec:
- containers:
- - name: cluster-init
- image: cockroachdb/cockroach:@VERSION@
- imagePullPolicy: IfNotPresent
- command:
- - "/cockroach/cockroach"
- - "init"
- - "--insecure"
- - "--host=cockroachdb-0.cockroachdb"
- restartPolicy: OnFailure
diff --git a/cloud/kubernetes/templates/cockroachdb-statefulset-secure.yaml b/cloud/kubernetes/templates/cockroachdb-statefulset-secure.yaml
deleted file mode 100644
index 86e1682cb8..0000000000
--- a/cloud/kubernetes/templates/cockroachdb-statefulset-secure.yaml
+++ /dev/null
@@ -1,285 +0,0 @@
-apiVersion: v1
-kind: ServiceAccount
-metadata:
- name: cockroachdb
- labels:
- app: cockroachdb
----
-apiVersion: rbac.authorization.k8s.io/v1
-kind: Role
-metadata:
- name: cockroachdb
- labels:
- app: cockroachdb
-rules:
-- apiGroups:
- - ""
- resources:
- - secrets
- verbs:
- - create
- - get
----
-apiVersion: rbac.authorization.k8s.io/v1
-kind: ClusterRole
-metadata:
- name: cockroachdb
- labels:
- app: cockroachdb
-rules:
-- apiGroups:
- - certificates.k8s.io
- resources:
- - certificatesigningrequests
- verbs:
- - create
- - get
- - watch
----
-apiVersion: rbac.authorization.k8s.io/v1
-kind: RoleBinding
-metadata:
- name: cockroachdb
- labels:
- app: cockroachdb
-roleRef:
- apiGroup: rbac.authorization.k8s.io
- kind: Role
- name: cockroachdb
-subjects:
-- kind: ServiceAccount
- name: cockroachdb
- namespace: default
----
-apiVersion: rbac.authorization.k8s.io/v1
-kind: ClusterRoleBinding
-metadata:
- name: cockroachdb
- labels:
- app: cockroachdb
-roleRef:
- apiGroup: rbac.authorization.k8s.io
- kind: ClusterRole
- name: cockroachdb
-subjects:
-- kind: ServiceAccount
- name: cockroachdb
- namespace: default
----
-apiVersion: v1
-kind: Service
-metadata:
- # This service is meant to be used by clients of the database. It exposes a ClusterIP that will
- # automatically load balance connections to the different database pods.
- name: cockroachdb-public
- labels:
- app: cockroachdb
-spec:
- ports:
- # The main port, served by gRPC, serves Postgres-flavor SQL, internode
- # traffic and the cli.
- - port: 26257
- targetPort: 26257
- name: grpc
- # The secondary port serves the UI as well as health and debug endpoints.
- - port: 8080
- targetPort: 8080
- name: http
- selector:
- app: cockroachdb
----
-apiVersion: v1
-kind: Service
-metadata:
- # This service only exists to create DNS entries for each pod in the stateful
- # set such that they can resolve each other's IP addresses. It does not
- # create a load-balanced ClusterIP and should not be used directly by clients
- # in most circumstances.
- name: cockroachdb
- labels:
- app: cockroachdb
- annotations:
- # Use this annotation in addition to the actual publishNotReadyAddresses
- # field below because the annotation will stop being respected soon but the
- # field is broken in some versions of Kubernetes:
- # https://github.com/kubernetes/kubernetes/issues/58662
- service.alpha.kubernetes.io/tolerate-unready-endpoints: "true"
- # Enable automatic monitoring of all instances when Prometheus is running in the cluster.
- prometheus.io/scrape: "true"
- prometheus.io/path: "_status/vars"
- prometheus.io/port: "8080"
-spec:
- ports:
- - port: 26257
- targetPort: 26257
- name: grpc
- - port: 8080
- targetPort: 8080
- name: http
- # We want all pods in the StatefulSet to have their addresses published for
- # the sake of the other CockroachDB pods even before they're ready, since they
- # have to be able to talk to each other in order to become ready.
- publishNotReadyAddresses: true
- clusterIP: None
- selector:
- app: cockroachdb
----
-apiVersion: policy/v1beta1
-kind: PodDisruptionBudget
-metadata:
- name: cockroachdb-budget
- labels:
- app: cockroachdb
-spec:
- selector:
- matchLabels:
- app: cockroachdb
- maxUnavailable: 1
----
-apiVersion: apps/v1
-kind: StatefulSet
-metadata:
- name: cockroachdb
-spec:
- serviceName: "cockroachdb"
- replicas: 3
- selector:
- matchLabels:
- app: cockroachdb
- template:
- metadata:
- labels:
- app: cockroachdb
- spec:
- serviceAccountName: cockroachdb
- # Init containers are run only once in the lifetime of a pod, before
- # it's started up for the first time. It has to exit successfully
- # before the pod's main containers are allowed to start.
- initContainers:
- # The init-certs container sends a certificate signing request to the
- # kubernetes cluster.
- # You can see pending requests using: kubectl get csr
- # CSRs can be approved using: kubectl certificate approve <csr name>
- #
- # All addresses used to contact a node must be specified in the --addresses arg.
- #
- # In addition to the node certificate and key, the init-certs entrypoint will symlink
- # the cluster CA to the certs directory.
- - name: init-certs
- image: cockroachdb/cockroach-k8s-request-cert:0.4
- imagePullPolicy: IfNotPresent
- command:
- - "/bin/ash"
- - "-ecx"
- - "/request-cert -namespace=${POD_NAMESPACE} -certs-dir=/cockroach-certs -type=node -addresses=localhost,127.0.0.1,$(hostname -f),$(hostname -f|cut -f 1-2 -d '.'),cockroachdb-public,cockroachdb-public.$(hostname -f|cut -f 3- -d '.'),cockroachdb-public.$(hostname -f|cut -f 3-4 -d '.'),cockroachdb-public.$(hostname -f|cut -f 3 -d '.') -symlink-ca-from=/var/run/secrets/kubernetes.io/serviceaccount/ca.crt"
- env:
- - name: POD_NAMESPACE
- valueFrom:
- fieldRef:
- fieldPath: metadata.namespace
- volumeMounts:
- - name: certs
- mountPath: /cockroach-certs
- affinity:
- podAntiAffinity:
- preferredDuringSchedulingIgnoredDuringExecution:
- - weight: 100
- podAffinityTerm:
- labelSelector:
- matchExpressions:
- - key: app
- operator: In
- values:
- - cockroachdb
- topologyKey: kubernetes.io/hostname
- containers:
- - name: cockroachdb
- image: cockroachdb/cockroach:@VERSION@
- imagePullPolicy: IfNotPresent
- # TODO: Change these to appropriate values for the hardware that you're running. You can see
- # the resources that can be allocated on each of your Kubernetes nodes by running:
- # kubectl describe nodes
- # Note that requests and limits should have identical values.
- resources:
- requests:
- cpu: "2"
- memory: "8Gi"
- limits:
- cpu: "2"
- memory: "8Gi"
- ports:
- - containerPort: 26257
- name: grpc
- - containerPort: 8080
- name: http
-# We recommend that you do not configure a liveness probe on a production environment, as this can impact the availability of production databases.
-# livenessProbe:
-# httpGet:
-# path: "/health"
-# port: http
-# scheme: HTTPS
-# initialDelaySeconds: 30
-# periodSeconds: 5
- readinessProbe:
- httpGet:
- path: "/health?ready=1"
- port: http
- scheme: HTTPS
- initialDelaySeconds: 10
- periodSeconds: 5
- failureThreshold: 2
- volumeMounts:
- - name: datadir
- mountPath: /cockroach/cockroach-data
- - name: certs
- mountPath: /cockroach/cockroach-certs
- env:
- - name: COCKROACH_CHANNEL
- value: kubernetes-secure
- - name: GOMAXPROCS
- valueFrom:
- resourceFieldRef:
- resource: limits.cpu
- divisor: "1"
- - name: MEMORY_LIMIT_MIB
- valueFrom:
- resourceFieldRef:
- resource: limits.memory
- divisor: "1Mi"
- command:
- - "/bin/bash"
- - "-ecx"
- # The use of qualified `hostname -f` is crucial:
- # Other nodes aren't able to look up the unqualified hostname.
- # Memory caches are set as a fraction of the pod's memory limit.
- - exec
- /cockroach/cockroach
- start
- --logtostderr
- --certs-dir /cockroach/cockroach-certs
- --advertise-host $(hostname -f)
- --http-addr 0.0.0.0
- --join cockroachdb-0.cockroachdb,cockroachdb-1.cockroachdb,cockroachdb-2.cockroachdb
- --cache $(expr $MEMORY_LIMIT_MIB / 4)MiB
- --max-sql-memory $(expr $MEMORY_LIMIT_MIB / 4)MiB
- # No pre-stop hook is required, a SIGTERM plus some time is all that's
- # needed for graceful shutdown of a node.
- terminationGracePeriodSeconds: 60
- volumes:
- - name: datadir
- persistentVolumeClaim:
- claimName: datadir
- - name: certs
- emptyDir: {}
- podManagementPolicy: Parallel
- updateStrategy:
- type: RollingUpdate
- volumeClaimTemplates:
- - metadata:
- name: datadir
- spec:
- accessModes:
- - "ReadWriteOnce"
- resources:
- requests:
- storage: 100Gi
diff --git a/cloud/kubernetes/templates/cockroachdb-statefulset.yaml b/cloud/kubernetes/templates/cockroachdb-statefulset.yaml
deleted file mode 100644
index f29bc741e2..0000000000
--- a/cloud/kubernetes/templates/cockroachdb-statefulset.yaml
+++ /dev/null
@@ -1,181 +0,0 @@
-apiVersion: v1
-kind: Service
-metadata:
- # This service is meant to be used by clients of the database. It exposes a ClusterIP that will
- # automatically load balance connections to the different database pods.
- name: cockroachdb-public
- labels:
- app: cockroachdb
-spec:
- ports:
- # The main port, served by gRPC, serves Postgres-flavor SQL, internode
- # traffic and the cli.
- - port: 26257
- targetPort: 26257
- name: grpc
- # The secondary port serves the UI as well as health and debug endpoints.
- - port: 8080
- targetPort: 8080
- name: http
- selector:
- app: cockroachdb
----
-apiVersion: v1
-kind: Service
-metadata:
- # This service only exists to create DNS entries for each pod in the stateful
- # set such that they can resolve each other's IP addresses. It does not
- # create a load-balanced ClusterIP and should not be used directly by clients
- # in most circumstances.
- name: cockroachdb
- labels:
- app: cockroachdb
- annotations:
- # Use this annotation in addition to the actual publishNotReadyAddresses
- # field below because the annotation will stop being respected soon but the
- # field is broken in some versions of Kubernetes:
- # https://github.com/kubernetes/kubernetes/issues/58662
- service.alpha.kubernetes.io/tolerate-unready-endpoints: "true"
- # Enable automatic monitoring of all instances when Prometheus is running in the cluster.
- prometheus.io/scrape: "true"
- prometheus.io/path: "_status/vars"
- prometheus.io/port: "8080"
-spec:
- ports:
- - port: 26257
- targetPort: 26257
- name: grpc
- - port: 8080
- targetPort: 8080
- name: http
- # We want all pods in the StatefulSet to have their addresses published for
- # the sake of the other CockroachDB pods even before they're ready, since they
- # have to be able to talk to each other in order to become ready.
- publishNotReadyAddresses: true
- clusterIP: None
- selector:
- app: cockroachdb
----
-apiVersion: policy/v1beta1
-kind: PodDisruptionBudget
-metadata:
- name: cockroachdb-budget
- labels:
- app: cockroachdb
-spec:
- selector:
- matchLabels:
- app: cockroachdb
- maxUnavailable: 1
----
-apiVersion: apps/v1
-kind: StatefulSet
-metadata:
- name: cockroachdb
-spec:
- serviceName: "cockroachdb"
- replicas: 3
- selector:
- matchLabels:
- app: cockroachdb
- template:
- metadata:
- labels:
- app: cockroachdb
- spec:
- affinity:
- podAntiAffinity:
- preferredDuringSchedulingIgnoredDuringExecution:
- - weight: 100
- podAffinityTerm:
- labelSelector:
- matchExpressions:
- - key: app
- operator: In
- values:
- - cockroachdb
- topologyKey: kubernetes.io/hostname
- containers:
- - name: cockroachdb
- image: cockroachdb/cockroach:@VERSION@
- imagePullPolicy: IfNotPresent
- # TODO: Change these to appropriate values for the hardware that you're running. You can see
- # the resources that can be allocated on each of your Kubernetes nodes by running:
- # kubectl describe nodes
- # Note that requests and limits should have identical values.
- resources:
- requests:
- cpu: "2"
- memory: "8Gi"
- limits:
- cpu: "2"
- memory: "8Gi"
- ports:
- - containerPort: 26257
- name: grpc
- - containerPort: 8080
- name: http
-# We recommend that you do not configure a liveness probe on a production environment, as this can impact the availability of production databases.
-# livenessProbe:
-# httpGet:
-# path: "/health"
-# port: http
-# initialDelaySeconds: 30
-# periodSeconds: 5
- readinessProbe:
- httpGet:
- path: "/health?ready=1"
- port: http
- initialDelaySeconds: 10
- periodSeconds: 5
- failureThreshold: 2
- volumeMounts:
- - name: datadir
- mountPath: /cockroach/cockroach-data
- env:
- - name: COCKROACH_CHANNEL
- value: kubernetes-insecure
- - name: GOMAXPROCS
- valueFrom:
- resourceFieldRef:
- resource: limits.cpu
- divisor: "1"
- - name: MEMORY_LIMIT_MIB
- valueFrom:
- resourceFieldRef:
- resource: limits.memory
- divisor: "1Mi"
- command:
- - "/bin/bash"
- - "-ecx"
- # The use of qualified `hostname -f` is crucial:
- # Other nodes aren't able to look up the unqualified hostname.
- - exec
- /cockroach/cockroach
- start
- --logtostderr
- --insecure
- --advertise-host $(hostname -f)
- --http-addr 0.0.0.0
- --join cockroachdb-0.cockroachdb,cockroachdb-1.cockroachdb,cockroachdb-2.cockroachdb
- --cache $(expr $MEMORY_LIMIT_MIB / 4)MiB
- --max-sql-memory $(expr $MEMORY_LIMIT_MIB / 4)MiB
- # No pre-stop hook is required, a SIGTERM plus some time is all that's
- # needed for graceful shutdown of a node.
- terminationGracePeriodSeconds: 60
- volumes:
- - name: datadir
- persistentVolumeClaim:
- claimName: datadir
- podManagementPolicy: Parallel
- updateStrategy:
- type: RollingUpdate
- volumeClaimTemplates:
- - metadata:
- name: datadir
- spec:
- accessModes:
- - "ReadWriteOnce"
- resources:
- requests:
- storage: 100Gi
diff --git a/cloud/kubernetes/templates/multiregion/client-secure.yaml b/cloud/kubernetes/templates/multiregion/client-secure.yaml
deleted file mode 100644
index c72a38f8fe..0000000000
--- a/cloud/kubernetes/templates/multiregion/client-secure.yaml
+++ /dev/null
@@ -1,27 +0,0 @@
-apiVersion: v1
-kind: Pod
-metadata:
- name: cockroachdb-client-secure
- labels:
- app: cockroachdb-client
-spec:
- serviceAccountName: cockroachdb
- containers:
- - name: cockroachdb-client
- image: cockroachdb/cockroach:@VERSION@
- imagePullPolicy: IfNotPresent
- volumeMounts:
- - name: client-certs
- mountPath: /cockroach-certs
- # Keep a pod open indefinitely so kubectl exec can be used to get a shell to it
- # and run cockroach client commands, such as cockroach sql, cockroach node status, etc.
- command:
- - sleep
- - "2147483648" # 2^31
- # This pod isn't doing anything important, so don't bother waiting to terminate it.
- terminationGracePeriodSeconds: 0
- volumes:
- - name: client-certs
- secret:
- secretName: cockroachdb.client.root
- defaultMode: 256
diff --git a/cloud/kubernetes/templates/multiregion/cluster-init-secure.yaml b/cloud/kubernetes/templates/multiregion/cluster-init-secure.yaml
deleted file mode 100644
index 015c1c11d9..0000000000
--- a/cloud/kubernetes/templates/multiregion/cluster-init-secure.yaml
+++ /dev/null
@@ -1,28 +0,0 @@
-apiVersion: batch/v1
-kind: Job
-metadata:
- name: cluster-init-secure
- labels:
- app: cockroachdb
-spec:
- template:
- spec:
- serviceAccountName: cockroachdb
- containers:
- - name: cluster-init
- image: cockroachdb/cockroach:@VERSION@
- imagePullPolicy: IfNotPresent
- volumeMounts:
- - name: client-certs
- mountPath: /cockroach-certs
- command:
- - "/cockroach/cockroach"
- - "init"
- - "--certs-dir=/cockroach-certs"
- - "--host=cockroachdb-0.cockroachdb"
- restartPolicy: OnFailure
- volumes:
- - name: client-certs
- secret:
- secretName: cockroachdb.client.root
- defaultMode: 256
diff --git a/cloud/kubernetes/templates/multiregion/cockroachdb-statefulset-secure.yaml b/cloud/kubernetes/templates/multiregion/cockroachdb-statefulset-secure.yaml
deleted file mode 100644
index cd6379acda..0000000000
--- a/cloud/kubernetes/templates/multiregion/cockroachdb-statefulset-secure.yaml
+++ /dev/null
@@ -1,248 +0,0 @@
-apiVersion: v1
-kind: ServiceAccount
-metadata:
- name: cockroachdb
- labels:
- app: cockroachdb
----
-apiVersion: rbac.authorization.k8s.io/v1
-kind: Role
-metadata:
- name: cockroachdb
- labels:
- app: cockroachdb
-rules:
-- apiGroups:
- - ""
- resources:
- - secrets
- verbs:
- - create
- - get
----
-apiVersion: rbac.authorization.k8s.io/v1
-kind: ClusterRole
-metadata:
- name: cockroachdb
- labels:
- app: cockroachdb
-rules:
-- apiGroups:
- - certificates.k8s.io
- resources:
- - certificatesigningrequests
- verbs:
- - create
- - get
- - watch
----
-apiVersion: rbac.authorization.k8s.io/v1
-kind: RoleBinding
-metadata:
- name: cockroachdb
- labels:
- app: cockroachdb
-roleRef:
- apiGroup: rbac.authorization.k8s.io
- kind: Role
- name: cockroachdb
-subjects:
-- kind: ServiceAccount
- name: cockroachdb
- namespace: default
----
-apiVersion: rbac.authorization.k8s.io/v1
-kind: ClusterRoleBinding
-metadata:
- name: cockroachdb
- labels:
- app: cockroachdb
-roleRef:
- apiGroup: rbac.authorization.k8s.io
- kind: ClusterRole
- name: cockroachdb
-subjects:
-- kind: ServiceAccount
- name: cockroachdb
- namespace: default
----
-apiVersion: v1
-kind: Service
-metadata:
- # This service is meant to be used by clients of the database. It exposes a ClusterIP that will
- # automatically load balance connections to the different database pods.
- name: cockroachdb-public
- labels:
- app: cockroachdb
-spec:
- ports:
- # The main port, served by gRPC, serves Postgres-flavor SQL, internode
- # traffic and the cli.
- - port: 26257
- targetPort: 26257
- name: grpc
- # The secondary port serves the UI as well as health and debug endpoints.
- - port: 8080
- targetPort: 8080
- name: http
- selector:
- app: cockroachdb
----
-apiVersion: v1
-kind: Service
-metadata:
- # This service only exists to create DNS entries for each pod in the stateful
- # set such that they can resolve each other's IP addresses. It does not
- # create a load-balanced ClusterIP and should not be used directly by clients
- # in most circumstances.
- name: cockroachdb
- labels:
- app: cockroachdb
- annotations:
- # Use this annotation in addition to the actual publishNotReadyAddresses
- # field below because the annotation will stop being respected soon but the
- # field is broken in some versions of Kubernetes:
- # https://github.com/kubernetes/kubernetes/issues/58662
- service.alpha.kubernetes.io/tolerate-unready-endpoints: "true"
- # Enable automatic monitoring of all instances when Prometheus is running in the cluster.
- prometheus.io/scrape: "true"
- prometheus.io/path: "_status/vars"
- prometheus.io/port: "8080"
-spec:
- ports:
- - port: 26257
- targetPort: 26257
- name: grpc
- - port: 8080
- targetPort: 8080
- name: http
- # We want all pods in the StatefulSet to have their addresses published for
- # the sake of the other CockroachDB pods even before they're ready, since they
- # have to be able to talk to each other in order to become ready.
- publishNotReadyAddresses: true
- clusterIP: None
- selector:
- app: cockroachdb
----
-apiVersion: policy/v1beta1
-kind: PodDisruptionBudget
-metadata:
- name: cockroachdb-budget
- labels:
- app: cockroachdb
-spec:
- selector:
- matchLabels:
- app: cockroachdb
- maxUnavailable: 1
----
-apiVersion: apps/v1
-kind: StatefulSet
-metadata:
- name: cockroachdb
-spec:
- serviceName: "cockroachdb"
- replicas: 3
- selector:
- matchLabels:
- app: cockroachdb
- template:
- metadata:
- labels:
- app: cockroachdb
- spec:
- serviceAccountName: cockroachdb
- affinity:
- podAntiAffinity:
- preferredDuringSchedulingIgnoredDuringExecution:
- - weight: 100
- podAffinityTerm:
- labelSelector:
- matchExpressions:
- - key: app
- operator: In
- values:
- - cockroachdb
- topologyKey: kubernetes.io/hostname
- containers:
- - name: cockroachdb
- image: cockroachdb/cockroach:@VERSION@
- imagePullPolicy: IfNotPresent
- ports:
- - containerPort: 26257
- name: grpc
- - containerPort: 8080
- name: http
-# We recommend that you do not configure a liveness probe on a production environment, as this can impact the availability of production databases.
-# livenessProbe:
-# httpGet:
-# path: "/health"
-# port: http
-# scheme: HTTPS
-# initialDelaySeconds: 30
-# periodSeconds: 5
- readinessProbe:
- httpGet:
- path: "/health?ready=1"
- port: http
- scheme: HTTPS
- initialDelaySeconds: 10
- periodSeconds: 5
- failureThreshold: 2
- volumeMounts:
- - name: datadir
- mountPath: /cockroach/cockroach-data
- - name: certs
- mountPath: /cockroach/cockroach-certs
- env:
- - name: COCKROACH_CHANNEL
- value: kubernetes-multiregion
- - name: GOMAXPROCS
- valueFrom:
- resourceFieldRef:
- resource: limits.cpu
- divisor: "1"
- - name: MEMORY_LIMIT_MIB
- valueFrom:
- resourceFieldRef:
- resource: limits.memory
- divisor: "1Mi"
- command:
- - "/bin/bash"
- - "-ecx"
- # The use of qualified `hostname -f` is crucial:
- # Other nodes aren't able to look up the unqualified hostname.
- - exec
- /cockroach/cockroach
- start
- --logtostderr
- --certs-dir /cockroach/cockroach-certs
- --advertise-host $(hostname -f)
- --http-addr 0.0.0.0
- --join JOINLIST
- --locality LOCALITYLIST
- --cache $(expr $MEMORY_LIMIT_MIB / 4)MiB
- --max-sql-memory $(expr $MEMORY_LIMIT_MIB / 4)MiB
- # No pre-stop hook is required, a SIGTERM plus some time is all that's
- # needed for graceful shutdown of a node.
- terminationGracePeriodSeconds: 60
- volumes:
- - name: datadir
- persistentVolumeClaim:
- claimName: datadir
- - name: certs
- secret:
- secretName: cockroachdb.node
- defaultMode: 256
- podManagementPolicy: Parallel
- updateStrategy:
- type: RollingUpdate
- volumeClaimTemplates:
- - metadata:
- name: datadir
- spec:
- accessModes:
- - "ReadWriteOnce"
- resources:
- requests:
- storage: 100Gi
diff --git a/cloud/kubernetes/templates/multiregion/eks/cockroachdb-statefulset-secure-eks.yaml b/cloud/kubernetes/templates/multiregion/eks/cockroachdb-statefulset-secure-eks.yaml
deleted file mode 100644
index e829be2460..0000000000
--- a/cloud/kubernetes/templates/multiregion/eks/cockroachdb-statefulset-secure-eks.yaml
+++ /dev/null
@@ -1,282 +0,0 @@
-apiVersion: v1
-kind: ServiceAccount
-metadata:
- name: cockroachdb
- labels:
- app: cockroachdb
----
-apiVersion: rbac.authorization.k8s.io/v1
-kind: Role
-metadata:
- name: cockroachdb
- labels:
- app: cockroachdb
-rules:
-- apiGroups:
- - ""
- resources:
- - secrets
- verbs:
- - create
- - get
----
-apiVersion: rbac.authorization.k8s.io/v1
-kind: ClusterRole
-metadata:
- name: cockroachdb
- labels:
- app: cockroachdb
-rules:
-- apiGroups:
- - certificates.k8s.io
- resources:
- - certificatesigningrequests
- verbs:
- - create
- - get
- - watch
----
-apiVersion: rbac.authorization.k8s.io/v1
-kind: RoleBinding
-metadata:
- name: cockroachdb
- labels:
- app: cockroachdb
-roleRef:
- apiGroup: rbac.authorization.k8s.io
- kind: Role
- name: cockroachdb
-subjects:
-- kind: ServiceAccount
- name: cockroachdb
- namespace: default
----
-apiVersion: rbac.authorization.k8s.io/v1
-kind: ClusterRoleBinding
-metadata:
- name: cockroachdb
- labels:
- app: cockroachdb
-roleRef:
- apiGroup: rbac.authorization.k8s.io
- kind: ClusterRole
- name: cockroachdb
-subjects:
-- kind: ServiceAccount
- name: cockroachdb
- namespace: default
----
-apiVersion: v1
-kind: Service
-metadata:
- # This service is meant to be used by clients of the database. It exposes a ClusterIP that will
- # automatically load balance connections to the different database pods.
- name: cockroachdb-public
- labels:
- app: cockroachdb
-spec:
- ports:
- # The main port, served by gRPC, serves Postgres-flavor SQL, internode
- # traffic and the cli.
- - port: 26257
- targetPort: 26257
- name: grpc
- # The secondary port serves the UI as well as health and debug endpoints.
- - port: 8080
- targetPort: 8080
- name: http
- selector:
- app: cockroachdb
----
-apiVersion: v1
-kind: Service
-metadata:
- # This service only exists to create DNS entries for each pod in the stateful
- # set such that they can resolve each other's IP addresses. It does not
- # create a load-balanced ClusterIP and should not be used directly by clients
- # in most circumstances.
- name: cockroachdb
- labels:
- app: cockroachdb
- annotations:
- # Use this annotation in addition to the actual publishNotReadyAddresses
- # field below because the annotation will stop being respected soon but the
- # field is broken in some versions of Kubernetes:
- # https://github.com/kubernetes/kubernetes/issues/58662
- service.alpha.kubernetes.io/tolerate-unready-endpoints: "true"
- # Enable automatic monitoring of all instances when Prometheus is running in the cluster.
- prometheus.io/scrape: "true"
- prometheus.io/path: "_status/vars"
- prometheus.io/port: "8080"
-spec:
- ports:
- - port: 26257
- targetPort: 26257
- name: grpc
- - port: 8080
- targetPort: 8080
- name: http
- # We want all pods in the StatefulSet to have their addresses published for
- # the sake of the other CockroachDB pods even before they're ready, since they
- # have to be able to talk to each other in order to become ready.
- publishNotReadyAddresses: true
- clusterIP: None
- selector:
- app: cockroachdb
----
-apiVersion: policy/v1beta1
-kind: PodDisruptionBudget
-metadata:
- name: cockroachdb-budget
- labels:
- app: cockroachdb
-spec:
- selector:
- matchLabels:
- app: cockroachdb
- maxUnavailable: 1
----
-apiVersion: apps/v1
-kind: StatefulSet
-metadata:
- name: cockroachdb
- # TODO: Use this field to specify a namespace other than "default" in which to deploy CockroachDB (e.g., us-east-1).
- # namespace: <cluster-namespace>
-spec:
- serviceName: "cockroachdb"
- replicas: 3
- selector:
- matchLabels:
- app: cockroachdb
- template:
- metadata:
- labels:
- app: cockroachdb
- spec:
- serviceAccountName: cockroachdb
- affinity:
- podAntiAffinity:
- preferredDuringSchedulingIgnoredDuringExecution:
- - weight: 100
- podAffinityTerm:
- labelSelector:
- matchExpressions:
- - key: app
- operator: In
- values:
- - cockroachdb
- topologyKey: kubernetes.io/hostname
- # This init container is used to determine the availability zones of the Cockroach pods. The AZs are used to define --locality when starting Cockroach nodes.
- initContainers:
- - command:
- - sh
- - -ecx
- - echo "aws-$(curl http://169.254.169.254/latest/meta-data/placement/availability-zone/)"
- > /etc/cockroach-env/zone
- image: byrnedo/alpine-curl:0.1
- imagePullPolicy: IfNotPresent
- name: locality-container
- resources: {}
- terminationMessagePath: /dev/termination-log
- terminationMessagePolicy: File
- volumeMounts:
- - mountPath: /etc/cockroach-env
- name: cockroach-env
- containers:
- - name: cockroachdb
- image: cockroachdb/cockroach:@VERSION@
- imagePullPolicy: IfNotPresent
- # TODO: Change these to appropriate values for the hardware that you're running. You can see
- # the resources that can be allocated on each of your Kubernetes nodes by running:
- # kubectl describe nodes
- # Note that requests and limits should have identical values.
- resources:
- requests:
- cpu: "2"
- memory: "8Gi"
- limits:
- cpu: "2"
- memory: "8Gi"
- ports:
- - containerPort: 26257
- name: grpc
- - containerPort: 8080
- name: http
-# We recommend that you do not configure a liveness probe on a production environment, as this can impact the availability of production databases.
-# livenessProbe:
-# httpGet:
-# path: "/health"
-# port: http
-# scheme: HTTPS
-# initialDelaySeconds: 30
-# periodSeconds: 5
- readinessProbe:
- httpGet:
- path: "/health?ready=1"
- port: http
- scheme: HTTPS
- initialDelaySeconds: 10
- periodSeconds: 5
- failureThreshold: 2
- volumeMounts:
- - name: datadir
- mountPath: /cockroach/cockroach-data
- - name: certs
- mountPath: /cockroach/cockroach-certs
- - name: cockroach-env
- mountPath: /etc/cockroach-env
- env:
- - name: COCKROACH_CHANNEL
- value: kubernetes-multiregion
- - name: GOMAXPROCS
- valueFrom:
- resourceFieldRef:
- resource: limits.cpu
- divisor: "1"
- - name: MEMORY_LIMIT_MIB
- valueFrom:
- resourceFieldRef:
- resource: limits.memory
- divisor: "1Mi"
- command:
- - "/bin/bash"
- - "-ecx"
- # The use of qualified `hostname -f` is crucial:
- # Other nodes aren't able to look up the unqualified hostname.
- - exec
- /cockroach/cockroach
- start
- --logtostderr
- --certs-dir /cockroach/cockroach-certs
- --advertise-host $(hostname -f)
- --http-addr 0.0.0.0
- # TODO: Replace the placeholder values in --join and --locality with the namespace of the CockroachDB cluster in each region (e.g., us-east-1).
- # --join cockroachdb-0.cockroachdb.<cluster-namespace-1>,cockroachdb-1.cockroachdb.<cluster-namespace-1>,cockroachdb-2.cockroachdb.<cluster-namespace-1>,cockroachdb-0.cockroachdb.<cluster-namespace-2>,cockroachdb-1.cockroachdb.<cluster-namespace-2>,cockroachdb-2.cockroachdb.<cluster-namespace-2>,cockroachdb-0.cockroachdb.<cluster-namespace-3>,cockroachdb-1.cockroachdb.<cluster-namespace-3>,cockroachdb-2.cockroachdb.<cluster-namespace-3>
- # --locality=region=<cluster-namespace-1>,az=$(cat /etc/cockroach-env/zone),dns=$(hostname -f)
- --cache $(expr $MEMORY_LIMIT_MIB / 4)MiB
- --max-sql-memory $(expr $MEMORY_LIMIT_MIB / 4)MiB
- # No pre-stop hook is required, a SIGTERM plus some time is all that's
- # needed for graceful shutdown of a node.
- terminationGracePeriodSeconds: 60
- volumes:
- - name: datadir
- persistentVolumeClaim:
- claimName: datadir
- - name: certs
- secret:
- secretName: cockroachdb.node
- defaultMode: 256
- - name: cockroach-env
- emptyDir: {}
- podManagementPolicy: Parallel
- updateStrategy:
- type: RollingUpdate
- volumeClaimTemplates:
- - metadata:
- name: datadir
- spec:
- accessModes:
- - "ReadWriteOnce"
- resources:
- requests:
- storage: 100Gi
diff --git a/cloud/kubernetes/templates/performance/cockroachdb-daemonset-insecure.yaml b/cloud/kubernetes/templates/performance/cockroachdb-daemonset-insecure.yaml
deleted file mode 100644
index f07af4dbc8..0000000000
--- a/cloud/kubernetes/templates/performance/cockroachdb-daemonset-insecure.yaml
+++ /dev/null
@@ -1,142 +0,0 @@
-# This configuration file sets up a DaemonSet running CockroachDB in insecure
-# mode. For more information on why you might want to use a DaemonSet instead
-# of a StatefulSet, see our docs:
-# https://www.cockroachlabs.com/docs/stable/kubernetes-performance.html#running-in-a-daemonset
-#
-# To use this file, customize the parts labeled "TODO" before running:
-# kubectl create -f cockroachdb-daemonset-insecure.yaml
-#
-# Initialize the cluster by picking one of the CockroachDB pod names from
-# the output of `kubectl get pods`, then run:
-# kubectl exec -it <pod-name> -- ./cockroach init --insecure
-#
-# If you're interested in using a DaemonSet in secure mode instead, please see
-# cockroachdb-daemonset-secure.yaml.
-apiVersion: v1
-kind: Service
-metadata:
- # This service is meant to be used by clients of the database. It exposes a ClusterIP that will
- # automatically load balance connections to the different database pods.
- name: cockroachdb-public
- labels:
- app: cockroachdb
-spec:
- ports:
- # The main port, served by gRPC, serves Postgres-flavor SQL, internode
- # traffic and the cli.
- - port: 26257
- targetPort: 26257
- name: grpc
- # The secondary port serves the UI as well as health and debug endpoints.
- - port: 8080
- targetPort: 8080
- name: http
- selector:
- app: cockroachdb
----
-apiVersion: policy/v1beta1
-kind: PodDisruptionBudget
-metadata:
- name: cockroachdb-budget
- labels:
- app: cockroachdb
-spec:
- selector:
- matchLabels:
- app: cockroachdb
- maxUnavailable: 1
----
-apiVersion: apps/v1
-kind: DaemonSet
-metadata:
- name: cockroachdb
- labels:
- app: cockroachdb
-spec:
- selector:
- matchLabels:
- app: cockroachdb
- template:
- metadata:
- labels:
- app: cockroachdb
- spec:
- # TODO: Remove the nodeSelector section if you want CockroachDB to run on all nodes in your cluster.
- # To give nodes this label, run:
- # kubectl label node <node-name> app=cockroachdb
- nodeSelector:
- app: cockroachdb
- # Tolerations allow CockroachDB to run on Kubernetes nodes that other pods won't be allowed on.
- # To set up nodes to be dedicated to CockroachDB, you must "taint" them by running:
- # kubectl taint node <node-name> app=cockroachdb:NoSchedule
- # If you don't set up any such taints, these tolerations will have no effect.
- tolerations:
- - key: "app"
- operator: "Equal"
- value: "cockroachdb"
- effect: "NoSchedule"
- # NOTE: Running with `hostNetwork: true` means that CockroachDB will use
- # the host machines' IP address and hostname, and that nothing else on
- # the machines will be able to use the same ports.
- hostNetwork: true
- containers:
- - name: cockroachdb
- image: cockroachdb/cockroach:@VERSION@
- imagePullPolicy: IfNotPresent
- # TODO: If you configured taints to give CockroachDB exclusive access to nodes, feel free
- # to remove the requests and limits sections. If you didn't, you'll need to change these to
- # appropriate values for the hardware that you're running. You can see the amount of
- # allocatable resources on each of your Kubernetes nodes by running:
- # kubectl describe nodes
- resources:
- requests:
- cpu: "16"
- memory: "8Gi"
- limits:
- # NOTE: Unless you have enabled the non-default Static CPU Management Policy
- # and are using an integer number of CPUs, we don't recommend setting a CPU limit.
- # See:
- # https://kubernetes.io/docs/tasks/administer-cluster/cpu-management-policies/#static-policy
- # https://github.com/kubernetes/kubernetes/issues/51135
- #cpu: "16"
- memory: "8Gi"
- ports:
- - containerPort: 26257
- hostPort: 26257
- name: grpc
- - containerPort: 8080
- hostPort: 8080
- name: http
-# We recommend that you do not configure a liveness probe on a production environment, as this can impact the availability of production databases.
-# livenessProbe:
-# httpGet:
-# path: "/health"
-# port: http
-# scheme: HTTP
-# initialDelaySeconds: 30
-# periodSeconds: 5
- readinessProbe:
- httpGet:
- path: "/health?ready=1"
- port: http
- scheme: HTTP
- initialDelaySeconds: 10
- periodSeconds: 5
- failureThreshold: 2
- volumeMounts:
- - name: datadir
- mountPath: /cockroach/cockroach-data
- env:
- - name: COCKROACH_CHANNEL
- value: kubernetes-insecure
- command:
- - "/bin/bash"
- - "-ecx"
- # TODO: Replace "YOUR_IP_ADDR1_HERE,YOUR_IP_ADDR2_HERE,YOUR_IP_ADDR3_HERE" with a list of a few of the IP addresses of the machines on which CockroachDB will be running.
- - "exec /cockroach/cockroach start --logtostderr --insecure --http-addr 0.0.0.0 --cache 25% --max-sql-memory 25% --join=YOUR_IP_ADDR1_HERE,YOUR_IP_ADDR2_HERE,YOUR_IP_ADDR3_HERE"
- terminationGracePeriodSeconds: 60
- volumes:
- - name: datadir
- hostPath:
- # TODO: Replace "YOUR_FILESYSTEM_PATH_HERE" with the path where you want CockroachDB's data stored on your Kubernetes nodes.
- path: YOUR_FILESYSTEM_PATH_HERE
diff --git a/cloud/kubernetes/templates/performance/cockroachdb-daemonset-secure.yaml b/cloud/kubernetes/templates/performance/cockroachdb-daemonset-secure.yaml
deleted file mode 100644
index aa3e3a1eee..0000000000
--- a/cloud/kubernetes/templates/performance/cockroachdb-daemonset-secure.yaml
+++ /dev/null
@@ -1,262 +0,0 @@
-# This configuration file sets up a secure DaemonSet running CockroachDB.
-# For more information on why you might want to use a DaemonSet instead
-# of a StatefulSet, see our docs:
-# https://www.cockroachlabs.com/docs/stable/kubernetes-performance.html#running-in-a-daemonset
-#
-# To use this file, customize the parts labeled "TODO" before running:
-# kubectl create -f cockroachdb-daemonset-secure.yaml
-#
-# You will then have to approve certificate signing requests and initialize the
-# cluster as described in the parent directory's README.md file. In order for
-# the initialization step to work, note that you will need to change the
-# address used by the cluster-init-secure.yaml file on the
-# "--host=cockroachdb-0.cockroach" line from "cockroachdb-0.cockroach" to the
-# address of one of your nodes.
-#
-# If you're interested in using a DaemonSet in insecure mode instead, please
-# see cockroachdb-daemonset-insecure.yaml.
-apiVersion: v1
-kind: ServiceAccount
-metadata:
- name: cockroachdb
- labels:
- app: cockroachdb
----
-apiVersion: rbac.authorization.k8s.io/v1
-kind: Role
-metadata:
- name: cockroachdb
- labels:
- app: cockroachdb
-rules:
-- apiGroups:
- - ""
- resources:
- - secrets
- verbs:
- - create
- - get
----
-apiVersion: rbac.authorization.k8s.io/v1
-kind: ClusterRole
-metadata:
- name: cockroachdb
- labels:
- app: cockroachdb
-rules:
-- apiGroups:
- - certificates.k8s.io
- resources:
- - certificatesigningrequests
- verbs:
- - create
- - get
- - watch
----
-apiVersion: rbac.authorization.k8s.io/v1
-kind: RoleBinding
-metadata:
- name: cockroachdb
- labels:
- app: cockroachdb
-roleRef:
- apiGroup: rbac.authorization.k8s.io
- kind: Role
- name: cockroachdb
-subjects:
-- kind: ServiceAccount
- name: cockroachdb
- namespace: default
----
-apiVersion: rbac.authorization.k8s.io/v1
-kind: ClusterRoleBinding
-metadata:
- name: cockroachdb
- labels:
- app: cockroachdb
-roleRef:
- apiGroup: rbac.authorization.k8s.io
- kind: ClusterRole
- name: cockroachdb
-subjects:
-- kind: ServiceAccount
- name: cockroachdb
- namespace: default
----
-apiVersion: v1
-kind: Service
-metadata:
- # This service is meant to be used by clients of the database. It exposes a ClusterIP that will
- # automatically load balance connections to the different database pods.
- name: cockroachdb-public
- labels:
- app: cockroachdb
-spec:
- ports:
- # The main port, served by gRPC, serves Postgres-flavor SQL, internode
- # traffic and the cli.
- - port: 26257
- targetPort: 26257
- name: grpc
- # The secondary port serves the UI as well as health and debug endpoints.
- - port: 8080
- targetPort: 8080
- name: http
- selector:
- app: cockroachdb
----
-apiVersion: policy/v1beta1
-kind: PodDisruptionBudget
-metadata:
- name: cockroachdb-budget
- labels:
- app: cockroachdb
-spec:
- selector:
- matchLabels:
- app: cockroachdb
- maxUnavailable: 1
----
-apiVersion: apps/v1
-kind: DaemonSet
-metadata:
- name: cockroachdb
- labels:
- app: cockroachdb
-spec:
- selector:
- matchLabels:
- app: cockroachdb
- template:
- metadata:
- labels:
- app: cockroachdb
- spec:
- serviceAccountName: cockroachdb
- # TODO: Remove the nodeSelector section if you want CockroachDB to run on all nodes in your cluster.
- # To give nodes this label, run:
- # kubectl label node <node-name> app=cockroachdb
- nodeSelector:
- app: cockroachdb
- # Tolerations allow CockroachDB to run on Kubernetes nodes that other pods won't be allowed on.
- # To set up nodes to be dedicated to CockroachDB, you must "taint" them by running:
- # kubectl taint node <node-name> app=cockroachdb:NoSchedule
- # If you don't set up any such taints, these tolerations will have no effect.
- tolerations:
- - key: "app"
- operator: "Equal"
- value: "cockroachdb"
- effect: "NoSchedule"
- # NOTE: Running with `hostNetwork: true` means that CockroachDB will use
- # the host machines' IP address and hostname, and that nothing else on
- # the machines will be able to use the same ports.
- hostNetwork: true
- # Init containers are run only once in the lifetime of a pod, before
- # it's started up for the first time. It has to exit successfully
- # before the pod's main containers are allowed to start.
- initContainers:
- # The init-certs container sends a certificate signing request to the
- # kubernetes cluster.
- # You can see pending requests using: kubectl get csr
- # CSRs can be approved using: kubectl certificate approve <csr name>
- #
- # All addresses used to contact a node must be specified in the --addresses arg.
- #
- # In addition to the node certificate and key, the init-certs entrypoint will symlink
- # the cluster CA to the certs directory.
- - name: init-certs
- image: cockroachdb/cockroach-k8s-request-cert:0.4
- imagePullPolicy: IfNotPresent
- command:
- - "/bin/ash"
- - "-ecx"
- - "/request-cert -namespace=${POD_NAMESPACE} -certs-dir=/cockroach-certs -type=node -addresses=localhost,127.0.0.1,$(hostname),$(hostname -f),$(hostname -i),cockroachdb-public,cockroachdb-public.${POD_NAMESPACE}.svc.cluster.local,cockroachdb-public.${POD_NAMESPACE}.svc,cockroachdb-public.${POD_NAMESPACE} -symlink-ca-from=/var/run/secrets/kubernetes.io/serviceaccount/ca.crt"
- env:
- - name: POD_NAMESPACE
- valueFrom:
- fieldRef:
- fieldPath: metadata.namespace
- volumeMounts:
- - name: certs
- mountPath: /cockroach-certs
- # NOTE: If you are running clients that generate heavy load, you may find
- # it useful to copy this anti-affinity policy into the client pods'
- # configurations as well to avoid running them on the same machines as
- # CockroachDB and interfering with each other's performance.
- affinity:
- podAntiAffinity:
- preferredDuringSchedulingIgnoredDuringExecution:
- - weight: 100
- podAffinityTerm:
- labelSelector:
- matchExpressions:
- - key: app
- operator: In
- values:
- - cockroachdb
- topologyKey: kubernetes.io/hostname
- containers:
- - name: cockroachdb
- image: cockroachdb/cockroach:@VERSION@
- imagePullPolicy: IfNotPresent
- # TODO: If you configured taints to give CockroachDB exclusive access to nodes, feel free
- # to remove the requests and limits sections. If you didn't, you'll need to change these to
- # appropriate values for the hardware that you're running. You can see the amount of
- # allocatable resources on each of your Kubernetes nodes by running:
- # kubectl describe nodes
- resources:
- requests:
- cpu: "16"
- memory: "8Gi"
- limits:
- # NOTE: Unless you have enabled the non-default Static CPU Management Policy
- # and are using an integer number of CPUs, we don't recommend setting a CPU limit.
- # See:
- # https://kubernetes.io/docs/tasks/administer-cluster/cpu-management-policies/#static-policy
- # https://github.com/kubernetes/kubernetes/issues/51135
- #cpu: "16"
- memory: "8Gi"
- ports:
- - containerPort: 26257
- hostPort: 26257
- name: grpc
- - containerPort: 8080
- hostPort: 8080
- name: http
-# We recommend that you do not configure a liveness probe on a production environment, as this can impact the availability of production databases.
-# livenessProbe:
-# httpGet:
-# path: "/health"
-# port: http
-# scheme: HTTPS
-# initialDelaySeconds: 30
-# periodSeconds: 5
- readinessProbe:
- httpGet:
- path: "/health?ready=1"
- port: http
- scheme: HTTPS
- initialDelaySeconds: 10
- periodSeconds: 5
- failureThreshold: 2
- volumeMounts:
- - name: datadir
- mountPath: /cockroach/cockroach-data
- - name: certs
- mountPath: /cockroach/cockroach-certs
- env:
- - name: COCKROACH_CHANNEL
- value: kubernetes-secure
- command:
- - "/bin/bash"
- - "-ecx"
- # TODO: Replace "YOUR_IP_ADDR1_HERE,YOUR_IP_ADDR2_HERE,YOUR_IP_ADDR3_HERE" with a list of a few of the IP addresses or hostnames of the machines on which CockroachDB will be running.
- - "exec /cockroach/cockroach start --logtostderr --certs-dir /cockroach/cockroach-certs --http-addr 0.0.0.0 --cache 25% --max-sql-memory 25% --join=YOUR_IP_ADDR1_HERE,YOUR_IP_ADDR2_HERE,YOUR_IP_ADDR3_HERE"
- terminationGracePeriodSeconds: 60
- volumes:
- - name: datadir
- hostPath:
- # TODO: Replace "YOUR_FILESYSTEM_PATH_HERE" with the path where you want CockroachDB's data stored on your Kubernetes nodes.
- path: YOUR_FILESYSTEM_PATH_HERE
- - name: certs
- emptyDir: {}
diff --git a/cloud/kubernetes/templates/performance/cockroachdb-statefulset-insecure.yaml b/cloud/kubernetes/templates/performance/cockroachdb-statefulset-insecure.yaml
deleted file mode 100644
index f3cb1017cf..0000000000
--- a/cloud/kubernetes/templates/performance/cockroachdb-statefulset-insecure.yaml
+++ /dev/null
@@ -1,215 +0,0 @@
-# This configuration file sets up an insecure StatefulSet running CockroachDB with
-# tweaks to make it more performant than our default configuration files. All
-# changes from the default insecure configuration have been marked with a comment
-# starting with "NOTE" or "TODO".
-#
-# Beware that this configuration is quite insecure. By default, it will make
-# CockroachDB accessible on port 26257 on your Kubernetes nodes' network
-# interfaces, meaning that if your nodes are reachable from the Internet, then
-# this CockroachDB cluster will be too. To disable this behavior, remove the
-# `hostNetwork` configuration field below.
-#
-# To use this file, customize all the parts labeled "TODO" before running:
-# kubectl create -f cockroachdb-statefulset-insecure.yaml
-#
-# You will then have to initialize the cluster as described in the parent
-# directory's README.md file.
-#
-# If you don't see any pods being created, it's possible that your cluster was
-# not able to meet the resource requests asked for, whether it was the amount
-# of CPU, memory, or disk or the disk type. To find information about why pods
-# haven't been created, you can run:
-# kubectl get events
-#
-# For more information on improving CockroachDB performance in Kubernetes, see
-# our docs:
-# https://www.cockroachlabs.com/docs/stable/kubernetes-performance.html
-apiVersion: v1
-kind: Service
-metadata:
- # This service is meant to be used by clients of the database. It exposes a ClusterIP that will
- # automatically load balance connections to the different database pods.
- name: cockroachdb-public
- labels:
- app: cockroachdb
-spec:
- ports:
- # The main port, served by gRPC, serves Postgres-flavor SQL, internode
- # traffic and the cli.
- - port: 26257
- targetPort: 26257
- name: grpc
- # The secondary port serves the UI as well as health and debug endpoints.
- - port: 8080
- targetPort: 8080
- name: http
- selector:
- app: cockroachdb
----
-apiVersion: v1
-kind: Service
-metadata:
- # This service only exists to create DNS entries for each pod in the stateful
- # set such that they can resolve each other's IP addresses. It does not
- # create a load-balanced ClusterIP and should not be used directly by clients
- # in most circumstances.
- name: cockroachdb
- labels:
- app: cockroachdb
- annotations:
- # Use this annotation in addition to the actual publishNotReadyAddresses
- # field below because the annotation will stop being respected soon but the
- # field is broken in some versions of Kubernetes:
- # https://github.com/kubernetes/kubernetes/issues/58662
- service.alpha.kubernetes.io/tolerate-unready-endpoints: "true"
- # Enable automatic monitoring of all instances when Prometheus is running in the cluster.
- prometheus.io/scrape: "true"
- prometheus.io/path: "_status/vars"
- prometheus.io/port: "8080"
-spec:
- ports:
- - port: 26257
- targetPort: 26257
- name: grpc
- - port: 8080
- targetPort: 8080
- name: http
- # We want all pods in the StatefulSet to have their addresses published for
- # the sake of the other CockroachDB pods even before they're ready, since they
- # have to be able to talk to each other in order to become ready.
- publishNotReadyAddresses: true
- clusterIP: None
- selector:
- app: cockroachdb
----
-apiVersion: policy/v1beta1
-kind: PodDisruptionBudget
-metadata:
- name: cockroachdb-budget
- labels:
- app: cockroachdb
-spec:
- selector:
- matchLabels:
- app: cockroachdb
- maxUnavailable: 1
----
-apiVersion: apps/v1
-kind: StatefulSet
-metadata:
- name: cockroachdb
-spec:
- serviceName: "cockroachdb"
- replicas: 3
- selector:
- matchLabels:
- app: cockroachdb
- template:
- metadata:
- labels:
- app: cockroachdb
- spec:
- # NOTE: Running with `hostNetwork: true` means that CockroachDB will use
- # the host machines' IP address and hostname, and that nothing else on
- # the machines will be able to use the same ports. This means that only 1
- # CockroachDB pod will ever be schedulable on the same machine, because
- # otherwise their ports would conflict.
- #
- # If your client pods generate a lot of network traffic to and from the
- # CockroachDB cluster, you may see a benefit to doing the same thing in
- # their configurations.
- hostNetwork: true
- dnsPolicy: ClusterFirstWithHostNet
- # NOTE: If you are running clients that generate heavy load, you may find
- # it useful to copy this anti-affinity policy into the client pods'
- # configurations as well to avoid running them on the same machines as
- # CockroachDB and interfering with each other's performance.
- affinity:
- podAntiAffinity:
- preferredDuringSchedulingIgnoredDuringExecution:
- - weight: 100
- podAffinityTerm:
- labelSelector:
- matchExpressions:
- - key: app
- operator: In
- values:
- - cockroachdb
- topologyKey: kubernetes.io/hostname
- containers:
- - name: cockroachdb
- # NOTE: Always use the most recent version of CockroachDB for the best
- # performance and reliability.
- image: cockroachdb/cockroach:@VERSION@
- imagePullPolicy: IfNotPresent
- # TODO: Change these to appropriate values for the hardware that you're running. You can see
- # the resources that can be allocated on each of your Kubernetes nodes by running:
- # kubectl describe nodes
- # Note that requests and limits should have identical values.
- resources:
- requests:
- cpu: "2"
- memory: "8Gi"
- limits:
- cpu: "2"
- memory: "8Gi"
- ports:
- - containerPort: 26257
- name: grpc
- - containerPort: 8080
- name: http
-# We recommend that you do not configure a liveness probe on a production environment, as this can impact the availability of production databases.
-# livenessProbe:
-# httpGet:
-# path: "/health"
-# port: http
-# initialDelaySeconds: 30
-# periodSeconds: 5
- readinessProbe:
- httpGet:
- path: "/health?ready=1"
- port: http
- initialDelaySeconds: 10
- periodSeconds: 5
- failureThreshold: 2
- volumeMounts:
- - name: datadir
- mountPath: /cockroach/cockroach-data
- env:
- - name: COCKROACH_CHANNEL
- value: kubernetes-insecure
- command:
- - "/bin/bash"
- - "-ecx"
- # The use of qualified `hostname -f` is crucial:
- # Other nodes aren't able to look up the unqualified hostname.
- - "exec /cockroach/cockroach start --logtostderr --insecure --advertise-host $(hostname -f) --http-addr 0.0.0.0 --join cockroachdb-0.cockroachdb,cockroachdb-1.cockroachdb,cockroachdb-2.cockroachdb --cache 25% --max-sql-memory 25%"
- # No pre-stop hook is required, a SIGTERM plus some time is all that's
- # needed for graceful shutdown of a node.
- terminationGracePeriodSeconds: 60
- volumes:
- - name: datadir
- persistentVolumeClaim:
- claimName: datadir
- podManagementPolicy: Parallel
- updateStrategy:
- type: RollingUpdate
- volumeClaimTemplates:
- - metadata:
- name: datadir
- spec:
- accessModes:
- - "ReadWriteOnce"
- # TODO: This specifically asks for a storage class with the name "ssd". A
- # storage class of this name doesn't exist by default. See our docs for
- # more information on how to create an optimized storage class for use here:
- # https://www.cockroachlabs.com/docs/stable/kubernetes-performance.html#disk-type
- storageClassName: ssd
- resources:
- requests:
- # TODO: This asks for a fairly large disk by default because on
- # certain popular clouds there is a direct correlation between disk
- # size and the IOPS provisioned to the disk. Change this as necessary
- # to suit your needs, but be aware that smaller disks will typically
- # mean worse performance.
- storage: 1024Gi
diff --git a/cloud/kubernetes/templates/performance/cockroachdb-statefulset-secure.yaml b/cloud/kubernetes/templates/performance/cockroachdb-statefulset-secure.yaml
deleted file mode 100644
index fd9c215dcd..0000000000
--- a/cloud/kubernetes/templates/performance/cockroachdb-statefulset-secure.yaml
+++ /dev/null
@@ -1,312 +0,0 @@
-# This configuration file sets up a secure StatefulSet running CockroachDB with
-# tweaks to make it more performant than our default configuration files. All
-# changes from the default secure configuration have been marked with a comment
-# starting with "NOTE" or "TODO".
-#
-# To use it, customize all the parts of the file labeled "TODO" before running:
-# kubectl create -f cockroachdb-statefulset-secure.yaml
-#
-# You will then have to approve certificate-signing requests and initialize the
-# cluster as described in the parent directory's README.md file.
-#
-# If you don't see any pods being created, it's possible that your cluster was
-# not able to meet the resource requests asked for, whether it was the amount
-# of CPU, memory, or disk or the disk type. To find information about why pods
-# haven't been created, you can run:
-# kubectl get events
-#
-# For more information on improving CockroachDB performance in Kubernetes, see
-# our docs:
-# https://www.cockroachlabs.com/docs/stable/kubernetes-performance.html
-apiVersion: v1
-kind: ServiceAccount
-metadata:
- name: cockroachdb
- labels:
- app: cockroachdb
----
-apiVersion: rbac.authorization.k8s.io/v1
-kind: Role
-metadata:
- name: cockroachdb
- labels:
- app: cockroachdb
-rules:
-- apiGroups:
- - ""
- resources:
- - secrets
- verbs:
- - create
- - get
----
-apiVersion: rbac.authorization.k8s.io/v1
-kind: ClusterRole
-metadata:
- name: cockroachdb
- labels:
- app: cockroachdb
-rules:
-- apiGroups:
- - certificates.k8s.io
- resources:
- - certificatesigningrequests
- verbs:
- - create
- - get
- - watch
----
-apiVersion: rbac.authorization.k8s.io/v1
-kind: RoleBinding
-metadata:
- name: cockroachdb
- labels:
- app: cockroachdb
-roleRef:
- apiGroup: rbac.authorization.k8s.io
- kind: Role
- name: cockroachdb
-subjects:
-- kind: ServiceAccount
- name: cockroachdb
- namespace: default
----
-apiVersion: rbac.authorization.k8s.io/v1
-kind: ClusterRoleBinding
-metadata:
- name: cockroachdb
- labels:
- app: cockroachdb
-roleRef:
- apiGroup: rbac.authorization.k8s.io
- kind: ClusterRole
- name: cockroachdb
-subjects:
-- kind: ServiceAccount
- name: cockroachdb
- namespace: default
----
-apiVersion: v1
-kind: Service
-metadata:
- # This service is meant to be used by clients of the database. It exposes a ClusterIP that will
- # automatically load balance connections to the different database pods.
- name: cockroachdb-public
- labels:
- app: cockroachdb
-spec:
- ports:
- # The main port, served by gRPC, serves Postgres-flavor SQL, internode
- # traffic and the cli.
- - port: 26257
- targetPort: 26257
- name: grpc
- # The secondary port serves the UI as well as health and debug endpoints.
- - port: 8080
- targetPort: 8080
- name: http
- selector:
- app: cockroachdb
----
-apiVersion: v1
-kind: Service
-metadata:
- # This service only exists to create DNS entries for each pod in the stateful
- # set such that they can resolve each other's IP addresses. It does not
- # create a load-balanced ClusterIP and should not be used directly by clients
- # in most circumstances.
- name: cockroachdb
- labels:
- app: cockroachdb
- annotations:
- # Use this annotation in addition to the actual publishNotReadyAddresses
- # field below because the annotation will stop being respected soon but the
- # field is broken in some versions of Kubernetes:
- # https://github.com/kubernetes/kubernetes/issues/58662
- service.alpha.kubernetes.io/tolerate-unready-endpoints: "true"
- # Enable automatic monitoring of all instances when Prometheus is running in the cluster.
- prometheus.io/scrape: "true"
- prometheus.io/path: "_status/vars"
- prometheus.io/port: "8080"
-spec:
- ports:
- - port: 26257
- targetPort: 26257
- name: grpc
- - port: 8080
- targetPort: 8080
- name: http
- # We want all pods in the StatefulSet to have their addresses published for
- # the sake of the other CockroachDB pods even before they're ready, since they
- # have to be able to talk to each other in order to become ready.
- publishNotReadyAddresses: true
- clusterIP: None
- selector:
- app: cockroachdb
----
-apiVersion: policy/v1beta1
-kind: PodDisruptionBudget
-metadata:
- name: cockroachdb-budget
- labels:
- app: cockroachdb
-spec:
- selector:
- matchLabels:
- app: cockroachdb
- maxUnavailable: 1
----
-apiVersion: apps/v1
-kind: StatefulSet
-metadata:
- name: cockroachdb
-spec:
- serviceName: "cockroachdb"
- replicas: 3
- selector:
- matchLabels:
- app: cockroachdb
- template:
- metadata:
- labels:
- app: cockroachdb
- spec:
- serviceAccountName: cockroachdb
- # NOTE: Running with `hostNetwork: true` means that CockroachDB will use
- # the host machines' IP address and hostname, and that nothing else on
- # the machines will be able to use the same ports. This means that only 1
- # CockroachDB pod will ever be schedulable on the same machine, because
- # otherwise their ports would conflict.
- #
- # If your client pods generate a lot of network traffic to and from the
- # CockroachDB cluster, you may see a benefit to doing the same thing in
- # their configurations.
- hostNetwork: true
- dnsPolicy: ClusterFirstWithHostNet
- # Init containers are run only once in the lifetime of a pod, before
- # it's started up for the first time. It has to exit successfully
- # before the pod's main containers are allowed to start.
- initContainers:
- # The init-certs container sends a certificate signing request to the
- # kubernetes cluster.
- # You can see pending requests using: kubectl get csr
- # CSRs can be approved using: kubectl certificate approve <csr name>
- #
- # All addresses used to contact a node must be specified in the --addresses arg.
- #
- # In addition to the node certificate and key, the init-certs entrypoint will symlink
- # the cluster CA to the certs directory.
- - name: init-certs
- image: cockroachdb/cockroach-k8s-request-cert:0.4
- imagePullPolicy: IfNotPresent
- command:
- - "/bin/ash"
- - "-ecx"
- - "/request-cert -namespace=${POD_NAMESPACE} -certs-dir=/cockroach-certs -type=node -addresses=localhost,127.0.0.1,$(hostname -f),$(hostname -f|cut -f 1-2 -d '.'),cockroachdb-public,cockroachdb-public.$(hostname -f|cut -f 3- -d '.'),cockroachdb-public.$(hostname -f|cut -f 3-4 -d '.'),cockroachdb-public.$(hostname -f|cut -f 3 -d '.') -symlink-ca-from=/var/run/secrets/kubernetes.io/serviceaccount/ca.crt"
- env:
- - name: POD_NAMESPACE
- valueFrom:
- fieldRef:
- fieldPath: metadata.namespace
- volumeMounts:
- - name: certs
- mountPath: /cockroach-certs
- # NOTE: If you are running clients that generate heavy load, you may find
- # it useful to copy this anti-affinity policy into the client pods'
- # configurations as well to avoid running them on the same machines as
- # CockroachDB and interfering with each other's performance.
- affinity:
- podAntiAffinity:
- preferredDuringSchedulingIgnoredDuringExecution:
- - weight: 100
- podAffinityTerm:
- labelSelector:
- matchExpressions:
- - key: app
- operator: In
- values:
- - cockroachdb
- topologyKey: kubernetes.io/hostname
- containers:
- - name: cockroachdb
- # NOTE: Always use the most recent version of CockroachDB for the best
- # performance and reliability.
- image: cockroachdb/cockroach:@VERSION@
- imagePullPolicy: IfNotPresent
- # TODO: Change these to appropriate values for the hardware that you're running. You can see
- # the resources that can be allocated on each of your Kubernetes nodes by running:
- # kubectl describe nodes
- # Note that requests and limits should have identical values.
- resources:
- requests:
- cpu: "2"
- memory: "8Gi"
- limits:
- cpu: "2"
- memory: "8Gi"
- ports:
- - containerPort: 26257
- name: grpc
- - containerPort: 8080
- name: http
-# We recommend that you do not configure a liveness probe on a production environment, as this can impact the availability of production databases.
-# livenessProbe:
-# httpGet:
-# path: "/health"
-# port: http
-# scheme: HTTPS
-# initialDelaySeconds: 30
-# periodSeconds: 5
- readinessProbe:
- httpGet:
- path: "/health?ready=1"
- port: http
- scheme: HTTPS
- initialDelaySeconds: 10
- periodSeconds: 5
- failureThreshold: 2
- volumeMounts:
- - name: datadir
- mountPath: /cockroach/cockroach-data
- - name: certs
- mountPath: /cockroach/cockroach-certs
- env:
- - name: COCKROACH_CHANNEL
- value: kubernetes-secure
- command:
- - "/bin/bash"
- - "-ecx"
- # The use of qualified `hostname -f` is crucial:
- # Other nodes aren't able to look up the unqualified hostname.
- - "exec /cockroach/cockroach start --logtostderr --certs-dir /cockroach/cockroach-certs --advertise-host $(hostname -f) --http-addr 0.0.0.0 --join cockroachdb-0.cockroachdb,cockroachdb-1.cockroachdb,cockroachdb-2.cockroachdb --cache 25% --max-sql-memory 25%"
- # No pre-stop hook is required, a SIGTERM plus some time is all that's
- # needed for graceful shutdown of a node.
- terminationGracePeriodSeconds: 60
- volumes:
- - name: datadir
- persistentVolumeClaim:
- claimName: datadir
- - name: certs
- emptyDir: {}
- podManagementPolicy: Parallel
- updateStrategy:
- type: RollingUpdate
- volumeClaimTemplates:
- - metadata:
- name: datadir
- spec:
- accessModes:
- - "ReadWriteOnce"
- # TODO: This specifically asks for a storage class with the name "ssd". A
- # storage class of this name doesn't exist by default. See our docs for
- # more information on how to create an optimized storage class for use here:
- # https://www.cockroachlabs.com/docs/stable/kubernetes-performance.html#disk-type
- storageClassName: ssd
- resources:
- requests:
- # TODO: This asks for a fairly large disk by default because on
- # certain popular clouds there is a direct correlation between disk
- # size and the IOPS provisioned to the disk. Change this as necessary
- # to suit your needs, but be aware that smaller disks will typically
- # mean worse performance.
- storage: 1024Gi
diff --git a/cloud/kubernetes/templates/v1.6/client-secure.yaml b/cloud/kubernetes/templates/v1.6/client-secure.yaml
deleted file mode 100644
index 0df772c510..0000000000
--- a/cloud/kubernetes/templates/v1.6/client-secure.yaml
+++ /dev/null
@@ -1,48 +0,0 @@
-apiVersion: v1
-kind: Pod
-metadata:
- name: cockroachdb-client-secure
- labels:
- app: cockroachdb-client
-spec:
- serviceAccountName: cockroachdb
- initContainers:
- # The init-certs container sends a certificate signing request to the
- # kubernetes cluster.
- # You can see pending requests using: kubectl get csr
- # CSRs can be approved using: kubectl certificate approve <csr name>
- #
- # In addition to the client certificate and key, the init-certs entrypoint will symlink
- # the cluster CA to the certs directory.
- - name: init-certs
- image: cockroachdb/cockroach-k8s-request-cert:0.4
- imagePullPolicy: IfNotPresent
- command:
- - "/bin/ash"
- - "-ecx"
- - "/request-cert -namespace=${POD_NAMESPACE} -certs-dir=/cockroach-certs -type=client -user=root -symlink-ca-from=/var/run/secrets/kubernetes.io/serviceaccount/ca.crt"
- env:
- - name: POD_NAMESPACE
- valueFrom:
- fieldRef:
- fieldPath: metadata.namespace
- volumeMounts:
- - name: client-certs
- mountPath: /cockroach-certs
- containers:
- - name: cockroachdb-client
- image: cockroachdb/cockroach:@VERSION@
- imagePullPolicy: IfNotPresent
- volumeMounts:
- - name: client-certs
- mountPath: /cockroach-certs
- # Keep a pod open indefinitely so kubectl exec can be used to get a shell to it
- # and run cockroach client commands, such as cockroach sql, cockroach node status, etc.
- command:
- - sleep
- - "2147483648" # 2^31
- # This pod isn't doing anything important, so don't bother waiting to terminate it.
- terminationGracePeriodSeconds: 0
- volumes:
- - name: client-certs
- emptyDir: {}
diff --git a/cloud/kubernetes/templates/v1.6/cluster-init-secure.yaml b/cloud/kubernetes/templates/v1.6/cluster-init-secure.yaml
deleted file mode 100644
index 830f2928a9..0000000000
--- a/cloud/kubernetes/templates/v1.6/cluster-init-secure.yaml
+++ /dev/null
@@ -1,49 +0,0 @@
-apiVersion: batch/v1
-kind: Job
-metadata:
- name: cluster-init-secure
- labels:
- app: cockroachdb
-spec:
- template:
- spec:
- serviceAccountName: cockroachdb
- initContainers:
- # The init-certs container sends a certificate signing request to the
- # kubernetes cluster.
- # You can see pending requests using: kubectl get csr
- # CSRs can be approved using: kubectl certificate approve <csr name>
- #
- # In addition to the client certificate and key, the init-certs entrypoint will symlink
- # the cluster CA to the certs directory.
- - name: init-certs
- image: cockroachdb/cockroach-k8s-request-cert:0.4
- imagePullPolicy: IfNotPresent
- command:
- - "/bin/ash"
- - "-ecx"
- - "/request-cert -namespace=${POD_NAMESPACE} -certs-dir=/cockroach-certs -type=client -user=root -symlink-ca-from=/var/run/secrets/kubernetes.io/serviceaccount/ca.crt"
- env:
- - name: POD_NAMESPACE
- valueFrom:
- fieldRef:
- fieldPath: metadata.namespace
- volumeMounts:
- - name: client-certs
- mountPath: /cockroach-certs
- containers:
- - name: cluster-init
- image: cockroachdb/cockroach:@VERSION@
- imagePullPolicy: IfNotPresent
- volumeMounts:
- - name: client-certs
- mountPath: /cockroach-certs
- command:
- - "/cockroach/cockroach"
- - "init"
- - "--certs-dir=/cockroach-certs"
- - "--host=cockroachdb-0.cockroachdb"
- restartPolicy: OnFailure
- volumes:
- - name: client-certs
- emptyDir: {}
diff --git a/cloud/kubernetes/templates/v1.6/cluster-init.yaml b/cloud/kubernetes/templates/v1.6/cluster-init.yaml
deleted file mode 100644
index 07a43d8456..0000000000
--- a/cloud/kubernetes/templates/v1.6/cluster-init.yaml
+++ /dev/null
@@ -1,19 +0,0 @@
-apiVersion: batch/v1
-kind: Job
-metadata:
- name: cluster-init
- labels:
- app: cockroachdb
-spec:
- template:
- spec:
- containers:
- - name: cluster-init
- image: cockroachdb/cockroach:@VERSION@
- imagePullPolicy: IfNotPresent
- command:
- - "/cockroach/cockroach"
- - "init"
- - "--insecure"
- - "--host=cockroachdb-0.cockroachdb"
- restartPolicy: OnFailure
diff --git a/cloud/kubernetes/templates/v1.6/cockroachdb-statefulset-secure.yaml b/cloud/kubernetes/templates/v1.6/cockroachdb-statefulset-secure.yaml
deleted file mode 100644
index 178b870cb9..0000000000
--- a/cloud/kubernetes/templates/v1.6/cockroachdb-statefulset-secure.yaml
+++ /dev/null
@@ -1,221 +0,0 @@
-apiVersion: v1
-kind: ServiceAccount
-metadata:
- name: cockroachdb
- labels:
- app: cockroachdb
----
-apiVersion: rbac.authorization.k8s.io/v1beta1
-kind: Role
-metadata:
- name: cockroachdb
- labels:
- app: cockroachdb
-rules:
-- apiGroups:
- - ""
- resources:
- - secrets
- verbs:
- - create
- - get
----
-apiVersion: rbac.authorization.k8s.io/v1beta1
-kind: ClusterRole
-metadata:
- name: cockroachdb
- labels:
- app: cockroachdb
-rules:
-- apiGroups:
- - certificates.k8s.io
- resources:
- - certificatesigningrequests
- verbs:
- - create
- - get
- - watch
----
-apiVersion: rbac.authorization.k8s.io/v1beta1
-kind: RoleBinding
-metadata:
- name: cockroachdb
- labels:
- app: cockroachdb
-roleRef:
- apiGroup: rbac.authorization.k8s.io
- kind: Role
- name: cockroachdb
-subjects:
-- kind: ServiceAccount
- name: cockroachdb
- namespace: default
----
-apiVersion: rbac.authorization.k8s.io/v1beta1
-kind: ClusterRoleBinding
-metadata:
- name: cockroachdb
- labels:
- app: cockroachdb
-roleRef:
- apiGroup: rbac.authorization.k8s.io
- kind: ClusterRole
- name: cockroachdb
-subjects:
-- kind: ServiceAccount
- name: cockroachdb
- namespace: default
----
-apiVersion: v1
-kind: Service
-metadata:
- # This service is meant to be used by clients of the database. It exposes a ClusterIP that will
- # automatically load balance connections to the different database pods.
- name: cockroachdb-public
- labels:
- app: cockroachdb
-spec:
- ports:
- # The main port, served by gRPC, serves Postgres-flavor SQL, internode
- # traffic and the cli.
- - port: 26257
- targetPort: 26257
- name: grpc
- # The secondary port serves the UI as well as health and debug endpoints.
- - port: 8080
- targetPort: 8080
- name: http
- selector:
- app: cockroachdb
----
-apiVersion: v1
-kind: Service
-metadata:
- # This service only exists to create DNS entries for each pod in the stateful
- # set such that they can resolve each other's IP addresses. It does not
- # create a load-balanced ClusterIP and should not be used directly by clients
- # in most circumstances.
- name: cockroachdb
- labels:
- app: cockroachdb
- annotations:
- # This is needed to make the peer-finder work properly and to help avoid
- # edge cases where instance 0 comes up after losing its data and needs to
- # decide whether it should create a new cluster or try to join an existing
- # one. If it creates a new cluster when it should have joined an existing
- # one, we'd end up with two separate clusters listening at the same service
- # endpoint, which would be very bad.
- service.alpha.kubernetes.io/tolerate-unready-endpoints: "true"
- # Enable automatic monitoring of all instances when Prometheus is running in the cluster.
- prometheus.io/scrape: "true"
- prometheus.io/path: "_status/vars"
- prometheus.io/port: "8080"
-spec:
- ports:
- - port: 26257
- targetPort: 26257
- name: grpc
- - port: 8080
- targetPort: 8080
- name: http
- clusterIP: None
- selector:
- app: cockroachdb
----
-apiVersion: apps/v1beta1
-kind: StatefulSet
-metadata:
- name: cockroachdb
-spec:
- serviceName: "cockroachdb"
- replicas: 3
- template:
- metadata:
- labels:
- app: cockroachdb
- spec:
- serviceAccountName: cockroachdb
- # Init containers are run only once in the lifetime of a pod, before
- # it's started up for the first time. It has to exit successfully
- # before the pod's main containers are allowed to start.
- initContainers:
- # The init-certs container sends a certificate signing request to the
- # kubernetes cluster.
- # You can see pending requests using: kubectl get csr
- # CSRs can be approved using: kubectl certificate approve <csr name>
- #
- # All addresses used to contact a node must be specified in the --addresses arg.
- #
- # In addition to the node certificate and key, the init-certs entrypoint will symlink
- # the cluster CA to the certs directory.
- - name: init-certs
- image: cockroachdb/cockroach-k8s-request-cert:0.4
- imagePullPolicy: IfNotPresent
- command:
- - "/bin/ash"
- - "-ecx"
- - "/request-cert -namespace=${POD_NAMESPACE} -certs-dir=/cockroach-certs -type=node -addresses=localhost,127.0.0.1,$(hostname -f),$(hostname -f|cut -f 1-2 -d '.'),cockroachdb-public,cockroachdb-public.$(hostname -f|cut -f 3- -d '.'),cockroachdb-public.$(hostname -f|cut -f 3-4 -d '.'),cockroachdb-public.$(hostname -f|cut -f 3 -d '.') -symlink-ca-from=/var/run/secrets/kubernetes.io/serviceaccount/ca.crt"
- env:
- - name: POD_NAMESPACE
- valueFrom:
- fieldRef:
- fieldPath: metadata.namespace
- volumeMounts:
- - name: certs
- mountPath: /cockroach-certs
- affinity:
- podAntiAffinity:
- preferredDuringSchedulingIgnoredDuringExecution:
- - weight: 100
- podAffinityTerm:
- labelSelector:
- matchExpressions:
- - key: app
- operator: In
- values:
- - cockroachdb
- topologyKey: kubernetes.io/hostname
- containers:
- - name: cockroachdb
- image: cockroachdb/cockroach:@VERSION@
- imagePullPolicy: IfNotPresent
- ports:
- - containerPort: 26257
- name: grpc
- - containerPort: 8080
- name: http
- volumeMounts:
- - name: datadir
- mountPath: /cockroach/cockroach-data
- - name: certs
- mountPath: /cockroach/cockroach-certs
- env:
- - name: COCKROACH_CHANNEL
- value: kubernetes-secure
- command:
- - "/bin/bash"
- - "-ecx"
- # The use of qualified `hostname -f` is crucial:
- # Other nodes aren't able to look up the unqualified hostname.
- # Once 2.0 is out, we should be able to switch from --host to --advertise-host to make port-forwarding work to the main port.
- - "exec /cockroach/cockroach start --logtostderr --certs-dir /cockroach/cockroach-certs --host $(hostname -f) --http-addr 0.0.0.0 --join cockroachdb-0.cockroachdb,cockroachdb-1.cockroachdb,cockroachdb-2.cockroachdb --cache 25% --max-sql-memory 25%"
- # No pre-stop hook is required, a SIGTERM plus some time is all that's
- # needed for graceful shutdown of a node.
- terminationGracePeriodSeconds: 60
- volumes:
- - name: datadir
- persistentVolumeClaim:
- claimName: datadir
- - name: certs
- emptyDir: {}
- updateStrategy:
- type: RollingUpdate
- volumeClaimTemplates:
- - metadata:
- name: datadir
- spec:
- accessModes:
- - "ReadWriteOnce"
- resources:
- requests:
- storage: 100Gi
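A quick, hedged way to confirm the secure StatefulSet above came up and bound its volumes, using only the label and claim template defined in this manifest (PVC names follow Kubernetes' usual claim-pod naming convention):

# Pods created by the StatefulSet carry the app=cockroachdb label.
kubectl get pods -l app=cockroachdb
# Each pod should have a bound claim such as datadir-cockroachdb-0.
kubectl get pvc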
diff --git a/cloud/kubernetes/templates/v1.6/cockroachdb-statefulset.yaml b/cloud/kubernetes/templates/v1.6/cockroachdb-statefulset.yaml
deleted file mode 100644
index 79379f4d37..0000000000
--- a/cloud/kubernetes/templates/v1.6/cockroachdb-statefulset.yaml
+++ /dev/null
@@ -1,119 +0,0 @@
-apiVersion: v1
-kind: Service
-metadata:
- # This service is meant to be used by clients of the database. It exposes a ClusterIP that will
- # automatically load balance connections to the different database pods.
- name: cockroachdb-public
- labels:
- app: cockroachdb
-spec:
- ports:
- # The main port, served by gRPC, serves Postgres-flavor SQL, internode
- # traffic and the cli.
- - port: 26257
- targetPort: 26257
- name: grpc
- # The secondary port serves the UI as well as health and debug endpoints.
- - port: 8080
- targetPort: 8080
- name: http
- selector:
- app: cockroachdb
----
-apiVersion: v1
-kind: Service
-metadata:
- # This service only exists to create DNS entries for each pod in the stateful
- # set such that they can resolve each other's IP addresses. It does not
- # create a load-balanced ClusterIP and should not be used directly by clients
- # in most circumstances.
- name: cockroachdb
- labels:
- app: cockroachdb
- annotations:
- # This is needed to make the peer-finder work properly and to help avoid
- # edge cases where instance 0 comes up after losing its data and needs to
- # decide whether it should create a new cluster or try to join an existing
- # one. If it creates a new cluster when it should have joined an existing
- # one, we'd end up with two separate clusters listening at the same service
- # endpoint, which would be very bad.
- service.alpha.kubernetes.io/tolerate-unready-endpoints: "true"
- # Enable automatic monitoring of all instances when Prometheus is running in the cluster.
- prometheus.io/scrape: "true"
- prometheus.io/path: "_status/vars"
- prometheus.io/port: "8080"
-spec:
- ports:
- - port: 26257
- targetPort: 26257
- name: grpc
- - port: 8080
- targetPort: 8080
- name: http
- clusterIP: None
- selector:
- app: cockroachdb
----
-apiVersion: apps/v1beta1
-kind: StatefulSet
-metadata:
- name: cockroachdb
-spec:
- serviceName: "cockroachdb"
- replicas: 3
- template:
- metadata:
- labels:
- app: cockroachdb
- spec:
- affinity:
- podAntiAffinity:
- preferredDuringSchedulingIgnoredDuringExecution:
- - weight: 100
- podAffinityTerm:
- labelSelector:
- matchExpressions:
- - key: app
- operator: In
- values:
- - cockroachdb
- topologyKey: kubernetes.io/hostname
- containers:
- - name: cockroachdb
- image: cockroachdb/cockroach:@VERSION@
- imagePullPolicy: IfNotPresent
- ports:
- - containerPort: 26257
- name: grpc
- - containerPort: 8080
- name: http
- volumeMounts:
- - name: datadir
- mountPath: /cockroach/cockroach-data
- env:
- - name: COCKROACH_CHANNEL
- value: kubernetes-insecure
- command:
- - "/bin/bash"
- - "-ecx"
- # The use of qualified `hostname -f` is crucial:
- # Other nodes aren't able to look up the unqualified hostname.
- - "exec /cockroach/cockroach start --logtostderr --insecure --advertise-host $(hostname -f) --http-addr 0.0.0.0 --join cockroachdb-0.cockroachdb,cockroachdb-1.cockroachdb,cockroachdb-2.cockroachdb --cache 25% --max-sql-memory 25%"
- # No pre-stop hook is required, a SIGTERM plus some time is all that's
- # needed for graceful shutdown of a node.
- terminationGracePeriodSeconds: 60
- volumes:
- - name: datadir
- persistentVolumeClaim:
- claimName: datadir
- updateStrategy:
- type: RollingUpdate
- volumeClaimTemplates:
- - metadata:
- name: datadir
- spec:
- accessModes:
- - "ReadWriteOnce"
- resources:
- requests:
- storage: 100Gi
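Since the http port above also serves the admin UI, one illustrative way to reach it from a workstation is a plain port-forward to a pod (pod name and port taken from this manifest):

# Forward local port 8080 to the first pod's HTTP port, then browse to http://localhost:8080
kubectl port-forward cockroachdb-0 8080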
diff --git a/cloud/kubernetes/templates/v1.7/client-secure.yaml b/cloud/kubernetes/templates/v1.7/client-secure.yaml
deleted file mode 100644
index 0df772c510..0000000000
--- a/cloud/kubernetes/templates/v1.7/client-secure.yaml
+++ /dev/null
@@ -1,48 +0,0 @@
-apiVersion: v1
-kind: Pod
-metadata:
- name: cockroachdb-client-secure
- labels:
- app: cockroachdb-client
-spec:
- serviceAccountName: cockroachdb
- initContainers:
- # The init-certs container sends a certificate signing request to the
- # kubernetes cluster.
- # You can see pending requests using: kubectl get csr
- # CSRs can be approved using: kubectl certificate approve <csr name>
- #
- # In addition to the client certificate and key, the init-certs entrypoint will symlink
- # the cluster CA to the certs directory.
- - name: init-certs
- image: cockroachdb/cockroach-k8s-request-cert:0.4
- imagePullPolicy: IfNotPresent
- command:
- - "/bin/ash"
- - "-ecx"
- - "/request-cert -namespace=${POD_NAMESPACE} -certs-dir=/cockroach-certs -type=client -user=root -symlink-ca-from=/var/run/secrets/kubernetes.io/serviceaccount/ca.crt"
- env:
- - name: POD_NAMESPACE
- valueFrom:
- fieldRef:
- fieldPath: metadata.namespace
- volumeMounts:
- - name: client-certs
- mountPath: /cockroach-certs
- containers:
- - name: cockroachdb-client
- image: cockroachdb/cockroach:@VERSION@
- imagePullPolicy: IfNotPresent
- volumeMounts:
- - name: client-certs
- mountPath: /cockroach-certs
- # Keep a pod open indefinitely so kubectl exec can be used to get a shell to it
- # and run cockroach client commands, such as cockroach sql, cockroach node status, etc.
- command:
- - sleep
- - "2147483648" # 2^31
- # This pod isn't doing anything important, so don't bother waiting to terminate it.
- terminationGracePeriodSeconds: 0
- volumes:
- - name: client-certs
- emptyDir: {}
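As the comments above note, this client pod only sleeps so that kubectl exec can reach it for client commands. A sketch of that usage, assuming the pod name, certs mount, and public service defined in these manifests:

# Open a SQL shell through the secure client pod.
kubectl exec -it cockroachdb-client-secure -- /cockroach/cockroach sql --certs-dir=/cockroach-certs --host=cockroachdb-public
# The same pattern works for other client commands, e.g. node status.
kubectl exec -it cockroachdb-client-secure -- /cockroach/cockroach node status --certs-dir=/cockroach-certs --host=cockroachdb-public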
diff --git a/cloud/kubernetes/templates/v1.7/cluster-init-secure.yaml b/cloud/kubernetes/templates/v1.7/cluster-init-secure.yaml
deleted file mode 100644
index 830f2928a9..0000000000
--- a/cloud/kubernetes/templates/v1.7/cluster-init-secure.yaml
+++ /dev/null
@@ -1,49 +0,0 @@
-apiVersion: batch/v1
-kind: Job
-metadata:
- name: cluster-init-secure
- labels:
- app: cockroachdb
-spec:
- template:
- spec:
- serviceAccountName: cockroachdb
- initContainers:
- # The init-certs container sends a certificate signing request to the
- # kubernetes cluster.
- # You can see pending requests using: kubectl get csr
- # CSRs can be approved using: kubectl certificate approve <csr name>
- #
- # In addition to the client certificate and key, the init-certs entrypoint will symlink
- # the cluster CA to the certs directory.
- - name: init-certs
- image: cockroachdb/cockroach-k8s-request-cert:0.4
- imagePullPolicy: IfNotPresent
- command:
- - "/bin/ash"
- - "-ecx"
- - "/request-cert -namespace=${POD_NAMESPACE} -certs-dir=/cockroach-certs -type=client -user=root -symlink-ca-from=/var/run/secrets/kubernetes.io/serviceaccount/ca.crt"
- env:
- - name: POD_NAMESPACE
- valueFrom:
- fieldRef:
- fieldPath: metadata.namespace
- volumeMounts:
- - name: client-certs
- mountPath: /cockroach-certs
- containers:
- - name: cluster-init
- image: cockroachdb/cockroach:@VERSION@
- imagePullPolicy: IfNotPresent
- volumeMounts:
- - name: client-certs
- mountPath: /cockroach-certs
- command:
- - "/cockroach/cockroach"
- - "init"
- - "--certs-dir=/cockroach-certs"
- - "--host=cockroachdb-0.cockroachdb"
- restartPolicy: OnFailure
- volumes:
- - name: client-certs
- emptyDir: {}
diff --git a/cloud/kubernetes/templates/v1.7/cluster-init.yaml b/cloud/kubernetes/templates/v1.7/cluster-init.yaml
deleted file mode 100644
index 07a43d8456..0000000000
--- a/cloud/kubernetes/templates/v1.7/cluster-init.yaml
+++ /dev/null
@@ -1,19 +0,0 @@
-apiVersion: batch/v1
-kind: Job
-metadata:
- name: cluster-init
- labels:
- app: cockroachdb
-spec:
- template:
- spec:
- containers:
- - name: cluster-init
- image: cockroachdb/cockroach:@VERSION@
- imagePullPolicy: IfNotPresent
- command:
- - "/cockroach/cockroach"
- - "init"
- - "--insecure"
- - "--host=cockroachdb-0.cockroachdb"
- restartPolicy: OnFailure
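The insecure variant above needs no certificate step; a minimal sketch of running it and confirming the one-shot job finished, using the job name from this manifest:

# Create the init job and check that it completed.
kubectl create -f cluster-init.yaml
kubectl get job cluster-init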
diff --git a/cloud/kubernetes/templates/v1.7/cockroachdb-statefulset-secure.yaml b/cloud/kubernetes/templates/v1.7/cockroachdb-statefulset-secure.yaml
deleted file mode 100644
index 46c278ae95..0000000000
--- a/cloud/kubernetes/templates/v1.7/cockroachdb-statefulset-secure.yaml
+++ /dev/null
@@ -1,233 +0,0 @@
-apiVersion: v1
-kind: ServiceAccount
-metadata:
- name: cockroachdb
- labels:
- app: cockroachdb
----
-apiVersion: rbac.authorization.k8s.io/v1beta1
-kind: Role
-metadata:
- name: cockroachdb
- labels:
- app: cockroachdb
-rules:
-- apiGroups:
- - ""
- resources:
- - secrets
- verbs:
- - create
- - get
----
-apiVersion: rbac.authorization.k8s.io/v1beta1
-kind: ClusterRole
-metadata:
- name: cockroachdb
- labels:
- app: cockroachdb
-rules:
-- apiGroups:
- - certificates.k8s.io
- resources:
- - certificatesigningrequests
- verbs:
- - create
- - get
- - watch
----
-apiVersion: rbac.authorization.k8s.io/v1beta1
-kind: RoleBinding
-metadata:
- name: cockroachdb
- labels:
- app: cockroachdb
-roleRef:
- apiGroup: rbac.authorization.k8s.io
- kind: Role
- name: cockroachdb
-subjects:
-- kind: ServiceAccount
- name: cockroachdb
- namespace: default
----
-apiVersion: rbac.authorization.k8s.io/v1beta1
-kind: ClusterRoleBinding
-metadata:
- name: cockroachdb
- labels:
- app: cockroachdb
-roleRef:
- apiGroup: rbac.authorization.k8s.io
- kind: ClusterRole
- name: cockroachdb
-subjects:
-- kind: ServiceAccount
- name: cockroachdb
- namespace: default
----
-apiVersion: v1
-kind: Service
-metadata:
- # This service is meant to be used by clients of the database. It exposes a ClusterIP that will
- # automatically load balance connections to the different database pods.
- name: cockroachdb-public
- labels:
- app: cockroachdb
-spec:
- ports:
- # The main port, served by gRPC, serves Postgres-flavor SQL, internode
- # traffic and the cli.
- - port: 26257
- targetPort: 26257
- name: grpc
- # The secondary port serves the UI as well as health and debug endpoints.
- - port: 8080
- targetPort: 8080
- name: http
- selector:
- app: cockroachdb
----
-apiVersion: v1
-kind: Service
-metadata:
- # This service only exists to create DNS entries for each pod in the stateful
- # set such that they can resolve each other's IP addresses. It does not
- # create a load-balanced ClusterIP and should not be used directly by clients
- # in most circumstances.
- name: cockroachdb
- labels:
- app: cockroachdb
- annotations:
- # This is needed to make the peer-finder work properly and to help avoid
- # edge cases where instance 0 comes up after losing its data and needs to
- # decide whether it should create a new cluster or try to join an existing
- # one. If it creates a new cluster when it should have joined an existing
- # one, we'd end up with two separate clusters listening at the same service
- # endpoint, which would be very bad.
- service.alpha.kubernetes.io/tolerate-unready-endpoints: "true"
- # Enable automatic monitoring of all instances when Prometheus is running in the cluster.
- prometheus.io/scrape: "true"
- prometheus.io/path: "_status/vars"
- prometheus.io/port: "8080"
-spec:
- ports:
- - port: 26257
- targetPort: 26257
- name: grpc
- - port: 8080
- targetPort: 8080
- name: http
- clusterIP: None
- selector:
- app: cockroachdb
----
-apiVersion: policy/v1beta1
-kind: PodDisruptionBudget
-metadata:
- name: cockroachdb-budget
- labels:
- app: cockroachdb
-spec:
- selector:
- matchLabels:
- app: cockroachdb
- maxUnavailable: 1
----
-apiVersion: apps/v1beta1
-kind: StatefulSet
-metadata:
- name: cockroachdb
-spec:
- serviceName: "cockroachdb"
- replicas: 3
- template:
- metadata:
- labels:
- app: cockroachdb
- spec:
- serviceAccountName: cockroachdb
- # Init containers are run only once in the lifetime of a pod, before
- # it's started up for the first time. It has to exit successfully
- # before the pod's main containers are allowed to start.
- initContainers:
- # The init-certs container sends a certificate signing request to the
- # kubernetes cluster.
- # You can see pending requests using: kubectl get csr
- # CSRs can be approved using: kubectl certificate approve <csr name>
- #
- # All addresses used to contact a node must be specified in the --addresses arg.
- #
- # In addition to the node certificate and key, the init-certs entrypoint will symlink
- # the cluster CA to the certs directory.
- - name: init-certs
- image: cockroachdb/cockroach-k8s-request-cert:0.4
- imagePullPolicy: IfNotPresent
- command:
- - "/bin/ash"
- - "-ecx"
- - "/request-cert -namespace=${POD_NAMESPACE} -certs-dir=/cockroach-certs -type=node -addresses=localhost,127.0.0.1,$(hostname -f),$(hostname -f|cut -f 1-2 -d '.'),cockroachdb-public,cockroachdb-public.$(hostname -f|cut -f 3- -d '.'),cockroachdb-public.$(hostname -f|cut -f 3-4 -d '.'),cockroachdb-public.$(hostname -f|cut -f 3 -d '.') -symlink-ca-from=/var/run/secrets/kubernetes.io/serviceaccount/ca.crt"
- env:
- - name: POD_NAMESPACE
- valueFrom:
- fieldRef:
- fieldPath: metadata.namespace
- volumeMounts:
- - name: certs
- mountPath: /cockroach-certs
- affinity:
- podAntiAffinity:
- preferredDuringSchedulingIgnoredDuringExecution:
- - weight: 100
- podAffinityTerm:
- labelSelector:
- matchExpressions:
- - key: app
- operator: In
- values:
- - cockroachdb
- topologyKey: kubernetes.io/hostname
- containers:
- - name: cockroachdb
- image: cockroachdb/cockroach:@VERSION@
- imagePullPolicy: IfNotPresent
- ports:
- - containerPort: 26257
- name: grpc
- - containerPort: 8080
- name: http
- volumeMounts:
- - name: datadir
- mountPath: /cockroach/cockroach-data
- - name: certs
- mountPath: /cockroach/cockroach-certs
- env:
- - name: COCKROACH_CHANNEL
- value: kubernetes-secure
- command:
- - "/bin/bash"
- - "-ecx"
- # The use of qualified `hostname -f` is crucial:
- # Other nodes aren't able to look up the unqualified hostname.
- # Once 2.0 is out, we should be able to switch from --host to --advertise-host to make port-forwarding work to the main port.
- - "exec /cockroach/cockroach start --logtostderr --certs-dir /cockroach/cockroach-certs --host $(hostname -f) --http-addr 0.0.0.0 --join cockroachdb-0.cockroachdb,cockroachdb-1.cockroachdb,cockroachdb-2.cockroachdb --cache 25% --max-sql-memory 25%"
- # No pre-stop hook is required, a SIGTERM plus some time is all that's
- # needed for graceful shutdown of a node.
- terminationGracePeriodSeconds: 60
- volumes:
- - name: datadir
- persistentVolumeClaim:
- claimName: datadir
- - name: certs
- emptyDir: {}
- updateStrategy:
- type: RollingUpdate
- volumeClaimTemplates:
- - metadata:
- name: datadir
- spec:
- accessModes:
- - "ReadWriteOnce"
- resources:
- requests:
- storage: 100Gi
diff --git a/cloud/kubernetes/templates/v1.7/cockroachdb-statefulset.yaml b/cloud/kubernetes/templates/v1.7/cockroachdb-statefulset.yaml
deleted file mode 100644
index 6cdb744279..0000000000
--- a/cloud/kubernetes/templates/v1.7/cockroachdb-statefulset.yaml
+++ /dev/null
@@ -1,131 +0,0 @@
-apiVersion: v1
-kind: Service
-metadata:
- # This service is meant to be used by clients of the database. It exposes a ClusterIP that will
- # automatically load balance connections to the different database pods.
- name: cockroachdb-public
- labels:
- app: cockroachdb
-spec:
- ports:
- # The main port, served by gRPC, serves Postgres-flavor SQL, internode
- # traffic and the cli.
- - port: 26257
- targetPort: 26257
- name: grpc
- # The secondary port serves the UI as well as health and debug endpoints.
- - port: 8080
- targetPort: 8080
- name: http
- selector:
- app: cockroachdb
----
-apiVersion: v1
-kind: Service
-metadata:
- # This service only exists to create DNS entries for each pod in the stateful
- # set such that they can resolve each other's IP addresses. It does not
- # create a load-balanced ClusterIP and should not be used directly by clients
- # in most circumstances.
- name: cockroachdb
- labels:
- app: cockroachdb
- annotations:
- # This is needed to make the peer-finder work properly and to help avoid
- # edge cases where instance 0 comes up after losing its data and needs to
- # decide whether it should create a new cluster or try to join an existing
- # one. If it creates a new cluster when it should have joined an existing
- # one, we'd end up with two separate clusters listening at the same service
- # endpoint, which would be very bad.
- service.alpha.kubernetes.io/tolerate-unready-endpoints: "true"
- # Enable automatic monitoring of all instances when Prometheus is running in the cluster.
- prometheus.io/scrape: "true"
- prometheus.io/path: "_status/vars"
- prometheus.io/port: "8080"
-spec:
- ports:
- - port: 26257
- targetPort: 26257
- name: grpc
- - port: 8080
- targetPort: 8080
- name: http
- clusterIP: None
- selector:
- app: cockroachdb
----
-apiVersion: policy/v1beta1
-kind: PodDisruptionBudget
-metadata:
- name: cockroachdb-budget
- labels:
- app: cockroachdb
-spec:
- selector:
- matchLabels:
- app: cockroachdb
- maxUnavailable: 1
----
-apiVersion: apps/v1beta1
-kind: StatefulSet
-metadata:
- name: cockroachdb
-spec:
- serviceName: "cockroachdb"
- replicas: 3
- template:
- metadata:
- labels:
- app: cockroachdb
- spec:
- affinity:
- podAntiAffinity:
- preferredDuringSchedulingIgnoredDuringExecution:
- - weight: 100
- podAffinityTerm:
- labelSelector:
- matchExpressions:
- - key: app
- operator: In
- values:
- - cockroachdb
- topologyKey: kubernetes.io/hostname
- containers:
- - name: cockroachdb
- image: cockroachdb/cockroach:@VERSION@
- imagePullPolicy: IfNotPresent
- ports:
- - containerPort: 26257
- name: grpc
- - containerPort: 8080
- name: http
- volumeMounts:
- - name: datadir
- mountPath: /cockroach/cockroach-data
- env:
- - name: COCKROACH_CHANNEL
- value: kubernetes-insecure
- command:
- - "/bin/bash"
- - "-ecx"
- # The use of qualified `hostname -f` is crucial:
- # Other nodes aren't able to look up the unqualified hostname.
- - "exec /cockroach/cockroach start --logtostderr --insecure --advertise-host $(hostname -f) --http-addr 0.0.0.0 --join cockroachdb-0.cockroachdb,cockroachdb-1.cockroachdb,cockroachdb-2.cockroachdb --cache 25% --max-sql-memory 25%"
- # No pre-stop hook is required, a SIGTERM plus some time is all that's
- # needed for graceful shutdown of a node.
- terminationGracePeriodSeconds: 60
- volumes:
- - name: datadir
- persistentVolumeClaim:
- claimName: datadir
- updateStrategy:
- type: RollingUpdate
- volumeClaimTemplates:
- - metadata:
- name: datadir
- spec:
- accessModes:
- - "ReadWriteOnce"
- resources:
- requests:
- storage: 100Gi
diff --git a/cloud/kubernetes/v1.6/client-secure.yaml b/cloud/kubernetes/v1.6/client-secure.yaml
index 58c9105972..d0c7967dbd 100644
--- a/cloud/kubernetes/v1.6/client-secure.yaml
+++ b/cloud/kubernetes/v1.6/client-secure.yaml
@@ -1,4 +1,3 @@
-# Generated file, DO NOT EDIT. Source: cloud/kubernetes/templates/v1.6/client-secure.yaml
apiVersion: v1
kind: Pod
metadata:
diff --git a/cloud/kubernetes/v1.6/cluster-init-secure.yaml b/cloud/kubernetes/v1.6/cluster-init-secure.yaml
index b17c6a2bd6..0154e8680e 100644
--- a/cloud/kubernetes/v1.6/cluster-init-secure.yaml
+++ b/cloud/kubernetes/v1.6/cluster-init-secure.yaml
@@ -1,4 +1,3 @@
-# Generated file, DO NOT EDIT. Source: cloud/kubernetes/templates/v1.6/cluster-init-secure.yaml
apiVersion: batch/v1
kind: Job
metadata:
diff --git a/cloud/kubernetes/v1.6/cluster-init.yaml b/cloud/kubernetes/v1.6/cluster-init.yaml
index d323511fc9..d33a6fb7d0 100644
--- a/cloud/kubernetes/v1.6/cluster-init.yaml
+++ b/cloud/kubernetes/v1.6/cluster-init.yaml
@@ -1,4 +1,3 @@
-# Generated file, DO NOT EDIT. Source: cloud/kubernetes/templates/v1.6/cluster-init.yaml
apiVersion: batch/v1
kind: Job
metadata:
diff --git a/cloud/kubernetes/v1.6/cockroachdb-statefulset-secure.yaml b/cloud/kubernetes/v1.6/cockroachdb-statefulset-secure.yaml
index a9110e5235..b269400429 100644
--- a/cloud/kubernetes/v1.6/cockroachdb-statefulset-secure.yaml
+++ b/cloud/kubernetes/v1.6/cockroachdb-statefulset-secure.yaml
@@ -1,4 +1,3 @@
-# Generated file, DO NOT EDIT. Source: cloud/kubernetes/templates/v1.6/cockroachdb-statefulset-secure.yaml
apiVersion: v1
kind: ServiceAccount
metadata:
diff --git a/cloud/kubernetes/v1.6/cockroachdb-statefulset.yaml b/cloud/kubernetes/v1.6/cockroachdb-statefulset.yaml
index 42712a7bca..2de7761fef 100644
--- a/cloud/kubernetes/v1.6/cockroachdb-statefulset.yaml
+++ b/cloud/kubernetes/v1.6/cockroachdb-statefulset.yaml
@@ -1,4 +1,3 @@
-# Generated file, DO NOT EDIT. Source: cloud/kubernetes/templates/v1.6/cockroachdb-statefulset.yaml
apiVersion: v1
kind: Service
metadata:
diff --git a/cloud/kubernetes/v1.7/client-secure.yaml b/cloud/kubernetes/v1.7/client-secure.yaml
index 9b94872ed2..d0c7967dbd 100644
--- a/cloud/kubernetes/v1.7/client-secure.yaml
+++ b/cloud/kubernetes/v1.7/client-secure.yaml
@@ -1,4 +1,3 @@
-# Generated file, DO NOT EDIT. Source: cloud/kubernetes/templates/v1.7/client-secure.yaml
apiVersion: v1
kind: Pod
metadata:
diff --git a/cloud/kubernetes/v1.7/cluster-init-secure.yaml b/cloud/kubernetes/v1.7/cluster-init-secure.yaml
index ca43db7558..0154e8680e 100644
--- a/cloud/kubernetes/v1.7/cluster-init-secure.yaml
+++ b/cloud/kubernetes/v1.7/cluster-init-secure.yaml
@@ -1,4 +1,3 @@
-# Generated file, DO NOT EDIT. Source: cloud/kubernetes/templates/v1.7/cluster-init-secure.yaml
apiVersion: batch/v1
kind: Job
metadata:
diff --git a/cloud/kubernetes/v1.7/cluster-init.yaml b/cloud/kubernetes/v1.7/cluster-init.yaml
index 5fde6df11d..d33a6fb7d0 100644
--- a/cloud/kubernetes/v1.7/cluster-init.yaml
+++ b/cloud/kubernetes/v1.7/cluster-init.yaml
@@ -1,4 +1,3 @@
-# Generated file, DO NOT EDIT. Source: cloud/kubernetes/templates/v1.7/cluster-init.yaml
apiVersion: batch/v1
kind: Job
metadata:
diff --git a/cloud/kubernetes/v1.7/cockroachdb-statefulset-secure.yaml b/cloud/kubernetes/v1.7/cockroachdb-statefulset-secure.yaml
index f10d589062..1b682f8c9e 100644
--- a/cloud/kubernetes/v1.7/cockroachdb-statefulset-secure.yaml
+++ b/cloud/kubernetes/v1.7/cockroachdb-statefulset-secure.yaml
@@ -1,4 +1,3 @@
-# Generated file, DO NOT EDIT. Source: cloud/kubernetes/templates/v1.7/cockroachdb-statefulset-secure.yaml
apiVersion: v1
kind: ServiceAccount
metadata:
diff --git a/cloud/kubernetes/v1.7/cockroachdb-statefulset.yaml b/cloud/kubernetes/v1.7/cockroachdb-statefulset.yaml
index 8be582972a..26937db23c 100644
--- a/cloud/kubernetes/v1.7/cockroachdb-statefulset.yaml
+++ b/cloud/kubernetes/v1.7/cockroachdb-statefulset.yaml
@@ -1,4 +1,3 @@
-# Generated file, DO NOT EDIT. Source: cloud/kubernetes/templates/v1.7/cockroachdb-statefulset.yaml
apiVersion: v1
kind: Service
metadata:
diff --git a/dev b/dev
index 8b2d7d91b1..410066c5ad 100755
--- a/dev
+++ b/dev
@@ -3,7 +3,7 @@
set -euo pipefail
# Bump this counter to force rebuilding `dev` on all machines.
-DEV_VERSION=20
+DEV_VERSION=19
THIS_DIR=$(cd "$(dirname "$0")" && pwd)
BINARY_DIR=$THIS_DIR/bin/dev-versions
diff --git a/docs/generated/eventlog.md b/docs/generated/eventlog.md
index cc042a51e9..15969ca0ff 100644
--- a/docs/generated/eventlog.md
+++ b/docs/generated/eventlog.md
@@ -2305,32 +2305,6 @@ are automatically converted server-side.
Events in this category are logged to the `TELEMETRY` channel.
-### `captured_index_usage_stats`
-
-An event of type `captured_index_usage_stats`
-
-
-| Field | Description | Sensitive |
-|--|--|--|
-| `TotalReadCount` | TotalReadCount is the number of times this index has been read from. | no |
-| `LastRead` | LastRead is the timestamp that this index was last being read from. | yes |
-| `TableID` | TableID is the ID of the table this index is created on. This is same as descpb.TableID and is unique within the cluster. | no |
-| `IndexID` | IndexID is the ID of the index within the scope of the given table. | no |
-| `DatabaseName` | | yes |
-| `TableName` | | yes |
-| `IndexName` | | yes |
-| `IndexType` | | yes |
-| `IsUnique` | | no |
-| `IsInverted` | | no |
-
-
-#### Common fields
-
-| Field | Description | Sensitive |
-|--|--|--|
-| `Timestamp` | The timestamp of the event. Expressed as nanoseconds since the Unix epoch. | no |
-| `EventType` | The type of the event. | no |
-
### `sampled_query`
An event of type `sampled_query` is the SQL query event logged to the telemetry channel. It
diff --git a/docs/generated/http/BUILD.bazel b/docs/generated/http/BUILD.bazel
index 5e5ac861e7..f171a56bfd 100644
--- a/docs/generated/http/BUILD.bazel
+++ b/docs/generated/http/BUILD.bazel
@@ -1,6 +1,19 @@
genrule(
name = "http",
srcs = [
+ "//pkg/ts/tspb:tspb_proto",
+ "//pkg/util/metric:metric_proto",
+ "//pkg/ts/catalog:catalog_proto",
+ "//pkg/util/duration:duration_proto",
+ "//pkg/util/timeutil/pgdate:pgdate_proto",
+ "//pkg/sql/sessiondatapb:sessiondatapb_proto",
+ "//pkg/sql/inverted:inverted_proto",
+ "//pkg/sql/pgwire/pgerror:pgerror_proto",
+ "//pkg/sql/contentionpb:contentionpb_proto",
+ "//pkg/sql/execinfrapb:execinfrapb_proto",
+ "//pkg/kv/kvserver/kvserverpb:kvserverpb_proto",
+ "//pkg/kv/kvserver/liveness/livenesspb:livenesspb_proto",
+ "//pkg/util/log/logpb:logpb_proto",
"//pkg/build:build_proto",
"//pkg/clusterversion:clusterversion_proto",
"//pkg/config/zonepb:zonepb_proto",
@@ -9,8 +22,6 @@ genrule(
"//pkg/gossip:gossip_proto",
"//pkg/jobs/jobspb:jobspb_proto",
"//pkg/kv/kvserver/concurrency/lock:lock_proto",
- "//pkg/kv/kvserver/kvserverpb:kvserverpb_proto",
- "//pkg/kv/kvserver/liveness/livenesspb:livenesspb_proto",
"//pkg/kv/kvserver/readsummary/rspb:rspb_proto",
"//pkg/roachpb:roachpb_proto",
"//pkg/server/diagnostics/diagnosticspb:diagnosticspb_proto",
@@ -19,24 +30,12 @@ genrule(
"//pkg/settings:settings_proto",
"//pkg/sql/catalog/catpb:catpb_proto",
"//pkg/sql/catalog/descpb:descpb_proto",
- "//pkg/sql/contentionpb:contentionpb_proto",
- "//pkg/sql/execinfrapb:execinfrapb_proto",
- "//pkg/sql/inverted:inverted_proto",
- "//pkg/sql/lex:lex_proto",
- "//pkg/sql/pgwire/pgerror:pgerror_proto",
"//pkg/sql/schemachanger/scpb:scpb_proto",
- "//pkg/sql/sessiondatapb:sessiondatapb_proto",
"//pkg/sql/types:types_proto",
"//pkg/storage/enginepb:enginepb_proto",
- "//pkg/ts/catalog:catalog_proto",
- "//pkg/ts/tspb:tspb_proto",
- "//pkg/util/duration:duration_proto",
+ "//pkg/util:util_proto",
"//pkg/util/hlc:hlc_proto",
- "//pkg/util/log/logpb:logpb_proto",
- "//pkg/util/metric:metric_proto",
- "//pkg/util/timeutil/pgdate:pgdate_proto",
"//pkg/util/tracing/tracingpb:tracingpb_proto",
- "//pkg/util:util_proto",
"@com_github_prometheus_client_model//io/prometheus/client:client_proto",
"@com_github_cockroachdb_errors//errorspb:errorspb_proto",
"@com_github_gogo_protobuf//gogoproto:gogo_proto",
diff --git a/docs/generated/http/full.md b/docs/generated/http/full.md
index 5597f184fd..3595e3437d 100644
--- a/docs/generated/http/full.md
+++ b/docs/generated/http/full.md
@@ -1229,9 +1229,6 @@ Support status: [reserved](#support-status)
| locks_with_wait_queues | [int64](#cockroach.server.serverpb.RaftDebugResponse-int64) | | | [reserved](#support-status) |
| lock_wait_queue_waiters | [int64](#cockroach.server.serverpb.RaftDebugResponse-int64) | | | [reserved](#support-status) |
| top_k_locks_by_wait_queue_waiters | [RangeInfo.LockInfo](#cockroach.server.serverpb.RaftDebugResponse-cockroach.server.serverpb.RangeInfo.LockInfo) | repeated | | [reserved](#support-status) |
-| locality | [Locality](#cockroach.server.serverpb.RaftDebugResponse-cockroach.server.serverpb.Locality) | | | [reserved](#support-status) |
-| is_leaseholder | [bool](#cockroach.server.serverpb.RaftDebugResponse-bool) | | | [reserved](#support-status) |
-| lease_valid | [bool](#cockroach.server.serverpb.RaftDebugResponse-bool) | | | [reserved](#support-status) |
@@ -1356,34 +1353,6 @@ only.
-<a name="cockroach.server.serverpb.RaftDebugResponse-cockroach.server.serverpb.Locality"></a>
-#### Locality
-
-Locality is an ordered set of key value Tiers that describe a node's
-location. The tier keys should be the same across all nodes.
-
-| Field | Type | Label | Description | Support status |
-| ----- | ---- | ----- | ----------- | -------------- |
-| tiers | [Tier](#cockroach.server.serverpb.RaftDebugResponse-cockroach.server.serverpb.Tier) | repeated | | [reserved](#support-status) |
-
-
-
-
-
-<a name="cockroach.server.serverpb.RaftDebugResponse-cockroach.server.serverpb.Tier"></a>
-#### Tier
-
-Tier represents one level of the locality hierarchy.
-
-| Field | Type | Label | Description | Support status |
-| ----- | ---- | ----- | ----------- | -------------- |
-| key | [string](#cockroach.server.serverpb.RaftDebugResponse-string) | | Key is the name of tier and should match all other nodes. | [reserved](#support-status) |
-| value | [string](#cockroach.server.serverpb.RaftDebugResponse-string) | | Value is node specific value corresponding to the key. | [reserved](#support-status) |
-
-
-
-
-
<a name="cockroach.server.serverpb.RaftDebugResponse-cockroach.server.serverpb.RaftRangeError"></a>
#### RaftRangeError
@@ -1467,9 +1436,6 @@ Support status: [reserved](#support-status)
| locks_with_wait_queues | [int64](#cockroach.server.serverpb.RangesResponse-int64) | | | [reserved](#support-status) |
| lock_wait_queue_waiters | [int64](#cockroach.server.serverpb.RangesResponse-int64) | | | [reserved](#support-status) |
| top_k_locks_by_wait_queue_waiters | [RangeInfo.LockInfo](#cockroach.server.serverpb.RangesResponse-cockroach.server.serverpb.RangeInfo.LockInfo) | repeated | | [reserved](#support-status) |
-| locality | [Locality](#cockroach.server.serverpb.RangesResponse-cockroach.server.serverpb.Locality) | | | [reserved](#support-status) |
-| is_leaseholder | [bool](#cockroach.server.serverpb.RangesResponse-bool) | | | [reserved](#support-status) |
-| lease_valid | [bool](#cockroach.server.serverpb.RangesResponse-bool) | | | [reserved](#support-status) |
@@ -1594,206 +1560,6 @@ only.
-<a name="cockroach.server.serverpb.RangesResponse-cockroach.server.serverpb.Locality"></a>
-#### Locality
-
-Locality is an ordered set of key value Tiers that describe a node's
-location. The tier keys should be the same across all nodes.
-
-| Field | Type | Label | Description | Support status |
-| ----- | ---- | ----- | ----------- | -------------- |
-| tiers | [Tier](#cockroach.server.serverpb.RangesResponse-cockroach.server.serverpb.Tier) | repeated | | [reserved](#support-status) |
-
-
-
-
-
-<a name="cockroach.server.serverpb.RangesResponse-cockroach.server.serverpb.Tier"></a>
-#### Tier
-
-Tier represents one level of the locality hierarchy.
-
-| Field | Type | Label | Description | Support status |
-| ----- | ---- | ----- | ----------- | -------------- |
-| key | [string](#cockroach.server.serverpb.RangesResponse-string) | | Key is the name of tier and should match all other nodes. | [reserved](#support-status) |
-| value | [string](#cockroach.server.serverpb.RangesResponse-string) | | Value is node specific value corresponding to the key. | [reserved](#support-status) |
-
-
-
-
-
-
-## TenantRanges
-
-`GET /_status/tenant_ranges`
-
-TenantRanges requests internal details about all range replicas within
-the tenant's keyspace.
-
-Support status: [reserved](#support-status)
-
-#### Request Parameters
-
-
-
-
-
-
-
-
-
-
-
-
-
-#### Response Parameters
-
-
-
-
-
-
-
-| Field | Type | Label | Description | Support status |
-| ----- | ---- | ----- | ----------- | -------------- |
-| ranges_by_locality | [TenantRangesResponse.RangesByLocalityEntry](#cockroach.server.serverpb.TenantRangesResponse-cockroach.server.serverpb.TenantRangesResponse.RangesByLocalityEntry) | repeated | ranges_by_locality maps each range replica to its specified availability zone, as defined within the replica's locality metadata (default key `az`). Replicas without the default available zone key set will fall under the `locality-unset` key. | [reserved](#support-status) |
-
-
-
-
-
-
-<a name="cockroach.server.serverpb.TenantRangesResponse-cockroach.server.serverpb.TenantRangesResponse.RangesByLocalityEntry"></a>
-#### TenantRangesResponse.RangesByLocalityEntry
-
-
-
-| Field | Type | Label | Description | Support status |
-| ----- | ---- | ----- | ----------- | -------------- |
-| key | [string](#cockroach.server.serverpb.TenantRangesResponse-string) | | | |
-| value | [TenantRangesResponse.TenantRangeList](#cockroach.server.serverpb.TenantRangesResponse-cockroach.server.serverpb.TenantRangesResponse.TenantRangeList) | | | |
-
-
-
-
-
-<a name="cockroach.server.serverpb.TenantRangesResponse-cockroach.server.serverpb.TenantRangesResponse.TenantRangeList"></a>
-#### TenantRangesResponse.TenantRangeList
-
-
-
-| Field | Type | Label | Description | Support status |
-| ----- | ---- | ----- | ----------- | -------------- |
-| ranges | [TenantRangeInfo](#cockroach.server.serverpb.TenantRangesResponse-cockroach.server.serverpb.TenantRangeInfo) | repeated | | [reserved](#support-status) |
-
-
-
-
-
-<a name="cockroach.server.serverpb.TenantRangesResponse-cockroach.server.serverpb.TenantRangeInfo"></a>
-#### TenantRangeInfo
-
-TenantRangeInfo provides metadata about a specific range replica,
-where concepts not considered to be relevant within the tenant
-abstraction (e.g. NodeIDs) are omitted. Instead, Locality information
-is used to distinguish replicas.
-
-| Field | Type | Label | Description | Support status |
-| ----- | ---- | ----- | ----------- | -------------- |
-| range_id | [int64](#cockroach.server.serverpb.TenantRangesResponse-int64) | | The ID of the Range. | [reserved](#support-status) |
-| span | [PrettySpan](#cockroach.server.serverpb.TenantRangesResponse-cockroach.server.serverpb.PrettySpan) | | The pretty-printed key span of the range. | [reserved](#support-status) |
-| locality | [Locality](#cockroach.server.serverpb.TenantRangesResponse-cockroach.server.serverpb.Locality) | | Any locality information associated with this specific replica. | [reserved](#support-status) |
-| is_leaseholder | [bool](#cockroach.server.serverpb.TenantRangesResponse-bool) | | Whether the range's specific replica is a leaseholder. | [reserved](#support-status) |
-| lease_valid | [bool](#cockroach.server.serverpb.TenantRangesResponse-bool) | | Whether the range's specific replica holds a valid lease. | [reserved](#support-status) |
-| range_stats | [RangeStatistics](#cockroach.server.serverpb.TenantRangesResponse-cockroach.server.serverpb.RangeStatistics) | | Statistics about the range replica, e.g. QPS, WPS. | [reserved](#support-status) |
-| mvcc_stats | [cockroach.storage.enginepb.MVCCStats](#cockroach.server.serverpb.TenantRangesResponse-cockroach.storage.enginepb.MVCCStats) | | MVCC stats about the range replica, e.g. live_bytes. | [reserved](#support-status) |
-| read_latches | [int64](#cockroach.server.serverpb.TenantRangesResponse-int64) | | Read count as reported by the range replica's spanlatch.Manager. | [reserved](#support-status) |
-| write_latches | [int64](#cockroach.server.serverpb.TenantRangesResponse-int64) | | Write count as reported by the range replica's spanlatch.Manager. | [reserved](#support-status) |
-| locks | [int64](#cockroach.server.serverpb.TenantRangesResponse-int64) | | The number of locks as reported by the range replica's lockTable. | [reserved](#support-status) |
-| locks_with_wait_queues | [int64](#cockroach.server.serverpb.TenantRangesResponse-int64) | | The number of locks with non-empty wait-queues as reported by the range replica's lockTable | [reserved](#support-status) |
-| lock_wait_queue_waiters | [int64](#cockroach.server.serverpb.TenantRangesResponse-int64) | | The aggregate number of waiters in wait-queues across all locks as reported by the range replica's lockTable | [reserved](#support-status) |
-| top_k_locks_by_wait_queue_waiters | [TenantRangeInfo.LockInfo](#cockroach.server.serverpb.TenantRangesResponse-cockroach.server.serverpb.TenantRangeInfo.LockInfo) | repeated | The top-k locks with the most waiters (readers + writers) in their wait-queue, ordered in descending order. | [reserved](#support-status) |
-
-
-
-
-
-<a name="cockroach.server.serverpb.TenantRangesResponse-cockroach.server.serverpb.PrettySpan"></a>
-#### PrettySpan
-
-
-
-| Field | Type | Label | Description | Support status |
-| ----- | ---- | ----- | ----------- | -------------- |
-| start_key | [string](#cockroach.server.serverpb.TenantRangesResponse-string) | | | [reserved](#support-status) |
-| end_key | [string](#cockroach.server.serverpb.TenantRangesResponse-string) | | | [reserved](#support-status) |
-
-
-
-
-
-<a name="cockroach.server.serverpb.TenantRangesResponse-cockroach.server.serverpb.Locality"></a>
-#### Locality
-
-Locality is an ordered set of key value Tiers that describe a node's
-location. The tier keys should be the same across all nodes.
-
-| Field | Type | Label | Description | Support status |
-| ----- | ---- | ----- | ----------- | -------------- |
-| tiers | [Tier](#cockroach.server.serverpb.TenantRangesResponse-cockroach.server.serverpb.Tier) | repeated | | [reserved](#support-status) |
-
-
-
-
-
-<a name="cockroach.server.serverpb.TenantRangesResponse-cockroach.server.serverpb.Tier"></a>
-#### Tier
-
-Tier represents one level of the locality hierarchy.
-
-| Field | Type | Label | Description | Support status |
-| ----- | ---- | ----- | ----------- | -------------- |
-| key | [string](#cockroach.server.serverpb.TenantRangesResponse-string) | | Key is the name of tier and should match all other nodes. | [reserved](#support-status) |
-| value | [string](#cockroach.server.serverpb.TenantRangesResponse-string) | | Value is node specific value corresponding to the key. | [reserved](#support-status) |
-
-
-
-
-
-<a name="cockroach.server.serverpb.TenantRangesResponse-cockroach.server.serverpb.RangeStatistics"></a>
-#### RangeStatistics
-
-RangeStatistics describes statistics reported by a range. For internal use
-only.
-
-| Field | Type | Label | Description | Support status |
-| ----- | ---- | ----- | ----------- | -------------- |
-| queries_per_second | [double](#cockroach.server.serverpb.TenantRangesResponse-double) | | Queries per second served by this range.<br><br>Note that queries per second will only be known by the leaseholder. All other replicas will report it as 0. | [reserved](#support-status) |
-| writes_per_second | [double](#cockroach.server.serverpb.TenantRangesResponse-double) | | Writes per second served by this range. | [reserved](#support-status) |
-
-
-
-
-
-<a name="cockroach.server.serverpb.TenantRangesResponse-cockroach.server.serverpb.TenantRangeInfo.LockInfo"></a>
-#### TenantRangeInfo.LockInfo
-
-LockInfo provides metadata about the state of a single lock
-in the range replica's lockTable.
-
-| Field | Type | Label | Description | Support status |
-| ----- | ---- | ----- | ----------- | -------------- |
-| pretty_key | [string](#cockroach.server.serverpb.TenantRangesResponse-string) | | The lock's key in pretty format. | [reserved](#support-status) |
-| key | [bytes](#cockroach.server.serverpb.TenantRangesResponse-bytes) | | The lock's key. | [reserved](#support-status) |
-| held | [bool](#cockroach.server.serverpb.TenantRangesResponse-bool) | | Is the lock actively held by a transaction, or just a reservation? | [reserved](#support-status) |
-| waiters | [int64](#cockroach.server.serverpb.TenantRangesResponse-int64) | | The number of waiters in the lock's wait queue. | [reserved](#support-status) |
-| waiting_readers | [int64](#cockroach.server.serverpb.TenantRangesResponse-int64) | | The number of waiting readers in the lock's wait queue. | [reserved](#support-status) |
-| waiting_writers | [int64](#cockroach.server.serverpb.TenantRangesResponse-int64) | | The number of waiting writers in the lock's wait queue. | [reserved](#support-status) |
-
-
-
-
-
## Gossip
@@ -3364,8 +3130,6 @@ of ranges currently considered “hot” by the node(s).
| Field | Type | Label | Description | Support status |
| ----- | ---- | ----- | ----------- | -------------- |
| node_id | [string](#cockroach.server.serverpb.HotRangesRequest-string) | | NodeID indicates which node to query for a hot range report. It is possible to populate any node ID; if the node receiving the request is not the target node, it will forward the request to the target node.<br><br>If left empty, the request is forwarded to every node in the cluster. | [alpha](#support-status) |
-| page_size | [int32](#cockroach.server.serverpb.HotRangesRequest-int32) | | | [reserved](#support-status) |
-| page_token | [string](#cockroach.server.serverpb.HotRangesRequest-string) | | | [reserved](#support-status) |
@@ -3445,92 +3209,6 @@ target node(s) selected in a HotRangesRequest.
| ----- | ---- | ----- | ----------- | -------------- |
| desc | [cockroach.roachpb.RangeDescriptor](#cockroach.server.serverpb.HotRangesResponse-cockroach.roachpb.RangeDescriptor) | | Desc is the descriptor of the range for which the report was produced.<br><br>TODO(knz): This field should be removed. See: https://github.com/cockroachdb/cockroach/issues/53212 | [reserved](#support-status) |
| queries_per_second | [double](#cockroach.server.serverpb.HotRangesResponse-double) | | QueriesPerSecond is the recent number of queries per second on this range. | [alpha](#support-status) |
-| leaseholder_node_id | [int32](#cockroach.server.serverpb.HotRangesResponse-int32) | | LeaseholderNodeID indicates the Node ID that is the current leaseholder for the given range. | [reserved](#support-status) |
-
-
-
-
-
-
-## HotRangesV2
-
-`POST /_status/v2/hotranges`
-
-
-
-Support status: [reserved](#support-status)
-
-#### Request Parameters
-
-
-
-
-HotRangesRequest queries one or more cluster nodes for a list
-of ranges currently considered “hot” by the node(s).
-
-
-| Field | Type | Label | Description | Support status |
-| ----- | ---- | ----- | ----------- | -------------- |
-| node_id | [string](#cockroach.server.serverpb.HotRangesRequest-string) | | NodeID indicates which node to query for a hot range report. It is possible to populate any node ID; if the node receiving the request is not the target node, it will forward the request to the target node.<br><br>If left empty, the request is forwarded to every node in the cluster. | [alpha](#support-status) |
-| page_size | [int32](#cockroach.server.serverpb.HotRangesRequest-int32) | | | [reserved](#support-status) |
-| page_token | [string](#cockroach.server.serverpb.HotRangesRequest-string) | | | [reserved](#support-status) |
-
-
-
-
-
-
-
-#### Response Parameters
-
-
-
-
-HotRangesResponseV2 is a response payload returned by `HotRangesV2` service.
-
-
-| Field | Type | Label | Description | Support status |
-| ----- | ---- | ----- | ----------- | -------------- |
-| ranges | [HotRangesResponseV2.HotRange](#cockroach.server.serverpb.HotRangesResponseV2-cockroach.server.serverpb.HotRangesResponseV2.HotRange) | repeated | Ranges contain list of hot ranges info that has highest number of QPS. | [reserved](#support-status) |
-| errors_by_node_id | [HotRangesResponseV2.ErrorsByNodeIdEntry](#cockroach.server.serverpb.HotRangesResponseV2-cockroach.server.serverpb.HotRangesResponseV2.ErrorsByNodeIdEntry) | repeated | errors contains any errors that occurred during fan-out calls to other nodes. | [reserved](#support-status) |
-| next_page_token | [string](#cockroach.server.serverpb.HotRangesResponseV2-string) | | NextPageToken represents next pagination token to request next slice of data. | [reserved](#support-status) |
-
-
-
-
-
-
-<a name="cockroach.server.serverpb.HotRangesResponseV2-cockroach.server.serverpb.HotRangesResponseV2.HotRange"></a>
-#### HotRangesResponseV2.HotRange
-
-HotRange message describes a single hot range, ie its QPS, node ID it belongs to, etc.
-
-| Field | Type | Label | Description | Support status |
-| ----- | ---- | ----- | ----------- | -------------- |
-| range_id | [int32](#cockroach.server.serverpb.HotRangesResponseV2-int32) | | range_id indicates Range ID that's identified as hot range. | [reserved](#support-status) |
-| node_id | [int32](#cockroach.server.serverpb.HotRangesResponseV2-int32) | | node_id indicates the node that contains the current hot range. | [reserved](#support-status) |
-| qps | [double](#cockroach.server.serverpb.HotRangesResponseV2-double) | | qps (queries per second) shows the amount of queries that interact with current range. | [reserved](#support-status) |
-| table_name | [string](#cockroach.server.serverpb.HotRangesResponseV2-string) | | table_name indicates the SQL table that the range belongs to. | [reserved](#support-status) |
-| database_name | [string](#cockroach.server.serverpb.HotRangesResponseV2-string) | | database_name indicates on database that has current hot range. | [reserved](#support-status) |
-| index_name | [string](#cockroach.server.serverpb.HotRangesResponseV2-string) | | index_name indicates the index name for current range. | [reserved](#support-status) |
-| replica_node_ids | [int32](#cockroach.server.serverpb.HotRangesResponseV2-int32) | repeated | replica_node_ids specifies the list of node ids that contain replicas with current hot range. | [reserved](#support-status) |
-| leaseholder_node_id | [int32](#cockroach.server.serverpb.HotRangesResponseV2-int32) | | leaseholder_node_id indicates the Node ID that is the current leaseholder for the given range. | [reserved](#support-status) |
-| schema_name | [string](#cockroach.server.serverpb.HotRangesResponseV2-string) | | schema_name provides the name of schema (if exists) for table in current range. | [reserved](#support-status) |
-| store_id | [int32](#cockroach.server.serverpb.HotRangesResponseV2-int32) | | store_id indicates the Store ID where range is stored. | [reserved](#support-status) |
-
-
-
-
-
-<a name="cockroach.server.serverpb.HotRangesResponseV2-cockroach.server.serverpb.HotRangesResponseV2.ErrorsByNodeIdEntry"></a>
-#### HotRangesResponseV2.ErrorsByNodeIdEntry
-
-
-
-| Field | Type | Label | Description | Support status |
-| ----- | ---- | ----- | ----------- | -------------- |
-| key | [int32](#cockroach.server.serverpb.HotRangesResponseV2-int32) | | | |
-| value | [string](#cockroach.server.serverpb.HotRangesResponseV2-string) | | | |
@@ -3636,9 +3314,6 @@ Support status: [reserved](#support-status)
| locks_with_wait_queues | [int64](#cockroach.server.serverpb.RangeResponse-int64) | | | [reserved](#support-status) |
| lock_wait_queue_waiters | [int64](#cockroach.server.serverpb.RangeResponse-int64) | | | [reserved](#support-status) |
| top_k_locks_by_wait_queue_waiters | [RangeInfo.LockInfo](#cockroach.server.serverpb.RangeResponse-cockroach.server.serverpb.RangeInfo.LockInfo) | repeated | | [reserved](#support-status) |
-| locality | [Locality](#cockroach.server.serverpb.RangeResponse-cockroach.server.serverpb.Locality) | | | [reserved](#support-status) |
-| is_leaseholder | [bool](#cockroach.server.serverpb.RangeResponse-bool) | | | [reserved](#support-status) |
-| lease_valid | [bool](#cockroach.server.serverpb.RangeResponse-bool) | | | [reserved](#support-status) |
@@ -3763,34 +3438,6 @@ only.
-<a name="cockroach.server.serverpb.RangeResponse-cockroach.server.serverpb.Locality"></a>
-#### Locality
-
-Locality is an ordered set of key value Tiers that describe a node's
-location. The tier keys should be the same across all nodes.
-
-| Field | Type | Label | Description | Support status |
-| ----- | ---- | ----- | ----------- | -------------- |
-| tiers | [Tier](#cockroach.server.serverpb.RangeResponse-cockroach.server.serverpb.Tier) | repeated | | [reserved](#support-status) |
-
-
-
-
-
-<a name="cockroach.server.serverpb.RangeResponse-cockroach.server.serverpb.Tier"></a>
-#### Tier
-
-Tier represents one level of the locality hierarchy.
-
-| Field | Type | Label | Description | Support status |
-| ----- | ---- | ----- | ----------- | -------------- |
-| key | [string](#cockroach.server.serverpb.RangeResponse-string) | | Key is the name of tier and should match all other nodes. | [reserved](#support-status) |
-| value | [string](#cockroach.server.serverpb.RangeResponse-string) | | Value is node specific value corresponding to the key. | [reserved](#support-status) |
-
-
-
-
-
## Diagnostics
@@ -4793,51 +4440,6 @@ Response object for issuing Transaction ID Resolution.
-## TransactionContentionEvents
-
-`GET /_status/transactioncontentionevents`
-
-TransactionContentionEvents returns a list of un-aggregated contention
-events sorted by the collection timestamp.
-
-Support status: [reserved](#support-status)
-
-#### Request Parameters
-
-
-
-
-
-
-
-| Field | Type | Label | Description | Support status |
-| ----- | ---- | ----- | ----------- | -------------- |
-| node_id | [string](#cockroach.server.serverpb.TransactionContentionEventsRequest-string) | | | [reserved](#support-status) |
-
-
-
-
-
-
-
-#### Response Parameters
-
-
-
-
-
-
-
-| Field | Type | Label | Description | Support status |
-| ----- | ---- | ----- | ----------- | -------------- |
-| events | [cockroach.sql.contentionpb.ExtendedContentionEvent](#cockroach.server.serverpb.TransactionContentionEventsResponse-cockroach.sql.contentionpb.ExtendedContentionEvent) | repeated | | [reserved](#support-status) |
-
-
-
-
-
-
-
## RequestCA
`GET /_join/v1/ca`
diff --git a/docs/generated/http/hotranges-other.md b/docs/generated/http/hotranges-other.md
index fa43e6f7b9..55397521a6 100644
--- a/docs/generated/http/hotranges-other.md
+++ b/docs/generated/http/hotranges-other.md
@@ -62,6 +62,5 @@ Support status: [alpha](#support-status)
| ----- | ---- | ----- | ----------- | -------------- |
| desc | [cockroach.roachpb.RangeDescriptor](#cockroach.roachpb.RangeDescriptor) | | Desc is the descriptor of the range for which the report was produced.<br><br>TODO(knz): This field should be removed. See: https://github.com/cockroachdb/cockroach/issues/53212 | [reserved](#support-status) |
| queries_per_second | [double](#double) | | QueriesPerSecond is the recent number of queries per second on this range. | [alpha](#support-status) |
-| leaseholder_node_id | [int32](#int32) | | LeaseholderNodeID indicates the Node ID that is the current leaseholder for the given range. | [reserved](#support-status) |
diff --git a/docs/generated/http/hotranges-request.md b/docs/generated/http/hotranges-request.md
index c7ccfd28d4..6c542161d1 100644
--- a/docs/generated/http/hotranges-request.md
+++ b/docs/generated/http/hotranges-request.md
@@ -12,7 +12,5 @@ Support status: [alpha](#support-status)
| Field | Type | Label | Description | Support status |
| ----- | ---- | ----- | ----------- | -------------- |
| node_id | [string](#string) | | NodeID indicates which node to query for a hot range report. It is possible to populate any node ID; if the node receiving the request is not the target node, it will forward the request to the target node.<br><br>If left empty, the request is forwarded to every node in the cluster. | [alpha](#support-status) |
-| page_size | [int32](#int32) | | | [reserved](#support-status) |
-| page_token | [string](#string) | | | [reserved](#support-status) |
diff --git a/docs/generated/redact_safe.md b/docs/generated/redact_safe.md
index 61ebfa5f1f..d91bd73b2b 100644
--- a/docs/generated/redact_safe.md
+++ b/docs/generated/redact_safe.md
@@ -7,13 +7,8 @@ pkg/base/node_id.go | `*SQLIDContainer`
pkg/base/node_id.go | `*StoreIDContainer`
pkg/cli/exit/exit.go | `Code`
pkg/jobs/jobspb/wrap.go | `Type`
-pkg/kv/bulk/sst_batcher.go | `sorted`
-pkg/kv/bulk/sst_batcher.go | `sz`
-pkg/kv/bulk/sst_batcher.go | `timing`
pkg/kv/kvserver/closedts/ctpb/service.go | `LAI`
pkg/kv/kvserver/closedts/ctpb/service.go | `SeqNum`
-pkg/kv/kvserver/concurrency/lock/locking.go | `Durability`
-pkg/kv/kvserver/concurrency/lock/locking.go | `Strength`
pkg/kv/kvserver/concurrency/lock/locking.go | `WaitPolicy`
pkg/kv/kvserver/kvserverpb/raft.go | `SnapshotRequest_Type`
pkg/roachpb/data.go | `LeaseSequence`
diff --git a/docs/generated/settings/settings-for-tenants.txt b/docs/generated/settings/settings-for-tenants.txt
index 5c71d3dd71..655feef2c0 100644
--- a/docs/generated/settings/settings-for-tenants.txt
+++ b/docs/generated/settings/settings-for-tenants.txt
@@ -65,7 +65,7 @@ server.oidc_authentication.provider_url string sets OIDC provider URL ({provide
server.oidc_authentication.redirect_url string https://localhost:8080/oidc/v1/callback sets OIDC redirect URL via a URL string or a JSON string containing a required `redirect_urls` key with an object that maps from region keys to URL strings (URLs should point to your load balancer and must route to the path /oidc/v1/callback)
server.oidc_authentication.scopes string openid sets OIDC scopes to include with authentication request (space delimited list of strings, required to start with `openid`)
server.rangelog.ttl duration 720h0m0s if nonzero, range log entries older than this duration are deleted every 10m0s. Should not be lowered below 24 hours.
-server.shutdown.connection_wait duration 0s the maximum amount of time a server waits for all SQL connections to be closed before proceeding with a drain. When all SQL connections are closed before times out, the server early exits and proceeds to draining range leases. (note that the --drain-wait parameter for cockroach node drain may need adjustment after changing this setting)
+server.shutdown.connection_wait duration 0s the maximum amount of time a server waits for all SQL connections to be closed before proceeding with a drain. (note that the --drain-wait parameter for cockroach node drain may need adjustment after changing this setting)
server.shutdown.drain_wait duration 0s the amount of time a server waits in an unready state before proceeding with a drain (note that the --drain-wait parameter for cockroach node drain may need adjustment after changing this setting. --drain-wait is to specify the duration of the whole draining process, while server.shutdown.drain_wait is to set the wait time for health probes to notice that the node is not ready.)
server.shutdown.lease_transfer_wait duration 5s the timeout for a single iteration of the range lease transfer phase of draining (note that the --drain-wait parameter for cockroach node drain may need adjustment after changing this setting)
server.shutdown.query_wait duration 10s the timeout for waiting for active queries to finish during a drain (note that the --drain-wait parameter for cockroach node drain may need adjustment after changing this setting)
@@ -83,7 +83,6 @@ server.web_session.purge.max_deletions_per_cycle integer 10 the maximum number o
server.web_session.purge.period duration 1h0m0s the time until old sessions are deleted
server.web_session.purge.ttl duration 1h0m0s if nonzero, entries in system.web_sessions older than this duration are periodically purged
server.web_session_timeout duration 168h0m0s the duration that a newly created web session will be valid
-sql.auth.resolve_membership_single_scan.enabled boolean true determines whether to populate the role membership cache with a single scan
sql.contention.event_store.capacity byte size 64 MiB the in-memory storage capacity per-node of contention event store
sql.contention.txn_id_cache.max_size byte size 0 B the maximum byte size TxnID cache will use (set to 0 to disable)
sql.cross_db_fks.enabled boolean false if true, creating foreign key references across databases is allowed
@@ -186,6 +185,5 @@ timeseries.storage.resolution_30m.ttl duration 2160h0m0s the maximum age of time
trace.debug.enable boolean false if set, traces for recent requests can be seen at https://<ui>/debug/requests
trace.jaeger.agent string the address of a Jaeger agent to receive traces using the Jaeger UDP Thrift protocol, as <host>:<port>. If no port is specified, 6381 will be used.
trace.opentelemetry.collector string address of an OpenTelemetry trace collector to receive traces using the otel gRPC protocol, as <host>:<port>. If no port is specified, 4317 will be used.
-trace.span_registry.enabled boolean true if set, ongoing traces can be seen at https://<ui>/debug/tracez
trace.zipkin.collector string the address of a Zipkin instance to receive traces, as <host>:<port>. If no port is specified, 9411 will be used.
-version version 21.2-88 set the active cluster version in the format '<major>.<minor>'
+version version 21.2-84 set the active cluster version in the format '<major>.<minor>'
diff --git a/docs/generated/settings/settings.html b/docs/generated/settings/settings.html
index 1a431a87aa..8ff0ff55ef 100644
--- a/docs/generated/settings/settings.html
+++ b/docs/generated/settings/settings.html
@@ -77,7 +77,7 @@
<tr><td><code>server.oidc_authentication.redirect_url</code></td><td>string</td><td><code>https://localhost:8080/oidc/v1/callback</code></td><td>sets OIDC redirect URL via a URL string or a JSON string containing a required `redirect_urls` key with an object that maps from region keys to URL strings (URLs should point to your load balancer and must route to the path /oidc/v1/callback) </td></tr>
<tr><td><code>server.oidc_authentication.scopes</code></td><td>string</td><td><code>openid</code></td><td>sets OIDC scopes to include with authentication request (space delimited list of strings, required to start with `openid`)</td></tr>
<tr><td><code>server.rangelog.ttl</code></td><td>duration</td><td><code>720h0m0s</code></td><td>if nonzero, range log entries older than this duration are deleted every 10m0s. Should not be lowered below 24 hours.</td></tr>
-<tr><td><code>server.shutdown.connection_wait</code></td><td>duration</td><td><code>0s</code></td><td>the maximum amount of time a server waits for all SQL connections to be closed before proceeding with a drain. When all SQL connections are closed before times out, the server early exits and proceeds to draining range leases. (note that the --drain-wait parameter for cockroach node drain may need adjustment after changing this setting)</td></tr>
+<tr><td><code>server.shutdown.connection_wait</code></td><td>duration</td><td><code>0s</code></td><td>the maximum amount of time a server waits for all SQL connections to be closed before proceeding with a drain. (note that the --drain-wait parameter for cockroach node drain may need adjustment after changing this setting)</td></tr>
<tr><td><code>server.shutdown.drain_wait</code></td><td>duration</td><td><code>0s</code></td><td>the amount of time a server waits in an unready state before proceeding with a drain (note that the --drain-wait parameter for cockroach node drain may need adjustment after changing this setting. --drain-wait is to specify the duration of the whole draining process, while server.shutdown.drain_wait is to set the wait time for health probes to notice that the node is not ready.)</td></tr>
<tr><td><code>server.shutdown.lease_transfer_wait</code></td><td>duration</td><td><code>5s</code></td><td>the timeout for a single iteration of the range lease transfer phase of draining (note that the --drain-wait parameter for cockroach node drain may need adjustment after changing this setting)</td></tr>
<tr><td><code>server.shutdown.query_wait</code></td><td>duration</td><td><code>10s</code></td><td>the timeout for waiting for active queries to finish during a drain (note that the --drain-wait parameter for cockroach node drain may need adjustment after changing this setting)</td></tr>
@@ -95,7 +95,6 @@
<tr><td><code>server.web_session.purge.period</code></td><td>duration</td><td><code>1h0m0s</code></td><td>the time until old sessions are deleted</td></tr>
<tr><td><code>server.web_session.purge.ttl</code></td><td>duration</td><td><code>1h0m0s</code></td><td>if nonzero, entries in system.web_sessions older than this duration are periodically purged</td></tr>
<tr><td><code>server.web_session_timeout</code></td><td>duration</td><td><code>168h0m0s</code></td><td>the duration that a newly created web session will be valid</td></tr>
-<tr><td><code>sql.auth.resolve_membership_single_scan.enabled</code></td><td>boolean</td><td><code>true</code></td><td>determines whether to populate the role membership cache with a single scan</td></tr>
<tr><td><code>sql.contention.event_store.capacity</code></td><td>byte size</td><td><code>64 MiB</code></td><td>the in-memory storage capacity per-node of contention event store</td></tr>
<tr><td><code>sql.contention.txn_id_cache.max_size</code></td><td>byte size</td><td><code>0 B</code></td><td>the maximum byte size TxnID cache will use (set to 0 to disable)</td></tr>
<tr><td><code>sql.cross_db_fks.enabled</code></td><td>boolean</td><td><code>false</code></td><td>if true, creating foreign key references across databases is allowed</td></tr>
@@ -199,8 +198,7 @@
<tr><td><code>trace.debug.enable</code></td><td>boolean</td><td><code>false</code></td><td>if set, traces for recent requests can be seen at https://<ui>/debug/requests</td></tr>
<tr><td><code>trace.jaeger.agent</code></td><td>string</td><td><code></code></td><td>the address of a Jaeger agent to receive traces using the Jaeger UDP Thrift protocol, as <host>:<port>. If no port is specified, 6381 will be used.</td></tr>
<tr><td><code>trace.opentelemetry.collector</code></td><td>string</td><td><code></code></td><td>address of an OpenTelemetry trace collector to receive traces using the otel gRPC protocol, as <host>:<port>. If no port is specified, 4317 will be used.</td></tr>
-<tr><td><code>trace.span_registry.enabled</code></td><td>boolean</td><td><code>true</code></td><td>if set, ongoing traces can be seen at https://<ui>/debug/tracez</td></tr>
<tr><td><code>trace.zipkin.collector</code></td><td>string</td><td><code></code></td><td>the address of a Zipkin instance to receive traces, as <host>:<port>. If no port is specified, 9411 will be used.</td></tr>
-<tr><td><code>version</code></td><td>version</td><td><code>21.2-88</code></td><td>set the active cluster version in the format '<major>.<minor>'</td></tr>
+<tr><td><code>version</code></td><td>version</td><td><code>21.2-84</code></td><td>set the active cluster version in the format '<major>.<minor>'</td></tr>
</tbody>
</table>
diff --git a/docs/generated/sql/bnf/BUILD.bazel b/docs/generated/sql/bnf/BUILD.bazel
index 00537a4b1f..706e8a799f 100644
--- a/docs/generated/sql/bnf/BUILD.bazel
+++ b/docs/generated/sql/bnf/BUILD.bazel
@@ -38,7 +38,6 @@ FILES = [
"alter_table_partition_by",
"alter_table_set_schema_stmt",
"alter_table_stmt",
- "alter_tenant_csetting_stmt",
"alter_type",
"alter_view",
"alter_view_owner_stmt",
@@ -169,7 +168,6 @@ FILES = [
"select_stmt",
"set_cluster_setting",
"set_csetting_stmt",
- "set_or_reset_csetting_stmt",
"set_exprs_internal",
"set_local_stmt",
"set_operation",
@@ -192,7 +190,6 @@ FILES = [
"show_indexes_stmt",
"show_jobs",
"show_keys",
- "show_local_or_tenant_csettings_stmt",
"show_locality",
"show_locality_stmt",
"show_partitions_stmt",
diff --git a/docs/generated/sql/bnf/alter_stmt.bnf b/docs/generated/sql/bnf/alter_stmt.bnf
index c41f5092df..50297fc693 100644
--- a/docs/generated/sql/bnf/alter_stmt.bnf
+++ b/docs/generated/sql/bnf/alter_stmt.bnf
@@ -1,4 +1,3 @@
alter_stmt ::=
alter_ddl_stmt
| alter_role_stmt
- | alter_tenant_csetting_stmt
diff --git a/docs/generated/sql/bnf/alter_tenant_csetting_stmt.bnf b/docs/generated/sql/bnf/alter_tenant_csetting_stmt.bnf
deleted file mode 100644
index efdb92f776..0000000000
--- a/docs/generated/sql/bnf/alter_tenant_csetting_stmt.bnf
+++ /dev/null
@@ -1,3 +0,0 @@
-alter_tenant_csetting_stmt ::=
- 'ALTER' 'TENANT' d_expr set_or_reset_csetting_stmt
- | 'ALTER' 'ALL' 'TENANTS' set_or_reset_csetting_stmt
diff --git a/docs/generated/sql/bnf/set_or_reset_csetting_stmt.bnf b/docs/generated/sql/bnf/set_or_reset_csetting_stmt.bnf
deleted file mode 100644
index 1155b92d4e..0000000000
--- a/docs/generated/sql/bnf/set_or_reset_csetting_stmt.bnf
+++ /dev/null
@@ -1,3 +0,0 @@
-set_or_reset_csetting_stmt ::=
- reset_csetting_stmt
- | set_csetting_stmt
diff --git a/docs/generated/sql/bnf/show_local_or_tenant_csettings_stmt.bnf b/docs/generated/sql/bnf/show_local_or_tenant_csettings_stmt.bnf
deleted file mode 100644
index 631a6d6375..0000000000
--- a/docs/generated/sql/bnf/show_local_or_tenant_csettings_stmt.bnf
+++ /dev/null
@@ -1,3 +0,0 @@
-show_local_or_tenant_csettings_stmt ::=
- show_csettings_stmt
- | show_csettings_stmt 'FOR' 'TENANT' d_expr
diff --git a/docs/generated/sql/bnf/show_var.bnf b/docs/generated/sql/bnf/show_var.bnf
index 62daa21435..ffb29fc3e8 100644
--- a/docs/generated/sql/bnf/show_var.bnf
+++ b/docs/generated/sql/bnf/show_var.bnf
@@ -4,7 +4,7 @@ show_stmt ::=
| show_constraints_stmt
| show_create_stmt
| show_create_schedules_stmt
- | show_local_or_tenant_csettings_stmt
+ | show_csettings_stmt
| show_databases_stmt
| show_enums_stmt
| show_types_stmt
diff --git a/docs/generated/sql/bnf/stmt_block.bnf b/docs/generated/sql/bnf/stmt_block.bnf
index 5e4af31f1b..2141dc8aa5 100644
--- a/docs/generated/sql/bnf/stmt_block.bnf
+++ b/docs/generated/sql/bnf/stmt_block.bnf
@@ -135,7 +135,6 @@ fetch_cursor_stmt ::=
alter_stmt ::=
alter_ddl_stmt
| alter_role_stmt
- | alter_tenant_csetting_stmt
backup_stmt ::=
'BACKUP' opt_backup_targets 'INTO' sconst_or_placeholder 'IN' string_or_placeholder_opt_list opt_as_of_clause opt_with_backup_options
@@ -230,7 +229,7 @@ show_stmt ::=
| show_constraints_stmt
| show_create_stmt
| show_create_schedules_stmt
- | show_local_or_tenant_csettings_stmt
+ | show_csettings_stmt
| show_databases_stmt
| show_enums_stmt
| show_types_stmt
@@ -333,7 +332,6 @@ targets ::=
| table_pattern ',' table_pattern_list
| 'TABLE' table_pattern_list
| 'TENANT' iconst64
- | 'TENANT' 'identifier'
| 'DATABASE' name_list
role_spec_list ::=
@@ -458,10 +456,6 @@ alter_role_stmt ::=
| 'ALTER' 'ROLE_ALL' 'ALL' opt_in_database set_or_reset_clause
| 'ALTER' 'USER_ALL' 'ALL' opt_in_database set_or_reset_clause
-alter_tenant_csetting_stmt ::=
- 'ALTER' 'TENANT' d_expr set_or_reset_csetting_stmt
- | 'ALTER' 'ALL' 'TENANTS' set_or_reset_csetting_stmt
-
opt_backup_targets ::=
targets
@@ -715,9 +709,12 @@ show_create_schedules_stmt ::=
'SHOW' 'CREATE' 'ALL' 'SCHEDULES'
| 'SHOW' 'CREATE' 'SCHEDULE' a_expr
-show_local_or_tenant_csettings_stmt ::=
- show_csettings_stmt
- | show_csettings_stmt 'FOR' 'TENANT' d_expr
+show_csettings_stmt ::=
+ 'SHOW' 'CLUSTER' 'SETTING' var_name
+ | 'SHOW' 'CLUSTER' 'SETTING' 'ALL'
+ | 'SHOW' 'ALL' 'CLUSTER' 'SETTINGS'
+ | 'SHOW' 'CLUSTER' 'SETTINGS'
+ | 'SHOW' 'PUBLIC' 'CLUSTER' 'SETTINGS'
show_databases_stmt ::=
'SHOW' 'DATABASES' with_comment
@@ -1245,7 +1242,6 @@ unreserved_keyword ::=
| 'TEMPLATE'
| 'TEMPORARY'
| 'TENANT'
- | 'TENANTS'
| 'TESTING_RELOCATE'
| 'TEXT'
| 'TIES'
@@ -1505,35 +1501,6 @@ set_or_reset_clause ::=
| 'RESET_ALL' 'ALL'
| 'RESET' session_var
-d_expr ::=
- 'ICONST'
- | 'FCONST'
- | 'SCONST'
- | 'BCONST'
- | 'BITCONST'
- | typed_literal
- | interval_value
- | 'TRUE'
- | 'FALSE'
- | 'NULL'
- | column_path_with_star
- | '@' iconst64
- | 'PLACEHOLDER'
- | '(' a_expr ')' '.' '*'
- | '(' a_expr ')' '.' unrestricted_name
- | '(' a_expr ')' '.' '@' 'ICONST'
- | '(' a_expr ')'
- | func_expr
- | select_with_parens
- | labeled_row
- | 'ARRAY' select_with_parens
- | 'ARRAY' row
- | 'ARRAY' array_expr
-
-set_or_reset_csetting_stmt ::=
- reset_csetting_stmt
- | set_csetting_stmt
-
as_of_clause ::=
'AS' 'OF' 'SYSTEM' 'TIME' a_expr
@@ -1761,13 +1728,6 @@ with_comment ::=
'WITH' 'COMMENT'
|
-show_csettings_stmt ::=
- 'SHOW' 'CLUSTER' 'SETTING' var_name
- | 'SHOW' 'CLUSTER' 'SETTING' 'ALL'
- | 'SHOW' 'ALL' 'CLUSTER' 'SETTINGS'
- | 'SHOW' 'CLUSTER' 'SETTINGS'
- | 'SHOW' 'PUBLIC' 'CLUSTER' 'SETTINGS'
-
opt_on_targets_roles ::=
'ON' targets_roles
|
@@ -2031,36 +1991,6 @@ alter_backup_cmds ::=
role_options ::=
( role_option ) ( ( role_option ) )*
-typed_literal ::=
- func_name_no_crdb_extra 'SCONST'
- | const_typename 'SCONST'
-
-interval_value ::=
- 'INTERVAL' 'SCONST' opt_interval_qualifier
- | 'INTERVAL' '(' iconst32 ')' 'SCONST'
-
-column_path_with_star ::=
- column_path
- | db_object_name_component '.' unrestricted_name '.' unrestricted_name '.' '*'
- | db_object_name_component '.' unrestricted_name '.' '*'
- | db_object_name_component '.' '*'
-
-func_expr ::=
- func_application within_group_clause filter_clause over_clause
- | func_expr_common_subexpr
-
-labeled_row ::=
- row
- | '(' row 'AS' name_list ')'
-
-row ::=
- 'ROW' '(' opt_expr_list ')'
- | expr_tuple_unambiguous
-
-array_expr ::=
- '[' opt_expr_list ']'
- | '[' array_expr_list ']'
-
backup_options ::=
'ENCRYPTION_PASSPHRASE' '=' string_or_placeholder
| 'REVISION_HISTORY'
@@ -2579,79 +2509,30 @@ role_option ::=
| password_clause
| valid_until_clause
-func_name_no_crdb_extra ::=
- type_function_name_no_crdb_extra
- | prefixed_column_path
-
-const_typename ::=
- numeric
- | bit_without_length
- | character_without_length
- | const_datetime
- | const_geo
-
-opt_interval_qualifier ::=
- interval_qualifier
- |
-
-iconst32 ::=
+d_expr ::=
'ICONST'
-
-func_application ::=
- func_name '(' ')'
- | func_name '(' expr_list opt_sort_clause ')'
- | func_name '(' 'ALL' expr_list opt_sort_clause ')'
- | func_name '(' 'DISTINCT' expr_list ')'
- | func_name '(' '*' ')'
-
-within_group_clause ::=
- 'WITHIN' 'GROUP' '(' single_sort_clause ')'
- |
-
-filter_clause ::=
- 'FILTER' '(' 'WHERE' a_expr ')'
- |
-
-over_clause ::=
- 'OVER' window_specification
- | 'OVER' window_name
- |
-
-func_expr_common_subexpr ::=
- 'COLLATION' 'FOR' '(' a_expr ')'
- | 'CURRENT_DATE'
- | 'CURRENT_SCHEMA'
- | 'CURRENT_CATALOG'
- | 'CURRENT_TIMESTAMP'
- | 'CURRENT_TIME'
- | 'LOCALTIMESTAMP'
- | 'LOCALTIME'
- | 'CURRENT_USER'
- | 'CURRENT_ROLE'
- | 'SESSION_USER'
- | 'USER'
- | 'CAST' '(' a_expr 'AS' cast_target ')'
- | 'ANNOTATE_TYPE' '(' a_expr ',' typename ')'
- | 'IF' '(' a_expr ',' a_expr ',' a_expr ')'
- | 'IFERROR' '(' a_expr ',' a_expr ',' a_expr ')'
- | 'IFERROR' '(' a_expr ',' a_expr ')'
- | 'ISERROR' '(' a_expr ')'
- | 'ISERROR' '(' a_expr ',' a_expr ')'
- | 'NULLIF' '(' a_expr ',' a_expr ')'
- | 'IFNULL' '(' a_expr ',' a_expr ')'
- | 'COALESCE' '(' expr_list ')'
- | special_function
-
-opt_expr_list ::=
- expr_list
- |
-
-expr_tuple_unambiguous ::=
- '(' ')'
- | '(' tuple1_unambiguous_values ')'
-
-array_expr_list ::=
- ( array_expr ) ( ( ',' array_expr ) )*
+ | 'FCONST'
+ | 'SCONST'
+ | 'BCONST'
+ | 'BITCONST'
+ | typed_literal
+ | interval_value
+ | 'TRUE'
+ | 'FALSE'
+ | 'NULL'
+ | column_path_with_star
+ | '@' iconst64
+ | 'PLACEHOLDER'
+ | '(' a_expr ')' '.' '*'
+ | '(' a_expr ')' '.' unrestricted_name
+ | '(' a_expr ')' '.' '@' 'ICONST'
+ | '(' a_expr ')'
+ | func_expr
+ | select_with_parens
+ | labeled_row
+ | 'ARRAY' select_with_parens
+ | 'ARRAY' row
+ | 'ARRAY' array_expr
array_subscripts ::=
( array_subscript ) ( ( array_subscript ) )*
@@ -2873,10 +2754,20 @@ type_func_name_no_crdb_extra_keyword ::=
general_type_name ::=
type_function_name_no_crdb_extra
+iconst32 ::=
+ 'ICONST'
+
complex_type_name ::=
general_type_name '.' unrestricted_name
| general_type_name '.' unrestricted_name '.' unrestricted_name
+const_typename ::=
+ numeric
+ | bit_without_length
+ | character_without_length
+ | const_datetime
+ | const_geo
+
bit_with_length ::=
'BIT' opt_varying '(' iconst32 ')'
| 'VARBIT' '(' iconst32 ')'
@@ -2954,109 +2845,35 @@ valid_until_clause ::=
'VALID' 'UNTIL' string_or_placeholder
| 'VALID' 'UNTIL' 'NULL'
-type_function_name_no_crdb_extra ::=
- 'identifier'
- | unreserved_keyword
- | type_func_name_no_crdb_extra_keyword
+typed_literal ::=
+ func_name_no_crdb_extra 'SCONST'
+ | const_typename 'SCONST'
-numeric ::=
- 'INT'
- | 'INTEGER'
- | 'SMALLINT'
- | 'BIGINT'
- | 'REAL'
- | 'FLOAT' opt_float
- | 'DOUBLE' 'PRECISION'
- | 'DECIMAL' opt_numeric_modifiers
- | 'DEC' opt_numeric_modifiers
- | 'NUMERIC' opt_numeric_modifiers
- | 'BOOLEAN'
+interval_value ::=
+ 'INTERVAL' 'SCONST' opt_interval_qualifier
+ | 'INTERVAL' '(' iconst32 ')' 'SCONST'
-bit_without_length ::=
- 'BIT'
- | 'BIT' 'VARYING'
- | 'VARBIT'
+column_path_with_star ::=
+ column_path
+ | db_object_name_component '.' unrestricted_name '.' unrestricted_name '.' '*'
+ | db_object_name_component '.' unrestricted_name '.' '*'
+ | db_object_name_component '.' '*'
-character_without_length ::=
- character_base
+func_expr ::=
+ func_application within_group_clause filter_clause over_clause
+ | func_expr_common_subexpr
-const_datetime ::=
- 'DATE'
- | 'TIME' opt_timezone
- | 'TIME' '(' iconst32 ')' opt_timezone
- | 'TIMETZ'
- | 'TIMETZ' '(' iconst32 ')'
- | 'TIMESTAMP' opt_timezone
- | 'TIMESTAMP' '(' iconst32 ')' opt_timezone
- | 'TIMESTAMPTZ'
- | 'TIMESTAMPTZ' '(' iconst32 ')'
+labeled_row ::=
+ row
+ | '(' row 'AS' name_list ')'
-const_geo ::=
- 'GEOGRAPHY'
- | 'GEOMETRY'
- | 'BOX2D'
- | 'GEOMETRY' '(' geo_shape_type ')'
- | 'GEOGRAPHY' '(' geo_shape_type ')'
- | 'GEOMETRY' '(' geo_shape_type ',' signed_iconst ')'
- | 'GEOGRAPHY' '(' geo_shape_type ',' signed_iconst ')'
+row ::=
+ 'ROW' '(' opt_expr_list ')'
+ | expr_tuple_unambiguous
-interval_qualifier ::=
- 'YEAR'
- | 'MONTH'
- | 'DAY'
- | 'HOUR'
- | 'MINUTE'
- | interval_second
- | 'YEAR' 'TO' 'MONTH'
- | 'DAY' 'TO' 'HOUR'
- | 'DAY' 'TO' 'MINUTE'
- | 'DAY' 'TO' interval_second
- | 'HOUR' 'TO' 'MINUTE'
- | 'HOUR' 'TO' interval_second
- | 'MINUTE' 'TO' interval_second
-
-func_name ::=
- type_function_name
- | prefixed_column_path
-
-single_sort_clause ::=
- 'ORDER' 'BY' sortby
- | 'ORDER' 'BY' sortby ',' sortby_list
-
-window_specification ::=
- '(' opt_existing_window_name opt_partition_clause opt_sort_clause opt_frame_clause ')'
-
-window_name ::=
- name
-
-special_function ::=
- 'CURRENT_DATE' '(' ')'
- | 'CURRENT_SCHEMA' '(' ')'
- | 'CURRENT_TIMESTAMP' '(' ')'
- | 'CURRENT_TIMESTAMP' '(' a_expr ')'
- | 'CURRENT_TIME' '(' ')'
- | 'CURRENT_TIME' '(' a_expr ')'
- | 'LOCALTIMESTAMP' '(' ')'
- | 'LOCALTIMESTAMP' '(' a_expr ')'
- | 'LOCALTIME' '(' ')'
- | 'LOCALTIME' '(' a_expr ')'
- | 'CURRENT_USER' '(' ')'
- | 'SESSION_USER' '(' ')'
- | 'EXTRACT' '(' extract_list ')'
- | 'EXTRACT_DURATION' '(' extract_list ')'
- | 'OVERLAY' '(' overlay_list ')'
- | 'POSITION' '(' position_list ')'
- | 'SUBSTRING' '(' substr_list ')'
- | 'TRIM' '(' 'BOTH' trim_list ')'
- | 'TRIM' '(' 'LEADING' trim_list ')'
- | 'TRIM' '(' 'TRAILING' trim_list ')'
- | 'TRIM' '(' trim_list ')'
- | 'GREATEST' '(' expr_list ')'
- | 'LEAST' '(' expr_list ')'
-
-tuple1_unambiguous_values ::=
- a_expr ','
- | a_expr ',' expr_list
+array_expr ::=
+ '[' opt_expr_list ']'
+ | '[' array_expr_list ']'
array_subscript ::=
'[' a_expr ']'
@@ -3170,6 +2987,52 @@ join_qual ::=
rowsfrom_list ::=
( rowsfrom_item ) ( ( ',' rowsfrom_item ) )*
+type_function_name_no_crdb_extra ::=
+ 'identifier'
+ | unreserved_keyword
+ | type_func_name_no_crdb_extra_keyword
+
+numeric ::=
+ 'INT'
+ | 'INTEGER'
+ | 'SMALLINT'
+ | 'BIGINT'
+ | 'REAL'
+ | 'FLOAT' opt_float
+ | 'DOUBLE' 'PRECISION'
+ | 'DECIMAL' opt_numeric_modifiers
+ | 'DEC' opt_numeric_modifiers
+ | 'NUMERIC' opt_numeric_modifiers
+ | 'BOOLEAN'
+
+bit_without_length ::=
+ 'BIT'
+ | 'BIT' 'VARYING'
+ | 'VARBIT'
+
+character_without_length ::=
+ character_base
+
+const_datetime ::=
+ 'DATE'
+ | 'TIME' opt_timezone
+ | 'TIME' '(' iconst32 ')' opt_timezone
+ | 'TIMETZ'
+ | 'TIMETZ' '(' iconst32 ')'
+ | 'TIMESTAMP' opt_timezone
+ | 'TIMESTAMP' '(' iconst32 ')' opt_timezone
+ | 'TIMESTAMPTZ'
+ | 'TIMESTAMPTZ' '(' iconst32 ')'
+
+const_geo ::=
+ 'GEOGRAPHY'
+ | 'GEOMETRY'
+ | 'BOX2D'
+ | 'GEOMETRY' '(' geo_shape_type ')'
+ | 'GEOGRAPHY' '(' geo_shape_type ')'
+ | 'GEOMETRY' '(' geo_shape_type ',' signed_iconst ')'
+ | 'GEOGRAPHY' '(' geo_shape_type ',' signed_iconst ')'
+
opt_varying ::=
'VARYING'
|
@@ -3180,6 +3043,21 @@ character_base ::=
| 'VARCHAR'
| 'STRING'
+interval_qualifier ::=
+ 'YEAR'
+ | 'MONTH'
+ | 'DAY'
+ | 'HOUR'
+ | 'MINUTE'
+ | interval_second
+ | 'YEAR' 'TO' 'MONTH'
+ | 'DAY' 'TO' 'HOUR'
+ | 'DAY' 'TO' 'MINUTE'
+ | 'DAY' 'TO' interval_second
+ | 'HOUR' 'TO' 'MINUTE'
+ | 'HOUR' 'TO' interval_second
+ | 'MINUTE' 'TO' interval_second
+
opt_column ::=
'COLUMN'
|
@@ -3235,101 +3113,69 @@ storage_parameter_key_list ::=
partition_by_index ::=
partition_by
-opt_float ::=
- '(' 'ICONST' ')'
- |
-
-opt_numeric_modifiers ::=
- '(' iconst32 ')'
- | '(' iconst32 ',' iconst32 ')'
- |
+func_name_no_crdb_extra ::=
+ type_function_name_no_crdb_extra
+ | prefixed_column_path
-opt_timezone ::=
- 'WITH' 'TIME' 'ZONE'
- | 'WITHOUT' 'TIME' 'ZONE'
+opt_interval_qualifier ::=
+ interval_qualifier
|
-geo_shape_type ::=
- 'POINT'
- | 'POINTM'
- | 'POINTZ'
- | 'POINTZM'
- | 'LINESTRING'
- | 'LINESTRINGM'
- | 'LINESTRINGZ'
- | 'LINESTRINGZM'
- | 'POLYGON'
- | 'POLYGONM'
- | 'POLYGONZ'
- | 'POLYGONZM'
- | 'MULTIPOINT'
- | 'MULTIPOINTM'
- | 'MULTIPOINTZ'
- | 'MULTIPOINTZM'
- | 'MULTILINESTRING'
- | 'MULTILINESTRINGM'
- | 'MULTILINESTRINGZ'
- | 'MULTILINESTRINGZM'
- | 'MULTIPOLYGON'
- | 'MULTIPOLYGONM'
- | 'MULTIPOLYGONZ'
- | 'MULTIPOLYGONZM'
- | 'GEOMETRYCOLLECTION'
- | 'GEOMETRYCOLLECTIONM'
- | 'GEOMETRYCOLLECTIONZ'
- | 'GEOMETRYCOLLECTIONZM'
- | 'GEOMETRY'
- | 'GEOMETRYM'
- | 'GEOMETRYZ'
- | 'GEOMETRYZM'
-
-interval_second ::=
- 'SECOND'
- | 'SECOND' '(' iconst32 ')'
-
-type_function_name ::=
- 'identifier'
- | unreserved_keyword
- | type_func_name_keyword
+func_application ::=
+ func_name '(' ')'
+ | func_name '(' expr_list opt_sort_clause ')'
+ | func_name '(' 'ALL' expr_list opt_sort_clause ')'
+ | func_name '(' 'DISTINCT' expr_list ')'
+ | func_name '(' '*' ')'
-opt_existing_window_name ::=
- name
+within_group_clause ::=
+ 'WITHIN' 'GROUP' '(' single_sort_clause ')'
|
-opt_partition_clause ::=
- 'PARTITION' 'BY' expr_list
+filter_clause ::=
+ 'FILTER' '(' 'WHERE' a_expr ')'
|
-opt_frame_clause ::=
- 'RANGE' frame_extent opt_frame_exclusion
- | 'ROWS' frame_extent opt_frame_exclusion
- | 'GROUPS' frame_extent opt_frame_exclusion
+over_clause ::=
+ 'OVER' window_specification
+ | 'OVER' window_name
|
-extract_list ::=
- extract_arg 'FROM' a_expr
- | expr_list
-
-overlay_list ::=
- a_expr overlay_placing substr_from substr_for
- | a_expr overlay_placing substr_from
- | expr_list
+func_expr_common_subexpr ::=
+ 'COLLATION' 'FOR' '(' a_expr ')'
+ | 'CURRENT_DATE'
+ | 'CURRENT_SCHEMA'
+ | 'CURRENT_CATALOG'
+ | 'CURRENT_TIMESTAMP'
+ | 'CURRENT_TIME'
+ | 'LOCALTIMESTAMP'
+ | 'LOCALTIME'
+ | 'CURRENT_USER'
+ | 'CURRENT_ROLE'
+ | 'SESSION_USER'
+ | 'USER'
+ | 'CAST' '(' a_expr 'AS' cast_target ')'
+ | 'ANNOTATE_TYPE' '(' a_expr ',' typename ')'
+ | 'IF' '(' a_expr ',' a_expr ',' a_expr ')'
+ | 'IFERROR' '(' a_expr ',' a_expr ',' a_expr ')'
+ | 'IFERROR' '(' a_expr ',' a_expr ')'
+ | 'ISERROR' '(' a_expr ')'
+ | 'ISERROR' '(' a_expr ',' a_expr ')'
+ | 'NULLIF' '(' a_expr ',' a_expr ')'
+ | 'IFNULL' '(' a_expr ',' a_expr ')'
+ | 'COALESCE' '(' expr_list ')'
+ | special_function
-position_list ::=
- b_expr 'IN' b_expr
+opt_expr_list ::=
+ expr_list
|
-substr_list ::=
- a_expr substr_from substr_for
- | a_expr substr_for substr_from
- | a_expr substr_from
- | a_expr substr_for
- | opt_expr_list
+expr_tuple_unambiguous ::=
+ '(' ')'
+ | '(' tuple1_unambiguous_values ')'
-trim_list ::=
- a_expr 'FROM' expr_list
- | 'FROM' expr_list
- | expr_list
+array_expr_list ::=
+ ( array_expr ) ( ( ',' array_expr ) )*
opt_slice_bound ::=
a_expr
@@ -3383,10 +3229,62 @@ join_outer ::=
rowsfrom_item ::=
func_expr_windowless
+opt_float ::=
+ '(' 'ICONST' ')'
+ |
+
+opt_numeric_modifiers ::=
+ '(' iconst32 ')'
+ | '(' iconst32 ',' iconst32 ')'
+ |
+
+opt_timezone ::=
+ 'WITH' 'TIME' 'ZONE'
+ | 'WITHOUT' 'TIME' 'ZONE'
+ |
+
+geo_shape_type ::=
+ 'POINT'
+ | 'POINTM'
+ | 'POINTZ'
+ | 'POINTZM'
+ | 'LINESTRING'
+ | 'LINESTRINGM'
+ | 'LINESTRINGZ'
+ | 'LINESTRINGZM'
+ | 'POLYGON'
+ | 'POLYGONM'
+ | 'POLYGONZ'
+ | 'POLYGONZM'
+ | 'MULTIPOINT'
+ | 'MULTIPOINTM'
+ | 'MULTIPOINTZ'
+ | 'MULTIPOINTZM'
+ | 'MULTILINESTRING'
+ | 'MULTILINESTRINGM'
+ | 'MULTILINESTRINGZ'
+ | 'MULTILINESTRINGZM'
+ | 'MULTIPOLYGON'
+ | 'MULTIPOLYGONM'
+ | 'MULTIPOLYGONZ'
+ | 'MULTIPOLYGONZM'
+ | 'GEOMETRYCOLLECTION'
+ | 'GEOMETRYCOLLECTIONM'
+ | 'GEOMETRYCOLLECTIONZ'
+ | 'GEOMETRYCOLLECTIONZM'
+ | 'GEOMETRY'
+ | 'GEOMETRYM'
+ | 'GEOMETRYZ'
+ | 'GEOMETRYZM'
+
char_aliases ::=
'CHAR'
| 'CHARACTER'
+interval_second ::=
+ 'SECOND'
+ | 'SECOND' '(' iconst32 ')'
+
col_qual_list ::=
( ) ( ( col_qualification ) )*
@@ -3402,35 +3300,48 @@ reference_actions ::=
| reference_on_delete reference_on_update
|
-frame_extent ::=
- frame_bound
- | 'BETWEEN' frame_bound 'AND' frame_bound
+func_name ::=
+ type_function_name
+ | prefixed_column_path
-opt_frame_exclusion ::=
- 'EXCLUDE' 'CURRENT' 'ROW'
- | 'EXCLUDE' 'GROUP'
- | 'EXCLUDE' 'TIES'
- | 'EXCLUDE' 'NO' 'OTHERS'
- |
+single_sort_clause ::=
+ 'ORDER' 'BY' sortby
+ | 'ORDER' 'BY' sortby ',' sortby_list
-extract_arg ::=
- 'identifier'
- | 'YEAR'
- | 'MONTH'
- | 'DAY'
- | 'HOUR'
- | 'MINUTE'
- | 'SECOND'
- | 'SCONST'
+window_specification ::=
+ '(' opt_existing_window_name opt_partition_clause opt_sort_clause opt_frame_clause ')'
-overlay_placing ::=
- 'PLACING' a_expr
+window_name ::=
+ name
-substr_from ::=
- 'FROM' a_expr
+special_function ::=
+ 'CURRENT_DATE' '(' ')'
+ | 'CURRENT_SCHEMA' '(' ')'
+ | 'CURRENT_TIMESTAMP' '(' ')'
+ | 'CURRENT_TIMESTAMP' '(' a_expr ')'
+ | 'CURRENT_TIME' '(' ')'
+ | 'CURRENT_TIME' '(' a_expr ')'
+ | 'LOCALTIMESTAMP' '(' ')'
+ | 'LOCALTIMESTAMP' '(' a_expr ')'
+ | 'LOCALTIME' '(' ')'
+ | 'LOCALTIME' '(' a_expr ')'
+ | 'CURRENT_USER' '(' ')'
+ | 'SESSION_USER' '(' ')'
+ | 'EXTRACT' '(' extract_list ')'
+ | 'EXTRACT_DURATION' '(' extract_list ')'
+ | 'OVERLAY' '(' overlay_list ')'
+ | 'POSITION' '(' position_list ')'
+ | 'SUBSTRING' '(' substr_list ')'
+ | 'TRIM' '(' 'BOTH' trim_list ')'
+ | 'TRIM' '(' 'LEADING' trim_list ')'
+ | 'TRIM' '(' 'TRAILING' trim_list ')'
+ | 'TRIM' '(' trim_list ')'
+ | 'GREATEST' '(' expr_list ')'
+ | 'LEAST' '(' expr_list ')'
-substr_for ::=
- 'FOR' a_expr
+tuple1_unambiguous_values ::=
+ a_expr ','
+ | a_expr ',' expr_list
list_partition ::=
partition 'VALUES' 'IN' '(' expr_list ')' opt_partition_by
@@ -3469,12 +3380,49 @@ reference_on_update ::=
reference_on_delete ::=
'ON' 'DELETE' reference_action
-frame_bound ::=
- 'UNBOUNDED' 'PRECEDING'
- | 'UNBOUNDED' 'FOLLOWING'
- | 'CURRENT' 'ROW'
- | a_expr 'PRECEDING'
- | a_expr 'FOLLOWING'
+type_function_name ::=
+ 'identifier'
+ | unreserved_keyword
+ | type_func_name_keyword
+
+opt_existing_window_name ::=
+ name
+ |
+
+opt_partition_clause ::=
+ 'PARTITION' 'BY' expr_list
+ |
+
+opt_frame_clause ::=
+ 'RANGE' frame_extent opt_frame_exclusion
+ | 'ROWS' frame_extent opt_frame_exclusion
+ | 'GROUPS' frame_extent opt_frame_exclusion
+ |
+
+extract_list ::=
+ extract_arg 'FROM' a_expr
+ | expr_list
+
+overlay_list ::=
+ a_expr overlay_placing substr_from substr_for
+ | a_expr overlay_placing substr_from
+ | expr_list
+
+position_list ::=
+ b_expr 'IN' b_expr
+ |
+
+substr_list ::=
+ a_expr substr_from substr_for
+ | a_expr substr_for substr_from
+ | a_expr substr_from
+ | a_expr substr_for
+ | opt_expr_list
+
+trim_list ::=
+ a_expr 'FROM' expr_list
+ | 'FROM' expr_list
+ | expr_list
opt_partition_by ::=
partition_by
@@ -3508,6 +3456,36 @@ reference_action ::=
| 'SET' 'NULL'
| 'SET' 'DEFAULT'
+frame_extent ::=
+ frame_bound
+ | 'BETWEEN' frame_bound 'AND' frame_bound
+
+opt_frame_exclusion ::=
+ 'EXCLUDE' 'CURRENT' 'ROW'
+ | 'EXCLUDE' 'GROUP'
+ | 'EXCLUDE' 'TIES'
+ | 'EXCLUDE' 'NO' 'OTHERS'
+ |
+
+extract_arg ::=
+ 'identifier'
+ | 'YEAR'
+ | 'MONTH'
+ | 'DAY'
+ | 'HOUR'
+ | 'MINUTE'
+ | 'SECOND'
+ | 'SCONST'
+
+overlay_placing ::=
+ 'PLACING' a_expr
+
+substr_from ::=
+ 'FROM' a_expr
+
+substr_for ::=
+ 'FOR' a_expr
+
opt_name_parens ::=
'(' name ')'
|
@@ -3521,3 +3499,10 @@ generated_always_as ::=
generated_by_default_as ::=
'GENERATED_BY_DEFAULT' 'BY' 'DEFAULT' 'AS'
+
+frame_bound ::=
+ 'UNBOUNDED' 'PRECEDING'
+ | 'UNBOUNDED' 'FOLLOWING'
+ | 'CURRENT' 'ROW'
+ | a_expr 'PRECEDING'
+ | a_expr 'FOLLOWING'
diff --git a/docs/generated/sql/functions.md b/docs/generated/sql/functions.md
index d3c38de22b..2038a9972f 100644
--- a/docs/generated/sql/functions.md
+++ b/docs/generated/sql/functions.md
@@ -2910,8 +2910,6 @@ a CockroachDB HLC in decimal form.</p>
<p>Note that uses of this function disable server-side optimizations and
may increase either contention or retry errors, or both.</p>
</span></td></tr>
-<tr><td><a name="crdb_internal.active_version"></a><code>crdb_internal.active_version() &rarr; jsonb</code></td><td><span class="funcdesc"><p>Returns the current active cluster version.</p>
-</span></td></tr>
<tr><td><a name="crdb_internal.approximate_timestamp"></a><code>crdb_internal.approximate_timestamp(timestamp: <a href="decimal.html">decimal</a>) &rarr; <a href="timestamp.html">timestamp</a></code></td><td><span class="funcdesc"><p>Converts the crdb_internal_mvcc_timestamp column into an approximate timestamp.</p>
</span></td></tr>
<tr><td><a name="crdb_internal.assignment_cast"></a><code>crdb_internal.assignment_cast(val: anyelement, type: anyelement) &rarr; anyelement</code></td><td><span class="funcdesc"><p>This function is used internally to perform assignment casts during mutations.</p>
diff --git a/docs/generated/swagger/spec.json b/docs/generated/swagger/spec.json
index 6e5daf8aa9..5047ea7fe1 100644
--- a/docs/generated/swagger/spec.json
+++ b/docs/generated/swagger/spec.json
@@ -1023,12 +1023,6 @@
},
"x-go-package": "github.com/cockroachdb/cockroach/pkg/util/metric"
},
- "RangeID": {
- "type": "integer",
- "format": "int64",
- "title": "A RangeID is a unique ID associated to a Raft consensus group.",
- "x-go-package": "github.com/cockroachdb/cockroach/pkg/roachpb"
- },
"RangeProblems": {
"type": "object",
"title": "RangeProblems describes issues reported by a range. For internal use only.",
@@ -1580,12 +1574,12 @@
"format": "int32",
"x-go-name": "Internal"
},
- "major": {
+ "major_val": {
"type": "integer",
"format": "int32",
"x-go-name": "Major"
},
- "minor": {
+ "minor_val": {
"type": "integer",
"format": "int32",
"x-go-name": "Minor"
@@ -1794,54 +1788,6 @@
},
"x-go-package": "github.com/cockroachdb/cockroach/pkg/server"
},
- "hotRangeInfo": {
- "description": "(ie its range ID, QPS, table name, etc.).",
- "type": "object",
- "title": "Hot range details struct describes common information about hot range,",
- "properties": {
- "database_name": {
- "type": "string",
- "x-go-name": "DatabaseName"
- },
- "index_name": {
- "type": "string",
- "x-go-name": "IndexName"
- },
- "leaseholder_node_id": {
- "$ref": "#/definitions/NodeID"
- },
- "node_id": {
- "$ref": "#/definitions/NodeID"
- },
- "qps": {
- "type": "number",
- "format": "double",
- "x-go-name": "QPS"
- },
- "range_id": {
- "$ref": "#/definitions/RangeID"
- },
- "replica_node_ids": {
- "type": "array",
- "items": {
- "$ref": "#/definitions/NodeID"
- },
- "x-go-name": "ReplicaNodeIDs"
- },
- "schema_name": {
- "type": "string",
- "x-go-name": "SchemaName"
- },
- "store_id": {
- "$ref": "#/definitions/StoreID"
- },
- "table_name": {
- "type": "string",
- "x-go-name": "TableName"
- }
- },
- "x-go-package": "github.com/cockroachdb/cockroach/pkg/server"
- },
"hotRangesResponse": {
"type": "object",
"title": "Response struct for listHotRanges.",
@@ -1851,12 +1797,15 @@
"type": "string",
"x-go-name": "Next"
},
- "ranges": {
- "type": "array",
- "items": {
- "$ref": "#/definitions/hotRangeInfo"
+ "ranges_by_node_id": {
+ "type": "object",
+ "additionalProperties": {
+ "type": "array",
+ "items": {
+ "$ref": "#/definitions/rangeDescriptorInfo"
+ }
},
- "x-go-name": "Ranges"
+ "x-go-name": "RangesByNodeID"
},
"response_error": {
"type": "array",
diff --git a/docs/tech-notes/change-replicas.md b/docs/tech-notes/change-replicas.md
deleted file mode 100644
index 5de72a42db..0000000000
--- a/docs/tech-notes/change-replicas.md
+++ /dev/null
@@ -1,93 +0,0 @@
-# Up-replicating replicas with snapshots
-
-This tech note briefly explains the end-to-end process of up-replicating a range, starting from the `replicateQueue` to
-sending Learner snapshots. This is meant to serve as a high level overview of the code paths taken, and more detailed
-descriptions can be found in tech notes specific to each area.
-
-## Introduction
-
-Raft snapshots are necessary when a follower replica is unable to catch up using the existing Raft logs of the leader.
-Such a scenario occurs when an existing replica falls too far behind the rest of the Raft group such that we've
-truncated the Raft log above it. On the other hand, Learner snapshots are necessary when we add a new replica (due to a
-rebalancing operation) and the new replica needs a snapshot to up-replicate. We should make a small distinction
-that `Learner snapshots` are sent during rebalancing operations by the `replicateQueue` and `Raft snapshots`
-are sent by the `raftSnapshotQueue` when a replica falls behind. However, there is no difference in the way the
-snapshots are sent, generated or applied. For example, in the case where the replication factor is increased, the Raft
-group would need to create a new replica from a clean slate. To up-replicate this replica, we would send it a Learner
-snapshot of the full range data.
-
-In this note, we will focus on the scenario where a Learner snapshot is needed for the up-replication and rebalancing of
-replicas.
-
-## ReplicateQueue
-
-As a brief overview, the `replicateQueue` manages a queue of replicas and is responsible for processing replicas for
-rebalancing. For each replica that holds the lease, it invokes the allocator to compute decisions on whether any placement changes are needed
-and the `replicateQueue` executes these changes. Then the `replicateQueue` calls `changeReplicas` to act on these change
-requests and repeats until all changes are complete.
-
-## ChangeReplicas API
-
-The `AdminChangeReplicas` API exposed by KV is mainly responsible for atomically changing replicas of a range. These
-changes include non-voter promotions, voter/non-voter swaps, additions and removals. The change is performed in a
-distributed transaction and takes effect when the transaction is committed. There are many details involved in these
-changes, but for the purposes of this note, we will focus on up-replication and the process of sending Learner
-snapshots.
-
-During up-replication, `ChangeReplicas` runs a transaction to add a new learner replica to the range. Learner
-replicas are new replicas that receive Raft traffic but do not participate in quorum; allowing the range to remain
-highly available during the replication process.
-
-Once the learners have been added to the range, they are synchronously sent a Learner snapshot from the leaseholder
-(which is typically, but not necessarily, the Raft leader) to up-replicate. There is some nuance here as Raft snapshots
-are typically automatically sent by the existing Raft snapshot queue. However, we are synchronously sending a snapshot
-here in the replicateQueue to quickly catch up learners. To prevent a race with the raftSnapshotQueue, we lock snapshots
-to learners and non-voters on the current leaseholder store while processing the change. In addition, we also place a
-lock on log truncation to ensure we don't truncate the Raft
-log while a snapshot is inflight, preventing wasted snapshots. However, both of these locks are the best effort as
-opposed to guaranteed as the lease can be transferred and the new leaseholder could still truncate the log.
-
-## Sending Snapshots
-
-The snapshot process itself is broken into three parts: generating, transmitting and applying the snapshot. This process
-is the same whether it is invoked by the `replicateQueue` or `raftSnapshotQueue`.
-
-### Generating the snapshot
-
-A snapshot is a bulk transfer of all replicated data in a range and everything the replica needs to be a member of a
-Raft group. It consists of a consistent view of the state of some replica of a range as of an applied index. The
-`GetSnapshot` method gets a storage engine snapshot which reflects the replicated state as of the applied index the
-snapshot is generated at, and creates an iterator from the storage snapshot. A storage engine snapshot is important to
-ensure that multiple iterators created at different points in time see a consistent replicated state that does not
-change while the snapshot is streamed. In our current code, the engine snapshot is not necessary for correctness as only
-one iterator is constructed.
-
-### Transmitting the snapshot
-
-The snapshot transfer is sent through a bi-directional stream of snapshot requests and snapshot responses. However,
-before the actual range data is sent, the streaming rpc first sends a header message to the recipient store and blocks
-until the store responds to accept or reject the snapshot data.
-
-The recipient checks the following conditions before accepting the snapshot. We currently allow one concurrent snapshot
-application on a receiver store at a time. Consequently, if there are snapshot applications already in-flight on the
-receiver, incoming snapshots are throttled or rejected if they wait too long. The receiver then checks whether its store
-has a compatible replica present to have the snapshot applied. In the case of up-replication, we expect the learner
-replica to be present on the receiver store. However, if the snapshot overlaps an existing replica or replica
-placeholder on the receiver, the snapshot will be rejected as well. Once these conditions have been verified, a response
-message will be sent back to the sender.
-
-Once the recipient has accepted, the sender proceeds to use the iterator to read chunks of
-size `kv.snapshot_sender.batch_size` from storage into memory. The batch size is enforced as to not hold on to a
-significant chunk of data in memory. Then, in-memory batches are created to send the snapshot data in streaming grpc
-message chunks to the recipient. Note that snapshots are rate limited depending on cluster settings as to not overload
-the network. Finally, once all the data is sent, the sender sends a final message to the receiver and waits for a
-response from the recipient indicating whether the snapshot was a success. On the receiver side, the key-value pairs are
-used to construct multiple SSTs for direct ingestion, which prevents the receiver from holding the entire snapshot in
-memory.
-
-### Applying the snapshot
-
-Once the snapshot is received on the receiver, defensive checks are applied to ensure the correct snapshot is received.
-It ensures there is an initialized learner replica or creates a placeholder to accept the snapshot. Finally, the Raft
-snapshot message is handed to the replica’s Raft node, which will apply the snapshot. The placeholder or learner is then
-converted to a fully initialized replica.
diff --git a/docs/tech-notes/contexts.md b/docs/tech-notes/contexts.md
index d9f8a1cc73..3eb14c671e 100644
--- a/docs/tech-notes/contexts.md
+++ b/docs/tech-notes/contexts.md
@@ -278,19 +278,8 @@ Of note:
- a cancellable sub-context of a cancellable context means there is a
goroutine spawn under you for propagating the parents cancellation,
so it can be expensive.
-- GRPC propagates cancel and deadline contexts across the wire. Deadline
- contexts are just copied and recreated on other side. The way cancelation
- works (for instance when a SQL statement timeout expires) is that after
- sending a message GRPC does a
- [select](https://github.com/cockroachdb/vendored/blob/ed53f102e86b51c228b61d3ad54f1428982fe75e/google.golang.org/grpc/internal/transport/transport.go#L178)
- on the response and also listens on the Done channel for the context passed it
- by the client. This in turn will close the GRPC stream which will send an
- [RST](https://datatracker.ietf.org/doc/html/rfc7540#section-6.4)
- message to the server which will cause the server side to be
- [canceled](https://github.com/cockroachdb/vendored/blob/ed53f102e86b51c228b61d3ad54f1428982fe75e/google.golang.org/grpc/internal/transport/http2_server.go#L1231).
- See also TestGRPCDeadlinePropagation and
- TestTenantStatementTimeoutAdmissionQueueCancelation for tests that rely on this
- behavior.
+- it's still a bit unclear how context cancellation crosses gRPC and
+ libpq boundaries. People are working on this.
## Technical notes
diff --git a/go.mod b/go.mod
index 8164331538..fadc980c32 100644
--- a/go.mod
+++ b/go.mod
@@ -46,7 +46,7 @@ require (
github.com/cockroachdb/go-test-teamcity v0.0.0-20191211140407-cff980ad0a55
github.com/cockroachdb/gostdlib v1.13.0
github.com/cockroachdb/logtags v0.0.0-20211118104740-dabe8e521a4f
- github.com/cockroachdb/pebble v0.0.0-20220307192532-e2b7bb844759
+ github.com/cockroachdb/pebble v0.0.0-20220227235451-40d39da505a5
github.com/cockroachdb/redact v1.1.3
github.com/cockroachdb/returncheck v0.0.0-20200612231554-92cdbca611dd
github.com/cockroachdb/stress v0.0.0-20220217190341-94cf65c2a29f
@@ -64,7 +64,7 @@ require (
github.com/elastic/gosigar v0.14.1
github.com/emicklei/dot v0.15.0
github.com/facebookgo/clock v0.0.0-20150410010913-600d898af40a
- github.com/fraugster/parquet-go v0.10.0
+ github.com/fraugster/parquet-go v0.6.1
github.com/fsnotify/fsnotify v1.5.1
github.com/getsentry/sentry-go v0.12.0
github.com/ghemawat/stream v0.0.0-20171120220530-696b145b53b9
@@ -188,7 +188,7 @@ require (
github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578 // indirect
github.com/abbot/go-http-auth v0.4.1-0.20181019201920-860ed7f246ff // indirect
github.com/alexbrainman/sspi v0.0.0-20180613141037-e580b900e9f5 // indirect
- github.com/apache/thrift v0.16.0 // indirect
+ github.com/apache/thrift v0.15.0 // indirect
github.com/asaskevich/govalidator v0.0.0-20200907205600-7a23bdc65eef // indirect
github.com/aws/aws-sdk-go-v2/credentials v1.4.2 // indirect
github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.5.1 // indirect
@@ -268,7 +268,7 @@ require (
github.com/magiconair/properties v1.8.5 // indirect
github.com/mailru/easyjson v0.7.6 // indirect
github.com/mattn/go-ieproxy v0.0.1 // indirect
- github.com/mattn/go-runewidth v0.0.10 // indirect
+ github.com/mattn/go-runewidth v0.0.7 // indirect
github.com/matttproud/golang_protobuf_extensions v1.0.2-0.20181231171920-c182affec369 // indirect
github.com/minio/md5-simd v1.1.2 // indirect
github.com/minio/minio-go/v7 v7.0.21 // indirect
@@ -293,7 +293,6 @@ require (
github.com/pquerna/cachecontrol v0.0.0-20200921180117-858c6e7e6b7e // indirect
github.com/prometheus/procfs v0.7.3 // indirect
github.com/pseudomuto/protokit v0.2.0 // indirect
- github.com/rivo/uniseg v0.1.0 // indirect
github.com/rogpeppe/go-internal v1.8.1 // indirect
github.com/rs/xid v1.3.0 // indirect
github.com/russross/blackfriday v1.6.0 // indirect
diff --git a/go.sum b/go.sum
index b9033e86fc..7c583ce7ce 100644
--- a/go.sum
+++ b/go.sum
@@ -266,10 +266,8 @@ github.com/apache/arrow/go/arrow v0.0.0-20200923215132-ac86123a3f01/go.mod h1:QN
github.com/apache/thrift v0.0.0-20151001171628-53dd39833a08/go.mod h1:cp2SuWMxlEZw2r+iP2GNCdIi4C1qmUzdZFSVb+bacwQ=
github.com/apache/thrift v0.12.0/go.mod h1:cp2SuWMxlEZw2r+iP2GNCdIi4C1qmUzdZFSVb+bacwQ=
github.com/apache/thrift v0.13.0/go.mod h1:cp2SuWMxlEZw2r+iP2GNCdIi4C1qmUzdZFSVb+bacwQ=
+github.com/apache/thrift v0.15.0 h1:aGvdaR0v1t9XLgjtBYwxcBvBOTMqClzwE26CHOgjW1Y=
github.com/apache/thrift v0.15.0/go.mod h1:PHK3hniurgQaNMZYaCLEqXKsYK8upmhPbmdP2FXSqgU=
-github.com/apache/thrift v0.16.0 h1:qEy6UW60iVOlUy+b9ZR0d5WzUWYGOo4HfopoyBaNmoY=
-github.com/apache/thrift v0.16.0/go.mod h1:PHK3hniurgQaNMZYaCLEqXKsYK8upmhPbmdP2FXSqgU=
-github.com/araddon/dateparse v0.0.0-20210429162001-6b43995a97de/go.mod h1:DCaWoUhZrYW9p1lxo/cm8EmUOOzAPSEZNGF2DK1dJgw=
github.com/armon/circbuf v0.0.0-20150827004946-bbbad097214e h1:QEF07wC0T1rKkctt1RINW/+RMTVmiwxETico2l3gxJA=
github.com/armon/circbuf v0.0.0-20150827004946-bbbad097214e/go.mod h1:3U/XgcO3hCbHZ8TKRvWD2dDTCfh9M9ya+I9JpbB7O8o=
github.com/armon/consul-api v0.0.0-20180202201655-eb2c6b5be1b6/go.mod h1:grANhF5doyWs3UAsr3K4I6qtAmlQcZDesFNEHPZAzj8=
@@ -438,8 +436,8 @@ github.com/cockroachdb/logtags v0.0.0-20211118104740-dabe8e521a4f h1:6jduT9Hfc0n
github.com/cockroachdb/logtags v0.0.0-20211118104740-dabe8e521a4f/go.mod h1:Vz9DsVWQQhf3vs21MhPMZpMGSht7O/2vFW2xusFUVOs=
github.com/cockroachdb/panicparse/v2 v2.0.0-20211103220158-604c82a44f1e h1:FrERdkPlRj+v7fc+PGpey3GUiDGuTR5CsmLCA54YJ8I=
github.com/cockroachdb/panicparse/v2 v2.0.0-20211103220158-604c82a44f1e/go.mod h1:pMxsKyCewnV3xPaFvvT9NfwvDTcIx2Xqg0qL5Gq0SjM=
-github.com/cockroachdb/pebble v0.0.0-20220307192532-e2b7bb844759 h1:PKDkU1nARLt2TL99CCEukKJIEUo2cgt9AEVlrdtzfuM=
-github.com/cockroachdb/pebble v0.0.0-20220307192532-e2b7bb844759/go.mod h1:buxOO9GBtOcq1DiXDpIPYrmxY020K2A8lOrwno5FetU=
+github.com/cockroachdb/pebble v0.0.0-20220227235451-40d39da505a5 h1:6ZsiW1sWGEsx2kDq98bdoDfdDeO2IgfI4e2FxUQwkdk=
+github.com/cockroachdb/pebble v0.0.0-20220227235451-40d39da505a5/go.mod h1:buxOO9GBtOcq1DiXDpIPYrmxY020K2A8lOrwno5FetU=
github.com/cockroachdb/redact v1.0.8/go.mod h1:BVNblN9mBWFyMyqK1k3AAiSxhvhfK2oOZZ2lK+dpvRg=
github.com/cockroachdb/redact v1.1.3 h1:AKZds10rFSIj7qADf0g46UixK8NNLwWTNdCIGS5wfSQ=
github.com/cockroachdb/redact v1.1.3/go.mod h1:BVNblN9mBWFyMyqK1k3AAiSxhvhfK2oOZZ2lK+dpvRg=
@@ -714,8 +712,8 @@ github.com/frankban/quicktest v1.7.3/go.mod h1:V1d2J5pfxYH6EjBAgSK7YNXcXlTWxUHdE
github.com/frankban/quicktest v1.10.1/go.mod h1:z7wHrVXJKCWP1Ev7B3iy2DivmuL5uGeeJDWYz/6LLhY=
github.com/frankban/quicktest v1.11.3 h1:8sXhOn0uLys67V8EsXLc6eszDs8VXWxL3iRvebPhedY=
github.com/frankban/quicktest v1.11.3/go.mod h1:wRf/ReqHper53s+kmmSZizM8NamnL3IM0I9ntUbOk+k=
-github.com/fraugster/parquet-go v0.10.0 h1:whX91AO3dkkOnbH9MqD53DZ3rISw+Tnnj5yiqXjSv9Q=
-github.com/fraugster/parquet-go v0.10.0/go.mod h1:asQOKX0K/j+F3Xyj87kw7gKrU3yXo9M2hb8STSQKIIw=
+github.com/fraugster/parquet-go v0.6.1 h1:Kks9Ibly3ZZQPRPGkS1lVmrwndBp8PxamBnDFG5jvEM=
+github.com/fraugster/parquet-go v0.6.1/go.mod h1:1HGhXzpHv7CULzknVNWIY0Ihn2O3qNbD1p+aQvHWhqo=
github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo=
github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ=
github.com/fsnotify/fsnotify v1.5.1 h1:mZcQUHVQUQWoPXXtuf9yuEXKudkV2sx1E06UadKWpgI=
@@ -1539,9 +1537,8 @@ github.com/mattn/go-isatty v0.0.14 h1:yVuAays6BHfxijgZPzw+3Zlu5yQgKGP2/hcQbHb7S9
github.com/mattn/go-isatty v0.0.14/go.mod h1:7GGIvUiUoEMVVmxf/4nioHXj79iQHKdU27kJ6hsGG94=
github.com/mattn/go-runewidth v0.0.2/go.mod h1:LwmH8dsx7+W8Uxz3IHJYH5QSwggIsqBzpuz5H//U1FU=
github.com/mattn/go-runewidth v0.0.3/go.mod h1:LwmH8dsx7+W8Uxz3IHJYH5QSwggIsqBzpuz5H//U1FU=
+github.com/mattn/go-runewidth v0.0.7 h1:Ei8KR0497xHyKJPAv59M1dkC+rOZCMBJ+t3fZ+twI54=
github.com/mattn/go-runewidth v0.0.7/go.mod h1:H031xJmbD/WCDINGzjvQ9THkh0rPKHF+m2gUSrubnMI=
-github.com/mattn/go-runewidth v0.0.10 h1:CoZ3S2P7pvtP45xOtBw+/mDL2z0RKI576gSkzRRpdGg=
-github.com/mattn/go-runewidth v0.0.10/go.mod h1:RAqKPSqVFrSLVXbA8x7dzmKdmGzieGRCM46jaSJTDAk=
github.com/mattn/go-shellwords v1.0.3/go.mod h1:3xCvwCdWdlDJUrvuMn7Wuy9eWs4pE8vqg+NOMyg4B2o=
github.com/mattn/go-sqlite3 v1.11.0/go.mod h1:FPy6KqzDD04eiIsT53CuJW3U88zkxoIYsOqkbpncsNc=
github.com/mattn/go-sqlite3 v1.14.0/go.mod h1:JIl7NbARA7phWnGvh0LKTyg7S9BA+6gx71ShQilpsus=
@@ -1876,8 +1873,6 @@ github.com/rcrowley/go-metrics v0.0.0-20201227073835-cf1acfcdf475 h1:N/ElC8H3+5X
github.com/rcrowley/go-metrics v0.0.0-20201227073835-cf1acfcdf475/go.mod h1:bCqnVzQkZxMG4s8nGwiZ5l3QUCyqpo9Y+/ZMZ9VjZe4=
github.com/remyoudompheng/bigfft v0.0.0-20170806203942-52369c62f446/go.mod h1:uYEyJGbgTkfkS4+E/PavXkNJcbFIpEtjt2B0KDQ5+9M=
github.com/retailnext/hllpp v1.0.1-0.20180308014038-101a6d2f8b52/go.mod h1:RDpi1RftBQPUCDRw6SmxeaREsAaRKnOclghuzp/WRzc=
-github.com/rivo/uniseg v0.1.0 h1:+2KBaVoUmb9XzDsrx/Ct0W/EYOSFf/nWTauy++DprtY=
-github.com/rivo/uniseg v0.1.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc=
github.com/robertkrimen/godocdown v0.0.0-20130622164427-0bfa04905481/go.mod h1:C9WhFzY47SzYBIvzFqSvHIR6ROgDo4TtdTuRaOMjF/s=
github.com/robfig/cron/v3 v3.0.1 h1:WdRxkvbJztn8LMz/QEvLN5sBU+xKpSqwwUO1Pjr4qDs=
github.com/robfig/cron/v3 v3.0.1/go.mod h1:eQICP3HwyT7UooqI/z+Ov+PtYAWygg1TEWWzGIFLtro=
@@ -1918,7 +1913,6 @@ github.com/savsgio/gotils v0.0.0-20210921075833-21a6215cb0e4/go.mod h1:oejLrk1Y/
github.com/scaleway/scaleway-sdk-go v1.0.0-beta.7.0.20210223165440-c65ae3540d44/go.mod h1:CJJ5VAbozOl0yEw7nHB9+7BXTJbIn6h7W+f6Gau5IP8=
github.com/schollz/closestmatch v2.1.0+incompatible/go.mod h1:RtP1ddjLong6gTkbtmuhtR2uUrrJOpYzYRvbcPAid+g=
github.com/sclevine/agouti v3.0.0+incompatible/go.mod h1:b4WX9W9L1sfQKXeJf1mUTLZKJ48R1S7H23Ji7oFO5Bw=
-github.com/scylladb/termtables v0.0.0-20191203121021-c4c0b6d42ff4/go.mod h1:C1a7PQSMz9NShzorzCiG2fk9+xuCgLkPeCvMHYR2OWg=
github.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529/go.mod h1:DxrIzT+xaE7yg65j358z/aeFdxmN0P9QXhEzd20vsDc=
github.com/seccomp/libseccomp-golang v0.9.1/go.mod h1:GbW5+tmTXfcxTToHLXlScSlAvWlF4P2Ca7zGrPiEpWo=
github.com/sectioneight/md-to-godoc v0.0.0-20161108233149-55e43be6c335/go.mod h1:lPZq22klO8la1kyImIDhrGytugMV0TsrsZB55a+xxI0=
diff --git a/pkg/BUILD.bazel b/pkg/BUILD.bazel
index 006b4a8eaa..77752a755a 100644
Binary files a/pkg/BUILD.bazel and b/pkg/BUILD.bazel differ
diff --git a/pkg/base/testing_knobs.go b/pkg/base/testing_knobs.go
index 15abf6afe8..6e72ca49c3 100644
--- a/pkg/base/testing_knobs.go
+++ b/pkg/base/testing_knobs.go
@@ -19,34 +19,32 @@ type ModuleTestingKnobs interface {
// TestingKnobs contains facilities for controlling various parts of the
// system for testing.
type TestingKnobs struct {
- Store ModuleTestingKnobs
- KVClient ModuleTestingKnobs
- RangeFeed ModuleTestingKnobs
- SQLExecutor ModuleTestingKnobs
- SQLLeaseManager ModuleTestingKnobs
- SQLSchemaChanger ModuleTestingKnobs
- SQLDeclarativeSchemaChanger ModuleTestingKnobs
- SQLTypeSchemaChanger ModuleTestingKnobs
- GCJob ModuleTestingKnobs
- PGWireTestingKnobs ModuleTestingKnobs
- StartupMigrationManager ModuleTestingKnobs
- DistSQL ModuleTestingKnobs
- SQLEvalContext ModuleTestingKnobs
- NodeLiveness ModuleTestingKnobs
- Server ModuleTestingKnobs
- TenantTestingKnobs ModuleTestingKnobs
- JobsTestingKnobs ModuleTestingKnobs
- BackupRestore ModuleTestingKnobs
- TTL ModuleTestingKnobs
- Streaming ModuleTestingKnobs
- MigrationManager ModuleTestingKnobs
- IndexUsageStatsKnobs ModuleTestingKnobs
- SQLStatsKnobs ModuleTestingKnobs
- SpanConfig ModuleTestingKnobs
- SQLLivenessKnobs ModuleTestingKnobs
- TelemetryLoggingKnobs ModuleTestingKnobs
- DialerKnobs ModuleTestingKnobs
- ProtectedTS ModuleTestingKnobs
- CapturedIndexUsageStatsKnobs ModuleTestingKnobs
- AdmissionControl ModuleTestingKnobs
+ Store ModuleTestingKnobs
+ KVClient ModuleTestingKnobs
+ RangeFeed ModuleTestingKnobs
+ SQLExecutor ModuleTestingKnobs
+ SQLLeaseManager ModuleTestingKnobs
+ SQLSchemaChanger ModuleTestingKnobs
+ SQLDeclarativeSchemaChanger ModuleTestingKnobs
+ SQLTypeSchemaChanger ModuleTestingKnobs
+ GCJob ModuleTestingKnobs
+ PGWireTestingKnobs ModuleTestingKnobs
+ StartupMigrationManager ModuleTestingKnobs
+ DistSQL ModuleTestingKnobs
+ SQLEvalContext ModuleTestingKnobs
+ NodeLiveness ModuleTestingKnobs
+ Server ModuleTestingKnobs
+ TenantTestingKnobs ModuleTestingKnobs
+ JobsTestingKnobs ModuleTestingKnobs
+ BackupRestore ModuleTestingKnobs
+ TTL ModuleTestingKnobs
+ Streaming ModuleTestingKnobs
+ MigrationManager ModuleTestingKnobs
+ IndexUsageStatsKnobs ModuleTestingKnobs
+ SQLStatsKnobs ModuleTestingKnobs
+ SpanConfig ModuleTestingKnobs
+ SQLLivenessKnobs ModuleTestingKnobs
+ TelemetryLoggingKnobs ModuleTestingKnobs
+ DialerKnobs ModuleTestingKnobs
+ ProtectedTS ModuleTestingKnobs
}
diff --git a/pkg/bench/rttanalysis/BUILD.bazel b/pkg/bench/rttanalysis/BUILD.bazel
index 797bdf94d0..db98f19200 100644
--- a/pkg/bench/rttanalysis/BUILD.bazel
+++ b/pkg/bench/rttanalysis/BUILD.bazel
@@ -49,7 +49,6 @@ go_test(
],
data = glob(["testdata/**"]),
embed = [":rttanalysis"],
- shard_count = 16,
deps = [
"//pkg/base",
"//pkg/security",
diff --git a/pkg/bench/rttanalysis/testdata/benchmark_expectations b/pkg/bench/rttanalysis/testdata/benchmark_expectations
index 10b154c3a3..311a9e95d1 100644
--- a/pkg/bench/rttanalysis/testdata/benchmark_expectations
+++ b/pkg/bench/rttanalysis/testdata/benchmark_expectations
@@ -50,8 +50,8 @@ exp,benchmark
19,Grant/grant_all_on_1_table
19,Grant/grant_all_on_2_tables
19,Grant/grant_all_on_3_tables
-13,GrantRole/grant_1_role
-15,GrantRole/grant_2_roles
+17,GrantRole/grant_1_role
+20,GrantRole/grant_2_roles
2,ORMQueries/activerecord_type_introspection_query
2,ORMQueries/django_table_introspection_1_table
2,ORMQueries/django_table_introspection_4_tables
@@ -79,8 +79,8 @@ exp,benchmark
19,Revoke/revoke_all_on_1_table
19,Revoke/revoke_all_on_2_tables
19,Revoke/revoke_all_on_3_tables
-13,RevokeRole/revoke_1_role
-15,RevokeRole/revoke_2_roles
+16,RevokeRole/revoke_1_role
+18,RevokeRole/revoke_2_roles
1,SystemDatabaseQueries/select_system.users_with_empty_database_Name
1,SystemDatabaseQueries/select_system.users_with_schema_Name
2,SystemDatabaseQueries/select_system.users_without_schema_Name
diff --git a/pkg/build/BUILD.bazel b/pkg/build/BUILD.bazel
index 2fdb50f105..428ae5f446 100644
--- a/pkg/build/BUILD.bazel
+++ b/pkg/build/BUILD.bazel
@@ -15,11 +15,10 @@ go_library(
visibility = ["//visibility:public"],
x_defs = {
"github.com/cockroachdb/cockroach/pkg/build.cgoTargetTriple": "{STABLE_BUILD_TARGET_TRIPLE}",
- "github.com/cockroachdb/cockroach/pkg/build.channel": "{STABLE_BUILD_CHANNEL}",
- "github.com/cockroachdb/cockroach/pkg/build.rev": "{BUILD_REV}",
- "github.com/cockroachdb/cockroach/pkg/build.tag": "{BUILD_TAG}",
- "github.com/cockroachdb/cockroach/pkg/build.typ": "{STABLE_BUILD_TYPE}",
- "github.com/cockroachdb/cockroach/pkg/build.utcTime": "{BUILD_UTCTIME}",
+ "github.com/cockroachdb/cockroach/pkg/build.typ": "{STABLE_BUILD_GIT_BUILD_TYPE}",
+ "github.com/cockroachdb/cockroach/pkg/build.rev": "{BUILD_GIT_COMMIT}",
+ "github.com/cockroachdb/cockroach/pkg/build.tag": "{BUILD_GIT_TAG}",
+ "github.com/cockroachdb/cockroach/pkg/build.utcTime": "{BUILD_GIT_UTCTIME}",
},
deps = [
"//pkg/util/envutil",
diff --git a/pkg/ccl/backupccl/BUILD.bazel b/pkg/ccl/backupccl/BUILD.bazel
index e735fd3302..5d9da862bb 100644
--- a/pkg/ccl/backupccl/BUILD.bazel
+++ b/pkg/ccl/backupccl/BUILD.bazel
@@ -9,7 +9,6 @@ go_library(
"backup.go",
"backup_destination.go",
"backup_job.go",
- "backup_metadata.go",
"backup_planning.go",
"backup_planning_tenant.go",
"backup_processor.go",
@@ -121,7 +120,6 @@ go_library(
"//pkg/util/hlc",
"//pkg/util/interval",
"//pkg/util/ioctx",
- "//pkg/util/json",
"//pkg/util/log",
"//pkg/util/log/eventpb",
"//pkg/util/metric",
@@ -154,7 +152,6 @@ go_test(
"backup_cloud_test.go",
"backup_destination_test.go",
"backup_intents_test.go",
- "backup_metadata_test.go",
"backup_rand_test.go",
"backup_tenant_test.go",
"backup_test.go",
@@ -251,7 +248,6 @@ go_test(
"//pkg/util/ctxgroup",
"//pkg/util/encoding",
"//pkg/util/hlc",
- "//pkg/util/ioctx",
"//pkg/util/leaktest",
"//pkg/util/log",
"//pkg/util/mon",
diff --git a/pkg/ccl/backupccl/backup_job.go b/pkg/ccl/backupccl/backup_job.go
index 46a4674206..ef8166e451 100644
--- a/pkg/ccl/backupccl/backup_job.go
+++ b/pkg/ccl/backupccl/backup_job.go
@@ -165,7 +165,6 @@ func backup(
planCtx,
execCtx,
dsp,
- int64(job.ID()),
spans,
introducedSpans,
pkIDs,
@@ -332,16 +331,6 @@ func backup(
return roachpb.RowCount{}, err
}
- if writeMetadataSST.Get(&settings.SV) {
- if err := writeBackupMetadataSST(ctx, defaultStore, encryption, backupManifest, tableStatistics); err != nil {
- err = errors.Wrap(err, "writing forward-compat metadata sst")
- if !build.IsRelease() {
- return roachpb.RowCount{}, err
- }
- log.Warningf(ctx, "%+v", err)
- }
- }
-
return backupManifest.EntryCounts, nil
}
@@ -481,6 +470,20 @@ func (b *backupResumer) Resume(ctx context.Context, execCtx interface{}) error {
}
}
+ ptsID := details.ProtectedTimestampRecord
+ if ptsID != nil && !b.testingKnobs.ignoreProtectedTimestamps {
+ resumerSpan.RecordStructured(&types.StringValue{Value: "verifying protected timestamp"})
+ if err := p.ExecCfg().ProtectedTimestampProvider.Verify(ctx, *ptsID); err != nil {
+ if errors.Is(err, protectedts.ErrNotExists) {
+ // No reason to return an error which might cause problems if it doesn't
+ // seem to exist.
+ log.Warningf(ctx, "failed to release protected which seems not to exist: %v", err)
+ } else {
+ return err
+ }
+ }
+ }
+
storageByLocalityKV := make(map[string]*roachpb.ExternalStorage)
for kv, uri := range details.URIsByLocalityKV {
conf, err := cloud.ExternalStorageConfFromURI(uri, p.User())
@@ -578,7 +581,7 @@ func (b *backupResumer) Resume(ctx context.Context, execCtx interface{}) error {
return err
}
- if details.ProtectedTimestampRecord != nil && !b.testingKnobs.ignoreProtectedTimestamps {
+ if ptsID != nil && !b.testingKnobs.ignoreProtectedTimestamps {
if err := p.ExecCfg().DB.Txn(ctx, func(ctx context.Context, txn *kv.Txn) error {
details := b.job.Details().(jobspb.BackupDetails)
return releaseProtectedTimestamp(ctx, txn, p.ExecCfg().ProtectedTimestampProvider,
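
One of the hunks above (the ProtectedTimestampProvider.Verify block) tolerates a missing protected-timestamp record by matching the protectedts.ErrNotExists sentinel with errors.Is and logging a warning instead of failing the backup. Below is a minimal sketch of that sentinel-matching pattern using only the standard library (not part of the patch; all names are illustrative, not the backupccl API).

package main

import (
	"errors"
	"fmt"
	"log"
)

// errNotExists plays the role of protectedts.ErrNotExists in the hunk above.
var errNotExists = errors.New("protected timestamp record does not exist")

// release pretends to release a record; here it always reports the sentinel.
func release(id string) error {
	return fmt.Errorf("releasing %s: %w", id, errNotExists)
}

// releaseIgnoringMissing downgrades the "already gone" case to a warning
// instead of failing the surrounding job, mirroring the logic above.
func releaseIgnoringMissing(id string) error {
	if err := release(id); err != nil {
		if errors.Is(err, errNotExists) {
			log.Printf("record %s already released: %v", id, err)
			return nil
		}
		return err
	}
	return nil
}

func main() {
	if err := releaseIgnoringMissing("pts-1"); err != nil {
		log.Fatal(err)
	}
	fmt.Println("done")
}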
diff --git a/pkg/ccl/backupccl/backup_metadata.go b/pkg/ccl/backupccl/backup_metadata.go
deleted file mode 100644
index fee3e840d6..0000000000
--- a/pkg/ccl/backupccl/backup_metadata.go
+++ /dev/null
@@ -1,1265 +0,0 @@
-// Copyright 2021 The Cockroach Authors.
-//
-// Licensed as a CockroachDB Enterprise file under the Cockroach Community
-// License (the "License"); you may not use this file except in compliance with
-// the License. You may obtain a copy of the License at
-//
-// https://github.com/cockroachdb/cockroach/blob/master/licenses/CCL.txt
-
-package backupccl
-
-import (
- "bytes"
- "context"
- "fmt"
- "io"
- "sort"
- "strings"
-
- "github.com/cockroachdb/cockroach/pkg/ccl/storageccl"
- "github.com/cockroachdb/cockroach/pkg/cloud"
- "github.com/cockroachdb/cockroach/pkg/jobs/jobspb"
- "github.com/cockroachdb/cockroach/pkg/keys"
- "github.com/cockroachdb/cockroach/pkg/roachpb"
- "github.com/cockroachdb/cockroach/pkg/sql/catalog/descpb"
- "github.com/cockroachdb/cockroach/pkg/sql/protoreflect"
- "github.com/cockroachdb/cockroach/pkg/sql/stats"
- "github.com/cockroachdb/cockroach/pkg/storage"
- "github.com/cockroachdb/cockroach/pkg/util/encoding"
- "github.com/cockroachdb/cockroach/pkg/util/hlc"
- "github.com/cockroachdb/cockroach/pkg/util/json"
- "github.com/cockroachdb/cockroach/pkg/util/protoutil"
- "github.com/cockroachdb/errors"
-)
-
-const (
- metadataSSTName = "metadata.sst"
- fileInfoPath = "fileinfo.sst"
- sstBackupKey = "backup"
- sstDescsPrefix = "desc/"
- sstFilesPrefix = "file/"
- sstNamesPrefix = "name/"
- sstSpansPrefix = "span/"
- sstStatsPrefix = "stats/"
- sstTenantsPrefix = "tenant/"
-)
-
-func writeBackupMetadataSST(
- ctx context.Context,
- dest cloud.ExternalStorage,
- enc *jobspb.BackupEncryptionOptions,
- manifest *BackupManifest,
- stats []*stats.TableStatisticProto,
-) error {
- var w io.WriteCloser
- ctx, cancel := context.WithCancel(ctx)
- defer func() {
- cancel() // cancel before Close() to abort write on err returns.
- if w != nil {
- w.Close()
- }
- }()
-
- w, err := makeWriter(ctx, dest, metadataSSTName, enc)
- if err != nil {
- return err
- }
-
- if err := constructMetadataSST(ctx, dest, enc, w, manifest, stats); err != nil {
- return err
- }
-
- // Explicitly close to flush and check for errors do so before defer's cancel
- // which would abort. Then nil out w to avoid defer double-closing.
- err = w.Close()
- w = nil
- return err
-}
-
-func makeWriter(
- ctx context.Context,
- dest cloud.ExternalStorage,
- filename string,
- enc *jobspb.BackupEncryptionOptions,
-) (io.WriteCloser, error) {
- w, err := dest.Writer(ctx, filename)
- if err != nil {
- return nil, err
- }
-
- if enc != nil {
- key, err := getEncryptionKey(ctx, enc, dest.Settings(), dest.ExternalIOConf())
- if err != nil {
- return nil, err
- }
- encW, err := storageccl.EncryptingWriter(w, key)
- if err != nil {
- return nil, err
- }
- w = encW
- }
- return w, nil
-}
-
-func constructMetadataSST(
- ctx context.Context,
- dest cloud.ExternalStorage,
- enc *jobspb.BackupEncryptionOptions,
- w io.Writer,
- m *BackupManifest,
- stats []*stats.TableStatisticProto,
-) error {
- // TODO(dt): use a seek-optimized SST writer instead.
- sst := storage.MakeBackupSSTWriter(ctx, dest.Settings(), w)
- defer sst.Close()
-
- // The following steps must be done in-order, by key prefix.
-
- if err := writeManifestToMetadata(ctx, sst, m); err != nil {
- return err
- }
-
- if err := writeDescsToMetadata(ctx, sst, m); err != nil {
- return err
- }
-
- if err := writeFilesToMetadata(ctx, sst, m, dest, enc, fileInfoPath); err != nil {
- return err
- }
-
- if err := writeNamesToMetadata(ctx, sst, m); err != nil {
- return err
- }
-
- if err := writeSpansToMetadata(ctx, sst, m); err != nil {
- return err
- }
-
- if err := writeStatsToMetadata(ctx, sst, stats); err != nil {
- return err
- }
-
- if err := writeTenantsToMetadata(ctx, sst, m); err != nil {
- return err
- }
-
- return sst.Finish()
-}
-
-func writeManifestToMetadata(ctx context.Context, sst storage.SSTWriter, m *BackupManifest) error {
- info := *m
- info.Descriptors = nil
- info.DescriptorChanges = nil
- info.Files = nil
- info.Spans = nil
- info.StatisticsFilenames = nil
- info.IntroducedSpans = nil
- info.Tenants = nil
-
- b, err := protoutil.Marshal(&info)
- if err != nil {
- return err
- }
- return sst.PutUnversioned(roachpb.Key(sstBackupKey), b)
-}
-
-func writeDescsToMetadata(ctx context.Context, sst storage.SSTWriter, m *BackupManifest) error {
- // Add descriptors from revisions if available, Descriptors if not.
- if len(m.DescriptorChanges) > 0 {
- sort.Slice(m.DescriptorChanges, func(i, j int) bool {
- if m.DescriptorChanges[i].ID < m.DescriptorChanges[j].ID {
- return true
- } else if m.DescriptorChanges[i].ID == m.DescriptorChanges[j].ID {
- return !m.DescriptorChanges[i].Time.Less(m.DescriptorChanges[j].Time)
- }
- return false
- })
- for _, i := range m.DescriptorChanges {
- k := encodeDescSSTKey(i.ID)
- var b []byte
- if i.Desc != nil {
- t, _, _, _ := descpb.FromDescriptor(i.Desc)
- if t == nil || !t.Dropped() {
- bytes, err := protoutil.Marshal(i.Desc)
- if err != nil {
- return err
- }
- b = bytes
- }
- }
- if err := sst.PutMVCC(storage.MVCCKey{Key: k, Timestamp: i.Time}, b); err != nil {
- return err
- }
-
- }
- } else {
- sort.Slice(m.Descriptors, func(i, j int) bool {
- return descID(m.Descriptors[i]) < descID(m.Descriptors[j])
- })
- for _, i := range m.Descriptors {
- id := descID(i)
- k := encodeDescSSTKey(id)
- b, err := protoutil.Marshal(&i)
- if err != nil {
- return err
- }
-
- // Put descriptors at start time. For non-rev backups this timestamp
- // doesn't matter. For the special case where there were no descriptor
- // changes in an incremental backup, it's helpful to have existing
- // descriptors at the start time, so we don't have to look back further
- // than the very last backup.
- if m.StartTime.IsEmpty() {
- if err := sst.PutUnversioned(k, b); err != nil {
- return err
- }
- } else {
- if err := sst.PutMVCC(storage.MVCCKey{Key: k, Timestamp: m.StartTime}, b); err != nil {
- return err
- }
- }
- }
- }
- return nil
-}
-
-func writeFilesToMetadata(
- ctx context.Context,
- sst storage.SSTWriter,
- m *BackupManifest,
- dest cloud.ExternalStorage,
- enc *jobspb.BackupEncryptionOptions,
- fileInfoPath string,
-) error {
- w, err := makeWriter(ctx, dest, fileInfoPath, enc)
- if err != nil {
- return err
- }
- defer w.Close()
- fileSST := storage.MakeBackupSSTWriter(ctx, dest.Settings(), w)
- defer fileSST.Close()
-
- // Sort and write all of the files into a single file info SST.
- sort.Slice(m.Files, func(i, j int) bool {
- cmp := m.Files[i].Span.Key.Compare(m.Files[j].Span.Key)
- return cmp < 0 || (cmp == 0 && strings.Compare(m.Files[i].Path, m.Files[j].Path) < 0)
- })
-
- for _, i := range m.Files {
- b, err := protoutil.Marshal(&i)
- if err != nil {
- return err
- }
- if err := fileSST.PutUnversioned(encodeFileSSTKey(i.Span.Key, i.Path), b); err != nil {
- return err
- }
- }
-
- err = fileSST.Finish()
- if err != nil {
- return err
- }
- err = w.Close()
- if err != nil {
- return err
- }
-
- // Write the file info into the main metadata SST.
- return sst.PutUnversioned(encodeFilenameSSTKey(fileInfoPath), nil)
-}
-
-type name struct {
- parent, parentSchema descpb.ID
- name string
- id descpb.ID
- ts hlc.Timestamp
-}
-
-type namespace []name
-
-func (a namespace) Len() int { return len(a) }
-func (a namespace) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
-func (a namespace) Less(i, j int) bool {
- if a[i].parent == a[j].parent {
- if a[i].parentSchema == a[j].parentSchema {
- cmp := strings.Compare(a[i].name, a[j].name)
- return cmp < 0 || (cmp == 0 && (a[i].ts.IsEmpty() || a[j].ts.Less(a[i].ts)))
- }
- return a[i].parentSchema < a[j].parentSchema
- }
- return a[i].parent < a[j].parent
-}
-
-func writeNamesToMetadata(ctx context.Context, sst storage.SSTWriter, m *BackupManifest) error {
- revs := m.DescriptorChanges
- if len(revs) == 0 {
- revs = make([]BackupManifest_DescriptorRevision, len(m.Descriptors))
- for i := range m.Descriptors {
- revs[i].Desc = &m.Descriptors[i]
- revs[i].Time = m.EndTime
- revs[i].ID = descID(m.Descriptors[i])
- }
- }
-
- names := make(namespace, len(revs))
-
- for i, rev := range revs {
- names[i].id = rev.ID
- names[i].ts = rev.Time
- tb, db, typ, sc := descpb.FromDescriptor(rev.Desc)
- if db != nil {
- names[i].name = db.Name
- } else if sc != nil {
- names[i].name = sc.Name
- names[i].parent = sc.ParentID
- } else if tb != nil {
- names[i].name = tb.Name
- names[i].parent = tb.ParentID
- names[i].parentSchema = keys.PublicSchemaID
- if s := tb.UnexposedParentSchemaID; s != descpb.InvalidID {
- names[i].parentSchema = s
- }
- if tb.Dropped() {
- names[i].id = 0
- }
- } else if typ != nil {
- names[i].name = typ.Name
- names[i].parent = typ.ParentID
- names[i].parentSchema = typ.ParentSchemaID
- }
- }
- sort.Sort(names)
-
- for i, rev := range names {
- if i > 0 {
- prev := names[i-1]
- prev.ts = rev.ts
- if prev == rev {
- continue
- }
- }
- k := encodeNameSSTKey(rev.parent, rev.parentSchema, rev.name)
- v := encoding.EncodeUvarintAscending(nil, uint64(rev.id))
- if err := sst.PutMVCC(storage.MVCCKey{Key: k, Timestamp: rev.ts}, v); err != nil {
- return err
- }
- }
-
- return nil
-}
-
-func writeSpansToMetadata(ctx context.Context, sst storage.SSTWriter, m *BackupManifest) error {
- sort.Sort(roachpb.Spans(m.Spans))
- sort.Sort(roachpb.Spans(m.IntroducedSpans))
-
- for i, j := 0, 0; i < len(m.Spans) || j < len(m.IntroducedSpans); {
- var sp roachpb.Span
- var ts hlc.Timestamp
-
- // Merge spans and introduced spans into one series of spans where the ts on
- // each is 0 if it was introduced or the backup start time otherwise.
- if j >= len(m.IntroducedSpans) {
- sp = m.Spans[i]
- ts = m.StartTime
- i++
- } else if i >= len(m.Spans) {
- sp = m.IntroducedSpans[j]
- ts = hlc.Timestamp{}
- j++
- } else {
- cmp := m.Spans[i].Key.Compare(m.IntroducedSpans[j].Key)
- if cmp < 0 {
- sp = m.Spans[i]
- ts = m.StartTime
- i++
- } else {
- sp = m.IntroducedSpans[j]
- ts = hlc.Timestamp{}
- j++
- }
- }
- if ts.IsEmpty() {
- if err := sst.PutUnversioned(encodeSpanSSTKey(sp), nil); err != nil {
- return err
- }
- } else {
- k := storage.MVCCKey{Key: encodeSpanSSTKey(sp), Timestamp: ts}
- if err := sst.PutMVCC(k, nil); err != nil {
- return err
- }
- }
- }
- return nil
-}
-
-func writeStatsToMetadata(
- ctx context.Context, sst storage.SSTWriter, stats []*stats.TableStatisticProto,
-) error {
- sort.Slice(stats, func(i, j int) bool {
- return stats[i].TableID < stats[j].TableID || (stats[i].TableID == stats[j].TableID && stats[i].StatisticID < stats[j].StatisticID)
- })
-
- for _, i := range stats {
- b, err := protoutil.Marshal(i)
- if err != nil {
- return err
- }
- if err := sst.PutUnversioned(encodeStatSSTKey(i.TableID, i.StatisticID), b); err != nil {
- return err
- }
- }
- return nil
-}
-
-func writeTenantsToMetadata(ctx context.Context, sst storage.SSTWriter, m *BackupManifest) error {
- sort.Slice(m.Tenants, func(i, j int) bool { return m.Tenants[i].ID < m.Tenants[j].ID })
- for _, i := range m.Tenants {
- b, err := protoutil.Marshal(&i)
- if err != nil {
- return err
- }
- if err := sst.PutUnversioned(encodeTenantSSTKey(i.ID), b); err != nil {
- return err
- }
- }
- return nil
-}
-
-func descID(in descpb.Descriptor) descpb.ID {
- switch i := in.Union.(type) {
- case *descpb.Descriptor_Table:
- return i.Table.ID
- case *descpb.Descriptor_Database:
- return i.Database.ID
- case *descpb.Descriptor_Type:
- return i.Type.ID
- case *descpb.Descriptor_Schema:
- return i.Schema.ID
- default:
- panic(fmt.Sprintf("unknown desc %T", in))
- }
-}
-
-func deprefix(key roachpb.Key, prefix string) (roachpb.Key, error) {
- if !bytes.HasPrefix(key, []byte(prefix)) {
- return nil, errors.Errorf("malformed key missing expected prefix %s: %q", prefix, key)
- }
- return key[len(prefix):], nil
-}
-
-func encodeDescSSTKey(id descpb.ID) roachpb.Key {
- return roachpb.Key(encoding.EncodeUvarintAscending([]byte(sstDescsPrefix), uint64(id)))
-}
-
-func decodeDescSSTKey(key roachpb.Key) (descpb.ID, error) {
- key, err := deprefix(key, sstDescsPrefix)
- if err != nil {
- return 0, err
- }
- _, id, err := encoding.DecodeUvarintAscending(key)
- return descpb.ID(id), err
-}
-
-func encodeFileSSTKey(spanStart roachpb.Key, filename string) roachpb.Key {
- buf := make([]byte, 0)
- buf = encoding.EncodeBytesAscending(buf, spanStart)
- return roachpb.Key(encoding.EncodeStringAscending(buf, filename))
-}
-
-func encodeFilenameSSTKey(filename string) roachpb.Key {
- return encoding.EncodeStringAscending([]byte(sstFilesPrefix), filename)
-}
-
-func decodeUnsafeFileSSTKey(key roachpb.Key) (roachpb.Key, string, error) {
- key, spanStart, err := encoding.DecodeBytesAscending(key, nil)
- if err != nil {
- return nil, "", err
- }
- _, filename, err := encoding.DecodeUnsafeStringAscending(key, nil)
- if err != nil {
- return nil, "", err
- }
- return roachpb.Key(spanStart), filename, err
-}
-
-func decodeUnsafeFileInfoSSTKey(key roachpb.Key) (string, error) {
- key, err := deprefix(key, sstFilesPrefix)
- if err != nil {
- return "", err
- }
-
- _, path, err := encoding.DecodeUnsafeStringAscending(key, nil)
- if err != nil {
- return "", err
- }
- return path, err
-}
-
-func encodeNameSSTKey(parentDB, parentSchema descpb.ID, name string) roachpb.Key {
- buf := []byte(sstNamesPrefix)
- buf = encoding.EncodeUvarintAscending(buf, uint64(parentDB))
- buf = encoding.EncodeUvarintAscending(buf, uint64(parentSchema))
- return roachpb.Key(encoding.EncodeStringAscending(buf, name))
-}
-
-func decodeUnsafeNameSSTKey(key roachpb.Key) (descpb.ID, descpb.ID, string, error) {
- key, err := deprefix(key, sstNamesPrefix)
- if err != nil {
- return 0, 0, "", err
- }
- key, parentID, err := encoding.DecodeUvarintAscending(key)
- if err != nil {
- return 0, 0, "", err
- }
- key, schemaID, err := encoding.DecodeUvarintAscending(key)
- if err != nil {
- return 0, 0, "", err
- }
- _, name, err := encoding.DecodeUnsafeStringAscending(key, nil)
- if err != nil {
- return 0, 0, "", err
- }
- return descpb.ID(parentID), descpb.ID(schemaID), name, nil
-}
-
-func encodeSpanSSTKey(span roachpb.Span) roachpb.Key {
- buf := encoding.EncodeBytesAscending([]byte(sstSpansPrefix), span.Key)
- return roachpb.Key(encoding.EncodeBytesAscending(buf, span.EndKey))
-}
-
-func decodeSpanSSTKey(key roachpb.Key) (roachpb.Span, error) {
- key, err := deprefix(key, sstSpansPrefix)
- if err != nil {
- return roachpb.Span{}, err
- }
- key, start, err := encoding.DecodeBytesAscending(key, nil)
- if err != nil {
- return roachpb.Span{}, err
- }
- _, end, err := encoding.DecodeBytesAscending(key, nil)
- return roachpb.Span{Key: start, EndKey: end}, err
-}
-
-func encodeStatSSTKey(id descpb.ID, statID uint64) roachpb.Key {
- buf := encoding.EncodeUvarintAscending([]byte(sstStatsPrefix), uint64(id))
- return roachpb.Key(encoding.EncodeUvarintAscending(buf, statID))
-}
-
-func decodeStatSSTKey(key roachpb.Key) (descpb.ID, uint64, error) {
- key, err := deprefix(key, sstStatsPrefix)
- if err != nil {
- return 0, 0, err
- }
- key, id, err := encoding.DecodeUvarintAscending(key)
- if err != nil {
- return 0, 0, err
- }
- _, stat, err := encoding.DecodeUvarintAscending(key)
- return descpb.ID(id), stat, err
-}
-
-func encodeTenantSSTKey(id uint64) roachpb.Key {
- return encoding.EncodeUvarintAscending([]byte(sstTenantsPrefix), id)
-}
-
-func decodeTenantSSTKey(key roachpb.Key) (uint64, error) {
- key, err := deprefix(key, sstTenantsPrefix)
- if err != nil {
- return 0, err
- }
- _, id, err := encoding.DecodeUvarintAscending(key)
- if err != nil {
- return 0, err
- }
- return id, nil
-}
-
-func pbBytesToJSON(in []byte, msg protoutil.Message) (json.JSON, error) {
- if err := protoutil.Unmarshal(in, msg); err != nil {
- return nil, err
- }
- j, err := protoreflect.MessageToJSON(msg, protoreflect.FmtFlags{})
- if err != nil {
- return nil, err
- }
- return j, nil
-}
-
-func debugDumpFileSST(
- ctx context.Context,
- store cloud.ExternalStorage,
- fileInfoPath string,
- enc *jobspb.BackupEncryptionOptions,
- out func(rawKey, readableKey string, value json.JSON) error,
-) error {
- var encOpts *roachpb.FileEncryptionOptions
- if enc != nil {
- key, err := getEncryptionKey(ctx, enc, store.Settings(), store.ExternalIOConf())
- if err != nil {
- return err
- }
- encOpts = &roachpb.FileEncryptionOptions{Key: key}
- }
- iter, err := storageccl.ExternalSSTReader(ctx, store, fileInfoPath, encOpts)
- if err != nil {
- return err
- }
- defer iter.Close()
- for iter.SeekGE(storage.MVCCKey{}); ; iter.Next() {
- ok, err := iter.Valid()
- if err != nil {
- return err
- }
- if !ok {
- break
- }
- k := iter.UnsafeKey()
- spanStart, path, err := decodeUnsafeFileSSTKey(k.Key)
- if err != nil {
- return err
- }
- f, err := pbBytesToJSON(iter.UnsafeValue(), &BackupManifest_File{})
- if err != nil {
- return err
- }
- if err := out(k.String(), fmt.Sprintf("file %s (%s)", path, spanStart.String()), f); err != nil {
- return err
- }
- }
-
- return nil
-}
-
-// DebugDumpMetadataSST is for debugging a metadata SST.
-func DebugDumpMetadataSST(
- ctx context.Context,
- store cloud.ExternalStorage,
- path string,
- enc *jobspb.BackupEncryptionOptions,
- out func(rawKey, readableKey string, value json.JSON) error,
-) error {
- var encOpts *roachpb.FileEncryptionOptions
- if enc != nil {
- key, err := getEncryptionKey(ctx, enc, store.Settings(), store.ExternalIOConf())
- if err != nil {
- return err
- }
- encOpts = &roachpb.FileEncryptionOptions{Key: key}
- }
-
- iter, err := storageccl.ExternalSSTReader(ctx, store, path, encOpts)
- if err != nil {
- return err
- }
- defer iter.Close()
-
- for iter.SeekGE(storage.MVCCKey{}); ; iter.Next() {
- ok, err := iter.Valid()
- if err != nil {
- return err
- }
- if !ok {
- break
- }
- k := iter.UnsafeKey()
- switch {
- case bytes.Equal(k.Key, []byte(sstBackupKey)):
- info, err := pbBytesToJSON(iter.UnsafeValue(), &BackupManifest{})
- if err != nil {
- return err
- }
- if err := out(k.String(), "backup info", info); err != nil {
- return err
- }
-
- case bytes.HasPrefix(k.Key, []byte(sstDescsPrefix)):
- id, err := decodeDescSSTKey(k.Key)
- if err != nil {
- return err
- }
- var desc json.JSON
- if v := iter.UnsafeValue(); len(v) > 0 {
- desc, err = pbBytesToJSON(v, &descpb.Descriptor{})
- if err != nil {
- return err
- }
- }
- if err := out(k.String(), fmt.Sprintf("desc %d @ %v", id, k.Timestamp), desc); err != nil {
- return err
- }
-
- case bytes.HasPrefix(k.Key, []byte(sstFilesPrefix)):
- p, err := decodeUnsafeFileInfoSSTKey(k.Key)
- if err != nil {
- return err
- }
- if err := out(k.String(), fmt.Sprintf("file info @ %s", p), nil); err != nil {
- return err
- }
- if err := debugDumpFileSST(ctx, store, p, enc, out); err != nil {
- return err
- }
- case bytes.HasPrefix(k.Key, []byte(sstNamesPrefix)):
- db, sc, name, err := decodeUnsafeNameSSTKey(k.Key)
- if err != nil {
- return err
- }
- var id uint64
- if v := iter.UnsafeValue(); len(v) > 0 {
- _, id, err = encoding.DecodeUvarintAscending(v)
- if err != nil {
- return err
- }
- }
- mapping := fmt.Sprintf("name db %d / schema %d / %q @ %v -> %d", db, sc, name, k.Timestamp, id)
- if err := out(k.String(), mapping, nil); err != nil {
- return err
- }
-
- case bytes.HasPrefix(k.Key, []byte(sstSpansPrefix)):
- span, err := decodeSpanSSTKey(k.Key)
- if err != nil {
- return err
- }
- if err := out(k.String(), fmt.Sprintf("span %s @ %v", span, k.Timestamp), nil); err != nil {
- return err
- }
-
- case bytes.HasPrefix(k.Key, []byte(sstStatsPrefix)):
- tblID, statID, err := decodeStatSSTKey(k.Key)
- if err != nil {
- return err
- }
- s, err := pbBytesToJSON(iter.UnsafeValue(), &stats.TableStatisticProto{})
- if err != nil {
- return err
- }
- if err := out(k.String(), fmt.Sprintf("stats tbl %d, id %d", tblID, statID), s); err != nil {
- return err
- }
-
- case bytes.HasPrefix(k.Key, []byte(sstTenantsPrefix)):
- id, err := decodeTenantSSTKey(k.Key)
- if err != nil {
- return err
- }
- i, err := pbBytesToJSON(iter.UnsafeValue(), &descpb.TenantInfo{})
- if err != nil {
- return err
- }
- if err := out(k.String(), fmt.Sprintf("tenant %d", id), i); err != nil {
- return err
- }
-
- default:
- if err := out(k.String(), "unknown", json.FromString(fmt.Sprintf("%q", iter.UnsafeValue()))); err != nil {
- return err
- }
- }
- }
-
- return nil
-}
-
-// BackupMetadata holds all of the data in BackupManifest except a few repeated
-// fields such as descriptors or spans. BackupMetadata provides iterator methods
-// so that the excluded fields can be accessed in a streaming manner.
-type BackupMetadata struct {
- BackupManifest
- store cloud.ExternalStorage
- enc *jobspb.BackupEncryptionOptions
- filename string
-}
-
-func newBackupMetadata(
- ctx context.Context,
- exportStore cloud.ExternalStorage,
- sstFileName string,
- encryption *jobspb.BackupEncryptionOptions,
-) (*BackupMetadata, error) {
- var encOpts *roachpb.FileEncryptionOptions
- if encryption != nil {
- key, err := getEncryptionKey(ctx, encryption, exportStore.Settings(), exportStore.ExternalIOConf())
- if err != nil {
- return nil, err
- }
- encOpts = &roachpb.FileEncryptionOptions{Key: key}
- }
-
- iter, err := storageccl.ExternalSSTReader(ctx, exportStore, sstFileName, encOpts)
- if err != nil {
- return nil, err
- }
- defer iter.Close()
-
- var sstManifest BackupManifest
- iter.SeekGE(storage.MakeMVCCMetadataKey([]byte(sstBackupKey)))
- ok, err := iter.Valid()
- if err != nil {
- return nil, err
- }
- if !ok || !iter.UnsafeKey().Key.Equal([]byte(sstBackupKey)) {
- return nil, errors.Errorf("metadata SST does not contain backup manifest")
- }
-
- if err := protoutil.Unmarshal(iter.UnsafeValue(), &sstManifest); err != nil {
- return nil, err
- }
-
- return &BackupMetadata{BackupManifest: sstManifest, store: exportStore, enc: encryption, filename: sstFileName}, nil
-}
-
-// SpanIterator is a simple iterator to iterate over roachpb.Spans.
-type SpanIterator struct {
- backing bytesIter
- filter func(key storage.MVCCKey) bool
- err error
-}
-
-// SpanIter creates a new SpanIterator for the backup metadata.
-func (b *BackupMetadata) SpanIter(ctx context.Context) SpanIterator {
- backing := makeBytesIter(ctx, b.store, b.filename, []byte(sstSpansPrefix), b.enc, true)
- return SpanIterator{
- backing: backing,
- }
-}
-
-// IntroducedSpanIter creates a new IntroducedSpanIterator for the backup metadata.
-func (b *BackupMetadata) IntroducedSpanIter(ctx context.Context) SpanIterator {
- backing := makeBytesIter(ctx, b.store, b.filename, []byte(sstSpansPrefix), b.enc, false)
-
- return SpanIterator{
- backing: backing,
- filter: func(key storage.MVCCKey) bool {
- return key.Timestamp == hlc.Timestamp{}
- },
- }
-}
-
-// Close closes the iterator.
-func (si *SpanIterator) Close() {
- si.backing.close()
-}
-
-// Err returns the iterator's error
-func (si *SpanIterator) Err() error {
- if si.err != nil {
- return si.err
- }
- return si.backing.err()
-}
-
-// Next retrieves the next span in the iterator.
-//
-// Next returns true if next element was successfully unmarshalled into span,
-// and false if there are no more elements or if an error was encountered. When
-// Next returns false, the user should call the Err method to verify the
-// existence of an error.
-func (si *SpanIterator) Next(span *roachpb.Span) bool {
- wrapper := resultWrapper{}
-
- for si.backing.next(&wrapper) {
- if si.filter == nil || si.filter(wrapper.key) {
- sp, err := decodeSpanSSTKey(wrapper.key.Key)
- if err != nil {
- si.err = err
- return false
- }
-
- *span = sp
- return true
- }
- }
-
- return false
-}
-
-// FileIterator is a simple iterator to iterate over stats.TableStatisticProtos.
-type FileIterator struct {
- mergedIterator storage.SimpleMVCCIterator
- backingIterators []storage.SimpleMVCCIterator
- err error
-}
-
-// FileIter creates a new FileIterator for the backup metadata.
-func (b *BackupMetadata) FileIter(ctx context.Context) FileIterator {
- fileInfoIter := makeBytesIter(ctx, b.store, b.filename, []byte(sstFilesPrefix), b.enc, false)
- defer fileInfoIter.close()
-
- var iters []storage.SimpleMVCCIterator
- var encOpts *roachpb.FileEncryptionOptions
- if b.enc != nil {
- key, err := getEncryptionKey(ctx, b.enc, b.store.Settings(), b.store.ExternalIOConf())
- if err != nil {
- return FileIterator{err: err}
- }
- encOpts = &roachpb.FileEncryptionOptions{Key: key}
- }
-
- result := resultWrapper{}
- for fileInfoIter.next(&result) {
- path, err := decodeUnsafeFileInfoSSTKey(result.key.Key)
- if err != nil {
- break
- }
-
- iter, err := storageccl.ExternalSSTReader(ctx, b.store, path, encOpts)
- if err != nil {
- return FileIterator{err: err}
- }
- iters = append(iters, iter)
- }
-
- if fileInfoIter.err() != nil {
- return FileIterator{err: fileInfoIter.err()}
- }
-
- mergedIter := storage.MakeMultiIterator(iters)
- mergedIter.SeekGE(storage.MVCCKey{})
- return FileIterator{mergedIterator: mergedIter, backingIterators: iters}
-}
-
-// Close closes the iterator.
-func (fi *FileIterator) Close() {
- for _, it := range fi.backingIterators {
- it.Close()
- }
- fi.mergedIterator = nil
- fi.backingIterators = fi.backingIterators[:0]
-}
-
-// Err returns the iterator's error.
-func (fi *FileIterator) Err() error {
- return fi.err
-}
-
-// Next retrieves the next file in the iterator.
-//
-// Next returns true if next element was successfully unmarshalled into file,
-// and false if there are no more elements or if an error was encountered. When
-// Next returns false, the user should call the Err method to verify the
-// existence of an error.
-func (fi *FileIterator) Next(file *BackupManifest_File) bool {
- if fi.err != nil {
- return false
- }
-
- valid, err := fi.mergedIterator.Valid()
- if err != nil || !valid {
- fi.err = err
- return false
- }
- err = protoutil.Unmarshal(fi.mergedIterator.UnsafeValue(), file)
- if err != nil {
- fi.err = err
- return false
- }
-
- fi.mergedIterator.Next()
- return true
-}
-
-// DescIterator is a simple iterator to iterate over descpb.Descriptors.
-type DescIterator struct {
- backing bytesIter
- err error
-}
-
-// DescIter creates a new DescIterator for the backup metadata.
-func (b *BackupMetadata) DescIter(ctx context.Context) DescIterator {
- backing := makeBytesIter(ctx, b.store, b.filename, []byte(sstDescsPrefix), b.enc, true)
- return DescIterator{
- backing: backing,
- }
-}
-
-// Close closes the iterator.
-func (di *DescIterator) Close() {
- di.backing.close()
-}
-
-// Err returns the iterator's error.
-func (di *DescIterator) Err() error {
- if di.err != nil {
- return di.err
- }
- return di.backing.err()
-}
-
-// Next retrieves the next descriptor in the iterator.
-//
-// Next returns true if next element was successfully unmarshalled into desc ,
-// and false if there are no more elements or if an error was encountered. When
-// Next returns false, the user should call the Err method to verify the
-// existence of an error.
-func (di *DescIterator) Next(desc *descpb.Descriptor) bool {
- wrapper := resultWrapper{}
-
- for di.backing.next(&wrapper) {
- err := protoutil.Unmarshal(wrapper.value, desc)
- if err != nil {
- di.err = err
- return false
- }
-
- tbl, db, typ, sc := descpb.FromDescriptor(desc)
- if tbl != nil || db != nil || typ != nil || sc != nil {
- return true
- }
- }
-
- return false
-}
-
-// TenantIterator is a simple iterator to iterate over TenantInfoWithUsages.
-type TenantIterator struct {
- backing bytesIter
- err error
-}
-
-// TenantIter creates a new TenantIterator for the backup metadata.
-func (b *BackupMetadata) TenantIter(ctx context.Context) TenantIterator {
- backing := makeBytesIter(ctx, b.store, b.filename, []byte(sstTenantsPrefix), b.enc, false)
- return TenantIterator{
- backing: backing,
- }
-}
-
-// Close closes the iterator.
-func (ti *TenantIterator) Close() {
- ti.backing.close()
-}
-
-// Err returns the iterator's error.
-func (ti *TenantIterator) Err() error {
- if ti.err != nil {
- return ti.err
- }
- return ti.backing.err()
-}
-
-// Next retrieves the next tenant in the iterator.
-//
-// Next returns true if next element was successfully unmarshalled into tenant,
-// and false if there are no more elements or if an error was encountered. When
-// Next returns false, the user should call the Err method to verify the
-// existence of an error.
-func (ti *TenantIterator) Next(tenant *descpb.TenantInfoWithUsage) bool {
- wrapper := resultWrapper{}
- ok := ti.backing.next(&wrapper)
- if !ok {
- return false
- }
-
- err := protoutil.Unmarshal(wrapper.value, tenant)
- if err != nil {
- ti.err = err
- return false
- }
-
- return true
-}
-
-// DescriptorRevisionIterator is a simple iterator to iterate over BackupManifest_DescriptorRevisions.
-type DescriptorRevisionIterator struct {
- backing bytesIter
- err error
-}
-
-// DescriptorChangesIter creates a new DescriptorChangesIterator for the backup metadata.
-func (b *BackupMetadata) DescriptorChangesIter(ctx context.Context) DescriptorRevisionIterator {
- backing := makeBytesIter(ctx, b.store, b.filename, []byte(sstDescsPrefix), b.enc, false)
- return DescriptorRevisionIterator{
- backing: backing,
- }
-}
-
-// Close closes the iterator.
-func (dri *DescriptorRevisionIterator) Close() {
- dri.backing.close()
-}
-
-// Err returns the iterator's error.
-func (dri *DescriptorRevisionIterator) Err() error {
- if dri.err != nil {
- return dri.err
- }
- return dri.backing.err()
-}
-
-// Next retrieves the next descriptor revision in the iterator.
-//
-// Next returns true if next element was successfully unmarshalled into
-// revision, and false if there are no more elements or if an error was
-// encountered. When Next returns false, the user should call the Err method to
-// verify the existence of an error.
-func (dri *DescriptorRevisionIterator) Next(revision *BackupManifest_DescriptorRevision) bool {
- wrapper := resultWrapper{}
- ok := dri.backing.next(&wrapper)
- if !ok {
- return false
- }
-
- err := unmarshalWrapper(&wrapper, revision)
- if err != nil {
- dri.err = err
- return false
- }
-
- return true
-}
-
-func unmarshalWrapper(wrapper *resultWrapper, rev *BackupManifest_DescriptorRevision) error {
- var desc *descpb.Descriptor
- if len(wrapper.value) > 0 {
- desc = &descpb.Descriptor{}
- err := protoutil.Unmarshal(wrapper.value, desc)
- if err != nil {
- return err
- }
- }
-
- id, err := decodeDescSSTKey(wrapper.key.Key)
- if err != nil {
- return err
- }
-
- *rev = BackupManifest_DescriptorRevision{
- Desc: desc,
- ID: id,
- Time: wrapper.key.Timestamp,
- }
- return nil
-}
-
-// StatsIterator is a simple iterator to iterate over stats.TableStatisticProtos.
-type StatsIterator struct {
- backing bytesIter
- err error
-}
-
-// StatsIter creates a new StatsIterator for the backup metadata.
-func (b *BackupMetadata) StatsIter(ctx context.Context) StatsIterator {
- backing := makeBytesIter(ctx, b.store, b.filename, []byte(sstStatsPrefix), b.enc, false)
- return StatsIterator{
- backing: backing,
- }
-}
-
-// Close closes the iterator.
-func (si *StatsIterator) Close() {
- si.backing.close()
-}
-
-// Err returns the iterator's error.
-func (si *StatsIterator) Err() error {
- if si.err != nil {
- return si.err
- }
- return si.backing.err()
-}
-
-// Next retrieves the next stats proto in the iterator.
-//
-// Next returns true if next element was successfully unmarshalled into
-// statsPtr, and false if there are no more elements or if an error was
-// encountered. When Next returns false, the user should call the Err method to verify the
-// existence of an error.
-func (si *StatsIterator) Next(statsPtr **stats.TableStatisticProto) bool {
- wrapper := resultWrapper{}
- ok := si.backing.next(&wrapper)
-
- if !ok {
- return false
- }
-
- var s stats.TableStatisticProto
- err := protoutil.Unmarshal(wrapper.value, &s)
- if err != nil {
- si.err = err
- return false
- }
-
- *statsPtr = &s
- return true
-}
-
-type bytesIter struct {
- Iter storage.SimpleMVCCIterator
-
- prefix []byte
- useMVCCNext bool
- iterError error
-}
-
-func makeBytesIter(
- ctx context.Context,
- store cloud.ExternalStorage,
- path string,
- prefix []byte,
- enc *jobspb.BackupEncryptionOptions,
- useMVCCNext bool,
-) bytesIter {
- var encOpts *roachpb.FileEncryptionOptions
- if enc != nil {
- key, err := getEncryptionKey(ctx, enc, store.Settings(), store.ExternalIOConf())
- if err != nil {
- return bytesIter{iterError: err}
- }
- encOpts = &roachpb.FileEncryptionOptions{Key: key}
- }
-
- iter, err := storageccl.ExternalSSTReader(ctx, store, path, encOpts)
- if err != nil {
- return bytesIter{iterError: err}
- }
-
- iter.SeekGE(storage.MakeMVCCMetadataKey(prefix))
- return bytesIter{
- Iter: iter,
- prefix: prefix,
- useMVCCNext: useMVCCNext,
- }
-}
-
-func (bi *bytesIter) next(resWrapper *resultWrapper) bool {
- if bi.iterError != nil {
- return false
- }
-
- valid, err := bi.Iter.Valid()
- if err != nil || !valid || !bytes.HasPrefix(bi.Iter.UnsafeKey().Key, bi.prefix) {
- bi.close()
- bi.iterError = err
- return false
- }
-
- key := bi.Iter.UnsafeKey()
- resWrapper.key.Key = key.Key.Clone()
- resWrapper.key.Timestamp = key.Timestamp
- resWrapper.value = resWrapper.value[:0]
- resWrapper.value = append(resWrapper.value, bi.Iter.UnsafeValue()...)
-
- if bi.useMVCCNext {
- bi.Iter.NextKey()
- } else {
- bi.Iter.Next()
- }
- return true
-}
-
-func (bi *bytesIter) err() error {
- return bi.iterError
-}
-
-func (bi *bytesIter) close() {
- if bi.Iter != nil {
- bi.Iter.Close()
- bi.Iter = nil
- }
-}
-
-type resultWrapper struct {
- key storage.MVCCKey
- value []byte
-}
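
The file removed above exposes its repeated manifest fields through a family of streaming iterators (SpanIterator, FileIterator, DescIterator, and so on) that all follow the same Next-into-pointer / Err idiom described in their doc comments. A small self-contained sketch of that idiom follows (not part of the patch), with illustrative types rather than the backupccl ones.

package main

import (
	"errors"
	"fmt"
)

// intIterator is a toy stand-in for the metadata iterators above.
type intIterator struct {
	data []int
	pos  int
	err  error
}

// Next writes the next element into out. It returns false when there are no
// more elements or when an error occurred; callers must then consult Err.
func (it *intIterator) Next(out *int) bool {
	if it.err != nil || it.pos >= len(it.data) {
		return false
	}
	if it.data[it.pos] < 0 {
		it.err = errors.New("negative element") // simulate a decode failure
		return false
	}
	*out = it.data[it.pos]
	it.pos++
	return true
}

// Err reports any error encountered during iteration.
func (it *intIterator) Err() error { return it.err }

func main() {
	it := intIterator{data: []int{1, 2, 3}}
	var v int
	for it.Next(&v) {
		fmt.Println(v)
	}
	if err := it.Err(); err != nil {
		fmt.Println("iteration failed:", err)
	}
}

Keeping Next a bare bool and funneling all failures through a single Err check after the loop is what lets the removed code stream spans, descriptors, and files without materializing them, which is why each iterator above is documented the same way.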
diff --git a/pkg/ccl/backupccl/backup_metadata_test.go b/pkg/ccl/backupccl/backup_metadata_test.go
deleted file mode 100644
index bcfa56c9f6..0000000000
--- a/pkg/ccl/backupccl/backup_metadata_test.go
+++ /dev/null
@@ -1,281 +0,0 @@
-// Copyright 2022 The Cockroach Authors.
-//
-// Licensed as a CockroachDB Enterprise file under the Cockroach Community
-// License (the "License"); you may not use this file except in compliance with
-// the License. You may obtain a copy of the License at
-//
-// https://github.com/cockroachdb/cockroach/blob/master/licenses/CCL.txt
-
-package backupccl
-
-import (
- "context"
- "sort"
- "testing"
-
- "github.com/cockroachdb/cockroach/pkg/base"
- "github.com/cockroachdb/cockroach/pkg/blobs"
- "github.com/cockroachdb/cockroach/pkg/cloud"
- "github.com/cockroachdb/cockroach/pkg/roachpb"
- "github.com/cockroachdb/cockroach/pkg/security"
- "github.com/cockroachdb/cockroach/pkg/sql"
- "github.com/cockroachdb/cockroach/pkg/sql/catalog/descpb"
- "github.com/cockroachdb/cockroach/pkg/sql/stats"
- "github.com/cockroachdb/cockroach/pkg/testutils/testcluster"
- "github.com/cockroachdb/cockroach/pkg/util/hlc"
- "github.com/cockroachdb/cockroach/pkg/util/ioctx"
- "github.com/cockroachdb/cockroach/pkg/util/leaktest"
- "github.com/cockroachdb/cockroach/pkg/util/log"
- "github.com/cockroachdb/cockroach/pkg/util/protoutil"
- "github.com/stretchr/testify/require"
-)
-
-func TestMetadataSST(t *testing.T) {
- defer leaktest.AfterTest(t)()
- defer log.Scope(t).Close(t)
-
- ctx := context.Background()
- const numAccounts = 1
- userfile := "userfile:///0"
- tc, sqlDB, _, cleanupFn := backupRestoreTestSetup(t, singleNode, numAccounts, InitManualReplication)
- defer cleanupFn()
-
- // Check that backup metadata is correct on full cluster backup.
- sqlDB.Exec(t, `BACKUP TO $1`, userfile)
- checkMetadata(ctx, t, tc, userfile)
-
- // Check for correct backup metadata on incremental backup with revision
- // history.
- sqlDB.Exec(t, `CREATE TABLE data.foo(k INT, v INT)`)
- sqlDB.Exec(t, `CREATE INDEX idx ON data.bank (balance)`)
-
- sqlDB.Exec(t, `BACKUP TO $1 WITH revision_history`, userfile)
- checkMetadata(ctx, t, tc, userfile)
-
- // Check for correct backup metadata on single table backups.
- userfile1 := "userfile:///1"
- sqlDB.Exec(t, `BACKUP TABLE data.bank TO $1 WITH revision_history`, userfile1)
- checkMetadata(ctx, t, tc, userfile1)
-
- // Check for correct backup metadata on tenant backups.
- userfile2 := "userfile:///2"
- _, err := tc.Servers[0].StartTenant(ctx, base.TestTenantArgs{TenantID: roachpb.MakeTenantID(10)})
- require.NoError(t, err)
- sqlDB.Exec(t, `BACKUP TENANT 10 TO $1`, userfile2)
- checkMetadata(ctx, t, tc, userfile2)
-}
-
-func checkMetadata(
- ctx context.Context, t *testing.T, tc *testcluster.TestCluster, backupLoc string,
-) {
- store, err := cloud.ExternalStorageFromURI(
- ctx,
- backupLoc,
- base.ExternalIODirConfig{},
- tc.Servers[0].ClusterSettings(),
- blobs.TestEmptyBlobClientFactory,
- security.RootUserName(),
- tc.Servers[0].InternalExecutor().(*sql.InternalExecutor), tc.Servers[0].DB())
- if err != nil {
- t.Fatal(err)
- }
- m, err := testingReadBackupManifest(ctx, store, backupManifestName)
- if err != nil {
- t.Fatal(err)
- }
-
- bm, err := newBackupMetadata(ctx, store, metadataSSTName, nil)
- if err != nil {
- t.Fatal(err)
- }
-
- checkManifest(t, m, bm)
- // If there are descriptor changes, we only check those as they should have
- // all changes as well as existing descriptors
- if len(m.DescriptorChanges) > 0 {
- checkDescriptorChanges(ctx, t, m, bm)
- } else {
- checkDescriptors(ctx, t, m, bm)
- }
-
- checkSpans(ctx, t, m, bm)
- // Don't check introduced spans on the first backup.
- if m.StartTime != (hlc.Timestamp{}) {
- checkIntroducedSpans(ctx, t, m, bm)
- }
- checkFiles(ctx, t, m, bm)
- checkTenants(ctx, t, m, bm)
- checkStats(ctx, t, store, m, bm)
-}
-
-func checkManifest(t *testing.T, m *BackupManifest, bm *BackupMetadata) {
- expectedManifest := *m
- expectedManifest.Descriptors = nil
- expectedManifest.DescriptorChanges = nil
- expectedManifest.Files = nil
- expectedManifest.Spans = nil
- expectedManifest.IntroducedSpans = nil
- expectedManifest.StatisticsFilenames = nil
- expectedManifest.Tenants = nil
-
- require.Equal(t, expectedManifest, bm.BackupManifest)
-}
-
-func checkDescriptors(ctx context.Context, t *testing.T, m *BackupManifest, bm *BackupMetadata) {
- var metaDescs []descpb.Descriptor
- var desc descpb.Descriptor
-
- it := bm.DescIter(ctx)
- defer it.Close()
- for it.Next(&desc) {
- metaDescs = append(metaDescs, desc)
- }
-
- if it.Err() != nil {
- t.Fatal(it.Err())
- }
-
- require.Equal(t, m.Descriptors, metaDescs)
-}
-
-func checkDescriptorChanges(
- ctx context.Context, t *testing.T, m *BackupManifest, bm *BackupMetadata,
-) {
- var metaRevs []BackupManifest_DescriptorRevision
- var rev BackupManifest_DescriptorRevision
- it := bm.DescriptorChangesIter(ctx)
- defer it.Close()
-
- for it.Next(&rev) {
- metaRevs = append(metaRevs, rev)
- }
- if it.Err() != nil {
- t.Fatal(it.Err())
- }
-
- // Descriptor Changes are sorted by time in the manifest.
- sort.Slice(metaRevs, func(i, j int) bool {
- return metaRevs[i].Time.Less(metaRevs[j].Time)
- })
-
- require.Equal(t, m.DescriptorChanges, metaRevs)
-}
-
-func checkFiles(ctx context.Context, t *testing.T, m *BackupManifest, bm *BackupMetadata) {
- var metaFiles []BackupManifest_File
- var file BackupManifest_File
- it := bm.FileIter(ctx)
- defer it.Close()
-
- for it.Next(&file) {
- metaFiles = append(metaFiles, file)
- }
- if it.Err() != nil {
- t.Fatal(it.Err())
- }
-
- require.Equal(t, m.Files, metaFiles)
-}
-
-func checkSpans(ctx context.Context, t *testing.T, m *BackupManifest, bm *BackupMetadata) {
- var metaSpans []roachpb.Span
- var span roachpb.Span
- it := bm.SpanIter(ctx)
- defer it.Close()
-
- for it.Next(&span) {
- metaSpans = append(metaSpans, span)
- }
- if it.Err() != nil {
- t.Fatal(it.Err())
- }
-
- require.Equal(t, m.Spans, metaSpans)
-}
-
-func checkIntroducedSpans(
- ctx context.Context, t *testing.T, m *BackupManifest, bm *BackupMetadata,
-) {
- var metaSpans []roachpb.Span
- var span roachpb.Span
- it := bm.IntroducedSpanIter(ctx)
- defer it.Close()
- for it.Next(&span) {
- metaSpans = append(metaSpans, span)
- }
- if it.Err() != nil {
- t.Fatal(it.Err())
- }
-
- require.Equal(t, m.IntroducedSpans, metaSpans)
-}
-
-func checkTenants(ctx context.Context, t *testing.T, m *BackupManifest, bm *BackupMetadata) {
- var metaTenants []descpb.TenantInfoWithUsage
- var tenant descpb.TenantInfoWithUsage
- it := bm.TenantIter(ctx)
- defer it.Close()
-
- for it.Next(&tenant) {
- metaTenants = append(metaTenants, tenant)
- }
- if it.Err() != nil {
- t.Fatal(it.Err())
- }
-
- require.Equal(t, m.Tenants, metaTenants)
-}
-
-func checkStats(
- ctx context.Context,
- t *testing.T,
- store cloud.ExternalStorage,
- m *BackupManifest,
- bm *BackupMetadata,
-) {
- expectedStats, err := getStatisticsFromBackup(ctx, store, nil, *m)
- if err != nil {
- t.Fatal(err)
- }
-
- var metaStats = make([]*stats.TableStatisticProto, 0)
- var s *stats.TableStatisticProto
- it := bm.StatsIter(ctx)
- defer it.Close()
-
- for it.Next(&s) {
- metaStats = append(metaStats, s)
- }
- if it.Err() != nil {
- t.Fatal(it.Err())
- }
- require.Equal(t, expectedStats, metaStats)
-}
-
-func testingReadBackupManifest(
- ctx context.Context, store cloud.ExternalStorage, file string,
-) (*BackupManifest, error) {
- r, err := store.ReadFile(ctx, file)
- if err != nil {
- return nil, err
- }
- defer r.Close(ctx)
-
- bytes, err := ioctx.ReadAll(ctx, r)
- if err != nil {
- return nil, err
- }
- if isGZipped(bytes) {
- descBytes, err := decompressData(ctx, nil, bytes)
- if err != nil {
- return nil, err
- }
- bytes = descBytes
- }
-
- var m BackupManifest
- if err := protoutil.Unmarshal(bytes, &m); err != nil {
- return nil, err
- }
- return &m, nil
-}
diff --git a/pkg/ccl/backupccl/backup_planning.go b/pkg/ccl/backupccl/backup_planning.go
index e0b642c2a4..d618e186f5 100644
--- a/pkg/ccl/backupccl/backup_planning.go
+++ b/pkg/ccl/backupccl/backup_planning.go
@@ -58,16 +58,15 @@ import (
)
const (
- backupOptRevisionHistory = "revision_history"
- backupOptEncPassphrase = "encryption_passphrase"
- backupOptEncKMS = "kms"
- backupOptWithPrivileges = "privileges"
- backupOptAsJSON = "as_json"
- backupOptWithDebugIDs = "debug_ids"
- backupOptIncStorage = "incremental_location"
- localityURLParam = "COCKROACH_LOCALITY"
- defaultLocalityValue = "default"
- backupOptDebugMetadataSST = "debug_dump_metadata_sst"
+ backupOptRevisionHistory = "revision_history"
+ backupOptEncPassphrase = "encryption_passphrase"
+ backupOptEncKMS = "kms"
+ backupOptWithPrivileges = "privileges"
+ backupOptAsJSON = "as_json"
+ backupOptWithDebugIDs = "debug_ids"
+ backupOptIncStorage = "incremental_location"
+ localityURLParam = "COCKROACH_LOCALITY"
+ defaultLocalityValue = "default"
)
type tableAndIndex struct {
@@ -515,7 +514,7 @@ func checkPrivilegesForBackup(
"only users with the admin role are allowed to perform full cluster backups")
}
// Do not allow tenant backups.
- if backupStmt.Targets != nil && backupStmt.Targets.TenantID.IsSet() {
+ if backupStmt.Targets != nil && backupStmt.Targets.Tenant != (roachpb.TenantID{}) {
return pgerror.Newf(
pgcode.InsufficientPrivilege,
"only users with the admin role can perform BACKUP TENANT")
@@ -782,11 +781,11 @@ func backupPlanHook(
}
}
- if backupStmt.Targets != nil && backupStmt.Targets.TenantID.IsSet() {
+ if backupStmt.Targets != nil && backupStmt.Targets.Tenant != (roachpb.TenantID{}) {
if !p.ExecCfg().Codec.ForSystemTenant() {
return pgerror.Newf(pgcode.InsufficientPrivilege, "only the system tenant can backup other tenants")
}
- initialDetails.SpecificTenantIds = []roachpb.TenantID{backupStmt.Targets.TenantID.TenantID}
+ initialDetails.SpecificTenantIds = []roachpb.TenantID{backupStmt.Targets.Tenant}
}
jobID := p.ExecCfg().JobRegistry.MakeJobID()
diff --git a/pkg/ccl/backupccl/backup_processor.go b/pkg/ccl/backupccl/backup_processor.go
index 55db29c741..b7fc0a0e0e 100644
--- a/pkg/ccl/backupccl/backup_processor.go
+++ b/pkg/ccl/backupccl/backup_processor.go
@@ -44,7 +44,6 @@ import (
"github.com/cockroachdb/cockroach/pkg/util/timeutil"
"github.com/cockroachdb/cockroach/pkg/util/tracing"
"github.com/cockroachdb/errors"
- "github.com/cockroachdb/logtags"
gogotypes "github.com/gogo/protobuf/types"
"github.com/kr/pretty"
)
@@ -178,7 +177,6 @@ func newBackupDataProcessor(
// Start is part of the RowSource interface.
func (bp *backupDataProcessor) Start(ctx context.Context) {
- ctx = logtags.AddTag(ctx, "job", bp.spec.JobID)
ctx = bp.StartInternal(ctx, backupProcessorName)
ctx, cancel := context.WithCancel(ctx)
bp.cancelAndWaitForWorker = func() {
diff --git a/pkg/ccl/backupccl/backup_processor_planning.go b/pkg/ccl/backupccl/backup_processor_planning.go
index bc62ad876a..97fa9f6e16 100644
--- a/pkg/ccl/backupccl/backup_processor_planning.go
+++ b/pkg/ccl/backupccl/backup_processor_planning.go
@@ -24,6 +24,7 @@ import (
"github.com/cockroachdb/cockroach/pkg/util/hlc"
"github.com/cockroachdb/cockroach/pkg/util/tracing"
"github.com/cockroachdb/errors"
+ "github.com/cockroachdb/logtags"
)
func distBackupPlanSpecs(
@@ -31,7 +32,6 @@ func distBackupPlanSpecs(
planCtx *sql.PlanningCtx,
execCtx sql.JobExecContext,
dsp *sql.DistSQLPlanner,
- jobID int64,
spans roachpb.Spans,
introducedSpans roachpb.Spans,
pkIDs map[uint64]bool,
@@ -92,7 +92,6 @@ func distBackupPlanSpecs(
sqlInstanceIDToSpec := make(map[base.SQLInstanceID]*execinfrapb.BackupDataSpec)
for _, partition := range spanPartitions {
spec := &execinfrapb.BackupDataSpec{
- JobID: jobID,
Spans: partition.Spans,
DefaultURI: defaultURI,
URIsByLocalityKV: urisByLocalityKV,
@@ -114,7 +113,6 @@ func distBackupPlanSpecs(
// which is not the leaseholder for any of the spans, but is for an
// introduced span.
spec := &execinfrapb.BackupDataSpec{
- JobID: jobID,
IntroducedSpans: partition.Spans,
DefaultURI: defaultURI,
URIsByLocalityKV: urisByLocalityKV,
@@ -155,6 +153,7 @@ func distBackup(
) error {
ctx, span := tracing.ChildSpan(ctx, "backup-distsql")
defer span.Finish()
+ ctx = logtags.AddTag(ctx, "backup-distsql", nil)
evalCtx := execCtx.ExtendedEvalContext()
var noTxn *kv.Txn
diff --git a/pkg/ccl/backupccl/backup_test.go b/pkg/ccl/backupccl/backup_test.go
index 767a78d3e1..57408ca3f3 100644
--- a/pkg/ccl/backupccl/backup_test.go
+++ b/pkg/ccl/backupccl/backup_test.go
@@ -91,7 +91,7 @@ import (
"github.com/cockroachdb/errors/oserror"
"github.com/cockroachdb/logtags"
"github.com/gogo/protobuf/proto"
- pgx "github.com/jackc/pgx/v4"
+ "github.com/jackc/pgx/v4"
"github.com/kr/pretty"
"github.com/lib/pq"
"github.com/stretchr/testify/assert"
@@ -9353,8 +9353,6 @@ func TestGCDropIndexSpanExpansion(t *testing.T) {
defer leaktest.AfterTest(t)()
defer log.Scope(t).Close(t)
- skip.UnderRace(t, "takes >1 min under race")
-
aboutToGC := make(chan struct{})
allowGC := make(chan struct{})
var gcJobID jobspb.JobID
diff --git a/pkg/ccl/backupccl/create_scheduled_backup.go b/pkg/ccl/backupccl/create_scheduled_backup.go
index bccdcd5ba2..47dda37e09 100644
--- a/pkg/ccl/backupccl/create_scheduled_backup.go
+++ b/pkg/ccl/backupccl/create_scheduled_backup.go
@@ -20,6 +20,7 @@ import (
"github.com/cockroachdb/cockroach/pkg/jobs"
"github.com/cockroachdb/cockroach/pkg/jobs/jobspb"
"github.com/cockroachdb/cockroach/pkg/kv"
+ "github.com/cockroachdb/cockroach/pkg/roachpb"
"github.com/cockroachdb/cockroach/pkg/scheduledjobs"
"github.com/cockroachdb/cockroach/pkg/security"
"github.com/cockroachdb/cockroach/pkg/server/telemetry"
@@ -269,7 +270,7 @@ func canChainProtectedTimestampRecords(p sql.PlanHookState, eval *scheduledBacku
}
// Return true if the backup has table targets or is backing up a tenant.
- return eval.Targets.Tables != nil || eval.Targets.TenantID.IsSet()
+ return eval.Targets.Tables != nil || eval.Targets.Tenant != roachpb.TenantID{}
}
// doCreateBackupSchedule creates requested schedule (or schedules).
diff --git a/pkg/ccl/backupccl/manifest_handling.go b/pkg/ccl/backupccl/manifest_handling.go
index 4cc4f60f97..06a4d80ddc 100644
--- a/pkg/ccl/backupccl/manifest_handling.go
+++ b/pkg/ccl/backupccl/manifest_handling.go
@@ -27,7 +27,6 @@ import (
"github.com/cockroachdb/cockroach/pkg/jobs/jobspb"
"github.com/cockroachdb/cockroach/pkg/roachpb"
"github.com/cockroachdb/cockroach/pkg/security"
- "github.com/cockroachdb/cockroach/pkg/settings"
"github.com/cockroachdb/cockroach/pkg/settings/cluster"
"github.com/cockroachdb/cockroach/pkg/sql/catalog"
"github.com/cockroachdb/cockroach/pkg/sql/catalog/descbuilder"
@@ -99,13 +98,6 @@ const (
latestHistoryDirectory = "latest"
)
-var writeMetadataSST = settings.RegisterBoolSetting(
- settings.TenantWritable,
- "kv.bulkio.write_metadata_sst.enabled",
- "write experimental new format BACKUP metadata file",
- true,
-)
-
// isGZipped detects whether the given bytes represent GZipped data. This check
// is used rather than a standard implementation such as http.DetectContentType
// since some zipped data may be mis-identified by that method. We've seen
diff --git a/pkg/ccl/backupccl/restore_data_processor.go b/pkg/ccl/backupccl/restore_data_processor.go
index 987e5c0293..504532bf91 100644
--- a/pkg/ccl/backupccl/restore_data_processor.go
+++ b/pkg/ccl/backupccl/restore_data_processor.go
@@ -35,7 +35,6 @@ import (
"github.com/cockroachdb/cockroach/pkg/util/log"
"github.com/cockroachdb/cockroach/pkg/util/protoutil"
"github.com/cockroachdb/errors"
- "github.com/cockroachdb/logtags"
gogotypes "github.com/gogo/protobuf/types"
)
@@ -55,6 +54,9 @@ type restoreDataProcessor struct {
// is updated, the job should be PAUSEd and RESUMEd for the new setting to
// take effect.
numWorkers int
+ // flushBytes is the maximum buffer size used when creating SSTs to flush. It
+ // remains constant over the lifetime of the processor.
+ flushBytes int64
// phaseGroup manages the phases of the restore:
// 1) reading entries from the input
@@ -145,6 +147,7 @@ func newRestoreDataProcessor(
progCh: make(chan RestoreProgress, maxConcurrentRestoreWorkers),
metaCh: make(chan *execinfrapb.ProducerMetadata, 1),
numWorkers: int(numRestoreWorkers.Get(sv)),
+ flushBytes: bulk.IngestFileSize(flowCtx.Cfg.Settings),
}
if err := rd.Init(rd, post, restoreDataOutputTypes, flowCtx, processorID, output, nil, /* memMonitor */
@@ -162,7 +165,6 @@ func newRestoreDataProcessor(
// Start is part of the RowSource interface.
func (rd *restoreDataProcessor) Start(ctx context.Context) {
- ctx = logtags.AddTag(ctx, "job", rd.spec.JobID)
ctx = rd.StartInternal(ctx, restoreDataProcName)
rd.input.Start(ctx)
@@ -427,6 +429,7 @@ func (rd *restoreDataProcessor) processRestoreSpanEntry(
batcher, err := bulk.MakeSSTBatcher(ctx,
db,
evalCtx.Settings,
+ func() int64 { return rd.flushBytes },
disallowShadowingBelow,
writeAtBatchTS,
)
diff --git a/pkg/ccl/backupccl/restore_data_processor_test.go b/pkg/ccl/backupccl/restore_data_processor_test.go
index 3031186993..7c31e027bb 100644
--- a/pkg/ccl/backupccl/restore_data_processor_test.go
+++ b/pkg/ccl/backupccl/restore_data_processor_test.go
@@ -221,7 +221,7 @@ func runTestIngest(t *testing.T, init func(*cluster.Settings)) {
if r < 0 {
return nil
}
- return roachpb.NewError(roachpb.NewAmbiguousResultErrorf("%d", r))
+ return roachpb.NewError(roachpb.NewAmbiguousResultError(strconv.Itoa(int(r))))
},
},
}}
diff --git a/pkg/ccl/backupccl/restore_job.go b/pkg/ccl/backupccl/restore_job.go
index 22d53ddc36..3e9d9acb3a 100644
--- a/pkg/ccl/backupccl/restore_job.go
+++ b/pkg/ccl/backupccl/restore_job.go
@@ -357,7 +357,6 @@ func restore(
return distRestore(
ctx,
execCtx,
- int64(job.ID()),
importSpanChunks,
dataToRestore.getPKIDs(),
encryption,
diff --git a/pkg/ccl/backupccl/restore_planning.go b/pkg/ccl/backupccl/restore_planning.go
index 42855b3579..b080898d59 100644
--- a/pkg/ccl/backupccl/restore_planning.go
+++ b/pkg/ccl/backupccl/restore_planning.go
@@ -868,9 +868,7 @@ func maybeUpgradeDescriptors(descs []catalog.Descriptor, skipFKsWithNoMatchingTa
} else {
b = desc.NewBuilder()
}
- if err := b.RunPostDeserializationChanges(); err != nil {
- return errors.NewAssertionErrorWithWrappedErrf(err, "error during RunPostDeserializationChanges")
- }
+ b.RunPostDeserializationChanges()
err := b.RunRestoreChanges(func(id descpb.ID) catalog.Descriptor {
for _, d := range descs {
if d.GetID() == id {
@@ -1111,7 +1109,7 @@ func restorePlanHook(
var newTenantIDFn func() (*roachpb.TenantID, error)
if restoreStmt.Options.AsTenant != nil {
- if restoreStmt.DescriptorCoverage == tree.AllDescriptors || !restoreStmt.Targets.TenantID.IsSet() {
+ if restoreStmt.DescriptorCoverage == tree.AllDescriptors || !restoreStmt.Targets.Tenant.IsSet() {
err := errors.Errorf("%q can only be used when running RESTORE TENANT for a single tenant", restoreOptAsTenant)
return nil, nil, nil, false, err
}
@@ -1288,7 +1286,7 @@ func checkPrivilegesForRestore(
"only users with the admin role are allowed to restore full cluster backups")
}
// Do not allow tenant restores.
- if restoreStmt.Targets.TenantID.IsSet() {
+ if restoreStmt.Targets.Tenant != (roachpb.TenantID{}) {
return pgerror.Newf(
pgcode.InsufficientPrivilege,
"only users with the admin role can perform RESTORE TENANT")
diff --git a/pkg/ccl/backupccl/restore_processor_planning.go b/pkg/ccl/backupccl/restore_processor_planning.go
index 04dda3069d..39525b357f 100644
--- a/pkg/ccl/backupccl/restore_processor_planning.go
+++ b/pkg/ccl/backupccl/restore_processor_planning.go
@@ -26,6 +26,7 @@ import (
"github.com/cockroachdb/cockroach/pkg/sql/types"
"github.com/cockroachdb/cockroach/pkg/util/hlc"
"github.com/cockroachdb/errors"
+ "github.com/cockroachdb/logtags"
)
// distRestore plans a 2 stage distSQL flow for a distributed restore. It
@@ -40,7 +41,6 @@ import (
func distRestore(
ctx context.Context,
execCtx sql.JobExecContext,
- jobID int64,
chunks [][]execinfrapb.RestoreSpanEntry,
pkIDs map[uint64]bool,
encryption *jobspb.BackupEncryptionOptions,
@@ -49,6 +49,7 @@ func distRestore(
restoreTime hlc.Timestamp,
progCh chan *execinfrapb.RemoteProducerMetadata_BulkProcessorProgress,
) error {
+ ctx = logtags.AddTag(ctx, "restore-distsql", nil)
defer close(progCh)
var noTxn *kv.Txn
@@ -88,7 +89,6 @@ func distRestore(
}
restoreDataSpec := execinfrapb.RestoreDataSpec{
- JobID: jobID,
RestoreTime: restoreTime,
Encryption: fileEncryption,
TableRekeys: tableRekeys,
diff --git a/pkg/ccl/backupccl/show.go b/pkg/ccl/backupccl/show.go
index af16e14b0b..9e90e0865c 100644
--- a/pkg/ccl/backupccl/show.go
+++ b/pkg/ccl/backupccl/show.go
@@ -34,7 +34,6 @@ import (
"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
"github.com/cockroachdb/cockroach/pkg/sql/types"
"github.com/cockroachdb/cockroach/pkg/util/encoding"
- "github.com/cockroachdb/cockroach/pkg/util/json"
"github.com/cockroachdb/cockroach/pkg/util/log"
"github.com/cockroachdb/cockroach/pkg/util/mon"
"github.com/cockroachdb/cockroach/pkg/util/timeutil"
@@ -164,54 +163,6 @@ func (m manifestInfoReader) showBackup(
return nil
}
-type metadataSSTInfoReader struct{}
-
-var _ backupInfoReader = manifestInfoReader{}
-
-func (m metadataSSTInfoReader) header() colinfo.ResultColumns {
- return colinfo.ResultColumns{
- {Name: "file", Typ: types.String},
- {Name: "key", Typ: types.String},
- {Name: "detail", Typ: types.Jsonb},
- }
-}
-
-func (m metadataSSTInfoReader) showBackup(
- ctx context.Context,
- mem *mon.BoundAccount,
- store cloud.ExternalStorage,
- incStore cloud.ExternalStorage,
- enc *jobspb.BackupEncryptionOptions,
- incPaths []string,
- resultsCh chan<- tree.Datums,
-) error {
- filename := metadataSSTName
- push := func(_, readable string, value json.JSON) error {
- val := tree.DNull
- if value != nil {
- val = tree.NewDJSON(value)
- }
- select {
- case <-ctx.Done():
- return ctx.Err()
- case resultsCh <- []tree.Datum{tree.NewDString(filename), tree.NewDString(readable), val}:
- return nil
- }
- }
-
- if err := DebugDumpMetadataSST(ctx, store, filename, enc, push); err != nil {
- return err
- }
-
- for _, i := range incPaths {
- filename = strings.TrimSuffix(i, backupManifestName) + metadataSSTName
- if err := DebugDumpMetadataSST(ctx, incStore, filename, enc, push); err != nil {
- return err
- }
- }
- return nil
-}
-
// showBackupPlanHook implements PlanHookFn.
func showBackupPlanHook(
ctx context.Context, stmt tree.Statement, p sql.PlanHookState,
@@ -239,13 +190,12 @@ func showBackupPlanHook(
}
expected := map[string]sql.KVStringOptValidate{
- backupOptEncPassphrase: sql.KVStringOptRequireValue,
- backupOptEncKMS: sql.KVStringOptRequireValue,
- backupOptWithPrivileges: sql.KVStringOptRequireNoValue,
- backupOptAsJSON: sql.KVStringOptRequireNoValue,
- backupOptWithDebugIDs: sql.KVStringOptRequireNoValue,
- backupOptIncStorage: sql.KVStringOptRequireValue,
- backupOptDebugMetadataSST: sql.KVStringOptRequireNoValue,
+ backupOptEncPassphrase: sql.KVStringOptRequireValue,
+ backupOptEncKMS: sql.KVStringOptRequireValue,
+ backupOptWithPrivileges: sql.KVStringOptRequireNoValue,
+ backupOptAsJSON: sql.KVStringOptRequireNoValue,
+ backupOptWithDebugIDs: sql.KVStringOptRequireNoValue,
+ backupOptIncStorage: sql.KVStringOptRequireValue,
}
optsFn, err := p.TypeAsStringOpts(ctx, backup.Options, expected)
if err != nil {
@@ -261,22 +211,18 @@ func showBackupPlanHook(
}
var infoReader backupInfoReader
- if _, dumpSST := opts[backupOptDebugMetadataSST]; dumpSST {
- infoReader = metadataSSTInfoReader{}
- } else {
- var shower backupShower
- switch backup.Details {
- case tree.BackupRangeDetails:
- shower = backupShowerRanges
- case tree.BackupFileDetails:
- shower = backupShowerFiles
- case tree.BackupManifestAsJSON:
- shower = jsonShower
- default:
- shower = backupShowerDefault(ctx, p, backup.ShouldIncludeSchemas, opts)
- }
- infoReader = manifestInfoReader{shower}
+ var shower backupShower
+ switch backup.Details {
+ case tree.BackupRangeDetails:
+ shower = backupShowerRanges
+ case tree.BackupFileDetails:
+ shower = backupShowerFiles
+ case tree.BackupManifestAsJSON:
+ shower = jsonShower
+ default:
+ shower = backupShowerDefault(ctx, p, backup.ShouldIncludeSchemas, opts)
}
+ infoReader = manifestInfoReader{shower}
fn := func(ctx context.Context, _ []sql.PlanNode, resultsCh chan<- tree.Datums) error {
// TODO(dan): Move this span into sql.
diff --git a/pkg/ccl/backupccl/split_and_scatter_processor.go b/pkg/ccl/backupccl/split_and_scatter_processor.go
index a8cf6daa27..b458448535 100644
--- a/pkg/ccl/backupccl/split_and_scatter_processor.go
+++ b/pkg/ccl/backupccl/split_and_scatter_processor.go
@@ -29,7 +29,6 @@ import (
"github.com/cockroachdb/cockroach/pkg/util/protoutil"
"github.com/cockroachdb/cockroach/pkg/util/stop"
"github.com/cockroachdb/errors"
- "github.com/cockroachdb/logtags"
)
type splitAndScatterer interface {
@@ -245,7 +244,6 @@ func newSplitAndScatterProcessor(
// Start is part of the RowSource interface.
func (ssp *splitAndScatterProcessor) Start(ctx context.Context) {
- ctx = logtags.AddTag(ctx, "job", ssp.spec.JobID)
ctx = ssp.StartInternal(ctx, splitAndScatterProcessorName)
// Note that the loop over doneScatterCh in Next should prevent the goroutine
// below from leaking when there are no errors. However, if that loop needs to
diff --git a/pkg/ccl/backupccl/targets.go b/pkg/ccl/backupccl/targets.go
index 6ebfec397f..b5593ad14e 100644
--- a/pkg/ccl/backupccl/targets.go
+++ b/pkg/ccl/backupccl/targets.go
@@ -366,15 +366,15 @@ func selectTargets(
return systemTables, nil, nil, nil
}
- if targets.TenantID.IsSet() {
+ if targets.Tenant != (roachpb.TenantID{}) {
for _, tenant := range lastBackupManifest.GetTenants() {
// TODO(dt): for now it is zero-or-one but when that changes, we should
// either keep it sorted or build a set here.
- if tenant.ID == targets.TenantID.ToUint64() {
+ if tenant.ID == targets.Tenant.ToUint64() {
return nil, nil, []descpb.TenantInfoWithUsage{tenant}, nil
}
}
- return nil, nil, nil, errors.Errorf("tenant %d not in backup", targets.TenantID.ToUint64())
+ return nil, nil, nil, errors.Errorf("tenant %d not in backup", targets.Tenant.ToUint64())
}
matched, err := backupresolver.DescriptorsMatchingTargets(ctx,
diff --git a/pkg/ccl/benchccl/rttanalysisccl/BUILD.bazel b/pkg/ccl/benchccl/rttanalysisccl/BUILD.bazel
index d502e43257..a7a0b17dc9 100644
--- a/pkg/ccl/benchccl/rttanalysisccl/BUILD.bazel
+++ b/pkg/ccl/benchccl/rttanalysisccl/BUILD.bazel
@@ -8,7 +8,6 @@ go_test(
"multi_region_bench_test.go",
],
data = glob(["testdata/**"]),
- shard_count = 16,
deps = [
"//pkg/base",
"//pkg/bench/rttanalysis",
diff --git a/pkg/ccl/changefeedccl/BUILD.bazel b/pkg/ccl/changefeedccl/BUILD.bazel
index 43da7d3ba7..9f20d86960 100644
--- a/pkg/ccl/changefeedccl/BUILD.bazel
+++ b/pkg/ccl/changefeedccl/BUILD.bazel
@@ -108,7 +108,6 @@ go_library(
"@com_github_cockroachdb_apd_v3//:apd",
"@com_github_cockroachdb_errors//:errors",
"@com_github_cockroachdb_logtags//:logtags",
- "@com_github_cockroachdb_redact//:redact",
"@com_github_google_btree//:btree",
"@com_github_linkedin_goavro_v2//:goavro",
"@com_github_shopify_sarama//:sarama",
@@ -182,6 +181,7 @@ go_test(
"//pkg/server/status",
"//pkg/server/telemetry",
"//pkg/settings/cluster",
+ "//pkg/sql",
"//pkg/sql/catalog",
"//pkg/sql/catalog/bootstrap",
"//pkg/sql/catalog/descbuilder",
diff --git a/pkg/ccl/changefeedccl/avro.go b/pkg/ccl/changefeedccl/avro.go
index 85d48b002f..87dcabe960 100644
--- a/pkg/ccl/changefeedccl/avro.go
+++ b/pkg/ccl/changefeedccl/avro.go
@@ -18,7 +18,6 @@ import (
"github.com/cockroachdb/cockroach/pkg/geo"
"github.com/cockroachdb/cockroach/pkg/geo/geopb"
"github.com/cockroachdb/cockroach/pkg/sql/catalog"
- "github.com/cockroachdb/cockroach/pkg/sql/catalog/descpb"
"github.com/cockroachdb/cockroach/pkg/sql/rowenc"
"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
"github.com/cockroachdb/cockroach/pkg/sql/types"
@@ -757,21 +756,11 @@ const (
// appended to the end of the avro record's name.
func tableToAvroSchema(
tableDesc catalog.TableDescriptor,
- familyID descpb.FamilyID,
nameSuffix string,
namespace string,
virtualColumnVisibility string,
) (*avroDataRecord, error) {
- family, err := tableDesc.FindFamilyByID(familyID)
- if err != nil {
- return nil, err
- }
- var name string
- if tableDesc.NumFamilies() > 1 {
- name = SQLNameToAvroName(tableDesc.GetName() + family.Name)
- } else {
- name = SQLNameToAvroName(tableDesc.GetName())
- }
+ name := SQLNameToAvroName(tableDesc.GetName())
if nameSuffix != avroSchemaNoSuffix {
name = name + `_` + nameSuffix
}
@@ -785,29 +774,18 @@ func tableToAvroSchema(
colIdxByFieldIdx: make(map[int]int),
fieldIdxByColIdx: make(map[int]int),
}
-
- include := make(map[descpb.ColumnID]struct{}, len(family.ColumnIDs))
- var yes struct{}
- for _, colID := range family.ColumnIDs {
- include[colID] = yes
- }
-
for _, col := range tableDesc.PublicColumns() {
+ if col.IsVirtual() && virtualColumnVisibility == string(changefeedbase.OptVirtualColumnsOmitted) {
+ continue
+ }
+ field, err := columnToAvroSchema(col)
if err != nil {
return nil, err
}
- _, inFamily := include[col.GetID()]
- virtual := col.IsVirtual() && virtualColumnVisibility == string(changefeedbase.OptVirtualColumnsNull)
- if inFamily || virtual {
- field, err := columnToAvroSchema(col)
- if err != nil {
- return nil, err
- }
- schema.colIdxByFieldIdx[len(schema.Fields)] = col.Ordinal()
- schema.fieldIdxByName[field.Name] = len(schema.Fields)
- schema.fieldIdxByColIdx[col.Ordinal()] = len(schema.Fields)
- schema.Fields = append(schema.Fields, field)
- }
+ schema.colIdxByFieldIdx[len(schema.Fields)] = col.Ordinal()
+ schema.fieldIdxByName[field.Name] = len(schema.Fields)
+ schema.fieldIdxByColIdx[col.Ordinal()] = len(schema.Fields)
+ schema.Fields = append(schema.Fields, field)
}
schemaJSON, err := json.Marshal(schema)
if err != nil {
diff --git a/pkg/ccl/changefeedccl/avro_test.go b/pkg/ccl/changefeedccl/avro_test.go
index 233f8acb41..296672626b 100644
--- a/pkg/ccl/changefeedccl/avro_test.go
+++ b/pkg/ccl/changefeedccl/avro_test.go
@@ -52,8 +52,6 @@ import (
var testTypes = make(map[string]*types.T)
var testTypeResolver = tree.MakeTestingMapTypeResolver(testTypes)
-const primary = descpb.FamilyID(0)
-
func makeTestSemaCtx() tree.SemaContext {
testSemaCtx := tree.MakeSemaContext()
testSemaCtx.TypeResolver = testTypeResolver
@@ -79,13 +77,6 @@ func parseTableDesc(createTableStmt string) (catalog.TableDescriptor, error) {
if err != nil {
return nil, err
}
- columnNames := make([]string, len(mutDesc.PublicColumns()))
- for i, col := range mutDesc.PublicColumns() {
- columnNames[i] = col.GetName()
- }
- mutDesc.Families = []descpb.ColumnFamilyDescriptor{
- {ID: primary, Name: "primary", ColumnIDs: mutDesc.PublicColumnIDs(), ColumnNames: columnNames},
- }
return mutDesc, descbuilder.ValidateSelf(mutDesc, clusterversion.TestingClusterVersion)
}
@@ -139,7 +130,7 @@ func parseAvroSchema(j string) (*avroDataRecord, error) {
tableDesc := descpb.TableDescriptor{
Name: AvroNameToSQLName(s.Name),
}
- for i, f := range s.Fields {
+ for _, f := range s.Fields {
// s.Fields[idx] has `Name` and `SchemaType` set but nothing else.
// They're needed for serialization/deserialization, so fake out a
// column descriptor so that we can reuse columnToAvroSchema to get
@@ -148,19 +139,9 @@ func parseAvroSchema(j string) (*avroDataRecord, error) {
if err != nil {
return nil, err
}
- colDesc.ID = descpb.ColumnID(i)
tableDesc.Columns = append(tableDesc.Columns, *colDesc)
}
- columnNames := make([]string, len(tableDesc.Columns))
- columnIDs := make([]descpb.ColumnID, len(tableDesc.Columns))
- for i, col := range tableDesc.Columns {
- columnNames[i] = col.Name
- columnIDs[i] = col.ID
- }
- tableDesc.Families = []descpb.ColumnFamilyDescriptor{
- {ID: primary, Name: "primary", ColumnIDs: columnIDs, ColumnNames: columnNames},
- }
- return tableToAvroSchema(tabledesc.NewBuilder(&tableDesc).BuildImmutableTable(), primary, avroSchemaNoSuffix, "", string(changefeedbase.OptVirtualColumnsOmitted))
+ return tableToAvroSchema(tabledesc.NewBuilder(&tableDesc).BuildImmutableTable(), avroSchemaNoSuffix, "", string(changefeedbase.OptVirtualColumnsOmitted))
}
func avroFieldMetadataToColDesc(metadata string) (*descpb.ColumnDescriptor, error) {
@@ -383,7 +364,7 @@ func TestAvroSchema(t *testing.T) {
tableDesc, err := parseTableDesc(
fmt.Sprintf(`CREATE TABLE "%s" %s`, test.name, test.schema))
require.NoError(t, err)
- origSchema, err := tableToAvroSchema(tableDesc, primary, avroSchemaNoSuffix, "", string(changefeedbase.OptVirtualColumnsOmitted))
+ origSchema, err := tableToAvroSchema(tableDesc, avroSchemaNoSuffix, "", string(changefeedbase.OptVirtualColumnsOmitted))
require.NoError(t, err)
jsonSchema := origSchema.codec.Schema()
roundtrippedSchema, err := parseAvroSchema(jsonSchema)
@@ -419,7 +400,7 @@ func TestAvroSchema(t *testing.T) {
t.Run("escaping", func(t *testing.T) {
tableDesc, err := parseTableDesc(`CREATE TABLE "☃" (🍦 INT PRIMARY KEY)`)
require.NoError(t, err)
- tableSchema, err := tableToAvroSchema(tableDesc, primary, avroSchemaNoSuffix, "", string(changefeedbase.OptVirtualColumnsOmitted))
+ tableSchema, err := tableToAvroSchema(tableDesc, avroSchemaNoSuffix, "", string(changefeedbase.OptVirtualColumnsOmitted))
require.NoError(t, err)
require.Equal(t,
`{"type":"record","name":"_u2603_","fields":[`+
@@ -626,7 +607,7 @@ func TestAvroSchema(t *testing.T) {
rows, err := parseValues(tableDesc, `VALUES (1, `+test.sql+`)`)
require.NoError(t, err)
- schema, err := tableToAvroSchema(tableDesc, primary, avroSchemaNoSuffix, "", string(changefeedbase.OptVirtualColumnsOmitted))
+ schema, err := tableToAvroSchema(tableDesc, avroSchemaNoSuffix, "", string(changefeedbase.OptVirtualColumnsOmitted))
require.NoError(t, err)
textual, err := schema.textualFromRow(rows[0])
require.NoError(t, err)
@@ -676,7 +657,7 @@ func TestAvroSchema(t *testing.T) {
rows, err := parseValues(tableDesc, `VALUES (1, `+test.sql+`)`)
require.NoError(t, err)
- schema, err := tableToAvroSchema(tableDesc, primary, avroSchemaNoSuffix, "", string(changefeedbase.OptVirtualColumnsOmitted))
+ schema, err := tableToAvroSchema(tableDesc, avroSchemaNoSuffix, "", string(changefeedbase.OptVirtualColumnsOmitted))
require.NoError(t, err)
textual, err := schema.textualFromRow(rows[0])
require.NoError(t, err)
@@ -779,12 +760,12 @@ func TestAvroMigration(t *testing.T) {
writerDesc, err := parseTableDesc(
fmt.Sprintf(`CREATE TABLE "%s" %s`, test.name, test.writerSchema))
require.NoError(t, err)
- writerSchema, err := tableToAvroSchema(writerDesc, primary, avroSchemaNoSuffix, "", string(changefeedbase.OptVirtualColumnsOmitted))
+ writerSchema, err := tableToAvroSchema(writerDesc, avroSchemaNoSuffix, "", string(changefeedbase.OptVirtualColumnsOmitted))
require.NoError(t, err)
readerDesc, err := parseTableDesc(
fmt.Sprintf(`CREATE TABLE "%s" %s`, test.name, test.readerSchema))
require.NoError(t, err)
- readerSchema, err := tableToAvroSchema(readerDesc, primary, avroSchemaNoSuffix, "", string(changefeedbase.OptVirtualColumnsOmitted))
+ readerSchema, err := tableToAvroSchema(readerDesc, avroSchemaNoSuffix, "", string(changefeedbase.OptVirtualColumnsOmitted))
require.NoError(t, err)
writerRows, err := parseValues(writerDesc, `VALUES `+test.writerValues)
@@ -861,7 +842,7 @@ func benchmarkEncodeType(b *testing.B, typ *types.T, encRow rowenc.EncDatumRow)
tableDesc, err := parseTableDesc(
fmt.Sprintf(`CREATE TABLE bench_table (bench_field %s)`, typ.SQLString()))
require.NoError(b, err)
- schema, err := tableToAvroSchema(tableDesc, primary, "suffix", "namespace", string(changefeedbase.OptVirtualColumnsOmitted))
+ schema, err := tableToAvroSchema(tableDesc, "suffix", "namespace", string(changefeedbase.OptVirtualColumnsOmitted))
require.NoError(b, err)
b.ReportAllocs()
diff --git a/pkg/ccl/changefeedccl/changefeed_processors.go b/pkg/ccl/changefeedccl/changefeed_processors.go
index 62aeda2b82..74cab69d99 100644
--- a/pkg/ccl/changefeedccl/changefeed_processors.go
+++ b/pkg/ccl/changefeedccl/changefeed_processors.go
@@ -47,8 +47,6 @@ import (
"github.com/cockroachdb/cockroach/pkg/util/timeutil"
"github.com/cockroachdb/cockroach/pkg/util/uuid"
"github.com/cockroachdb/errors"
- "github.com/cockroachdb/logtags"
- "github.com/cockroachdb/redact"
)
type changeAggregator struct {
@@ -204,9 +202,6 @@ func (ca *changeAggregator) MustBeStreaming() bool {
// Start is part of the RowSource interface.
func (ca *changeAggregator) Start(ctx context.Context) {
- if ca.spec.JobID != 0 {
- ctx = logtags.AddTag(ctx, "job", ca.spec.JobID)
- }
ctx = ca.StartInternal(ctx, changeAggregatorProcName)
// Derive a separate context so that we can shutdown the poller. Note that
@@ -356,30 +351,29 @@ func (ca *changeAggregator) makeKVFeedCfg(
sf = schemafeed.DoNothingSchemaFeed
} else {
sf = schemafeed.New(ctx, cfg, schemaChangeEvents, AllTargets(ca.spec.Feed),
- initialHighWater, &ca.metrics.SchemaFeedMetrics, ca.spec.Feed.Opts)
+ initialHighWater, &ca.metrics.SchemaFeedMetrics)
}
return kvfeed.Config{
- Writer: buf,
- Settings: cfg.Settings,
- DB: cfg.DB,
- Codec: cfg.Codec,
- Clock: cfg.DB.Clock(),
- Gossip: cfg.Gossip,
- Spans: spans,
- BackfillCheckpoint: ca.spec.Checkpoint.Spans,
- Targets: AllTargets(ca.spec.Feed),
- Metrics: &ca.metrics.KVFeedMetrics,
- OnBackfillCallback: ca.sliMetrics.getBackfillCallback(),
- OnBackfillRangeCallback: ca.sliMetrics.getBackfillRangeCallback(),
- MM: ca.kvFeedMemMon,
- InitialHighWater: initialHighWater,
- WithDiff: withDiff,
- NeedsInitialScan: needsInitialScan,
- SchemaChangeEvents: schemaChangeEvents,
- SchemaChangePolicy: schemaChangePolicy,
- SchemaFeed: sf,
- Knobs: ca.knobs.FeedKnobs,
+ Writer: buf,
+ Settings: cfg.Settings,
+ DB: cfg.DB,
+ Codec: cfg.Codec,
+ Clock: cfg.DB.Clock(),
+ Gossip: cfg.Gossip,
+ Spans: spans,
+ BackfillCheckpoint: ca.spec.Checkpoint.Spans,
+ Targets: AllTargets(ca.spec.Feed),
+ Metrics: &ca.metrics.KVFeedMetrics,
+ OnBackfillCallback: ca.sliMetrics.getBackfillCallback(),
+ MM: ca.kvFeedMemMon,
+ InitialHighWater: initialHighWater,
+ WithDiff: withDiff,
+ NeedsInitialScan: needsInitialScan,
+ SchemaChangeEvents: schemaChangeEvents,
+ SchemaChangePolicy: schemaChangePolicy,
+ SchemaFeed: sf,
+ Knobs: ca.knobs.FeedKnobs,
}
}
@@ -774,15 +768,13 @@ func (c *kvEventToRowConsumer) eventToRow(
prevSchemaTimestamp = schemaTimestamp.Prev()
}
- desc, family, err := c.rfCache.TableDescForKey(ctx, event.KV().Key, schemaTimestamp)
+ desc, err := c.rfCache.TableDescForKey(ctx, event.KV().Key, schemaTimestamp)
if err != nil {
return r, err
}
r.tableDesc = desc
- r.familyID = family
- var rf *row.Fetcher
- rf, err = c.rfCache.RowFetcherForColumnFamily(desc, family)
+ rf, err := c.rfCache.RowFetcherForTableDesc(desc)
if err != nil {
return r, err
}
@@ -826,20 +818,17 @@ func (c *kvEventToRowConsumer) eventToRow(
if withDiff {
prevRF := rf
r.prevTableDesc = r.tableDesc
- r.prevFamilyID = r.familyID
if prevSchemaTimestamp != schemaTimestamp {
// If the previous value is being interpreted under a different
// version of the schema, fetch the correct table descriptor and
// create a new row.Fetcher with it.
- prevDesc, family, err := c.rfCache.TableDescForKey(ctx, event.KV().Key, prevSchemaTimestamp)
-
+ prevDesc, err := c.rfCache.TableDescForKey(ctx, event.KV().Key, prevSchemaTimestamp)
if err != nil {
return r, err
}
- r.prevTableDesc = prevDesc
- r.prevFamilyID = family
- prevRF, err = c.rfCache.RowFetcherForColumnFamily(prevDesc, family)
+ r.prevTableDesc = prevDesc
+ prevRF, err = c.rfCache.RowFetcherForTableDesc(prevDesc)
if err != nil {
return r, err
}
@@ -1142,9 +1131,6 @@ func (cf *changeFrontier) MustBeStreaming() bool {
// Start is part of the RowSource interface.
func (cf *changeFrontier) Start(ctx context.Context) {
- if cf.spec.JobID != 0 {
- ctx = logtags.AddTag(ctx, "job", cf.spec.JobID)
- }
// StartInternal called at the beginning of the function because there are
// early returns if errors are detected.
ctx = cf.StartInternal(ctx, changeFrontierProcName)
@@ -1363,7 +1349,7 @@ func (cf *changeFrontier) noteAggregatorProgress(d rowenc.EncDatum) error {
if !resolved.Timestamp.IsEmpty() && resolved.Timestamp.Less(cf.highWaterAtStart) {
logcrash.ReportOrPanic(cf.Ctx, &cf.flowCtx.Cfg.Settings.SV,
`got a span level timestamp %s for %s that is less than the initial high-water %s`,
- redact.Safe(resolved.Timestamp), resolved.Span, redact.Safe(cf.highWaterAtStart))
+ log.Safe(resolved.Timestamp), resolved.Span, log.Safe(cf.highWaterAtStart))
continue
}
if err := cf.forwardFrontier(resolved); err != nil {
diff --git a/pkg/ccl/changefeedccl/changefeed_stmt.go b/pkg/ccl/changefeedccl/changefeed_stmt.go
index 78276ca28e..23012b33e9 100644
--- a/pkg/ccl/changefeedccl/changefeed_stmt.go
+++ b/pkg/ccl/changefeedccl/changefeed_stmt.go
@@ -112,7 +112,6 @@ func changefeedPlanHook(
}
}
- //TODO: We're passing around the full output of optsFn a lot, make it a type.
optsFn, err := p.TypeAsStringOpts(ctx, changefeedStmt.Options, changefeedbase.ChangefeedOptionExpectValues)
if err != nil {
return nil, nil, nil, false, err
@@ -185,9 +184,11 @@ func changefeedPlanHook(
jobID := p.ExecCfg().JobRegistry.MakeJobID()
{
var ptr *ptpb.Record
+ var protectedTimestampID uuid.UUID
codec := p.ExecCfg().Codec
if shouldProtectTimestamps(codec) {
ptr = createProtectedTimestampRecord(ctx, codec, jobID, AllTargets(details), details.StatementTime, progress.GetChangefeed())
+ protectedTimestampID = ptr.ID.GetUUID()
}
jr.Progress = *progress.GetChangefeed()
@@ -208,6 +209,19 @@ func changefeedPlanHook(
}
return err
}
+ // If we created a protected timestamp for an initial scan, verify it.
+ // Doing this synchronously here rather than asynchronously later provides
+ // a nice UX win in the case that the data isn't actually available.
+ if protectedTimestampID != uuid.Nil {
+ if err := p.ExecCfg().ProtectedTimestampProvider.Verify(ctx, protectedTimestampID); err != nil {
+ if cancelErr := sj.Cancel(ctx); cancelErr != nil {
+ if ctx.Err() == nil {
+ log.Warningf(ctx, "failed to cancel job: %v", cancelErr)
+ }
+ }
+ return err
+ }
+ }
}
// Start the job.
@@ -524,16 +538,12 @@ func getTargetsAndTables(
tables[table.GetID()] = jobspb.ChangefeedTargetTable{
StatementTimeName: name,
}
- typ := jobspb.ChangefeedTargetSpecification_PRIMARY_FAMILY_ONLY
- if len(table.GetFamilies()) > 1 {
- typ = jobspb.ChangefeedTargetSpecification_EACH_FAMILY
- }
ts := jobspb.ChangefeedTargetSpecification{
- Type: typ,
+ Type: jobspb.ChangefeedTargetSpecification_PRIMARY_FAMILY_ONLY,
TableID: table.GetID(),
}
targets = append(targets, ts)
- if err := changefeedbase.ValidateTable(targets, table, opts); err != nil {
+ if err := changefeedbase.ValidateTable(targets, table); err != nil {
return nil, nil, err
}
for _, warning := range changefeedbase.WarningsForTable(tables, table, opts) {
diff --git a/pkg/ccl/changefeedccl/changefeed_test.go b/pkg/ccl/changefeedccl/changefeed_test.go
index e64b007e6a..04f1ccb6ab 100644
--- a/pkg/ccl/changefeedccl/changefeed_test.go
+++ b/pkg/ccl/changefeedccl/changefeed_test.go
@@ -54,6 +54,7 @@ import (
"github.com/cockroachdb/cockroach/pkg/server"
"github.com/cockroachdb/cockroach/pkg/server/telemetry"
"github.com/cockroachdb/cockroach/pkg/settings/cluster"
+ "github.com/cockroachdb/cockroach/pkg/sql"
"github.com/cockroachdb/cockroach/pkg/sql/catalog/bootstrap"
"github.com/cockroachdb/cockroach/pkg/sql/catalog/descpb"
"github.com/cockroachdb/cockroach/pkg/sql/catalog/desctestutils"
@@ -799,77 +800,6 @@ func TestChangefeedInitialScan(t *testing.T) {
t.Run(`pubsub`, pubsubTest(testFn))
}
-func TestChangefeedBackfillObservability(t *testing.T) {
- defer leaktest.AfterTest(t)()
- defer log.Scope(t).Close(t)
-
- testFn := func(t *testing.T, db *gosql.DB, f cdctest.TestFeedFactory) {
- sqlDB := sqlutils.MakeSQLRunner(db)
-
- knobs := f.Server().(*server.TestServer).Cfg.TestingKnobs.
- DistSQL.(*execinfra.TestingKnobs).
- Changefeed.(*TestingKnobs)
-
- registry := f.Server().JobRegistry().(*jobs.Registry)
- sli, err := registry.MetricsStruct().Changefeed.(*Metrics).getSLIMetrics(defaultSLIScope)
- require.NoError(t, err)
- pendingRanges := sli.BackfillPendingRanges
-
- // Create a table with multiple ranges
- numRanges := 10
- rowsPerRange := 20
- sqlDB.Exec(t, fmt.Sprintf(`
- CREATE TABLE foo (key INT PRIMARY KEY);
- INSERT INTO foo (key) SELECT * FROM generate_series(1, %d);
- ALTER TABLE foo SPLIT AT (SELECT * FROM generate_series(%d, %d, %d));
- `, numRanges*rowsPerRange, rowsPerRange, (numRanges-1)*rowsPerRange, rowsPerRange))
- sqlDB.CheckQueryResults(t, `SELECT count(*) FROM [SHOW RANGES FROM TABLE foo]`,
- [][]string{{fmt.Sprint(numRanges)}},
- )
-
- // Allow control of the scans
- scanCtx, scanCancel := context.WithCancel(context.Background())
- scanChan := make(chan struct{})
- knobs.FeedKnobs.BeforeScanRequest = func(b *kv.Batch) error {
- select {
- case <-scanCtx.Done():
- return scanCtx.Err()
- case <-scanChan:
- return nil
- }
- }
-
- require.Equal(t, pendingRanges.Value(), int64(0))
- foo := feed(t, f, `CREATE CHANGEFEED FOR foo`)
- defer closeFeed(t, foo)
-
- // Progress the initial backfill halfway through its ranges
- for i := 0; i < numRanges/2; i++ {
- scanChan <- struct{}{}
- }
- testutils.SucceedsSoon(t, func() error {
- count := pendingRanges.Value()
- if count != int64(numRanges/2) {
- return fmt.Errorf("range count %d should be %d", count, numRanges/2)
- }
- return nil
- })
-
- // Ensure that the pending count is cleared if the backfill completes
- // regardless of successful scans
- scanCancel()
- testutils.SucceedsSoon(t, func() error {
- count := pendingRanges.Value()
- if count > 0 {
- return fmt.Errorf("range count %d should be 0", count)
- }
- return nil
- })
- }
-
- t.Run("enterprise", enterpriseTest(testFn, feedTestNoTenants))
-}
-
func TestChangefeedUserDefinedTypes(t *testing.T) {
defer leaktest.AfterTest(t)()
testFn := func(t *testing.T, db *gosql.DB, f cdctest.TestFeedFactory) {
@@ -1202,9 +1132,8 @@ func TestChangefeedSchemaChangeBackfillCheckpoint(t *testing.T) {
// Ensure Scan Requests are always small enough that we receive multiple
// resolved events during a backfill
- knobs.FeedKnobs.BeforeScanRequest = func(b *kv.Batch) error {
+ knobs.FeedKnobs.BeforeScanRequest = func(b *kv.Batch) {
b.Header.MaxSpanRequestKeys = 10
- return nil
}
// Setup changefeed job details, avoid relying on initial scan functionality
@@ -1773,48 +1702,15 @@ func TestChangefeedColumnFamily(t *testing.T) {
defer log.Scope(t).Close(t)
testFn := func(t *testing.T, db *gosql.DB, f cdctest.TestFeedFactory) {
-
sqlDB := sqlutils.MakeSQLRunner(db)
// Table with 2 column families.
- sqlDB.Exec(t, `CREATE TABLE foo (a INT PRIMARY KEY, b STRING, c STRING, FAMILY (a,b), FAMILY (c))`)
- sqlDB.Exec(t, `INSERT INTO foo values (0, 'dog', 'cat')`)
-
- // Must specify WITH split_column_families
- sqlDB.ExpectErr(t, `multiple column families`, `CREATE CHANGEFEED FOR foo`)
-
- foo := feed(t, f, `CREATE CHANGEFEED FOR foo WITH split_column_families`)
- defer closeFeed(t, foo)
- assertPayloads(t, foo, []string{
- `foo: [0]->{"after": {"a": 0, "b": "dog"}}`,
- `foo: [0]->{"after": {"c": "cat"}}`,
- })
-
- // No messages for unaffected column families.
- sqlDB.Exec(t, `UPDATE foo SET c='lion' WHERE a=0`)
- sqlDB.Exec(t, `UPDATE foo SET c='tiger' WHERE a=0`)
- assertPayloads(t, foo, []string{
- `foo: [0]->{"after": {"c": "lion"}}`,
- `foo: [0]->{"after": {"c": "tiger"}}`,
- })
-
- // No messages on insert for families where no non-null values were set.
- sqlDB.Exec(t, `INSERT INTO foo values (1, 'puppy', null)`)
- sqlDB.Exec(t, `INSERT INTO foo values (2, null, 'kitten')`)
- assertPayloads(t, foo, []string{
- `foo: [1]->{"after": {"a": 1, "b": "puppy"}}`,
- `foo: [2]->{"after": {"a": 2, "b": null}}`,
- `foo: [2]->{"after": {"c": "kitten"}}`,
- })
-
- // Deletes send a message for each column family.
- fooWithDiff := feed(t, f, `CREATE CHANGEFEED FOR foo WITH split_column_families, diff, no_initial_scan, resolved='1s'`)
- defer closeFeed(t, fooWithDiff)
- sqlDB.Exec(t, `DELETE FROM foo WHERE a=0`)
- assertPayloads(t, fooWithDiff, []string{
- `foo: [0]->{"after": null, "before": {"a": 0, "b": "dog"}}`,
- `foo: [0]->{"after": null, "before": {"c": "tiger"}}`,
- })
+ sqlDB.Exec(t, `CREATE TABLE foo (a INT PRIMARY KEY, b STRING, FAMILY (a), FAMILY (b))`)
+ if strings.Contains(t.Name(), `enterprise`) {
+ sqlDB.ExpectErr(t, `exactly 1 column family`, `CREATE CHANGEFEED FOR foo`)
+ } else {
+ sqlDB.ExpectErr(t, `exactly 1 column family`, `EXPERIMENTAL CHANGEFEED FOR foo`)
+ }
// Table with a second column family added after the changefeed starts.
sqlDB.Exec(t, `CREATE TABLE bar (a INT PRIMARY KEY, FAMILY f_a (a))`)
@@ -1826,116 +1722,16 @@ func TestChangefeedColumnFamily(t *testing.T) {
})
sqlDB.Exec(t, `ALTER TABLE bar ADD COLUMN b STRING CREATE FAMILY f_b`)
sqlDB.Exec(t, `INSERT INTO bar VALUES (1)`)
- if _, err := bar.Next(); !testutils.IsError(err, `created on a table with a single column family`) {
- t.Errorf(`expected "column family" error got: %+v`, err)
+ if _, err := bar.Next(); !testutils.IsError(err, `exactly 1 column family`) {
+ t.Errorf(`expected "exactly 1 column family" error got: %+v`, err)
}
}
t.Run(`sinkless`, sinklessTest(testFn))
t.Run(`enterprise`, enterpriseTest(testFn))
t.Run(`kafka`, kafkaTest(testFn))
-}
-
-func TestChangefeedEachColumnFamilySchemaChanges(t *testing.T) {
- defer leaktest.AfterTest(t)()
- defer log.Scope(t).Close(t)
-
- testFn := func(t *testing.T, db *gosql.DB, f cdctest.TestFeedFactory) {
-
- sqlDB := sqlutils.MakeSQLRunner(db)
-
- // Table with 2 column families.
- sqlDB.Exec(t, `CREATE TABLE foo (a INT PRIMARY KEY, b STRING, c STRING, FAMILY f1 (a,b), FAMILY f2 (c))`)
- sqlDB.Exec(t, `INSERT INTO foo values (0, 'dog', 'cat')`)
- foo := feed(t, f, `CREATE CHANGEFEED FOR foo WITH split_column_families`)
- defer closeFeed(t, foo)
- assertPayloads(t, foo, []string{
- `foo: [0]->{"after": {"a": 0, "b": "dog"}}`,
- `foo: [0]->{"after": {"c": "cat"}}`,
- })
-
- // Add a column to an existing family
- sqlDB.Exec(t, `ALTER TABLE foo ADD COLUMN d string DEFAULT 'hi' FAMILY f2`)
- assertPayloads(t, foo, []string{
- `foo: [0]->{"after": {"c": "cat", "d": "hi"}}`,
- })
-
- // Add a column to a new family.
- // Behavior here is a little wonky with default values in a way
- // that's likely to change with declarative schema changer,
- // so not asserting anything either way about that.
- sqlDB.Exec(t, `ALTER TABLE foo ADD COLUMN e string CREATE FAMILY f3`)
- sqlDB.Exec(t, `UPDATE foo SET e='hello' WHERE a=0`)
- assertPayloads(t, foo, []string{
- `foo: [0]->{"after": {"e": "hello"}}`,
- })
-
- }
-
- t.Run(`sinkless`, sinklessTest(testFn))
- t.Run(`enterprise`, enterpriseTest(testFn))
- t.Run(`kafka`, kafkaTest(testFn))
-}
-
-func TestChangefeedEachColumnFamilyVirtualColumns(t *testing.T) {
- defer leaktest.AfterTest(t)()
- defer log.Scope(t).Close(t)
-
- testFn := func(t *testing.T, db *gosql.DB, f cdctest.TestFeedFactory) {
-
- sqlDB := sqlutils.MakeSQLRunner(db)
-
- // Table with 2 column families.
- sqlDB.Exec(t, `CREATE TABLE foo (a INT PRIMARY KEY, b STRING, c STRING, FAMILY f1 (a,b), FAMILY f2 (c))`)
- sqlDB.Exec(t, `INSERT INTO foo values (0, 'dog', 'cat')`)
- foo := feed(t, f, `CREATE CHANGEFEED FOR foo WITH split_column_families`)
- defer closeFeed(t, foo)
- assertPayloads(t, foo, []string{
- `foo: [0]->{"after": {"a": 0, "b": "dog"}}`,
- `foo: [0]->{"after": {"c": "cat"}}`,
- })
-
- // Add a column to an existing family
- sqlDB.Exec(t, `ALTER TABLE foo ADD COLUMN d string DEFAULT 'hi' FAMILY f2`)
- assertPayloads(t, foo, []string{
- `foo: [0]->{"after": {"c": "cat", "d": "hi"}}`,
- })
-
- // Add a column to a new family.
- // Behavior here is a little wonky with default values in a way
- // that's likely to change with declarative schema changer,
- // so not asserting anything either way about that.
- sqlDB.Exec(t, `ALTER TABLE foo ADD COLUMN e string CREATE FAMILY f3`)
- sqlDB.Exec(t, `UPDATE foo SET e='hello' WHERE a=0`)
- assertPayloads(t, foo, []string{
- `foo: [0]->{"after": {"e": "hello"}}`,
- })
-
- }
-
- t.Run(`sinkless`, sinklessTest(testFn))
- t.Run(`enterprise`, enterpriseTest(testFn))
- t.Run(`kafka`, kafkaTest(testFn))
-}
-
-func TestChangefeedColumnFamilyAvro(t *testing.T) {
- defer leaktest.AfterTest(t)()
- defer log.Scope(t).Close(t)
-
- testFn := func(t *testing.T, db *gosql.DB, f cdctest.TestFeedFactory) {
- sqlDB := sqlutils.MakeSQLRunner(db)
-
- sqlDB.Exec(t, `CREATE TABLE foo (a INT PRIMARY KEY, b STRING, c STRING, FAMILY most (a,b), FAMILY justc (c))`)
- sqlDB.Exec(t, `INSERT INTO foo values (0, 'dog', 'cat')`)
- foo := feed(t, f, `CREATE CHANGEFEED FOR foo WITH split_column_families, format=avro`)
- defer closeFeed(t, foo)
- assertPayloads(t, foo, []string{
- `foo: {"a":{"long":0}}->{"after":{"foomost":{"a":{"long":0},"b":{"string":"dog"}}}}`,
- `foo: {"a":{"long":0}}->{"after":{"foojustc":{"c":{"string":"cat"}}}}`,
- })
-
- }
- t.Run(`kafka`, kafkaTest(testFn))
+ t.Run(`webhook`, webhookTest(testFn))
+ t.Run(`pubsub`, pubsubTest(testFn))
}
func TestChangefeedAuthorization(t *testing.T) {
@@ -3128,13 +2924,12 @@ func TestChangefeedDataTTL(t *testing.T) {
knobs := f.Server().TestingKnobs().
DistSQL.(*execinfra.TestingKnobs).
Changefeed.(*TestingKnobs)
- knobs.FeedKnobs.BeforeScanRequest = func(_ *kv.Batch) error {
+ knobs.FeedKnobs.BeforeScanRequest = func(_ *kv.Batch) {
if atomic.LoadInt32(&shouldWait) == 0 {
- return nil
+ return
}
wait <- struct{}{}
<-resume
- return nil
}
sqlDB := sqlutils.MakeSQLRunner(db)
@@ -4265,6 +4060,67 @@ func TestChangefeedProtectedTimestampOnPause(t *testing.T) {
}
+// This test ensures that the changefeed attempts to verify its initial protected
+// timestamp record and that when that verification fails, the job is canceled
+// and the record removed.
+func TestChangefeedProtectedTimestampsVerificationFails(t *testing.T) {
+ defer leaktest.AfterTest(t)()
+ defer log.Scope(t).Close(t)
+
+ verifyRequestCh := make(chan *roachpb.AdminVerifyProtectedTimestampRequest, 1)
+ requestFilter := kvserverbase.ReplicaRequestFilter(func(
+ ctx context.Context, ba roachpb.BatchRequest,
+ ) *roachpb.Error {
+ if r, ok := ba.GetArg(roachpb.AdminVerifyProtectedTimestamp); ok {
+ req := r.(*roachpb.AdminVerifyProtectedTimestampRequest)
+ verifyRequestCh <- req
+ return roachpb.NewError(errors.Errorf("failed to verify protection %v on %v", req.RecordID, ba.RangeID))
+ }
+ return nil
+ })
+
+ setStoreKnobs := func(args *base.TestServerArgs) {
+ storeKnobs := &kvserver.StoreTestingKnobs{}
+ storeKnobs.TestingRequestFilter = requestFilter
+ args.Knobs.Store = storeKnobs
+ }
+
+ testFn := func(t *testing.T, db *gosql.DB, f cdctest.TestFeedFactory) {
+ ctx := context.Background()
+ sqlDB := sqlutils.MakeSQLRunner(db)
+ sqlDB.Exec(t, `CREATE TABLE foo (a INT PRIMARY KEY, b STRING)`)
+ _, err := f.Feed(`CREATE CHANGEFEED FOR foo WITH resolved`)
+ // Make sure we got the injected error.
+ require.Regexp(t, "failed to verify", err)
+ // Make sure we tried to verify the request.
+ r := <-verifyRequestCh
+ cfg := f.Server().ExecutorConfig().(sql.ExecutorConfig)
+ kvDB := cfg.DB
+ pts := cfg.ProtectedTimestampProvider
+ // Make sure that the canceled job gets moved through its OnFailOrCancel
+ // phase and removes its protected timestamp.
+ testutils.SucceedsSoon(t, func() error {
+ err := kvDB.Txn(ctx, func(ctx context.Context, txn *kv.Txn) error {
+ _, err := pts.GetRecord(ctx, txn, r.RecordID)
+ return err
+ })
+ if err == nil {
+ return errors.Errorf("expected record to be removed")
+ }
+ if errors.Is(err, protectedts.ErrNotExists) {
+ return nil
+ }
+ return err
+ })
+ }
+ opts := []feedTestOption{withArgsFn(setStoreKnobs), feedTestNoTenants}
+ t.Run(`enterprise`, enterpriseTest(testFn, opts...))
+ t.Run(`cloudstorage`, cloudStorageTest(testFn, opts...))
+ t.Run(`kafka`, kafkaTest(testFn, opts...))
+ t.Run(`webhook`, webhookTest(testFn, opts...))
+ t.Run(`pubsub`, pubsubTest(testFn, opts...))
+}
+
func TestManyChangefeedsOneTable(t *testing.T) {
defer leaktest.AfterTest(t)()
defer log.Scope(t).Close(t)
@@ -4371,8 +4227,8 @@ func TestChangefeedNodeShutdown(t *testing.T) {
defer tc.Stopper().Stop(context.Background())
db := tc.ServerConn(1)
- serverutils.SetClusterSetting(t, tc, "changefeed.experimental_poll_interval", time.Millisecond)
sqlDB := sqlutils.MakeSQLRunner(db)
+ sqlDB.Exec(t, `SET CLUSTER SETTING changefeed.experimental_poll_interval = '0ns'`)
sqlDB.Exec(t, `CREATE DATABASE d`)
sqlDB.Exec(t, `CREATE TABLE foo (a INT PRIMARY KEY, b STRING)`)
sqlDB.Exec(t, `INSERT INTO foo VALUES (0, 'initial')`)
@@ -4583,22 +4439,19 @@ func TestChangefeedHandlesDrainingNodes(t *testing.T) {
skip.UnderRace(t, "Takes too long with race enabled")
shouldDrain := true
- knobs := base.TestingKnobs{
- DistSQL: &execinfra.TestingKnobs{
- DrainFast: true,
- Changefeed: &TestingKnobs{},
- Flowinfra: &flowinfra.TestingKnobs{
- FlowRegistryDraining: func() bool {
- if shouldDrain {
- shouldDrain = false
- return true
- }
- return false
- },
+ knobs := base.TestingKnobs{DistSQL: &execinfra.TestingKnobs{
+ DrainFast: true,
+ Changefeed: &TestingKnobs{},
+ Flowinfra: &flowinfra.TestingKnobs{
+ FlowRegistryDraining: func() bool {
+ if shouldDrain {
+ shouldDrain = false
+ return true
+ }
+ return false
},
},
- JobsTestingKnobs: jobs.NewTestingKnobsWithShortIntervals(),
- }
+ }}
sinkDir, cleanupFn := testutils.TempDir(t)
defer cleanupFn()
@@ -4613,9 +4466,9 @@ func TestChangefeedHandlesDrainingNodes(t *testing.T) {
db := tc.ServerConn(1)
sqlDB := sqlutils.MakeSQLRunner(db)
- serverutils.SetClusterSetting(t, tc, "kv.rangefeed.enabled", true)
- serverutils.SetClusterSetting(t, tc, "kv.closed_timestamp.target_duration", time.Second)
- serverutils.SetClusterSetting(t, tc, "changefeed.experimental_poll_interval", 10*time.Millisecond)
+ sqlDB.Exec(t, `SET CLUSTER SETTING kv.rangefeed.enabled = true`)
+ sqlDB.Exec(t, `SET CLUSTER SETTING kv.closed_timestamp.target_duration = '1s'`)
+ sqlDB.Exec(t, `SET CLUSTER SETTING changefeed.experimental_poll_interval = '10ms'`)
sqlutils.CreateTable(
t, db, "foo",
@@ -4638,9 +4491,9 @@ func TestChangefeedHandlesDrainingNodes(t *testing.T) {
defer closeFeed(t, feed)
// At this point, the job created by feed will fail to start running on node 0 due to draining
- // registry. However, this job will be retried, and it should succeed.
+ // registry. However, this job will be retried, and it should succeeded.
// Note: This test is a bit unrealistic in that if the registry is draining, that
- // means that the server is draining (i.e. being shut down). We don't do a full shutdown
+ // means that the server is draining (i.e being shut down). We don't do a full shutdown
// here, but we are simulating a restart by failing to start a flow the first time around.
assertPayloads(t, feed, []string{
`foo: [1]->{"after": {"k": 1, "v": 1}}`,
@@ -5034,9 +4887,8 @@ func TestChangefeedBackfillCheckpoint(t *testing.T) {
// Ensure Scan Requests are always small enough that we receive multiple
// resolved events during a backfill
- knobs.FeedKnobs.BeforeScanRequest = func(b *kv.Batch) error {
+ knobs.FeedKnobs.BeforeScanRequest = func(b *kv.Batch) {
b.Header.MaxSpanRequestKeys = 1 + rnd.Int63n(100)
- return nil
}
// Emit resolved events for majority of spans. Be extra paranoid and ensure that
diff --git a/pkg/ccl/changefeedccl/changefeedbase/options.go b/pkg/ccl/changefeedccl/changefeedbase/options.go
index f161163176..83b5c54d1e 100644
--- a/pkg/ccl/changefeedccl/changefeedbase/options.go
+++ b/pkg/ccl/changefeedccl/changefeedbase/options.go
@@ -49,7 +49,6 @@ const (
OptCompression = `compression`
OptSchemaChangeEvents = `schema_change_events`
OptSchemaChangePolicy = `schema_change_policy`
- OptSplitColumnFamilies = `split_column_families`
OptProtectDataFromGCOnPause = `protect_data_from_gc_on_pause`
OptWebhookAuthHeader = `webhook_auth_header`
OptWebhookClientTimeout = `webhook_client_timeout`
@@ -179,7 +178,6 @@ var ChangefeedOptionExpectValues = map[string]sql.KVStringOptValidate{
OptCompression: sql.KVStringOptRequireValue,
OptSchemaChangeEvents: sql.KVStringOptRequireValue,
OptSchemaChangePolicy: sql.KVStringOptRequireValue,
- OptSplitColumnFamilies: sql.KVStringOptRequireNoValue,
OptInitialScan: sql.KVStringOptRequireNoValue,
OptNoInitialScan: sql.KVStringOptRequireNoValue,
OptProtectDataFromGCOnPause: sql.KVStringOptRequireNoValue,
@@ -205,7 +203,7 @@ var CommonOptions = makeStringSet(OptCursor, OptEnvelope,
OptFormat, OptFullTableName,
OptKeyInValue, OptTopicInValue,
OptResolvedTimestamps, OptUpdatedTimestamps,
- OptMVCCTimestamps, OptDiff, OptSplitColumnFamilies,
+ OptMVCCTimestamps, OptDiff,
OptSchemaChangeEvents, OptSchemaChangePolicy,
OptProtectDataFromGCOnPause, OptOnError,
OptInitialScan, OptNoInitialScan,
diff --git a/pkg/ccl/changefeedccl/changefeedbase/validate.go b/pkg/ccl/changefeedccl/changefeedbase/validate.go
index fb5facb737..056cab49ed 100644
--- a/pkg/ccl/changefeedccl/changefeedbase/validate.go
+++ b/pkg/ccl/changefeedccl/changefeedbase/validate.go
@@ -16,9 +16,7 @@ import (
// ValidateTable validates that a table descriptor can be watched by a CHANGEFEED.
func ValidateTable(
- targets []jobspb.ChangefeedTargetSpecification,
- tableDesc catalog.TableDescriptor,
- opts map[string]string,
+ targets []jobspb.ChangefeedTargetSpecification, tableDesc catalog.TableDescriptor,
) error {
var t jobspb.ChangefeedTargetSpecification
var found bool
@@ -50,17 +48,11 @@ func ValidateTable(
if tableDesc.IsSequence() {
return errors.Errorf(`CHANGEFEED cannot target sequences: %s`, tableDesc.GetName())
}
- if t.Type == jobspb.ChangefeedTargetSpecification_PRIMARY_FAMILY_ONLY && len(tableDesc.GetFamilies()) != 1 {
+ if len(tableDesc.GetFamilies()) != 1 {
return errors.Errorf(
- `CHANGEFEED created on a table with a single column family (%s) cannot now target a table with %d families.`,
+ `CHANGEFEEDs are currently supported on tables with exactly 1 column family: %s has %d`,
tableDesc.GetName(), len(tableDesc.GetFamilies()))
}
- _, columnFamiliesOpt := opts[OptSplitColumnFamilies]
- if !columnFamiliesOpt && len(tableDesc.GetFamilies()) != 1 {
- return errors.Errorf(
- `CHANGEFEED targeting a table (%s) with multiple column families requires WITH %s and will emit multiple events per row.`,
- tableDesc.GetName(), OptSplitColumnFamilies)
- }
if tableDesc.Dropped() {
return errors.Errorf(`"%s" was dropped`, t.StatementTimeName)
@@ -87,11 +79,5 @@ func WarningsForTable(
}
}
}
- if tableDesc.NumFamilies() > 1 {
- warnings = append(warnings,
- errors.Errorf("Table %s has %d underlying column families. Messages will be emitted separately for each family.",
- tableDesc.GetName(), tableDesc.NumFamilies(),
- ))
- }
return warnings
}
diff --git a/pkg/ccl/changefeedccl/encoder.go b/pkg/ccl/changefeedccl/encoder.go
index 85df7113f2..7aef94121f 100644
--- a/pkg/ccl/changefeedccl/encoder.go
+++ b/pkg/ccl/changefeedccl/encoder.go
@@ -53,9 +53,6 @@ type encodeRow struct {
// tableDesc is a TableDescriptor for the table containing `datums`.
// It's valid for interpreting the row at `updated`.
tableDesc catalog.TableDescriptor
- // familyID indicates which column family is populated on this row.
- // It's valid for interpreting the row at `updated`.
- familyID descpb.FamilyID
// prevDatums is the old value of a changed table row. The field is set
// to nil if the before value for changes was not requested (OptDiff).
prevDatums rowenc.EncDatumRow
@@ -64,8 +61,6 @@ type encodeRow struct {
// prevTableDesc is a TableDescriptor for the table containing `prevDatums`.
// It's valid for interpreting the row at `updated.Prev()`.
prevTableDesc catalog.TableDescriptor
- // prevFamilyID indicates which column family is populated in prevDatums.
- prevFamilyID descpb.FamilyID
}
// Encoder turns a row into a serialized changefeed key, value, or resolved
@@ -191,23 +186,9 @@ func (e *jsonEncoder) encodeKeyRaw(row encodeRow) ([]interface{}, error) {
func (e *jsonEncoder) encodeTopicRaw(row encodeRow) (interface{}, error) {
descID := row.tableDesc.GetID()
// use the target list since row.tableDesc.GetName() will not have fully qualified names
- for _, target := range e.targets {
- if target.TableID == descID {
- switch target.Type {
- case jobspb.ChangefeedTargetSpecification_PRIMARY_FAMILY_ONLY:
- return target.StatementTimeName, nil
- case jobspb.ChangefeedTargetSpecification_EACH_FAMILY:
- family, err := row.tableDesc.FindFamilyByID(row.familyID)
- if err != nil {
- return nil, err
- }
- return fmt.Sprintf("%s.%s", target.StatementTimeName, family.Name), nil
- case jobspb.ChangefeedTargetSpecification_COLUMN_FAMILY:
- // Not implemented yet
- default:
- // fall through to error
- }
- return nil, errors.AssertionFailedf("Found a matching target with unimplemented type %s", target.Type)
+ for _, topic := range e.targets {
+ if topic.TableID == descID {
+ return topic.StatementTimeName, nil
}
}
return nil, fmt.Errorf("table with name %s and descriptor ID %d not found in changefeed target list",
@@ -222,64 +203,48 @@ func (e *jsonEncoder) EncodeValue(_ context.Context, row encodeRow) ([]byte, err
var after map[string]interface{}
if !row.deleted {
- family, err := row.tableDesc.FindFamilyByID(row.familyID)
- if err != nil {
- return nil, err
- }
- include := make(map[descpb.ColumnID]struct{}, len(family.ColumnIDs))
- var yes struct{}
- for _, colID := range family.ColumnIDs {
- include[colID] = yes
- }
+ columns := row.tableDesc.PublicColumns()
after = make(map[string]interface{})
- for i, col := range row.tableDesc.PublicColumns() {
- _, inFamily := include[col.GetID()]
- virtual := col.IsVirtual() && e.virtualColumnVisibility == string(changefeedbase.OptVirtualColumnsNull)
- if inFamily || virtual {
- datum := row.datums[i]
- if err := datum.EnsureDecoded(col.GetType(), &e.alloc); err != nil {
- return nil, err
- }
- after[col.GetName()], err = tree.AsJSON(
- datum.Datum,
- sessiondatapb.DataConversionConfig{},
- time.UTC,
- )
- if err != nil {
- return nil, err
- }
+ for i, col := range columns {
+ if col.IsVirtual() && e.virtualColumnVisibility == string(changefeedbase.OptVirtualColumnsOmitted) {
+ continue
+ }
+ datum := row.datums[i]
+ if err := datum.EnsureDecoded(col.GetType(), &e.alloc); err != nil {
+ return nil, err
+ }
+ var err error
+ after[col.GetName()], err = tree.AsJSON(
+ datum.Datum,
+ sessiondatapb.DataConversionConfig{},
+ time.UTC,
+ )
+ if err != nil {
+ return nil, err
}
}
}
var before map[string]interface{}
if row.prevDatums != nil && !row.prevDeleted {
- family, err := row.prevTableDesc.FindFamilyByID(row.prevFamilyID)
- if err != nil {
- return nil, err
- }
- include := make(map[descpb.ColumnID]struct{})
- var yes struct{}
- for _, colID := range family.ColumnIDs {
- include[colID] = yes
- }
+ columns := row.prevTableDesc.PublicColumns()
before = make(map[string]interface{})
- for i, col := range row.prevTableDesc.PublicColumns() {
- _, inFamily := include[col.GetID()]
- virtual := col.IsVirtual() && e.virtualColumnVisibility == string(changefeedbase.OptVirtualColumnsNull)
- if inFamily || virtual {
- datum := row.prevDatums[i]
- if err := datum.EnsureDecoded(col.GetType(), &e.alloc); err != nil {
- return nil, err
- }
- before[col.GetName()], err = tree.AsJSON(
- datum.Datum,
- sessiondatapb.DataConversionConfig{},
- time.UTC,
- )
- if err != nil {
- return nil, err
- }
+ for i, col := range columns {
+ if col.IsVirtual() && e.virtualColumnVisibility == string(changefeedbase.OptVirtualColumnsOmitted) {
+ continue
+ }
+ datum := row.prevDatums[i]
+ if err := datum.EnsureDecoded(col.GetType(), &e.alloc); err != nil {
+ return nil, err
+ }
+ var err error
+ before[col.GetName()], err = tree.AsJSON(
+ datum.Datum,
+ sessiondatapb.DataConversionConfig{},
+ time.UTC,
+ )
+ if err != nil {
+ return nil, err
}
}
}
@@ -377,13 +342,13 @@ type confluentAvroEncoder struct {
resolvedCache map[string]confluentRegisteredEnvelopeSchema
}
-type tableIDAndVersion struct {
- tableID descpb.ID
- version descpb.DescriptorVersion
- familyID descpb.FamilyID
-}
+type tableIDAndVersion uint64
type tableIDAndVersionPair [2]tableIDAndVersion // [before, after]
+func makeTableIDAndVersion(id descpb.ID, version descpb.DescriptorVersion) tableIDAndVersion {
+ return tableIDAndVersion(id)<<32 + tableIDAndVersion(version)
+}
+
type confluentRegisteredKeySchema struct {
schema *avroDataRecord
registryID int32
@@ -459,35 +424,19 @@ func newConfluentAvroEncoder(
// Get the raw SQL-formatted string for a table name
// and apply full_table_name and avro_schema_prefix options
-func (e *confluentAvroEncoder) rawTableName(
- desc catalog.TableDescriptor, familyID descpb.FamilyID,
-) (string, error) {
- for _, target := range e.targets {
- if target.TableID == desc.GetID() {
- switch target.Type {
- case jobspb.ChangefeedTargetSpecification_PRIMARY_FAMILY_ONLY:
- return e.schemaPrefix + target.StatementTimeName, nil
- case jobspb.ChangefeedTargetSpecification_EACH_FAMILY:
- family, err := desc.FindFamilyByID(familyID)
- if err != nil {
- return "", err
- }
- return fmt.Sprintf("%s%s.%s", e.schemaPrefix, target.StatementTimeName, family.Name), nil
- case jobspb.ChangefeedTargetSpecification_COLUMN_FAMILY:
- // Not implemented yet
- default:
- // fall through to error
- }
- return "", errors.AssertionFailedf("Found a matching target with unimplemented type %s", target.Type)
+func (e *confluentAvroEncoder) rawTableName(desc catalog.TableDescriptor) string {
+ for _, spec := range e.targets {
+ if spec.TableID == desc.GetID() {
+ return e.schemaPrefix + spec.StatementTimeName
}
}
- return desc.GetName(), errors.Newf("Could not find TargetSpecification for descriptor %v", desc)
+ //TODO (zinger): error here
+ return desc.GetName()
}
// EncodeKey implements the Encoder interface.
func (e *confluentAvroEncoder) EncodeKey(ctx context.Context, row encodeRow) ([]byte, error) {
- // No familyID in the cache key for keys because it's the same schema for all families
- cacheKey := tableIDAndVersion{tableID: row.tableDesc.GetID(), version: row.tableDesc.GetVersion()}
+ cacheKey := makeTableIDAndVersion(row.tableDesc.GetID(), row.tableDesc.GetVersion())
var registered confluentRegisteredKeySchema
v, ok := e.keyCache.Get(cacheKey)
@@ -496,10 +445,7 @@ func (e *confluentAvroEncoder) EncodeKey(ctx context.Context, row encodeRow) ([]
registered.schema.refreshTypeMetadata(row.tableDesc)
} else {
var err error
- tableName, err := e.rawTableName(row.tableDesc, row.familyID)
- if err != nil {
- return nil, err
- }
+ tableName := e.rawTableName(row.tableDesc)
registered.schema, err = indexToAvroSchema(row.tableDesc, row.tableDesc.GetPrimaryIndex(), tableName, e.schemaPrefix)
if err != nil {
return nil, err
@@ -532,13 +478,9 @@ func (e *confluentAvroEncoder) EncodeValue(ctx context.Context, row encodeRow) (
var cacheKey tableIDAndVersionPair
if e.beforeField && row.prevTableDesc != nil {
- cacheKey[0] = tableIDAndVersion{
- tableID: row.prevTableDesc.GetID(), version: row.prevTableDesc.GetVersion(), familyID: row.prevFamilyID,
- }
- }
- cacheKey[1] = tableIDAndVersion{
- tableID: row.tableDesc.GetID(), version: row.tableDesc.GetVersion(), familyID: row.familyID,
+ cacheKey[0] = makeTableIDAndVersion(row.prevTableDesc.GetID(), row.prevTableDesc.GetVersion())
}
+ cacheKey[1] = makeTableIDAndVersion(row.tableDesc.GetID(), row.tableDesc.GetVersion())
var registered confluentRegisteredEnvelopeSchema
v, ok := e.valueCache.Get(cacheKey)
@@ -552,23 +494,19 @@ func (e *confluentAvroEncoder) EncodeValue(ctx context.Context, row encodeRow) (
var beforeDataSchema *avroDataRecord
if e.beforeField && row.prevTableDesc != nil {
var err error
- beforeDataSchema, err = tableToAvroSchema(row.prevTableDesc, row.prevFamilyID, `before`, e.schemaPrefix, e.virtualColumnVisibility)
+ beforeDataSchema, err = tableToAvroSchema(row.prevTableDesc, `before`, e.schemaPrefix, e.virtualColumnVisibility)
if err != nil {
return nil, err
}
}
- afterDataSchema, err := tableToAvroSchema(row.tableDesc, row.familyID, avroSchemaNoSuffix, e.schemaPrefix, e.virtualColumnVisibility)
+ afterDataSchema, err := tableToAvroSchema(row.tableDesc, avroSchemaNoSuffix, e.schemaPrefix, e.virtualColumnVisibility)
if err != nil {
return nil, err
}
opts := avroEnvelopeOpts{afterField: true, beforeField: e.beforeField, updatedField: e.updatedField}
- name, err := e.rawTableName(row.tableDesc, row.familyID)
- if err != nil {
- return nil, err
- }
- registered.schema, err = envelopeToAvroSchema(name, opts, beforeDataSchema, afterDataSchema, e.schemaPrefix)
+ registered.schema, err = envelopeToAvroSchema(e.rawTableName(row.tableDesc), opts, beforeDataSchema, afterDataSchema, e.schemaPrefix)
if err != nil {
return nil, err
@@ -576,7 +514,7 @@ func (e *confluentAvroEncoder) EncodeValue(ctx context.Context, row encodeRow) (
// NB: This uses the kafka name escaper because it has to match the name
// of the kafka topic.
- subject := SQLNameToKafkaName(name) + confluentSubjectSuffixValue
+ subject := SQLNameToKafkaName(e.rawTableName(row.tableDesc)) + confluentSubjectSuffixValue
registered.registryID, err = e.register(ctx, &registered.schema.avroRecord, subject)
if err != nil {
return nil, err
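
A tiny runnable illustration (not part of the patch) of the avro encoder cache key on the `+` side of the encoder.go hunks: a table ID and descriptor version packed into one integer, with the ID shifted into the high 32 bits. The uint32 parameters are a simplification of the real descriptor ID and version types.

    package main

    import "fmt"

    type tableIDAndVersion uint64

    func makeTableIDAndVersion(id, version uint32) tableIDAndVersion {
        return tableIDAndVersion(id)<<32 + tableIDAndVersion(version)
    }

    func main() {
        k := makeTableIDAndVersion(52, 3)
        // ID 52 (0x34) lands in the high word, version 3 in the low word.
        fmt.Printf("%#x\n", uint64(k)) // 0x3400000003
    }
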
diff --git a/pkg/ccl/changefeedccl/encoder_test.go b/pkg/ccl/changefeedccl/encoder_test.go
index c6821e0403..69b0fb8407 100644
--- a/pkg/ccl/changefeedccl/encoder_test.go
+++ b/pkg/ccl/changefeedccl/encoder_test.go
@@ -672,26 +672,6 @@ func TestAvroSchemaNaming(t *testing.T) {
//Both changes to the subject are also reflected in the schema name in the posted schemas
require.Contains(t, foo.registry.SchemaForSubject(`supermovr.public.drivers-key`), `supermovr`)
require.Contains(t, foo.registry.SchemaForSubject(`supermovr.public.drivers-value`), `supermovr`)
-
- sqlDB.Exec(t, `ALTER TABLE movr.drivers ADD COLUMN vehicle_id int CREATE FAMILY volatile`)
- multiFamilyFeed := feed(t, f, fmt.Sprintf(`CREATE CHANGEFEED FOR movr.drivers `+
- `WITH format=%s, %s`, changefeedbase.OptFormatAvro, changefeedbase.OptSplitColumnFamilies))
- defer closeFeed(t, multiFamilyFeed)
- foo = multiFamilyFeed.(*kafkaFeed)
-
- sqlDB.Exec(t, `UPDATE movr.drivers SET vehicle_id = 1 WHERE id=1`)
-
- assertPayloads(t, multiFamilyFeed, []string{
- `drivers: {"id":{"long":1}}->{"after":{"driversprimary":{"id":{"long":1},"name":{"string":"Alice"}}}}`,
- `drivers: {"id":{"long":1}}->{"after":{"driversvolatile":{"vehicle_id":{"long":1}}}}`,
- })
-
- assertRegisteredSubjects(t, foo.registry, []string{
- `drivers.primary-key`,
- `drivers.primary-value`,
- `drivers.volatile-value`,
- })
-
}
t.Run(`kafka`, kafkaTest(testFn))
diff --git a/pkg/ccl/changefeedccl/kvfeed/kv_feed.go b/pkg/ccl/changefeedccl/kvfeed/kv_feed.go
index ad347d7337..55751564d9 100644
--- a/pkg/ccl/changefeedccl/kvfeed/kv_feed.go
+++ b/pkg/ccl/changefeedccl/kvfeed/kv_feed.go
@@ -36,23 +36,22 @@ import (
// Config configures a kvfeed.
type Config struct {
- Settings *cluster.Settings
- DB *kv.DB
- Codec keys.SQLCodec
- Clock *hlc.Clock
- Gossip gossip.OptionalGossip
- Spans []roachpb.Span
- BackfillCheckpoint []roachpb.Span
- Targets []jobspb.ChangefeedTargetSpecification
- Writer kvevent.Writer
- Metrics *kvevent.Metrics
- OnBackfillCallback func() func()
- OnBackfillRangeCallback func(int64) (func(), func())
- MM *mon.BytesMonitor
- WithDiff bool
- SchemaChangeEvents changefeedbase.SchemaChangeEventClass
- SchemaChangePolicy changefeedbase.SchemaChangePolicy
- SchemaFeed schemafeed.SchemaFeed
+ Settings *cluster.Settings
+ DB *kv.DB
+ Codec keys.SQLCodec
+ Clock *hlc.Clock
+ Gossip gossip.OptionalGossip
+ Spans []roachpb.Span
+ BackfillCheckpoint []roachpb.Span
+ Targets []jobspb.ChangefeedTargetSpecification
+ Writer kvevent.Writer
+ Metrics *kvevent.Metrics
+ OnBackfillCallback func() func()
+ MM *mon.BytesMonitor
+ WithDiff bool
+ SchemaChangeEvents changefeedbase.SchemaChangeEventClass
+ SchemaChangePolicy changefeedbase.SchemaChangePolicy
+ SchemaFeed schemafeed.SchemaFeed
// If true, the feed will begin with a dump of data at exactly the
// InitialHighWater. This is a peculiar behavior. In general the
@@ -75,10 +74,9 @@ func Run(ctx context.Context, cfg Config) error {
var sc kvScanner
{
sc = &scanRequestScanner{
- settings: cfg.Settings,
- gossip: cfg.Gossip,
- db: cfg.DB,
- onBackfillRangeCallback: cfg.OnBackfillRangeCallback,
+ settings: cfg.Settings,
+ gossip: cfg.Gossip,
+ db: cfg.DB,
}
}
var pff physicalFeedFactory
diff --git a/pkg/ccl/changefeedccl/kvfeed/scanner.go b/pkg/ccl/changefeedccl/kvfeed/scanner.go
index c33398144c..5bfbeb67de 100644
--- a/pkg/ccl/changefeedccl/kvfeed/scanner.go
+++ b/pkg/ccl/changefeedccl/kvfeed/scanner.go
@@ -40,10 +40,9 @@ type kvScanner interface {
}
type scanRequestScanner struct {
- settings *cluster.Settings
- gossip gossip.OptionalGossip
- db *kv.DB
- onBackfillRangeCallback func(int64) (func(), func())
+ settings *cluster.Settings
+ gossip gossip.OptionalGossip
+ db *kv.DB
}
var _ kvScanner = (*scanRequestScanner)(nil)
@@ -66,12 +65,6 @@ func (p *scanRequestScanner) Scan(
return err
}
- var backfillDec, backfillClear func()
- if p.onBackfillRangeCallback != nil {
- backfillDec, backfillClear = p.onBackfillRangeCallback(int64(len(spans)))
- defer backfillClear()
- }
-
maxConcurrentScans := maxConcurrentScanRequests(p.gossip, &p.settings.SV)
exportLim := limit.MakeConcurrentRequestLimiter("changefeedScanRequestLimiter", maxConcurrentScans)
@@ -99,9 +92,6 @@ func (p *scanRequestScanner) Scan(
defer limAlloc.Release()
err := p.exportSpan(ctx, span, cfg.Timestamp, cfg.WithDiff, sink, cfg.Knobs)
finished := atomic.AddInt64(&atomicFinished, 1)
- if backfillDec != nil {
- backfillDec()
- }
if log.V(2) {
log.Infof(ctx, `exported %d of %d: %v`, finished, len(spans), err)
}
@@ -140,9 +130,7 @@ func (p *scanRequestScanner) exportSpan(
// during result parsing.
b.AddRawRequest(r)
if knobs.BeforeScanRequest != nil {
- if err := knobs.BeforeScanRequest(b); err != nil {
- return err
- }
+ knobs.BeforeScanRequest(b)
}
if err := txn.Run(ctx, b); err != nil {
diff --git a/pkg/ccl/changefeedccl/kvfeed/scanner_test.go b/pkg/ccl/changefeedccl/kvfeed/scanner_test.go
index 48b104d91c..5c7dc6238e 100644
--- a/pkg/ccl/changefeedccl/kvfeed/scanner_test.go
+++ b/pkg/ccl/changefeedccl/kvfeed/scanner_test.go
@@ -70,9 +70,8 @@ INSERT INTO t VALUES (1), (2), (3);
Spans: []roachpb.Span{span},
Timestamp: exportTime,
Knobs: TestingKnobs{
- BeforeScanRequest: func(b *kv.Batch) error {
+ BeforeScanRequest: func(b *kv.Batch) {
b.Header.MaxSpanRequestKeys = 1
- return nil
},
},
}
diff --git a/pkg/ccl/changefeedccl/kvfeed/testing_knobs.go b/pkg/ccl/changefeedccl/kvfeed/testing_knobs.go
index 72dc6fecce..2e8645f035 100644
--- a/pkg/ccl/changefeedccl/kvfeed/testing_knobs.go
+++ b/pkg/ccl/changefeedccl/kvfeed/testing_knobs.go
@@ -16,7 +16,7 @@ import (
// TestingKnobs are the testing knobs for kvfeed.
type TestingKnobs struct {
// BeforeScanRequest is a callback invoked before issuing Scan request.
- BeforeScanRequest func(b *kv.Batch) error
+ BeforeScanRequest func(b *kv.Batch)
OnRangeFeedValue func(kv roachpb.KeyValue) error
}
diff --git a/pkg/ccl/changefeedccl/metrics.go b/pkg/ccl/changefeedccl/metrics.go
index e0e62c3753..3d85f035a7 100644
--- a/pkg/ccl/changefeedccl/metrics.go
+++ b/pkg/ccl/changefeedccl/metrics.go
@@ -10,7 +10,6 @@ package changefeedccl
import (
"strings"
- "sync/atomic"
"time"
"github.com/cockroachdb/cockroach/pkg/ccl/changefeedccl/cdcutils"
@@ -42,18 +41,17 @@ const defaultSLIScope = "default"
// AggMetrics are aggregated metrics keeping track of aggregated changefeed performance
// indicators, combined with a limited number of per-changefeed indicators.
type AggMetrics struct {
- EmittedMessages *aggmetric.AggCounter
- EmittedBytes *aggmetric.AggCounter
- FlushedBytes *aggmetric.AggCounter
- BatchHistNanos *aggmetric.AggHistogram
- Flushes *aggmetric.AggCounter
- FlushHistNanos *aggmetric.AggHistogram
- CommitLatency *aggmetric.AggHistogram
- BackfillCount *aggmetric.AggGauge
- BackfillPendingRanges *aggmetric.AggGauge
- ErrorRetries *aggmetric.AggCounter
- AdmitLatency *aggmetric.AggHistogram
- RunningCount *aggmetric.AggGauge
+ EmittedMessages *aggmetric.AggCounter
+ EmittedBytes *aggmetric.AggCounter
+ FlushedBytes *aggmetric.AggCounter
+ BatchHistNanos *aggmetric.AggHistogram
+ Flushes *aggmetric.AggCounter
+ FlushHistNanos *aggmetric.AggHistogram
+ CommitLatency *aggmetric.AggHistogram
+ BackfillCount *aggmetric.AggGauge
+ ErrorRetries *aggmetric.AggCounter
+ AdmitLatency *aggmetric.AggHistogram
+ RunningCount *aggmetric.AggGauge
// There is always at least 1 sliMetrics created for defaultSLI scope.
mu struct {
@@ -67,18 +65,17 @@ func (a *AggMetrics) MetricStruct() {}
// sliMetrics holds all SLI related metrics aggregated into AggMetrics.
type sliMetrics struct {
- EmittedMessages *aggmetric.Counter
- EmittedBytes *aggmetric.Counter
- FlushedBytes *aggmetric.Counter
- BatchHistNanos *aggmetric.Histogram
- Flushes *aggmetric.Counter
- FlushHistNanos *aggmetric.Histogram
- CommitLatency *aggmetric.Histogram
- ErrorRetries *aggmetric.Counter
- AdmitLatency *aggmetric.Histogram
- BackfillCount *aggmetric.Gauge
- BackfillPendingRanges *aggmetric.Gauge
- RunningCount *aggmetric.Gauge
+ EmittedMessages *aggmetric.Counter
+ EmittedBytes *aggmetric.Counter
+ FlushedBytes *aggmetric.Counter
+ BatchHistNanos *aggmetric.Histogram
+ Flushes *aggmetric.Counter
+ FlushHistNanos *aggmetric.Histogram
+ CommitLatency *aggmetric.Histogram
+ ErrorRetries *aggmetric.Counter
+ AdmitLatency *aggmetric.Histogram
+ BackfillCount *aggmetric.Gauge
+ RunningCount *aggmetric.Gauge
}
// sinkDoesNotCompress is a sentinel value indicating the sink
@@ -152,28 +149,6 @@ func (m *sliMetrics) getBackfillCallback() func() func() {
}
}
-// getBackfillRangeCallback returns a backfillRangeCallback that is to be called
-// at the beginning of a backfill with the number of ranges that will be scanned
-// and returns two callbacks to decrement the value until all ranges have
-// been emitted or clear the number completely if the backfill is cancelled.
-// Note: dec() should only be called as many times as the initial value, and
-// clear() should be called when there will never be another dec() call.
-func (m *sliMetrics) getBackfillRangeCallback() func(int64) (func(), func()) {
- return func(initial int64) (dec func(), clear func()) {
- remaining := initial
- m.BackfillPendingRanges.Inc(initial)
- dec = func() {
- m.BackfillPendingRanges.Dec(1)
- atomic.AddInt64(&remaining, -1)
- }
- clear = func() {
- m.BackfillPendingRanges.Dec(remaining)
- atomic.AddInt64(&remaining, -remaining)
- }
- return
- }
-}
-
const (
changefeedCheckpointHistMaxLatency = 30 * time.Second
changefeedBatchHistMaxLatency = 30 * time.Second
@@ -297,12 +272,6 @@ func newAggregateMetrics(histogramWindow time.Duration) *AggMetrics {
Measurement: "Count",
Unit: metric.Unit_COUNT,
}
- metaChangefeedBackfillPendingRanges := metric.Metadata{
- Name: "changefeed.backfill_pending_ranges",
- Help: "Number of ranges in an ongoing backfill that are yet to be fully emitted",
- Measurement: "Count",
- Unit: metric.Unit_COUNT,
- }
metaChangefeedRunning := metric.Metadata{
Name: "changefeed.running",
Help: "Number of currently running changefeeds, including sinkless",
@@ -328,9 +297,8 @@ func newAggregateMetrics(histogramWindow time.Duration) *AggMetrics {
histogramWindow, commitLatencyMaxValue.Nanoseconds(), 1),
AdmitLatency: b.Histogram(metaAdmitLatency, histogramWindow,
admitLatencyMaxValue.Nanoseconds(), 1),
- BackfillCount: b.Gauge(metaChangefeedBackfillCount),
- BackfillPendingRanges: b.Gauge(metaChangefeedBackfillPendingRanges),
- RunningCount: b.Gauge(metaChangefeedRunning),
+ BackfillCount: b.Gauge(metaChangefeedBackfillCount),
+ RunningCount: b.Gauge(metaChangefeedRunning),
}
a.mu.sliMetrics = make(map[string]*sliMetrics)
_, err := a.getOrCreateScope(defaultSLIScope)
@@ -375,18 +343,17 @@ func (a *AggMetrics) getOrCreateScope(scope string) (*sliMetrics, error) {
}
sm := &sliMetrics{
- EmittedMessages: a.EmittedMessages.AddChild(scope),
- EmittedBytes: a.EmittedBytes.AddChild(scope),
- FlushedBytes: a.FlushedBytes.AddChild(scope),
- BatchHistNanos: a.BatchHistNanos.AddChild(scope),
- Flushes: a.Flushes.AddChild(scope),
- FlushHistNanos: a.FlushHistNanos.AddChild(scope),
- CommitLatency: a.CommitLatency.AddChild(scope),
- ErrorRetries: a.ErrorRetries.AddChild(scope),
- AdmitLatency: a.AdmitLatency.AddChild(scope),
- BackfillCount: a.BackfillCount.AddChild(scope),
- BackfillPendingRanges: a.BackfillPendingRanges.AddChild(scope),
- RunningCount: a.RunningCount.AddChild(scope),
+ EmittedMessages: a.EmittedMessages.AddChild(scope),
+ EmittedBytes: a.EmittedBytes.AddChild(scope),
+ FlushedBytes: a.FlushedBytes.AddChild(scope),
+ BatchHistNanos: a.BatchHistNanos.AddChild(scope),
+ Flushes: a.Flushes.AddChild(scope),
+ FlushHistNanos: a.FlushHistNanos.AddChild(scope),
+ CommitLatency: a.CommitLatency.AddChild(scope),
+ ErrorRetries: a.ErrorRetries.AddChild(scope),
+ AdmitLatency: a.AdmitLatency.AddChild(scope),
+ BackfillCount: a.BackfillCount.AddChild(scope),
+ RunningCount: a.RunningCount.AddChild(scope),
}
a.mu.sliMetrics[scope] = sm
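The getBackfillRangeCallback helper removed in the metrics.go hunks above follows a small closure pattern: register the number of pending ranges on a gauge up front, then hand back a dec() to call once per emitted range and a clear() to drop whatever is still outstanding if the backfill is cancelled. Below is a minimal, self-contained sketch of that pattern; the toy gauge type and the main() usage are illustrative stand-ins and not the aggmetric API used in the diff.

```go
package main

import (
	"fmt"
	"sync/atomic"
)

// gauge is a toy stand-in for the metrics gauge used in the real code.
type gauge struct{ v int64 }

func (g *gauge) Inc(d int64) { atomic.AddInt64(&g.v, d) }
func (g *gauge) Dec(d int64) { atomic.AddInt64(&g.v, -d) }

// backfillRangeCallback registers `initial` pending ranges on the gauge and
// returns dec, which marks one range as emitted, and clear, which removes
// whatever is still outstanding (e.g. when the backfill is cancelled).
func backfillRangeCallback(pending *gauge) func(initial int64) (dec, clear func()) {
	return func(initial int64) (dec, clear func()) {
		remaining := initial
		pending.Inc(initial)
		dec = func() {
			pending.Dec(1)
			atomic.AddInt64(&remaining, -1)
		}
		clear = func() {
			pending.Dec(atomic.LoadInt64(&remaining))
			atomic.StoreInt64(&remaining, 0)
		}
		return dec, clear
	}
}

func main() {
	var pending gauge
	dec, clear := backfillRangeCallback(&pending)(3) // scan starts with 3 ranges
	dec()                                            // one range fully emitted
	clear()                                          // backfill cancelled; drop the remaining 2
	fmt.Println(atomic.LoadInt64(&pending.v))        // prints 0
}
```

The point of clear() is that a cancelled backfill can zero out its own contribution to the gauge without the caller having to track how many dec() calls it still owes.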
diff --git a/pkg/ccl/changefeedccl/rowfetcher_cache.go b/pkg/ccl/changefeedccl/rowfetcher_cache.go
index cfdcc82990..d5fce6be70 100644
--- a/pkg/ccl/changefeedccl/rowfetcher_cache.go
+++ b/pkg/ccl/changefeedccl/rowfetcher_cache.go
@@ -44,9 +44,8 @@ type rowFetcherCache struct {
}
type cachedFetcher struct {
- tableDesc catalog.TableDescriptor
- fetcher row.Fetcher
- familyDesc descpb.ColumnFamilyDescriptor
+ tableDesc catalog.TableDescriptor
+ fetcher row.Fetcher
}
var rfCacheConfig = cache.Config{
@@ -60,7 +59,6 @@ var rfCacheConfig = cache.Config{
type idVersion struct {
id descpb.ID
version descpb.DescriptorVersion
- family descpb.FamilyID
}
func newRowFetcherCache(
@@ -81,31 +79,24 @@ func newRowFetcherCache(
func (c *rowFetcherCache) TableDescForKey(
ctx context.Context, key roachpb.Key, ts hlc.Timestamp,
-) (catalog.TableDescriptor, descpb.FamilyID, error) {
+) (catalog.TableDescriptor, error) {
var tableDesc catalog.TableDescriptor
key, err := c.codec.StripTenantPrefix(key)
if err != nil {
- return nil, descpb.FamilyID(0), err
+ return nil, err
}
remaining, tableID, _, err := rowenc.DecodePartialTableIDIndexID(key)
if err != nil {
- return nil, descpb.FamilyID(0), err
- }
-
- familyID, err := keys.DecodeFamilyKey(key)
- if err != nil {
- return nil, descpb.FamilyID(0), err
+ return nil, err
}
- family := descpb.FamilyID(familyID)
-
// Retrieve the target TableDescriptor from the lease manager. No caching
// is attempted because the lease manager does its own caching.
desc, err := c.leaseMgr.Acquire(ctx, ts, tableID)
if err != nil {
// Manager can return all kinds of errors during chaos, but based on
// its usage, none of them should ever be terminal.
- return nil, family, changefeedbase.MarkRetryableError(err)
+ return nil, changefeedbase.MarkRetryableError(err)
}
tableDesc = desc.Underlying().(catalog.TableDescriptor)
// Immediately release the lease, since we only need it for the exact
@@ -132,7 +123,7 @@ func (c *rowFetcherCache) TableDescForKey(
}); err != nil {
// Manager can return all kinds of errors during chaos, but based on
// its usage, none of them should ever be terminal.
- return nil, family, changefeedbase.MarkRetryableError(err)
+ return nil, changefeedbase.MarkRetryableError(err)
}
// Immediately release the lease, since we only need it for the exact
// timestamp requested.
@@ -143,12 +134,12 @@ func (c *rowFetcherCache) TableDescForKey(
for skippedCols := 0; skippedCols < tableDesc.GetPrimaryIndex().NumKeyColumns(); skippedCols++ {
l, err := encoding.PeekLength(remaining)
if err != nil {
- return nil, family, err
+ return nil, err
}
remaining = remaining[l:]
}
- return tableDesc, family, nil
+ return tableDesc, nil
}
func (c *rowFetcherCache) RowFetcherForTableDesc(
@@ -200,62 +191,3 @@ func (c *rowFetcherCache) RowFetcherForTableDesc(
c.fetchers.Add(idVer, f)
return rf, nil
}
-
-func (c *rowFetcherCache) RowFetcherForColumnFamily(
- tableDesc catalog.TableDescriptor, family descpb.FamilyID,
-) (*row.Fetcher, error) {
- idVer := idVersion{id: tableDesc.GetID(), version: tableDesc.GetVersion(), family: family}
- // Ensure that all user defined types are up to date with the cached
- // version and the desired version to use the cache. It is safe to use
- // UserDefinedTypeColsHaveSameVersion if we have a hit because we are
- // guaranteed that the tables have the same version. Additionally, these
- // fetchers are always initialized with a single tabledesc.Immutable.
- // TODO (zinger): Only check types used in the relevant family.
- if v, ok := c.fetchers.Get(idVer); ok {
- f := v.(*cachedFetcher)
- if catalog.UserDefinedTypeColsHaveSameVersion(tableDesc, f.tableDesc) {
- return &f.fetcher, nil
- }
- }
-
- familyDesc, err := tableDesc.FindFamilyByID(family)
- if err != nil {
- return nil, err
- }
-
- f := &cachedFetcher{
- tableDesc: tableDesc,
- familyDesc: *familyDesc,
- }
- rf := &f.fetcher
-
- var spec descpb.IndexFetchSpec
-
- // TODO (zinger): Make fetchColumnIDs only the family and the primary key.
- // This seems to cause an error without further work but would be more efficient.
- if err := rowenc.InitIndexFetchSpec(
- &spec, c.codec, tableDesc, tableDesc.GetPrimaryIndex(), tableDesc.PublicColumnIDs(),
- ); err != nil {
- return nil, err
- }
-
- if err := rf.Init(
- context.TODO(),
- false, /* reverse */
- descpb.ScanLockingStrength_FOR_NONE,
- descpb.ScanLockingWaitPolicy_BLOCK,
- 0, /* lockTimeout */
- &c.a,
- nil, /* memMonitor */
- &spec,
- ); err != nil {
- return nil, err
- }
-
- // Necessary because virtual columns are not populated.
- // TODO(radu): should we stop requesting those columns from the fetcher?
- rf.IgnoreUnexpectedNulls = true
-
- c.fetchers.Add(idVer, f)
- return rf, nil
-}
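The RowFetcherForColumnFamily method removed above caches one initialized fetcher per (table ID, descriptor version, column family) and only reuses a hit when the cached descriptor's user-defined types are still current. A rough sketch of that cache-with-validity-check shape follows; the types here (tableSnapshot, rowFetcher, fetcherKey) are hypothetical stand-ins, not the catalog/row APIs from the diff.

```go
package fetchercache

// tableSnapshot and rowFetcher are stand-ins for catalog.TableDescriptor and
// row.Fetcher from the diff.
type tableSnapshot struct {
	typeVersions map[string]uint64 // user-defined type name -> version
}

type rowFetcher struct{} // initialized scan state would live here

// fetcherKey mirrors the idVersion key in the diff: one entry per table
// descriptor ID, descriptor version, and column family.
type fetcherKey struct {
	tableID  uint32
	version  uint64
	familyID uint32
}

type cachedEntry struct {
	desc    tableSnapshot // snapshot the fetcher was built from
	fetcher *rowFetcher
}

type fetcherCache struct {
	entries map[fetcherKey]cachedEntry
}

func newFetcherCache() *fetcherCache {
	return &fetcherCache{entries: map[fetcherKey]cachedEntry{}}
}

// get reuses a cached fetcher only when an entry exists and stillValid accepts
// the snapshot it was built from (the real code re-checks user-defined type
// versions on every hit); otherwise the caller builds a fresh fetcher and
// stores it with put.
func (c *fetcherCache) get(k fetcherKey, stillValid func(tableSnapshot) bool) (*rowFetcher, bool) {
	if e, ok := c.entries[k]; ok && stillValid(e.desc) {
		return e.fetcher, true
	}
	return nil, false
}

func (c *fetcherCache) put(k fetcherKey, desc tableSnapshot, f *rowFetcher) {
	c.entries[k] = cachedEntry{desc: desc, fetcher: f}
}
```

On a miss or a failed validity check the caller rebuilds the fetcher and overwrites the entry, which is what both RowFetcherForTableDesc and the removed RowFetcherForColumnFamily do with c.fetchers.Add.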
diff --git a/pkg/ccl/changefeedccl/schemafeed/schema_feed.go b/pkg/ccl/changefeedccl/schemafeed/schema_feed.go
index c4b36f5fbe..2761e653b6 100644
--- a/pkg/ccl/changefeedccl/schemafeed/schema_feed.go
+++ b/pkg/ccl/changefeedccl/schemafeed/schema_feed.go
@@ -81,7 +81,6 @@ func New(
targets []jobspb.ChangefeedTargetSpecification,
initialHighwater hlc.Timestamp,
metrics *Metrics,
- changefeedOpts map[string]string,
) SchemaFeed {
m := &schemaFeed{
filter: schemaChangeEventFilters[events],
@@ -93,7 +92,6 @@ func New(
ie: cfg.SessionBoundInternalExecutorFactory(ctx, &sessiondata.SessionData{}),
collectionFactory: cfg.CollectionFactory,
metrics: metrics,
- changefeedOpts: changefeedOpts,
}
m.mu.previousTableVersion = make(map[descpb.ID]catalog.TableDescriptor)
m.mu.highWater = initialHighwater
@@ -112,14 +110,13 @@ func New(
// invariant (via `validateFn`). An error timestamp is also kept, which is the
// lowest timestamp where at least one table doesn't meet the invariant.
type schemaFeed struct {
- filter tableEventFilter
- db *kv.DB
- clock *hlc.Clock
- settings *cluster.Settings
- targets []jobspb.ChangefeedTargetSpecification
- ie sqlutil.InternalExecutor
- metrics *Metrics
- changefeedOpts map[string]string
+ filter tableEventFilter
+ db *kv.DB
+ clock *hlc.Clock
+ settings *cluster.Settings
+ targets []jobspb.ChangefeedTargetSpecification
+ ie sqlutil.InternalExecutor
+ metrics *Metrics
// TODO(ajwerner): Should this live underneath the FilterFunc?
// Should there be another function to decide whether to update the
@@ -528,7 +525,7 @@ func (tf *schemaFeed) validateDescriptor(
// manager to acquire the freshest version of the type.
return tf.leaseMgr.AcquireFreshestFromStore(ctx, desc.GetID())
case catalog.TableDescriptor:
- if err := changefeedbase.ValidateTable(tf.targets, desc, tf.changefeedOpts); err != nil {
+ if err := changefeedbase.ValidateTable(tf.targets, desc); err != nil {
return err
}
log.VEventf(ctx, 1, "validate %v", formatDesc(desc))
diff --git a/pkg/ccl/changefeedccl/sink_kafka.go b/pkg/ccl/changefeedccl/sink_kafka.go
index 95f827bdd7..273578cfcb 100644
--- a/pkg/ccl/changefeedccl/sink_kafka.go
+++ b/pkg/ccl/changefeedccl/sink_kafka.go
@@ -360,11 +360,6 @@ func (s *kafkaSink) workerLoop() {
ackMsg = m
case err := <-s.producer.Errors():
ackMsg, ackError = err.Msg, err.Err
- if ackError != nil {
- ackError = errors.Wrapf(ackError,
- "while sending message with key=%s, size=%d",
- err.Msg.Key, err.Msg.Key.Length()+err.Msg.Value.Length())
- }
}
if m, ok := ackMsg.Metadata.(messageMetadata); ok {
diff --git a/pkg/ccl/cliccl/debug.go b/pkg/ccl/cliccl/debug.go
index 138b4bdf39..81bcbfd5a4 100644
--- a/pkg/ccl/cliccl/debug.go
+++ b/pkg/ccl/cliccl/debug.go
@@ -160,7 +160,7 @@ func runEncryptionStatus(cmd *cobra.Command, args []string) error {
dir := args[0]
- db, err := cli.OpenExistingStore(dir, stopper, true /* readOnly */, false /* disableAutomaticCompactions */)
+ db, err := cli.OpenExistingStore(dir, stopper, true /* readOnly */)
if err != nil {
return err
}
diff --git a/pkg/ccl/kvccl/kvtenantccl/BUILD.bazel b/pkg/ccl/kvccl/kvtenantccl/BUILD.bazel
index a9c2164988..16ec8993c2 100644
--- a/pkg/ccl/kvccl/kvtenantccl/BUILD.bazel
+++ b/pkg/ccl/kvccl/kvtenantccl/BUILD.bazel
@@ -48,7 +48,6 @@ go_test(
"tenant_upgrade_test.go",
],
embed = [":kvtenantccl"],
- shard_count = 16,
deps = [
"//pkg/base",
"//pkg/ccl/utilccl",
@@ -85,7 +84,6 @@ go_test(
"//pkg/util/stop",
"//pkg/util/tracing",
"//pkg/util/uuid",
- "@com_github_cockroachdb_redact//:redact",
"@com_github_stretchr_testify//assert",
"@com_github_stretchr_testify//require",
"@org_golang_google_grpc//codes",
diff --git a/pkg/ccl/kvccl/kvtenantccl/connector.go b/pkg/ccl/kvccl/kvtenantccl/connector.go
index 1b28cad4f8..ddce7fd207 100644
--- a/pkg/ccl/kvccl/kvtenantccl/connector.go
+++ b/pkg/ccl/kvccl/kvtenantccl/connector.go
@@ -120,11 +120,6 @@ var _ config.SystemConfigProvider = (*Connector)(nil)
// multi-region primitives.
var _ serverpb.RegionsServer = (*Connector)(nil)
-// Connector is capable of finding debug information about the current
-// tenant within the cluster. This is necessary for things such as
-// debug zip and range reports.
-var _ serverpb.TenantStatusServer = (*Connector)(nil)
-
// Connector is capable of accessing span configurations for secondary tenants.
var _ spanconfig.KVAccessor = (*Connector)(nil)
@@ -433,21 +428,6 @@ func (c *Connector) Regions(
return resp, nil
}
-// TenantRanges implements the serverpb.TenantStatusServer interface
-func (c *Connector) TenantRanges(
- ctx context.Context, req *serverpb.TenantRangesRequest,
-) (resp *serverpb.TenantRangesResponse, _ error) {
- if err := c.withClient(ctx, func(ctx context.Context, c *client) error {
- var err error
- resp, err = c.TenantRanges(ctx, req)
- return err
- }); err != nil {
- return nil, err
- }
-
- return resp, nil
-}
-
// FirstRange implements the kvcoord.RangeDescriptorDB interface.
func (c *Connector) FirstRange() (*roachpb.RangeDescriptor, error) {
return nil, status.Error(codes.Unauthenticated, "kvtenant.Proxy does not have access to FirstRange")
diff --git a/pkg/ccl/kvccl/kvtenantccl/tenant_trace_test.go b/pkg/ccl/kvccl/kvtenantccl/tenant_trace_test.go
index dcb8819ce3..4399ccdba4 100644
--- a/pkg/ccl/kvccl/kvtenantccl/tenant_trace_test.go
+++ b/pkg/ccl/kvccl/kvtenantccl/tenant_trace_test.go
@@ -26,7 +26,6 @@ import (
"github.com/cockroachdb/cockroach/pkg/util/leaktest"
"github.com/cockroachdb/cockroach/pkg/util/log"
"github.com/cockroachdb/cockroach/pkg/util/tracing"
- "github.com/cockroachdb/redact"
"github.com/stretchr/testify/require"
)
@@ -58,7 +57,7 @@ func testTenantTracesAreRedactedImpl(t *testing.T, redactable bool) {
EvalKnobs: kvserverbase.BatchEvalTestingKnobs{
TestingEvalFilter: func(args kvserverbase.FilterArgs) *roachpb.Error {
log.Eventf(args.Ctx, "%v", sensitiveString)
- log.Eventf(args.Ctx, "%v", redact.Safe(visibleString))
+ log.Eventf(args.Ctx, "%v", log.Safe(visibleString))
return nil
},
},
diff --git a/pkg/ccl/logictestccl/testdata/logic_test/crdb_internal_tenant b/pkg/ccl/logictestccl/testdata/logic_test/crdb_internal_tenant
index 0c28ebb0b3..c4477aa8a6 100644
--- a/pkg/ccl/logictestccl/testdata/logic_test/crdb_internal_tenant
+++ b/pkg/ccl/logictestccl/testdata/logic_test/crdb_internal_tenant
@@ -97,7 +97,6 @@ crdb_internal table_indexes table NULL NULL NULL
crdb_internal table_row_statistics table NULL NULL NULL
crdb_internal tables table NULL NULL NULL
crdb_internal tenant_usage_details view NULL NULL NULL
-crdb_internal transaction_contention_events table NULL NULL NULL
crdb_internal transaction_statistics view NULL NULL NULL
crdb_internal zones table NULL NULL NULL
diff --git a/pkg/ccl/logictestccl/testdata/logic_test/partitioning_hash_sharded_index b/pkg/ccl/logictestccl/testdata/logic_test/partitioning_hash_sharded_index
index 03b696cc89..b7706cbc31 100644
--- a/pkg/ccl/logictestccl/testdata/logic_test/partitioning_hash_sharded_index
+++ b/pkg/ccl/logictestccl/testdata/logic_test/partitioning_hash_sharded_index
@@ -281,4 +281,4 @@ t_presplit 116 t_presplit_idx_member_id /Table/116/2/"seattle"/3 /Tabl
t_presplit 116 t_presplit_idx_member_id /Table/116/2/"seattle"/4 /Table/116/2/"seattle"/5
t_presplit 116 t_presplit_idx_member_id /Table/116/2/"seattle"/5 /Table/116/2/"seattle"/6
t_presplit 116 t_presplit_idx_member_id /Table/116/2/"seattle"/6 /Table/116/2/"seattle"/7
-t_presplit 116 t_presplit_idx_member_id /Table/116/2/"seattle"/7 /Table/116/3/"new york"/0
+t_presplit 116 t_presplit_idx_member_id /Table/116/2/"seattle"/7 /Max
diff --git a/pkg/ccl/logictestccl/testdata/logic_test/regional_by_row_hash_sharded_index_query_plan b/pkg/ccl/logictestccl/testdata/logic_test/regional_by_row_hash_sharded_index_query_plan
index 585df35668..76e0db7d72 100644
--- a/pkg/ccl/logictestccl/testdata/logic_test/regional_by_row_hash_sharded_index_query_plan
+++ b/pkg/ccl/logictestccl/testdata/logic_test/regional_by_row_hash_sharded_index_query_plan
@@ -36,7 +36,7 @@ CREATE TABLE t_child_regional (
# TODO (mgartner): there is a lookup join with lookup condition checking every
# single shard. This is unnecessary and could be improved by having the shard
# number calculated instead of looking at all possible shards.
-query T retry
+query T
EXPLAIN (VERBOSE) INSERT INTO t_child VALUES (123, 321);
----
distribution: local
diff --git a/pkg/ccl/schemachangerccl/BUILD.bazel b/pkg/ccl/schemachangerccl/BUILD.bazel
index bd5a4da608..956253970d 100644
--- a/pkg/ccl/schemachangerccl/BUILD.bazel
+++ b/pkg/ccl/schemachangerccl/BUILD.bazel
@@ -11,7 +11,6 @@ go_test(
"//pkg/sql/schemachanger:testdata",
],
embed = [":schemachangerccl"],
- shard_count = 16,
deps = [
"//pkg/base",
"//pkg/build/bazel",
diff --git a/pkg/ccl/serverccl/statusccl/BUILD.bazel b/pkg/ccl/serverccl/statusccl/BUILD.bazel
index 034fefe8d6..ded721723a 100644
--- a/pkg/ccl/serverccl/statusccl/BUILD.bazel
+++ b/pkg/ccl/serverccl/statusccl/BUILD.bazel
@@ -10,8 +10,6 @@ go_library(
"//pkg/roachpb",
"//pkg/security",
"//pkg/server/serverpb",
- "//pkg/sql",
- "//pkg/sql/contention",
"//pkg/sql/pgwire",
"//pkg/sql/sqlstats/persistedsqlstats",
"//pkg/sql/tests",
@@ -36,7 +34,6 @@ go_test(
"//pkg/ccl",
"//pkg/ccl/kvccl",
"//pkg/ccl/utilccl",
- "//pkg/keys",
"//pkg/roachpb",
"//pkg/rpc",
"//pkg/security",
diff --git a/pkg/ccl/serverccl/statusccl/tenant_status_test.go b/pkg/ccl/serverccl/statusccl/tenant_status_test.go
index b615a2265c..2633187ca8 100644
--- a/pkg/ccl/serverccl/statusccl/tenant_status_test.go
+++ b/pkg/ccl/serverccl/statusccl/tenant_status_test.go
@@ -22,7 +22,6 @@ import (
"github.com/cockroachdb/cockroach/pkg/base"
_ "github.com/cockroachdb/cockroach/pkg/ccl/kvccl"
- "github.com/cockroachdb/cockroach/pkg/keys"
"github.com/cockroachdb/cockroach/pkg/roachpb"
"github.com/cockroachdb/cockroach/pkg/security"
"github.com/cockroachdb/cockroach/pkg/server/serverpb"
@@ -97,10 +96,6 @@ func TestTenantStatusAPI(t *testing.T) {
t.Run("txn_id_resolution", func(t *testing.T) {
testTxnIDResolutionRPC(ctx, t, testHelper)
})
-
- t.Run("tenant_ranges", func(t *testing.T) {
- testTenantRangesRPC(ctx, t, testHelper)
- })
}
func TestTenantCannotSeeNonTenantStats(t *testing.T) {
@@ -718,26 +713,6 @@ SET TRACING=off;
t.Errorf("did not expect contention event in controlled cluster, but it was found")
}
}
-
- testutils.SucceedsWithin(t, func() error {
- err = testHelper.testCluster().tenantContentionRegistry(1).FlushEventsForTest(ctx)
- if err != nil {
- return err
- }
-
- resp := &serverpb.TransactionContentionEventsResponse{}
- testHelper.
- testCluster().
- tenantHTTPClient(t, 1).
- GetJSON("/_status/transactioncontentionevents", resp)
-
- if len(resp.Events) == 0 {
- return errors.New("expected transaction contention events being populated, " +
- "but it is not")
- }
-
- return nil
- }, 5*time.Second)
}
func testIndexUsageForTenants(t *testing.T, testHelper *tenantTestHelper) {
@@ -949,7 +924,7 @@ func testTxnIDResolutionRPC(ctx context.Context, t *testing.T, helper *tenantTes
"expected a valid txnID, but %+v is found", result)
sqlConn.Exec(t, "COMMIT")
- testutils.SucceedsSoon(t, func() error {
+ testutils.SucceedsWithin(t, func() error {
resp, err := status.TxnIDResolution(ctx, &serverpb.TxnIDResolutionRequest{
CoordinatorID: strconv.Itoa(int(coordinatorNodeID)),
TxnIDs: []uuid.UUID{txnID},
@@ -964,7 +939,7 @@ func testTxnIDResolutionRPC(ctx context.Context, t *testing.T, helper *tenantTes
"was not", txnID.String(), coordinatorNodeID)
require.NotEqual(t, roachpb.InvalidTransactionFingerprintID, resp.ResolvedTxnIDs[0].TxnFingerprintID)
return nil
- })
+ }, 3*time.Second)
}
t.Run("regular_cluster", func(t *testing.T) {
@@ -983,60 +958,3 @@ func testTxnIDResolutionRPC(ctx context.Context, t *testing.T, helper *tenantTes
run(sqlConn, status, 1 /* coordinatorNodeID */)
})
}
-
-func testTenantRangesRPC(_ context.Context, t *testing.T, helper *tenantTestHelper) {
- tenantA := helper.testCluster().tenant(0).tenant.TenantStatusServer().(serverpb.TenantStatusServer)
- keyPrefixForA := keys.MakeTenantPrefix(helper.testCluster().tenant(0).tenant.RPCContext().TenantID)
- keyPrefixEndForA := keyPrefixForA.PrefixEnd()
-
- tenantB := helper.controlCluster().tenant(0).tenant.TenantStatusServer().(serverpb.TenantStatusServer)
- keyPrefixForB := keys.MakeTenantPrefix(helper.controlCluster().tenant(0).tenant.RPCContext().TenantID)
- keyPrefixEndForB := keyPrefixForB.PrefixEnd()
-
- resp, err := tenantA.TenantRanges(context.Background(), &serverpb.TenantRangesRequest{})
- require.NoError(t, err)
- require.NotEmpty(t, resp.RangesByLocality)
- for localityKey, rangeList := range resp.RangesByLocality {
- require.NotEmpty(t, localityKey)
- for _, r := range rangeList.Ranges {
- assertStartKeyInRange(t, r.Span.StartKey, keyPrefixForA)
- assertEndKeyInRange(t, r.Span.EndKey, keyPrefixForA, keyPrefixEndForA)
- }
- }
-
- resp, err = tenantB.TenantRanges(context.Background(), &serverpb.TenantRangesRequest{})
- require.NoError(t, err)
- require.NotEmpty(t, resp.RangesByLocality)
- for localityKey, rangeList := range resp.RangesByLocality {
- require.NotEmpty(t, localityKey)
- for _, r := range rangeList.Ranges {
- assertStartKeyInRange(t, r.Span.StartKey, keyPrefixForB)
- assertEndKeyInRange(t, r.Span.EndKey, keyPrefixForB, keyPrefixEndForB)
- }
- }
-}
-
-// assertStartKeyInRange compares the pretty printed startKey with the provided
-// tenantPrefix key, ensuring that the startKey starts with the tenantPrefix.
-func assertStartKeyInRange(t *testing.T, startKey string, tenantPrefix roachpb.Key) {
- require.Truef(t, strings.Index(startKey, tenantPrefix.String()) == 0,
- fmt.Sprintf("start key %s is outside of the tenant's keyspace (prefix: %v)",
- startKey, tenantPrefix.String()))
-}
-
-// assertEndKeyInRange compares the pretty printed endKey with the provided
-// tenantPrefix and tenantPrefixEnd keys. Ensures that the key starts with
-// either the tenantPrefix, or the tenantPrefixEnd (valid as end keys are
-// exclusive).
-func assertEndKeyInRange(
- t *testing.T, endKey string, tenantPrefix roachpb.Key, tenantPrefixEnd roachpb.Key,
-) {
- require.Truef(t,
- strings.Index(endKey, tenantPrefix.String()) == 0 ||
- strings.Index(endKey, tenantPrefixEnd.String()) == 0 ||
- // Possible if the tenant's ranges fall at the end of the entire keyspace
- // range within the cluster.
- endKey == "/Max",
- fmt.Sprintf("end key %s is outside of the tenant's keyspace (prefix: %v, prefixEnd: %v)",
- endKey, tenantPrefix.String(), tenantPrefixEnd.String()))
-}
diff --git a/pkg/ccl/serverccl/statusccl/tenant_test_utils.go b/pkg/ccl/serverccl/statusccl/tenant_test_utils.go
index 4156a271e2..91c5a15acc 100644
--- a/pkg/ccl/serverccl/statusccl/tenant_test_utils.go
+++ b/pkg/ccl/serverccl/statusccl/tenant_test_utils.go
@@ -19,8 +19,6 @@ import (
"github.com/cockroachdb/cockroach/pkg/roachpb"
"github.com/cockroachdb/cockroach/pkg/security"
"github.com/cockroachdb/cockroach/pkg/server/serverpb"
- "github.com/cockroachdb/cockroach/pkg/sql"
- "github.com/cockroachdb/cockroach/pkg/sql/contention"
"github.com/cockroachdb/cockroach/pkg/sql/pgwire"
"github.com/cockroachdb/cockroach/pkg/sql/sqlstats/persistedsqlstats"
"github.com/cockroachdb/cockroach/pkg/sql/tests"
@@ -39,12 +37,11 @@ type serverIdx int
const randomServer serverIdx = -1
type testTenant struct {
- tenant serverutils.TestTenantInterface
- tenantConn *gosql.DB
- tenantDB *sqlutils.SQLRunner
- tenantStatus serverpb.SQLStatusServer
- tenantSQLStats *persistedsqlstats.PersistedSQLStats
- tenantContentionRegistry *contention.Registry
+ tenant serverutils.TestTenantInterface
+ tenantConn *gosql.DB
+ tenantDB *sqlutils.SQLRunner
+ tenantStatus serverpb.SQLStatusServer
+ tenantSQLStats *persistedsqlstats.PersistedSQLStats
}
func newTestTenant(
@@ -65,15 +62,13 @@ func newTestTenant(
status := tenant.StatusServer().(serverpb.SQLStatusServer)
sqlStats := tenant.PGServer().(*pgwire.Server).SQLServer.
GetSQLStatsProvider().(*persistedsqlstats.PersistedSQLStats)
- contentionRegistry := tenant.ExecutorConfig().(sql.ExecutorConfig).ContentionRegistry
return &testTenant{
- tenant: tenant,
- tenantConn: tenantConn,
- tenantDB: sqlDB,
- tenantStatus: status,
- tenantSQLStats: sqlStats,
- tenantContentionRegistry: contentionRegistry,
+ tenant: tenant,
+ tenantConn: tenantConn,
+ tenantDB: sqlDB,
+ tenantStatus: status,
+ tenantSQLStats: sqlStats,
}
}
@@ -177,10 +172,6 @@ func (c tenantCluster) tenantStatusSrv(idx serverIdx) serverpb.SQLStatusServer {
return c.tenant(idx).tenantStatus
}
-func (c tenantCluster) tenantContentionRegistry(idx serverIdx) *contention.Registry {
- return c.tenant(idx).tenantContentionRegistry
-}
-
func (c tenantCluster) cleanup(t *testing.T) {
for _, tenant := range c {
tenant.cleanup(t)
diff --git a/pkg/ccl/sqlproxyccl/BUILD.bazel b/pkg/ccl/sqlproxyccl/BUILD.bazel
index 69a8d24b3d..8eec3dd6ba 100644
--- a/pkg/ccl/sqlproxyccl/BUILD.bazel
+++ b/pkg/ccl/sqlproxyccl/BUILD.bazel
@@ -6,7 +6,6 @@ go_library(
srcs = [
"authentication.go",
"backend_dialer.go",
- "conn_migration.go",
"connector.go",
"error.go",
"forwarder.go",
@@ -29,7 +28,6 @@ go_library(
"//pkg/security/certmgr",
"//pkg/sql/pgwire",
"//pkg/sql/pgwire/pgcode",
- "//pkg/sql/pgwire/pgwirebase",
"//pkg/util/contextutil",
"//pkg/util/grpcutil",
"//pkg/util/httputil",
@@ -54,7 +52,6 @@ go_test(
size = "small",
srcs = [
"authentication_test.go",
- "conn_migration_test.go",
"connector_test.go",
"forwarder_test.go",
"frontend_admitter_test.go",
@@ -68,7 +65,6 @@ go_test(
"//pkg/base",
"//pkg/ccl/kvccl/kvtenantccl",
"//pkg/ccl/sqlproxyccl/denylist",
- "//pkg/ccl/sqlproxyccl/interceptor",
"//pkg/ccl/sqlproxyccl/tenantdirsvr",
"//pkg/ccl/sqlproxyccl/throttler",
"//pkg/ccl/utilccl",
diff --git a/pkg/ccl/sqlproxyccl/authentication.go b/pkg/ccl/sqlproxyccl/authentication.go
index f1635fd43f..e9e8f32604 100644
--- a/pkg/ccl/sqlproxyccl/authentication.go
+++ b/pkg/ccl/sqlproxyccl/authentication.go
@@ -153,19 +153,19 @@ var authenticate = func(clientConn, crdbConn net.Conn, throttleHook func(throttl
// we should merge them back in the future. Instead of having the writer as the
// other end, the writer should be the same connection. That way, a
// sqlproxyccl.Conn can be used to read-from, or write-to the same component.
-var readTokenAuthResult = func(conn net.Conn) error {
+var readTokenAuthResult = func(serverConn net.Conn) error {
// This interceptor is discarded once this function returns. Just like
- // pgproto3.NewFrontend, this serverConn object has an internal buffer.
+ // pgproto3.NewFrontend, this interceptor has an internal buffer.
// Discarding the buffer is fine since there won't be any other messages
// from the server once we receive the ReadyForQuery message because the
// caller (i.e. proxy) does not forward client messages until then.
- serverConn := interceptor.NewFrontendConn(conn)
+ serverInterceptor := interceptor.NewFrontendInterceptor(serverConn)
// The auth step should require only a few back and forths so 20 iterations
// should be enough.
var i int
for ; i < 20; i++ {
- backendMsg, err := serverConn.ReadMsg()
+ backendMsg, err := serverInterceptor.ReadMsg()
if err != nil {
return newErrorf(codeBackendReadFailed, "unable to receive message from backend: %v", err)
}
diff --git a/pkg/ccl/sqlproxyccl/conn_migration.go b/pkg/ccl/sqlproxyccl/conn_migration.go
deleted file mode 100644
index 3224e75f31..0000000000
--- a/pkg/ccl/sqlproxyccl/conn_migration.go
+++ /dev/null
@@ -1,371 +0,0 @@
-// Copyright 2022 The Cockroach Authors.
-//
-// Licensed as a CockroachDB Enterprise file under the Cockroach Community
-// License (the "License"); you may not use this file except in compliance with
-// the License. You may obtain a copy of the License at
-//
-// https://github.com/cockroachdb/cockroach/blob/master/licenses/CCL.txt
-
-package sqlproxyccl
-
-import (
- "context"
- "encoding/json"
- "fmt"
- "io"
-
- "github.com/cockroachdb/cockroach/pkg/ccl/sqlproxyccl/interceptor"
- "github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgwirebase"
- "github.com/cockroachdb/errors"
- pgproto3 "github.com/jackc/pgproto3/v2"
-)
-
-// runShowTransferState sends a SHOW TRANSFER STATE query with the input
-// transferKey to the given writer. The transferKey will be used to uniquely
-// identify the request when parsing the response messages in
-// waitForShowTransferState.
-//
-// Unlike runAndWaitForDeserializeSession, we split the SHOW TRANSFER STATE
-// operation into `run` and `wait` since they both will be invoked in different
-// goroutines. If we combined them, we'd have to wait for at least one of the
-// goroutines to pause, which can introduce a latency of about 1-2s per transfer
-// while waiting for Read in readTimeoutConn to be unblocked.
-func runShowTransferState(w io.Writer, transferKey string) error {
- return writeQuery(w, "SHOW TRANSFER STATE WITH '%s'", transferKey)
-}
-
-// waitForShowTransferState retrieves the transfer state from the SQL pod
-// through SHOW TRANSFER STATE WITH 'key'. It is assumed that the last message
-// from the server was ReadyForQuery, so the server is ready to accept a query.
-// Since ReadyForQuery may be for a previous pipelined query, this handles the
-// forwarding of messages back to the client in case we don't see our state yet.
-//
-// WARNING: When using this, we assume that no other goroutines are using both
-// serverConn and clientConn. In the context of a transfer, the response
-// processor must be blocked to avoid concurrent reads from serverConn.
-var waitForShowTransferState = func(
- ctx context.Context,
- serverConn *interceptor.FrontendConn,
- clientConn io.Writer,
- transferKey string,
-) (transferErr string, state string, revivalToken string, retErr error) {
- // Wait for a response that looks like the following:
- //
- // error | session_state_base64 | session_revival_token_base64 | transfer_key
- // --------+----------------------+------------------------------+---------------
- // NULL | .................... | ............................ | <transferKey>
- // (1 row)
- //
- // Postgres messages always come in the following order for the
- // SHOW TRANSFER STATE WITH '<transferKey>' query:
- // 1. RowDescription
- // 2. DataRow
- // 3. CommandComplete
- // 4. ReadyForQuery
-
- // 1. Wait for the relevant RowDescription.
- if err := waitForSmallRowDescription(
- ctx,
- serverConn,
- clientConn,
- func(msg *pgproto3.RowDescription) bool {
- // Do we have the right number of columns?
- if len(msg.Fields) != 4 {
- return false
- }
- // Do the names of the columns match?
- var transferStateCols = []string{
- "error",
- "session_state_base64",
- "session_revival_token_base64",
- "transfer_key",
- }
- for i, col := range transferStateCols {
- if string(msg.Fields[i].Name) != col {
- return false
- }
- }
- return true
- },
- ); err != nil {
- return "", "", "", errors.Wrap(err, "waiting for RowDescription")
- }
-
- // 2. Read DataRow.
- if err := expectDataRow(ctx, serverConn, func(msg *pgproto3.DataRow) bool {
- // This has to be 4 since we validated RowDescription earlier.
- if len(msg.Values) != 4 {
- return false
- }
-
- // Validate transfer key. It is possible that the end-user uses the SHOW
- // TRANSFER STATE WITH 'transfer_key' statement, but that isn't designed
- // for external usage, so it is fine to just terminate here if the
- // transfer key does not match.
- if string(msg.Values[3]) != transferKey {
- return false
- }
-
- // NOTE: We have to cast to string and copy here since the slice
- // referenced in msg will no longer be valid once we read the next pgwire
- // message.
- transferErr, state, revivalToken = string(msg.Values[0]), string(msg.Values[1]), string(msg.Values[2])
- return true
- }); err != nil {
- return "", "", "", errors.Wrap(err, "expecting DataRow")
- }
-
- // 3. Read CommandComplete.
- if err := expectCommandComplete(ctx, serverConn, "SHOW TRANSFER STATE 1"); err != nil {
- return "", "", "", errors.Wrap(err, "expecting CommandComplete")
- }
-
- // 4. Read ReadyForQuery.
- if err := expectReadyForQuery(ctx, serverConn); err != nil {
- return "", "", "", errors.Wrap(err, "expecting ReadyForQuery")
- }
-
- return transferErr, state, revivalToken, nil
-}
-
-// runAndWaitForDeserializeSession deserializes state into the SQL pod through
-// crdb_internal.deserialize_session. It is assumed that the last message from
-// the server was ReadyForQuery, so the server is ready to accept a query.
-//
-// This is meant to be used with a new connection, and nothing needs to be
-// forwarded back to the client.
-//
-// WARNING: When using this, we assume that no other goroutines are using both
-// serverConn and clientConn.
-var runAndWaitForDeserializeSession = func(
- ctx context.Context, serverConn *interceptor.FrontendConn, state string,
-) error {
- // Send deserialization query.
- if err := writeQuery(serverConn,
- "SELECT crdb_internal.deserialize_session(decode('%s', 'base64'))", state); err != nil {
- return err
- }
-
- // Wait for a response that looks like the following:
- //
- // crdb_internal.deserialize_session
- // -------------------------------------
- // true
- // (1 row)
- //
- // Postgres messages always come in the following order for the
- // deserialize_session query:
- // 1. RowDescription
- // 2. DataRow
- // 3. CommandComplete
- // 4. ReadyForQuery
-
- // 1. Read RowDescription. We reuse the waitFor helper here for convenience
- // even though we are really expecting this message. This is fine because we only deserialize a
- // session for a new connection which hasn't been handed off to the user,
- // so we can guarantee that there won't be pipelined queries.
- if err := waitForSmallRowDescription(
- ctx,
- serverConn,
- &errWriter{},
- func(msg *pgproto3.RowDescription) bool {
- return len(msg.Fields) == 1 &&
- string(msg.Fields[0].Name) == "crdb_internal.deserialize_session"
- },
- ); err != nil {
- return errors.Wrap(err, "expecting RowDescription")
- }
-
- // 2. Read DataRow.
- if err := expectDataRow(ctx, serverConn, func(msg *pgproto3.DataRow) bool {
- return len(msg.Values) == 1 && string(msg.Values[0]) == "t"
- }); err != nil {
- return errors.Wrap(err, "expecting DataRow")
- }
-
- // 3. Read CommandComplete.
- if err := expectCommandComplete(ctx, serverConn, "SELECT 1"); err != nil {
- return errors.Wrap(err, "expecting CommandComplete")
- }
-
- // 4. Read ReadyForQuery.
- if err := expectReadyForQuery(ctx, serverConn); err != nil {
- return errors.Wrap(err, "expecting ReadyForQuery")
- }
-
- return nil
-}
-
-// writeQuery writes a SimpleQuery to the given writer w.
-func writeQuery(w io.Writer, format string, a ...interface{}) error {
- query := &pgproto3.Query{String: fmt.Sprintf(format, a...)}
- _, err := w.Write(query.Encode(nil))
- return err
-}
-
-// waitForSmallRowDescription waits until the next message from serverConn
-// is a *small* RowDescription message (i.e. within 4K bytes), and one that
-// passes matchFn. When that happens, this returns nil.
-//
-// For all other messages (i.e. non RowDescription or large messages), they will
-// be forwarded to clientConn. One exception to this would be the ErrorResponse
-// message, which will result in an error since we're in an ambiguous state.
-// The ErrorResponse message may be for a pipelined query, or the RowDescription
-// message that we're waiting for.
-func waitForSmallRowDescription(
- ctx context.Context,
- serverConn *interceptor.FrontendConn,
- clientConn io.Writer,
- matchFn func(*pgproto3.RowDescription) bool,
-) error {
- // Since we're waiting for the first message that matches the given
- // condition, we're going to loop here until we find one.
- for {
- if ctx.Err() != nil {
- return ctx.Err()
- }
-
- typ, size, err := serverConn.PeekMsg()
- if err != nil {
- return errors.Wrap(err, "peeking message")
- }
-
- // We don't know if the ErrorResponse is for the expected RowDescription
- // or a previous pipelined query, so return an error.
- if typ == pgwirebase.ServerMsgErrorResponse {
- // Error messages are small, so read for debugging purposes.
- msg, err := serverConn.ReadMsg()
- if err != nil {
- return errors.Wrap(err, "ambiguous ErrorResponse")
- }
- return errors.Newf("ambiguous ErrorResponse: %v", jsonOrRaw(msg))
- }
-
- // Messages are intended for the client in two cases:
- // 1. We have not seen a RowDescription message yet
- // 2. Message was too large. This function only expects a few columns.
- //
- // This is mostly an optimization, and there's no point reading such
- // messages into memory, so we'll just forward them back to the client
- // right away.
- const maxSmallMsgSize = 1 << 12 // 4KB
- if typ != pgwirebase.ServerMsgRowDescription || size > maxSmallMsgSize {
- if _, err := serverConn.ForwardMsg(clientConn); err != nil {
- return errors.Wrap(err, "forwarding message")
- }
- continue
- }
-
- msg, err := serverConn.ReadMsg()
- if err != nil {
- return errors.Wrap(err, "reading RowDescription")
- }
-
- pgMsg, ok := msg.(*pgproto3.RowDescription)
- if !ok {
- // This case will not occur since we have validated the type earlier.
- return errors.Newf("unexpected message: %v", jsonOrRaw(msg))
- }
-
- // We have found our desired RowDescription.
- if matchFn(pgMsg) {
- return nil
- }
-
- // Matching fails, so forward the message back to the client, and
- // continue searching.
- if _, err := clientConn.Write(msg.Encode(nil)); err != nil {
- return errors.Wrap(err, "writing message")
- }
- }
-}
-
-// expectDataRow expects that the next message from serverConn is a DataRow
-// message. If the next message is a DataRow message, validateFn will be called
-// to validate the contents. This function will return an error if we don't see
-// a DataRow message or the validation failed.
-//
-// WARNING: Use this with care since this reads the entire message into memory.
-// Unlike the other expectX methods, DataRow messages may be large, and this
-// does not check for that. We are currently only using this for the SHOW
-// TRANSFER and crdb_internal.deserialize_session() statements, and they both
-// have been vetted. The former's size will be guarded behind a cluster setting,
-// whereas for the latter, the response is expected to be small.
-func expectDataRow(
- ctx context.Context,
- serverConn *interceptor.FrontendConn,
- validateFn func(*pgproto3.DataRow) bool,
-) error {
- if ctx.Err() != nil {
- return ctx.Err()
- }
- msg, err := serverConn.ReadMsg()
- if err != nil {
- return errors.Wrap(err, "reading message")
- }
- pgMsg, ok := msg.(*pgproto3.DataRow)
- if !ok {
- return errors.Newf("unexpected message: %v", jsonOrRaw(msg))
- }
- if !validateFn(pgMsg) {
- return errors.Newf("validation failed for message: %v", jsonOrRaw(msg))
- }
- return nil
-}
-
-// expectCommandComplete expects that the next message from serverConn is a
-// CommandComplete message with the input tag, and returns an error if it isn't.
-func expectCommandComplete(
- ctx context.Context, serverConn *interceptor.FrontendConn, tag string,
-) error {
- if ctx.Err() != nil {
- return ctx.Err()
- }
- msg, err := serverConn.ReadMsg()
- if err != nil {
- return errors.Wrap(err, "reading message")
- }
- pgMsg, ok := msg.(*pgproto3.CommandComplete)
- if !ok || string(pgMsg.CommandTag) != tag {
- return errors.Newf("unexpected message: %v", jsonOrRaw(msg))
- }
- return nil
-}
-
-// expectReadyForQuery expects that the next message from serverConn is a
-// ReadyForQuery message, and returns an error if it isn't.
-func expectReadyForQuery(ctx context.Context, serverConn *interceptor.FrontendConn) error {
- if ctx.Err() != nil {
- return ctx.Err()
- }
- msg, err := serverConn.ReadMsg()
- if err != nil {
- return errors.Wrap(err, "reading message")
- }
- _, ok := msg.(*pgproto3.ReadyForQuery)
- if !ok {
- return errors.Newf("unexpected message: %v", jsonOrRaw(msg))
- }
- return nil
-}
-
-// jsonOrRaw returns msg in a json string representation if it can be marshaled
-// into one, or in a raw struct string representation otherwise. Only used for
-// displaying better error messages.
-func jsonOrRaw(msg pgproto3.BackendMessage) string {
- m, err := json.Marshal(msg)
- if err != nil {
- return fmt.Sprintf("%v", msg)
- }
- return string(m)
-}
-
-var _ io.Writer = &errWriter{}
-
-// errWriter is an io.Writer that fails whenever a Write call is made.
-type errWriter struct{}
-
-// Write implements the io.Writer interface.
-func (w *errWriter) Write(p []byte) (int, error) {
- return 0, errors.AssertionFailedf("unexpected Write call")
-}
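The comments in the deleted conn_migration.go above describe the fixed response shape of a simple query on the Postgres wire protocol: RowDescription, then DataRow(s), then CommandComplete, then ReadyForQuery. For reference, here is a small sketch of that read loop using the same pgproto3 package the file imports. The helper name and the assumption that startup/authentication has already completed on the connection are mine, not part of the diff.

```go
// Package pgroundtrip sketches the simple-query message flow described above.
package pgroundtrip

import (
	"fmt"
	"net"

	pgproto3 "github.com/jackc/pgproto3/v2"
)

// runSimpleQuery sends one simple query on an already-authenticated connection
// and reads messages until ReadyForQuery, i.e. the
// RowDescription -> DataRow* -> CommandComplete -> ReadyForQuery sequence.
func runSimpleQuery(conn net.Conn, sql string) (rows int, err error) {
	frontend := pgproto3.NewFrontend(pgproto3.NewChunkReader(conn), conn)
	if err := frontend.Send(&pgproto3.Query{String: sql}); err != nil {
		return 0, err
	}
	for {
		msg, err := frontend.Receive()
		if err != nil {
			return rows, err
		}
		switch m := msg.(type) {
		case *pgproto3.RowDescription:
			// Column metadata; a caller like waitForShowTransferState would
			// validate the column names here.
		case *pgproto3.DataRow:
			rows++
			_ = m.Values // per-column values for this row
		case *pgproto3.CommandComplete:
			// Command tag such as "SELECT 1"; keep reading until ReadyForQuery.
		case *pgproto3.ErrorResponse:
			return rows, fmt.Errorf("server error: %s", m.Message)
		case *pgproto3.ReadyForQuery:
			return rows, nil // the server is ready for the next query
		}
	}
}
```

The proxy code above follows the same order, but peeks at each message type first so that unrelated or oversized messages can be forwarded straight to the client instead of being buffered.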
diff --git a/pkg/ccl/sqlproxyccl/conn_migration_test.go b/pkg/ccl/sqlproxyccl/conn_migration_test.go
deleted file mode 100644
index d8c1d70da4..0000000000
--- a/pkg/ccl/sqlproxyccl/conn_migration_test.go
+++ /dev/null
@@ -1,831 +0,0 @@
-// Copyright 2022 The Cockroach Authors.
-//
-// Licensed as a CockroachDB Enterprise file under the Cockroach Community
-// License (the "License"); you may not use this file except in compliance with
-// the License. You may obtain a copy of the License at
-//
-// https://github.com/cockroachdb/cockroach/blob/master/licenses/CCL.txt
-
-package sqlproxyccl
-
-import (
- "bytes"
- "context"
- "io"
- "net"
- "testing"
- "time"
-
- "github.com/cockroachdb/cockroach/pkg/ccl/sqlproxyccl/interceptor"
- "github.com/cockroachdb/cockroach/pkg/util/leaktest"
- "github.com/cockroachdb/errors"
- "github.com/jackc/pgproto3/v2"
- "github.com/stretchr/testify/require"
-)
-
-func TestRunShowTransferState(t *testing.T) {
- defer leaktest.AfterTest(t)()
-
- t.Run("successful", func(t *testing.T) {
- buf := new(bytes.Buffer)
- err := runShowTransferState(buf, "foo-bar-baz")
- require.NoError(t, err)
-
- backend := pgproto3.NewBackend(pgproto3.NewChunkReader(buf), buf)
- msg, err := backend.Receive()
- require.NoError(t, err)
- m, ok := msg.(*pgproto3.Query)
- require.True(t, ok)
- require.Equal(t, "SHOW TRANSFER STATE WITH 'foo-bar-baz'", m.String)
- })
-
- t.Run("error", func(t *testing.T) {
- err := runShowTransferState(&errWriter{}, "foo")
- require.Regexp(t, "unexpected Write call", err)
- })
-}
-
-func TestWaitForShowTransferState(t *testing.T) {
- defer leaktest.AfterTest(t)()
- ctx := context.Background()
-
- t.Run("context_cancelled", func(t *testing.T) {
- tCtx, cancel := context.WithCancel(ctx)
- cancel()
-
- transferErr, state, token, err := waitForShowTransferState(tCtx, nil, nil, "")
- require.True(t, errors.Is(err, context.Canceled))
- require.Equal(t, "", transferErr)
- require.Equal(t, "", state)
- require.Equal(t, "", token)
- })
-
- transferStateDataRow := &pgproto3.DataRow{
- Values: [][]byte{
- {},
- []byte("foo-state"),
- []byte("foo-token"),
- []byte("foo-transfer-key"),
- },
- }
-
- for _, tc := range []struct {
- name string
- sendSequence []pgproto3.BackendMessage
- postValidate func(*testing.T, <-chan pgproto3.BackendMessage)
- err string
- transferErr string
- }{
- {
- // All irrelevant messages are forwarded to the client. This returns
- // an error when we see ErrorResponse.
- name: "RowDescription/candidate_mismatch",
- sendSequence: []pgproto3.BackendMessage{
- // not RowDescription.
- &pgproto3.BackendKeyData{},
- // Too large (> 4k).
- &pgproto3.RowDescription{
- Fields: []pgproto3.FieldDescription{
- {Name: []byte("foo1")},
- {Name: make([]byte, 1<<13 /* 8K */)},
- },
- },
- // Invalid number of columns.
- &pgproto3.RowDescription{
- Fields: []pgproto3.FieldDescription{
- {Name: []byte("foo2")},
- },
- },
- // Invalid column names.
- &pgproto3.RowDescription{
- Fields: []pgproto3.FieldDescription{
- {Name: []byte("error")},
- {Name: []byte("session_state_foo")},
- {Name: []byte("session_revival_token_bar")},
- {Name: []byte("apple")},
- },
- },
- &pgproto3.ErrorResponse{},
- },
- err: "ambiguous ErrorResponse",
- postValidate: func(t *testing.T, msgCh <-chan pgproto3.BackendMessage) {
- t.Helper()
- expectMsg(t, msgCh, `"Type":"BackendKeyData"`)
- expectMsg(t, msgCh, `"Type":"RowDescription".*"Name":"foo1"`)
- expectMsg(t, msgCh, `"Type":"RowDescription".*"Name":"foo2"`)
- expectMsg(t, msgCh, `"Type":"RowDescription".*session_state_foo.*session_revival_token_bar`)
- },
- },
- {
- name: "DataRow/type_mismatch",
- sendSequence: []pgproto3.BackendMessage{
- &pgproto3.RowDescription{
- Fields: []pgproto3.FieldDescription{
- {Name: []byte("error")},
- {Name: []byte("session_state_base64")},
- {Name: []byte("session_revival_token_base64")},
- {Name: []byte("transfer_key")},
- },
- },
- &pgproto3.ReadyForQuery{},
- },
- err: `DataRow: unexpected message:.*"Type":"ReadyForQuery"`,
- },
- {
- name: "DataRow/invalid_response",
- sendSequence: []pgproto3.BackendMessage{
- &pgproto3.RowDescription{
- Fields: []pgproto3.FieldDescription{
- {Name: []byte("error")},
- {Name: []byte("session_state_base64")},
- {Name: []byte("session_revival_token_base64")},
- {Name: []byte("transfer_key")},
- },
- },
- &pgproto3.DataRow{
- // 3 columns instead of 4.
- Values: [][]byte{
- {},
- {},
- []byte("bar"),
- },
- },
- },
- err: "DataRow: validation failed for message",
- },
- {
- name: "DataRow/invalid_transfer_key",
- sendSequence: []pgproto3.BackendMessage{
- &pgproto3.RowDescription{
- Fields: []pgproto3.FieldDescription{
- {Name: []byte("error")},
- {Name: []byte("session_state_base64")},
- {Name: []byte("session_revival_token_base64")},
- {Name: []byte("transfer_key")},
- },
- },
- &pgproto3.DataRow{
- Values: [][]byte{
- {},
- {},
- {},
- []byte("bar"),
- },
- },
- },
- err: "DataRow: validation failed for message",
- },
- {
- name: "CommandComplete/type_mismatch",
- sendSequence: []pgproto3.BackendMessage{
- &pgproto3.RowDescription{
- Fields: []pgproto3.FieldDescription{
- {Name: []byte("error")},
- {Name: []byte("session_state_base64")},
- {Name: []byte("session_revival_token_base64")},
- {Name: []byte("transfer_key")},
- },
- },
- &pgproto3.DataRow{
- Values: [][]byte{
- {},
- {},
- {},
- []byte("foo-transfer-key"),
- },
- },
- &pgproto3.ReadyForQuery{},
- },
- err: `CommandComplete: unexpected message:.*"Type":"ReadyForQuery"`,
- },
- {
- name: "CommandComplete/value_mismatch",
- sendSequence: []pgproto3.BackendMessage{
- &pgproto3.RowDescription{
- Fields: []pgproto3.FieldDescription{
- {Name: []byte("error")},
- {Name: []byte("session_state_base64")},
- {Name: []byte("session_revival_token_base64")},
- {Name: []byte("transfer_key")},
- },
- },
- &pgproto3.DataRow{
- Values: [][]byte{
- {},
- {},
- {},
- []byte("foo-transfer-key"),
- },
- },
- &pgproto3.CommandComplete{CommandTag: []byte("SHOW TRANSFER STATE 2")},
- },
- err: `CommandComplete: unexpected message:.*"Type":"CommandComplete".*"CommandTag":"SHOW TRANSFER STATE 2"`,
- },
- {
- name: "ReadyForQuery/type_mismatch",
- sendSequence: []pgproto3.BackendMessage{
- &pgproto3.RowDescription{
- Fields: []pgproto3.FieldDescription{
- {Name: []byte("error")},
- {Name: []byte("session_state_base64")},
- {Name: []byte("session_revival_token_base64")},
- {Name: []byte("transfer_key")},
- },
- },
- &pgproto3.DataRow{
- Values: [][]byte{
- {},
- {},
- {},
- []byte("foo-transfer-key"),
- },
- },
- &pgproto3.CommandComplete{CommandTag: []byte("SHOW TRANSFER STATE 1")},
- &pgproto3.CommandComplete{},
- },
- err: `ReadyForQuery: unexpected message:.*"Type":"CommandComplete"`,
- },
- {
- // This should be a common case with open transactions.
- name: "transfer_state_error",
- sendSequence: []pgproto3.BackendMessage{
- &pgproto3.RowDescription{
- Fields: []pgproto3.FieldDescription{
- {Name: []byte("error")},
- {Name: []byte("session_state_base64")},
- {Name: []byte("session_revival_token_base64")},
- {Name: []byte("transfer_key")},
- },
- },
- &pgproto3.DataRow{
- Values: [][]byte{
- []byte("serialization error"),
- {},
- {},
- []byte("foo-transfer-key"),
- },
- },
- &pgproto3.CommandComplete{CommandTag: []byte("SHOW TRANSFER STATE 1")},
- &pgproto3.ReadyForQuery{},
- },
- transferErr: "serialization error",
- },
- {
- name: "successful",
- sendSequence: []pgproto3.BackendMessage{
- &pgproto3.BackendKeyData{},
- &pgproto3.RowDescription{},
- &pgproto3.CommandComplete{},
- &pgproto3.RowDescription{
- Fields: []pgproto3.FieldDescription{
- {Name: []byte("error")},
- {Name: []byte("session_state_base64")},
- {Name: []byte("session_revival_token_base64")},
- {Name: []byte("transfer_key")},
- },
- },
- transferStateDataRow,
- &pgproto3.CommandComplete{CommandTag: []byte("SHOW TRANSFER STATE 1")},
- &pgproto3.ReadyForQuery{},
- },
- postValidate: func(t *testing.T, msgCh <-chan pgproto3.BackendMessage) {
- t.Helper()
- expectMsg(t, msgCh, `"Type":"BackendKeyData"`)
- expectMsg(t, msgCh, `"Type":"RowDescription"`)
- expectMsg(t, msgCh, `"Type":"CommandComplete"`)
- },
- },
- } {
- t.Run(tc.name, func(t *testing.T) {
- serverProxy, server := net.Pipe()
- defer serverProxy.Close()
- defer server.Close()
-
- clientProxy, client := net.Pipe()
- defer clientProxy.Close()
- defer client.Close()
-
- doneCh := make(chan struct{})
- go func() {
- for _, m := range tc.sendSequence {
- writeServerMsg(server, m)
- }
- close(doneCh)
- }()
-
- msgCh := make(chan pgproto3.BackendMessage, 10)
- go func() {
- fi := interceptor.NewFrontendConn(client)
- for {
- msg, err := fi.ReadMsg()
- if err != nil {
- return
- }
- msgCh <- msg
- }
- }()
-
- transferErr, state, token, err := waitForShowTransferState(
- ctx,
- interceptor.NewFrontendConn(serverProxy),
- clientProxy,
- "foo-transfer-key",
- )
- if tc.err == "" {
- require.NoError(t, err)
- if tc.transferErr != "" {
- require.Equal(t, tc.transferErr, transferErr)
- } else {
- require.Equal(t, "", transferErr)
- require.Equal(t, "foo-state", state)
- require.Equal(t, "foo-token", token)
-
- // Ensure that returned strings are copied. Alternatively,
- // we could also check pointers using encoding.UnsafeConvertStringToBytes.
- transferStateDataRow.Values[0] = []byte("x")
- transferStateDataRow.Values[1][1] = '-'
- transferStateDataRow.Values[2][1] = '-'
- require.Equal(t, "", transferErr)
- require.Equal(t, "foo-state", state)
- require.Equal(t, "foo-token", token)
- }
- require.Eventually(t, func() bool {
- select {
- case <-doneCh:
- return true
- default:
- return false
- }
- }, 5*time.Second, 100*time.Millisecond, "require doneCh to be closed")
- } else {
- require.Regexp(t, tc.err, err)
- }
-
- // Verify that forwarding was correct.
- if tc.postValidate != nil {
- tc.postValidate(t, msgCh)
- }
- })
- }
-}
-
-func TestRunAndWaitForDeserializeSession(t *testing.T) {
- defer leaktest.AfterTest(t)()
- ctx := context.Background()
-
- t.Run("write_failed", func(t *testing.T) {
- r, w := net.Pipe()
- r.Close()
- w.Close()
- err := runAndWaitForDeserializeSession(ctx,
- interceptor.NewFrontendConn(r), "foo")
- require.Regexp(t, "closed pipe", err)
- })
-
- for _, tc := range []struct {
- name string
- sendSequence []pgproto3.BackendMessage
- err string
- }{
- {
- name: "RowDescription/type_mismatch",
- sendSequence: []pgproto3.BackendMessage{
- &pgproto3.BackendKeyData{},
- },
- err: "RowDescription: forwarding message: unexpected Write call",
- },
- {
- name: "RowDescription/column_mismatch/length",
- sendSequence: []pgproto3.BackendMessage{
- &pgproto3.RowDescription{},
- },
- err: "RowDescription: writing message: unexpected Write call",
- },
- {
- name: "RowDescription/column_mismatch/name",
- sendSequence: []pgproto3.BackendMessage{
- &pgproto3.RowDescription{
- Fields: []pgproto3.FieldDescription{{Name: []byte("bar")}}},
- },
- err: "RowDescription: writing message: unexpected Write call",
- },
- {
- name: "DataRow/type_mismatch",
- sendSequence: []pgproto3.BackendMessage{
- &pgproto3.RowDescription{
- Fields: []pgproto3.FieldDescription{
- {Name: []byte("crdb_internal.deserialize_session")},
- },
- },
- &pgproto3.ReadyForQuery{},
- },
- err: `DataRow: unexpected message:.*"Type":"ReadyForQuery"`,
- },
- {
- name: "DataRow/column_mismatch/length",
- sendSequence: []pgproto3.BackendMessage{
- &pgproto3.RowDescription{
- Fields: []pgproto3.FieldDescription{
- {Name: []byte("crdb_internal.deserialize_session")},
- },
- },
- &pgproto3.DataRow{},
- },
- err: `DataRow: validation failed for message`,
- },
- {
- name: "DataRow/column_mismatch/value",
- sendSequence: []pgproto3.BackendMessage{
- &pgproto3.RowDescription{
- Fields: []pgproto3.FieldDescription{
- {Name: []byte("crdb_internal.deserialize_session")},
- },
- },
- &pgproto3.DataRow{Values: [][]byte{[]byte("temp")}},
- },
- err: "DataRow: validation failed for message",
- },
- {
- name: "CommandComplete/type_mismatch",
- sendSequence: []pgproto3.BackendMessage{
- &pgproto3.RowDescription{
- Fields: []pgproto3.FieldDescription{
- {Name: []byte("crdb_internal.deserialize_session")},
- },
- },
- &pgproto3.DataRow{Values: [][]byte{[]byte("t")}},
- &pgproto3.ReadyForQuery{},
- },
- err: `CommandComplete: unexpected message:.*"Type":"ReadyForQuery"`,
- },
- {
- name: "CommandComplete/value_mismatch",
- sendSequence: []pgproto3.BackendMessage{
- &pgproto3.RowDescription{
- Fields: []pgproto3.FieldDescription{
- {Name: []byte("crdb_internal.deserialize_session")},
- },
- },
- &pgproto3.DataRow{Values: [][]byte{[]byte("t")}},
- &pgproto3.CommandComplete{CommandTag: []byte("SELECT 2")},
- },
- err: `CommandComplete: unexpected message:.*"Type":"CommandComplete".*"CommandTag":"SELECT 2"`,
- },
- {
- name: "ReadyForQuery/type_mismatch",
- sendSequence: []pgproto3.BackendMessage{
- &pgproto3.RowDescription{
- Fields: []pgproto3.FieldDescription{
- {Name: []byte("crdb_internal.deserialize_session")},
- },
- },
- &pgproto3.DataRow{Values: [][]byte{[]byte("t")}},
- &pgproto3.CommandComplete{CommandTag: []byte("SELECT 1")},
- &pgproto3.CommandComplete{},
- },
- err: `ReadyForQuery: unexpected message:.*"Type":"CommandComplete"`,
- },
- {
- name: "successful",
- sendSequence: []pgproto3.BackendMessage{
- &pgproto3.RowDescription{
- Fields: []pgproto3.FieldDescription{
- {Name: []byte("crdb_internal.deserialize_session")},
- },
- },
- &pgproto3.DataRow{Values: [][]byte{[]byte("t")}},
- &pgproto3.CommandComplete{CommandTag: []byte("SELECT 1")},
- &pgproto3.ReadyForQuery{},
- },
- err: "",
- },
- } {
- t.Run(tc.name, func(t *testing.T) {
- serverProxy, server := net.Pipe()
- defer serverProxy.Close()
- defer server.Close()
- doneCh := make(chan struct{})
- go func() {
- for _, m := range tc.sendSequence {
- writeServerMsg(server, m)
- }
- close(doneCh)
- }()
-
- msgCh := make(chan pgproto3.FrontendMessage, 1)
- go func() {
- backend := interceptor.NewBackendConn(server)
- msg, _ := backend.ReadMsg()
- msgCh <- msg
- }()
-
- err := runAndWaitForDeserializeSession(ctx,
- interceptor.NewFrontendConn(serverProxy), "foo-transfer-key")
- if tc.err == "" {
- require.NoError(t, err)
- } else {
- require.Regexp(t, tc.err, err)
- }
-
- require.Eventually(t, func() bool {
- select {
- case <-doneCh:
- return true
- default:
- return false
- }
- }, 5*time.Second, 100*time.Millisecond, "require doneCh to be closed")
-
- msg := <-msgCh
- m, ok := msg.(*pgproto3.Query)
- require.True(t, ok)
- const queryStr = "SELECT crdb_internal.deserialize_session(decode('foo-transfer-key', 'base64'))"
- require.Equal(t, queryStr, m.String)
- })
- }
-}
-
-func TestWaitForSmallRowDescription(t *testing.T) {
- defer leaktest.AfterTest(t)()
- ctx := context.Background()
-
- t.Run("context_cancelled", func(t *testing.T) {
- tCtx, cancel := context.WithCancel(ctx)
- cancel()
-
- err := waitForSmallRowDescription(tCtx, nil, nil, nil)
- require.EqualError(t, err, context.Canceled.Error())
- })
-
- t.Run("peek_error", func(t *testing.T) {
- r, w := net.Pipe()
- r.Close()
- w.Close()
-
- err := waitForSmallRowDescription(ctx, interceptor.NewFrontendConn(r), nil, nil)
- require.Regexp(t, "peeking message", err)
- })
-
- t.Run("ambiguous_error", func(t *testing.T) {
- r, w := net.Pipe()
- defer r.Close()
- defer w.Close()
-
- go func() {
- writeServerMsg(w, &pgproto3.ErrorResponse{})
- }()
-
- err := waitForSmallRowDescription(ctx, interceptor.NewFrontendConn(r), nil, nil)
- require.Regexp(t, "ambiguous ErrorResponse.*ErrorResponse", err)
- })
-
- t.Run("successful", func(t *testing.T) {
- serverProxy, server := net.Pipe()
- defer serverProxy.Close()
- defer server.Close()
-
- clientProxy, client := net.Pipe()
- defer clientProxy.Close()
- defer client.Close()
-
- go func() {
- // Not RowDescription.
- writeServerMsg(server, &pgproto3.BackendKeyData{ProcessID: 42})
- // Too large (> 4k bytes).
- writeServerMsg(server, &pgproto3.RowDescription{
- Fields: []pgproto3.FieldDescription{
- {Name: []byte("foo1")},
- {Name: make([]byte, 1<<13 /* 8K */)},
- },
- })
- // Mismatch.
- writeServerMsg(server, &pgproto3.RowDescription{
- Fields: []pgproto3.FieldDescription{
- {Name: []byte("foo2")},
- {Name: []byte("foo3")},
- },
- })
- // Match.
- writeServerMsg(server, &pgproto3.RowDescription{
- Fields: []pgproto3.FieldDescription{
- {Name: []byte("foo1")},
- },
- })
- }()
-
- msgCh := make(chan pgproto3.BackendMessage, 10)
- go func() {
- fi := interceptor.NewFrontendConn(client)
- for {
- msg, err := fi.ReadMsg()
- if err != nil {
- return
- }
- msgCh <- msg
- }
- }()
-
- err := waitForSmallRowDescription(
- ctx,
- interceptor.NewFrontendConn(serverProxy),
- clientProxy,
- func(m *pgproto3.RowDescription) bool {
- return len(m.Fields) == 1 && string(m.Fields[0].Name) == "foo1"
- },
- )
- require.Nil(t, err)
-
- // Verify that forwarding was correct.
- expectMsg(t, msgCh, `"Type":"BackendKeyData".*"ProcessID":42`)
- expectMsg(t, msgCh, `"Type":"RowDescription".*"Name":"foo1"`)
- expectMsg(t, msgCh, `"Type":"RowDescription".*"Name":"foo2".*"Name":"foo3"`)
- })
-}
-
-func TestExpectDataRow(t *testing.T) {
- defer leaktest.AfterTest(t)()
- ctx := context.Background()
-
- falseValidateFn := func(m *pgproto3.DataRow) bool { return false }
-
- t.Run("context_cancelled", func(t *testing.T) {
- tCtx, cancel := context.WithCancel(ctx)
- cancel()
-
- err := expectDataRow(tCtx, nil, falseValidateFn)
- require.EqualError(t, err, context.Canceled.Error())
- })
-
- t.Run("read_error", func(t *testing.T) {
- r, w := net.Pipe()
- r.Close()
- w.Close()
-
- err := expectDataRow(ctx, interceptor.NewFrontendConn(r), falseValidateFn)
- require.Regexp(t, "reading message", err)
- })
-
- t.Run("type_mismatch", func(t *testing.T) {
- r, w := net.Pipe()
- defer r.Close()
- defer w.Close()
-
- go func() {
- writeServerMsg(w, &pgproto3.ReadyForQuery{})
- }()
-
- err := expectDataRow(ctx, interceptor.NewFrontendConn(r), falseValidateFn)
- require.Regexp(t, "unexpected message.*ReadyForQuery", err)
- })
-
- t.Run("validation_failed", func(t *testing.T) {
- r, w := net.Pipe()
- defer r.Close()
- defer w.Close()
-
- go func() {
- writeServerMsg(w, &pgproto3.DataRow{})
- }()
-
- err := expectDataRow(ctx, interceptor.NewFrontendConn(r), falseValidateFn)
- require.Regexp(t, "validation failed for message.*DataRow", err)
- })
-
- t.Run("successful", func(t *testing.T) {
- r, w := net.Pipe()
- defer r.Close()
- defer w.Close()
-
- go func() {
- writeServerMsg(w, &pgproto3.DataRow{Values: [][]byte{[]byte("foo")}})
- }()
-
- err := expectDataRow(
- ctx,
- interceptor.NewFrontendConn(r),
- func(m *pgproto3.DataRow) bool {
- return len(m.Values) == 1 && string(m.Values[0]) == "foo"
- },
- )
- require.Nil(t, err)
- })
-}
-
-func TestExpectCommandComplete(t *testing.T) {
- defer leaktest.AfterTest(t)()
- ctx := context.Background()
-
- t.Run("context_cancelled", func(t *testing.T) {
- tCtx, cancel := context.WithCancel(ctx)
- cancel()
-
- err := expectCommandComplete(tCtx, nil, "")
- require.EqualError(t, err, context.Canceled.Error())
- })
-
- t.Run("read_error", func(t *testing.T) {
- r, w := net.Pipe()
- r.Close()
- w.Close()
-
- err := expectCommandComplete(ctx, interceptor.NewFrontendConn(r), "")
- require.Regexp(t, "reading message", err)
- })
-
- t.Run("type_mismatch", func(t *testing.T) {
- r, w := net.Pipe()
- defer r.Close()
- defer w.Close()
-
- go func() {
- writeServerMsg(w, &pgproto3.ReadyForQuery{})
- }()
-
- err := expectCommandComplete(ctx, interceptor.NewFrontendConn(r), "")
- require.Regexp(t, "unexpected message.*ReadyForQuery", err)
- })
-
- t.Run("tag_mismatch", func(t *testing.T) {
- r, w := net.Pipe()
- defer r.Close()
- defer w.Close()
-
- go func() {
- writeServerMsg(w, &pgproto3.CommandComplete{CommandTag: []byte("foo")})
- }()
-
- err := expectCommandComplete(ctx, interceptor.NewFrontendConn(r), "bar")
- require.Regexp(t, "unexpected message.*CommandComplete.*CommandTag.*foo", err)
- })
-
- t.Run("successful", func(t *testing.T) {
- r, w := net.Pipe()
- defer r.Close()
- defer w.Close()
-
- go func() {
- writeServerMsg(w, &pgproto3.CommandComplete{CommandTag: []byte("SELECT 1")})
- }()
-
- err := expectCommandComplete(ctx, interceptor.NewFrontendConn(r), "SELECT 1")
- require.Nil(t, err)
- })
-}
-
-func TestExpectReadyForQuery(t *testing.T) {
- defer leaktest.AfterTest(t)()
- ctx := context.Background()
-
- t.Run("context_cancelled", func(t *testing.T) {
- tCtx, cancel := context.WithCancel(ctx)
- cancel()
-
- err := expectReadyForQuery(tCtx, nil)
- require.EqualError(t, err, context.Canceled.Error())
- })
-
- t.Run("read_error", func(t *testing.T) {
- r, w := net.Pipe()
- r.Close()
- w.Close()
-
- err := expectReadyForQuery(ctx, interceptor.NewFrontendConn(r))
- require.Regexp(t, "reading message", err)
- })
-
- t.Run("type_mismatch", func(t *testing.T) {
- r, w := net.Pipe()
- defer r.Close()
- defer w.Close()
-
- go func() {
- writeServerMsg(w, &pgproto3.ErrorResponse{})
- }()
-
- err := expectReadyForQuery(ctx, interceptor.NewFrontendConn(r))
- require.Regexp(t, "unexpected message.*ErrorResponse", err)
- })
-
- t.Run("successful", func(t *testing.T) {
- r, w := net.Pipe()
- defer r.Close()
- defer w.Close()
-
- go func() {
- writeServerMsg(w, &pgproto3.ReadyForQuery{TxStatus: 'I'})
- }()
-
- err := expectReadyForQuery(ctx, interceptor.NewFrontendConn(r))
- require.Nil(t, err)
- })
-}
-
-func writeServerMsg(w io.Writer, msg pgproto3.BackendMessage) {
- _, _ = w.Write(msg.Encode(nil))
-}
-
-func expectMsg(t *testing.T, msgCh <-chan pgproto3.BackendMessage, match string) {
- t.Helper()
- msg := <-msgCh
- require.Regexp(t, match, jsonOrRaw(msg))
-}
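
The removed helpers above capture the pattern used throughout these tests: a goroutine plays the SQL server by writing encoded pgwire messages into one end of a net.Pipe, and the test reads the raw bytes (or decoded messages) back out of the other end. A minimal, self-contained sketch of that pattern, using only net and pgproto3 (not part of the diff):

package main

import (
	"encoding/binary"
	"fmt"
	"io"
	"net"

	"github.com/jackc/pgproto3/v2"
)

func main() {
	server, proxy := net.Pipe()
	defer server.Close()
	defer proxy.Close()

	// The "server" side encodes a backend message and writes the raw bytes,
	// just like writeServerMsg above.
	go func() {
		_, _ = server.Write((&pgproto3.ReadyForQuery{TxStatus: 'I'}).Encode(nil))
	}()

	// The other end reads the whole 6-byte message: a 1-byte type, a 4-byte
	// big-endian length (which excludes the type byte), and a 1-byte status.
	buf := make([]byte, 6)
	if _, err := io.ReadFull(proxy, buf); err != nil {
		panic(err)
	}
	fmt.Printf("type=%c length=%d status=%c\n", buf[0], binary.BigEndian.Uint32(buf[1:5]), buf[5])
	// Prints: type=Z length=5 status=I
}
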
diff --git a/pkg/ccl/sqlproxyccl/forwarder.go b/pkg/ccl/sqlproxyccl/forwarder.go
index 67040cef88..9810ef695a 100644
--- a/pkg/ccl/sqlproxyccl/forwarder.go
+++ b/pkg/ccl/sqlproxyccl/forwarder.go
@@ -31,23 +31,31 @@ type forwarder struct {
ctx context.Context
ctxCancel context.CancelFunc
- // clientConn and serverConn provide a convenient way to read and forward
- // Postgres messages, while minimizing IO reads and memory allocations.
- //
- // clientConn is set once during initialization, and stays the same
- // throughout the lifetime of the forwarder.
- //
// serverConn is only set after the authentication phase for the initial
// connection. In the context of a connection migration, serverConn is only
- // replaced once the session has successfully been deserialized, and the old
- // connection will be closed. Whenever serverConn gets updated, both
- // clientMessageTypeSent and isServerMsgReadyReceived fields have to reset
- // to their initial values.
+ // replaced once the session has successfully been deserialized, and the
+ // old connection will be closed.
//
// All reads from these connections must go through the interceptors. It is
- // not safe to call Read directly as the interceptors may have buffered data.
- clientConn *interceptor.BackendConn // client <-> proxy
- serverConn *interceptor.FrontendConn // proxy <-> server
+ // not safe to read from these directly as the interceptors may have
+ // buffered data.
+ clientConn net.Conn // client <-> proxy
+ serverConn net.Conn // proxy <-> server
+
+ // clientInterceptor and serverInterceptor provide a convenient way to
+ // read and forward Postgres messages, while minimizing IO reads and memory
+ // allocations.
+ //
+ // These interceptors have to match clientConn and serverConn. See comment
+ // above on when those fields will be updated.
+ //
+ // TODO(jaylim-crl): Add updater functions that set both conn and
+ // interceptor fields at the same time. At the moment, there's no use case
+ // besides the forward function. When connection migration happens, we
+ // will need to create a new serverInterceptor. We should remember to close
+ // old serverConn as well.
+ clientInterceptor *interceptor.BackendInterceptor // clientConn -> serverConn
+ serverInterceptor *interceptor.FrontendInterceptor // serverConn -> clientConn
// errChan is a buffered channel that contains the first forwarder error.
// This channel may receive nil errors.
@@ -82,8 +90,9 @@ func forward(ctx context.Context, clientConn, serverConn net.Conn) *forwarder {
// TODO(jaylim-crl): Check for transfer state here.
return nil
})
- f.clientConn = interceptor.NewBackendConn(clientConn)
- f.serverConn = interceptor.NewFrontendConn(serverConn)
+
+ f.setClientConn(clientConn)
+ f.setServerConn(serverConn)
// Start request (client to server) and response (server to client)
// processors. We will copy all pgwire messages from client to server
@@ -128,7 +137,7 @@ func (f *forwarder) Close() {
// gets triggered when context is cancelled.
func (f *forwarder) handleClientToServer() error {
for f.ctx.Err() == nil {
- if _, err := f.clientConn.ForwardMsg(f.serverConn); err != nil {
+ if _, err := f.clientInterceptor.ForwardMsg(f.serverConn); err != nil {
return err
}
}
@@ -141,13 +150,31 @@ func (f *forwarder) handleClientToServer() error {
// Read, we will unblock that by closing serverConn through f.Close().
func (f *forwarder) handleServerToClient() error {
for f.ctx.Err() == nil {
- if _, err := f.serverConn.ForwardMsg(f.clientConn); err != nil {
+ if _, err := f.serverInterceptor.ForwardMsg(f.clientConn); err != nil {
return err
}
}
return f.ctx.Err()
}
+// setClientConn is a convenient helper to update clientConn, and will also
+// create a matching interceptor for the given connection. It is the caller's
+// responsibility to close the old connection before calling this, or there
+// may be a leak.
+func (f *forwarder) setClientConn(clientConn net.Conn) {
+ f.clientConn = clientConn
+ f.clientInterceptor = interceptor.NewBackendInterceptor(f.clientConn)
+}
+
+// setServerConn is a convenient helper to update serverConn, and will also
+// create a matching interceptor for the given connection. It is the caller's
+// responsibility to close the old connection before calling this, or there
+// may be a leak.
+func (f *forwarder) setServerConn(serverConn net.Conn) {
+ f.serverConn = serverConn
+ f.serverInterceptor = interceptor.NewFrontendInterceptor(f.serverConn)
+}
+
// wrapClientToServerError overrides client to server errors for external
// consumption.
//
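
Given the TODO above and the note that callers must close the old connection before swapping it, a hypothetical migration helper built on setServerConn could look like the sketch below. This is not part of the diff: replaceServerConn is an invented name, and it assumes the forwarder fields and setServerConn helper defined in this file.

// replaceServerConn sketches how a connection migration might swap the
// proxy<->server connection: close the old connection first (per the comment
// on setServerConn), then install the new one so that a fresh
// serverInterceptor is created alongside it.
func (f *forwarder) replaceServerConn(newServerConn net.Conn) {
	if f.serverConn != nil {
		_ = f.serverConn.Close() // avoid leaking the previous connection
	}
	f.setServerConn(newServerConn) // also rebuilds f.serverInterceptor
}
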
diff --git a/pkg/ccl/sqlproxyccl/forwarder_test.go b/pkg/ccl/sqlproxyccl/forwarder_test.go
index 0b398d75d1..0acdf12e5c 100644
--- a/pkg/ccl/sqlproxyccl/forwarder_test.go
+++ b/pkg/ccl/sqlproxyccl/forwarder_test.go
@@ -9,6 +9,7 @@
package sqlproxyccl
import (
+ "bytes"
"context"
"net"
"testing"
@@ -187,6 +188,62 @@ func TestForwarder_Close(t *testing.T) {
require.EqualError(t, f.ctx.Err(), context.Canceled.Error())
}
+func TestForwarder_setClientConn(t *testing.T) {
+ defer leaktest.AfterTest(t)()
+ f := &forwarder{serverConn: nil, serverInterceptor: nil}
+
+ w, r := net.Pipe()
+ defer w.Close()
+ defer r.Close()
+
+ f.setClientConn(r)
+ require.Equal(t, r, f.clientConn)
+
+ dst := new(bytes.Buffer)
+ errChan := make(chan error, 1)
+ go func() {
+ _, err := f.clientInterceptor.ForwardMsg(dst)
+ errChan <- err
+ }()
+
+ _, err := w.Write((&pgproto3.Query{String: "SELECT 1"}).Encode(nil))
+ require.NoError(t, err)
+
+ // Block until message has been forwarded. This checks that we are creating
+ // our interceptor properly.
+ err = <-errChan
+ require.NoError(t, err)
+ require.Equal(t, 14, dst.Len())
+}
+
+func TestForwarder_setServerConn(t *testing.T) {
+ defer leaktest.AfterTest(t)()
+ f := &forwarder{serverConn: nil, serverInterceptor: nil}
+
+ w, r := net.Pipe()
+ defer w.Close()
+ defer r.Close()
+
+ f.setServerConn(r)
+ require.Equal(t, r, f.serverConn)
+
+ dst := new(bytes.Buffer)
+ errChan := make(chan error, 1)
+ go func() {
+ _, err := f.serverInterceptor.ForwardMsg(dst)
+ errChan <- err
+ }()
+
+ _, err := w.Write((&pgproto3.ReadyForQuery{TxStatus: 'I'}).Encode(nil))
+ require.NoError(t, err)
+
+ // Block until message has been forwarded. This checks that we are creating
+ // our interceptor properly.
+ err = <-errChan
+ require.NoError(t, err)
+ require.Equal(t, 6, dst.Len())
+}
+
func TestWrapClientToServerError(t *testing.T) {
defer leaktest.AfterTest(t)()
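
The byte counts asserted in the two tests above (14 and 6) follow directly from pgwire framing: one type byte, a 4-byte big-endian length that excludes the type byte, then the body. A quick check of that arithmetic (not part of the diff):

package main

import (
	"fmt"

	"github.com/jackc/pgproto3/v2"
)

func main() {
	q := (&pgproto3.Query{String: "SELECT 1"}).Encode(nil)
	r := (&pgproto3.ReadyForQuery{TxStatus: 'I'}).Encode(nil)
	fmt.Println(len(q)) // 14: 'Q' + 4-byte length + "SELECT 1" + NUL terminator
	fmt.Println(len(r)) // 6:  'Z' + 4-byte length + 'I' transaction status
}
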
diff --git a/pkg/ccl/sqlproxyccl/interceptor/BUILD.bazel b/pkg/ccl/sqlproxyccl/interceptor/BUILD.bazel
index 2052d9670a..11771fdc56 100644
--- a/pkg/ccl/sqlproxyccl/interceptor/BUILD.bazel
+++ b/pkg/ccl/sqlproxyccl/interceptor/BUILD.bazel
@@ -3,10 +3,10 @@ load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test")
go_library(
name = "interceptor",
srcs = [
- "backend_conn.go",
+ "backend_interceptor.go",
"base.go",
"chunkreader.go",
- "frontend_conn.go",
+ "frontend_interceptor.go",
],
importpath = "github.com/cockroachdb/cockroach/pkg/ccl/sqlproxyccl/interceptor",
visibility = ["//visibility:public"],
@@ -21,10 +21,10 @@ go_library(
go_test(
name = "interceptor_test",
srcs = [
- "backend_conn_test.go",
+ "backend_interceptor_test.go",
"base_test.go",
"chunkreader_test.go",
- "frontend_conn_test.go",
+ "frontend_interceptor_test.go",
"interceptor_test.go",
],
embed = [":interceptor"],
diff --git a/pkg/ccl/sqlproxyccl/interceptor/backend_conn.go b/pkg/ccl/sqlproxyccl/interceptor/backend_interceptor.go
similarity index 60%
rename from pkg/ccl/sqlproxyccl/interceptor/backend_conn.go
rename to pkg/ccl/sqlproxyccl/interceptor/backend_interceptor.go
index 6f6977786a..0611b267a4 100644
--- a/pkg/ccl/sqlproxyccl/interceptor/backend_conn.go
+++ b/pkg/ccl/sqlproxyccl/interceptor/backend_interceptor.go
@@ -10,33 +10,26 @@ package interceptor
import (
"io"
- "net"
"github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgwirebase"
"github.com/jackc/pgproto3/v2"
)
-// BackendConn is a server interceptor for the Postgres backend protocol.
-// This will be used for the connection between client and proxy.
-type BackendConn struct {
- net.Conn
- interceptor *pgInterceptor
-}
+// BackendInterceptor is a server interceptor for the Postgres backend protocol.
+type BackendInterceptor pgInterceptor
-// NewBackendConn creates a BackendConn using the default buffer size of 8KB.
-func NewBackendConn(conn net.Conn) *BackendConn {
- return &BackendConn{
- Conn: conn,
- interceptor: newPgInterceptor(conn, defaultBufferSize),
- }
+// NewBackendInterceptor creates a BackendInterceptor using the default buffer
+// size of 8K bytes.
+func NewBackendInterceptor(src io.Reader) *BackendInterceptor {
+ return (*BackendInterceptor)(newPgInterceptor(src, defaultBufferSize))
}
// PeekMsg returns the header of the current pgwire message without advancing
// the interceptor.
//
// See pgInterceptor.PeekMsg for more information.
-func (c *BackendConn) PeekMsg() (typ pgwirebase.ClientMessageType, size int, err error) {
- byteType, size, err := c.interceptor.PeekMsg()
+func (bi *BackendInterceptor) PeekMsg() (typ pgwirebase.ClientMessageType, size int, err error) {
+ byteType, size, err := (*pgInterceptor)(bi).PeekMsg()
return pgwirebase.ClientMessageType(byteType), size, err
}
@@ -44,8 +37,8 @@ func (c *BackendConn) PeekMsg() (typ pgwirebase.ClientMessageType, size int, err
// This also advances the interceptor to the next message.
//
// See pgInterceptor.ReadMsg for more information.
-func (c *BackendConn) ReadMsg() (msg pgproto3.FrontendMessage, err error) {
- msgBytes, err := c.interceptor.ReadMsg()
+func (bi *BackendInterceptor) ReadMsg() (msg pgproto3.FrontendMessage, err error) {
+ msgBytes, err := (*pgInterceptor)(bi).ReadMsg()
if err != nil {
return nil, err
}
@@ -57,6 +50,6 @@ func (c *BackendConn) ReadMsg() (msg pgproto3.FrontendMessage, err error) {
// decoding, and advances the interceptor to the next message.
//
// See pgInterceptor.ForwardMsg for more information.
-func (c *BackendConn) ForwardMsg(dst io.Writer) (n int, err error) {
- return c.interceptor.ForwardMsg(dst)
+func (bi *BackendInterceptor) ForwardMsg(dst io.Writer) (n int, err error) {
+ return (*pgInterceptor)(bi).ForwardMsg(dst)
}
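
Taken together, PeekMsg, ReadMsg and ForwardMsg support a peek-then-decide loop: inspect the header cheaply, decode only the messages the proxy cares about, and copy everything else through without allocating. A minimal usage sketch (not part of the diff), assuming the BackendInterceptor API added above:

package main

import (
	"bytes"
	"fmt"

	"github.com/cockroachdb/cockroach/pkg/ccl/sqlproxyccl/interceptor"
	"github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgwirebase"
	"github.com/jackc/pgproto3/v2"
)

func main() {
	// Two client messages: a simple query followed by a Terminate.
	src := new(bytes.Buffer)
	src.Write((&pgproto3.Query{String: "SELECT 1"}).Encode(nil))
	src.Write((&pgproto3.Terminate{}).Encode(nil))

	bi := interceptor.NewBackendInterceptor(src)

	// Peek at the header without consuming the message.
	typ, size, err := bi.PeekMsg()
	fmt.Println(typ == pgwirebase.ClientMsgSimpleQuery, size, err) // true 14 <nil>

	// Decode the query, since we care about its contents.
	msg, _ := bi.ReadMsg()
	fmt.Println(msg.(*pgproto3.Query).String) // SELECT 1

	// Forward the Terminate verbatim, without decoding it.
	dst := new(bytes.Buffer)
	n, _ := bi.ForwardMsg(dst)
	fmt.Println(n, dst.Len()) // 5 5
}
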
diff --git a/pkg/ccl/sqlproxyccl/interceptor/backend_conn_test.go b/pkg/ccl/sqlproxyccl/interceptor/backend_interceptor_test.go
similarity index 66%
rename from pkg/ccl/sqlproxyccl/interceptor/backend_conn_test.go
rename to pkg/ccl/sqlproxyccl/interceptor/backend_interceptor_test.go
index 2b4de17e8f..50fba567d0 100644
--- a/pkg/ccl/sqlproxyccl/interceptor/backend_conn_test.go
+++ b/pkg/ccl/sqlproxyccl/interceptor/backend_interceptor_test.go
@@ -10,8 +10,6 @@ package interceptor_test
import (
"bytes"
- "io"
- "net"
"testing"
"github.com/cockroachdb/cockroach/pkg/ccl/sqlproxyccl/interceptor"
@@ -21,44 +19,38 @@ import (
"github.com/stretchr/testify/require"
)
-// TestBackendConn tests the BackendConn. Note that the tests here are shallow.
-// For detailed ones, see the tests for the internal interceptor in base_test.go.
-func TestBackendConn(t *testing.T) {
+// TestBackendInterceptor tests the BackendInterceptor. Note that the tests
+// here are shallow. For detailed ones, see the tests for the internal
+// interceptor in base_test.go.
+func TestBackendInterceptor(t *testing.T) {
defer leaktest.AfterTest(t)()
q := (&pgproto3.Query{String: "SELECT 1"}).Encode(nil)
- writeAsync := func(t *testing.T, w io.Writer) <-chan error {
+ buildSrc := func(t *testing.T, count int) *bytes.Buffer {
t.Helper()
- errCh := make(chan error, 1)
- go func() {
- _, err := w.Write(q)
- errCh <- err
- }()
- return errCh
+ src := new(bytes.Buffer)
+ _, err := src.Write(q)
+ require.NoError(t, err)
+ return src
}
t.Run("PeekMsg returns the right message type", func(t *testing.T) {
- w, r := net.Pipe()
- errCh := writeAsync(t, w)
+ src := buildSrc(t, 1)
- bi := interceptor.NewBackendConn(r)
+ bi := interceptor.NewBackendInterceptor(src)
require.NotNil(t, bi)
typ, size, err := bi.PeekMsg()
require.NoError(t, err)
require.Equal(t, pgwirebase.ClientMsgSimpleQuery, typ)
require.Equal(t, 14, size)
-
- err = <-errCh
- require.Nil(t, err)
})
t.Run("ReadMsg decodes the message correctly", func(t *testing.T) {
- w, r := net.Pipe()
- errCh := writeAsync(t, w)
+ src := buildSrc(t, 1)
- bi := interceptor.NewBackendConn(r)
+ bi := interceptor.NewBackendInterceptor(src)
require.NotNil(t, bi)
msg, err := bi.ReadMsg()
@@ -66,25 +58,18 @@ func TestBackendConn(t *testing.T) {
rmsg, ok := msg.(*pgproto3.Query)
require.True(t, ok)
require.Equal(t, "SELECT 1", rmsg.String)
-
- err = <-errCh
- require.Nil(t, err)
})
t.Run("ForwardMsg forwards data to dst", func(t *testing.T) {
- w, r := net.Pipe()
- errCh := writeAsync(t, w)
+ src := buildSrc(t, 1)
dst := new(bytes.Buffer)
- bi := interceptor.NewBackendConn(r)
+ bi := interceptor.NewBackendInterceptor(src)
require.NotNil(t, bi)
n, err := bi.ForwardMsg(dst)
require.NoError(t, err)
require.Equal(t, 14, n)
require.Equal(t, 14, dst.Len())
-
- err = <-errCh
- require.Nil(t, err)
})
}
diff --git a/pkg/ccl/sqlproxyccl/interceptor/base.go b/pkg/ccl/sqlproxyccl/interceptor/base.go
index b9c113600f..c59945cb0b 100644
--- a/pkg/ccl/sqlproxyccl/interceptor/base.go
+++ b/pkg/ccl/sqlproxyccl/interceptor/base.go
@@ -26,7 +26,7 @@ const pgHeaderSizeBytes = 5
// chosen to match Postgres' send and receive buffer sizes.
//
// See: https://github.com/postgres/postgres/blob/249d64999615802752940e017ee5166e726bc7cd/src/backend/libpq/pqcomm.c#L134-L135.
-const defaultBufferSize = 1 << 13 // 8K
+const defaultBufferSize = 1 << 13 // 8K
// ErrProtocolError indicates that the packets are malformed, and are not as
// expected.
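
For the buffer-size constant above, a quick check of the shift arithmetic (not part of the diff): 1 << 13 is 8192 bytes (8K), matching the Postgres send/receive buffers referenced in the comment, whereas 2 << 13 would be 16384 bytes (16K).

package main

import "fmt"

func main() {
	fmt.Println(1<<13, 2<<13) // 8192 16384: 8K vs 16K
}
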
diff --git a/pkg/ccl/sqlproxyccl/interceptor/base_test.go b/pkg/ccl/sqlproxyccl/interceptor/base_test.go
index 28463e9af9..aca49e46f9 100644
--- a/pkg/ccl/sqlproxyccl/interceptor/base_test.go
+++ b/pkg/ccl/sqlproxyccl/interceptor/base_test.go
@@ -33,8 +33,8 @@ func TestNewPgInterceptor(t *testing.T) {
bufSize int
normalizedBufSize int
}{
- {-1, 8192},
- {pgHeaderSizeBytes - 1, 8192},
+ {-1, defaultBufferSize},
+ {pgHeaderSizeBytes - 1, defaultBufferSize},
{pgHeaderSizeBytes, pgHeaderSizeBytes},
{1024, 1024},
} {
diff --git a/pkg/ccl/sqlproxyccl/interceptor/frontend_conn.go b/pkg/ccl/sqlproxyccl/interceptor/frontend_interceptor.go
similarity index 60%
rename from pkg/ccl/sqlproxyccl/interceptor/frontend_conn.go
rename to pkg/ccl/sqlproxyccl/interceptor/frontend_interceptor.go
index 6967a1ab78..cd4a5ca9ca 100644
--- a/pkg/ccl/sqlproxyccl/interceptor/frontend_conn.go
+++ b/pkg/ccl/sqlproxyccl/interceptor/frontend_interceptor.go
@@ -10,33 +10,26 @@ package interceptor
import (
"io"
- "net"
"github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgwirebase"
"github.com/jackc/pgproto3/v2"
)
-// FrontendConn is a client interceptor for the Postgres frontend protocol.
-// This will be used for the connection between proxy and server.
-type FrontendConn struct {
- net.Conn
- interceptor *pgInterceptor
-}
+// FrontendInterceptor is a client interceptor for the Postgres frontend protocol.
+type FrontendInterceptor pgInterceptor
-// NewFrontendConn creates a FrontendConn using the default buffer size of 8KB.
-func NewFrontendConn(conn net.Conn) *FrontendConn {
- return &FrontendConn{
- Conn: conn,
- interceptor: newPgInterceptor(conn, defaultBufferSize),
- }
+// NewFrontendInterceptor creates a FrontendInterceptor using the default buffer
+// size of 8K bytes.
+func NewFrontendInterceptor(src io.Reader) *FrontendInterceptor {
+ return (*FrontendInterceptor)(newPgInterceptor(src, defaultBufferSize))
}
// PeekMsg returns the header of the current pgwire message without advancing
// the interceptor.
//
// See pgInterceptor.PeekMsg for more information.
-func (c *FrontendConn) PeekMsg() (typ pgwirebase.ServerMessageType, size int, err error) {
- byteType, size, err := c.interceptor.PeekMsg()
+func (fi *FrontendInterceptor) PeekMsg() (typ pgwirebase.ServerMessageType, size int, err error) {
+ byteType, size, err := (*pgInterceptor)(fi).PeekMsg()
return pgwirebase.ServerMessageType(byteType), size, err
}
@@ -44,8 +37,8 @@ func (c *FrontendConn) PeekMsg() (typ pgwirebase.ServerMessageType, size int, er
// This also advances the interceptor to the next message.
//
// See pgInterceptor.ReadMsg for more information.
-func (c *FrontendConn) ReadMsg() (msg pgproto3.BackendMessage, err error) {
- msgBytes, err := c.interceptor.ReadMsg()
+func (fi *FrontendInterceptor) ReadMsg() (msg pgproto3.BackendMessage, err error) {
+ msgBytes, err := (*pgInterceptor)(fi).ReadMsg()
if err != nil {
return nil, err
}
@@ -57,6 +50,6 @@ func (c *FrontendConn) ReadMsg() (msg pgproto3.BackendMessage, err error) {
// decoding, and advances the interceptor to the next message.
//
// See pgInterceptor.ForwardMsg for more information.
-func (c *FrontendConn) ForwardMsg(dst io.Writer) (n int, err error) {
- return c.interceptor.ForwardMsg(dst)
+func (fi *FrontendInterceptor) ForwardMsg(dst io.Writer) (n int, err error) {
+ return (*pgInterceptor)(fi).ForwardMsg(dst)
}
diff --git a/pkg/ccl/sqlproxyccl/interceptor/frontend_conn_test.go b/pkg/ccl/sqlproxyccl/interceptor/frontend_interceptor_test.go
similarity index 66%
rename from pkg/ccl/sqlproxyccl/interceptor/frontend_conn_test.go
rename to pkg/ccl/sqlproxyccl/interceptor/frontend_interceptor_test.go
index 730a4d2860..7e1b03f50d 100644
--- a/pkg/ccl/sqlproxyccl/interceptor/frontend_conn_test.go
+++ b/pkg/ccl/sqlproxyccl/interceptor/frontend_interceptor_test.go
@@ -10,8 +10,6 @@ package interceptor_test
import (
"bytes"
- "io"
- "net"
"testing"
"github.com/cockroachdb/cockroach/pkg/ccl/sqlproxyccl/interceptor"
@@ -21,44 +19,38 @@ import (
"github.com/stretchr/testify/require"
)
-// TestFrontendConn tests the FrontendConn. Note that the tests here are shallow.
-// For detailed ones, see the tests for the internal interceptor in base_test.go.
-func TestFrontendConn(t *testing.T) {
+// TestFrontendInterceptor tests the FrontendInterceptor. Note that the tests
+// here are shallow. For detailed ones, see the tests for the internal
+// interceptor in base_test.go.
+func TestFrontendInterceptor(t *testing.T) {
defer leaktest.AfterTest(t)()
q := (&pgproto3.ReadyForQuery{TxStatus: 'I'}).Encode(nil)
- writeAsync := func(t *testing.T, w io.Writer) <-chan error {
+ buildSrc := func(t *testing.T, count int) *bytes.Buffer {
t.Helper()
- errCh := make(chan error, 1)
- go func() {
- _, err := w.Write(q)
- errCh <- err
- }()
- return errCh
+ src := new(bytes.Buffer)
+ _, err := src.Write(q)
+ require.NoError(t, err)
+ return src
}
t.Run("PeekMsg returns the right message type", func(t *testing.T) {
- w, r := net.Pipe()
- errCh := writeAsync(t, w)
+ src := buildSrc(t, 1)
- fi := interceptor.NewFrontendConn(r)
+ fi := interceptor.NewFrontendInterceptor(src)
require.NotNil(t, fi)
typ, size, err := fi.PeekMsg()
require.NoError(t, err)
require.Equal(t, pgwirebase.ServerMsgReady, typ)
require.Equal(t, 6, size)
-
- err = <-errCh
- require.Nil(t, err)
})
t.Run("ReadMsg decodes the message correctly", func(t *testing.T) {
- w, r := net.Pipe()
- errCh := writeAsync(t, w)
+ src := buildSrc(t, 1)
- fi := interceptor.NewFrontendConn(r)
+ fi := interceptor.NewFrontendInterceptor(src)
require.NotNil(t, fi)
msg, err := fi.ReadMsg()
@@ -66,25 +58,18 @@ func TestFrontendConn(t *testing.T) {
rmsg, ok := msg.(*pgproto3.ReadyForQuery)
require.True(t, ok)
require.Equal(t, byte('I'), rmsg.TxStatus)
-
- err = <-errCh
- require.Nil(t, err)
})
t.Run("ForwardMsg forwards data to dst", func(t *testing.T) {
- w, r := net.Pipe()
- errCh := writeAsync(t, w)
+ src := buildSrc(t, 1)
dst := new(bytes.Buffer)
- fi := interceptor.NewFrontendConn(r)
+ fi := interceptor.NewFrontendInterceptor(src)
require.NotNil(t, fi)
n, err := fi.ForwardMsg(dst)
require.NoError(t, err)
require.Equal(t, 6, n)
require.Equal(t, 6, dst.Len())
-
- err = <-errCh
- require.Nil(t, err)
})
}
diff --git a/pkg/ccl/sqlproxyccl/interceptor/interceptor_test.go b/pkg/ccl/sqlproxyccl/interceptor/interceptor_test.go
index aa1534450d..bb91118dc9 100644
--- a/pkg/ccl/sqlproxyccl/interceptor/interceptor_test.go
+++ b/pkg/ccl/sqlproxyccl/interceptor/interceptor_test.go
@@ -9,8 +9,7 @@
package interceptor_test
import (
- "encoding/json"
- "net"
+ "bytes"
"testing"
"github.com/cockroachdb/cockroach/pkg/ccl/sqlproxyccl/interceptor"
@@ -20,100 +19,60 @@ import (
"github.com/stretchr/testify/require"
)
-// TestSimpleProxy illustrates how the frontend and backend connections can be
+// TestSimpleProxy illustrates how the frontend and backend interceptors can be
// used as a proxy.
func TestSimpleProxy(t *testing.T) {
defer leaktest.AfterTest(t)()
- t.Run("client to server", func(t *testing.T) {
- // These represents connections for client<->proxy and proxy<->server.
- clientProxy, client := net.Pipe()
- serverProxy, server := net.Pipe()
- defer clientProxy.Close()
- defer client.Close()
- defer serverProxy.Close()
- defer server.Close()
-
- // Create client and server interceptors.
- clientConn := interceptor.NewBackendConn(clientProxy)
- serverConn := interceptor.NewFrontendConn(serverProxy)
+ // These represent the connections for client<->proxy and proxy<->server.
+ fromClient := new(bytes.Buffer)
+ toClient := new(bytes.Buffer)
+ fromServer := new(bytes.Buffer)
+ toServer := new(bytes.Buffer)
+
+ // Create client and server interceptors.
+ clientInt := interceptor.NewBackendInterceptor(fromClient)
+ serverInt := interceptor.NewFrontendInterceptor(fromServer)
+ t.Run("client to server", func(t *testing.T) {
// Client sends a list of SQL queries.
queries := []pgproto3.FrontendMessage{
&pgproto3.Query{String: "SELECT 1"},
- &pgproto3.Query{String: "SELECT 2 FROM foobar"},
+ &pgproto3.Query{String: "SELECT * FROM foo.bar"},
&pgproto3.Query{String: "UPDATE foo SET x = 42"},
&pgproto3.Sync{},
&pgproto3.Terminate{},
}
- errCh := make(chan error, len(queries))
- go func() {
- for _, msg := range queries {
- _, err := client.Write(msg.Encode(nil))
- errCh <- err
- }
- }()
- msgCh := make(chan pgproto3.FrontendMessage, 10)
- go func() {
- backend := interceptor.NewBackendConn(server)
- for {
- msg, err := backend.ReadMsg()
- if err != nil {
- return
- }
- msgCh <- msg
- }
- }()
+ for _, msg := range queries {
+ _, err := fromClient.Write(msg.Encode(nil))
+ require.NoError(t, err)
+ }
+ totalBytes := fromClient.Len()
customQuery := &pgproto3.Query{
String: "SELECT * FROM crdb_internal.serialize_session()"}
for {
- typ, _, err := clientConn.PeekMsg()
+ typ, _, err := clientInt.PeekMsg()
require.NoError(t, err)
// Forward message to server.
- _, err = clientConn.ForwardMsg(serverConn)
+ _, err = clientInt.ForwardMsg(toServer)
require.NoError(t, err)
if typ == pgwirebase.ClientMsgTerminate {
// Right before we terminate, we could also craft a custom
// message, and send it to the server.
- _, err := serverConn.Write(customQuery.Encode(nil))
+ _, err := toServer.Write(customQuery.Encode(nil))
require.NoError(t, err)
break
}
}
-
- expectedMsg := []string{
- `"Type":"Query","String":"SELECT 1"`,
- `"Type":"Query","String":"SELECT 2 FROM foobar"`,
- `"Type":"Query","String":"UPDATE foo SET x = 42"`,
- `"Type":"Sync"`,
- `"Type":"Terminate"`,
- `"Type":"Query","String":"SELECT \* FROM crdb_internal.serialize_session\(\)"`,
- }
- var m1 []byte
- for _, m2 := range expectedMsg {
- msg := <-msgCh
- m1, _ = json.Marshal(msg)
- require.Regexp(t, m2, string(m1))
- }
+ require.Equal(t, 0, fromClient.Len())
+ require.Equal(t, totalBytes+len(customQuery.Encode(nil)), toServer.Len())
})
t.Run("server to client", func(t *testing.T) {
- // These represents connections for client<->proxy and proxy<->server.
- clientProxy, client := net.Pipe()
- serverProxy, server := net.Pipe()
- defer clientProxy.Close()
- defer client.Close()
- defer serverProxy.Close()
- defer server.Close()
-
- // Create client and server interceptors.
- clientConn := interceptor.NewBackendConn(clientProxy)
- serverConn := interceptor.NewFrontendConn(serverProxy)
-
// Server sends back responses.
queries := []pgproto3.BackendMessage{
// Forward these back to the client.
@@ -124,27 +83,15 @@ func TestSimpleProxy(t *testing.T) {
// Terminator.
&pgproto3.ReadyForQuery{},
}
- errCh := make(chan error, len(queries))
- go func() {
- for _, msg := range queries {
- _, err := server.Write(msg.Encode(nil))
- errCh <- err
- }
- }()
- msgCh := make(chan pgproto3.BackendMessage, 10)
- go func() {
- frontend := interceptor.NewFrontendConn(client)
- for {
- msg, err := frontend.ReadMsg()
- if err != nil {
- return
- }
- msgCh <- msg
- }
- }()
+ for _, msg := range queries {
+ _, err := fromServer.Write(msg.Encode(nil))
+ require.NoError(t, err)
+ }
+ // Exclude bytes from second message.
+ totalBytes := fromServer.Len() - len(queries[2].Encode(nil))
for {
- typ, size, err := serverConn.PeekMsg()
+ typ, size, err := serverInt.PeekMsg()
require.NoError(t, err)
switch typ {
@@ -152,13 +99,13 @@ func TestSimpleProxy(t *testing.T) {
// Assuming that we're only interested in small messages, then
// we could skip all the large ones.
if size > 12 {
- _, err := serverConn.ForwardMsg(clientConn)
+ _, err := serverInt.ForwardMsg(toClient)
require.NoError(t, err)
continue
}
// Decode message.
- msg, err := serverConn.ReadMsg()
+ msg, err := serverInt.ReadMsg()
require.NoError(t, err)
// Once we've decoded the message, we could store the message
@@ -167,7 +114,7 @@ func TestSimpleProxy(t *testing.T) {
require.True(t, ok)
require.Equal(t, "short", string(dmsg.CommandTag))
case pgwirebase.ServerMsgBackendKeyData:
- msg, err := serverConn.ReadMsg()
+ msg, err := serverInt.ReadMsg()
require.NoError(t, err)
dmsg, ok := msg.(*pgproto3.BackendKeyData)
@@ -177,11 +124,11 @@ func TestSimpleProxy(t *testing.T) {
// the client.
dmsg.SecretKey = 100
- _, err = clientConn.Write(dmsg.Encode(nil))
+ _, err = toClient.Write(dmsg.Encode(nil))
require.NoError(t, err)
default:
// Forward message that we're not interested to the client.
- _, err := serverConn.ForwardMsg(clientConn)
+ _, err := serverInt.ForwardMsg(toClient)
require.NoError(t, err)
}
@@ -189,17 +136,7 @@ func TestSimpleProxy(t *testing.T) {
break
}
}
-
- expectedMsg := []string{
- `"Type":"CommandComplete","CommandTag":"averylongstring"`,
- `"Type":"BackendKeyData","ProcessID":100,"SecretKey":100`,
- `"Type":"ReadyForQuery"`,
- }
- var m1 []byte
- for _, m2 := range expectedMsg {
- msg := <-msgCh
- m1, _ = json.Marshal(msg)
- require.Regexp(t, m2, string(m1))
- }
+ require.Equal(t, 0, fromServer.Len())
+ require.Equal(t, totalBytes, toClient.Len())
})
}
diff --git a/pkg/ccl/sqlproxyccl/proxy_handler_test.go b/pkg/ccl/sqlproxyccl/proxy_handler_test.go
index a439d53211..3de0c858c8 100644
--- a/pkg/ccl/sqlproxyccl/proxy_handler_test.go
+++ b/pkg/ccl/sqlproxyccl/proxy_handler_test.go
@@ -609,9 +609,6 @@ func TestDenylistUpdate(t *testing.T) {
func TestDirectoryConnect(t *testing.T) {
defer leaktest.AfterTest(t)()
- // TODO(jaylim-crl): This is a potential port reuse issue, so skip this
- // under stress. See linked GitHub issue.
- skip.UnderStress(t, "https://github.com/cockroachdb/cockroach/issues/76839")
skip.UnderDeadlockWithIssue(t, 71365)
defer log.Scope(t).Close(t)
diff --git a/pkg/ccl/streamingccl/streamingest/stream_ingestion_planning.go b/pkg/ccl/streamingccl/streamingest/stream_ingestion_planning.go
index 1c738b5ff3..b84076bdfd 100644
--- a/pkg/ccl/streamingccl/streamingest/stream_ingestion_planning.go
+++ b/pkg/ccl/streamingccl/streamingest/stream_ingestion_planning.go
@@ -121,7 +121,7 @@ func ingestionPlanHook(
}
// We only support a TENANT target, so error out if that is nil.
- if !ingestionStmt.Targets.TenantID.IsSet() {
+ if ingestionStmt.Targets.Tenant == (roachpb.TenantID{}) {
return errors.Newf("no tenant specified in ingestion query: %s", ingestionStmt.String())
}
@@ -154,7 +154,7 @@ func ingestionPlanHook(
// TODO(adityamaru): Add privileges checks. Probably the same as RESTORE.
- prefix := keys.MakeTenantPrefix(ingestionStmt.Targets.TenantID.TenantID)
+ prefix := keys.MakeTenantPrefix(ingestionStmt.Targets.Tenant)
startTime := hlc.Timestamp{WallTime: timeutil.Now().UnixNano()}
if ingestionStmt.AsOf.Expr != nil {
asOf, err := p.EvalAsOfTimestamp(ctx, ingestionStmt.AsOf)
@@ -166,7 +166,7 @@ func ingestionPlanHook(
streamIngestionDetails := jobspb.StreamIngestionDetails{
StreamAddress: string(streamAddress),
- TenantID: ingestionStmt.Targets.TenantID.TenantID,
+ TenantID: ingestionStmt.Targets.Tenant,
Span: roachpb.Span{Key: prefix, EndKey: prefix.PrefixEnd()},
StartTime: startTime,
}
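
The updated check above tests for an unspecified tenant by comparing Targets.Tenant against the zero value of roachpb.TenantID, which works because TenantID is a small comparable struct. An illustrative sketch (not part of the diff; tenantIsSet is an invented helper name):

package main

import (
	"fmt"

	"github.com/cockroachdb/cockroach/pkg/roachpb"
)

// tenantIsSet shows the zero-value comparison the hunk above relies on: an
// unset roachpb.TenantID compares equal to the struct's zero value.
func tenantIsSet(id roachpb.TenantID) bool {
	return id != (roachpb.TenantID{})
}

func main() {
	fmt.Println(tenantIsSet(roachpb.TenantID{}))       // false: no tenant specified
	fmt.Println(tenantIsSet(roachpb.MakeTenantID(10))) // true
}
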
diff --git a/pkg/ccl/streamingccl/streamingest/stream_ingestion_processor.go b/pkg/ccl/streamingccl/streamingest/stream_ingestion_processor.go
index 2d75a64324..94839f51a6 100644
--- a/pkg/ccl/streamingccl/streamingest/stream_ingestion_processor.go
+++ b/pkg/ccl/streamingccl/streamingest/stream_ingestion_processor.go
@@ -36,7 +36,6 @@ import (
"github.com/cockroachdb/cockroach/pkg/util/syncutil"
"github.com/cockroachdb/cockroach/pkg/util/timeutil"
"github.com/cockroachdb/errors"
- "github.com/cockroachdb/logtags"
)
var minimumFlushInterval = settings.RegisterPublicDurationSettingWithExplicitUnit(
@@ -192,7 +191,6 @@ func newStreamIngestionDataProcessor(
// Start is part of the RowSource interface.
func (sip *streamIngestionProcessor) Start(ctx context.Context) {
- ctx = logtags.AddTag(ctx, "job", sip.spec.JobID)
log.Infof(ctx, "starting ingest proc")
ctx = sip.StartInternal(ctx, streamIngestionProcessorName)
@@ -201,7 +199,8 @@ func (sip *streamIngestionProcessor) Start(ctx context.Context) {
evalCtx := sip.FlowCtx.EvalCtx
db := sip.FlowCtx.Cfg.DB
var err error
- sip.batcher, err = bulk.MakeStreamSSTBatcher(ctx, db, evalCtx.Settings)
+ sip.batcher, err = bulk.MakeStreamSSTBatcher(ctx, db, evalCtx.Settings,
+ func() int64 { return bulk.IngestFileSize(evalCtx.Settings) })
if err != nil {
sip.MoveToDraining(errors.Wrap(err, "creating stream sst batcher"))
return
diff --git a/pkg/ccl/streamingccl/streamproducer/replication_stream_planning.go b/pkg/ccl/streamingccl/streamproducer/replication_stream_planning.go
index 82deb3119a..0d44081636 100644
--- a/pkg/ccl/streamingccl/streamproducer/replication_stream_planning.go
+++ b/pkg/ccl/streamingccl/streamproducer/replication_stream_planning.go
@@ -149,13 +149,13 @@ func doCreateReplicationStream(
}
var spans []roachpb.Span
- if !eval.Targets.TenantID.IsSet() {
+ if eval.Targets.Tenant == (roachpb.TenantID{}) {
// TODO(yevgeniy): Only tenant streaming supported now; Support granular streaming.
return pgerror.New(pgcode.FeatureNotSupported, "granular replication streaming not supported")
}
telemetry.Count(`replication.create.tenant`)
- prefix := keys.MakeTenantPrefix(roachpb.MakeTenantID(eval.Targets.TenantID.ToUint64()))
+ prefix := keys.MakeTenantPrefix(roachpb.MakeTenantID(eval.Targets.Tenant.ToUint64()))
spans = append(spans, roachpb.Span{
Key: prefix,
EndKey: prefix.PrefixEnd(),
diff --git a/pkg/ccl/telemetryccl/BUILD.bazel b/pkg/ccl/telemetryccl/BUILD.bazel
index 4ef6c408db..9c1ba639f4 100644
--- a/pkg/ccl/telemetryccl/BUILD.bazel
+++ b/pkg/ccl/telemetryccl/BUILD.bazel
@@ -7,7 +7,6 @@ go_test(
"telemetry_test.go",
],
data = glob(["testdata/**"]),
- shard_count = 16,
deps = [
"//pkg/base",
"//pkg/ccl",
diff --git a/pkg/ccl/testccl/sqlccl/BUILD.bazel b/pkg/ccl/testccl/sqlccl/BUILD.bazel
index 5f7497c399..a75377363c 100644
--- a/pkg/ccl/testccl/sqlccl/BUILD.bazel
+++ b/pkg/ccl/testccl/sqlccl/BUILD.bazel
@@ -8,23 +8,15 @@ go_test(
"session_revival_test.go",
"show_transfer_state_test.go",
"temp_table_clean_test.go",
- "tenant_gc_test.go",
],
- shard_count = 16,
deps = [
"//pkg/base",
"//pkg/ccl/kvccl/kvtenantccl",
- "//pkg/jobs/jobspb",
- "//pkg/keys",
- "//pkg/roachpb",
"//pkg/security",
"//pkg/security/securitytest",
"//pkg/server",
"//pkg/settings/cluster",
- "//pkg/spanconfig",
"//pkg/sql",
- "//pkg/sql/catalog/descpb",
- "//pkg/sql/gcjob",
"//pkg/sql/sessiondatapb",
"//pkg/sql/sqlliveness/slinstance",
"//pkg/sql/sqltestutils",
diff --git a/pkg/ccl/testccl/sqlccl/session_revival_test.go b/pkg/ccl/testccl/sqlccl/session_revival_test.go
index 97861122ea..83f9a36cab 100644
--- a/pkg/ccl/testccl/sqlccl/session_revival_test.go
+++ b/pkg/ccl/testccl/sqlccl/session_revival_test.go
@@ -43,9 +43,7 @@ func TestAuthenticateWithSessionRevivalToken(t *testing.T) {
_, err := tenantDB.Exec("CREATE USER testuser WITH PASSWORD 'hunter2'")
require.NoError(t, err)
- // TODO(rafi): use ALTER TENANT ALL when available.
- _, err = mainDB.Exec(`INSERT INTO system.tenant_settings (tenant_id, name, value, value_type) VALUES
- (0, 'server.user_login.session_revival_token.enabled', 'true', 'b')`)
+ _, err = tenantDB.Exec("SET CLUSTER SETTING server.user_login.session_revival_token.enabled = true")
require.NoError(t, err)
var token string
diff --git a/pkg/ccl/testccl/sqlccl/show_transfer_state_test.go b/pkg/ccl/testccl/sqlccl/show_transfer_state_test.go
index 7946c7e96d..815951ad99 100644
--- a/pkg/ccl/testccl/sqlccl/show_transfer_state_test.go
+++ b/pkg/ccl/testccl/sqlccl/show_transfer_state_test.go
@@ -28,17 +28,15 @@ func TestShowTransferState(t *testing.T) {
ctx := context.Background()
params, _ := tests.CreateTestServerParams()
- s, mainDB, _ := serverutils.StartServer(t, params)
+ s, _, _ := serverutils.StartServer(t, params)
defer s.Stopper().Stop(ctx)
- tenant, tenantDB := serverutils.StartTenant(t, s, tests.CreateTestTenantParams(serverutils.TestTenantID()))
+ tenant, mainDB := serverutils.StartTenant(t, s, tests.CreateTestTenantParams(serverutils.TestTenantID()))
defer tenant.Stopper().Stop(ctx)
- defer tenantDB.Close()
+ defer mainDB.Close()
- _, err := tenantDB.Exec("CREATE USER testuser WITH PASSWORD 'hunter2'")
+ _, err := mainDB.Exec("CREATE USER testuser WITH PASSWORD 'hunter2'")
require.NoError(t, err)
- // TODO(rafi): use ALTER TENANT ALL when available.
- _, err = mainDB.Exec(`INSERT INTO system.tenant_settings (tenant_id, name, value, value_type) VALUES
- (0, 'server.user_login.session_revival_token.enabled', 'true', 'b')`)
+ _, err = mainDB.Exec("SET CLUSTER SETTING server.user_login.session_revival_token.enabled = true")
require.NoError(t, err)
t.Run("without_transfer_key", func(t *testing.T) {
@@ -172,7 +170,7 @@ func TestShowTransferState(t *testing.T) {
t.Run("root_user", func(t *testing.T) {
var key string
var errVal, sessionState, sessionRevivalToken gosql.NullString
- err := tenantDB.QueryRow(`SHOW TRANSFER STATE WITH 'bar'`).Scan(&errVal, &sessionState, &sessionRevivalToken, &key)
+ err := mainDB.QueryRow(`SHOW TRANSFER STATE WITH 'bar'`).Scan(&errVal, &sessionState, &sessionRevivalToken, &key)
require.NoError(t, err)
require.True(t, errVal.Valid)
diff --git a/pkg/ccl/testccl/sqlccl/tenant_gc_test.go b/pkg/ccl/testccl/sqlccl/tenant_gc_test.go
deleted file mode 100644
index 18c474d6e5..0000000000
--- a/pkg/ccl/testccl/sqlccl/tenant_gc_test.go
+++ /dev/null
@@ -1,121 +0,0 @@
-// Copyright 2022 The Cockroach Authors.
-//
-// Licensed as a CockroachDB Enterprise file under the Cockroach Community
-// License (the "License"); you may not use this file except in compliance with
-// the License. You may obtain a copy of the License at
-//
-// https://github.com/cockroachdb/cockroach/blob/master/licenses/CCL.txt
-
-package sqlccl
-
-import (
- "context"
- "testing"
-
- "github.com/cockroachdb/cockroach/pkg/base"
- "github.com/cockroachdb/cockroach/pkg/jobs/jobspb"
- "github.com/cockroachdb/cockroach/pkg/keys"
- "github.com/cockroachdb/cockroach/pkg/roachpb"
- "github.com/cockroachdb/cockroach/pkg/spanconfig"
- "github.com/cockroachdb/cockroach/pkg/sql"
- "github.com/cockroachdb/cockroach/pkg/sql/catalog/descpb"
- "github.com/cockroachdb/cockroach/pkg/sql/gcjob"
- "github.com/cockroachdb/cockroach/pkg/testutils/serverutils"
- "github.com/cockroachdb/cockroach/pkg/util/leaktest"
- "github.com/cockroachdb/cockroach/pkg/util/log"
- "github.com/stretchr/testify/require"
-)
-
-// TestGCTenantRemovesSpanConfigs ensures that GC-ing a tenant removes all
-// span/system span configs installed by it.
-func TestGCTenantRemovesSpanConfigs(t *testing.T) {
- defer leaktest.AfterTest(t)()
- defer log.Scope(t).Close(t)
-
- ctx := context.Background()
- ts, _, _ := serverutils.StartServer(t, base.TestServerArgs{
- Knobs: base.TestingKnobs{
- SpanConfig: &spanconfig.TestingKnobs{
- // Disable the system tenant's reconciliation process so that we can
- // make assertions on the total number of span configurations in the
- // system.
- ManagerDisableJobCreation: true,
- },
- },
- })
- defer ts.Stopper().Stop(ctx)
- execCfg := ts.ExecutorConfig().(sql.ExecutorConfig)
- scKVAccessor := ts.SpanConfigKVAccessor().(spanconfig.KVAccessor)
-
- gcClosure := func(tenID uint64, progress *jobspb.SchemaChangeGCProgress) error {
- return gcjob.TestingGCTenant(ctx, &execCfg, tenID, progress)
- }
-
- tenantID := roachpb.MakeTenantID(10)
-
- tt, err := ts.StartTenant(ctx, base.TestTenantArgs{
- TenantID: tenantID,
- TestingKnobs: base.TestingKnobs{
- SpanConfig: &spanconfig.TestingKnobs{
- // Disable the tenant's span config reconciliation process, we'll
- // instead manually add system span configs via the KVAccessor.
- ManagerDisableJobCreation: true,
- },
- },
- })
- require.NoError(t, err)
-
- tenantKVAccessor := tt.SpanConfigKVAccessor().(spanconfig.KVAccessor)
- // Write a system span config, set by the tenant, targeting its entire
- // keyspace.
- systemTarget, err := spanconfig.MakeTenantKeyspaceTarget(tenantID, tenantID)
- require.NoError(t, err)
- err = tenantKVAccessor.UpdateSpanConfigRecords(ctx, nil /* toDelete */, []spanconfig.Record{
- {
- Target: spanconfig.MakeTargetFromSystemTarget(systemTarget),
- Config: roachpb.SpanConfig{}, // Doesn't matter
- },
- })
- require.NoError(t, err)
-
- // Ensure there are 2 configs for the tenant -- one that spans its entire
- // keyspace, installed on creation, and of course the system span config we
- // inserted above.
- tenPrefix := keys.MakeTenantPrefix(tenantID)
- records, err := tenantKVAccessor.GetSpanConfigRecords(ctx, spanconfig.Targets{
- spanconfig.MakeTargetFromSpan(roachpb.Span{Key: tenPrefix, EndKey: tenPrefix.PrefixEnd()}),
- spanconfig.MakeTargetFromSystemTarget(systemTarget),
- })
- require.NoError(t, err)
- require.Equal(t, 2, len(records))
-
- // Get the entire span config state, from the system tenant's perspective,
- // which we'll use to compare against once the tenant is GC-ed.
- records, err = scKVAccessor.GetSpanConfigRecords(
- ctx, spanconfig.TestingEntireSpanConfigurationStateTargets(),
- )
- require.NoError(t, err)
- beforeDelete := len(records)
-
- // Mark the tenant as dropped by updating its record.
- require.NoError(t, sql.TestingUpdateTenantRecord(
- ctx, &execCfg, nil, /* txn */
- &descpb.TenantInfo{ID: tenantID.ToUint64(), State: descpb.TenantInfo_DROP},
- ))
-
- // Run GC on the tenant.
- progress := &jobspb.SchemaChangeGCProgress{
- Tenant: &jobspb.SchemaChangeGCProgress_TenantProgress{
- Status: jobspb.SchemaChangeGCProgress_DELETING,
- },
- }
- require.NoError(t, gcClosure(tenantID.ToUint64(), progress))
- require.Equal(t, jobspb.SchemaChangeGCProgress_DELETED, progress.Tenant.Status)
-
- // Ensure the tenant's span configs and system span configs have been deleted.
- records, err = scKVAccessor.GetSpanConfigRecords(
- ctx, spanconfig.TestingEntireSpanConfigurationStateTargets(),
- )
- require.NoError(t, err)
- require.Equal(t, len(records), beforeDelete-2)
-}
diff --git a/pkg/ccl/workloadccl/allccl/BUILD.bazel b/pkg/ccl/workloadccl/allccl/BUILD.bazel
index 7e9d849184..a2a29ed991 100644
--- a/pkg/ccl/workloadccl/allccl/BUILD.bazel
+++ b/pkg/ccl/workloadccl/allccl/BUILD.bazel
@@ -40,7 +40,6 @@ go_test(
"main_test.go",
],
embed = [":allccl"],
- shard_count = 16,
deps = [
"//pkg/base",
"//pkg/ccl",
diff --git a/pkg/cli/debug.go b/pkg/cli/debug.go
index 2797bd981c..9e9c74f52e 100644
--- a/pkg/cli/debug.go
+++ b/pkg/cli/debug.go
@@ -131,9 +131,8 @@ func parsePositiveDuration(arg string) (time.Duration, error) {
// OpenEngineOptions tunes the behavior of OpenEngine.
type OpenEngineOptions struct {
- ReadOnly bool
- MustExist bool
- DisableAutomaticCompactions bool
+ ReadOnly bool
+ MustExist bool
}
func (opts OpenEngineOptions) configOptions() []storage.ConfigOption {
@@ -144,22 +143,13 @@ func (opts OpenEngineOptions) configOptions() []storage.ConfigOption {
if opts.MustExist {
cfgOpts = append(cfgOpts, storage.MustExist)
}
- if opts.DisableAutomaticCompactions {
- cfgOpts = append(cfgOpts, storage.DisableAutomaticCompactions)
- }
return cfgOpts
}
-// OpenExistingStore opens the Pebble engine rooted at 'dir'. If 'readOnly' is
-// true, opens the store in read-only mode. If 'disableAutomaticCompactions' is
-// true, disables automatic/background compactions (only used for manual
-// compactions).
-func OpenExistingStore(
- dir string, stopper *stop.Stopper, readOnly, disableAutomaticCompactions bool,
-) (storage.Engine, error) {
- return OpenEngine(dir, stopper, OpenEngineOptions{
- ReadOnly: readOnly, MustExist: true, DisableAutomaticCompactions: disableAutomaticCompactions,
- })
+// OpenExistingStore opens the Pebble engine rooted at 'dir'.
+// If 'readOnly' is true, opens the store in read-only mode.
+func OpenExistingStore(dir string, stopper *stop.Stopper, readOnly bool) (storage.Engine, error) {
+ return OpenEngine(dir, stopper, OpenEngineOptions{ReadOnly: readOnly, MustExist: true})
}
// OpenEngine opens the engine at 'dir'. Depending on the supplied options,
@@ -247,7 +237,7 @@ func runDebugKeys(cmd *cobra.Command, args []string) error {
stopper := stop.NewStopper()
defer stopper.Stop(context.Background())
- db, err := OpenExistingStore(args[0], stopper, true /* readOnly */, false /* disableAutomaticCompactions */)
+ db, err := OpenExistingStore(args[0], stopper, true /* readOnly */)
if err != nil {
return err
}
@@ -420,7 +410,7 @@ func runDebugRangeData(cmd *cobra.Command, args []string) error {
stopper := stop.NewStopper()
defer stopper.Stop(context.Background())
- db, err := OpenExistingStore(args[0], stopper, true /* readOnly */, false /* disableAutomaticCompactions */)
+ db, err := OpenExistingStore(args[0], stopper, true /* readOnly */)
if err != nil {
return err
}
@@ -510,7 +500,7 @@ func runDebugRangeDescriptors(cmd *cobra.Command, args []string) error {
stopper := stop.NewStopper()
defer stopper.Stop(context.Background())
- db, err := OpenExistingStore(args[0], stopper, true /* readOnly */, false /* disableAutomaticCompactions */)
+ db, err := OpenExistingStore(args[0], stopper, true /* readOnly */)
if err != nil {
return err
}
@@ -638,7 +628,7 @@ func runDebugRaftLog(cmd *cobra.Command, args []string) error {
stopper := stop.NewStopper()
defer stopper.Stop(context.Background())
- db, err := OpenExistingStore(args[0], stopper, true /* readOnly */, false /* disableAutomaticCompactions */)
+ db, err := OpenExistingStore(args[0], stopper, true /* readOnly */)
if err != nil {
return err
}
@@ -708,7 +698,7 @@ func runDebugGCCmd(cmd *cobra.Command, args []string) error {
}
}
- db, err := OpenExistingStore(args[0], stopper, true /* readOnly */, false /* disableAutomaticCompactions */)
+ db, err := OpenExistingStore(args[0], stopper, true /* readOnly */)
if err != nil {
return err
}
@@ -806,7 +796,7 @@ func runDebugCompact(cmd *cobra.Command, args []string) error {
stopper := stop.NewStopper()
defer stopper.Stop(context.Background())
- db, err := OpenExistingStore(args[0], stopper, false /* readOnly */, true /* disableAutomaticCompactions */)
+ db, err := OpenExistingStore(args[0], stopper, false /* readOnly */)
if err != nil {
return err
}
@@ -1074,7 +1064,7 @@ func runDebugUnsafeRemoveDeadReplicas(cmd *cobra.Command, args []string) error {
stopper := stop.NewStopper()
defer stopper.Stop(context.Background())
- db, err := OpenExistingStore(args[0], stopper, false /* readOnly */, false /* disableAutomaticCompactions */)
+ db, err := OpenExistingStore(args[0], stopper, false /* readOnly */)
if err != nil {
return err
}
@@ -1436,7 +1426,7 @@ func runDebugIntentCount(cmd *cobra.Command, args []string) error {
ctx := context.Background()
defer stopper.Stop(ctx)
- db, err := OpenExistingStore(args[0], stopper, true /* readOnly */, false /* disableAutomaticCompactions */)
+ db, err := OpenExistingStore(args[0], stopper, true /* readOnly */)
if err != nil {
return err
}
diff --git a/pkg/cli/debug_check_store.go b/pkg/cli/debug_check_store.go
index 44136ff31f..240eb61246 100644
--- a/pkg/cli/debug_check_store.go
+++ b/pkg/cli/debug_check_store.go
@@ -146,7 +146,7 @@ func checkStoreRangeStats(
stopper := stop.NewStopper()
defer stopper.Stop(ctx)
- eng, err := OpenExistingStore(dir, stopper, true /* readOnly */, false /* disableAutomaticCompactions */)
+ eng, err := OpenExistingStore(dir, stopper, true /* readOnly */)
if err != nil {
return err
}
@@ -220,7 +220,7 @@ func checkStoreRaftState(
stopper := stop.NewStopper()
defer stopper.Stop(context.Background())
- db, err := OpenExistingStore(dir, stopper, true /* readOnly */, false /* disableAutomaticCompactions */)
+ db, err := OpenExistingStore(dir, stopper, true /* readOnly */)
if err != nil {
return err
}
diff --git a/pkg/cli/debug_recover_loss_of_quorum.go b/pkg/cli/debug_recover_loss_of_quorum.go
index ca45a07795..7080edb996 100644
--- a/pkg/cli/debug_recover_loss_of_quorum.go
+++ b/pkg/cli/debug_recover_loss_of_quorum.go
@@ -203,7 +203,7 @@ func runDebugDeadReplicaCollect(cmd *cobra.Command, args []string) error {
var stores []storage.Engine
for _, storeSpec := range debugRecoverCollectInfoOpts.Stores.Specs {
- db, err := OpenExistingStore(storeSpec.Path, stopper, true /* readOnly */, false /* disableAutomaticCompactions */)
+ db, err := OpenExistingStore(storeSpec.Path, stopper, true /* readOnly */)
if err != nil {
return errors.Wrapf(err, "failed to open store at path %q, ensure that store path is "+
"correct and that it is not used by another process", storeSpec.Path)
@@ -457,7 +457,7 @@ func runDebugExecuteRecoverPlan(cmd *cobra.Command, args []string) error {
var localNodeID roachpb.NodeID
batches := make(map[roachpb.StoreID]storage.Batch)
for _, storeSpec := range debugRecoverExecuteOpts.Stores.Specs {
- store, err := OpenExistingStore(storeSpec.Path, stopper, false /* readOnly */, false /* disableAutomaticCompactions */)
+ store, err := OpenExistingStore(storeSpec.Path, stopper, false /* readOnly */)
if err != nil {
return errors.Wrapf(err, "failed to open store at path %q. ensure that store path is "+
"correct and that it is not used by another process", storeSpec.Path)
diff --git a/pkg/cli/debug_recover_loss_of_quorum_test.go b/pkg/cli/debug_recover_loss_of_quorum_test.go
index d59314b6b6..863738a361 100644
--- a/pkg/cli/debug_recover_loss_of_quorum_test.go
+++ b/pkg/cli/debug_recover_loss_of_quorum_test.go
@@ -265,52 +265,3 @@ func createIntentOnRangeDescriptor(
t.Fatal(err)
}
}
-
-// TestJsonSerialization verifies that all fields serialized in JSON could be
-// read back. This specific test addresses issues where the default naming scheme
-// may not work correctly in combination with other tags; e.g. repeated fields used
-// with omitempty seem to use camelCase unless explicitly specified.
-func TestJsonSerialization(t *testing.T) {
- defer leaktest.AfterTest(t)()
-
- rt := roachpb.VOTER_INCOMING
- nr := loqrecoverypb.NodeReplicaInfo{
- Replicas: []loqrecoverypb.ReplicaInfo{
- {
- NodeID: 1,
- StoreID: 2,
- Desc: roachpb.RangeDescriptor{
- RangeID: 3,
- StartKey: roachpb.RKey(keys.MetaMin),
- EndKey: roachpb.RKey(keys.MetaMax),
- InternalReplicas: []roachpb.ReplicaDescriptor{
- {
- NodeID: 1,
- StoreID: 2,
- ReplicaID: 3,
- Type: &rt,
- },
- },
- NextReplicaID: 4,
- Generation: 7,
- },
- RaftAppliedIndex: 13,
- RaftCommittedIndex: 19,
- RaftLogDescriptorChanges: []loqrecoverypb.DescriptorChangeInfo{
- {
- ChangeType: 1,
- Desc: &roachpb.RangeDescriptor{},
- OtherDesc: &roachpb.RangeDescriptor{},
- },
- },
- },
- },
- }
- jsonpb := protoutil.JSONPb{Indent: " "}
- data, err := jsonpb.Marshal(nr)
- require.NoError(t, err)
-
- var nrFromJSON loqrecoverypb.NodeReplicaInfo
- require.NoError(t, jsonpb.Unmarshal(data, &nrFromJSON))
- require.Equal(t, nr, nrFromJSON, "objects before and after serialization")
-}
diff --git a/pkg/cli/debug_test.go b/pkg/cli/debug_test.go
index 56dc90fb8c..b4e6473ae6 100644
--- a/pkg/cli/debug_test.go
+++ b/pkg/cli/debug_test.go
@@ -80,7 +80,7 @@ func TestOpenExistingStore(t *testing.T) {
},
} {
t.Run(fmt.Sprintf("dir=%s", test.dir), func(t *testing.T) {
- _, err := OpenExistingStore(test.dir, stopper, false /* readOnly */, false /* disableAutomaticCompactions */)
+ _, err := OpenExistingStore(test.dir, stopper, false /* readOnly */)
if !testutils.IsError(err, test.expErr) {
t.Errorf("wanted %s but got %v", test.expErr, err)
}
@@ -114,7 +114,7 @@ func TestOpenReadOnlyStore(t *testing.T) {
},
} {
t.Run(fmt.Sprintf("readOnly=%t", test.readOnly), func(t *testing.T) {
- db, err := OpenExistingStore(storePath, stopper, test.readOnly, false /* disableAutomaticCompactions */)
+ db, err := OpenExistingStore(storePath, stopper, test.readOnly)
if err != nil {
t.Fatal(err)
}
@@ -255,7 +255,7 @@ func TestRemoveDeadReplicas(t *testing.T) {
stopper := stop.NewStopper()
defer stopper.Stop(ctx)
- db, err := OpenExistingStore(storePaths[idx], stopper, false /* readOnly */, false /* disableAutomaticCompactions */)
+ db, err := OpenExistingStore(storePaths[idx], stopper, false /* readOnly */)
if err != nil {
return err
}
diff --git a/pkg/cli/democluster/BUILD.bazel b/pkg/cli/democluster/BUILD.bazel
index d4fe734e33..6877ca2cac 100644
--- a/pkg/cli/democluster/BUILD.bazel
+++ b/pkg/cli/democluster/BUILD.bazel
@@ -54,7 +54,6 @@ go_test(
name = "democluster_test",
srcs = ["demo_cluster_test.go"],
embed = [":democluster"],
- shard_count = 16,
deps = [
"//pkg/base",
"//pkg/ccl/kvccl/kvtenantccl",
diff --git a/pkg/cli/mt_proxy.go b/pkg/cli/mt_proxy.go
index dff7bbf49a..7db9adbcb5 100644
--- a/pkg/cli/mt_proxy.go
+++ b/pkg/cli/mt_proxy.go
@@ -23,7 +23,6 @@ import (
"github.com/cockroachdb/cockroach/pkg/util/log/severity"
"github.com/cockroachdb/cockroach/pkg/util/stop"
"github.com/cockroachdb/errors"
- "github.com/cockroachdb/redact"
"github.com/spf13/cobra"
)
@@ -148,7 +147,7 @@ func waitForSignals(
}
log.Ops.Shoutf(ctx, severity.ERROR,
- "received signal '%s' during shutdown, initiating hard shutdown", redact.Safe(sig))
+ "received signal '%s' during shutdown, initiating hard shutdown", log.Safe(sig))
panic("terminate")
case <-stopper.IsStopped():
const msgDone = "server shutdown completed"
diff --git a/pkg/cli/node.go b/pkg/cli/node.go
index 421142b22f..0b8f72f62e 100644
--- a/pkg/cli/node.go
+++ b/pkg/cli/node.go
@@ -29,7 +29,6 @@ import (
"github.com/cockroachdb/cockroach/pkg/util/log"
"github.com/cockroachdb/cockroach/pkg/util/retry"
"github.com/cockroachdb/errors"
- "github.com/cockroachdb/redact"
"github.com/spf13/cobra"
"google.golang.org/grpc/codes"
"google.golang.org/grpc/status"
@@ -399,7 +398,7 @@ func handleNodeDecommissionSelf(
cliflags.NodeDecommissionSelf.Name)
}
- log.Infof(ctx, "%s node %d", redact.Safe(command), localNodeID)
+ log.Infof(ctx, "%s node %d", log.Safe(command), localNodeID)
return []roachpb.NodeID{localNodeID}, nil
}
diff --git a/pkg/cli/start.go b/pkg/cli/start.go
index 0769d47507..6b5e6a319c 100644
--- a/pkg/cli/start.go
+++ b/pkg/cli/start.go
@@ -888,7 +888,7 @@ func waitForShutdown(
// shutdown process.
log.Ops.Shoutf(shutdownCtx, severity.ERROR,
"received signal '%s' during shutdown, initiating hard shutdown%s",
- redact.Safe(sig), redact.Safe(hardShutdownHint))
+ log.Safe(sig), log.Safe(hardShutdownHint))
handleSignalDuringShutdown(sig)
panic("unreachable")
@@ -1187,7 +1187,7 @@ func setupAndInitializeLoggingAndProfiling(
"- %s\n"+
"- %s",
build.MakeIssueURL(53404),
- redact.Safe(docs.URL("secure-a-cluster.html")),
+ log.Safe(docs.URL("secure-a-cluster.html")),
)
}
@@ -1201,7 +1201,7 @@ func setupAndInitializeLoggingAndProfiling(
"For more information, see:\n\n" +
"- %s"
log.Shoutf(ctx, severity.WARNING, warningString,
- redact.Safe(docs.URL("cockroach-start.html#locality")))
+ log.Safe(docs.URL("cockroach-start.html#locality")))
}
}
diff --git a/pkg/cli/testdata/zip/partial1 b/pkg/cli/testdata/zip/partial1
index c9d9ba8891..0e45edfaea 100644
--- a/pkg/cli/testdata/zip/partial1
+++ b/pkg/cli/testdata/zip/partial1
@@ -39,12 +39,8 @@ debug zip --concurrency=1 --cpu-profile-duration=0s /dev/null
[cluster] retrieving SQL data for crdb_internal.invalid_objects... writing output: debug/crdb_internal.invalid_objects.txt... done
[cluster] retrieving SQL data for crdb_internal.index_usage_statistics... writing output: debug/crdb_internal.index_usage_statistics.txt... done
[cluster] retrieving SQL data for crdb_internal.table_indexes... writing output: debug/crdb_internal.table_indexes.txt... done
-[cluster] retrieving SQL data for crdb_internal.transaction_contention_events... writing output: debug/crdb_internal.transaction_contention_events.txt... done
[cluster] requesting nodes... received response... converting to JSON... writing binary output: debug/nodes.json... done
[cluster] requesting liveness... received response... converting to JSON... writing binary output: debug/liveness.json... done
-[cluster] requesting tenant ranges... received response...
-[cluster] requesting tenant ranges: last request failed: rpc error: ...
-[cluster] requesting tenant ranges: creating error output: debug/tenant_ranges.err.txt... done
[node 1] node status... converting to JSON... writing binary output: debug/nodes/1/status.json... done
[node 1] using SQL connection URL: postgresql://...
[node 1] retrieving SQL data for crdb_internal.feature_usage... writing output: debug/nodes/1/crdb_internal.feature_usage.txt... done
@@ -300,4 +296,3 @@ debug zip --concurrency=1 --cpu-profile-duration=0s /dev/null
[node 3] writing range 44... converting to JSON... writing binary output: debug/nodes/3/ranges/44.json... done
[cluster] pprof summary script... writing binary output: debug/pprof-summary.sh... done
[cluster] hot range summary script... writing binary output: debug/hot-ranges.sh... done
-[cluster] tenant hot range summary script... writing binary output: debug/hot-ranges-tenant.sh... done
diff --git a/pkg/cli/testdata/zip/partial1_excluded b/pkg/cli/testdata/zip/partial1_excluded
index 73d6fef32c..eefecf24a6 100644
--- a/pkg/cli/testdata/zip/partial1_excluded
+++ b/pkg/cli/testdata/zip/partial1_excluded
@@ -39,12 +39,8 @@ debug zip /dev/null --concurrency=1 --exclude-nodes=2 --cpu-profile-duration=0
[cluster] retrieving SQL data for crdb_internal.invalid_objects... writing output: debug/crdb_internal.invalid_objects.txt... done
[cluster] retrieving SQL data for crdb_internal.index_usage_statistics... writing output: debug/crdb_internal.index_usage_statistics.txt... done
[cluster] retrieving SQL data for crdb_internal.table_indexes... writing output: debug/crdb_internal.table_indexes.txt... done
-[cluster] retrieving SQL data for crdb_internal.transaction_contention_events... writing output: debug/crdb_internal.transaction_contention_events.txt... done
[cluster] requesting nodes... received response... converting to JSON... writing binary output: debug/nodes.json... done
[cluster] requesting liveness... received response... converting to JSON... writing binary output: debug/liveness.json... done
-[cluster] requesting tenant ranges... received response...
-[cluster] requesting tenant ranges: last request failed: rpc error: ...
-[cluster] requesting tenant ranges: creating error output: debug/tenant_ranges.err.txt... done
[node 1] node status... converting to JSON... writing binary output: debug/nodes/1/status.json... done
[node 1] using SQL connection URL: postgresql://...
[node 1] retrieving SQL data for crdb_internal.feature_usage... writing output: debug/nodes/1/crdb_internal.feature_usage.txt... done
@@ -212,4 +208,3 @@ debug zip /dev/null --concurrency=1 --exclude-nodes=2 --cpu-profile-duration=0
[node 3] writing range 44... converting to JSON... writing binary output: debug/nodes/3/ranges/44.json... done
[cluster] pprof summary script... writing binary output: debug/pprof-summary.sh... done
[cluster] hot range summary script... writing binary output: debug/hot-ranges.sh... done
-[cluster] tenant hot range summary script... writing binary output: debug/hot-ranges-tenant.sh... done
diff --git a/pkg/cli/testdata/zip/partial2 b/pkg/cli/testdata/zip/partial2
index 840176f0d3..8ecd45bb24 100644
--- a/pkg/cli/testdata/zip/partial2
+++ b/pkg/cli/testdata/zip/partial2
@@ -39,12 +39,8 @@ debug zip --concurrency=1 --cpu-profile-duration=0 /dev/null
[cluster] retrieving SQL data for crdb_internal.invalid_objects... writing output: debug/crdb_internal.invalid_objects.txt... done
[cluster] retrieving SQL data for crdb_internal.index_usage_statistics... writing output: debug/crdb_internal.index_usage_statistics.txt... done
[cluster] retrieving SQL data for crdb_internal.table_indexes... writing output: debug/crdb_internal.table_indexes.txt... done
-[cluster] retrieving SQL data for crdb_internal.transaction_contention_events... writing output: debug/crdb_internal.transaction_contention_events.txt... done
[cluster] requesting nodes... received response... converting to JSON... writing binary output: debug/nodes.json... done
[cluster] requesting liveness... received response... converting to JSON... writing binary output: debug/liveness.json... done
-[cluster] requesting tenant ranges... received response...
-[cluster] requesting tenant ranges: last request failed: rpc error: ...
-[cluster] requesting tenant ranges: creating error output: debug/tenant_ranges.err.txt... done
[node 1] node status... converting to JSON... writing binary output: debug/nodes/1/status.json... done
[node 1] using SQL connection URL: postgresql://...
[node 1] retrieving SQL data for crdb_internal.feature_usage... writing output: debug/nodes/1/crdb_internal.feature_usage.txt... done
@@ -211,4 +207,3 @@ debug zip --concurrency=1 --cpu-profile-duration=0 /dev/null
[node 3] writing range 44... converting to JSON... writing binary output: debug/nodes/3/ranges/44.json... done
[cluster] pprof summary script... writing binary output: debug/pprof-summary.sh... done
[cluster] hot range summary script... writing binary output: debug/hot-ranges.sh... done
-[cluster] tenant hot range summary script... writing binary output: debug/hot-ranges-tenant.sh... done
diff --git a/pkg/cli/testdata/zip/testzip b/pkg/cli/testdata/zip/testzip
index 3001f0fdaa..4692b0a03d 100644
--- a/pkg/cli/testdata/zip/testzip
+++ b/pkg/cli/testdata/zip/testzip
@@ -39,12 +39,8 @@ debug zip --concurrency=1 --cpu-profile-duration=1s /dev/null
[cluster] retrieving SQL data for crdb_internal.invalid_objects... writing output: debug/crdb_internal.invalid_objects.txt... done
[cluster] retrieving SQL data for crdb_internal.index_usage_statistics... writing output: debug/crdb_internal.index_usage_statistics.txt... done
[cluster] retrieving SQL data for crdb_internal.table_indexes... writing output: debug/crdb_internal.table_indexes.txt... done
-[cluster] retrieving SQL data for crdb_internal.transaction_contention_events... writing output: debug/crdb_internal.transaction_contention_events.txt... done
[cluster] requesting nodes... received response... converting to JSON... writing binary output: debug/nodes.json... done
[cluster] requesting liveness... received response... converting to JSON... writing binary output: debug/liveness.json... done
-[cluster] requesting tenant ranges... received response...
-[cluster] requesting tenant ranges: last request failed: rpc error: ...
-[cluster] requesting tenant ranges: creating error output: debug/tenant_ranges.err.txt... done
[cluster] requesting CPU profiles
[cluster] profiles generated
[cluster] profile for node 1... writing binary output: debug/nodes/1/cpu.pprof... done
@@ -129,4 +125,3 @@ debug zip --concurrency=1 --cpu-profile-duration=1s /dev/null
[node 1] writing range 44... converting to JSON... writing binary output: debug/nodes/1/ranges/44.json... done
[cluster] pprof summary script... writing binary output: debug/pprof-summary.sh... done
[cluster] hot range summary script... writing binary output: debug/hot-ranges.sh... done
-[cluster] tenant hot range summary script... writing binary output: debug/hot-ranges-tenant.sh... done
diff --git a/pkg/cli/testdata/zip/testzip_concurrent b/pkg/cli/testdata/zip/testzip_concurrent
index ca38f82986..c3254f4197 100644
--- a/pkg/cli/testdata/zip/testzip_concurrent
+++ b/pkg/cli/testdata/zip/testzip_concurrent
@@ -40,11 +40,6 @@ zip
[cluster] requesting nodes: done
[cluster] requesting nodes: received response...
[cluster] requesting nodes: writing binary output: debug/nodes.json...
-[cluster] requesting tenant ranges...
-[cluster] requesting tenant ranges: creating error output: debug/tenant_ranges.err.txt...
-[cluster] requesting tenant ranges: done
-[cluster] requesting tenant ranges: last request failed: rpc error: ...
-[cluster] requesting tenant ranges: received response...
[cluster] retrieving SQL data for "".crdb_internal.create_schema_statements...
[cluster] retrieving SQL data for "".crdb_internal.create_schema_statements: done
[cluster] retrieving SQL data for "".crdb_internal.create_schema_statements: writing output: debug/crdb_internal.create_schema_statements.txt...
@@ -108,9 +103,6 @@ zip
[cluster] retrieving SQL data for crdb_internal.table_indexes...
[cluster] retrieving SQL data for crdb_internal.table_indexes: done
[cluster] retrieving SQL data for crdb_internal.table_indexes: writing output: debug/crdb_internal.table_indexes.txt...
-[cluster] retrieving SQL data for crdb_internal.transaction_contention_events...
-[cluster] retrieving SQL data for crdb_internal.transaction_contention_events: done
-[cluster] retrieving SQL data for crdb_internal.transaction_contention_events: writing output: debug/crdb_internal.transaction_contention_events.txt...
[cluster] retrieving SQL data for crdb_internal.zones...
[cluster] retrieving SQL data for crdb_internal.zones: done
[cluster] retrieving SQL data for crdb_internal.zones: writing output: debug/crdb_internal.zones.txt...
@@ -140,9 +132,6 @@ zip
[cluster] retrieving SQL data for system.settings: writing output: debug/system.settings.txt...
[cluster] retrieving the node status to get the SQL address...
[cluster] retrieving the node status to get the SQL address: ...
-[cluster] tenant hot range summary script...
-[cluster] tenant hot range summary script: done
-[cluster] tenant hot range summary script: writing binary output: debug/hot-ranges-tenant.sh...
[cluster] using SQL address: ...
[cluster] using SQL address: ...
[cluster] using SQL address: ...
diff --git a/pkg/cli/testdata/zip/testzip_tenant b/pkg/cli/testdata/zip/testzip_tenant
index 573876d7b9..88143c02f3 100644
--- a/pkg/cli/testdata/zip/testzip_tenant
+++ b/pkg/cli/testdata/zip/testzip_tenant
@@ -53,14 +53,10 @@ debug zip --concurrency=1 --cpu-profile-duration=1s /dev/null
[cluster] retrieving SQL data for crdb_internal.invalid_objects... writing output: debug/crdb_internal.invalid_objects.txt... done
[cluster] retrieving SQL data for crdb_internal.index_usage_statistics... writing output: debug/crdb_internal.index_usage_statistics.txt... done
[cluster] retrieving SQL data for crdb_internal.table_indexes... writing output: debug/crdb_internal.table_indexes.txt... done
-[cluster] retrieving SQL data for crdb_internal.transaction_contention_events... writing output: debug/crdb_internal.transaction_contention_events.txt... done
[cluster] requesting nodes... received response... converting to JSON... writing binary output: debug/nodes.json... done
[cluster] requesting liveness... received response...
[cluster] requesting liveness: last request failed: rpc error: ...
[cluster] requesting liveness: creating error output: debug/liveness.json.err.txt... done
-[cluster] requesting tenant ranges... received response...
-[cluster] requesting tenant ranges: last request failed: rpc error: ...
-[cluster] requesting tenant ranges: creating error output: debug/tenant_ranges.err.txt... done
[cluster] requesting CPU profiles
[cluster] profiles generated
[cluster] profile for node 1... writing binary output: debug/nodes/1/cpu.pprof... done
@@ -115,4 +111,3 @@ debug zip --concurrency=1 --cpu-profile-duration=1s /dev/null
[node 1] requesting ranges: creating error output: debug/nodes/1/ranges.err.txt... done
[cluster] pprof summary script... writing binary output: debug/pprof-summary.sh... done
[cluster] hot range summary script... writing binary output: debug/hot-ranges.sh... done
-[cluster] tenant hot range summary script... writing binary output: debug/hot-ranges-tenant.sh... done
diff --git a/pkg/cli/zip.go b/pkg/cli/zip.go
index cbbb30e491..1952c8953b 100644
--- a/pkg/cli/zip.go
+++ b/pkg/cli/zip.go
@@ -276,7 +276,7 @@ find . -name cpu.pprof -print0 | xargs -0 go tool pprof -tags
}
}
- // A script to summarize the hottest ranges for a storage server's range reports.
+ // A script to summarize the hottest ranges.
{
s := zc.clusterPrinter.start("hot range summary script")
if err := z.createRaw(s, debugBase+"/hot-ranges.sh", []byte(`#!/bin/sh
@@ -286,15 +286,6 @@ find . -path './nodes/*/ranges/*.json' -print0 | xargs -0 grep per_second | sort
}
}
- // A script to summarize the hottest ranges for a tenant's range report.
- {
- s := zc.clusterPrinter.start("tenant hot range summary script")
- if err := z.createRaw(s, debugBase+"/hot-ranges-tenant.sh", []byte(`#!/bin/sh
-find . -path './tenant_ranges/*/*.json' -print0 | xargs -0 grep per_second | sort -rhk3 | head -n 20`)); err != nil {
- return err
- }
- }
-
return nil
}
diff --git a/pkg/cli/zip_cluster_wide.go b/pkg/cli/zip_cluster_wide.go
index 70e5eaf3ff..55ac022ebf 100644
--- a/pkg/cli/zip_cluster_wide.go
+++ b/pkg/cli/zip_cluster_wide.go
@@ -13,7 +13,6 @@ package cli
import (
"context"
"fmt"
- "sort"
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/liveness/livenesspb"
"github.com/cockroachdb/cockroach/pkg/roachpb"
@@ -33,7 +32,6 @@ const (
schemaPrefix = debugBase + "/schema"
settingsName = debugBase + "/settings"
problemRangesName = reportsPrefix + "/problemranges"
- tenantRangesName = debugBase + "/tenant_ranges"
)
// makeClusterWideZipRequests defines the zipRequests that are to be
@@ -112,7 +110,6 @@ var debugZipTablesPerCluster = []string{
"crdb_internal.invalid_objects",
"crdb_internal.index_usage_statistics",
"crdb_internal.table_indexes",
- "crdb_internal.transaction_contention_events",
}
// nodesInfo holds node details pulled from a SQL or storage node.
@@ -192,41 +189,6 @@ func (zc *debugZipContext) collectClusterData(
livenessByNodeID = lresponse.Statuses
}
}
-
- {
- var tenantRanges *serverpb.TenantRangesResponse
- s := zc.clusterPrinter.start("requesting tenant ranges")
- if requestErr := zc.runZipFn(ctx, s, func(ctx context.Context) error {
- var err error
- tenantRanges, err = zc.status.TenantRanges(ctx, &serverpb.TenantRangesRequest{})
- return err
- }); requestErr != nil {
- if err := zc.z.createError(s, tenantRangesName, requestErr); err != nil {
- return nodesInfo{}, nil, errors.Wrap(err, "fetching tenant ranges")
- }
- } else {
- s.done()
- rangesFound := 0
- for locality, rangeList := range tenantRanges.RangesByLocality {
- rangesFound += len(rangeList.Ranges)
- sort.Slice(rangeList.Ranges, func(i, j int) bool {
- return rangeList.Ranges[i].RangeID > rangeList.Ranges[j].RangeID
- })
- sLocality := zc.clusterPrinter.start("writing tenant ranges for locality: %s", locality)
- prefix := fmt.Sprintf("%s/%s", tenantRangesName, locality)
- for _, r := range rangeList.Ranges {
- sRange := zc.clusterPrinter.start("writing tenant range %d", r.RangeID)
- name := fmt.Sprintf("%s/%d", prefix, r.RangeID)
- if err := zc.z.createJSON(sRange, name+".json", r); err != nil {
- return nodesInfo{}, nil, errors.Wrapf(err, "writing tenant range %d for locality %s", r.RangeID, locality)
- }
- }
- sLocality.done()
- }
- zc.clusterPrinter.info("%d tenant ranges found", rangesFound)
- }
- }
-
return ni, livenessByNodeID, nil
}
diff --git a/pkg/cli/zip_tenant_test.go b/pkg/cli/zip_tenant_test.go
index 099fff97a0..7e69de1f1a 100644
--- a/pkg/cli/zip_tenant_test.go
+++ b/pkg/cli/zip_tenant_test.go
@@ -46,8 +46,6 @@ func TestTenantZip(t *testing.T) {
StoreSpecs: []base.StoreSpec{{
Path: hostDir,
}},
- // TODO(abarganier): Switch to secure mode once underlying infra has been
- // updated to support it. See: https://github.com/cockroachdb/cockroach/issues/77173
Insecure: true,
TenantArgs: &tenantArgs,
})
diff --git a/pkg/clusterversion/cockroach_versions.go b/pkg/clusterversion/cockroach_versions.go
index 4bedf909ef..ebac3bddc2 100644
--- a/pkg/clusterversion/cockroach_versions.go
+++ b/pkg/clusterversion/cockroach_versions.go
@@ -305,12 +305,6 @@ const (
// stop overwriting the LATEST and checkpoint files during backup execution.
// Instead, it writes new files alongside the old in reserved subdirectories.
BackupDoesNotOverwriteLatestAndCheckpoint
- // EnableDeclarativeSchemaChanger is the version where new declarative schema changer
- // can be used to construct schema change plan node.
- EnableDeclarativeSchemaChanger
-
- // RowLevelTTL is the version where we allow row level TTL tables.
- RowLevelTTL
// *************************************************
// Step (1): Add new versions here.
@@ -506,14 +500,6 @@ var versionsSingleton = keyedVersions{
Key: BackupDoesNotOverwriteLatestAndCheckpoint,
Version: roachpb.Version{Major: 21, Minor: 2, Internal: 84},
},
- {
- Key: EnableDeclarativeSchemaChanger,
- Version: roachpb.Version{Major: 21, Minor: 2, Internal: 86},
- },
- {
- Key: RowLevelTTL,
- Version: roachpb.Version{Major: 21, Minor: 2, Internal: 88},
- },
// *************************************************
// Step (2): Add new versions here.
diff --git a/pkg/clusterversion/key_string.go b/pkg/clusterversion/key_string.go
index f7de307fb5..9bee7041fd 100644
Binary files a/pkg/clusterversion/key_string.go and b/pkg/clusterversion/key_string.go differ
diff --git a/pkg/cmd/bazci/watch.go b/pkg/cmd/bazci/watch.go
index d860be5874..ad6faceda6 100644
--- a/pkg/cmd/bazci/watch.go
+++ b/pkg/cmd/bazci/watch.go
@@ -198,12 +198,8 @@ func (w watcher) stageBinaryArtifacts() error {
// These targets don't have stable, predictable locations, so
// they have to be hardcoded.
var ext string
- libDir := "lib"
if usingCrossWindowsConfig() {
ext = "dll"
- // NB: the libs end up in the "bin" subdir of libgeos
- // on Windows.
- libDir = "bin"
} else if usingCrossDarwinConfig() {
ext = "dylib"
} else {
@@ -212,8 +208,8 @@ func (w watcher) stageBinaryArtifacts() error {
switch bin {
case "//c-deps:libgeos":
for _, relBinPath := range []string{
- fmt.Sprintf("c-deps/libgeos/%s/libgeos_c.%s", libDir, ext),
- fmt.Sprintf("c-deps/libgeos/%s/libgeos.%s", libDir, ext),
+ fmt.Sprintf("c-deps/libgeos/lib/libgeos_c.%s", ext),
+ fmt.Sprintf("c-deps/libgeos/lib/libgeos.%s", ext),
} {
err := w.maybeStageArtifact(w.info.binDir, relBinPath, 0644, finalizePhase, copyContentTo)
if err != nil {
diff --git a/pkg/cmd/compile-build/BUILD.bazel b/pkg/cmd/compile-build/BUILD.bazel
new file mode 100644
index 0000000000..e4c2887470
--- /dev/null
+++ b/pkg/cmd/compile-build/BUILD.bazel
@@ -0,0 +1,15 @@
+load("@io_bazel_rules_go//go:def.bzl", "go_binary", "go_library")
+
+go_library(
+ name = "compile-build_lib",
+ srcs = ["main.go"],
+ importpath = "github.com/cockroachdb/cockroach/pkg/cmd/compile-build",
+ visibility = ["//visibility:private"],
+ deps = ["//pkg/release"],
+)
+
+go_binary(
+ name = "compile-build",
+ embed = [":compile-build_lib"],
+ visibility = ["//visibility:public"],
+)
diff --git a/pkg/cmd/compile-build/main.go b/pkg/cmd/compile-build/main.go
new file mode 100644
index 0000000000..26e6e5bd82
--- /dev/null
+++ b/pkg/cmd/compile-build/main.go
@@ -0,0 +1,54 @@
+// Copyright 2020 The Cockroach Authors.
+//
+// Use of this software is governed by the Business Source License
+// included in the file licenses/BSL.txt.
+//
+// As of the Change Date specified in that file, in accordance with
+// the Business Source License, use of this software will be governed
+// by the Apache License, Version 2.0, included in the file
+// licenses/APL.txt.
+
+package main
+
+import (
+ "flag"
+ "go/build"
+ "log"
+
+ "github.com/cockroachdb/cockroach/pkg/release"
+)
+
+func main() {
+ pkg, err := build.Import("github.com/cockroachdb/cockroach", "", build.FindOnly)
+ if err != nil {
+ log.Fatalf("unable to locate CRDB directory: %s", err)
+ }
+
+ var compileAll = flag.Bool("all", false, "compile all supported builds (darwin, linux, windows)")
+ var buildType = flag.String("buildtype", "release", "compile with a different build type. Default: 'release'. Options: 'development', 'release'")
+ flag.Parse()
+
+ // We compile just the first supported target unless we're explicitly told to
+ // cross-compile.
+ targets := release.SupportedTargets[:1]
+ if *compileAll {
+ targets = release.SupportedTargets
+ }
+ opts := []release.MakeReleaseOption{
+ release.WithMakeReleaseOptionEnv("MKRELEASE_BUILDTYPE=" + *buildType),
+ }
+
+ for _, target := range targets {
+ if err := release.MakeRelease(
+ target,
+ pkg.Dir,
+ opts...,
+ ); err != nil {
+ log.Fatal(err)
+ }
+ }
+
+ if err := release.MakeWorkload(pkg.Dir); err != nil {
+ log.Fatal(err)
+ }
+}
diff --git a/pkg/cmd/dev/cache.go b/pkg/cmd/dev/cache.go
index ad81cddd23..1186538196 100644
--- a/pkg/cmd/dev/cache.go
+++ b/pkg/cmd/dev/cache.go
@@ -61,28 +61,26 @@ func (d *dev) cache(cmd *cobra.Command, _ []string) error {
if clean {
return d.cleanCache(ctx)
}
- if down {
- return d.tearDownCache(ctx)
- }
if reset {
// Errors here don't really mean much, we can just ignore them.
err := d.tearDownCache(ctx)
if err != nil {
log.Printf("%v\n", err)
}
- }
- bazelRcLine, err := d.setUpCache(ctx)
- if err != nil {
+ bazelRcLine, err := d.setUpCache(ctx)
+ if bazelRcLine != "" {
+ fmt.Printf("Please add `%s` to your ~/.bazelrc\n", bazelRcLine)
+ }
return err
}
- errStr, err := d.checkPresenceInBazelRc(bazelRcLine)
- if err != nil {
- return err
+ if down {
+ return d.tearDownCache(ctx)
}
- if errStr != "" {
- return fmt.Errorf("%s", errStr)
+ bazelRcLine, err := d.setUpCache(ctx)
+ if bazelRcLine != "" {
+ fmt.Printf("Please add `%s` to your ~/.bazelrc\n", bazelRcLine)
}
- return nil
+ return err
}
func bazelRemoteCacheDir() (string, error) {
@@ -136,10 +134,6 @@ func (d *dev) setUpCache(ctx context.Context) (string, error) {
log.Printf("Configuring cache...\n")
- err := d.exec.CommandContextInheritingStdStreams(ctx, "bazel", "build", bazelRemoteTarget)
- if err != nil {
- return "", err
- }
bazelRemoteLoc, err := d.exec.CommandContextSilent(ctx, "bazel", "run", bazelRemoteTarget, "--run_under=//build/bazelutil/whereis")
if err != nil {
return "", err
diff --git a/pkg/cmd/dev/doctor.go b/pkg/cmd/dev/doctor.go
index 12615ba210..e5c2504768 100644
--- a/pkg/cmd/dev/doctor.go
+++ b/pkg/cmd/dev/doctor.go
@@ -13,10 +13,10 @@ package main
import (
"context"
"errors"
- "fmt"
"io/ioutil"
"log"
"os"
+ osexec "os/exec"
"path/filepath"
"runtime"
"strconv"
@@ -79,7 +79,7 @@ func printStdoutAndErr(stdoutStr string, err error) {
if len(stdoutStr) > 0 {
log.Printf("stdout: %s", stdoutStr)
}
- var cmderr *exec.ExitError
+ var cmderr *osexec.ExitError
if errors.As(err, &cmderr) {
stderrStr := strings.TrimSpace(string(cmderr.Stderr))
if len(stderrStr) > 0 {
@@ -105,13 +105,9 @@ func makeDoctorCmd(runE func(cmd *cobra.Command, args []string) error) *cobra.Co
func (d *dev) doctor(cmd *cobra.Command, _ []string) error {
ctx := cmd.Context()
- failures := []string{}
+ success := true
noCache := mustGetFlagBool(cmd, noCacheFlag)
noCacheEnv := d.os.Getenv("DEV_NO_REMOTE_CACHE")
- workspace, err := d.getWorkspace(ctx)
- if err != nil {
- return err
- }
if noCacheEnv != "" {
noCache = true
}
@@ -121,15 +117,16 @@ func (d *dev) doctor(cmd *cobra.Command, _ []string) error {
if runtime.GOOS == "darwin" {
stdout, err := d.exec.CommandContextSilent(ctx, "/usr/bin/xcodebuild", "-version")
if err != nil {
- log.Println("Failed to run `/usr/bin/xcodebuild -version`.")
+ success = false
+ log.Printf("Failed to run `/usr/bin/xcodebuild -version`.")
stdoutStr := strings.TrimSpace(string(stdout))
printStdoutAndErr(stdoutStr, err)
- failures = append(failures, `You must have a full installation of XCode to build with Bazel.
+ log.Println(`You must have a full installation of XCode to build with Bazel.
A command-line tools instance does not suffice.
Please perform the following steps:
1. Install XCode from the App Store.
2. Launch Xcode.app at least once to perform one-time initialization of developer tools.
- 3. Run `+"`xcode-select -switch /Applications/Xcode.app/`.")
+ 3. Run ` + "`xcode-select -switch /Applications/Xcode.app/`.")
}
}
@@ -139,24 +136,25 @@ Please perform the following steps:
stdout, err := d.exec.CommandContextSilent(ctx, "cmake", "--version")
stdoutStr := strings.TrimSpace(string(stdout))
if err != nil {
- log.Println("Failed to run `cmake --version`.")
printStdoutAndErr(stdoutStr, err)
- failures = append(failures, "Failed to run `cmake --version`; do you have it installed?")
+ success = false
} else {
versionFields := strings.Split(strings.TrimPrefix(stdoutStr, "cmake version "), ".")
if len(versionFields) < 3 {
- failures = append(failures, fmt.Sprintf("malformed cmake version: %q", stdoutStr))
+ log.Printf("malformed cmake version: %q\n", stdoutStr)
+ success = false
} else {
major, majorErr := strconv.Atoi(versionFields[0])
minor, minorErr := strconv.Atoi(versionFields[1])
if majorErr != nil || minorErr != nil {
- failures = append(failures, fmt.Sprintf("malformed cmake version: %q", stdoutStr))
+ log.Printf("malformed cmake version: %q\n", stdoutStr)
+ success = false
} else if major < cmakeRequiredMajor || minor < cmakeRequiredMinor {
- msg := "cmake is too old, upgrade to 3.20.x+"
+ log.Printf("cmake is too old, upgrade to 3.20.x+\n")
if runtime.GOOS == "linux" {
- msg = msg + "\n\t If this is a gceworker you can use ./build/bootstrap/bootstrap-debian.sh to update all tools"
+ log.Printf("\t If this is a gceworker you can use ./build/bootstrap/bootstrap-debian.sh to update all tools\n")
}
- failures = append(failures, msg)
+ success = false
}
}
}
@@ -181,43 +179,41 @@ Please perform the following steps:
}
// Check whether the build is properly configured to use stamping.
- d.log.Println("doctor: running stamp test")
- failedStampTestMsg := ""
- stdout, err := d.exec.CommandContextSilent(ctx, "bazel", "build", "//build/bazelutil:test_stamping")
- if err != nil {
- failedStampTestMsg = "Failed to run `bazel build //build/bazelutil:test_stamping`"
- log.Println(failedStampTestMsg)
- printStdoutAndErr(string(stdout), err)
+ passedStampTest := true
+ if _, err := d.exec.CommandContextSilent(ctx, "bazel", "build", "//build/bazelutil:test_stamping"); err != nil {
+ passedStampTest = false
} else {
bazelBin, err := d.getBazelBin(ctx)
if err != nil {
return err
}
- testStampingTxt := filepath.Join(bazelBin, "build", "bazelutil", "test_stamping.txt")
- fileContents, err := d.os.ReadFile(testStampingTxt)
+ fileContents, err := d.os.ReadFile(
+ filepath.Join(bazelBin, "build", "bazelutil", "test_stamping.txt"))
if err != nil {
return err
}
if !strings.Contains(fileContents, "STABLE_BUILD_GIT_BUILD_TYPE") {
- failedStampTestMsg = fmt.Sprintf("Could not find STABLE_BUILD_GIT_TYPE in %s\n", testStampingTxt)
+ passedStampTest = false
}
}
- if failedStampTestMsg != "" {
- failedStampTestMsg = failedStampTestMsg + fmt.Sprintf(`
-This may be because your Bazel is not configured to "stamp" built executables.
-Make sure one of the following lines is in the file %s/.bazelrc.user:
-`, workspace)
+ workspace, err := d.getWorkspace(ctx)
+ if err != nil {
+ return err
+ }
+ if !passedStampTest {
+ success = false
+ log.Printf(`Your machine is not configured to "stamp" your built executables.
+Please add one of the following to your %s/.bazelrc.user:`, workspace)
if runtime.GOOS == "darwin" && runtime.GOARCH == "amd64" {
- failedStampTestMsg = failedStampTestMsg + " build --config=devdarwinx86_64"
+ log.Printf(" build --config=devdarwinx86_64")
} else if runtime.GOOS == "linux" && runtime.GOARCH == "amd64" {
- failedStampTestMsg = failedStampTestMsg + " build --config=dev\n"
- failedStampTestMsg = failedStampTestMsg + " OR \n"
- failedStampTestMsg = failedStampTestMsg + " build --config=crosslinux\n"
- failedStampTestMsg = failedStampTestMsg + "The former will use your host toolchain, while the latter will use the cross-compiler that we use in CI."
+ log.Printf(" build --config=dev")
+ log.Printf(" OR ")
+ log.Printf(" build --config=crosslinux")
+ log.Printf("The former will use your host toolchain, while the latter will use the cross-compiler that we use in CI.")
} else {
- failedStampTestMsg = failedStampTestMsg + " build --config=dev"
+ log.Printf(" build --config=dev")
}
- failures = append(failures, failedStampTestMsg)
}
if !noCache {
@@ -226,20 +222,19 @@ Make sure one of the following lines is in the file %s/.bazelrc.user:
if err != nil {
return err
}
- msg, err := d.checkPresenceInBazelRc(bazelRcLine)
+ homeDir, err := os.UserHomeDir()
if err != nil {
return err
}
- if msg != "" {
- failures = append(failures, msg)
+ bazelRcContents, err := d.os.ReadFile(filepath.Join(homeDir, ".bazelrc"))
+ if err != nil || !strings.Contains(bazelRcContents, bazelRcLine) {
+ log.Printf("Please add the string `%s` to your ~/.bazelrc:\n", bazelRcLine)
+ log.Printf(" echo \"%s\" >> ~/.bazelrc", bazelRcLine)
+ success = false
}
}
- if len(failures) > 0 {
- log.Printf("doctor: encountered %d errors", len(failures))
- for _, failure := range failures {
- log.Println(failure)
- }
+ if !success {
return errors.New("please address the errors described above and try again")
}
@@ -249,38 +244,3 @@ Make sure one of the following lines is in the file %s/.bazelrc.user:
log.Println("You are ready to build :)")
return nil
}
-
-// checkPresenceInBazelRc checks whether the given line is in ~/.bazelrc.
-// If it is, this function returns an empty string and a nil error.
-// If it isn't, this function returns a non-empty human-readable string describing
-// what the user should do to solve the issue and a nil error.
-// In other failure cases the function returns an empty string and a non-nil error.
-func (d *dev) checkPresenceInBazelRc(expectedBazelRcLine string) (string, error) {
- homeDir, err := os.UserHomeDir()
- if err != nil {
- return "", err
- }
- errString := fmt.Sprintf("Please add the string `%s` to your ~/.bazelrc:\n", expectedBazelRcLine)
- errString = errString + fmt.Sprintf(" echo \"%s\" >> ~/.bazelrc", expectedBazelRcLine)
-
- bazelRcContents, err := d.os.ReadFile(filepath.Join(homeDir, ".bazelrc"))
- if err != nil {
- // The file may not exist; that's OK, but the line definitely is
- // not in the file.
- return errString, nil //nolint:returnerrcheck
- }
- found := false
- for _, line := range strings.Split(bazelRcContents, "\n") {
- if !strings.Contains(line, expectedBazelRcLine) {
- continue
- }
- if strings.HasPrefix(strings.TrimSpace(line), "#") {
- continue
- }
- found = true
- }
- if found {
- return "", nil
- }
- return errString, nil
-}
diff --git a/pkg/cmd/dev/io/exec/exec.go b/pkg/cmd/dev/io/exec/exec.go
index 4e6a53a9db..ee217f927f 100644
--- a/pkg/cmd/dev/io/exec/exec.go
+++ b/pkg/cmd/dev/io/exec/exec.go
@@ -25,21 +25,6 @@ import (
"github.com/irfansharif/recorder"
)
-// ExitError is an error type similar to os.ExitError. The stderr for
-// failed commands is captured as `Stderr`.
-type ExitError struct {
- Stderr []byte
- Inner error
-}
-
-func (e *ExitError) Unwrap() error {
- return e.Inner
-}
-
-func (e *ExitError) Error() string {
- return e.Inner.Error()
-}
-
// Exec is a convenience wrapper around the stdlib os/exec package. It lets us:
//
// (a) mock all instances where we shell out, for tests, and
@@ -238,13 +223,13 @@ func (e *Exec) commandContextImpl(
output, err := e.Next(command, func(outTrace, errTrace io.Writer) (string, error) {
cmd := exec.CommandContext(ctx, name, args...)
- var stdoutBuffer, stderrBuffer bytes.Buffer
+ var buffer bytes.Buffer
if silent {
- cmd.Stdout = io.MultiWriter(&stdoutBuffer, outTrace)
- cmd.Stderr = io.MultiWriter(&stderrBuffer, errTrace)
+ cmd.Stdout = io.MultiWriter(&buffer, outTrace)
+ cmd.Stderr = errTrace
} else {
- cmd.Stdout = io.MultiWriter(e.stdout, &stdoutBuffer, outTrace)
- cmd.Stderr = io.MultiWriter(e.stderr, &stderrBuffer, errTrace)
+ cmd.Stdout = io.MultiWriter(e.stdout, &buffer, outTrace)
+ cmd.Stderr = io.MultiWriter(e.stderr, errTrace)
}
if stdin != nil {
cmd.Stdin = stdin
@@ -255,9 +240,9 @@ func (e *Exec) commandContextImpl(
return "", err
}
if err := cmd.Wait(); err != nil {
- return "", &ExitError{Inner: err, Stderr: stderrBuffer.Bytes()}
+ return "", err
}
- return stdoutBuffer.String(), nil
+ return buffer.String(), nil
})
if err != nil {
diff --git a/pkg/cmd/publish-artifacts/BUILD.bazel b/pkg/cmd/publish-artifacts/BUILD.bazel
index 9f1a9704e7..f6d4a538cb 100644
--- a/pkg/cmd/publish-artifacts/BUILD.bazel
+++ b/pkg/cmd/publish-artifacts/BUILD.bazel
@@ -7,6 +7,7 @@ go_library(
visibility = ["//visibility:private"],
deps = [
"//pkg/release",
+ "//pkg/util/version",
"@com_github_aws_aws_sdk_go//aws",
"@com_github_aws_aws_sdk_go//aws/session",
"@com_github_aws_aws_sdk_go//service/s3",
diff --git a/pkg/cmd/publish-artifacts/main.go b/pkg/cmd/publish-artifacts/main.go
index 62a506d98a..91d41656bd 100644
--- a/pkg/cmd/publish-artifacts/main.go
+++ b/pkg/cmd/publish-artifacts/main.go
@@ -11,15 +11,21 @@
package main
import (
+ "bytes"
"flag"
+ "fmt"
+ "go/build"
"log"
"os"
+ "os/exec"
"path/filepath"
+ "strings"
"github.com/aws/aws-sdk-go/aws"
"github.com/aws/aws-sdk-go/aws/session"
"github.com/aws/aws-sdk-go/service/s3"
"github.com/cockroachdb/cockroach/pkg/release"
+ "github.com/cockroachdb/cockroach/pkg/util/version"
"github.com/kr/pretty"
)
@@ -44,8 +50,16 @@ var testableS3 = func() (s3putter, error) {
return s3.New(sess), nil
}
+var isRelease = flag.Bool("release", false, "build in release mode instead of bleeding-edge mode")
var destBucket = flag.String("bucket", "", "override default bucket")
+var (
+ // TODO(tamird,benesch,bdarnell): make "latest" a website-redirect
+ // rather than a full key. This means that the actual artifact will no
+ // longer be named "-latest".
+ latestStr = "latest"
+)
+
func main() {
flag.Parse()
@@ -60,17 +74,34 @@ func main() {
if !ok {
log.Fatalf("VCS branch environment variable %s is not set", teamcityBuildBranchKey)
}
- pkg, err := os.Getwd()
- if err != nil {
- log.Fatalf("unable to locate CRDB directory: %s", err)
- }
- // Make sure the WORKSPACE file is in the current working directory.
- _, err = os.Stat(filepath.Join(pkg, "WORKSPACE"))
+ pkg, err := build.Import("github.com/cockroachdb/cockroach", "", build.FindOnly)
if err != nil {
log.Fatalf("unable to locate CRDB directory: %s", err)
}
var versionStr string
+ var isStableRelease bool
+ if *isRelease {
+ ver, err := version.Parse(branch)
+ if err != nil {
+ log.Fatalf("refusing to build release with invalid version name '%s' (err: %s)", branch, err)
+ }
+
+ // Prerelease returns anything after the `-` and before metadata. eg: `beta` for `1.0.1-beta+metadata`
+ if ver.PreRelease() == "" {
+ isStableRelease = true
+ }
+ versionStr = branch
+ } else {
+ cmd := exec.Command("git", "rev-parse", "HEAD")
+ cmd.Dir = pkg.Dir
+ log.Printf("%s %s", cmd.Env, cmd.Args)
+ out, err := cmd.Output()
+ if err != nil {
+ log.Fatalf("%s: out=%q err=%s", cmd.Args, out, err)
+ }
+ versionStr = string(bytes.TrimSpace(out))
+ }
svc, err := testableS3()
if err != nil {
@@ -80,26 +111,112 @@ func main() {
var bucketName string
if len(*destBucket) > 0 {
bucketName = *destBucket
+ } else if *isRelease {
+ bucketName = "binaries.cockroachdb.com"
} else {
bucketName = "cockroach"
}
log.Printf("Using S3 bucket: %s", bucketName)
releaseVersionStrs := []string{versionStr}
+ // Only build `latest` tarballs for stable releases.
+ if isStableRelease {
+ releaseVersionStrs = append(releaseVersionStrs, latestStr)
+ }
+
+ if *isRelease {
+ buildArchive(svc, opts{
+ PkgDir: pkg.Dir,
+ BucketName: bucketName,
+ ReleaseVersionStrs: releaseVersionStrs,
+ })
+ }
- for _, platform := range []release.Platform{release.PlatformLinux, release.PlatformMacOS, release.PlatformWindows} {
- var o opts
- o.Platform = platform
- o.ReleaseVersionStrs = releaseVersionStrs
- o.PkgDir = pkg
- o.Branch = branch
- o.VersionStr = versionStr
- o.BucketName = bucketName
- o.Branch = branch
+ for _, target := range release.SupportedTargets {
+ for i, extraArgs := range []struct {
+ goflags string
+ suffix string
+ tags string
+ }{
+ {},
+ // TODO(tamird): re-enable deadlock builds. This really wants its
+ // own install suffix; it currently pollutes the normal release
+ // build cache.
+ //
+ // {suffix: ".deadlock", tags: "deadlock"},
+ {suffix: ".race", goflags: "-race"},
+ } {
+ var o opts
+ o.ReleaseVersionStrs = releaseVersionStrs
+ o.PkgDir = pkg.Dir
+ o.Branch = branch
+ o.VersionStr = versionStr
+ o.BucketName = bucketName
+ o.Branch = branch
+ o.BuildType = target.BuildType
+ o.GoFlags = extraArgs.goflags
+ o.Suffix = extraArgs.suffix + target.Suffix
+ o.Tags = extraArgs.tags
- log.Printf("building %s", pretty.Sprint(o))
+ log.Printf("building %s", pretty.Sprint(o))
- buildOneCockroach(svc, o)
+ // TODO(tamird): build deadlock,race binaries for all targets?
+ if i > 0 && (*isRelease || !strings.HasSuffix(o.BuildType, "linux-gnu")) {
+ log.Printf("skipping auxiliary build")
+ continue
+ }
+
+ buildOneCockroach(svc, o)
+ }
+ }
+
+ if !*isRelease {
+ buildOneWorkload(svc, opts{
+ PkgDir: pkg.Dir,
+ BucketName: bucketName,
+ Branch: branch,
+ VersionStr: versionStr,
+ })
+ }
+}
+
+func buildArchive(svc s3putter, o opts) {
+ for _, releaseVersionStr := range o.ReleaseVersionStrs {
+ archiveBase := fmt.Sprintf("cockroach-%s", releaseVersionStr)
+ srcArchive := fmt.Sprintf("%s.%s", archiveBase, "src.tgz")
+ cmd := exec.Command(
+ "make",
+ "archive",
+ fmt.Sprintf("ARCHIVE_BASE=%s", archiveBase),
+ fmt.Sprintf("ARCHIVE=%s", srcArchive),
+ )
+ cmd.Dir = o.PkgDir
+ cmd.Stdout = os.Stdout
+ cmd.Stderr = os.Stderr
+ log.Printf("%s %s", cmd.Env, cmd.Args)
+ if err := cmd.Run(); err != nil {
+ log.Fatalf("%s: %s", cmd.Args, err)
+ }
+
+ absoluteSrcArchivePath := filepath.Join(o.PkgDir, srcArchive)
+ f, err := os.Open(absoluteSrcArchivePath)
+ if err != nil {
+ log.Fatalf("os.Open(%s): %s", absoluteSrcArchivePath, err)
+ }
+ putObjectInput := s3.PutObjectInput{
+ Bucket: &o.BucketName,
+ Key: &srcArchive,
+ Body: f,
+ }
+ if releaseVersionStr == latestStr {
+ putObjectInput.CacheControl = &release.NoCache
+ }
+ if _, err := svc.PutObject(&putObjectInput); err != nil {
+ log.Fatalf("s3 upload %s: %s", absoluteSrcArchivePath, err)
+ }
+ if err := f.Close(); err != nil {
+ log.Fatal(err)
+ }
}
}
@@ -109,11 +226,52 @@ func buildOneCockroach(svc s3putter, o opts) {
log.Printf("done building cockroach: %s", pretty.Sprint(o))
}()
- if err := release.MakeRelease(o.Platform, release.BuildOptions{}, o.PkgDir); err != nil {
+ opts := []release.MakeReleaseOption{
+ release.WithMakeReleaseOptionBuildArg(fmt.Sprintf("%s=%s", "GOFLAGS", o.GoFlags)),
+ release.WithMakeReleaseOptionBuildArg(fmt.Sprintf("%s=%s", "TAGS", o.Tags)),
+ release.WithMakeReleaseOptionBuildArg(fmt.Sprintf("%s=%s", "BUILDCHANNEL", "official-binary")),
+ }
+ if *isRelease {
+ opts = append(opts, release.WithMakeReleaseOptionBuildArg(fmt.Sprintf("%s=%s", "BUILD_TAGGED_RELEASE", "true")))
+ }
+
+ if err := release.MakeRelease(
+ release.SupportedTarget{
+ BuildType: o.BuildType,
+ Suffix: o.Suffix,
+ },
+ o.PkgDir,
+ opts...,
+ ); err != nil {
+ log.Fatal(err)
+ }
+
+ o.Base = "cockroach" + o.Suffix
+ o.AbsolutePath = filepath.Join(o.PkgDir, o.Base)
+
+ if !*isRelease {
+ putNonRelease(svc, o, release.MakeCRDBLibraryNonReleaseFiles(o.PkgDir, o.BuildType, o.VersionStr, o.Suffix)...)
+ } else {
+ putRelease(svc, o)
+ }
+}
+
+func buildOneWorkload(svc s3putter, o opts) {
+ defer func() {
+ log.Printf("done building workload: %s", pretty.Sprint(o))
+ }()
+
+ if *isRelease {
+ log.Fatalf("refusing to build workload in release mode")
+ }
+
+ if err := release.MakeWorkload(o.PkgDir); err != nil {
log.Fatal(err)
}
- putNonRelease(svc, o, release.MakeCRDBLibraryNonReleaseFiles(o.PkgDir, o.Platform, o.VersionStr)...)
+ o.Base = "workload"
+ o.AbsolutePath = filepath.Join(o.PkgDir, "bin", o.Base)
+ putNonRelease(svc, o)
}
type opts struct {
@@ -121,8 +279,12 @@ type opts struct {
Branch string
ReleaseVersionStrs []string
- Platform release.Platform
+ BuildType string
+ GoFlags string
+ Suffix string
+ Tags string
+ Base string
BucketName string
AbsolutePath string
PkgDir string
@@ -135,9 +297,25 @@ func putNonRelease(svc s3putter, o opts, additionalNonReleaseFiles ...release.No
Branch: o.Branch,
BucketName: o.BucketName,
Files: append(
- []release.NonReleaseFile{release.MakeCRDBBinaryNonReleaseFile(o.AbsolutePath, o.VersionStr)},
+ []release.NonReleaseFile{release.MakeCRDBBinaryNonReleaseFile(o.Base, o.AbsolutePath, o.VersionStr)},
additionalNonReleaseFiles...,
),
},
)
}
+
+func putRelease(svc s3putter, o opts) {
+ for _, releaseVersionStr := range o.ReleaseVersionStrs {
+ release.PutRelease(svc, release.PutReleaseOptions{
+ BucketName: o.BucketName,
+ NoCache: releaseVersionStr == latestStr,
+ Suffix: o.Suffix,
+ BuildType: o.BuildType,
+ VersionStr: releaseVersionStr,
+ Files: append(
+ []release.ArchiveFile{release.MakeCRDBBinaryArchiveFile(o.Base, o.AbsolutePath)},
+ release.MakeCRDBLibraryArchiveFiles(o.PkgDir, o.BuildType)...,
+ ),
+ })
+ }
+}
diff --git a/pkg/cmd/publish-artifacts/main_test.go b/pkg/cmd/publish-artifacts/main_test.go
index 351361136d..26e51394f3 100644
--- a/pkg/cmd/publish-artifacts/main_test.go
+++ b/pkg/cmd/publish-artifacts/main_test.go
@@ -219,6 +219,42 @@ func TestMainF(t *testing.T) {
Key: "cockroach/workload.LATEST",
WebsiteRedirectLocation: "/cockroach/workload." + shaStub,
},
+ {
+ Bucket: "binaries.cockroachdb.com",
+ Key: "cockroach-v42.42.42.src.tgz",
+ },
+ {
+ Bucket: "binaries.cockroachdb.com",
+ CacheControl: "no-cache",
+ Key: "cockroach-latest.src.tgz",
+ },
+ {
+ Bucket: "binaries.cockroachdb.com",
+ Key: "cockroach-v42.42.42.darwin-10.9-amd64.tgz",
+ },
+ {
+ Bucket: "binaries.cockroachdb.com",
+ CacheControl: "no-cache",
+ Key: "cockroach-latest.darwin-10.9-amd64.tgz",
+ },
+ {
+ Bucket: "binaries.cockroachdb.com",
+ Key: "cockroach-v42.42.42.linux-amd64.tgz",
+ },
+ {
+ Bucket: "binaries.cockroachdb.com",
+ CacheControl: "no-cache",
+ Key: "cockroach-latest.linux-amd64.tgz",
+ },
+ {
+ Bucket: "binaries.cockroachdb.com",
+ Key: "cockroach-v42.42.42.windows-6.2-amd64.zip",
+ },
+ {
+ Bucket: "binaries.cockroachdb.com",
+ CacheControl: "no-cache",
+ Key: "cockroach-latest.windows-6.2-amd64.zip",
+ },
}
if err := os.Setenv("TC_BUILD_BRANCH", "master"); err != nil {
@@ -226,6 +262,12 @@ func TestMainF(t *testing.T) {
}
main()
+ if err := os.Setenv("TC_BUILD_BRANCH", "v42.42.42"); err != nil {
+ t.Fatal(err)
+ }
+ *isRelease = true
+ main()
+
var acts []testCase
for _, req := range r.reqs {
var act testCase
diff --git a/pkg/cmd/publish-provisional-artifacts/BUILD.bazel b/pkg/cmd/publish-provisional-artifacts/BUILD.bazel
index a23da639b1..3d2ab9748c 100644
--- a/pkg/cmd/publish-provisional-artifacts/BUILD.bazel
+++ b/pkg/cmd/publish-provisional-artifacts/BUILD.bazel
@@ -29,7 +29,6 @@ go_test(
deps = [
"//pkg/release",
"//pkg/testutils",
- "@com_github_alessio_shellescape//:shellescape",
"@com_github_aws_aws_sdk_go//service/s3",
"@com_github_cockroachdb_errors//:errors",
"@com_github_stretchr_testify//require",
diff --git a/pkg/cmd/publish-provisional-artifacts/main.go b/pkg/cmd/publish-provisional-artifacts/main.go
index d5e79cc7af..bf606560bf 100644
--- a/pkg/cmd/publish-provisional-artifacts/main.go
+++ b/pkg/cmd/publish-provisional-artifacts/main.go
@@ -13,12 +13,15 @@ package main
import (
"bytes"
"flag"
+ "fmt"
+ "go/build"
"io"
"log"
"os"
"os/exec"
"path/filepath"
"regexp"
+ "strings"
"github.com/aws/aws-sdk-go/aws"
"github.com/aws/aws-sdk-go/aws/session"
@@ -77,36 +80,31 @@ func main() {
if err != nil {
log.Fatalf("Creating AWS S3 session: %s", err)
}
+ execFn := release.DefaultExecFn
branch, ok := os.LookupEnv(teamcityBuildBranchKey)
if !ok {
log.Fatalf("VCS branch environment variable %s is not set", teamcityBuildBranchKey)
}
- pkg, err := os.Getwd()
+ pkg, err := build.Import("github.com/cockroachdb/cockroach", "", build.FindOnly)
if err != nil {
log.Fatalf("unable to locate CRDB directory: %s", err)
}
- // Make sure the WORKSPACE file is in the current working directory.
- _, err = os.Stat(filepath.Join(pkg, "WORKSPACE"))
- if err != nil {
- log.Fatalf("unable to locate CRDB directory: %s", err)
- }
-
cmd := exec.Command("git", "rev-parse", "HEAD")
- cmd.Dir = pkg
+ cmd.Dir = pkg.Dir
log.Printf("%s %s", cmd.Env, cmd.Args)
shaOut, err := cmd.Output()
if err != nil {
log.Fatalf("%s: out=%q err=%s", cmd.Args, shaOut, err)
}
- run(s3, runFlags{
+ run(s3, execFn, runFlags{
doProvisional: *doProvisionalF,
doBless: *doBlessF,
isRelease: *isReleaseF,
branch: branch,
- pkgDir: pkg,
+ pkgDir: pkg.Dir,
sha: string(bytes.TrimSpace(shaOut)),
- }, release.ExecFn{})
+ })
}
type runFlags struct {
@@ -116,7 +114,7 @@ type runFlags struct {
pkgDir string
}
-func run(svc s3I, flags runFlags, execFn release.ExecFn) {
+func run(svc s3I, execFn release.ExecFn, flags runFlags) {
// TODO(dan): non-release builds currently aren't broken into the two
// phases. Instead, the provisional phase does them both.
if !flags.isRelease {
@@ -165,21 +163,51 @@ func run(svc s3I, flags runFlags, execFn release.ExecFn) {
log.Printf("Using S3 bucket: %s", bucketName)
var cockroachBuildOpts []opts
- for _, platform := range []release.Platform{release.PlatformLinux, release.PlatformMacOS, release.PlatformWindows} {
- var o opts
- o.Platform = platform
- o.PkgDir = flags.pkgDir
- o.Branch = flags.branch
- o.VersionStr = versionStr
- o.BucketName = bucketName
- o.AbsolutePath = filepath.Join(flags.pkgDir, "cockroach"+release.SuffixFromPlatform(platform))
-
- cockroachBuildOpts = append(cockroachBuildOpts, o)
+ for _, target := range release.SupportedTargets {
+ for i, extraArgs := range []struct {
+ goflags string
+ suffix string
+ tags string
+ }{
+ {},
+ // TODO(tamird): re-enable deadlock builds. This really wants its
+ // own install suffix; it currently pollutes the normal release
+ // build cache.
+ //
+ // {suffix: ".deadlock", tags: "deadlock"},
+ //
+ // As of 2019-01-08, we don't really use this for anything, so save the build time.
+ // {suffix: ".race", goflags: "-race"},
+ } {
+ var o opts
+ o.PkgDir = flags.pkgDir
+ o.Branch = flags.branch
+ o.VersionStr = versionStr
+ o.BucketName = bucketName
+ o.BuildType = target.BuildType
+ o.GoFlags = extraArgs.goflags
+ o.Suffix = extraArgs.suffix + target.Suffix
+ o.Tags = extraArgs.tags
+ o.Base = "cockroach" + o.Suffix
+
+ // TODO(tamird): build deadlock,race binaries for all targets?
+ if i > 0 && (flags.isRelease || !strings.HasSuffix(o.BuildType, "linux-gnu")) {
+ log.Printf("skipping auxiliary build: %s", pretty.Sprint(o))
+ continue
+ }
+
+ cockroachBuildOpts = append(cockroachBuildOpts, o)
+ }
+ }
+ archiveBuildOpts := opts{
+ PkgDir: flags.pkgDir,
+ BucketName: bucketName,
+ VersionStr: versionStr,
}
if flags.doProvisional {
for _, o := range cockroachBuildOpts {
- buildCockroach(flags, o, execFn)
+ buildCockroach(execFn, flags, o)
if !flags.isRelease {
putNonRelease(svc, o)
@@ -187,6 +215,9 @@ func run(svc s3I, flags runFlags, execFn release.ExecFn) {
putRelease(svc, o)
}
}
+ if flags.isRelease {
+ buildAndPutArchive(svc, execFn, archiveBuildOpts)
+ }
}
if flags.doBless {
if !flags.isRelease {
@@ -196,24 +227,76 @@ func run(svc s3I, flags runFlags, execFn release.ExecFn) {
for _, o := range cockroachBuildOpts {
markLatestRelease(svc, o)
}
+ markLatestArchive(svc, archiveBuildOpts)
}
}
}
-func buildCockroach(flags runFlags, o opts, execFn release.ExecFn) {
+func buildAndPutArchive(svc s3I, execFn release.ExecFn, o opts) {
+ log.Printf("building archive %s", pretty.Sprint(o))
+ defer func() {
+ log.Printf("done building archive: %s", pretty.Sprint(o))
+ }()
+
+ archiveBase, srcArchive := s3KeyArchive(o)
+ cmd := exec.Command(
+ "make",
+ "archive",
+ fmt.Sprintf("ARCHIVE_BASE=%s", archiveBase),
+ fmt.Sprintf("ARCHIVE=%s", srcArchive),
+ fmt.Sprintf("BUILDINFO_TAG=%s", o.VersionStr),
+ )
+ cmd.Dir = o.PkgDir
+ cmd.Stderr = os.Stderr
+ log.Printf("%s %s", cmd.Env, cmd.Args)
+ if out, err := execFn(cmd); err != nil {
+ log.Fatalf("%s %s: %s\n\n%s", cmd.Env, cmd.Args, err, out)
+ }
+
+ absoluteSrcArchivePath := filepath.Join(o.PkgDir, srcArchive)
+ f, err := os.Open(absoluteSrcArchivePath)
+ if err != nil {
+ log.Fatalf("os.Open(%s): %s", absoluteSrcArchivePath, err)
+ }
+ log.Printf("Uploading to s3://%s/%s", o.BucketName, srcArchive)
+ putObjectInput := s3.PutObjectInput{
+ Bucket: &o.BucketName,
+ Key: &srcArchive,
+ Body: f,
+ }
+ if _, err := svc.PutObject(&putObjectInput); err != nil {
+ log.Fatalf("s3 upload %s: %s", absoluteSrcArchivePath, err)
+ }
+ if err := f.Close(); err != nil {
+ log.Fatal(err)
+ }
+}
+
+func buildCockroach(execFn release.ExecFn, flags runFlags, o opts) {
log.Printf("building cockroach %s", pretty.Sprint(o))
defer func() {
log.Printf("done building cockroach: %s", pretty.Sprint(o))
}()
- var buildOpts release.BuildOptions
- buildOpts.ExecFn = execFn
+ opts := []release.MakeReleaseOption{
+ release.WithMakeReleaseOptionExecFn(execFn),
+ release.WithMakeReleaseOptionBuildArg(fmt.Sprintf("%s=%s", "GOFLAGS", o.GoFlags)),
+ release.WithMakeReleaseOptionBuildArg(fmt.Sprintf("%s=%s", "TAGS", o.Tags)),
+ release.WithMakeReleaseOptionBuildArg(fmt.Sprintf("%s=%s", "BUILDCHANNEL", "official-binary")),
+ }
if flags.isRelease {
- buildOpts.Release = true
- buildOpts.BuildTag = o.VersionStr
+ opts = append(opts, release.WithMakeReleaseOptionBuildArg(fmt.Sprintf("%s=%s", "BUILDINFO_TAG", o.VersionStr)))
+ opts = append(opts, release.WithMakeReleaseOptionBuildArg(fmt.Sprintf("%s=%s", "BUILD_TAGGED_RELEASE", "true")))
}
- if err := release.MakeRelease(o.Platform, buildOpts, o.PkgDir); err != nil {
+ if err := release.MakeRelease(
+ release.SupportedTarget{
+ BuildType: o.BuildType,
+ Suffix: o.Suffix,
+ },
+ o.PkgDir,
+ opts...,
+ ); err != nil {
log.Fatal(err)
}
}
@@ -222,11 +305,14 @@ type opts struct {
VersionStr string
Branch string
- Platform release.Platform
+ BuildType string
+ GoFlags string
+ Suffix string
+ Tags string
- AbsolutePath string
- BucketName string
- PkgDir string
+ Base string
+ BucketName string
+ PkgDir string
}
func putNonRelease(svc s3I, o opts) {
@@ -236,26 +322,33 @@ func putNonRelease(svc s3I, o opts) {
Branch: o.Branch,
BucketName: o.BucketName,
Files: append(
- []release.NonReleaseFile{release.MakeCRDBBinaryNonReleaseFile(o.AbsolutePath, o.VersionStr)},
- release.MakeCRDBLibraryNonReleaseFiles(o.PkgDir, o.Platform, o.VersionStr)...,
+ []release.NonReleaseFile{release.MakeCRDBBinaryNonReleaseFile(o.Base, filepath.Join(o.PkgDir, o.Base), o.VersionStr)},
+ release.MakeCRDBLibraryNonReleaseFiles(o.PkgDir, o.BuildType, o.VersionStr, o.Suffix)...,
),
},
)
}
func s3KeyRelease(o opts) (string, string) {
- return release.S3KeyRelease(o.Platform, o.VersionStr)
+ return release.S3KeyRelease(o.Suffix, o.BuildType, o.VersionStr)
+}
+
+func s3KeyArchive(o opts) (string, string) {
+ archiveBase := fmt.Sprintf("cockroach-%s", o.VersionStr)
+ srcArchive := fmt.Sprintf("%s.%s", archiveBase, "src.tgz")
+ return archiveBase, srcArchive
}
func putRelease(svc s3I, o opts) {
release.PutRelease(svc, release.PutReleaseOptions{
BucketName: o.BucketName,
NoCache: false,
- Platform: o.Platform,
+ Suffix: o.Suffix,
+ BuildType: o.BuildType,
VersionStr: o.VersionStr,
Files: append(
- []release.ArchiveFile{release.MakeCRDBBinaryArchiveFile(o.AbsolutePath)},
- release.MakeCRDBLibraryArchiveFiles(o.PkgDir, o.Platform)...,
+ []release.ArchiveFile{release.MakeCRDBBinaryArchiveFile(o.Base, filepath.Join(o.PkgDir, o.Base))},
+ release.MakeCRDBLibraryArchiveFiles(o.PkgDir, o.BuildType)...,
),
})
}
@@ -290,3 +383,35 @@ func markLatestRelease(svc s3I, o opts) {
log.Fatalf("s3 upload %s: %s", keyLatest, err)
}
}
+
+func markLatestArchive(svc s3I, o opts) {
+ _, keyRelease := s3KeyArchive(o)
+
+ log.Printf("Downloading from %s/%s", o.BucketName, keyRelease)
+ binary, err := svc.GetObject(&s3.GetObjectInput{
+ Bucket: &o.BucketName,
+ Key: &keyRelease,
+ })
+ if err != nil {
+ log.Fatal(err)
+ }
+ defer binary.Body.Close()
+ var buf bytes.Buffer
+ if _, err := io.Copy(&buf, binary.Body); err != nil {
+ log.Fatalf("downloading %s/%s: %s", o.BucketName, keyRelease, err)
+ }
+
+ oLatest := o
+ oLatest.VersionStr = latestStr
+ _, keyLatest := s3KeyArchive(oLatest)
+ log.Printf("Uploading to s3://%s/%s", o.BucketName, keyLatest)
+ putObjectInput := s3.PutObjectInput{
+ Bucket: &o.BucketName,
+ Key: &keyLatest,
+ Body: bytes.NewReader(buf.Bytes()),
+ CacheControl: &release.NoCache,
+ }
+ if _, err := svc.PutObject(&putObjectInput); err != nil {
+ log.Fatalf("s3 upload %s: %s", keyLatest, err)
+ }
+}
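The publish-provisional-artifacts changes above replace the struct-based release.BuildOptions with a variadic functional-options call (release.WithMakeReleaseOptionExecFn, release.WithMakeReleaseOptionBuildArg) into release.MakeRelease. As a minimal sketch of that pattern — the option names, config fields, and the simplified buildType argument below are assumed for illustration and are not the actual pkg/release API, which also takes a SupportedTarget struct — it looks roughly like this:

package main

import (
	"fmt"
	"os/exec"
)

// makeReleaseConfig collects everything the options can tweak.
type makeReleaseConfig struct {
	execFn    func(*exec.Cmd) ([]byte, error)
	buildArgs []string
}

// MakeReleaseOption mutates the config; each With... constructor returns one.
type MakeReleaseOption func(*makeReleaseConfig)

func WithExecFn(fn func(*exec.Cmd) ([]byte, error)) MakeReleaseOption {
	return func(c *makeReleaseConfig) { c.execFn = fn }
}

func WithBuildArg(arg string) MakeReleaseOption {
	return func(c *makeReleaseConfig) { c.buildArgs = append(c.buildArgs, arg) }
}

// MakeRelease applies the options, then shells out via the configured runner.
func MakeRelease(buildType, pkgDir string, opts ...MakeReleaseOption) error {
	cfg := makeReleaseConfig{execFn: (*exec.Cmd).Output} // default: run the command directly
	for _, opt := range opts {
		opt(&cfg)
	}
	cmd := exec.Command("mkrelease", append([]string{buildType}, cfg.buildArgs...)...)
	cmd.Dir = pkgDir
	if out, err := cfg.execFn(cmd); err != nil {
		return fmt.Errorf("%s: %w\n%s", cmd.Args, err, out)
	}
	return nil
}

func main() {
	// Mirrors how buildCockroach appends extra args only for tagged releases.
	_ = MakeRelease("linux-gnu", ".",
		WithBuildArg("BUILDCHANNEL=official-binary"),
		WithBuildArg("BUILDINFO_TAG=v0.0.1-alpha"),
	)
}

The appeal of the pattern in this diff is that callers such as buildCockroach can append BUILDINFO_TAG and BUILD_TAGGED_RELEASE arguments only when flags.isRelease is set, and tests can inject a mock exec function, without ever widening the MakeRelease signature.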
diff --git a/pkg/cmd/publish-provisional-artifacts/main_test.go b/pkg/cmd/publish-provisional-artifacts/main_test.go
index 4393826ada..a7ebb36c8d 100644
--- a/pkg/cmd/publish-provisional-artifacts/main_test.go
+++ b/pkg/cmd/publish-provisional-artifacts/main_test.go
@@ -21,7 +21,6 @@ import (
"testing"
"unicode/utf8"
- "github.com/alessio/shellescape"
"github.com/aws/aws-sdk-go/service/s3"
"github.com/cockroachdb/cockroach/pkg/release"
"github.com/cockroachdb/cockroach/pkg/testutils"
@@ -67,64 +66,49 @@ func (s *mockS3) PutObject(i *s3.PutObjectInput) (*s3.PutObjectOutput, error) {
}
type mockExecRunner struct {
- fakeBazelBin string
- cmds []string
+ cmds []string
}
func (r *mockExecRunner) run(c *exec.Cmd) ([]byte, error) {
- if r.fakeBazelBin == "" {
- panic("r.fakeBazelBin not set")
- }
if c.Dir == `` {
return nil, errors.Errorf(`Dir must be specified`)
}
- cmd := fmt.Sprintf("env=%s args=%s", c.Env, shellescape.QuoteCommand(c.Args))
- r.cmds = append(r.cmds, cmd)
+ cmd := fmt.Sprintf("env=%s args=%s", c.Env, c.Args)
var paths []string
- if c.Args[0] == "bazel" && c.Args[1] == "info" && c.Args[2] == "bazel-bin" {
- return []byte(r.fakeBazelBin), nil
- }
- if c.Args[0] == "bazel" && c.Args[1] == "build" {
- path := filepath.Join(r.fakeBazelBin, "pkg", "cmd", "cockroach", "cockroach_", "cockroach")
- var platform release.Platform
+ if c.Args[0] == `mkrelease` {
+ path := filepath.Join(c.Dir, `cockroach`)
for _, arg := range c.Args {
- if strings.HasPrefix(arg, `--config=`) {
- switch strings.TrimPrefix(arg, `--config=`) {
- case "crosslinuxbase":
- platform = release.PlatformLinux
- case "crosslinuxarmbase":
- platform = release.PlatformLinuxArm
- case "crossmacosbase":
- platform = release.PlatformMacOS
- case "crosswindowsbase":
- platform = release.PlatformWindows
- path += ".exe"
- case "ci", "with_ui":
- default:
- panic(fmt.Sprintf("Unexpected configuration %s", arg))
- }
+ if strings.HasPrefix(arg, `SUFFIX=`) {
+ path += strings.TrimPrefix(arg, `SUFFIX=`)
}
}
paths = append(paths, path)
- ext := release.SharedLibraryExtensionFromPlatform(platform)
+ ext := release.SharedLibraryExtensionFromBuildType(c.Args[1])
for _, lib := range release.CRDBSharedLibraries {
- libDir := "lib"
- if platform == release.PlatformWindows {
- libDir = "bin"
+ paths = append(paths, filepath.Join(c.Dir, "lib", lib+ext))
+ }
+ // Make the lib directory as it exists after `make`.
+ if err := os.MkdirAll(filepath.Join(c.Dir, "lib"), 0755); err != nil {
+ return nil, err
+ }
+ } else if c.Args[0] == `make` && c.Args[1] == `archive` {
+ for _, arg := range c.Args {
+ if strings.HasPrefix(arg, `ARCHIVE=`) {
+ paths = append(paths, filepath.Join(c.Dir, strings.TrimPrefix(arg, `ARCHIVE=`)))
+ break
}
- paths = append(paths, filepath.Join(r.fakeBazelBin, "c-deps", "libgeos", libDir, lib+ext))
}
}
for _, path := range paths {
- if err := os.MkdirAll(filepath.Dir(path), 0777); err != nil {
- return nil, err
- }
if err := ioutil.WriteFile(path, []byte(cmd), 0666); err != nil {
return nil, err
}
}
+ if len(paths) > 0 {
+ r.cmds = append(r.cmds, cmd)
+ }
var output []byte
return output, nil
@@ -146,14 +130,10 @@ func TestProvisional(t *testing.T) {
branch: `provisional_201901010101_v0.0.1-alpha`,
},
expectedCmds: []string{
- "env=[] args=bazel build //pkg/cmd/cockroach //c-deps:libgeos '--workspace_status_command=./build/bazelutil/stamp.sh x86_64-pc-linux-gnu official-binary v0.0.1-alpha release' -c opt --config=ci --config=with_ui --config=crosslinuxbase",
- "env=[] args=bazel info bazel-bin -c opt --config=ci --config=with_ui --config=crosslinuxbase",
- "env=[MALLOC_CONF=prof:true] args=./cockroach.linux-2.6.32-gnu-amd64 version",
- "env=[] args=ldd ./cockroach.linux-2.6.32-gnu-amd64",
- "env=[] args=bazel build //pkg/cmd/cockroach //c-deps:libgeos '--workspace_status_command=./build/bazelutil/stamp.sh x86_64-apple-darwin19 official-binary v0.0.1-alpha release' -c opt --config=ci --config=with_ui --config=crossmacosbase",
- "env=[] args=bazel info bazel-bin -c opt --config=ci --config=with_ui --config=crossmacosbase",
- "env=[] args=bazel build //pkg/cmd/cockroach //c-deps:libgeos '--workspace_status_command=./build/bazelutil/stamp.sh x86_64-w64-mingw32 official-binary v0.0.1-alpha release' -c opt --config=ci --config=with_ui --config=crosswindowsbase",
- "env=[] args=bazel info bazel-bin -c opt --config=ci --config=with_ui --config=crosswindowsbase",
+ "env=[] args=[mkrelease linux-gnu SUFFIX=.linux-2.6.32-gnu-amd64 GOFLAGS= TAGS= BUILDCHANNEL=official-binary BUILDINFO_TAG=v0.0.1-alpha BUILD_TAGGED_RELEASE=true]",
+ "env=[] args=[mkrelease darwin SUFFIX=.darwin-10.9-amd64 GOFLAGS= TAGS= BUILDCHANNEL=official-binary BUILDINFO_TAG=v0.0.1-alpha BUILD_TAGGED_RELEASE=true]",
+ "env=[] args=[mkrelease windows SUFFIX=.windows-6.2-amd64.exe GOFLAGS= TAGS= BUILDCHANNEL=official-binary BUILDINFO_TAG=v0.0.1-alpha BUILD_TAGGED_RELEASE=true]",
+ "env=[] args=[make archive ARCHIVE_BASE=cockroach-v0.0.1-alpha ARCHIVE=cockroach-v0.0.1-alpha.src.tgz BUILDINFO_TAG=v0.0.1-alpha]",
},
expectedGets: nil,
expectedPuts: []string{
@@ -163,6 +143,8 @@ func TestProvisional(t *testing.T) {
"CONTENTS <binary stuff>",
"s3://binaries.cockroachdb.com/cockroach-v0.0.1-alpha.windows-6.2-amd64.zip " +
"CONTENTS <binary stuff>",
+ "s3://binaries.cockroachdb.com/cockroach-v0.0.1-alpha.src.tgz " +
+ "CONTENTS env=[] args=[make archive ARCHIVE_BASE=cockroach-v0.0.1-alpha ARCHIVE=cockroach-v0.0.1-alpha.src.tgz BUILDINFO_TAG=v0.0.1-alpha]",
},
},
{
@@ -174,40 +156,35 @@ func TestProvisional(t *testing.T) {
sha: `00SHA00`,
},
expectedCmds: []string{
- "env=[] args=bazel build //pkg/cmd/cockroach //c-deps:libgeos '--workspace_status_command=./build/bazelutil/stamp.sh x86_64-pc-linux-gnu official-binary' -c opt --config=ci --config=with_ui --config=crosslinuxbase",
- "env=[] args=bazel info bazel-bin -c opt --config=ci --config=with_ui --config=crosslinuxbase",
- "env=[MALLOC_CONF=prof:true] args=./cockroach.linux-2.6.32-gnu-amd64 version",
- "env=[] args=ldd ./cockroach.linux-2.6.32-gnu-amd64",
- "env=[] args=bazel build //pkg/cmd/cockroach //c-deps:libgeos '--workspace_status_command=./build/bazelutil/stamp.sh x86_64-apple-darwin19 official-binary' -c opt --config=ci --config=with_ui --config=crossmacosbase",
- "env=[] args=bazel info bazel-bin -c opt --config=ci --config=with_ui --config=crossmacosbase",
- "env=[] args=bazel build //pkg/cmd/cockroach //c-deps:libgeos '--workspace_status_command=./build/bazelutil/stamp.sh x86_64-w64-mingw32 official-binary' -c opt --config=ci --config=with_ui --config=crosswindowsbase",
- "env=[] args=bazel info bazel-bin -c opt --config=ci --config=with_ui --config=crosswindowsbase",
+ "env=[] args=[mkrelease linux-gnu SUFFIX=.linux-2.6.32-gnu-amd64 GOFLAGS= TAGS= BUILDCHANNEL=official-binary]",
+ "env=[] args=[mkrelease darwin SUFFIX=.darwin-10.9-amd64 GOFLAGS= TAGS= BUILDCHANNEL=official-binary]",
+ "env=[] args=[mkrelease windows SUFFIX=.windows-6.2-amd64.exe GOFLAGS= TAGS= BUILDCHANNEL=official-binary]",
},
expectedGets: nil,
expectedPuts: []string{
"s3://cockroach//cockroach/cockroach.linux-gnu-amd64.00SHA00 " +
- "CONTENTS env=[] args=bazel build //pkg/cmd/cockroach //c-deps:libgeos '--workspace_status_command=./build/bazelutil/stamp.sh x86_64-pc-linux-gnu official-binary' -c opt --config=ci --config=with_ui --config=crosslinuxbase",
+ "CONTENTS env=[] args=[mkrelease linux-gnu SUFFIX=.linux-2.6.32-gnu-amd64 GOFLAGS= TAGS= BUILDCHANNEL=official-binary]",
"s3://cockroach/cockroach/cockroach.linux-gnu-amd64.LATEST/no-cache " +
"REDIRECT /cockroach/cockroach.linux-gnu-amd64.00SHA00",
- "s3://cockroach//cockroach/lib/libgeos.linux-gnu-amd64.00SHA00.so CONTENTS env=[] args=bazel build //pkg/cmd/cockroach //c-deps:libgeos '--workspace_status_command=./build/bazelutil/stamp.sh x86_64-pc-linux-gnu official-binary' -c opt --config=ci --config=with_ui --config=crosslinuxbase",
+ "s3://cockroach//cockroach/lib/libgeos.linux-gnu-amd64.00SHA00.so CONTENTS env=[] args=[mkrelease linux-gnu SUFFIX=.linux-2.6.32-gnu-amd64 GOFLAGS= TAGS= BUILDCHANNEL=official-binary]",
"s3://cockroach/cockroach/lib/libgeos.linux-gnu-amd64.so.LATEST/no-cache REDIRECT /cockroach/lib/libgeos.linux-gnu-amd64.00SHA00.so",
- "s3://cockroach//cockroach/lib/libgeos_c.linux-gnu-amd64.00SHA00.so CONTENTS env=[] args=bazel build //pkg/cmd/cockroach //c-deps:libgeos '--workspace_status_command=./build/bazelutil/stamp.sh x86_64-pc-linux-gnu official-binary' -c opt --config=ci --config=with_ui --config=crosslinuxbase",
+ "s3://cockroach//cockroach/lib/libgeos_c.linux-gnu-amd64.00SHA00.so CONTENTS env=[] args=[mkrelease linux-gnu SUFFIX=.linux-2.6.32-gnu-amd64 GOFLAGS= TAGS= BUILDCHANNEL=official-binary]",
"s3://cockroach/cockroach/lib/libgeos_c.linux-gnu-amd64.so.LATEST/no-cache REDIRECT /cockroach/lib/libgeos_c.linux-gnu-amd64.00SHA00.so",
"s3://cockroach//cockroach/cockroach.darwin-amd64.00SHA00 " +
- "CONTENTS env=[] args=bazel build //pkg/cmd/cockroach //c-deps:libgeos '--workspace_status_command=./build/bazelutil/stamp.sh x86_64-apple-darwin19 official-binary' -c opt --config=ci --config=with_ui --config=crossmacosbase",
+ "CONTENTS env=[] args=[mkrelease darwin SUFFIX=.darwin-10.9-amd64 GOFLAGS= TAGS= BUILDCHANNEL=official-binary]",
"s3://cockroach/cockroach/cockroach.darwin-amd64.LATEST/no-cache " +
"REDIRECT /cockroach/cockroach.darwin-amd64.00SHA00",
- "s3://cockroach//cockroach/lib/libgeos.darwin-amd64.00SHA00.dylib CONTENTS env=[] args=bazel build //pkg/cmd/cockroach //c-deps:libgeos '--workspace_status_command=./build/bazelutil/stamp.sh x86_64-apple-darwin19 official-binary' -c opt --config=ci --config=with_ui --config=crossmacosbase",
+ "s3://cockroach//cockroach/lib/libgeos.darwin-amd64.00SHA00.dylib CONTENTS env=[] args=[mkrelease darwin SUFFIX=.darwin-10.9-amd64 GOFLAGS= TAGS= BUILDCHANNEL=official-binary]",
"s3://cockroach/cockroach/lib/libgeos.darwin-amd64.dylib.LATEST/no-cache REDIRECT /cockroach/lib/libgeos.darwin-amd64.00SHA00.dylib",
- "s3://cockroach//cockroach/lib/libgeos_c.darwin-amd64.00SHA00.dylib CONTENTS env=[] args=bazel build //pkg/cmd/cockroach //c-deps:libgeos '--workspace_status_command=./build/bazelutil/stamp.sh x86_64-apple-darwin19 official-binary' -c opt --config=ci --config=with_ui --config=crossmacosbase",
+ "s3://cockroach//cockroach/lib/libgeos_c.darwin-amd64.00SHA00.dylib CONTENTS env=[] args=[mkrelease darwin SUFFIX=.darwin-10.9-amd64 GOFLAGS= TAGS= BUILDCHANNEL=official-binary]",
"s3://cockroach/cockroach/lib/libgeos_c.darwin-amd64.dylib.LATEST/no-cache REDIRECT /cockroach/lib/libgeos_c.darwin-amd64.00SHA00.dylib",
"s3://cockroach//cockroach/cockroach.windows-amd64.00SHA00.exe " +
- "CONTENTS env=[] args=bazel build //pkg/cmd/cockroach //c-deps:libgeos '--workspace_status_command=./build/bazelutil/stamp.sh x86_64-w64-mingw32 official-binary' -c opt --config=ci --config=with_ui --config=crosswindowsbase",
+ "CONTENTS env=[] args=[mkrelease windows SUFFIX=.windows-6.2-amd64.exe GOFLAGS= TAGS= BUILDCHANNEL=official-binary]",
"s3://cockroach/cockroach/cockroach.windows-amd64.LATEST/no-cache " +
"REDIRECT /cockroach/cockroach.windows-amd64.00SHA00.exe",
- "s3://cockroach//cockroach/lib/libgeos.windows-amd64.00SHA00.dll CONTENTS env=[] args=bazel build //pkg/cmd/cockroach //c-deps:libgeos '--workspace_status_command=./build/bazelutil/stamp.sh x86_64-w64-mingw32 official-binary' -c opt --config=ci --config=with_ui --config=crosswindowsbase",
+ "s3://cockroach//cockroach/lib/libgeos.windows-amd64.00SHA00.dll CONTENTS env=[] args=[mkrelease windows SUFFIX=.windows-6.2-amd64.exe GOFLAGS= TAGS= BUILDCHANNEL=official-binary]",
"s3://cockroach/cockroach/lib/libgeos.windows-amd64.dll.LATEST/no-cache REDIRECT /cockroach/lib/libgeos.windows-amd64.00SHA00.dll",
- "s3://cockroach//cockroach/lib/libgeos_c.windows-amd64.00SHA00.dll CONTENTS env=[] args=bazel build //pkg/cmd/cockroach //c-deps:libgeos '--workspace_status_command=./build/bazelutil/stamp.sh x86_64-w64-mingw32 official-binary' -c opt --config=ci --config=with_ui --config=crosswindowsbase",
+ "s3://cockroach//cockroach/lib/libgeos_c.windows-amd64.00SHA00.dll CONTENTS env=[] args=[mkrelease windows SUFFIX=.windows-6.2-amd64.exe GOFLAGS= TAGS= BUILDCHANNEL=official-binary]",
"s3://cockroach/cockroach/lib/libgeos_c.windows-amd64.dll.LATEST/no-cache REDIRECT /cockroach/lib/libgeos_c.windows-amd64.00SHA00.dll",
},
},
@@ -219,13 +196,9 @@ func TestProvisional(t *testing.T) {
var s3 mockS3
var exec mockExecRunner
- fakeBazelBin, cleanup := testutils.TempDir(t)
- defer cleanup()
- exec.fakeBazelBin = fakeBazelBin
flags := test.flags
flags.pkgDir = dir
- execFn := release.ExecFn{MockExecFn: exec.run}
- run(&s3, flags, execFn)
+ run(&s3, exec.run, flags)
require.Equal(t, test.expectedCmds, exec.cmds)
require.Equal(t, test.expectedGets, s3.gets)
require.Equal(t, test.expectedPuts, s3.puts)
@@ -261,6 +234,7 @@ func TestBless(t *testing.T) {
"s3://binaries.cockroachdb.com/cockroach-v0.0.1.linux-amd64.tgz",
"s3://binaries.cockroachdb.com/cockroach-v0.0.1.darwin-10.9-amd64.tgz",
"s3://binaries.cockroachdb.com/cockroach-v0.0.1.windows-6.2-amd64.zip",
+ "s3://binaries.cockroachdb.com/cockroach-v0.0.1.src.tgz",
},
expectedPuts: []string{
"s3://binaries.cockroachdb.com/cockroach-latest.linux-amd64.tgz/no-cache " +
@@ -269,6 +243,8 @@ func TestBless(t *testing.T) {
"CONTENTS s3://binaries.cockroachdb.com/cockroach-v0.0.1.darwin-10.9-amd64.tgz",
"s3://binaries.cockroachdb.com/cockroach-latest.windows-6.2-amd64.zip/no-cache " +
"CONTENTS s3://binaries.cockroachdb.com/cockroach-v0.0.1.windows-6.2-amd64.zip",
+ "s3://binaries.cockroachdb.com/cockroach-latest.src.tgz/no-cache " +
+ "CONTENTS s3://binaries.cockroachdb.com/cockroach-v0.0.1.src.tgz",
},
},
}
@@ -277,7 +253,7 @@ func TestBless(t *testing.T) {
t.Run(test.name, func(t *testing.T) {
var s3 mockS3
var execFn release.ExecFn // bless shouldn't exec anything
- run(&s3, test.flags, execFn)
+ run(&s3, execFn, test.flags)
require.Equal(t, test.expectedGets, s3.gets)
require.Equal(t, test.expectedPuts, s3.puts)
})
diff --git a/pkg/cmd/release/BUILD.bazel b/pkg/cmd/release/BUILD.bazel
index 54e721dfae..52fab9d313 100644
--- a/pkg/cmd/release/BUILD.bazel
+++ b/pkg/cmd/release/BUILD.bazel
@@ -1,4 +1,4 @@
-load("@io_bazel_rules_go//go:def.bzl", "go_binary", "go_library", "go_test")
+load("@io_bazel_rules_go//go:def.bzl", "go_binary", "go_library")
go_library(
name = "release_lib",
@@ -6,11 +6,10 @@ go_library(
"blockers.go",
"git.go",
"jira.go",
+ "mail.go",
"main.go",
"metadata.go",
- "orchestration.go",
"pick_sha.go",
- "sender.go",
"templates.go",
],
importpath = "github.com/cockroachdb/cockroach/pkg/cmd/release",
@@ -29,14 +28,3 @@ go_binary(
embed = [":release_lib"],
visibility = ["//visibility:public"],
)
-
-go_test(
- name = "release_test",
- srcs = ["sender_test.go"],
- data = glob([
- "templates/**",
- "testdata/**",
- ]),
- embed = [":release_lib"],
- deps = ["@com_github_stretchr_testify//require"],
-)
diff --git a/pkg/cmd/release/blockers.go b/pkg/cmd/release/blockers.go
index aa7069a1c4..8fc164f81f 100644
--- a/pkg/cmd/release/blockers.go
+++ b/pkg/cmd/release/blockers.go
@@ -13,34 +13,29 @@ package main
import (
"fmt"
"os"
- "time"
"github.com/spf13/cobra"
)
const (
- // dateLayout is the expected date format for prepDate and publishDate input values.
- dateLayout = "2006-01-02"
-
- // prepDate is the date when we expect to select the release candidate.
- prepDate = "prep-date"
- // publishDate is the date when we expect to publish the release candidate.
- publishDate = "publish-date"
-
- // nextVersion can be left out for stable/patch releases, but needs to be passed in for pre-releases (alpha/beta/rc).
- nextVersion = "next-version"
+ envSMTPUser = "SMTP_USER"
+ envSMTPPassword = "SMTP_PASSWORD"
+ envGithubToken = "GITHUB_TOKEN"
+ releaseSeries = "release-series"
+ smtpUser = "smtp-user"
+ smtpHost = "smtp-host"
+ smtpPort = "smtp-port"
+ emailAddresses = "to"
+ dryRun = "dry-run"
)
var blockersFlags = struct {
releaseSeries string
- templatesDir string
- prepDate string
- publishDate string
- nextVersion string
smtpUser string
smtpHost string
smtpPort int
emailAddresses []string
+ dryRun bool
}{}
var postReleaseSeriesBlockersCmd = &cobra.Command{
@@ -51,20 +46,16 @@ var postReleaseSeriesBlockersCmd = &cobra.Command{
}
func init() {
+ // TODO: improve flag usage comments
postReleaseSeriesBlockersCmd.Flags().StringVar(&blockersFlags.releaseSeries, releaseSeries, "", "major release series")
- postReleaseSeriesBlockersCmd.Flags().StringVar(&blockersFlags.templatesDir, templatesDir, "", "templates directory")
- postReleaseSeriesBlockersCmd.Flags().StringVar(&blockersFlags.prepDate, prepDate, "", "date to select candidate")
- postReleaseSeriesBlockersCmd.Flags().StringVar(&blockersFlags.publishDate, publishDate, "", "date to publish candidate")
- postReleaseSeriesBlockersCmd.Flags().StringVar(&blockersFlags.nextVersion, nextVersion, "", "next release version")
postReleaseSeriesBlockersCmd.Flags().StringVar(&blockersFlags.smtpUser, smtpUser, os.Getenv(envSMTPUser), "SMTP user name")
postReleaseSeriesBlockersCmd.Flags().StringVar(&blockersFlags.smtpHost, smtpHost, "", "SMTP host")
postReleaseSeriesBlockersCmd.Flags().IntVar(&blockersFlags.smtpPort, smtpPort, 0, "SMTP port")
postReleaseSeriesBlockersCmd.Flags().StringArrayVar(&blockersFlags.emailAddresses, emailAddresses, []string{}, "email addresses")
+ postReleaseSeriesBlockersCmd.Flags().BoolVar(&blockersFlags.dryRun, dryRun, false, "use dry run Jira project for issues")
_ = postReleaseSeriesBlockersCmd.MarkFlagRequired(releaseSeries)
- _ = postReleaseSeriesBlockersCmd.MarkFlagRequired(templatesDir)
- _ = postReleaseSeriesBlockersCmd.MarkFlagRequired(prepDate)
- _ = postReleaseSeriesBlockersCmd.MarkFlagRequired(publishDate)
+ _ = postReleaseSeriesBlockersCmd.MarkFlagRequired(smtpUser)
_ = postReleaseSeriesBlockersCmd.MarkFlagRequired(smtpHost)
_ = postReleaseSeriesBlockersCmd.MarkFlagRequired(smtpPort)
_ = postReleaseSeriesBlockersCmd.MarkFlagRequired(emailAddresses)
@@ -79,70 +70,9 @@ func fetchReleaseSeriesBlockers(_ *cobra.Command, _ []string) error {
if githubToken == "" {
return fmt.Errorf("%s environment variable should be set", envGithubToken)
}
- if blockersFlags.smtpUser == "" {
- return fmt.Errorf("either %s environment variable or %s flag should be set", envSMTPUser, smtpUser)
- }
- releasePrepDate, err := time.Parse(dateLayout, blockersFlags.prepDate)
- if err != nil {
- return fmt.Errorf("%s is not parseable into %s date layout", blockersFlags.prepDate, dateLayout)
- }
- releasePublishDate, err := time.Parse(dateLayout, blockersFlags.publishDate)
- if err != nil {
- return fmt.Errorf("%s is not parseable into %s date layout", blockersFlags.publishDate, dateLayout)
- }
- if blockersFlags.nextVersion == "" {
- var err error
- blockersFlags.nextVersion, err = findNextVersion(blockersFlags.releaseSeries)
- if err != nil {
- return fmt.Errorf("cannot find next release version: %w", err)
- }
- }
- // TODO(celia): (future PR) fetch blockers and use real data instead of `temp*` values.
- fmt.Println("TODO(celia): fetch blockers")
- tempTotalBlockers := 23
- tempBlockerList := []ProjectBlocker{
- {
- ProjectName: "Project ABC",
- NumBlockers: 11,
- },
- {
- ProjectName: "Project XYZ",
- NumBlockers: 12,
- },
- }
+ // TODO(celia): fetch blockers and send out email
+ fmt.Println("TODO(celia): fetch blockers and send out email")
- blockersURL := "go.crdb.dev/blockers/" + blockersFlags.releaseSeries
- releaseBranch := "release-" + blockersFlags.releaseSeries
-
- // TODO(celia): (future PR) dynamically set branchExists, based on whether `releaseBranch` branch exists in crdb repo
- branchExists := true
- if !branchExists {
- blockersURL = "go.crdb.dev/blockers"
- releaseBranch = "master"
- }
- args := messageDataPostBlockers{
- Version: blockersFlags.nextVersion,
- PrepDate: releasePrepDate.Format("Monday, January 2"),
- ReleaseDate: releasePublishDate.Format("Monday, January 2"),
- TotalBlockers: tempTotalBlockers,
- BlockersURL: blockersURL,
- ReleaseBranch: releaseBranch,
- BlockerList: tempBlockerList,
- }
- opts := sendOpts{
- templatesDir: blockersFlags.templatesDir,
- from: fmt.Sprintf("Justin Beaver <%s>", blockersFlags.smtpUser),
- host: blockersFlags.smtpHost,
- port: blockersFlags.smtpPort,
- user: blockersFlags.smtpUser,
- password: smtpPassword,
- to: blockersFlags.emailAddresses,
- }
-
- fmt.Println("Sending email")
- if err := sendMailPostBlockers(args, opts); err != nil {
- return fmt.Errorf("cannot send email: %w", err)
- }
return nil
}
diff --git a/pkg/cmd/release/git.go b/pkg/cmd/release/git.go
index 869e87cd53..778b531c99 100644
--- a/pkg/cmd/release/git.go
+++ b/pkg/cmd/release/git.go
@@ -31,19 +31,6 @@ type releaseInfo struct {
releaseSeries string
}
-// findNextVersion returns the next release version for given releaseSeries.
-func findNextVersion(releaseSeries string) (string, error) {
- prevReleaseVersion, err := findPreviousRelease(releaseSeries)
- if err != nil {
- return "", fmt.Errorf("cannot find previous release: %w", err)
- }
- nextReleaseVersion, err := bumpVersion(prevReleaseVersion)
- if err != nil {
- return "", fmt.Errorf("cannot bump version: %w", err)
- }
- return nextReleaseVersion, nil
-}
-
// findNextRelease finds all required information for the next release.
func findNextRelease(releaseSeries string) (releaseInfo, error) {
prevReleaseVersion, err := findPreviousRelease(releaseSeries)
diff --git a/pkg/cmd/release/mail.go b/pkg/cmd/release/mail.go
new file mode 100644
index 0000000000..fb632ba200
--- /dev/null
+++ b/pkg/cmd/release/mail.go
@@ -0,0 +1,94 @@
+// Copyright 2019 The Cockroach Authors.
+//
+// Use of this software is governed by the Business Source License
+// included in the file licenses/BSL.txt.
+//
+// As of the Change Date specified in that file, in accordance with
+// the Business Source License, use of this software will be governed
+// by the Apache License, Version 2.0, included in the file
+// licenses/APL.txt.
+
+package main
+
+import (
+ "fmt"
+ htmltemplate "html/template"
+ "net/smtp"
+ "net/textproto"
+
+ "github.com/jordan-wright/email"
+)
+
+var emailSubjectTemplate = "Release {{ .Version }}"
+var emailTextTemplate = `
+A candidate SHA has been selected for {{ .Version }}. Proceeding to qualification shortly.
+
+ SHA: {{ .SHA }}
+ Tracking Issue: {{ .TrackingIssueURL }}
+ List of changes since last release: {{ .DiffURL }}
+
+Thanks
+Release Engineering
+`
+var emailHTMLTemplate = `
+<html>
+<body>
+<p>A candidate SHA has been selected for <strong>{{ .Version }}</strong>. Proceeding to qualification shortly.</p>
+<ul>
+<li>SHA: <a href="https://github.com/cockroachlabs/release-staging/commit/{{ .SHA }}">{{ .SHA }}</a></li>
+<li>Tracking Issue: <a href="{{ .TrackingIssueURL }}">{{ .TrackingIssue }}</a></li>
+<li><a href="{{ .DiffURL }}">List of changes</a> since last release</li>
+</ul>
+<p>Thanks,<br />
+Release Engineering</p>
+</body>
+</html>
+`
+
+type emailArgs struct {
+ Version string
+ SHA string
+ TrackingIssue string
+ TrackingIssueURL htmltemplate.URL
+ DiffURL htmltemplate.URL
+}
+
+type smtpOpts struct {
+ host string
+ port int
+ user string
+ password string
+ from string
+ to []string
+}
+
+// sendmail creates and sends an email to the releases mailing list
+func sendmail(args emailArgs, smtpOpts smtpOpts) error {
+ text, err := templateToText(emailTextTemplate, args)
+ if err != nil {
+ return fmt.Errorf("cannot use text template: %w", err)
+ }
+ subject, err := templateToText(emailSubjectTemplate, args)
+ if err != nil {
+ return fmt.Errorf("cannot use subject template: %w", err)
+ }
+ html, err := templateToHTML(emailHTMLTemplate, args)
+ if err != nil {
+ return fmt.Errorf("cannot use html template: %w", err)
+ }
+
+ e := &email.Email{
+ To: smtpOpts.to,
+ From: smtpOpts.from,
+ Subject: subject,
+ Text: []byte(text),
+ HTML: []byte(html),
+ Headers: textproto.MIMEHeader{},
+ }
+ emailAuth := smtp.PlainAuth("", smtpOpts.user, smtpOpts.password, smtpOpts.host)
+ addr := fmt.Sprintf("%s:%d", smtpOpts.host, smtpOpts.port)
+ if err := e.Send(addr, emailAuth); err != nil {
+ return fmt.Errorf("cannot send email: %w", err)
+ }
+ return nil
+}
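mail.go above renders the subject, text, and HTML bodies through templateToText and templateToHTML, which live in templates.go and are not part of this diff. Assuming those helpers are thin wrappers over text/template and html/template, a sketch consistent with how emailArgs types its URL fields would be:

package main

import (
	"bytes"
	"fmt"
	htmltemplate "html/template"
	texttemplate "text/template"
)

// templateToText renders a text/template against data and returns the result.
func templateToText(templateText string, data interface{}) (string, error) {
	tmpl, err := texttemplate.New("text").Parse(templateText)
	if err != nil {
		return "", err
	}
	var buf bytes.Buffer
	if err := tmpl.Execute(&buf, data); err != nil {
		return "", err
	}
	return buf.String(), nil
}

// templateToHTML does the same with html/template, so values are escaped
// unless they are typed as template.URL/template.HTML.
func templateToHTML(templateText string, data interface{}) (string, error) {
	tmpl, err := htmltemplate.New("html").Parse(templateText)
	if err != nil {
		return "", err
	}
	var buf bytes.Buffer
	if err := tmpl.Execute(&buf, data); err != nil {
		return "", err
	}
	return buf.String(), nil
}

func main() {
	subject, err := templateToText("Release {{ .Version }}", struct{ Version string }{"v22.1.0"})
	if err != nil {
		panic(err)
	}
	fmt.Println(subject)
}

Typing TrackingIssueURL and DiffURL as html/template.URL, as emailArgs does, is what keeps the HTML renderer from escaping those links in the message body.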
diff --git a/pkg/cmd/release/main.go b/pkg/cmd/release/main.go
index 69c67f02ba..be2e193f3a 100644
--- a/pkg/cmd/release/main.go
+++ b/pkg/cmd/release/main.go
@@ -14,19 +14,6 @@ import "github.com/spf13/cobra"
var rootCmd = &cobra.Command{Use: "release"}
-const (
- envSMTPUser = "SMTP_USER"
- envSMTPPassword = "SMTP_PASSWORD"
- envGithubToken = "GITHUB_TOKEN"
- releaseSeries = "release-series"
- templatesDir = "template-dir"
- smtpUser = "smtp-user"
- smtpHost = "smtp-host"
- smtpPort = "smtp-port"
- emailAddresses = "to"
- dryRun = "dry-run"
-)
-
func main() {
if err := rootCmd.Execute(); err != nil {
panic(err)
@@ -36,5 +23,4 @@ func main() {
func init() {
rootCmd.AddCommand(pickSHACmd)
rootCmd.AddCommand(postReleaseSeriesBlockersCmd)
- rootCmd.AddCommand(setOrchestrationVersionCmd)
}
diff --git a/pkg/cmd/release/orchestration.go b/pkg/cmd/release/orchestration.go
deleted file mode 100644
index 0dfc901c81..0000000000
--- a/pkg/cmd/release/orchestration.go
+++ /dev/null
@@ -1,100 +0,0 @@
-// Copyright 2019 The Cockroach Authors.
-//
-// Use of this software is governed by the Business Source License
-// included in the file licenses/BSL.txt.
-//
-// As of the Change Date specified in that file, in accordance with
-// the Business Source License, use of this software will be governed
-// by the Apache License, Version 2.0, included in the file
-// licenses/APL.txt.
-
-package main
-
-import (
- "errors"
- "fmt"
- "io/ioutil"
- "os"
- "path"
- "path/filepath"
- "strings"
-
- "github.com/spf13/cobra"
-)
-
-const (
- versionFlag = "version"
- templatesDirFlag = "template-dir"
- outputDirFlag = "output-dir"
-)
-
-var orchestrationFlags = struct {
- version string
- templatesDir string
- outputDir string
-}{}
-
-var setOrchestrationVersionCmd = &cobra.Command{
- Use: "set-orchestration-version",
- Short: "Set orchestration version",
- Long: "Updates orchestration version under the ./cloud/kubernetes directory",
- RunE: setOrchestrationVersion,
-}
-
-func init() {
- setOrchestrationVersionCmd.Flags().StringVar(&orchestrationFlags.version, versionFlag, "", "cockroachdb version")
- setOrchestrationVersionCmd.Flags().StringVar(&orchestrationFlags.templatesDir, templatesDirFlag, "",
- "orchestration templates directory")
- setOrchestrationVersionCmd.Flags().StringVar(&orchestrationFlags.outputDir, outputDirFlag, "",
- "orchestration directory")
- _ = setOrchestrationVersionCmd.MarkFlagRequired(versionFlag)
- _ = setOrchestrationVersionCmd.MarkFlagRequired(templatesDirFlag)
- _ = setOrchestrationVersionCmd.MarkFlagRequired(outputDirFlag)
-}
-
-func setOrchestrationVersion(_ *cobra.Command, _ []string) error {
- dirInfo, err := os.Stat(orchestrationFlags.templatesDir)
- if err != nil {
- return fmt.Errorf("cannot stat templates directory: %w", err)
- }
- if !dirInfo.IsDir() {
- return fmt.Errorf("%s is not a directory", orchestrationFlags.templatesDir)
- }
- return filepath.Walk(orchestrationFlags.templatesDir, func(filePath string, fileInfo os.FileInfo, e error) error {
- if e != nil {
- return e
- }
- // Skip directories
- if !fileInfo.Mode().IsRegular() {
- return nil
- }
- // calculate file directory relative to the given root directory.
- dir := path.Dir(filePath)
- relDir, err := filepath.Rel(orchestrationFlags.templatesDir, dir)
- if err != nil {
- return err
- }
- destDir := filepath.Join(orchestrationFlags.outputDir, relDir)
- destFile := filepath.Join(destDir, fileInfo.Name())
- if err := os.MkdirAll(destDir, 0755); err != nil && !errors.Is(err, os.ErrExist) {
- return err
- }
- contents, err := ioutil.ReadFile(filePath)
- if err != nil {
- return err
- }
- // Go templates cannot be used here, because some files are templates already.
- generatedContents := strings.ReplaceAll(string(contents), "@VERSION@", orchestrationFlags.version)
- if err != nil {
- return err
- }
- if strings.HasSuffix(destFile, ".yaml") {
- generatedContents = fmt.Sprintf("# Generated file, DO NOT EDIT. Source: %s\n", filePath) + generatedContents
- }
- err = ioutil.WriteFile(destFile, []byte(generatedContents), fileInfo.Mode().Perm())
- if err != nil {
- return err
- }
- return nil
- })
-}
diff --git a/pkg/cmd/release/pick_sha.go b/pkg/cmd/release/pick_sha.go
index 4cd48e166b..620f27d3c8 100644
--- a/pkg/cmd/release/pick_sha.go
+++ b/pkg/cmd/release/pick_sha.go
@@ -19,20 +19,12 @@ import (
"github.com/spf13/cobra"
)
-const (
- qualifyBucket = "qualify-bucket"
- qualifyObjectPrefix = "qualify-object-prefix"
- releaseBucket = "release-bucket"
- releaseObjectPrefix = "release-object-prefix"
-)
-
var pickSHAFlags = struct {
qualifyBucket string
qualifyObjectPrefix string
releaseBucket string
releaseObjectPrefix string
releaseSeries string
- templatesDir string
smtpUser string
smtpHost string
smtpPort int
@@ -50,29 +42,27 @@ var pickSHACmd = &cobra.Command{
func init() {
// TODO: improve flag usage comments
- pickSHACmd.Flags().StringVar(&pickSHAFlags.qualifyBucket, qualifyBucket, "", "release qualification metadata GCS bucket")
- pickSHACmd.Flags().StringVar(&pickSHAFlags.qualifyObjectPrefix, qualifyObjectPrefix, "",
+ pickSHACmd.Flags().StringVar(&pickSHAFlags.qualifyBucket, "qualify-bucket", "", "release qualification metadata GCS bucket")
+ pickSHACmd.Flags().StringVar(&pickSHAFlags.qualifyObjectPrefix, "qualify-object-prefix", "",
"release qualification object prefix")
- pickSHACmd.Flags().StringVar(&pickSHAFlags.releaseBucket, releaseBucket, "", "release candidates metadata GCS bucket")
- pickSHACmd.Flags().StringVar(&pickSHAFlags.releaseObjectPrefix, releaseObjectPrefix, "", "release candidate object prefix")
- pickSHACmd.Flags().StringVar(&pickSHAFlags.releaseSeries, releaseSeries, "", "major release series")
- pickSHACmd.Flags().StringVar(&pickSHAFlags.templatesDir, templatesDir, "", "templates directory")
- pickSHACmd.Flags().StringVar(&pickSHAFlags.smtpUser, smtpUser, os.Getenv(envSMTPUser), "SMTP user name")
- pickSHACmd.Flags().StringVar(&pickSHAFlags.smtpHost, smtpHost, "", "SMTP host")
- pickSHACmd.Flags().IntVar(&pickSHAFlags.smtpPort, smtpPort, 0, "SMTP port")
- pickSHACmd.Flags().StringArrayVar(&pickSHAFlags.emailAddresses, emailAddresses, []string{}, "email addresses")
- pickSHACmd.Flags().BoolVar(&pickSHAFlags.dryRun, dryRun, false, "use dry run Jira project for issues")
-
+ pickSHACmd.Flags().StringVar(&pickSHAFlags.releaseBucket, "release-bucket", "", "release candidates metadata GCS bucket")
+ pickSHACmd.Flags().StringVar(&pickSHAFlags.releaseObjectPrefix, "release-object-prefix", "", "release candidate object prefix")
+ pickSHACmd.Flags().StringVar(&pickSHAFlags.releaseSeries, "release-series", "", "major release series")
+ pickSHACmd.Flags().StringVar(&pickSHAFlags.smtpUser, "smtp-user", os.Getenv("SMTP_USER"), "SMTP user name")
+ pickSHACmd.Flags().StringVar(&pickSHAFlags.smtpHost, "smtp-host", "", "SMTP host")
+ pickSHACmd.Flags().IntVar(&pickSHAFlags.smtpPort, "smtp-port", 0, "SMTP port")
+ pickSHACmd.Flags().StringArrayVar(&pickSHAFlags.emailAddresses, "to", []string{}, "email addresses")
+ pickSHACmd.Flags().BoolVar(&pickSHAFlags.dryRun, "dry-run", false, "use dry run Jira project for issues")
requiredFlags := []string{
- qualifyBucket,
- qualifyObjectPrefix,
- releaseBucket,
- releaseObjectPrefix,
- releaseSeries,
- smtpUser,
- smtpHost,
- smtpPort,
- emailAddresses,
+ "qualify-bucket",
+ "qualify-object-prefix",
+ "release-bucket",
+ "release-object-prefix",
+ "release-series",
+ "smtp-user",
+ "smtp-host",
+ "smtp-port",
+ "to",
}
for _, flag := range requiredFlags {
if err := pickSHACmd.MarkFlagRequired(flag); err != nil {
@@ -130,24 +120,23 @@ func pickSHA(_ *cobra.Command, _ []string) error {
fmt.Sprintf("https://github.com/cockroachdb/cockroach/compare/%s...%s",
nextRelease.prevReleaseVersion,
nextRelease.buildInfo.SHA))
- args := messageDataPickSHA{
+ args := emailArgs{
Version: nextRelease.nextReleaseVersion,
SHA: nextRelease.buildInfo.SHA,
TrackingIssue: trackingIssue.Key,
TrackingIssueURL: template.URL(trackingIssue.url()),
DiffURL: diffURL,
}
- opts := sendOpts{
- templatesDir: pickSHAFlags.templatesDir,
- from: fmt.Sprintf("Justin Beaver <%s>", pickSHAFlags.smtpUser),
- host: pickSHAFlags.smtpHost,
- port: pickSHAFlags.smtpPort,
- user: pickSHAFlags.smtpUser,
- password: smtpPassword,
- to: pickSHAFlags.emailAddresses,
+ opts := smtpOpts{
+ from: fmt.Sprintf("Justin Beaver <%s>", pickSHAFlags.smtpUser),
+ host: pickSHAFlags.smtpHost,
+ port: pickSHAFlags.smtpPort,
+ user: pickSHAFlags.smtpUser,
+ password: smtpPassword,
+ to: pickSHAFlags.emailAddresses,
}
fmt.Println("Sending email")
- if err := sendMailPickSHA(args, opts); err != nil {
+ if err := sendmail(args, opts); err != nil {
return fmt.Errorf("cannot send email: %w", err)
}
return nil
diff --git a/pkg/cmd/release/sender.go b/pkg/cmd/release/sender.go
deleted file mode 100644
index b090f86b94..0000000000
--- a/pkg/cmd/release/sender.go
+++ /dev/null
@@ -1,189 +0,0 @@
-// Copyright 2019 The Cockroach Authors.
-//
-// Use of this software is governed by the Business Source License
-// included in the file licenses/BSL.txt.
-//
-// As of the Change Date specified in that file, in accordance with
-// the Business Source License, use of this software will be governed
-// by the Apache License, Version 2.0, included in the file
-// licenses/APL.txt.
-
-package main
-
-import (
- "fmt"
- htmltemplate "html/template"
- "net/smtp"
- "net/textproto"
- "os"
- "path/filepath"
- "strings"
-
- "github.com/jordan-wright/email"
-)
-
-const (
- templatePrefixPickSHA = "pick-sha"
- templatePrefixPostBlockers = "post-blockers"
- templatePrefixPostBlockersAlpha = "post-blockers.alpha"
-)
-
-type messageDataPickSHA struct {
- Version string
- SHA string
- TrackingIssue string
- TrackingIssueURL htmltemplate.URL
- DiffURL htmltemplate.URL
-}
-
-// ProjectBlocker lists the number of blockers per project.
-// This needs to be public in order to be used by the html/template engine.
-type ProjectBlocker struct {
- ProjectName string
- NumBlockers int
-}
-
-type messageDataPostBlockers struct {
- Version string
- PrepDate string
- ReleaseDate string
- TotalBlockers int
- BlockersURL string
- ReleaseBranch string
- BlockerList []ProjectBlocker
-}
-
-type postBlockerTemplateArgs struct {
- BackportsUseBackboard bool
- BackportsWeeklyTriageReview bool
-}
-
-type message struct {
- Subject string
- TextBody string
- HTMLBody string
-}
-
-func loadTemplate(templatesDir, template string) (string, error) {
- file, err := os.ReadFile(filepath.Join(templatesDir, template))
- if err != nil {
- return "", fmt.Errorf("loadTemplate %s: %w", template, err)
- }
- return string(file), nil
-}
-
-// newMessage generates new message parts, based on:
-// - templatePrefix - the filename prefix for subject/txt/html templates in the ./templates/ folder
-// - data - the data object applied to the html/text/subject templates
-func newMessage(templatesDir string, templatePrefix string, data interface{}) (*message, error) {
- subjectTemplate, err := loadTemplate(templatesDir, templatePrefix+".subject")
- if err != nil {
- return nil, err
- }
- subject, err := templateToText(subjectTemplate, data)
- if err != nil {
- return nil, fmt.Errorf("templateToText %s: %w", templatePrefix+".subject", err)
- }
-
- textTemplate, err := loadTemplate(templatesDir, templatePrefix+".txt")
- if err != nil {
- return nil, err
- }
- text, err := templateToText(textTemplate, data)
- if err != nil {
- return nil, fmt.Errorf("templateToText %s: %w", templatePrefix+".txt", err)
- }
-
- htmlTemplate, err := loadTemplate(templatesDir, templatePrefix+".gohtml")
- if err != nil {
- return nil, err
- }
- html, err := templateToHTML(htmlTemplate, data)
- if err != nil {
- return nil, fmt.Errorf("templateToHTML %s: %w", templatePrefix+".gohtml", err)
- }
-
- return &message{
- Subject: subject,
- TextBody: text,
- HTMLBody: html,
- }, nil
-}
-
-type sendOpts struct {
- templatesDir string
- host string
- port int
- user string
- password string
- from string
- to []string
-}
-
-func sendMailPostBlockers(args messageDataPostBlockers, opts sendOpts) error {
- templatePrefix := templatePrefixPostBlockers
-
- // Backport policy:
- // - stable/production: refer to backboard
- // - alpha: no need to mention backports
- // - beta/rc's: backports reviewed during triage meeting
- backportsUseBackboard := false
- backportsWeeklyTriageReview := false
-
- switch {
- case strings.Contains(args.Version, "-alpha."):
- // alpha copy is so different that we'll use a completely separate template.
- templatePrefix = templatePrefixPostBlockersAlpha
- case
- strings.Contains(args.Version, "-beta."),
- strings.Contains(args.Version, "-rc."):
- backportsWeeklyTriageReview = true
- default: // stable/production
- backportsUseBackboard = true
- }
-
- data := struct {
- Args messageDataPostBlockers
- Template postBlockerTemplateArgs
- }{
- Args: args,
- Template: postBlockerTemplateArgs{
- BackportsUseBackboard: backportsUseBackboard,
- BackportsWeeklyTriageReview: backportsWeeklyTriageReview,
- },
- }
- msg, err := newMessage(opts.templatesDir, templatePrefix, data)
- if err != nil {
- return fmt.Errorf("newMessage: %w", err)
- }
- return sendmail(msg, opts)
-}
-
-func sendMailPickSHA(args messageDataPickSHA, opts sendOpts) error {
- msg, err := newMessage(opts.templatesDir, templatePrefixPickSHA, args)
- if err != nil {
- return fmt.Errorf("newMessage: %w", err)
- }
- return sendmail(msg, opts)
-}
-
-// sendmail creates and sends an email to the releases mailing list.
-// sendmail is specified as a function closure to allow for testing
-// of sendMail* methods.
-var sendmail = func(content *message, smtpOpts sendOpts) error {
-
- e := &email.Email{
- To: smtpOpts.to,
- From: smtpOpts.from,
- Subject: content.Subject,
- Text: []byte(content.TextBody),
- HTML: []byte(content.HTMLBody),
- Headers: textproto.MIMEHeader{},
- }
- emailAuth := smtp.PlainAuth("", smtpOpts.user, smtpOpts.password, smtpOpts.host)
- addr := fmt.Sprintf("%s:%d", smtpOpts.host, smtpOpts.port)
- if err := e.Send(addr, emailAuth); err != nil {
- return fmt.Errorf("cannot send email: %w", err)
- }
- return nil
-}
diff --git a/pkg/cmd/release/sender_test.go b/pkg/cmd/release/sender_test.go
deleted file mode 100644
index 931cbf93bc..0000000000
--- a/pkg/cmd/release/sender_test.go
+++ /dev/null
@@ -1,259 +0,0 @@
-// Copyright 2019 The Cockroach Authors.
-//
-// Use of this software is governed by the Business Source License
-// included in the file licenses/BSL.txt.
-//
-// As of the Change Date specified in that file, in accordance with
-// the Business Source License, use of this software will be governed
-// by the Apache License, Version 2.0, included in the file
-// licenses/APL.txt.
-
-package main
-
-import (
- "flag"
- "fmt"
- htmltemplate "html/template"
- "io/ioutil"
- "path/filepath"
- "reflect"
- "testing"
-
- "github.com/stretchr/testify/require"
-)
-
-var update = flag.Bool("update", false, "update .golden files")
-
-// hookGlobal sets `*ptr = val` and returns a closure for restoring `*ptr` to
-// its original value. A runtime panic will occur if `val` is not assignable to
-// `*ptr`.
-func hookGlobal(ptr, val interface{}) func() {
- global := reflect.ValueOf(ptr).Elem()
- orig := reflect.New(global.Type()).Elem()
- orig.Set(global)
- global.Set(reflect.ValueOf(val))
- return func() { global.Set(orig) }
-}
-
-func TestPickSHA(t *testing.T) {
- var expectedMessage *message
- defer hookGlobal(
- &sendmail,
- func(content *message, smtpOpts sendOpts) error {
- expectedMessage = content
- return nil
- })()
-
- args := messageDataPickSHA{
- Version: "v21.1.13",
- SHA: "0fd6eead6c6eb7b2529deb39197cc3c95e93ded8",
- TrackingIssue: "REL-111",
- TrackingIssueURL: htmltemplate.URL("https://cockroachlabs.atlassian.net/browse/REL-111"),
- DiffURL: "https://github.com/cockroachdb/cockroach/compare/v21.1.13...0fd6eead6c6eb7b2529deb39197cc3c95e93ded8",
- }
- require.NoError(t, sendMailPickSHA(
- args, sendOpts{templatesDir: "templates"},
- ))
- verifyMessageValues(t, expectedMessage, templatePrefixPickSHA)
-}
-
-func TestPostBlockers(t *testing.T) {
- tests := []struct {
- testCase string
- version string
- blockersPerProject []int
- goldenFilePrefix string
- expectedContains []string
- expectedNotContains []string
- }{
- {
- testCase: "alpha: zero-blockers",
- version: "v19.1.0-alpha.3",
- blockersPerProject: []int{},
- goldenFilePrefix: templatePrefixPostBlockersAlpha + ".zero-blockers",
- expectedContains: []string{
- "We are clear to proceed with preparation and qualification",
- },
- },
- {
- testCase: "alpha: 1-blocker",
- version: "v19.1.0-alpha.3",
- blockersPerProject: []int{1},
- goldenFilePrefix: templatePrefixPostBlockersAlpha + ".1-blocker",
- expectedNotContains: []string{
- "which must be resolved before a candidate is chosen",
- },
- },
- {
- testCase: "alpha: many-blockers",
- version: "v19.1.0-alpha.3",
- blockersPerProject: []int{4, 2, 3},
- goldenFilePrefix: templatePrefixPostBlockersAlpha + ".many-blockers",
- expectedNotContains: []string{
- "which must be resolved before a candidate is chosen",
- "reminder to merge any outstanding backports",
- "reviewed at Monday's triage meeting",
- },
- },
- {
- testCase: "non-alpha: zero-blockers. stable/production: refer to backboard",
- version: "v19.1.11",
- blockersPerProject: []int{},
- goldenFilePrefix: templatePrefixPostBlockers + ".zero-blockers",
- expectedContains: []string{
- "We are clear to proceed with preparation and qualification",
- "backboard.crdb.dev",
- },
- expectedNotContains: []string{
- "reviewed at Monday's triage meeting",
- },
- },
- {
- testCase: "non-alpha: 1-blocker. beta/rc's are reviewed in triage meeting",
- version: "v19.1.0-beta.2",
- blockersPerProject: []int{1},
- goldenFilePrefix: templatePrefixPostBlockers + ".1-blocker",
- expectedContains: []string{
- "which must be resolved before a candidate is chosen",
- "reviewed at Monday's triage meeting",
- },
- expectedNotContains: []string{
- "backboard.crdb.dev",
- },
- },
- {
- testCase: "non-alpha: many-blockers. beta/rc's are reviewed in triage meeting",
- version: "v19.1.0-rc.3",
- blockersPerProject: []int{4, 2, 3},
- goldenFilePrefix: templatePrefixPostBlockers + ".many-blockers",
- expectedContains: []string{
- "which must be resolved before a candidate is chosen",
- "reviewed at Monday's triage meeting",
- },
- expectedNotContains: []string{
- "backboard.crdb.dev",
- },
- },
- }
-
- for _, test := range tests {
- t.Run(fmt.Sprintf(test.testCase), func(t *testing.T) {
- var expectedMessage *message
- defer hookGlobal(
- &sendmail,
- func(content *message, smtpOpts sendOpts) error {
- expectedMessage = content
- return nil
- })()
-
- // generate test data for blockerList
- totalBlockers := 0
- var blockerList []ProjectBlocker
- for i, numBlockers := range test.blockersPerProject {
- blockerList = append(blockerList, ProjectBlocker{
- ProjectName: fmt.Sprintf("Project %d", i+1),
- NumBlockers: numBlockers,
- })
- totalBlockers += numBlockers
- }
- args := messageDataPostBlockers{
- Version: test.version,
- PrepDate: "Saturday, April 1",
- ReleaseDate: "Saturday, April 11",
- TotalBlockers: totalBlockers,
- BlockersURL: "go.crdb.dev/blockers",
- ReleaseBranch: "master",
- BlockerList: blockerList,
- }
- require.NoError(t, sendMailPostBlockers(
- args, sendOpts{templatesDir: "templates"},
- ))
- verifyMessageValues(t, expectedMessage, test.goldenFilePrefix)
- for _, expectedContains := range test.expectedContains {
- require.Contains(t, expectedMessage.HTMLBody, expectedContains)
- require.Contains(t, expectedMessage.TextBody, expectedContains)
- }
- for _, expectedNotContains := range test.expectedNotContains {
- require.NotContains(t, expectedMessage.HTMLBody, expectedNotContains)
- require.NotContains(t, expectedMessage.TextBody, expectedNotContains)
- }
- })
- }
-}
-
-func verifyMessageValue(t *testing.T, expected string, goldenFileName string) {
- templates := mockMailTemplates()
-
- if *update {
- templates.CreateGolden(t, expected, goldenFileName)
- }
-
- goldenFile := templates.GetGolden(t, goldenFileName)
-
- require.Equal(t, goldenFile, expected)
-}
-
-func verifyMessageValues(t *testing.T, msg *message, goldenFilePrefix string) {
- goldenTextFileName := goldenFilePrefix + textBodyGoldenFileSuffix
- goldenHTMLFileName := goldenFilePrefix + htmlBodyGoldenFileSuffix
- goldenSubjectFileName := goldenFilePrefix + subjectGoldenFileSuffix
-
- verifyMessageValue(t, msg.TextBody, goldenTextFileName)
- verifyMessageValue(t, msg.HTMLBody, goldenHTMLFileName)
- verifyMessageValue(t, msg.Subject, goldenSubjectFileName)
-}
-
-type mailTemplates struct {
-}
-
-func mockMailTemplates() *mailTemplates {
- return &mailTemplates{}
-}
-
-const (
- subjectGoldenFileSuffix = ".subject.golden"
- textBodyGoldenFileSuffix = ".txt.golden"
- htmlBodyGoldenFileSuffix = ".html.golden"
- testDataDirectory = "testdata"
-)
-
-/*
-
-Golden Files
-
-Golden files are files containing expected output (like Snapshots in Jest). We use them here to
-compare expected output of email templates to generated output. For every email that is sent there
-are three associated files; an HTML file, a Plain Text file, and a file containing the Subject.
-Testing these files can be done with the convenience method `verifyMessageValues`. For example,
-
- func blahblahEmailTest (t *testing.T) {
- ...
-
- messageValues := MessageValues{
- Subject: "blah blah subject",
- TextBody: "blah blah text body",
- HtmlBody: "<main>blah blah html stuff</main>",
- }
- goldenFilePrefix := "very_cool_email"
-
- verifyMessageValues(t, MessageValues, goldenFilePrefix)
- }
-
-To generate golden files you may run go test with the update flag
-
- go test -v ./pkg/cmd/release -update
-
-*/
-
-// CreateGolden generates Jest-like snapshots used to verify Mailer message formatting.
-func (mt *mailTemplates) CreateGolden(t *testing.T, actualValue string, filename string) {
- err := ioutil.WriteFile(filepath.Join(testDataDirectory, filename), []byte(actualValue), 0644)
- require.NoError(t, err)
-}
-
-// GetGolden verifies Mailer-generated messages against golden files generated by CreateGolden.
-func (mt *mailTemplates) GetGolden(t *testing.T, filename string) string {
- file, err := ioutil.ReadFile(filepath.Join(testDataDirectory, filename))
- require.NoError(t, err)
- return string(file)
-}
diff --git a/pkg/cmd/release/templates/pick-sha.gohtml b/pkg/cmd/release/templates/pick-sha.gohtml
deleted file mode 100644
index e12b4bacbd..0000000000
--- a/pkg/cmd/release/templates/pick-sha.gohtml
+++ /dev/null
@@ -1,14 +0,0 @@
-<html>
- <body>
- <p>A candidate SHA has been selected for <strong>{{ .Version }}</strong>. Proceeding to qualification shortly.</p>
- <ul>
- <li>SHA: <a href="https://github.com/cockroachlabs/release-staging/commit/{{ .SHA }}">{{ .SHA }}</a></li>
- <li>Tracking Issue: <a href="{{ .TrackingIssueURL }}">{{ .TrackingIssue }}</a></li>
- <li><a href="{{ .DiffURL }}">List of changes</a> since last release</li>
- </ul>
- <p>
- Thanks,<br />
- Release Engineering
- </p>
- </body>
-</html>
diff --git a/pkg/cmd/release/templates/pick-sha.subject b/pkg/cmd/release/templates/pick-sha.subject
deleted file mode 100644
index 27e01766e8..0000000000
--- a/pkg/cmd/release/templates/pick-sha.subject
+++ /dev/null
@@ -1 +0,0 @@
-Release {{ .Version }}
diff --git a/pkg/cmd/release/templates/pick-sha.txt b/pkg/cmd/release/templates/pick-sha.txt
deleted file mode 100644
index 2580241c8c..0000000000
--- a/pkg/cmd/release/templates/pick-sha.txt
+++ /dev/null
@@ -1,8 +0,0 @@
-A candidate SHA has been selected for {{ .Version }}. Proceeding to qualification shortly.
-
- SHA: {{ .SHA }}
- Tracking Issue: {{ .TrackingIssueURL }}
- List of changes since last release: {{ .DiffURL }}
-
-Thanks
-Release Engineering
diff --git a/pkg/cmd/release/templates/post-blockers.alpha.gohtml b/pkg/cmd/release/templates/post-blockers.alpha.gohtml
deleted file mode 100644
index dd214c4268..0000000000
--- a/pkg/cmd/release/templates/post-blockers.alpha.gohtml
+++ /dev/null
@@ -1,54 +0,0 @@
-<html>
- <body>
- <p>Hello!</p>
- <p>
- The <strong>{{ .Args.Version }}</strong> release is scheduled for prep & qualification on
- <strong>{{ .Args.PrepDate }}</strong> for a scheduled release of <strong>{{ .Args.ReleaseDate }}</strong>.
- </p>
- <!-- ----- current blocker status and next steps ----- -->
- {{ if (eq .Args.TotalBlockers 0) }}
- <p>
- There are currently <strong>0 open release blockers</strong>
- (<a href='https://{{ .Args.BlockersURL }}'>{{ .Args.BlockersURL }}</a>)&nbsp;<span font-size='70%%'>🎉</span>.
- We are clear to proceed with preparation and qualification.
- </p>
- {{ else }}
- <p>
- <strong>Respective teams: please review blockers below</strong> to assess if any of these cause
- known data loss or corruption[1]. Otherwise, we will proceed to prep & qualify on Tuesday
- <strong>even if there are open release blockers</strong>.
- </p>
- <!-- grammar-specific conditional ("there is" vs "there are") -->
- {{ if (eq .Args.TotalBlockers 1) }}
- <p>
- There is currently <strong>1 open release blocker</strong>
- (<a href='https://{{ .Args.BlockersURL }}'>{{ .Args.BlockersURL }}</a>):
- </p>
- {{ else }}
- <p>
- There are currently <strong>{{ .Args.TotalBlockers }} open release blockers</strong>
- (<a href='https://{{ .Args.BlockersURL }}'>{{ .Args.BlockersURL }}</a>):
- </p>
- {{ end }}
- <ul>
- {{ range .Args.BlockerList }}
- <li>{{ .ProjectName }}: {{ .NumBlockers }}</li>
- {{ end }}
- </ul>
- <p>
- [1] As per our <a href='https://cockroachlabs.atlassian.net/wiki/spaces/ENG/pages/869990521/Release+Process+Terms+and+Definitions'
- >terms & definitions</a>, alphas/betas <strong>should not cause data loss or corruption</strong>,
- but may contain both known and unknown issues including speed or performance issues. As with other alphas:
- <ul>
- <li>there would be no expectation that it would be used in prod or needs to be supported</li>
- <li>alpha clusters must be wiped, and cannot be upgraded to 22.1 betas, RCs or final production release</li>
- <li>alphas/betas may contain both known and unknown issues including speed or performance issues</li>
- </ul>
- </p>
- {{ end }}
- <p>
- Thanks,<br />
- Release Engineering
- </p>
- </body>
-</html>
diff --git a/pkg/cmd/release/templates/post-blockers.alpha.subject b/pkg/cmd/release/templates/post-blockers.alpha.subject
deleted file mode 100644
index e7681e6323..0000000000
--- a/pkg/cmd/release/templates/post-blockers.alpha.subject
+++ /dev/null
@@ -1 +0,0 @@
-Release {{ .Args.Version }}
diff --git a/pkg/cmd/release/templates/post-blockers.alpha.txt b/pkg/cmd/release/templates/post-blockers.alpha.txt
deleted file mode 100644
index 7ae158e591..0000000000
--- a/pkg/cmd/release/templates/post-blockers.alpha.txt
+++ /dev/null
@@ -1,36 +0,0 @@
-Hello!
-
-The {{ .Args.Version }} release is scheduled for prep & qualification on {{ .Args.PrepDate }} for a scheduled release of {{ .Args.ReleaseDate }}.
-{{
- if (eq .Args.TotalBlockers 0)
-}}
-There are currently 0 open release blockers \o/ (https://{{ .Args.BlockersURL }}). We are clear to proceed with preparation and qualification.
-{{
- else
-}}
-Respective teams: please review blockers below to assess if any of these cause known data loss or corruption[1]. Otherwise, we will proceed to prep & qualify on Tuesday even if there are open release blockers.
-{{
- if (eq .Args.TotalBlockers 1)
-}}
-There is currently 1 open release blocker (https://{{ .Args.BlockersURL }}):{{
- else
-}}
-There are currently {{ .Args.TotalBlockers }} open release blockers (https://{{ .Args.BlockersURL }}):{{
- end
-}}
-{{
- range .Args.BlockerList
-}} - {{ .ProjectName }}: {{ .NumBlockers }}
-{{
- end
-}}
-[1] As per our terms & definitions, alphas/betas should not cause data loss or corruption, but may contain both known and unknown issues including speed or performance issues. As with other alphas:
- - there would be no expectation that it would be used in prod or needs to be supported
- - alpha clusters must be wiped, and cannot be upgraded to 22.1 betas, RCs or final production release
- - alphas/betas may contain both known and unknown issues including speed or performance issues
-See our Release Terms & Conditions: https://cockroachlabs.atlassian.net/wiki/spaces/ENG/pages/869990521/Release+Process+Terms+and+Definitions
-{{
- end
-}}
-Thanks,
-Release Engineering
diff --git a/pkg/cmd/release/templates/post-blockers.gohtml b/pkg/cmd/release/templates/post-blockers.gohtml
deleted file mode 100644
index a0ad43f58a..0000000000
--- a/pkg/cmd/release/templates/post-blockers.gohtml
+++ /dev/null
@@ -1,57 +0,0 @@
-<html>
- <body>
- <p>Hello!</p>
- <p>
- The <strong>{{ .Args.Version }}</strong> release is scheduled for prep & qualification on
- <strong>{{ .Args.PrepDate }}</strong> for a scheduled release of <strong>{{ .Args.ReleaseDate }}</strong>.
- </p>
- <!-- ----- backport direction, which depends on release-type ----- -->
- {{ if .Template.BackportsUseBackboard }}
- <p>
- This is a reminder to merge any outstanding backports that need to be included in the next release.
- Please refer to
- <a href='https://backboard.crdb.dev/?branch={{ .Args.ReleaseBranch }}&repo=386372623925772289'
- >backboard</a> to identify any such backports.
- </p>
- {{ end }}
- {{ if .Template.BackportsWeeklyTriageReview }}
- <p>
- With the
- <a href='https://go.crdb.dev/backport-policy-branch-cut'>Restrictive Backport Policy</a> in effect,
- backports for {{ .Args.ReleaseBranch }} will be reviewed at Monday's triage meeting.
- </p>
- {{ end }}
- <!-- ----- current blocker status and next steps ----- -->
- {{ if (eq .Args.TotalBlockers 0) }}
- <p>
- There are currently <strong>0 open release blockers</strong>
- (<a href='https://{{ .Args.BlockersURL }}'>{{ .Args.BlockersURL }}</a>)&nbsp;<span font-size='70%%'>🎉</span>.
- We are clear to proceed with preparation and qualification.
- </p>
- {{ else }}
- <!-- grammar-specific conditional ("there is" vs "there are") -->
- {{ if (eq .Args.TotalBlockers 1) }}
- <p>
- There is currently <strong>1 open release blocker</strong>
- (<a href='https://{{ .Args.BlockersURL }}'>{{ .Args.BlockersURL }}</a>),
- which must be resolved before a candidate is chosen:
- </p>
- {{ else }}
- <p>
- There are currently <strong>{{ .Args.TotalBlockers }} open release blockers</strong>
- (<a href='https://{{ .Args.BlockersURL }}'>{{ .Args.BlockersURL }}</a>),
- which must be resolved before a candidate is chosen:
- </p>
- {{ end }}
- <ul>
- {{ range .Args.BlockerList }}
- <li>{{ .ProjectName }}: {{ .NumBlockers }}</li>
- {{ end }}
- </ul>
- {{ end }}
- <p>
- Thanks,<br />
- Release Engineering
- </p>
- </body>
-</html>
diff --git a/pkg/cmd/release/templates/post-blockers.subject b/pkg/cmd/release/templates/post-blockers.subject
deleted file mode 100644
index e7681e6323..0000000000
--- a/pkg/cmd/release/templates/post-blockers.subject
+++ /dev/null
@@ -1 +0,0 @@
-Release {{ .Args.Version }}
diff --git a/pkg/cmd/release/templates/post-blockers.txt b/pkg/cmd/release/templates/post-blockers.txt
deleted file mode 100644
index a6c2a932ef..0000000000
--- a/pkg/cmd/release/templates/post-blockers.txt
+++ /dev/null
@@ -1,40 +0,0 @@
-Hello!
-
-The {{ .Args.Version }} release is scheduled for prep & qualification on {{ .Args.PrepDate }} for a scheduled release of {{ .Args.ReleaseDate }}.
-{{
- if .Template.BackportsUseBackboard
-}}
-This is a reminder to merge any outstanding backports that need to be included in the next release. Please refer to backboard to identify any such backports, https://backboard.crdb.dev/?branch={{ .Args.ReleaseBranch }}&repo=386372623925772289
-{{
- end
-}}{{
- if .Template.BackportsWeeklyTriageReview
-}}
-With the Restrictive Backport Policy in effect, backports for {{ .Args.ReleaseBranch }} will be reviewed at Monday's triage meeting. Backport policy: https://go.crdb.dev/backport-policy-branch-cut.
-{{
- end
-}}{{
- if (eq .Args.TotalBlockers 0)
-}}
-There are currently 0 open release blockers \o/ (https://{{ .Args.BlockersURL }}). We are clear to proceed with preparation and qualification.
-{{
- else
-}}{{
- if (eq .Args.TotalBlockers 1)
-}}
-There is currently 1 open release blocker (https://{{ .Args.BlockersURL }}), which must be resolved before a candidate is chosen:{{
- else
-}}
-There are currently {{ .Args.TotalBlockers }} open release blockers (https://{{ .Args.BlockersURL }}), which must be resolved before a candidate is chosen:{{
- end
-}}
-{{
- range .Args.BlockerList
-}} - {{ .ProjectName }}: {{ .NumBlockers }}
-{{
- end
-}}{{
- end
-}}
-Thanks,
-Release Engineering
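
Note: the .txt and .gohtml files removed above are ordinary Go text/template and html/template files; `eq` picks the singular vs. plural blocker sentence and `range` walks the per-project list. Below is a minimal, self-contained sketch of rendering such a template. The struct and field names here are trimmed-down stand-ins (the real templates address fields through `.Args` and `.Template`), not the release tool's actual types.

    package main

    import (
    	"os"
    	"text/template"
    )

    type blocker struct {
    	ProjectName string
    	NumBlockers int
    }

    type blockerArgs struct {
    	TotalBlockers int
    	BlockerList   []blocker
    }

    // A trimmed version of the post-blockers text template: singular/plural
    // sentence via eq, then one line per project via range.
    const blockersTmpl = `There {{ if eq .TotalBlockers 1 }}is currently 1 open release blocker{{ else }}are currently {{ .TotalBlockers }} open release blockers{{ end }}:
    {{ range .BlockerList }} - {{ .ProjectName }}: {{ .NumBlockers }}
    {{ end }}`

    func main() {
    	t := template.Must(template.New("blockers").Parse(blockersTmpl))
    	args := blockerArgs{
    		TotalBlockers: 2,
    		BlockerList:   []blocker{{"Project 1", 1}, {"Project 2", 1}},
    	}
    	if err := t.Execute(os.Stdout, args); err != nil {
    		panic(err)
    	}
    }
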
diff --git a/pkg/cmd/release/testdata/pick-sha.html.golden b/pkg/cmd/release/testdata/pick-sha.html.golden
deleted file mode 100644
index 7dd9512497..0000000000
--- a/pkg/cmd/release/testdata/pick-sha.html.golden
+++ /dev/null
@@ -1,14 +0,0 @@
-<html>
- <body>
- <p>A candidate SHA has been selected for <strong>v21.1.13</strong>. Proceeding to qualification shortly.</p>
- <ul>
- <li>SHA: <a href="https://github.com/cockroachlabs/release-staging/commit/0fd6eead6c6eb7b2529deb39197cc3c95e93ded8">0fd6eead6c6eb7b2529deb39197cc3c95e93ded8</a></li>
- <li>Tracking Issue: <a href="https://cockroachlabs.atlassian.net/browse/REL-111">REL-111</a></li>
- <li><a href="https://github.com/cockroachdb/cockroach/compare/v21.1.13...0fd6eead6c6eb7b2529deb39197cc3c95e93ded8">List of changes</a> since last release</li>
- </ul>
- <p>
- Thanks,<br />
- Release Engineering
- </p>
- </body>
-</html>
diff --git a/pkg/cmd/release/testdata/pick-sha.subject.golden b/pkg/cmd/release/testdata/pick-sha.subject.golden
deleted file mode 100644
index f9ca091ed1..0000000000
--- a/pkg/cmd/release/testdata/pick-sha.subject.golden
+++ /dev/null
@@ -1 +0,0 @@
-Release v21.1.13
diff --git a/pkg/cmd/release/testdata/pick-sha.txt.golden b/pkg/cmd/release/testdata/pick-sha.txt.golden
deleted file mode 100644
index 9a3651a945..0000000000
--- a/pkg/cmd/release/testdata/pick-sha.txt.golden
+++ /dev/null
@@ -1,8 +0,0 @@
-A candidate SHA has been selected for v21.1.13. Proceeding to qualification shortly.
-
- SHA: 0fd6eead6c6eb7b2529deb39197cc3c95e93ded8
- Tracking Issue: https://cockroachlabs.atlassian.net/browse/REL-111
- List of changes since last release: https://github.com/cockroachdb/cockroach/compare/v21.1.13...0fd6eead6c6eb7b2529deb39197cc3c95e93ded8
-
-Thanks
-Release Engineering
diff --git a/pkg/cmd/release/testdata/post-blockers.1-blocker.html.golden b/pkg/cmd/release/testdata/post-blockers.1-blocker.html.golden
deleted file mode 100644
index b746358fd0..0000000000
--- a/pkg/cmd/release/testdata/post-blockers.1-blocker.html.golden
+++ /dev/null
@@ -1,38 +0,0 @@
-<html>
- <body>
- <p>Hello!</p>
- <p>
- The <strong>v19.1.0-beta.2</strong> release is scheduled for prep & qualification on
- <strong>Saturday, April 1</strong> for a scheduled release of <strong>Saturday, April 11</strong>.
- </p>
-
-
-
- <p>
- With the
- <a href='https://go.crdb.dev/backport-policy-branch-cut'>Restrictive Backport Policy</a> in effect,
- backports for master will be reviewed at Monday's triage meeting.
- </p>
-
-
-
-
-
- <p>
- There is currently <strong>1 open release blocker</strong>
- (<a href='https://go.crdb.dev/blockers'>go.crdb.dev/blockers</a>),
- which must be resolved before a candidate is chosen:
- </p>
-
- <ul>
-
- <li>Project 1: 1</li>
-
- </ul>
-
- <p>
- Thanks,<br />
- Release Engineering
- </p>
- </body>
-</html>
diff --git a/pkg/cmd/release/testdata/post-blockers.1-blocker.subject.golden b/pkg/cmd/release/testdata/post-blockers.1-blocker.subject.golden
deleted file mode 100644
index e168458a12..0000000000
--- a/pkg/cmd/release/testdata/post-blockers.1-blocker.subject.golden
+++ /dev/null
@@ -1 +0,0 @@
-Release v19.1.0-beta.2
diff --git a/pkg/cmd/release/testdata/post-blockers.1-blocker.txt.golden b/pkg/cmd/release/testdata/post-blockers.1-blocker.txt.golden
deleted file mode 100644
index 6049f4dc92..0000000000
--- a/pkg/cmd/release/testdata/post-blockers.1-blocker.txt.golden
+++ /dev/null
@@ -1,11 +0,0 @@
-Hello!
-
-The v19.1.0-beta.2 release is scheduled for prep & qualification on Saturday, April 1 for a scheduled release of Saturday, April 11.
-
-With the Restrictive Backport Policy in effect, backports for master will be reviewed at Monday's triage meeting. Backport policy: https://go.crdb.dev/backport-policy-branch-cut.
-
-There is currently 1 open release blocker (https://go.crdb.dev/blockers), which must be resolved before a candidate is chosen:
- - Project 1: 1
-
-Thanks,
-Release Engineering
diff --git a/pkg/cmd/release/testdata/post-blockers.alpha.1-blocker.html.golden b/pkg/cmd/release/testdata/post-blockers.alpha.1-blocker.html.golden
deleted file mode 100644
index c5f6a06944..0000000000
--- a/pkg/cmd/release/testdata/post-blockers.alpha.1-blocker.html.golden
+++ /dev/null
@@ -1,43 +0,0 @@
-<html>
- <body>
- <p>Hello!</p>
- <p>
- The <strong>v19.1.0-alpha.3</strong> release is scheduled for prep & qualification on
- <strong>Saturday, April 1</strong> for a scheduled release of <strong>Saturday, April 11</strong>.
- </p>
-
-
- <p>
- <strong>Respective teams: please review blockers below</strong> to assess if any of these cause
- known data loss or corruption[1]. Otherwise, we will proceed to prep & qualify on Tuesday
- <strong>even if there are open release blockers</strong>.
- </p>
-
-
- <p>
- There is currently <strong>1 open release blocker</strong>
- (<a href='https://go.crdb.dev/blockers'>go.crdb.dev/blockers</a>):
- </p>
-
- <ul>
-
- <li>Project 1: 1</li>
-
- </ul>
- <p>
- [1] As per our <a href='https://cockroachlabs.atlassian.net/wiki/spaces/ENG/pages/869990521/Release+Process+Terms+and+Definitions'
- >terms & definitions</a>, alphas/betas <strong>should not cause data loss or corruption</strong>,
- but may contain both known and unknown issues including speed or performance issues. As with other alphas:
- <ul>
- <li>there would be no expectation that it would be used in prod or needs to be supported</li>
- <li>alpha clusters must be wiped, and cannot be upgraded to 22.1 betas, RCs or final production release</li>
- <li>alphas/betas may contain both known and unknown issues including speed or performance issues</li>
- </ul>
- </p>
-
- <p>
- Thanks,<br />
- Release Engineering
- </p>
- </body>
-</html>
diff --git a/pkg/cmd/release/testdata/post-blockers.alpha.1-blocker.subject.golden b/pkg/cmd/release/testdata/post-blockers.alpha.1-blocker.subject.golden
deleted file mode 100644
index ff4d866600..0000000000
--- a/pkg/cmd/release/testdata/post-blockers.alpha.1-blocker.subject.golden
+++ /dev/null
@@ -1 +0,0 @@
-Release v19.1.0-alpha.3
diff --git a/pkg/cmd/release/testdata/post-blockers.alpha.1-blocker.txt.golden b/pkg/cmd/release/testdata/post-blockers.alpha.1-blocker.txt.golden
deleted file mode 100644
index b61bde0964..0000000000
--- a/pkg/cmd/release/testdata/post-blockers.alpha.1-blocker.txt.golden
+++ /dev/null
@@ -1,17 +0,0 @@
-Hello!
-
-The v19.1.0-alpha.3 release is scheduled for prep & qualification on Saturday, April 1 for a scheduled release of Saturday, April 11.
-
-Respective teams: please review blockers below to assess if any of these cause known data loss or corruption[1]. Otherwise, we will proceed to prep & qualify on Tuesday even if there are open release blockers.
-
-There is currently 1 open release blocker (https://go.crdb.dev/blockers):
- - Project 1: 1
-
-[1] As per our terms & definitions, alphas/betas should not cause data loss or corruption, but may contain both known and unknown issues including speed or performance issues. As with other alphas:
- - there would be no expectation that it would be used in prod or needs to be supported
- - alpha clusters must be wiped, and cannot be upgraded to 22.1 betas, RCs or final production release
- - alphas/betas may contain both known and unknown issues including speed or performance issues
-See our Release Terms & Conditions: https://cockroachlabs.atlassian.net/wiki/spaces/ENG/pages/869990521/Release+Process+Terms+and+Definitions
-
-Thanks,
-Release Engineering
diff --git a/pkg/cmd/release/testdata/post-blockers.alpha.many-blockers.html.golden b/pkg/cmd/release/testdata/post-blockers.alpha.many-blockers.html.golden
deleted file mode 100644
index 9b29adb3be..0000000000
--- a/pkg/cmd/release/testdata/post-blockers.alpha.many-blockers.html.golden
+++ /dev/null
@@ -1,47 +0,0 @@
-<html>
- <body>
- <p>Hello!</p>
- <p>
- The <strong>v19.1.0-alpha.3</strong> release is scheduled for prep & qualification on
- <strong>Saturday, April 1</strong> for a scheduled release of <strong>Saturday, April 11</strong>.
- </p>
-
-
- <p>
- <strong>Respective teams: please review blockers below</strong> to assess if any of these cause
- known data loss or corruption[1]. Otherwise, we will proceed to prep & qualify on Tuesday
- <strong>even if there are open release blockers</strong>.
- </p>
-
-
- <p>
- There are currently <strong>9 open release blockers</strong>
- (<a href='https://go.crdb.dev/blockers'>go.crdb.dev/blockers</a>):
- </p>
-
- <ul>
-
- <li>Project 1: 4</li>
-
- <li>Project 2: 2</li>
-
- <li>Project 3: 3</li>
-
- </ul>
- <p>
- [1] As per our <a href='https://cockroachlabs.atlassian.net/wiki/spaces/ENG/pages/869990521/Release+Process+Terms+and+Definitions'
- >terms & definitions</a>, alphas/betas <strong>should not cause data loss or corruption</strong>,
- but may contain both known and unknown issues including speed or performance issues. As with other alphas:
- <ul>
- <li>there would be no expectation that it would be used in prod or needs to be supported</li>
- <li>alpha clusters must be wiped, and cannot be upgraded to 22.1 betas, RCs or final production release</li>
- <li>alphas/betas may contain both known and unknown issues including speed or performance issues</li>
- </ul>
- </p>
-
- <p>
- Thanks,<br />
- Release Engineering
- </p>
- </body>
-</html>
diff --git a/pkg/cmd/release/testdata/post-blockers.alpha.many-blockers.subject.golden b/pkg/cmd/release/testdata/post-blockers.alpha.many-blockers.subject.golden
deleted file mode 100644
index ff4d866600..0000000000
--- a/pkg/cmd/release/testdata/post-blockers.alpha.many-blockers.subject.golden
+++ /dev/null
@@ -1 +0,0 @@
-Release v19.1.0-alpha.3
diff --git a/pkg/cmd/release/testdata/post-blockers.alpha.many-blockers.txt.golden b/pkg/cmd/release/testdata/post-blockers.alpha.many-blockers.txt.golden
deleted file mode 100644
index cf990c7331..0000000000
--- a/pkg/cmd/release/testdata/post-blockers.alpha.many-blockers.txt.golden
+++ /dev/null
@@ -1,19 +0,0 @@
-Hello!
-
-The v19.1.0-alpha.3 release is scheduled for prep & qualification on Saturday, April 1 for a scheduled release of Saturday, April 11.
-
-Respective teams: please review blockers below to assess if any of these cause known data loss or corruption[1]. Otherwise, we will proceed to prep & qualify on Tuesday even if there are open release blockers.
-
-There are currently 9 open release blockers (https://go.crdb.dev/blockers):
- - Project 1: 4
- - Project 2: 2
- - Project 3: 3
-
-[1] As per our terms & definitions, alphas/betas should not cause data loss or corruption, but may contain both known and unknown issues including speed or performance issues. As with other alphas:
- - there would be no expectation that it would be used in prod or needs to be supported
- - alpha clusters must be wiped, and cannot be upgraded to 22.1 betas, RCs or final production release
- - alphas/betas may contain both known and unknown issues including speed or performance issues
-See our Release Terms & Conditions: https://cockroachlabs.atlassian.net/wiki/spaces/ENG/pages/869990521/Release+Process+Terms+and+Definitions
-
-Thanks,
-Release Engineering
diff --git a/pkg/cmd/release/testdata/post-blockers.alpha.zero-blockers.html.golden b/pkg/cmd/release/testdata/post-blockers.alpha.zero-blockers.html.golden
deleted file mode 100644
index 5c6115f1de..0000000000
--- a/pkg/cmd/release/testdata/post-blockers.alpha.zero-blockers.html.golden
+++ /dev/null
@@ -1,21 +0,0 @@
-<html>
- <body>
- <p>Hello!</p>
- <p>
- The <strong>v19.1.0-alpha.3</strong> release is scheduled for prep & qualification on
- <strong>Saturday, April 1</strong> for a scheduled release of <strong>Saturday, April 11</strong>.
- </p>
-
-
- <p>
- There are currently <strong>0 open release blockers</strong>
- (<a href='https://go.crdb.dev/blockers'>go.crdb.dev/blockers</a>)&nbsp;<span font-size='70%%'>🎉</span>.
- We are clear to proceed with preparation and qualification.
- </p>
-
- <p>
- Thanks,<br />
- Release Engineering
- </p>
- </body>
-</html>
diff --git a/pkg/cmd/release/testdata/post-blockers.alpha.zero-blockers.subject.golden b/pkg/cmd/release/testdata/post-blockers.alpha.zero-blockers.subject.golden
deleted file mode 100644
index ff4d866600..0000000000
--- a/pkg/cmd/release/testdata/post-blockers.alpha.zero-blockers.subject.golden
+++ /dev/null
@@ -1 +0,0 @@
-Release v19.1.0-alpha.3
diff --git a/pkg/cmd/release/testdata/post-blockers.alpha.zero-blockers.txt.golden b/pkg/cmd/release/testdata/post-blockers.alpha.zero-blockers.txt.golden
deleted file mode 100644
index 428fcad1d5..0000000000
--- a/pkg/cmd/release/testdata/post-blockers.alpha.zero-blockers.txt.golden
+++ /dev/null
@@ -1,8 +0,0 @@
-Hello!
-
-The v19.1.0-alpha.3 release is scheduled for prep & qualification on Saturday, April 1 for a scheduled release of Saturday, April 11.
-
-There are currently 0 open release blockers \o/ (https://go.crdb.dev/blockers). We are clear to proceed with preparation and qualification.
-
-Thanks,
-Release Engineering
diff --git a/pkg/cmd/release/testdata/post-blockers.many-blockers.html.golden b/pkg/cmd/release/testdata/post-blockers.many-blockers.html.golden
deleted file mode 100644
index 1be3d57191..0000000000
--- a/pkg/cmd/release/testdata/post-blockers.many-blockers.html.golden
+++ /dev/null
@@ -1,42 +0,0 @@
-<html>
- <body>
- <p>Hello!</p>
- <p>
- The <strong>v19.1.0-rc.3</strong> release is scheduled for prep & qualification on
- <strong>Saturday, April 1</strong> for a scheduled release of <strong>Saturday, April 11</strong>.
- </p>
-
-
-
- <p>
- With the
- <a href='https://go.crdb.dev/backport-policy-branch-cut'>Restrictive Backport Policy</a> in effect,
- backports for master will be reviewed at Monday's triage meeting.
- </p>
-
-
-
-
-
- <p>
- There are currently <strong>9 open release blockers</strong>
- (<a href='https://go.crdb.dev/blockers'>go.crdb.dev/blockers</a>),
- which must be resolved before a candidate is chosen:
- </p>
-
- <ul>
-
- <li>Project 1: 4</li>
-
- <li>Project 2: 2</li>
-
- <li>Project 3: 3</li>
-
- </ul>
-
- <p>
- Thanks,<br />
- Release Engineering
- </p>
- </body>
-</html>
diff --git a/pkg/cmd/release/testdata/post-blockers.many-blockers.subject.golden b/pkg/cmd/release/testdata/post-blockers.many-blockers.subject.golden
deleted file mode 100644
index 3c0e921ce0..0000000000
--- a/pkg/cmd/release/testdata/post-blockers.many-blockers.subject.golden
+++ /dev/null
@@ -1 +0,0 @@
-Release v19.1.0-rc.3
diff --git a/pkg/cmd/release/testdata/post-blockers.many-blockers.txt.golden b/pkg/cmd/release/testdata/post-blockers.many-blockers.txt.golden
deleted file mode 100644
index e8ed81da2f..0000000000
--- a/pkg/cmd/release/testdata/post-blockers.many-blockers.txt.golden
+++ /dev/null
@@ -1,13 +0,0 @@
-Hello!
-
-The v19.1.0-rc.3 release is scheduled for prep & qualification on Saturday, April 1 for a scheduled release of Saturday, April 11.
-
-With the Restrictive Backport Policy in effect, backports for master will be reviewed at Monday's triage meeting. Backport policy: https://go.crdb.dev/backport-policy-branch-cut.
-
-There are currently 9 open release blockers (https://go.crdb.dev/blockers), which must be resolved before a candidate is chosen:
- - Project 1: 4
- - Project 2: 2
- - Project 3: 3
-
-Thanks,
-Release Engineering
diff --git a/pkg/cmd/release/testdata/post-blockers.zero-blockers.html.golden b/pkg/cmd/release/testdata/post-blockers.zero-blockers.html.golden
deleted file mode 100644
index 75e04ac841..0000000000
--- a/pkg/cmd/release/testdata/post-blockers.zero-blockers.html.golden
+++ /dev/null
@@ -1,31 +0,0 @@
-<html>
- <body>
- <p>Hello!</p>
- <p>
- The <strong>v19.1.11</strong> release is scheduled for prep & qualification on
- <strong>Saturday, April 1</strong> for a scheduled release of <strong>Saturday, April 11</strong>.
- </p>
-
-
- <p>
- This is a reminder to merge any outstanding backports that need to be included in the next release.
- Please refer to
- <a href='https://backboard.crdb.dev/?branch=master&repo=386372623925772289'
- >backboard</a> to identify any such backports.
- </p>
-
-
-
-
- <p>
- There are currently <strong>0 open release blockers</strong>
- (<a href='https://go.crdb.dev/blockers'>go.crdb.dev/blockers</a>)&nbsp;<span font-size='70%%'>🎉</span>.
- We are clear to proceed with preparation and qualification.
- </p>
-
- <p>
- Thanks,<br />
- Release Engineering
- </p>
- </body>
-</html>
diff --git a/pkg/cmd/release/testdata/post-blockers.zero-blockers.subject.golden b/pkg/cmd/release/testdata/post-blockers.zero-blockers.subject.golden
deleted file mode 100644
index bd85bcf500..0000000000
--- a/pkg/cmd/release/testdata/post-blockers.zero-blockers.subject.golden
+++ /dev/null
@@ -1 +0,0 @@
-Release v19.1.11
diff --git a/pkg/cmd/release/testdata/post-blockers.zero-blockers.txt.golden b/pkg/cmd/release/testdata/post-blockers.zero-blockers.txt.golden
deleted file mode 100644
index 87f7181424..0000000000
--- a/pkg/cmd/release/testdata/post-blockers.zero-blockers.txt.golden
+++ /dev/null
@@ -1,10 +0,0 @@
-Hello!
-
-The v19.1.11 release is scheduled for prep & qualification on Saturday, April 1 for a scheduled release of Saturday, April 11.
-
-This is a reminder to merge any outstanding backports that need to be included in the next release. Please refer to backboard to identify any such backports, https://backboard.crdb.dev/?branch=master&repo=386372623925772289
-
-There are currently 0 open release blockers \o/ (https://go.crdb.dev/blockers). We are clear to proceed with preparation and qualification.
-
-Thanks,
-Release Engineering
diff --git a/pkg/cmd/roachtest/BUILD.bazel b/pkg/cmd/roachtest/BUILD.bazel
index 9f6fdf8fd1..79eef8d40c 100644
--- a/pkg/cmd/roachtest/BUILD.bazel
+++ b/pkg/cmd/roachtest/BUILD.bazel
@@ -74,7 +74,6 @@ go_test(
"//pkg/cmd/roachtest/test",
"//pkg/roachprod/logger",
"//pkg/testutils",
- "//pkg/util/quotapool",
"//pkg/util/stop",
"//pkg/util/syncutil",
"//pkg/util/version",
diff --git a/pkg/cmd/roachtest/cluster.go b/pkg/cmd/roachtest/cluster.go
index 5abf38d35d..c4e867b37c 100644
--- a/pkg/cmd/roachtest/cluster.go
+++ b/pkg/cmd/roachtest/cluster.go
@@ -818,15 +818,6 @@ func createFlagsOverride(flags *pflag.FlagSet, opts *vm.CreateOpts) {
}
}
-// clusterMock creates a cluster to be used for (self) testing.
-func (f *clusterFactory) clusterMock(cfg clusterConfig) *clusterImpl {
- return &clusterImpl{
- name: f.genName(cfg),
- expiration: timeutil.Now().Add(24 * time.Hour),
- r: f.r,
- }
-}
-
// newCluster creates a new roachprod cluster.
//
// setStatus is called with status messages indicating the stage of cluster
@@ -845,8 +836,12 @@ func (f *clusterFactory) newCluster(
}
if cfg.spec.NodeCount == 0 {
- // For tests, use a mock cluster.
- c := f.clusterMock(cfg)
+ // For tests. Return the minimum that makes them happy.
+ c := &clusterImpl{
+ name: f.genName(cfg),
+ expiration: timeutil.Now().Add(24 * time.Hour),
+ r: f.r,
+ }
if err := f.r.registerCluster(c); err != nil {
return nil, err
}
@@ -1367,7 +1362,7 @@ WHERE t.status NOT IN ('RANGE_CONSISTENT', 'RANGE_INDETERMINATE')`)
// crdb_internal.check_consistency(true, '', '') indicates that any ranges'
// replicas are inconsistent with each other. It uses the first node that
// is up to run the query.
-func (c *clusterImpl) FailOnReplicaDivergence(ctx context.Context, t *testImpl) {
+func (c *clusterImpl) FailOnReplicaDivergence(ctx context.Context, t test.Test) {
if c.spec.NodeCount < 1 {
return // unit tests
}
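
Note: FailOnReplicaDivergence (touched in the hunk above) runs the consistency query described in its comment against the first live node. Below is a minimal standalone sketch of issuing that query through database/sql; the connection string and the pq driver are placeholders, and the selected columns are an assumption about the virtual table's schema rather than the roachtest code's exact query.

    package main

    import (
    	"context"
    	"database/sql"
    	"fmt"

    	_ "github.com/lib/pq" // placeholder Postgres-wire driver
    )

    func main() {
    	ctx := context.Background()
    	// Placeholder connection string for a locally running, insecure node.
    	db, err := sql.Open("postgres", "postgresql://root@localhost:26257/?sslmode=disable")
    	if err != nil {
    		panic(err)
    	}
    	defer db.Close()

    	// Report only ranges whose replicas disagree (or whose check errored).
    	rows, err := db.QueryContext(ctx, `
    SELECT t.range_id, t.status
    FROM crdb_internal.check_consistency(true, '', '') AS t
    WHERE t.status NOT IN ('RANGE_CONSISTENT', 'RANGE_INDETERMINATE')`)
    	if err != nil {
    		panic(err)
    	}
    	defer rows.Close()
    	for rows.Next() {
    		var rangeID int64
    		var status string
    		if err := rows.Scan(&rangeID, &status); err != nil {
    			panic(err)
    		}
    		fmt.Printf("range %d: %s\n", rangeID, status)
    	}
    }
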
diff --git a/pkg/cmd/roachtest/main.go b/pkg/cmd/roachtest/main.go
index fdbe9c817a..28eca72c22 100644
--- a/pkg/cmd/roachtest/main.go
+++ b/pkg/cmd/roachtest/main.go
@@ -415,7 +415,7 @@ func runTests(register func(registry.Registry), cfg cliCfg) error {
err = runner.Run(
ctx, tests, cfg.count, cfg.parallelism, opt,
testOpts{versionsBinaryOverride: cfg.versionsBinaryOverride},
- lopt, nil /* clusterAllocator */)
+ lopt)
// Make sure we attempt to clean up. We run with a non-canceled ctx; the
// ctx above might be canceled in case a signal was received. If that's
diff --git a/pkg/cmd/roachtest/prometheus/BUILD.bazel b/pkg/cmd/roachtest/prometheus/BUILD.bazel
index bcdf3635b8..3046155493 100644
--- a/pkg/cmd/roachtest/prometheus/BUILD.bazel
+++ b/pkg/cmd/roachtest/prometheus/BUILD.bazel
@@ -9,8 +9,6 @@ go_library(
deps = [
"//pkg/cmd/roachtest/option",
"//pkg/roachprod/logger",
- "@com_github_prometheus_client_golang//api/prometheus/v1:prometheus",
- "@com_github_prometheus_common//model",
"@in_gopkg_yaml_v2//:yaml_v2",
],
)
diff --git a/pkg/cmd/roachtest/prometheus/prometheus.go b/pkg/cmd/roachtest/prometheus/prometheus.go
index dda5b932ca..7f0d520713 100644
--- a/pkg/cmd/roachtest/prometheus/prometheus.go
+++ b/pkg/cmd/roachtest/prometheus/prometheus.go
@@ -21,16 +21,9 @@ import (
"github.com/cockroachdb/cockroach/pkg/cmd/roachtest/option"
"github.com/cockroachdb/cockroach/pkg/roachprod/logger"
- promv1 "github.com/prometheus/client_golang/api/prometheus/v1"
- "github.com/prometheus/common/model"
"gopkg.in/yaml.v2"
)
-// Client is an interface allowing queries against Prometheus.
-type Client interface {
- Query(ctx context.Context, query string, ts time.Time) (model.Value, promv1.Warnings, error)
-}
-
// ScrapeNode are nodes to scrape from.
type ScrapeNode struct {
Nodes option.NodeListOption
diff --git a/pkg/cmd/roachtest/registry/owners.go b/pkg/cmd/roachtest/registry/owners.go
index 8f3e61822f..c0f2c6bce9 100644
--- a/pkg/cmd/roachtest/registry/owners.go
+++ b/pkg/cmd/roachtest/registry/owners.go
@@ -27,5 +27,4 @@ const (
OwnerSQLSchema Owner = `sql-schema`
OwnerStorage Owner = `storage`
OwnerTestEng Owner = `test-eng`
- OwnerDevInf Owner = `dev-inf`
)
diff --git a/pkg/cmd/roachtest/test_runner.go b/pkg/cmd/roachtest/test_runner.go
index 877022769e..9655eba8f7 100644
--- a/pkg/cmd/roachtest/test_runner.go
+++ b/pkg/cmd/roachtest/test_runner.go
@@ -20,13 +20,13 @@ import (
"net"
"net/http"
"os"
+ "os/exec"
"path/filepath"
"runtime"
"sort"
"strconv"
"strings"
"sync"
- "sync/atomic"
"time"
"github.com/cockroachdb/cockroach/pkg/cmd/internal/issues"
@@ -50,7 +50,6 @@ import (
)
var errTestsFailed = fmt.Errorf("some tests failed")
-var errClusterProvisioningFailed = fmt.Errorf("some clusters could not be created")
// testRunner runs tests.
type testRunner struct {
@@ -95,9 +94,6 @@ type testRunner struct {
// completed maintains information on all completed test runs.
completed []completedTestInfo
}
-
- // Counts cluster creation errors across all workers.
- numClusterErrs int32
}
// newTestRunner constructs a testRunner.
@@ -181,7 +177,6 @@ func (r *testRunner) Run(
clustersOpt clustersOpt,
topt testOpts,
lopt loggingOpt,
- clusterAllocator clusterAllocatorFn,
) error {
// Validate options.
if len(tests) == 0 {
@@ -225,8 +220,58 @@ func (r *testRunner) Run(
}
}
}
- if clusterAllocator == nil {
- clusterAllocator = defaultClusterAllocator(r, clustersOpt, lopt)
+
+ var numConcurrentClusterCreations int
+ if cloud == "aws" {
+ // AWS has ridiculous API calls limits, so we're going to create one cluster
+ // at a time. Internally, roachprod has throttling for the calls required to
+ // create a single cluster.
+ numConcurrentClusterCreations = 1
+ } else {
+ numConcurrentClusterCreations = 1000
+ }
+ clusterFactory := newClusterFactory(
+ clustersOpt.user, clustersOpt.clusterID, lopt.artifactsDir, r.cr, numConcurrentClusterCreations)
+
+ // allocateCluster will be used by workers to create new clusters (or to attach
+ // to an existing one).
+ allocateCluster := func(
+ ctx context.Context,
+ t registry.TestSpec,
+ alloc *quotapool.IntAlloc,
+ artifactsDir string,
+ wStatus *workerStatus,
+ ) (*clusterImpl, error) {
+ wStatus.SetStatus("creating cluster")
+ defer wStatus.SetStatus("")
+
+ lopt.l.PrintfCtx(ctx, "Creating new cluster for test %s: %s", t.Name, t.Cluster)
+
+ existingClusterName := clustersOpt.clusterName
+ if existingClusterName != "" {
+ // Logs for attaching to a cluster go to a dedicated log file.
+ logPath := filepath.Join(artifactsDir, runnerLogsDir, "cluster-create", existingClusterName+".log")
+ clusterL, err := logger.RootLogger(logPath, lopt.tee)
+ if err != nil {
+ return nil, err
+ }
+ defer clusterL.Close()
+ opt := attachOpt{
+ skipValidation: r.config.skipClusterValidationOnAttach,
+ skipStop: r.config.skipClusterStopOnAttach,
+ skipWipe: r.config.skipClusterWipeOnAttach,
+ }
+ return attachToExistingCluster(ctx, existingClusterName, clusterL, t.Cluster, opt, r.cr)
+ }
+
+ cfg := clusterConfig{
+ spec: t.Cluster,
+ artifactsDir: artifactsDir,
+ username: clustersOpt.user,
+ localCluster: clustersOpt.typ == localCluster,
+ alloc: alloc,
+ }
+ return clusterFactory.newCluster(ctx, cfg, wStatus.SetStatus, lopt.tee)
}
// Seed the default rand source so that different runs get different cluster
@@ -249,25 +294,23 @@ func (r *testRunner) Run(
qp := quotapool.NewIntPool("cloud cpu", uint64(clustersOpt.cpuQuota))
l := lopt.l
- var wg sync.WaitGroup
+ var wg sync.WaitGroup
for i := 0; i < parallelism; i++ {
i := i // Copy for closure.
wg.Add(1)
if err := r.stopper.RunAsyncTask(ctx, "worker", func(ctx context.Context) {
defer wg.Done()
- err := r.runWorker(
+ if err := r.runWorker(
ctx, fmt.Sprintf("w%d", i) /* name */, r.work, qp,
r.stopper.ShouldQuiesce(),
clustersOpt.keepClustersOnTestFailure,
- lopt.artifactsDir, lopt.literalArtifactsDir, lopt.tee, lopt.stdout,
- clusterAllocator,
+ lopt.artifactsDir, lopt.literalArtifactsDir, lopt.runnerLogPath, lopt.tee, lopt.stdout,
+ allocateCluster,
topt,
l,
- )
-
- if err != nil {
+ ); err != nil {
// A worker returned an error. Let's shut down.
msg := fmt.Sprintf("Worker %d returned with error. Quiescing. Error: %v", i, err)
shout(ctx, l, lopt.stdout, msg)
@@ -300,83 +343,12 @@ func (r *testRunner) Run(
}
passFailLine := r.generateReport()
shout(ctx, l, lopt.stdout, passFailLine)
-
- if r.numClusterErrs > 0 {
- shout(ctx, l, lopt.stdout, "%d clusters could not be created", r.numClusterErrs)
- return errClusterProvisioningFailed
- }
-
if len(r.status.fail) > 0 {
return errTestsFailed
}
return nil
}
-// N.B. currently this value is hardcoded per cloud provider.
-func numConcurrentClusterCreations() int {
- var res int
- if cloud == "aws" {
- // AWS has ridiculous API calls limits, so we're going to create one cluster
- // at a time. Internally, roachprod has throttling for the calls required to
- // create a single cluster.
- res = 1
- } else {
- res = 1000
- }
- return res
-}
-
-// defaultClusterAllocator is used by workers to create new clusters (or to attach
-// to an existing one).
-//
-// N.B. the resulting clusterAllocatorFn reuses the same clusterFactory to allocate clusters.
-func defaultClusterAllocator(
- r *testRunner, clustersOpt clustersOpt, lopt loggingOpt,
-) clusterAllocatorFn {
- clusterFactory := newClusterFactory(
- clustersOpt.user, clustersOpt.clusterID, lopt.artifactsDir, r.cr, numConcurrentClusterCreations())
-
- allocateCluster := func(
- ctx context.Context,
- t registry.TestSpec,
- alloc *quotapool.IntAlloc,
- artifactsDir string,
- wStatus *workerStatus,
- ) (*clusterImpl, error) {
- wStatus.SetStatus("creating cluster")
- defer wStatus.SetStatus("")
-
- existingClusterName := clustersOpt.clusterName
- if existingClusterName != "" {
- // Logs for attaching to a cluster go to a dedicated log file.
- logPath := filepath.Join(artifactsDir, runnerLogsDir, "cluster-create", existingClusterName+".log")
- clusterL, err := logger.RootLogger(logPath, lopt.tee)
- if err != nil {
- return nil, err
- }
- defer clusterL.Close()
- opt := attachOpt{
- skipValidation: r.config.skipClusterValidationOnAttach,
- skipStop: r.config.skipClusterStopOnAttach,
- skipWipe: r.config.skipClusterWipeOnAttach,
- }
- lopt.l.PrintfCtx(ctx, "Attaching to existing cluster %s for test %s", existingClusterName, t.Name)
- return attachToExistingCluster(ctx, existingClusterName, clusterL, t.Cluster, opt, r.cr)
- }
- lopt.l.PrintfCtx(ctx, "Creating new cluster for test %s: %s", t.Name, t.Cluster)
-
- cfg := clusterConfig{
- spec: t.Cluster,
- artifactsDir: artifactsDir,
- username: clustersOpt.user,
- localCluster: clustersOpt.typ == localCluster,
- alloc: alloc,
- }
- return clusterFactory.newCluster(ctx, cfg, wStatus.SetStatus, lopt.tee)
- }
- return allocateCluster
-}
-
type clusterAllocatorFn func(
ctx context.Context,
t registry.TestSpec,
@@ -396,18 +368,14 @@ type clusterAllocatorFn func(
// nonresponsive to timeout case) which might still be running and doing
// arbitrary things to the cluster it was using.
//
-// If a cluster cannot be provisioned (owing to an infrastructure issue), the corresponding
-// test is skipped; the provisioning error is posted to github; the count of cluster provisioning
-// errors is incremented.
-//
-// runWorker returns either error (other than cluster provisioning) or the count of cluster provisioning errors.
-//
// Args:
// name: The worker's name, to be used as a prefix for log messages.
// artifactsRootDir: The artifacts dir. Each test's logs are going to be under a
// run_<n> dir. If empty, test log files will not be created.
// literalArtifactsDir: The literal on-agent path where artifacts are stored.
// Only used for teamcity[publishArtifacts] messages.
+// testRunnerLogPath: The path to the test runner's log. It will be copied to
+// failing tests' artifacts dir if running under TeamCity.
// stdout: The Writer to use for messages that need to go to stdout (e.g. the
// "=== RUN" and "--- FAIL" lines).
// teeOpt: The teeing option for future test loggers.
@@ -421,6 +389,7 @@ func (r *testRunner) runWorker(
debug bool,
artifactsRootDir string,
literalArtifactsDir string,
+ testRunnerLogPath string,
teeOpt logger.TeeOptType,
stdout io.Writer,
allocateCluster clusterAllocatorFn,
@@ -434,6 +403,7 @@ func (r *testRunner) runWorker(
}()
var c *clusterImpl // The cluster currently being used.
+ var err error
// When this method returns we'll destroy the cluster we had at the time.
// Note that, if debug was set, c has been set to nil.
defer func() {
@@ -453,6 +423,7 @@ func (r *testRunner) runWorker(
l.PrintfCtx(ctx, "Worker exiting with canceled ctx. Not destroying cluster.")
}
}()
+
// Loop until there's no more work in the pool, we get interrupted, or an
// error occurs.
for {
@@ -476,71 +447,24 @@ func (r *testRunner) runWorker(
}
}
var testToRun testToRunRes
- var err error
-
wStatus.SetTest(nil /* test */, testToRunRes{})
wStatus.SetStatus("getting work")
- testToRun, err = r.getWork(
+ testToRun, c, err = r.getWork(
ctx, work, qp, c, interrupt, l,
getWorkCallbacks{
+ createCluster: func(ctx context.Context, ttr testToRunRes) (*clusterImpl, error) {
+ wStatus.SetTest(nil /* test */, ttr)
+ return allocateCluster(ctx, ttr.spec, ttr.alloc, artifactsRootDir, wStatus)
+ },
onDestroy: func() {
wStatus.SetCluster(nil)
},
})
- if err != nil {
- // Problem selecting a test, bail out.
+ if err != nil || testToRun.noWork {
return err
}
- if testToRun.noWork {
- shout(ctx, l, stdout, "no work remaining; runWorker is bailing out...")
- return nil
- }
- // Attempt to reuse existing cluster.
- if c != nil && testToRun.canReuseCluster {
- err = func() error {
- l.PrintfCtx(ctx, "Using existing cluster: %s. Wiping", c.name)
- if err := c.WipeE(ctx, l); err != nil {
- return err
- }
- if err := c.RunE(ctx, c.All(), "rm -rf "+perfArtifactsDir); err != nil {
- return errors.Wrapf(err, "failed to remove perf artifacts dir")
- }
- if c.localCertsDir != "" {
- if err := os.RemoveAll(c.localCertsDir); err != nil {
- return errors.Wrapf(err,
- "failed to remove local certs in %s", c.localCertsDir)
- }
- c.localCertsDir = ""
- }
- // Overwrite the spec of the cluster with the one coming from the test. In
- // particular, this overwrites the reuse policy to reflect what the test
- // intends to do with it.
- c.spec = testToRun.spec.Cluster
- return nil
- }()
- if err != nil {
- // N.B. handle any error during reuse attempt as clusterCreateErr.
- shout(ctx, l, stdout, "Unable to reuse cluster: %s due to: %s. Will attempt to create a fresh one",
- c.Name(), err)
- atomic.AddInt32(&r.numClusterErrs, 1)
- // Let's attempt to create a fresh one.
- testToRun.canReuseCluster = false
- }
- }
- var clusterCreateErr error
-
- if !testToRun.canReuseCluster {
- // Create a new cluster if can't reuse or reuse attempt failed.
- // N.B. non-reusable cluster would have been destroyed above.
- wStatus.SetTest(nil /* test */, testToRun)
- wStatus.SetStatus("creating cluster")
- c, clusterCreateErr = allocateCluster(ctx, testToRun.spec, testToRun.alloc, artifactsRootDir, wStatus)
- if clusterCreateErr != nil {
- atomic.AddInt32(&r.numClusterErrs, 1)
- shout(ctx, l, stdout, "Unable to create (or reuse) cluster for test %s due to: %s.",
- testToRun.spec.Name, clusterCreateErr)
- }
- }
+ c.status("running test")
+
// Prepare the test's logger.
logPath := ""
var artifactsDir string
@@ -571,51 +495,28 @@ func (r *testRunner) runWorker(
l: testL,
versionsBinaryOverride: topt.versionsBinaryOverride,
}
+ // Tell the cluster that, from now on, it will be run "on behalf of this
+ // test".
+ c.setTest(t)
+ wStatus.SetCluster(c)
+ wStatus.SetTest(t, testToRun)
+ wStatus.SetStatus("running test")
+
// Now run the test.
l.PrintfCtx(ctx, "starting test: %s:%d", testToRun.spec.Name, testToRun.runNum)
-
- if clusterCreateErr != nil {
- // N.B. cluster creation must have failed...
- // We don't want to prematurely abort the test suite since it's likely a transient issue.
- // Instead, let's report an infrastructure issue, mark the test as failed and continue with the next test.
- // Note, we fake the test name so that all cluster creation errors are posted to the same github issue.
- oldName := t.spec.Name
- oldOwner := t.spec.Owner
- // Generate failure reason and mark the test failed to preclude fetching (cluster) artifacts.
- t.printAndFail(0, clusterCreateErr)
- issueOutput := "test %s was skipped due to %s"
- issueOutput = fmt.Sprintf(issueOutput, oldName, t.FailureMsg())
- // N.B. issue title is of the form "roachtest: ${t.spec.Name} failed" (see UnitTestFormatter).
- t.spec.Name = "cluster_creation"
- t.spec.Owner = registry.OwnerDevInf
- r.maybePostGithubIssue(ctx, l, t, stdout, issueOutput)
- // Restore test name and owner.
- t.spec.Name = oldName
- t.spec.Owner = oldOwner
- } else {
- // Tell the cluster that, from now on, it will be run "on behalf of this
- // test".
- c.status("running test")
- c.setTest(t)
- wStatus.SetCluster(c)
- wStatus.SetTest(t, testToRun)
- wStatus.SetStatus("running test")
-
- err = r.runTest(ctx, t, testToRun.runNum, testToRun.runCount, c, stdout, testL)
- }
-
- if err != nil {
+ if err := r.runTest(
+ ctx, t, testToRun.runNum, testToRun.runCount, c, testRunnerLogPath, stdout, testL,
+ ); err != nil {
shout(ctx, l, stdout, "test returned error: %s: %s", t.Name(), err)
// Mark the test as failed if it isn't already.
if !t.Failed() {
t.printAndFail(0 /* skip */, err)
}
} else {
- msg := "test passed: %s (run %d)"
+ msg := "test passed"
if t.Failed() {
- msg = "test failed: %s (run %d)"
+ msg = fmt.Sprintf("test failed: %s (run %d)", t.Name(), testToRun.runNum)
}
- msg = fmt.Sprintf(msg, t.Name(), testToRun.runNum)
l.PrintfCtx(ctx, msg)
}
testL.Close()
@@ -626,23 +527,22 @@ func (r *testRunner) runWorker(
} else {
failureMsg += t.FailureMsg()
}
- if c != nil {
- if debug {
- // Save the cluster for future debugging.
- c.Save(ctx, failureMsg, l)
-
- // Continue with a fresh cluster.
- c = nil
- } else {
- // On any test failure or error, we destroy the cluster. We could be
- // more selective, but this sounds safer.
- l.PrintfCtx(ctx, "destroying cluster %s because: %s", c, failureMsg)
- c.Destroy(context.Background(), closeLogger, l)
- c = nil
- }
+
+ if debug {
+ // Save the cluster for future debugging.
+ c.Save(ctx, failureMsg, l)
+
+ // Continue with a fresh cluster.
+ c = nil
+ } else {
+ // On any test failure or error, we destroy the cluster. We could be
+ // more selective, but this sounds safer.
+ l.PrintfCtx(ctx, "destroying cluster %s because: %s", c, failureMsg)
+ c.Destroy(context.Background(), closeLogger, l)
+ c = nil
}
+
if err != nil {
- // N.B. bail out iff runTest exits exceptionally.
return err
}
} else {
@@ -701,16 +601,21 @@ func allStacks() []byte {
// An error is returned if the test is still running (on another goroutine) when
// this returns. This happens when the test doesn't respond to cancellation.
+// Returns true if the test is considered to have passed, false otherwise.
//
// Args:
// c: The cluster on which the test will run. runTest() does not wipe or destroy
// the cluster.
+// testRunnerLogPath: The path to the test runner's log. It will be copied to
+// the test's artifacts dir if the test fails and we're running under
+// TeamCity.
func (r *testRunner) runTest(
ctx context.Context,
t *testImpl,
runNum int,
runCount int,
c *clusterImpl,
+ testRunnerLogPath string,
stdout io.Writer,
l *logger.Logger,
) error {
@@ -764,6 +669,13 @@ func (r *testRunner) runTest(
if teamCity {
shout(ctx, l, stdout, "##teamcity[testFailed name='%s' details='%s' flowId='%s']",
t.Name(), teamCityEscape(output), runID)
+
+ // Copy a snapshot of the testrunner's log to the test's artifacts dir
+ // so that we collect it below.
+ cp := exec.Command("cp", testRunnerLogPath, t.ArtifactsDir())
+ if err := cp.Run(); err != nil {
+ l.ErrorfCtx(ctx, "failed to copy test runner's logs to test artifacts: %s", err)
+ }
}
shout(ctx, l, stdout, "--- FAIL: %s (%s)\n%s", runID, durationStr, output)
@@ -835,7 +747,8 @@ func (r *testRunner) runTest(
l.PrintfCtx(ctx, "cluster needs to survive until %s, but has expiration: %s. Extending.",
minExp, c.expiration)
if err := c.Extend(ctx, extend, l); err != nil {
- return errors.Wrapf(err, "failed to extend cluster: %s", c.name)
+ t.printfAndFail(0 /* skip */, "failed to extend cluster: %s", err)
+ return nil
}
}
@@ -853,6 +766,7 @@ func (r *testRunner) runTest(
go func() {
defer close(testReturnedCh) // closed only after we've grabbed the debug info below
+ // This is the call to actually run the test.
defer func() {
// We only have to record panics if the panic'd value is not the sentinel
// produced by t.Fatal*().
@@ -862,12 +776,10 @@ func (r *testRunner) runTest(
}
}()
- // This is the call to actually run the test.
t.Spec().(*registry.TestSpec).Run(runCtx, t, c)
}()
var timedOut bool
-
select {
case <-testReturnedCh:
s := "success"
@@ -1144,7 +1056,8 @@ func (r *testRunner) generateReport() string {
}
type getWorkCallbacks struct {
- onDestroy func()
+ createCluster func(context.Context, testToRunRes) (*clusterImpl, error)
+ onDestroy func()
}
// getWork selects the next test to run and creates a suitable cluster for it if
@@ -1154,6 +1067,7 @@ type getWorkCallbacks struct {
// preferred. If a test that can reuse it is not found (or if there's no more
// work), the cluster is destroyed (and so its resources are released).
//
+// If the cluster is to be reused, getWork() wipes it.
func (r *testRunner) getWork(
ctx context.Context,
work *workPool,
@@ -1162,25 +1076,54 @@ func (r *testRunner) getWork(
interrupt <-chan struct{},
l *logger.Logger,
callbacks getWorkCallbacks,
-) (testToRunRes, error) {
+) (testToRunRes, *clusterImpl, error) {
select {
case <-interrupt:
- return testToRunRes{}, fmt.Errorf("interrupted")
+ return testToRunRes{}, nil, fmt.Errorf("interrupted")
default:
}
testToRun, err := work.getTestToRun(ctx, c, qp, r.cr, callbacks.onDestroy, l)
if err != nil {
- return testToRunRes{}, err
+ return testToRunRes{}, nil, err
}
if !testToRun.noWork {
l.PrintfCtx(ctx, "Selected test: %s run: %d.", testToRun.spec.Name, testToRun.runNum)
+ }
+ // Are we done?
+ if testToRun.noWork {
+ return testToRun, nil, nil
+ }
+
+ // Create a cluster, if we no longer have one.
+ if testToRun.canReuseCluster {
+ l.PrintfCtx(ctx, "Using existing cluster: %s. Wiping", c.name)
+ if err := c.WipeE(ctx, l); err != nil {
+ return testToRunRes{}, nil, err
+ }
+ if err := c.RunE(ctx, c.All(), "rm -rf "+perfArtifactsDir); err != nil {
+ return testToRunRes{}, nil, errors.Wrapf(err, "failed to remove perf artifacts dir")
+ }
+ if c.localCertsDir != "" {
+ if err := os.RemoveAll(c.localCertsDir); err != nil {
+ return testToRunRes{}, nil, errors.Wrapf(err,
+ "failed to remove local certs in %s", c.localCertsDir)
+ }
+ c.localCertsDir = ""
+ }
+ // Overwrite the spec of the cluster with the one coming from the test. In
+ // particular, this overwrites the reuse policy to reflect what the test
+ // intends to do with it.
+ c.spec = testToRun.spec.Cluster
} else {
- // We're done--there are no remaining tests.
- return testToRun, nil
+ var err error
+ c, err = callbacks.createCluster(ctx, testToRun)
+ if err != nil {
+ return testToRunRes{}, nil, err
+ }
}
- return testToRun, nil
+ return testToRun, c, nil
}
// addWorker updates the bookkeeping for one more worker.
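
Note: the clusterAllocatorFn type (kept as context above) is what lets the runner's tests inject a failing allocator and lets the runner count provisioning failures instead of aborting, as the removed numClusterErrs logic did. Below is a minimal sketch of that injection pattern with simplified stand-in types; `cluster`, `allocatorFn`, and `runOne` only loosely mirror clusterImpl, clusterAllocatorFn, and runWorker.

    package main

    import (
    	"context"
    	"errors"
    	"fmt"
    	"sync/atomic"
    )

    // cluster is a trimmed stand-in for the runner's clusterImpl.
    type cluster struct{ name string }

    // allocatorFn is a simplified analogue of clusterAllocatorFn: it either
    // returns a usable cluster or a provisioning error.
    type allocatorFn func(ctx context.Context, testName string) (*cluster, error)

    func runOne(ctx context.Context, testName string, allocate allocatorFn, numClusterErrs *int32) {
    	c, err := allocate(ctx, testName)
    	if err != nil {
    		// Count the provisioning failure and move on to the next test
    		// instead of failing the whole run.
    		atomic.AddInt32(numClusterErrs, 1)
    		fmt.Printf("unable to create cluster for %s: %v\n", testName, err)
    		return
    	}
    	fmt.Printf("running %s on %s\n", testName, c.name)
    }

    func main() {
    	var errs int32
    	realAllocator := func(ctx context.Context, testName string) (*cluster, error) {
    		return &cluster{name: "c-" + testName}, nil
    	}
    	failingAllocator := func(ctx context.Context, testName string) (*cluster, error) {
    		return nil, errors.New("cluster creation failed")
    	}
    	runOne(context.Background(), "pass", realAllocator, &errs)
    	runOne(context.Background(), "fail", failingAllocator, &errs)
    	fmt.Printf("%d clusters could not be created\n", atomic.LoadInt32(&errs))
    }
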
diff --git a/pkg/cmd/roachtest/test_test.go b/pkg/cmd/roachtest/test_test.go
index 8d2ebb0b92..b7616af3fc 100644
--- a/pkg/cmd/roachtest/test_test.go
+++ b/pkg/cmd/roachtest/test_test.go
@@ -26,7 +26,6 @@ import (
"github.com/cockroachdb/cockroach/pkg/cmd/roachtest/test"
"github.com/cockroachdb/cockroach/pkg/roachprod/logger"
"github.com/cockroachdb/cockroach/pkg/testutils"
- "github.com/cockroachdb/cockroach/pkg/util/quotapool"
"github.com/cockroachdb/cockroach/pkg/util/stop"
"github.com/cockroachdb/cockroach/pkg/util/syncutil"
"github.com/cockroachdb/cockroach/pkg/util/version"
@@ -90,16 +89,6 @@ func nilLogger() *logger.Logger {
return l
}
-func alwaysFailingClusterAllocator(
- ctx context.Context,
- t registry.TestSpec,
- alloc *quotapool.IntAlloc,
- artifactsDir string,
- wStatus *workerStatus,
-) (*clusterImpl, error) {
- return nil, errors.New("cluster creation failed")
-}
-
func TestRunnerRun(t *testing.T) {
ctx := context.Background()
r := mkReg(t)
@@ -164,21 +153,11 @@ func TestRunnerRun(t *testing.T) {
cpuQuota: 1000,
keepClustersOnTestFailure: false,
}
- var clusterAllocator clusterAllocatorFn
- // run without cluster allocator error injection
err := runner.Run(ctx, tests, 1, /* count */
- defaultParallelism, copt, testOpts{}, lopt, clusterAllocator)
-
- assertTestCompletion(t, tests, c.filters, runner.getCompletedTests(), err, c.expErr)
+ defaultParallelism, copt, testOpts{}, lopt)
- // N.B. skip the case of no matching tests
- if len(tests) > 0 {
- // run _with_ cluster allocator error injection
- clusterAllocator = alwaysFailingClusterAllocator
- err = runner.Run(ctx, tests, 1, /* count */
- defaultParallelism, copt, testOpts{}, lopt, clusterAllocator)
-
- assertTestCompletion(t, tests, c.filters, runner.getCompletedTests(), err, "some clusters could not be created")
+ if !testutils.IsError(err, c.expErr) {
+ t.Fatalf("expected err: %q, but found %v. Filters: %s", c.expErr, err, c.filters)
}
out := stdout.String() + "\n" + stderr.String()
if exp := c.expOut; exp != "" && !strings.Contains(out, exp) {
@@ -188,29 +167,6 @@ func TestRunnerRun(t *testing.T) {
}
}
-// verifies that actual test completion conditions match the expected
-func assertTestCompletion(
- t *testing.T,
- tests []registry.TestSpec,
- filters []string,
- completed []completedTestInfo,
- actualErr error,
- expectedErr string,
-) {
- require.True(t, len(completed) == len(tests))
-
- for _, info := range completed {
- if info.test == "pass" {
- require.True(t, info.pass)
- } else if info.test == "fail" {
- require.True(t, !info.pass)
- }
- }
- if !testutils.IsError(actualErr, expectedErr) {
- t.Fatalf("expected err: %q, but found %v. Filters: %s", expectedErr, actualErr, filters)
- }
-}
-
type syncedBuffer struct {
mu syncutil.Mutex
buf bytes.Buffer
@@ -259,7 +215,7 @@ func TestRunnerTestTimeout(t *testing.T) {
},
}
err := runner.Run(ctx, []registry.TestSpec{test}, 1, /* count */
- defaultParallelism, copt, testOpts{}, lopt, nil /* clusterAllocator */)
+ defaultParallelism, copt, testOpts{}, lopt)
if !testutils.IsError(err, "some tests failed") {
t.Fatalf("expected error \"some tests failed\", got: %v", err)
}
@@ -351,7 +307,7 @@ func runExitCodeTest(t *testing.T, injectedError error) error {
stderr: ioutil.Discard,
artifactsDir: "",
}
- return runner.Run(ctx, tests, 1, 1, clustersOpt{}, testOpts{}, lopt, nil /* clusterAllocator */)
+ return runner.Run(ctx, tests, 1, 1, clustersOpt{}, testOpts{}, lopt)
}
func TestExitCode(t *testing.T) {
diff --git a/pkg/cmd/roachtest/tests/drain.go b/pkg/cmd/roachtest/tests/drain.go
index 6cf9387f69..7d75784793 100644
--- a/pkg/cmd/roachtest/tests/drain.go
+++ b/pkg/cmd/roachtest/tests/drain.go
@@ -31,164 +31,166 @@ import (
func registerDrain(r registry.Registry) {
{
r.Add(registry.TestSpec{
- Name: "drain/conn-wait",
+ Name: "drain/early-exit-conn-wait",
Owner: registry.OwnerSQLExperience,
Cluster: r.MakeClusterSpec(1),
Run: func(ctx context.Context, t test.Test, c cluster.Cluster) {
- runConnectionWait(ctx, t, c)
+ runEarlyExitInConnectionWait(ctx, t, c)
},
- },
- )
+ })
+
+ r.Add(registry.TestSpec{
+ Name: "drain/warn-conn-wait-timeout",
+ Owner: registry.OwnerSQLExperience,
+ Cluster: r.MakeClusterSpec(1),
+ Run: func(ctx context.Context, t test.Test, c cluster.Cluster) {
+ runTestWarningForConnWait(ctx, t, c)
+ },
+ })
}
}
-func runConnectionWait(ctx context.Context, t test.Test, c cluster.Cluster) {
+// runEarlyExitInConnectionWait is to verify that draining proceeds immediately
+// after connections are closed client-side.
+func runEarlyExitInConnectionWait(ctx context.Context, t test.Test, c cluster.Cluster) {
var err error
+ const (
+ // Set the duration of each phase of the draining period.
+ drainWaitDuration = 5 * time.Second
+ connectionWaitDuration = 100 * time.Second
+ queryWaitDuration = 10 * time.Second
+ // pokeDuringConnWaitTimestamp is the timestamp after the server
+ // starts waiting for SQL connections to close (with the start of the whole
+ // draining process marked as timestamp 0). It should be set larger than
+ // drainWaitDuration, but smaller than (drainWaitDuration +
+ // connectionWaitDuration).
+ pokeDuringConnWaitTimestamp = 15 * time.Second
+ connMaxLifetime = 10 * time.Second
+ connMaxCount = 5
+ nodeToDrain = 1
+ )
+ totalWaitDuration := drainWaitDuration + connectionWaitDuration + queryWaitDuration
- err = c.PutE(ctx, t.L(), t.Cockroach(), "./cockroach", c.All())
- require.NoError(t, err, "cannot mount cockroach binary")
+ prepareCluster(ctx, t, c, drainWaitDuration, connectionWaitDuration, queryWaitDuration)
- // Verify that draining proceeds immediately after connections are closed client-side.
- {
- const (
- // Set the duration of each phase of the draining period.
- drainWaitDuration = 10 * time.Second
- connectionWaitDuration = 100 * time.Second
- queryWaitDuration = 10 * time.Second
- // pokeDuringConnWaitTimestamp is the timestamp after the server
- // starts waiting for SQL connections to close (with the start of the whole
- // draining process marked as timestamp 0). It should be set larger than
- // drainWaitDuration, but smaller than (drainWaitDuration +
- // connectionWaitDuration).
- pokeDuringConnWaitTimestamp = 45 * time.Second
- connMaxLifetime = 30 * time.Second
- connMaxCount = 5
- nodeToDrain = 1
- )
- totalWaitDuration := drainWaitDuration + connectionWaitDuration + queryWaitDuration
-
- prepareCluster(ctx, t, c, drainWaitDuration, connectionWaitDuration, queryWaitDuration)
-
- db := c.Conn(ctx, t.L(), nodeToDrain)
- defer db.Close()
-
- db.SetConnMaxLifetime(connMaxLifetime)
- db.SetMaxOpenConns(connMaxCount)
-
- var conns []*gosql.Conn
-
- // Get two connections from the connection pools.
- for j := 0; j < 2; j++ {
- conn, err := db.Conn(ctx)
-
- require.NoError(t, err, "failed to a SQL connection from the connection pool")
-
- conns = append(conns, conn)
- }
-
- // Start draining the node.
- m := c.NewMonitor(ctx, c.Node(nodeToDrain))
-
- m.Go(func(ctx context.Context) error {
- t.Status(fmt.Sprintf("start draining node %d", nodeToDrain))
- return c.RunE(ctx,
- c.Node(nodeToDrain),
- fmt.Sprintf("./cockroach node drain --insecure --drain-wait=%fs",
- totalWaitDuration.Seconds()))
- })
+ db := c.Conn(ctx, t.L(), nodeToDrain)
+ defer db.Close()
- drainStartTimestamp := timeutil.Now()
+ db.SetConnMaxLifetime(connMaxLifetime)
+ db.SetMaxOpenConns(connMaxCount)
- // Sleep till the server is in the status of waiting for users to close SQL
- // connections. Verify that the server is rejecting new SQL connections now.
- time.Sleep(pokeDuringConnWaitTimestamp)
- _, err = db.Conn(ctx)
- if err != nil {
- t.Status(fmt.Sprintf("%s after draining starts, server is rejecting "+
- "new SQL connections: %v", pokeDuringConnWaitTimestamp, err))
- } else {
- t.Fatal(errors.New("new SQL connections should not be allowed when the server " +
- "starts waiting for the user to close SQL connections"))
- }
+ var conns []*gosql.Conn
- require.Equalf(t, db.Stats().OpenConnections, 2, "number of open connections should be 2")
+ // Get two connections from the connection pools.
+ for j := 0; j < 2; j++ {
+ conn, err := db.Conn(ctx)
- t.Status("number of open connections: ", db.Stats().OpenConnections)
+			require.NoError(t, err, "failed to get a SQL connection from the connection pool")
- randConn := conns[rand.Intn(len(conns))]
+ conns = append(conns, conn)
+ }
- // When server is waiting clients to close connections, verify that SQL
- // queries do not fail.
- _, err = randConn.ExecContext(ctx, "SELECT 1;")
+ // Start draining the node.
+ m := c.NewMonitor(ctx, c.Node(nodeToDrain))
+
+ m.Go(func(ctx context.Context) error {
+ t.Status(fmt.Sprintf("start draining node %d", nodeToDrain))
+ return c.RunE(ctx,
+ c.Node(nodeToDrain),
+ fmt.Sprintf("./cockroach node drain --insecure --drain-wait=%fs",
+ totalWaitDuration.Seconds()))
+ })
+
+ drainStartTimestamp := timeutil.Now()
+
+ // Sleep till the server is in the status of waiting for users to close SQL
+ // connections. Verify that the server is rejecting new SQL connections now.
+ time.Sleep(pokeDuringConnWaitTimestamp)
+
+ if _, err := db.Conn(ctx); err != nil {
+ t.Status(fmt.Sprintf("%s after draining starts, server is rejecting "+
+ "new SQL connections: %v", pokeDuringConnWaitTimestamp, err))
+ } else {
+ t.Fatal(errors.New("new SQL connections should not be allowed when the server " +
+ "starts waiting for the user to close SQL connections"))
+ }
- require.NoError(t, err, "expected query not to fail before the "+
- "server starts draining SQL connections")
+ require.Equalf(t, db.Stats().OpenConnections, 2, "number of open connections should be 2")
- for _, conn := range conns {
- err := conn.Close()
- require.NoError(t, err,
- "expected connection to be able to be successfully closed client-side")
- }
+ t.Status("number of open connections: ", db.Stats().OpenConnections)
- t.Status("all SQL connections are put back to the connection pool")
+ randConn := conns[rand.Intn(len(conns))]
+ t.Status("execting sql query with connection %s", randConn)
- err = m.WaitE()
- require.NoError(t, err, "error waiting for the draining to finish")
+ // When server is waiting clients to close connections, verify that SQL
+ // queries do not fail.
+ _, err = randConn.ExecContext(ctx, "SELECT 1;")
+ require.NoError(t, err, "expected query not to fail before the "+
+ "server starts draining SQL connections")
- drainEndTimestamp := timeutil.Now()
- actualDrainDuration := drainEndTimestamp.Sub(drainStartTimestamp).Seconds()
+ for _, conn := range conns {
+ err := conn.Close()
+ require.NoError(t, err,
+ "expected connection to be able to be successfully closed client-side")
+ }
+
+ t.Status("all SQL connections are put back to the connection pool")
- t.L().Printf("the draining lasted %f seconds", actualDrainDuration)
+ err = m.WaitE()
+ require.NoError(t, err, "error waiting for the draining to finish")
- if actualDrainDuration >= float64(totalWaitDuration)-10 {
- t.Fatal(errors.New("the draining process didn't early exit " +
- "when waiting for server to close all SQL connections"))
- }
+ drainEndTimestamp := timeutil.Now()
+ actualDrainDuration := drainEndTimestamp.Sub(drainStartTimestamp).Seconds()
- // Fully quit the draining node so that we can restart it for the next test.
- quitNode(ctx, t, c, nodeToDrain)
+ t.L().Printf("the draining lasted %f seconds", actualDrainDuration)
+
+	if actualDrainDuration >= totalWaitDuration.Seconds()-10 {
+ t.Fatal(errors.New("the draining process didn't early exit " +
+ "when waiting for server to close all SQL connections"))
}
- // Verify a warning exists in the case that connectionWait expires.
- {
- const (
- // Set the duration of the draining period.
- drainWaitDuration = 0 * time.Second
- connectionWaitDuration = 10 * time.Second
- queryWaitDuration = 20 * time.Second
- nodeToDrain = 1
- )
+}
- totalWaitDuration := drainWaitDuration + connectionWaitDuration + queryWaitDuration
+// runTestWarningForConnWait is to verify a warning exists in the case that
+// connectionWait expires.
+func runTestWarningForConnWait(ctx context.Context, t test.Test, c cluster.Cluster) {
+ var err error
+ const (
+ // Set the duration of the draining period.
+ drainWaitDuration = 0 * time.Second
+ connectionWaitDuration = 10 * time.Second
+ queryWaitDuration = 20 * time.Second
+ nodeToDrain = 1
+ )
- prepareCluster(ctx, t, c, drainWaitDuration, connectionWaitDuration, queryWaitDuration)
+ totalWaitDuration := drainWaitDuration + connectionWaitDuration + queryWaitDuration
- db := c.Conn(ctx, t.L(), nodeToDrain)
- defer db.Close()
+ prepareCluster(ctx, t, c, drainWaitDuration, connectionWaitDuration, queryWaitDuration)
- // Get a connection from the connection pool.
- _, err = db.Conn(ctx)
+ db := c.Conn(ctx, t.L(), nodeToDrain)
+ defer db.Close()
- require.NoError(t, err, "cannot get a SQL connection from the connection pool")
+ // Get a connection from the connection pool.
+ _, err = db.Conn(ctx)
- m := c.NewMonitor(ctx, c.Node(nodeToDrain))
- m.Go(func(ctx context.Context) error {
- t.Status(fmt.Sprintf("draining node %d", nodeToDrain))
- return c.RunE(ctx,
- c.Node(nodeToDrain),
- fmt.Sprintf("./cockroach node drain --insecure --drain-wait=%fs",
- totalWaitDuration.Seconds()))
- })
+ require.NoError(t, err, "cannot get a SQL connection from the connection pool")
- err = m.WaitE()
- require.NoError(t, err, "error waiting for the draining to finish")
+ m := c.NewMonitor(ctx, c.Node(nodeToDrain))
+ m.Go(func(ctx context.Context) error {
+ t.Status(fmt.Sprintf("draining node %d", nodeToDrain))
+ return c.RunE(ctx,
+ c.Node(nodeToDrain),
+ fmt.Sprintf("./cockroach node drain --insecure --drain-wait=%fs",
+ totalWaitDuration.Seconds()))
+ })
- logFile := filepath.Join("logs", "*.log")
- err = c.RunE(ctx, c.Node(nodeToDrain),
- "grep", "-q", "'proceeding to drain SQL connections'", logFile)
- require.NoError(t, err, "warning is not logged in the log file")
- }
+ err = m.WaitE()
+ require.NoError(t, err, "error waiting for the draining to finish")
+ logFile := filepath.Join("logs", "*.log")
+ err = c.RunE(ctx, c.Node(nodeToDrain),
+ "grep", "-q", "'proceeding to drain SQL connections'", logFile)
+ require.NoError(t, err, "warning is not logged in the log file")
}
// prepareCluster is to start the server on nodes in the given cluster, and set
@@ -201,6 +203,9 @@ func prepareCluster(
connectionWait time.Duration,
queryWait time.Duration,
) {
+ var err error
+ err = c.PutE(ctx, t.L(), t.Cockroach(), "./cockroach", c.All())
+ require.NoError(t, err, "cannot mount cockroach binary")
c.Start(ctx, t.L(), option.DefaultStartOpts(), install.MakeClusterSettings(), c.All())
@@ -215,22 +220,6 @@ func prepareCluster(
connectionWait.Seconds(),
queryWait.Seconds(),
)
- _, err := db.ExecContext(ctx, waitPhasesSettingStmt)
+ _, err = db.ExecContext(ctx, waitPhasesSettingStmt)
require.NoError(t, err, "cannot set cluster setting")
}
-
-func quitNode(ctx context.Context, t test.Test, c cluster.Cluster, node int) {
- args := []string{
- "./cockroach", "quit", "--insecure", "--logtostderr=INFO",
- fmt.Sprintf("--port={pgport:%d}", node)}
- result, err := c.RunWithDetailsSingleNode(ctx, t.L(), c.Node(node), args...)
- output := result.Stdout + result.Stderr
- t.L().Printf("cockroach quit:\n%s\n", output)
- require.NoError(t, err, "cannot quit cockroach")
-
- stopOpts := option.DefaultStopOpts()
- stopOpts.RoachprodOpts.Sig = 0
- stopOpts.RoachprodOpts.Wait = true
- c.Stop(ctx, t.L(), stopOpts, c.All())
- t.L().Printf("stopped cluster")
-}
diff --git a/pkg/cmd/roachtest/tests/drt.go b/pkg/cmd/roachtest/tests/drt.go
index 3841ef186f..f43ad6f26c 100644
--- a/pkg/cmd/roachtest/tests/drt.go
+++ b/pkg/cmd/roachtest/tests/drt.go
@@ -10,6 +10,8 @@
package tests
+//go:generate mockgen -package tests -destination drt_generated_test.go . PromClient
+
import (
"context"
"fmt"
@@ -18,17 +20,21 @@ import (
"github.com/cockroachdb/cockroach/pkg/cmd/roachtest/prometheus"
"github.com/cockroachdb/cockroach/pkg/roachprod/logger"
"github.com/cockroachdb/errors"
+ promv1 "github.com/prometheus/client_golang/api/prometheus/v1"
"github.com/prometheus/common/model"
)
-//go:generate mockgen -package tests -destination drt_generated_test.go github.com/cockroachdb/cockroach/pkg/cmd/roachtest/prometheus Client
+// PromClient is an interface allowing queries against Prometheus.
+type PromClient interface {
+ Query(ctx context.Context, query string, ts time.Time) (model.Value, promv1.Warnings, error)
+}
type tpccChaosEventProcessor struct {
workloadInstances []workloadInstance
workloadNodeIP string
ops []string
ch chan ChaosEvent
- promClient prometheus.Client
+ promClient PromClient
errs []error
// allowZeroSuccessDuringUptime allows 0 successes during an uptime event.
diff --git a/pkg/cmd/roachtest/tests/drt_generated_test.go b/pkg/cmd/roachtest/tests/drt_generated_test.go
index f811a50317..c2ee148e18 100644
--- a/pkg/cmd/roachtest/tests/drt_generated_test.go
+++ b/pkg/cmd/roachtest/tests/drt_generated_test.go
@@ -1,5 +1,5 @@
// Code generated by MockGen. DO NOT EDIT.
-// Source: github.com/cockroachdb/cockroach/pkg/cmd/roachtest/prometheus (interfaces: Client)
+// Source: github.com/cockroachdb/cockroach/pkg/cmd/roachtest/tests (interfaces: PromClient)
// Package tests is a generated GoMock package.
package tests
@@ -14,31 +14,31 @@ import (
model "github.com/prometheus/common/model"
)
-// MockClient is a mock of Client interface.
-type MockClient struct {
+// MockPromClient is a mock of PromClient interface.
+type MockPromClient struct {
ctrl *gomock.Controller
- recorder *MockClientMockRecorder
+ recorder *MockPromClientMockRecorder
}
-// MockClientMockRecorder is the mock recorder for MockClient.
-type MockClientMockRecorder struct {
- mock *MockClient
+// MockPromClientMockRecorder is the mock recorder for MockPromClient.
+type MockPromClientMockRecorder struct {
+ mock *MockPromClient
}
-// NewMockClient creates a new mock instance.
-func NewMockClient(ctrl *gomock.Controller) *MockClient {
- mock := &MockClient{ctrl: ctrl}
- mock.recorder = &MockClientMockRecorder{mock}
+// NewMockPromClient creates a new mock instance.
+func NewMockPromClient(ctrl *gomock.Controller) *MockPromClient {
+ mock := &MockPromClient{ctrl: ctrl}
+ mock.recorder = &MockPromClientMockRecorder{mock}
return mock
}
// EXPECT returns an object that allows the caller to indicate expected use.
-func (m *MockClient) EXPECT() *MockClientMockRecorder {
+func (m *MockPromClient) EXPECT() *MockPromClientMockRecorder {
return m.recorder
}
// Query mocks base method.
-func (m *MockClient) Query(arg0 context.Context, arg1 string, arg2 time.Time) (model.Value, v1.Warnings, error) {
+func (m *MockPromClient) Query(arg0 context.Context, arg1 string, arg2 time.Time) (model.Value, v1.Warnings, error) {
m.ctrl.T.Helper()
ret := m.ctrl.Call(m, "Query", arg0, arg1, arg2)
ret0, _ := ret[0].(model.Value)
@@ -48,7 +48,7 @@ func (m *MockClient) Query(arg0 context.Context, arg1 string, arg2 time.Time) (m
}
// Query indicates an expected call of Query.
-func (mr *MockClientMockRecorder) Query(arg0, arg1, arg2 interface{}) *gomock.Call {
+func (mr *MockPromClientMockRecorder) Query(arg0, arg1, arg2 interface{}) *gomock.Call {
mr.mock.ctrl.T.Helper()
- return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Query", reflect.TypeOf((*MockClient)(nil).Query), arg0, arg1, arg2)
+ return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Query", reflect.TypeOf((*MockPromClient)(nil).Query), arg0, arg1, arg2)
}
diff --git a/pkg/cmd/roachtest/tests/drt_test.go b/pkg/cmd/roachtest/tests/drt_test.go
index 7715a1f8f8..01a52bbcf8 100644
--- a/pkg/cmd/roachtest/tests/drt_test.go
+++ b/pkg/cmd/roachtest/tests/drt_test.go
@@ -518,8 +518,8 @@ func TestTPCCChaosEventProcessor(t *testing.T) {
allowZeroSuccessDuringUptime: tc.allowZeroSuccessDuringUptime,
maxErrorsDuringUptime: tc.maxErrorsDuringUptime,
- promClient: func(ctrl *gomock.Controller) prometheus.Client {
- c := NewMockClient(ctrl)
+ promClient: func(ctrl *gomock.Controller) PromClient {
+ c := NewMockPromClient(ctrl)
e := c.EXPECT()
for _, m := range tc.mockPromQueries {
e.Query(ctx, m.q, m.t).Return(
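For context, the regenerated mock is consumed exactly as before, only under the new name. A minimal sketch of such a test, assuming it sits alongside the generated mock in package tests; the query string and timestamp are placeholders, not values taken from this diff:

package tests

import (
	"context"
	"testing"
	"time"

	"github.com/golang/mock/gomock"
	"github.com/prometheus/common/model"
)

func TestPromClientMockSketch(t *testing.T) {
	ctrl := gomock.NewController(t)
	defer ctrl.Finish()

	ctx := context.Background()
	ts := time.Unix(0, 0)

	// Expect a single Query call and hand back an empty (but valid) vector.
	c := NewMockPromClient(ctrl)
	c.EXPECT().Query(ctx, "up", ts).Return(model.Value(model.Vector{}), nil, nil)

	// The generated mock satisfies the new PromClient interface.
	var pc PromClient = c
	if _, _, err := pc.Query(ctx, "up", ts); err != nil {
		t.Fatal(err)
	}
}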
diff --git a/pkg/cmd/roachtest/tests/slow_drain.go b/pkg/cmd/roachtest/tests/slow_drain.go
index a8c97b760d..2d4f3a5125 100644
--- a/pkg/cmd/roachtest/tests/slow_drain.go
+++ b/pkg/cmd/roachtest/tests/slow_drain.go
@@ -27,7 +27,7 @@ import (
func registerSlowDrain(r registry.Registry) {
numNodes := 6
- duration := time.Minute
+ duration := 30 * time.Minute
r.Add(registry.TestSpec{
Name: fmt.Sprintf("slow-drain/duration=%s", duration),
@@ -55,6 +55,7 @@ func runSlowDrain(ctx context.Context, t test.Test, c cluster.Cluster, duration
require.NoError(t, err)
c.Start(ctx, t.L(), option.DefaultStartOpts(), install.MakeClusterSettings(), c.All())
+ c.Run(ctx, c.Node(pinnedNodeID), `./cockroach workload init kv --drop --splits 1000`)
run := func(stmt string) {
db := c.Conn(ctx, t.L(), pinnedNodeID)
@@ -86,30 +87,34 @@ func runSlowDrain(ctx context.Context, t test.Test, c cluster.Cluster, duration
}
}
- {
- db := c.Conn(ctx, t.L(), pinnedNodeID)
- defer db.Close()
+ run(fmt.Sprintf(`ALTER RANGE default CONFIGURE ZONE USING num_replicas=%d`, replicationFactor))
+ run(fmt.Sprintf(`ALTER DATABASE system CONFIGURE ZONE USING num_replicas=%d`, replicationFactor))
- // Set the replication factor.
- run(fmt.Sprintf(`ALTER RANGE default CONFIGURE ZONE USING num_replicas=%d`, replicationFactor))
- run(fmt.Sprintf(`ALTER DATABASE system CONFIGURE ZONE USING num_replicas=%d`, replicationFactor))
+ db := c.Conn(ctx, t.L(), pinnedNodeID)
+ defer db.Close()
- // Wait for initial up-replication.
- waitForReplication(db)
- }
+ waitForReplication(db)
- // Drain the last 5 nodes from the cluster, resulting in immovable leases on
- // at least one of the nodes.
m := c.NewMonitor(ctx)
+ m.Go(func(ctx context.Context) error {
+ return c.RunE(ctx, c.Node(pinnedNodeID),
+ fmt.Sprintf("./cockroach workload run kv --max-rate 500 --tolerate-errors --duration=%s {pgurl:1-6}",
+ duration.String(),
+ ),
+ )
+ },
+ )
+
+ // Let the workload run for a small amount of time.
+ time.Sleep(1 * time.Minute)
+
+ // Drain the last 5 nodes from the cluster, resulting in immovable leases on at least one of the nodes.
for nodeID := 2; nodeID <= numNodes; nodeID++ {
id := nodeID
m.Go(func(ctx context.Context) error {
drain := func(id int) error {
t.Status(fmt.Sprintf("draining node %d", id))
- return c.RunE(ctx,
- c.Node(id),
- fmt.Sprintf("./cockroach node drain %d --insecure --drain-wait=%s", id, duration.String()),
- )
+ return c.RunE(ctx, c.Node(id), fmt.Sprintf("./cockroach node drain %d --insecure", id))
}
return drain(id)
})
@@ -120,7 +125,6 @@ func runSlowDrain(ctx context.Context, t test.Test, c cluster.Cluster, duration
// Check for more verbose logging concerning lease transfer stalls.
// The extra logging should exist on the logs of at least one of the nodes.
- t.Status("checking for stalling drain logging...")
found := false
for nodeID := 2; nodeID <= numNodes; nodeID++ {
if err := c.RunE(ctx, c.Node(nodeID),
@@ -130,10 +134,8 @@ func runSlowDrain(ctx context.Context, t test.Test, c cluster.Cluster, duration
}
}
require.True(t, found)
- t.Status("log messages found")
- // Expect the drain timeout to expire.
- t.Status("waiting for the drain timeout to elapse...")
+ // Expect a failed drain.
err = m.WaitE()
require.Error(t, err)
}
diff --git a/pkg/cmd/roachtest/tests/sstable_corruption.go b/pkg/cmd/roachtest/tests/sstable_corruption.go
index 237591ba54..d90b315d21 100644
--- a/pkg/cmd/roachtest/tests/sstable_corruption.go
+++ b/pkg/cmd/roachtest/tests/sstable_corruption.go
@@ -43,7 +43,7 @@ func runSSTableCorruption(ctx context.Context, t test.Test, c cluster.Cluster) {
// to have multiple ranges, and some sstables with only table keys.
t.Status("importing tpcc fixture")
c.Run(ctx, workloadNode,
- "./cockroach workload fixtures import tpcc --warehouses=500 --fks=false --checks=false")
+ "./cockroach workload fixtures import tpcc --warehouses=100 --fks=false --checks=false")
return nil
})
m.Wait()
@@ -55,7 +55,7 @@ func runSSTableCorruption(ctx context.Context, t test.Test, c cluster.Cluster) {
result, err := c.RunWithDetailsSingleNode(ctx, t.L(), c.Node(node),
"./cockroach debug pebble manifest dump {store-dir}/MANIFEST-* | grep -v added | grep -v deleted | grep \"\\[/Table\"")
if err != nil {
- t.Fatalf("could not find tables to corrupt: %s\nstdout: %s\nstderr: %s", err, result.Stdout, result.Stderr)
+ t.Fatal(err)
}
tableSSTs := strings.Split(result.Stdout, "\n")
if len(tableSSTs) == 0 {
diff --git a/pkg/cmd/roachtest/work_pool.go b/pkg/cmd/roachtest/work_pool.go
index 30ea8e7d70..2475ff9341 100644
--- a/pkg/cmd/roachtest/work_pool.go
+++ b/pkg/cmd/roachtest/work_pool.go
@@ -172,9 +172,6 @@ func (p *workPool) selectTestForCluster(
// If multiple tests are eligible to run, one with the most runs left is chosen.
// TODO(andrei): We could be smarter in guessing what kind of cluster is best to
// allocate.
-//
-// ensures: !testToRunRes.noWork || error == nil
-//
func (p *workPool) selectTest(ctx context.Context, qp *quotapool.IntPool) (testToRunRes, error) {
var ttr testToRunRes
alloc, err := qp.AcquireFunc(ctx, func(ctx context.Context, pi quotapool.PoolInfo) (uint64, error) {
diff --git a/pkg/gen/docs.bzl b/pkg/gen/docs.bzl
index 73faa4a4d0..7af9c489b3 100644
--- a/pkg/gen/docs.bzl
+++ b/pkg/gen/docs.bzl
@@ -50,7 +50,6 @@ DOCS_SRCS = [
"//docs/generated/sql/bnf:alter_table_partition_by.bnf",
"//docs/generated/sql/bnf:alter_table_set_schema_stmt.bnf",
"//docs/generated/sql/bnf:alter_table_stmt.bnf",
- "//docs/generated/sql/bnf:alter_tenant_csetting_stmt.bnf",
"//docs/generated/sql/bnf:alter_type.bnf",
"//docs/generated/sql/bnf:alter_view.bnf",
"//docs/generated/sql/bnf:alter_view_owner_stmt.bnf",
@@ -184,7 +183,6 @@ DOCS_SRCS = [
"//docs/generated/sql/bnf:set_exprs_internal.bnf",
"//docs/generated/sql/bnf:set_local_stmt.bnf",
"//docs/generated/sql/bnf:set_operation.bnf",
- "//docs/generated/sql/bnf:set_or_reset_csetting_stmt.bnf",
"//docs/generated/sql/bnf:set_rest.bnf",
"//docs/generated/sql/bnf:set_rest_more.bnf",
"//docs/generated/sql/bnf:set_session_stmt.bnf",
@@ -204,7 +202,6 @@ DOCS_SRCS = [
"//docs/generated/sql/bnf:show_indexes_stmt.bnf",
"//docs/generated/sql/bnf:show_jobs.bnf",
"//docs/generated/sql/bnf:show_keys.bnf",
- "//docs/generated/sql/bnf:show_local_or_tenant_csettings_stmt.bnf",
"//docs/generated/sql/bnf:show_locality.bnf",
"//docs/generated/sql/bnf:show_locality_stmt.bnf",
"//docs/generated/sql/bnf:show_partitions_stmt.bnf",
diff --git a/pkg/gen/protobuf.bzl b/pkg/gen/protobuf.bzl
index f931812340..df87c11dff 100644
--- a/pkg/gen/protobuf.bzl
+++ b/pkg/gen/protobuf.bzl
@@ -20,7 +20,6 @@ PROTOBUF_SRCS = [
"//pkg/kv/kvnemesis:kvnemesis_go_proto",
"//pkg/kv/kvserver/closedts/ctpb:ctpb_go_proto",
"//pkg/kv/kvserver/concurrency/lock:lock_go_proto",
- "//pkg/kv/kvserver/concurrency/poison:poison_go_proto",
"//pkg/kv/kvserver/kvserverpb:kvserverpb_go_proto",
"//pkg/kv/kvserver/liveness/livenesspb:livenesspb_go_proto",
"//pkg/kv/kvserver/loqrecovery/loqrecoverypb:loqrecoverypb_go_proto",
@@ -39,7 +38,6 @@ PROTOBUF_SRCS = [
"//pkg/sql/contentionpb:contentionpb_go_proto",
"//pkg/sql/execinfrapb:execinfrapb_go_proto",
"//pkg/sql/inverted:inverted_go_proto",
- "//pkg/sql/lex:lex_go_proto",
"//pkg/sql/pgwire/pgerror:pgerror_go_proto",
"//pkg/sql/protoreflect/test:protoreflecttest_go_proto",
"//pkg/sql/rowenc/rowencpb:rowencpb_go_proto",
diff --git a/pkg/gossip/infostore.go b/pkg/gossip/infostore.go
index 0933a73ec1..87c2a1223a 100644
--- a/pkg/gossip/infostore.go
+++ b/pkg/gossip/infostore.go
@@ -28,7 +28,6 @@ import (
"github.com/cockroachdb/cockroach/pkg/util/syncutil"
"github.com/cockroachdb/cockroach/pkg/util/timeutil"
"github.com/cockroachdb/errors"
- "github.com/cockroachdb/redact"
)
type stringMatcher interface {
@@ -254,7 +253,7 @@ func (is *infoStore) addInfo(key string, i *Info) error {
if highWaterStamp, ok := is.highWaterStamps[i.NodeID]; ok && highWaterStamp >= i.OrigStamp {
// Report both timestamps in the crash.
log.Fatalf(context.Background(),
- "high water stamp %d >= %d", redact.Safe(highWaterStamp), redact.Safe(i.OrigStamp))
+ "high water stamp %d >= %d", log.Safe(highWaterStamp), log.Safe(i.OrigStamp))
}
}
// Update info map.
diff --git a/pkg/jobs/BUILD.bazel b/pkg/jobs/BUILD.bazel
index d25942ec95..040ae7af44 100644
--- a/pkg/jobs/BUILD.bazel
+++ b/pkg/jobs/BUILD.bazel
@@ -89,7 +89,6 @@ go_test(
"testutils_test.go",
],
embed = [":jobs"],
- shard_count = 16,
deps = [
"//pkg/base",
"//pkg/clusterversion",
diff --git a/pkg/jobs/job_scheduler.go b/pkg/jobs/job_scheduler.go
index b6ed842f46..e0a1390af6 100644
--- a/pkg/jobs/job_scheduler.go
+++ b/pkg/jobs/job_scheduler.go
@@ -30,7 +30,6 @@ import (
"github.com/cockroachdb/cockroach/pkg/util/log"
"github.com/cockroachdb/cockroach/pkg/util/metric"
"github.com/cockroachdb/cockroach/pkg/util/stop"
- "github.com/cockroachdb/cockroach/pkg/util/syncutil"
"github.com/cockroachdb/errors"
"github.com/cockroachdb/logtags"
)
@@ -376,54 +375,6 @@ func (s *jobScheduler) schedulerEnabledOnThisNode(ctx context.Context) bool {
return enabled
}
-type syncCancelFunc struct {
- syncutil.Mutex
- context.CancelFunc
-}
-
-// newCancelWhenDisabled arranges for scheduler enabled setting callback to cancel
-// currently executing context.
-func newCancelWhenDisabled(sv *settings.Values) *syncCancelFunc {
- sf := &syncCancelFunc{}
- schedulerEnabledSetting.SetOnChange(sv, func(ctx context.Context) {
- if !schedulerEnabledSetting.Get(sv) {
- sf.Lock()
- if sf.CancelFunc != nil {
- sf.CancelFunc()
- }
- sf.Unlock()
- }
- })
- return sf
-}
-
-// withCancelOnDisabled executes provided function with the context which will be cancelled
-// if scheduler is disabled.
-func (sf *syncCancelFunc) withCancelOnDisabled(
- ctx context.Context, sv *settings.Values, f func(ctx context.Context) error,
-) error {
- ctx, cancel := func() (context.Context, context.CancelFunc) {
- sf.Lock()
- defer sf.Unlock()
-
- ctx, cancel := context.WithCancel(ctx)
- sf.CancelFunc = cancel
-
- if !schedulerEnabledSetting.Get(sv) {
- cancel()
- }
-
- return ctx, func() {
- sf.Lock()
- defer sf.Unlock()
- cancel()
- sf.CancelFunc = nil
- }
- }()
- defer cancel()
- return f(ctx)
-}
-
func (s *jobScheduler) runDaemon(ctx context.Context, stopper *stop.Stopper) {
_ = stopper.RunAsyncTask(ctx, "job-scheduler", func(ctx context.Context) {
initialDelay := getInitialScanDelay(s.TestingKnobs)
@@ -433,8 +384,6 @@ func (s *jobScheduler) runDaemon(ctx context.Context, stopper *stop.Stopper) {
log.Errorf(ctx, "error registering executor metrics: %+v", err)
}
- whenDisabled := newCancelWhenDisabled(&s.Settings.SV)
-
for timer := time.NewTimer(initialDelay); ; timer.Reset(
getWaitPeriod(ctx, &s.Settings.SV, s.schedulerEnabledOnThisNode, jitter, s.TestingKnobs)) {
select {
@@ -446,11 +395,10 @@ func (s *jobScheduler) runDaemon(ctx context.Context, stopper *stop.Stopper) {
}
maxSchedules := schedulerMaxJobsPerIterationSetting.Get(&s.Settings.SV)
- if err := whenDisabled.withCancelOnDisabled(ctx, &s.Settings.SV, func(ctx context.Context) error {
- return s.DB.Txn(ctx, func(ctx context.Context, txn *kv.Txn) error {
- return s.executeSchedules(ctx, maxSchedules, txn)
- })
- }); err != nil {
+ err := s.DB.Txn(ctx, func(ctx context.Context, txn *kv.Txn) error {
+ return s.executeSchedules(ctx, maxSchedules, txn)
+ })
+ if err != nil {
log.Errorf(ctx, "error executing schedules: %+v", err)
}
diff --git a/pkg/jobs/job_scheduler_test.go b/pkg/jobs/job_scheduler_test.go
index ea9e390f9b..4a2588f183 100644
--- a/pkg/jobs/job_scheduler_test.go
+++ b/pkg/jobs/job_scheduler_test.go
@@ -826,84 +826,3 @@ func TestSchedulerCanBeRestrictedToSingleNode(t *testing.T) {
})
}
}
-
-type blockUntilCancelledExecutor struct {
- started, done chan struct{}
-}
-
-var _ ScheduledJobExecutor = (*blockUntilCancelledExecutor)(nil)
-
-func (e *blockUntilCancelledExecutor) ExecuteJob(
- ctx context.Context,
- cfg *scheduledjobs.JobExecutionConfig,
- env scheduledjobs.JobSchedulerEnv,
- schedule *ScheduledJob,
- txn *kv.Txn,
-) error {
- defer close(e.done)
- close(e.started)
- <-ctx.Done()
- return ctx.Err()
-}
-
-func (e *blockUntilCancelledExecutor) NotifyJobTermination(
- ctx context.Context,
- jobID jobspb.JobID,
- jobStatus Status,
- details jobspb.Details,
- env scheduledjobs.JobSchedulerEnv,
- schedule *ScheduledJob,
- ex sqlutil.InternalExecutor,
- txn *kv.Txn,
-) error {
- return nil
-}
-
-func (e *blockUntilCancelledExecutor) Metrics() metric.Struct {
- return nil
-}
-
-func (e *blockUntilCancelledExecutor) GetCreateScheduleStatement(
- ctx context.Context,
- env scheduledjobs.JobSchedulerEnv,
- txn *kv.Txn,
- sj *ScheduledJob,
- ex sqlutil.InternalExecutor,
-) (string, error) {
- return "", errors.AssertionFailedf("unexpected GetCreateScheduleStatement call")
-}
-
-func TestDisablingSchedulerCancelsSchedules(t *testing.T) {
- defer leaktest.AfterTest(t)()
- defer log.Scope(t).Close(t)
-
- const executorName = "block-until-cancelled-executor"
- ex := &blockUntilCancelledExecutor{
- started: make(chan struct{}),
- done: make(chan struct{}),
- }
- defer registerScopedScheduledJobExecutor(executorName, ex)()
-
- knobs := base.TestingKnobs{
- JobsTestingKnobs: fastDaemonKnobs(overridePaceSetting(10 * time.Millisecond)),
- }
- ts, _, _ := serverutils.StartServer(t, base.TestServerArgs{Knobs: knobs})
- defer ts.Stopper().Stop(context.Background())
-
- // Create schedule which blocks until its context cancelled due to disabled scheduler.
- // We only need to create one schedule. This is because
- // scheduler executes its batch of schedules sequentially, and so, creating more
- // than one doesn't change anything since we block.
- schedule := NewScheduledJob(scheduledjobs.ProdJobSchedulerEnv)
- schedule.SetScheduleLabel("test schedule")
- schedule.SetOwner(security.TestUserName())
- schedule.SetNextRun(timeutil.Now())
- schedule.SetExecutionDetails(executorName, jobspb.ExecutionArguments{})
- require.NoError(t, schedule.Create(
- context.Background(), ts.InternalExecutor().(sqlutil.InternalExecutor), nil))
-
- <-ex.started
- // Disable scheduler and verify all running schedules were cancelled.
- schedulerEnabledSetting.Override(context.Background(), &ts.ClusterSettings().SV, false)
- <-ex.done
-}
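The scheduler code removed above (syncCancelFunc / withCancelOnDisabled and its test) boils down to a reusable pattern: cancel the in-flight run whenever an enabled/disabled toggle flips off. A dependency-free sketch of that pattern, with illustrative names that are not part of the jobs package:

package main

import (
	"context"
	"fmt"
	"sync"
	"time"
)

// cancelOnDisable guards the cancel function for the currently running task so
// that a settings-change callback can cancel it from another goroutine.
type cancelOnDisable struct {
	mu     sync.Mutex
	cancel context.CancelFunc
}

// disable cancels whatever run is currently in flight, if any.
func (c *cancelOnDisable) disable() {
	c.mu.Lock()
	defer c.mu.Unlock()
	if c.cancel != nil {
		c.cancel()
	}
}

// run executes f with a context that disable() can cancel.
func (c *cancelOnDisable) run(ctx context.Context, f func(context.Context) error) error {
	ctx, cancel := context.WithCancel(ctx)
	c.mu.Lock()
	c.cancel = cancel
	c.mu.Unlock()
	defer func() {
		c.mu.Lock()
		c.cancel = nil
		c.mu.Unlock()
		cancel()
	}()
	return f(ctx)
}

func main() {
	var c cancelOnDisable
	go func() {
		time.Sleep(50 * time.Millisecond)
		c.disable() // simulates the scheduler-enabled setting flipping off
	}()
	err := c.run(context.Background(), func(ctx context.Context) error {
		<-ctx.Done() // blocks until disable() fires
		return ctx.Err()
	})
	fmt.Println(err) // context canceled
}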
diff --git a/pkg/keysbase/BUILD.bazel b/pkg/keysbase/BUILD.bazel
deleted file mode 100644
index 8577d05c14..0000000000
--- a/pkg/keysbase/BUILD.bazel
+++ /dev/null
@@ -1,8 +0,0 @@
-load("@io_bazel_rules_go//go:def.bzl", "go_library")
-
-go_library(
- name = "keysbase",
- srcs = ["data.go"],
- importpath = "github.com/cockroachdb/cockroach/pkg/keysbase",
- visibility = ["//visibility:public"],
-)
diff --git a/pkg/keysbase/data.go b/pkg/keysbase/data.go
deleted file mode 100644
index f7f2de9bee..0000000000
--- a/pkg/keysbase/data.go
+++ /dev/null
@@ -1,36 +0,0 @@
-// Copyright 2022 The Cockroach Authors.
-//
-// Use of this software is governed by the Business Source License
-// included in the file licenses/BSL.txt.
-//
-// As of the Change Date specified in that file, in accordance with
-// the Business Source License, use of this software will be governed
-// by the Apache License, Version 2.0, included in the file
-// licenses/APL.txt.
-
-package keysbase
-
-// KeyMax is a maximum key value which sorts after all other keys.
-var KeyMax = []byte{0xff, 0xff}
-
-// PrefixEnd determines the end key given b as a prefix, that is the key that
-// sorts precisely behind all keys starting with prefix: "1" is added to the
-// final byte and the carry propagated. The special cases of nil and KeyMin
-// always returns KeyMax.
-func PrefixEnd(b []byte) []byte {
- if len(b) == 0 {
- return KeyMax
- }
- // Switched to "make and copy" pattern in #4963 for performance.
- end := make([]byte, len(b))
- copy(end, b)
- for i := len(end) - 1; i >= 0; i-- {
- end[i] = end[i] + 1
- if end[i] != 0 {
- return end[:i+1]
- }
- }
- // This statement will only be reached if the key is already a maximal byte
- // string (i.e. already \xff...).
- return b
-}
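The contract documented in the deleted file is easy to sanity-check with a few concrete inputs. A standalone sketch that mirrors the removed implementation (the lowercase names are illustrative, not a surviving API):

package main

import (
	"bytes"
	"fmt"
)

var keyMax = []byte{0xff, 0xff}

// prefixEnd mirrors the deleted keysbase.PrefixEnd: add one to the final byte
// and propagate the carry, truncating trailing zero bytes.
func prefixEnd(b []byte) []byte {
	if len(b) == 0 {
		return keyMax
	}
	end := make([]byte, len(b))
	copy(end, b)
	for i := len(end) - 1; i >= 0; i-- {
		end[i]++
		if end[i] != 0 {
			return end[:i+1]
		}
	}
	// Only reached if the key is already a maximal byte string (all 0xff).
	return b
}

func main() {
	fmt.Println(prefixEnd([]byte("abc")))            // [97 98 100], i.e. "abd"
	fmt.Println(prefixEnd([]byte{0x01, 0xff}))       // [2]: the carry propagates
	fmt.Println(bytes.Equal(prefixEnd(nil), keyMax)) // true: nil maps to KeyMax
	fmt.Println(prefixEnd([]byte{0xff, 0xff}))       // unchanged: already maximal
}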
diff --git a/pkg/kv/BUILD.bazel b/pkg/kv/BUILD.bazel
index 12eaaf6ffa..337b13521f 100644
--- a/pkg/kv/BUILD.bazel
+++ b/pkg/kv/BUILD.bazel
@@ -56,7 +56,6 @@ go_test(
],
data = glob(["testdata/**"]),
embed = [":kv"],
- shard_count = 16,
deps = [
"//pkg/base",
"//pkg/config/zonepb",
diff --git a/pkg/kv/bulk/BUILD.bazel b/pkg/kv/bulk/BUILD.bazel
index 3d7fcdf1a8..77310783c1 100644
--- a/pkg/kv/bulk/BUILD.bazel
+++ b/pkg/kv/bulk/BUILD.bazel
@@ -13,7 +13,6 @@ go_library(
visibility = ["//visibility:public"],
deps = [
"//pkg/keys",
- "//pkg/kv",
"//pkg/kv/kvclient/rangecache",
"//pkg/kv/kvserver",
"//pkg/kv/kvserver/kvserverbase",
@@ -29,6 +28,7 @@ go_library(
"//pkg/util/mon",
"//pkg/util/timeutil",
"@com_github_cockroachdb_errors//:errors",
+ "@com_github_cockroachdb_redact//:redact",
],
)
diff --git a/pkg/kv/bulk/buffering_adder.go b/pkg/kv/bulk/buffering_adder.go
index 28f2f4a52e..59e9484839 100644
--- a/pkg/kv/bulk/buffering_adder.go
+++ b/pkg/kv/bulk/buffering_adder.go
@@ -16,8 +16,6 @@ import (
"strings"
"time"
- "github.com/cockroachdb/cockroach/pkg/keys"
- "github.com/cockroachdb/cockroach/pkg/kv"
"github.com/cockroachdb/cockroach/pkg/kv/kvclient/rangecache"
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/kvserverbase"
"github.com/cockroachdb/cockroach/pkg/roachpb"
@@ -49,19 +47,16 @@ type BufferingAdder struct {
// currently buffered kvs.
curBuf kvBuf
- kvSize sz
sorted bool
initialSplits int
- lastFlush time.Time
flushCounts struct {
- total int
- bufferSize int
- totalSort time.Duration
- totalFlush time.Duration
- totalFilling time.Duration
+ total int
+ bufferSize int
+ totalSort time.Duration
+ totalFlush time.Duration
}
// name of the BufferingAdder for the purpose of logging only.
@@ -81,7 +76,7 @@ var _ kvserverbase.BulkAdder = &BufferingAdder{}
// encounter an error and need to be split and retired to be applied.
func MakeBulkAdder(
ctx context.Context,
- db *kv.DB,
+ db SSTSender,
rangeCache *rangecache.RangeCache,
settings *cluster.Settings,
timestamp hlc.Timestamp,
@@ -97,15 +92,27 @@ func MakeBulkAdder(
if opts.StepBufferSize == 0 {
opts.StepBufferSize = 32 << 20
}
+ if opts.SSTSize == nil {
+ opts.SSTSize = func() int64 { return 16 << 20 }
+ }
+ if opts.SplitAndScatterAfter == nil {
+ // splitting _before_ hitting the max reduces the chance of auto-splitting
+ // after the range is full, when it is more expensive to split/move.
+ opts.SplitAndScatterAfter = func() int64 { return 48 << 20 }
+ } else if opts.SplitAndScatterAfter() == kvserverbase.DisableExplicitSplits {
+ opts.SplitAndScatterAfter = nil
+ }
b := &BufferingAdder{
name: opts.Name,
sink: SSTBatcher{
db: db,
+ maxSize: opts.SSTSize,
rc: rangeCache,
settings: settings,
skipDuplicates: opts.SkipDuplicates,
disallowShadowingBelow: opts.DisallowShadowingBelow,
+ splitAfter: opts.SplitAndScatterAfter,
batchTS: opts.BatchTimestamp,
writeAtBatchTS: opts.WriteAtBatchTimestamp,
},
@@ -116,7 +123,6 @@ func MakeBulkAdder(
bulkMon: bulkMon,
sorted: true,
initialSplits: opts.InitialSplitsIfUnordered,
- lastFlush: timeutil.Now(),
}
// If no monitor is attached to the instance of a bulk adder, we do not
@@ -148,29 +154,14 @@ func (b *BufferingAdder) SetOnFlush(fn func(summary roachpb.BulkOpSummary)) {
// Close closes the underlying SST builder.
func (b *BufferingAdder) Close(ctx context.Context) {
- log.VEventf(ctx, 1,
- "%s adder ingested %s (%s); spent %s filling, %v sorting, %v flushing (%v sink, %v sending, %v splitting, %v scattering %v)",
+ log.VEventf(ctx, 2,
+ "bulk adder %s ingested %s, flushed %d due to buffer (%s) size. Flushed chunked as %d files (%d after split-retries), %d due to ranges, %d due to sst size.",
b.name,
sz(b.sink.totalRows.DataSize),
- sorted(b.sorted),
- timing(b.flushCounts.totalFilling),
- timing(b.flushCounts.totalSort),
- timing(b.flushCounts.totalFlush),
- timing(b.sink.flushCounts.flushWait),
- timing(b.sink.flushCounts.sendWait),
- timing(b.sink.flushCounts.splitWait),
- timing(b.sink.flushCounts.scatterWait),
- b.sink.flushCounts.scatterMoved,
- )
- log.VEventf(ctx, 2, "%s adder flushed %d times, %d due to buffer size (%s); flushing chunked into %d files (%d for ranges, %d for sst size, +%d after split-retries)",
- b.name,
- b.flushCounts.total,
b.flushCounts.bufferSize,
sz(b.memAcc.Used()),
- b.sink.flushCounts.total,
- b.sink.flushCounts.split,
- b.sink.flushCounts.sstSize,
- b.sink.flushCounts.files-b.sink.flushCounts.total,
+ b.sink.flushCounts.total, b.sink.flushCounts.files,
+ b.sink.flushCounts.split, b.sink.flushCounts.sstSize,
)
b.sink.Close()
@@ -190,9 +181,8 @@ func (b *BufferingAdder) Add(ctx context.Context, key roachpb.Key, value []byte)
if err := b.curBuf.append(key, value); err != nil {
return err
}
- b.kvSize += sz(len(key) + len(value))
- if b.curBuf.MemSize > sz(b.curBufferSize) {
+ if b.curBuf.MemSize > int(b.curBufferSize) {
// This is an optimization to try and increase the current buffer size if
// our memory account permits it. This would lead to creation of a fewer
// number of SSTs.
@@ -213,14 +203,6 @@ func (b *BufferingAdder) Add(ctx context.Context, key roachpb.Key, value []byte)
return nil
}
-func (b *BufferingAdder) bufferedKeys() int {
- return len(b.curBuf.entries)
-}
-
-func (b *BufferingAdder) bufferedMemSize() sz {
- return b.curBuf.MemSize
-}
-
// CurrentBufferFill returns the current buffer fill percentage.
func (b *BufferingAdder) CurrentBufferFill() float32 {
return float32(b.curBuf.MemSize) / float32(b.curBufferSize)
@@ -233,8 +215,7 @@ func (b *BufferingAdder) IsEmpty() bool {
func (b *BufferingAdder) sizeFlush(ctx context.Context) error {
b.flushCounts.bufferSize++
- log.VEventf(ctx, 2, "%s adder triggering flush of %s of KVs in %s buffer",
- b.name, b.kvSize, b.bufferedMemSize())
+ log.VEventf(ctx, 3, "buffer size triggering flush of %s buffer", sz(b.curBuf.MemSize))
return b.doFlush(ctx, true)
}
@@ -244,13 +225,10 @@ func (b *BufferingAdder) Flush(ctx context.Context) error {
}
func (b *BufferingAdder) doFlush(ctx context.Context, forSize bool) error {
- b.flushCounts.totalFilling += timeutil.Since(b.lastFlush)
-
- if b.bufferedKeys() == 0 {
+ if b.curBuf.Len() == 0 {
if b.onFlush != nil {
b.onFlush(b.sink.GetBatchSummary())
}
- b.lastFlush = timeutil.Now()
return nil
}
if err := b.sink.Reset(ctx); err != nil {
@@ -273,14 +251,10 @@ func (b *BufferingAdder) doFlush(ctx context.Context, forSize bool) error {
// If this is the first flush and is due to size, if it was unsorted then
// create initial splits if requested before flushing.
- if b.initialSplits > 0 {
- if forSize && !b.sorted {
- if err := b.createInitialSplits(ctx); err != nil {
- return err
- }
+ if b.flushCounts.total == 0 && forSize && b.initialSplits != 0 && !b.sorted {
+ if err := b.createInitialSplits(ctx); err != nil {
+ return err
}
- // Disable doing initial splits going forward.
- b.initialSplits = 0
}
for i := range b.curBuf.entries {
@@ -301,133 +275,51 @@ func (b *BufferingAdder) doFlush(ctx context.Context, forSize bool) error {
dueToSize := b.sink.flushCounts.sstSize - before.sstSize
log.Infof(ctx,
- "%s adder flushing %s (%s buffered/%0.2gx) wrote %d SSTs (avg: %s) with %d for splits, %d for size, took %v",
- b.name,
- b.kvSize,
- b.curBuf.MemSize,
- float64(b.kvSize)/float64(b.curBuf.MemSize),
- files,
- sz(written/int64(files)),
- dueToSplits,
- dueToSize,
- timing(timeutil.Since(beforeSort)),
+ "flushing %s buffer wrote %d SSTs (avg: %s) with %d for splits, %d for size, took %v",
+ sz(b.curBuf.MemSize), files, sz(written/int64(files)), dueToSplits, dueToSize, timeutil.Since(beforeSort),
)
}
-
if log.V(4) {
log.Infof(ctx,
- "%s adder has ingested %s (%s); spent %s filling, %v sorting, %v flushing (%v sink, %v sending, %v splitting, %v scattering %v)",
+ "bulk adder %s has ingested %s, spent %v sorting and %v flushing (%v sending, %v splitting). Flushed %d times due to buffer (%s) size. Flushed chunked as %d files (%d after split-retries), %d due to ranges, %d due to sst size.",
b.name,
sz(b.sink.totalRows.DataSize),
- sorted(b.sorted),
- timing(b.flushCounts.totalFilling),
- timing(b.flushCounts.totalSort),
- timing(b.flushCounts.totalFlush),
- timing(b.sink.flushCounts.flushWait),
- timing(b.sink.flushCounts.sendWait),
- timing(b.sink.flushCounts.splitWait),
- timing(b.sink.flushCounts.scatterWait),
- b.sink.flushCounts.scatterMoved,
- )
- }
-
- if log.V(5) {
- log.Infof(ctx,
- "%s adder has flushed %d times due to buffer size (%s), chunked as %d files (%d for ranges, %d for sst size, +%d for split-retries)",
- b.name,
+ b.flushCounts.totalSort,
+ b.flushCounts.totalFlush,
+ b.sink.flushCounts.sendWait,
+ b.sink.flushCounts.splitWait,
b.flushCounts.bufferSize,
sz(b.memAcc.Used()),
- b.sink.flushCounts.total,
- b.sink.flushCounts.split,
- b.sink.flushCounts.sstSize,
- b.sink.flushCounts.files-b.sink.flushCounts.total,
+ b.sink.flushCounts.total, b.sink.flushCounts.files,
+ b.sink.flushCounts.split, b.sink.flushCounts.sstSize,
)
}
-
if b.onFlush != nil {
b.onFlush(b.sink.GetBatchSummary())
}
b.curBuf.Reset()
- b.kvSize = 0
- b.lastFlush = timeutil.Now()
return nil
}
func (b *BufferingAdder) createInitialSplits(ctx context.Context) error {
- log.Infof(ctx, "%s adder creating up to %d initial splits from %d KVs in %s buffer",
- b.name, b.initialSplits, b.curBuf.Len(), b.curBuf.MemSize)
+ targetSize := b.curBuf.Len() / b.initialSplits
+ log.Infof(ctx, "creating up to %d initial splits from %d keys in %s buffer", b.initialSplits, b.curBuf.Len(), sz(b.curBuf.MemSize))
hour := hlc.Timestamp{WallTime: timeutil.Now().Add(time.Hour).UnixNano()}
- before := timeutil.Now()
-
- created := 0
- width := len(b.curBuf.entries) / b.initialSplits
- for i := 0; i < b.initialSplits; i++ {
- expire := hour
- if i == 0 {
- // If we over-split because our input is loosely ordered and we're just
- // seeing a sample of the first span here vs a sample of all of it, then
- // we may not fill enough for these splits to remain on their own. In that
- // case we'd really prefer the other splits be merged away first rather
- // than the first split, as it serves the important purpose of segregating
- // this processor's span from the one below it when is being constantly
- // re-scattered by that processor, so give the first split an extra hour.
- expire = hour.Add(time.Hour.Nanoseconds(), 0)
- }
- splitAt := i * width
- if splitAt >= len(b.curBuf.entries) {
- break
- }
- // Typically we split at splitAt if, and only if, its range still includes
- // the prior split, indicating no other processor is also splitting this
- // span. However, for the first split, there is no prior split, so we can
- // use the next split instead, as it too proves the range that need to be
- // split still has enough "width" (i.e. between two splits) to indicate that
- // another processor hasn't already split it.
- predicateAt := splitAt - width
- if predicateAt < 0 {
- next := splitAt + width
- if next > len(b.curBuf.entries)-1 {
- next = len(b.curBuf.entries) - 1
- }
- predicateAt = next
- }
- splitKey, err := keys.EnsureSafeSplitKey(b.curBuf.Key(splitAt))
- if err != nil {
- log.Warningf(ctx, "failed to generate pre-split key for key %s", b.curBuf.Key(splitAt))
- continue
- }
- predicateKey := b.curBuf.Key(predicateAt)
- log.VEventf(ctx, 1, "pre-splitting span %d of %d at %s", i, b.initialSplits, splitKey)
- resp, err := b.sink.db.SplitAndScatter(ctx, splitKey, expire, predicateKey)
- if err != nil {
+ for i := targetSize; i < b.curBuf.Len(); i += targetSize {
+ k := b.curBuf.Key(i)
+ prev := b.curBuf.Key(i - targetSize)
+ log.VEventf(ctx, 1, "splitting at key %d / %d: %s", i, b.curBuf.Len(), k)
+ if err := b.sink.db.SplitAndScatter(ctx, k, hour, prev); err != nil {
// TODO(dt): a typed error would be nice here.
if strings.Contains(err.Error(), "predicate") {
- log.VEventf(ctx, 1, "%s adder split at %s rejected, had previously split and no longer included %s",
- b.name, splitKey, predicateKey)
+ log.VEventf(ctx, 1, "split at %s rejected, had previously split and no longer included %s", k, prev)
continue
}
return err
}
-
- b.sink.flushCounts.splitWait += resp.Timing.Split
- b.sink.flushCounts.scatterWait += resp.Timing.Scatter
- if resp.ScatteredStats != nil {
- moved := sz(resp.ScatteredStats.Total())
- b.sink.flushCounts.scatterMoved += moved
- if resp.ScatteredStats.Total() > 0 {
- log.VEventf(ctx, 1, "pre-split scattered %s in non-empty range %s",
- moved, resp.ScatteredSpan)
- }
- }
- created++
}
-
- log.Infof(ctx, "%s adder created %d initial splits in %v from %d keys in %s buffer",
- b.name, created, timing(timeutil.Since(before)), b.curBuf.Len(), b.curBuf.MemSize)
-
- b.sink.initialSplitDone = true
return nil
}
diff --git a/pkg/kv/bulk/kv_buf.go b/pkg/kv/bulk/kv_buf.go
index 6ae5e91c7e..4e8f71f23e 100644
--- a/pkg/kv/bulk/kv_buf.go
+++ b/pkg/kv/bulk/kv_buf.go
@@ -26,7 +26,7 @@ import (
type kvBuf struct {
entries []kvBufEntry
slab []byte
- MemSize sz // size of buffered data including per-entry overhead
+ MemSize int // size of buffered data including per-entry overhead
}
// each entry in the buffer has a key and value -- the actual bytes of these are
@@ -57,7 +57,7 @@ func (b *kvBuf) append(k, v []byte) error {
return errors.Errorf("length %d exceeds limit %d", len(v), maxLen)
}
- b.MemSize += sz(len(k) + len(v) + entryOverhead)
+ b.MemSize += len(k) + len(v) + entryOverhead
var e kvBufEntry
e.keySpan = uint64(len(b.slab)<<lenBits) | uint64(len(k)&lenMask)
b.slab = append(b.slab, k...)
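The keySpan encoding touched in this hunk packs the slab offset into the high bits of a uint64 and the key length into the low bits. A small standalone sketch of the idea; the constants here are assumptions for illustration, while the real lenBits/lenMask are defined elsewhere in kv_buf.go:

package main

import "fmt"

// Illustrative constants: the real lenBits/lenMask live in kv_buf.go.
const (
	lenBits = 28
	lenMask = 1<<lenBits - 1
)

// pack stores the slab offset in the high bits and the length in the low bits.
func pack(offset, length int) uint64 {
	return uint64(offset)<<lenBits | uint64(length&lenMask)
}

// unpack reverses pack.
func unpack(span uint64) (offset, length int) {
	return int(span >> lenBits), int(span & lenMask)
}

func main() {
	span := pack(1024, 17)
	off, l := unpack(span)
	fmt.Println(off, l) // 1024 17
}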
diff --git a/pkg/kv/bulk/kv_buf_test.go b/pkg/kv/bulk/kv_buf_test.go
index ae77662671..7e29efcc0e 100644
--- a/pkg/kv/bulk/kv_buf_test.go
+++ b/pkg/kv/bulk/kv_buf_test.go
@@ -26,7 +26,7 @@ type kvPair struct {
value []byte
}
-func makeTestData(num int) (kvs []kvPair, totalSize sz) {
+func makeTestData(num int) (kvs []kvPair, totalSize int) {
kvs = make([]kvPair, num)
r, _ := randutil.NewTestRand()
alloc := make([]byte, num*500)
@@ -41,7 +41,7 @@ func makeTestData(num int) (kvs []kvPair, totalSize sz) {
alloc = alloc[len(kvs[i].key):]
kvs[i].value = alloc[:randutil.RandIntInRange(r, 0, 1000)]
alloc = alloc[len(kvs[i].value):]
- totalSize += sz(len(kvs[i].key) + len(kvs[i].value))
+ totalSize += len(kvs[i].key) + len(kvs[i].value)
}
return kvs, totalSize
}
@@ -63,7 +63,7 @@ func TestKvBuf(t *testing.T) {
if expected, actual := len(src), b.Len(); expected != actual {
t.Fatalf("expected len %d got %d", expected, actual)
}
- if expected, actual := totalSize+sz(len(src)*16), b.MemSize; expected != actual {
+ if expected, actual := totalSize+len(src)*16, b.MemSize; expected != actual {
t.Fatalf("expected len %d got %d", expected, actual)
}
diff --git a/pkg/kv/bulk/setting.go b/pkg/kv/bulk/setting.go
index c332ccf290..bb419dc94d 100644
--- a/pkg/kv/bulk/setting.go
+++ b/pkg/kv/bulk/setting.go
@@ -16,19 +16,21 @@ import (
"github.com/cockroachdb/cockroach/pkg/settings/cluster"
)
-var (
- // IngestBatchSize controls the size of ingest ssts.
- IngestBatchSize = settings.RegisterByteSizeSetting(
+// IngestBatchSize is a cluster setting that controls the maximum size of the
+// payload in an AddSSTable request.
+var IngestBatchSize = func() *settings.ByteSizeSetting {
+ s := settings.RegisterByteSizeSetting(
settings.TenantWritable,
"kv.bulk_ingest.batch_size",
"the maximum size of the payload in an AddSSTable request",
16<<20,
)
-)
+ return s
+}()
-// ingestFileSize determines the target size files sent via AddSSTable requests.
+// IngestFileSize determines the target size of files sent via AddSSTable requests.
// It returns the smaller of the IngestBatchSize and Raft command size settings.
-func ingestFileSize(st *cluster.Settings) int64 {
+func IngestFileSize(st *cluster.Settings) int64 {
desiredSize := IngestBatchSize.Get(&st.SV)
maxCommandSize := kvserver.MaxCommandSize.Get(&st.SV)
if desiredSize > maxCommandSize {
diff --git a/pkg/kv/bulk/sst_batcher.go b/pkg/kv/bulk/sst_batcher.go
index d2663ae6eb..21e5302e3e 100644
--- a/pkg/kv/bulk/sst_batcher.go
+++ b/pkg/kv/bulk/sst_batcher.go
@@ -16,7 +16,6 @@ import (
"time"
"github.com/cockroachdb/cockroach/pkg/keys"
- "github.com/cockroachdb/cockroach/pkg/kv"
"github.com/cockroachdb/cockroach/pkg/kv/kvclient/rangecache"
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/kvserverbase"
"github.com/cockroachdb/cockroach/pkg/roachpb"
@@ -29,6 +28,7 @@ import (
"github.com/cockroachdb/cockroach/pkg/util/log"
"github.com/cockroachdb/cockroach/pkg/util/timeutil"
"github.com/cockroachdb/errors"
+ "github.com/cockroachdb/redact"
)
var (
@@ -39,13 +39,6 @@ var (
400*1<<10, // 400 Kib
)
- splitAfter = settings.RegisterByteSizeSetting(
- settings.TenantWritable,
- "bulkio.ingest.scatter_after_size",
- "amount of data added to any one range after which a new range should be split off and scattered",
- 48<<20,
- )
-
ingestDelay = settings.RegisterDurationSetting(
settings.TenantWritable,
"bulkio.ingest.flush_delay",
@@ -57,23 +50,14 @@ var (
type sz int64
-func (b sz) String() string { return string(humanizeutil.IBytes(int64(b))) }
-func (b sz) SafeValue() {}
-
-type timing time.Duration
-
-func (t timing) String() string { return time.Duration(t).Round(time.Second).String() }
-func (t timing) SafeValue() {}
-
-type sorted bool
+func (b sz) String() string {
+ return redact.StringWithoutMarkers(b)
+}
-func (t sorted) String() string {
- if t {
- return "sorted"
- }
- return "unsorted"
+// SafeFormat implements the redact.SafeFormatter interface.
+func (b sz) SafeFormat(w redact.SafePrinter, _ rune) {
+ w.Print(humanizeutil.IBytes(int64(b)))
}
-func (t sorted) SafeValue() {}
// SSTBatcher is a helper for bulk-adding many KVs in chunks via AddSSTable. An
// SSTBatcher can be handed KVs repeatedly and will make them into SSTs that are
@@ -82,9 +66,11 @@ func (t sorted) SafeValue() {}
// it to attempt to flush SSTs before they cross range boundaries to minimize
// expensive on-split retries.
type SSTBatcher struct {
- db *kv.DB
- rc *rangecache.RangeCache
- settings *cluster.Settings
+ db SSTSender
+ rc *rangecache.RangeCache
+ settings *cluster.Settings
+ maxSize func() int64
+ splitAfter func() int64
// disallowShadowingBelow is described on roachpb.AddSSTableRequest.
disallowShadowingBelow hlc.Timestamp
@@ -113,8 +99,6 @@ type SSTBatcher struct {
// writeAtBatchTS is passed to the writeAtBatchTs argument to db.AddSStable.
writeAtBatchTS bool
- initialSplitDone bool
-
// The rest of the fields accumulated state as opposed to configuration. Some,
// like totalRows, are accumulated _across_ batches and are not reset between
// batches when Reset() is called.
@@ -125,12 +109,8 @@ type SSTBatcher struct {
sstSize int
files int // a single flush might create multiple files.
- scatterMoved sz
-
- flushWait time.Duration
- sendWait time.Duration
- splitWait time.Duration
- scatterWait time.Duration
+ sendWait time.Duration
+ splitWait time.Duration
}
// Tracking for if we have "filled" a range in case we want to split/scatter.
flushedToCurrentRange int64
@@ -155,14 +135,16 @@ type SSTBatcher struct {
// MakeSSTBatcher makes a ready-to-use SSTBatcher.
func MakeSSTBatcher(
ctx context.Context,
- db *kv.DB,
+ db SSTSender,
settings *cluster.Settings,
+ flushBytes func() int64,
disallowShadowingBelow hlc.Timestamp,
writeAtBatchTs bool,
) (*SSTBatcher, error) {
b := &SSTBatcher{
db: db,
settings: settings,
+ maxSize: flushBytes,
disallowShadowingBelow: disallowShadowingBelow,
writeAtBatchTS: writeAtBatchTs,
}
@@ -173,9 +155,9 @@ func MakeSSTBatcher(
// MakeStreamSSTBatcher creates a batcher configured to ingest duplicate keys
// that might be received from a cluster to cluster stream.
func MakeStreamSSTBatcher(
- ctx context.Context, db *kv.DB, settings *cluster.Settings,
+ ctx context.Context, db SSTSender, settings *cluster.Settings, flushBytes func() int64,
) (*SSTBatcher, error) {
- b := &SSTBatcher{db: db, settings: settings, ingestAll: true}
+ b := &SSTBatcher{db: db, settings: settings, maxSize: flushBytes, ingestAll: true}
err := b.Reset(ctx)
return b, err
}
@@ -304,7 +286,7 @@ func (b *SSTBatcher) flushIfNeeded(ctx context.Context, nextKey roachpb.Key) err
return b.Reset(ctx)
}
- if b.sstWriter.DataSize >= ingestFileSize(b.settings) {
+ if b.sstWriter.DataSize >= b.maxSize() {
if err := b.doFlush(ctx, sizeFlush, nextKey); err != nil {
return err
}
@@ -329,8 +311,6 @@ func (b *SSTBatcher) doFlush(ctx context.Context, reason int, nextKey roachpb.Ke
if b.sstWriter.DataSize == 0 {
return nil
}
- beforeFlush := timeutil.Now()
-
b.flushCounts.total++
if delay := ingestDelay.Get(&b.settings.SV); delay != 0 {
@@ -344,7 +324,7 @@ func (b *SSTBatcher) doFlush(ctx context.Context, reason int, nextKey roachpb.Ke
}
}
- hour := hlc.Timestamp{WallTime: beforeFlush.Add(time.Hour).UnixNano()}
+ hour := hlc.Timestamp{WallTime: timeutil.Now().Add(time.Hour).UnixNano()}
start := roachpb.Key(append([]byte(nil), b.batchStartKey...))
// The end key of the WriteBatch request is exclusive, but batchEndKey is
@@ -353,7 +333,7 @@ func (b *SSTBatcher) doFlush(ctx context.Context, reason int, nextKey roachpb.Ke
size := b.sstWriter.DataSize
if reason == sizeFlush {
- log.VEventf(ctx, 3, "flushing %s SST due to size > %s", sz(size), sz(ingestFileSize(b.settings)))
+ log.VEventf(ctx, 3, "flushing %s SST due to size > %s", sz(size), sz(b.maxSize()))
b.flushCounts.sstSize++
// On first flush, if it is due to size, we introduce one split at the start
@@ -363,30 +343,17 @@ func (b *SSTBatcher) doFlush(ctx context.Context, reason int, nextKey roachpb.Ke
// minimizes impact on other adders (e.g. causing extra SST splitting).
//
// We only do this splitting if the caller expects the sst_batcher to
- // split and scatter the data as it ingests it i.e. splitAfter > 0.
- if !b.initialSplitDone && b.flushCounts.total == 1 && splitAfter.Get(&b.settings.SV) > 0 {
+ // split and scatter the data as it ingests it (which is the case when
+ // splitAfter is set).
+ if b.flushCounts.total == 1 && b.splitAfter != nil {
if splitAt, err := keys.EnsureSafeSplitKey(start); err != nil {
- log.Warningf(ctx, "failed to generate split key to separate ingestion span: %v", err)
+ log.Warningf(ctx, "%v", err)
} else {
- if log.V(1) {
- log.Infof(ctx, "splitting on first flush to separate ingestion span using key %v", start)
- }
// NB: Passing 'hour' here is technically illegal until 19.2 is
// active, but the value will be ignored before that, and we don't
// have access to the cluster version here.
- reply, err := b.db.SplitAndScatter(ctx, splitAt, hour)
- if err != nil {
- log.Warningf(ctx, "failed ot split at first key to separate ingestion span: %v", err)
- } else {
- b.flushCounts.splitWait += reply.Timing.Split
- b.flushCounts.scatterWait += reply.Timing.Scatter
- if reply.ScatteredStats != nil {
- moved := sz(reply.ScatteredStats.Total())
- b.flushCounts.scatterMoved += moved
- if moved > 0 {
- log.VEventf(ctx, 1, "starting split scattered %s in non-empty range %s", moved, reply.ScatteredSpan)
- }
- }
+ if err := b.db.SplitAndScatter(ctx, splitAt, hour); err != nil {
+ log.Warningf(ctx, "%v", err)
}
}
}
@@ -426,28 +393,21 @@ func (b *SSTBatcher) doFlush(ctx context.Context, reason int, nextKey roachpb.Ke
b.lastFlushKey = append(b.lastFlushKey[:0], b.flushKey...)
b.flushedToCurrentRange = size
}
- if splitSize := splitAfter.Get(&b.settings.SV); splitSize > 0 {
- if b.flushedToCurrentRange > splitSize && nextKey != nil {
+ if b.splitAfter != nil {
+ if splitAfter := b.splitAfter(); b.flushedToCurrentRange > splitAfter && nextKey != nil {
if splitAt, err := keys.EnsureSafeSplitKey(nextKey); err != nil {
log.Warningf(ctx, "%v", err)
} else {
+ beforeSplit := timeutil.Now()
+
log.VEventf(ctx, 2, "%s added since last split, splitting/scattering for next range at %v", sz(b.flushedToCurrentRange), end)
- reply, err := b.db.SplitAndScatter(ctx, splitAt, hour)
- if err != nil {
+ // NB: Passing 'hour' here is technically illegal until 19.2 is
+ // active, but the value will be ignored before that, and we don't
+ // have access to the cluster version here.
+ if err := b.db.SplitAndScatter(ctx, splitAt, hour); err != nil {
log.Warningf(ctx, "failed to split and scatter during ingest: %+v", err)
}
- b.flushCounts.splitWait += reply.Timing.Split
- b.flushCounts.scatterWait += reply.Timing.Scatter
- if reply.ScatteredStats != nil {
- moved := sz(reply.ScatteredStats.Total())
- b.flushCounts.scatterMoved += moved
- if moved > 0 {
- // This is unexpected, since 'filling' a range without hitting a
- // an existing split suggests non-overlapping input, so we expect
- // our still-to-fill RHS to be empty and cheap to move.
- log.VEventf(ctx, 1, "filled-range split scattered %s in non-empty range %s", moved, reply.ScatteredSpan)
- }
- }
+ b.flushCounts.splitWait += timeutil.Since(beforeSplit)
}
b.flushedToCurrentRange = 0
}
@@ -456,7 +416,6 @@ func (b *SSTBatcher) doFlush(ctx context.Context, reason int, nextKey roachpb.Ke
b.rowCounter.DataSize += b.sstWriter.DataSize
b.totalRows.Add(b.rowCounter.BulkOpSummary)
- b.flushCounts.flushWait += timeutil.Since(beforeFlush)
return nil
}
@@ -475,6 +434,39 @@ func (b *SSTBatcher) GetSummary() roachpb.BulkOpSummary {
return b.totalRows
}
+// SSTSender is an interface to send SST data to an engine.
+type SSTSender interface {
+ AddSSTable(
+ ctx context.Context,
+ begin, end interface{},
+ data []byte,
+ disallowConflicts bool,
+ disallowShadowing bool,
+ disallowShadowingBelow hlc.Timestamp,
+ stats *enginepb.MVCCStats,
+ ingestAsWrites bool,
+ batchTs hlc.Timestamp,
+ ) error
+
+ AddSSTableAtBatchTimestamp(
+ ctx context.Context,
+ begin, end interface{},
+ data []byte,
+ disallowConflicts bool,
+ disallowShadowing bool,
+ disallowShadowingBelow hlc.Timestamp,
+ stats *enginepb.MVCCStats,
+ ingestAsWrites bool,
+ batchTs hlc.Timestamp,
+ ) (hlc.Timestamp, error)
+
+ SplitAndScatter(
+ ctx context.Context, key roachpb.Key, expirationTime hlc.Timestamp, predicateKeys ...roachpb.Key,
+ ) error
+
+ Clock() *hlc.Clock
+}
+
type sstSpan struct {
start, end roachpb.Key
sstBytes []byte
@@ -487,7 +479,7 @@ type sstSpan struct {
// for each side of the split in the error, and each are retried.
func AddSSTable(
ctx context.Context,
- db *kv.DB,
+ db SSTSender,
start, end roachpb.Key,
sstBytes []byte,
disallowShadowingBelow hlc.Timestamp,
@@ -561,11 +553,6 @@ func AddSSTable(
log.VEventf(ctx, 3, "adding %s AddSSTable [%s,%s) took %v", sz(len(item.sstBytes)), item.start, item.end, timeutil.Since(before))
return nil
}
- // Retry on AmbiguousResult.
- if errors.HasType(err, (*roachpb.AmbiguousResultError)(nil)) {
- log.Warningf(ctx, "addsstable [%s,%s) attempt %d failed: %+v", start, end, i, err)
- continue
- }
// This range has split -- we need to split the SST to try again.
if m := (*roachpb.RangeKeyMismatchError)(nil); errors.As(err, &m) {
// TODO(andrei): We just use the first of m.Ranges; presumably we
@@ -594,6 +581,11 @@ func AddSSTable(
work = append([]*sstSpan{left, right}, work...)
return nil
}
+ // Retry on AmbiguousResult.
+ if errors.HasType(err, (*roachpb.AmbiguousResultError)(nil)) {
+ log.Warningf(ctx, "addsstable [%s,%s) attempt %d failed: %+v", start, end, i, err)
+ continue
+ }
}
return errors.Wrapf(err, "addsstable [%s,%s)", item.start, item.end)
}(); err != nil {
@@ -612,7 +604,7 @@ func AddSSTable(
// passed in is over the top level SST passed into AddSSTTable().
func createSplitSSTable(
ctx context.Context,
- db *kv.DB,
+ db SSTSender,
start, splitKey roachpb.Key,
disallowShadowingBelow hlc.Timestamp,
iter storage.SimpleMVCCIterator,
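Because the batcher and the AddSSTable helper now accept the SSTSender interface defined above instead of a concrete *kv.DB, they can be exercised against a stub. A minimal no-op fake satisfying that interface; this is an illustrative sketch, not a mock that exists in the tree:

package bulk

import (
	"context"

	"github.com/cockroachdb/cockroach/pkg/roachpb"
	"github.com/cockroachdb/cockroach/pkg/storage/enginepb"
	"github.com/cockroachdb/cockroach/pkg/util/hlc"
)

// fakeSSTSender records calls instead of talking to a real cluster.
type fakeSSTSender struct {
	addCalls   int
	splitCalls int
	clock      *hlc.Clock
}

var _ SSTSender = (*fakeSSTSender)(nil)

func (f *fakeSSTSender) AddSSTable(
	ctx context.Context, begin, end interface{}, data []byte,
	disallowConflicts, disallowShadowing bool, disallowShadowingBelow hlc.Timestamp,
	stats *enginepb.MVCCStats, ingestAsWrites bool, batchTs hlc.Timestamp,
) error {
	f.addCalls++
	return nil
}

func (f *fakeSSTSender) AddSSTableAtBatchTimestamp(
	ctx context.Context, begin, end interface{}, data []byte,
	disallowConflicts, disallowShadowing bool, disallowShadowingBelow hlc.Timestamp,
	stats *enginepb.MVCCStats, ingestAsWrites bool, batchTs hlc.Timestamp,
) (hlc.Timestamp, error) {
	f.addCalls++
	return batchTs, nil
}

func (f *fakeSSTSender) SplitAndScatter(
	ctx context.Context, key roachpb.Key, expirationTime hlc.Timestamp, predicateKeys ...roachpb.Key,
) error {
	f.splitCalls++
	return nil
}

func (f *fakeSSTSender) Clock() *hlc.Clock { return f.clock }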
diff --git a/pkg/kv/bulk/sst_batcher_test.go b/pkg/kv/bulk/sst_batcher_test.go
index ac1482f288..ae0bd0f99c 100644
--- a/pkg/kv/bulk/sst_batcher_test.go
+++ b/pkg/kv/bulk/sst_batcher_test.go
@@ -132,7 +132,7 @@ func runTestImport(t *testing.T, batchSizeValue int64) {
ts := hlc.Timestamp{WallTime: 100}
b, err := bulk.MakeBulkAdder(
- ctx, kvDB, mockCache, s.ClusterSettings(), ts, kvserverbase.BulkAdderOptions{MinBufferSize: batchSize()}, nil, /* bulkMon */
+ ctx, kvDB, mockCache, s.ClusterSettings(), ts, kvserverbase.BulkAdderOptions{MinBufferSize: batchSize(), SSTSize: batchSize}, nil, /* bulkMon */
)
if err != nil {
t.Fatal(err)
diff --git a/pkg/kv/db.go b/pkg/kv/db.go
index 231252f00b..05e834a1a2 100644
--- a/pkg/kv/db.go
+++ b/pkg/kv/db.go
@@ -14,11 +14,9 @@ import (
"context"
"fmt"
"strings"
- "time"
"github.com/cockroachdb/cockroach/pkg/base"
"github.com/cockroachdb/cockroach/pkg/roachpb"
- "github.com/cockroachdb/cockroach/pkg/sql/sessiondatapb"
"github.com/cockroachdb/cockroach/pkg/storage/enginepb"
"github.com/cockroachdb/cockroach/pkg/util/admission"
"github.com/cockroachdb/cockroach/pkg/util/hlc"
@@ -26,7 +24,6 @@ import (
"github.com/cockroachdb/cockroach/pkg/util/protoutil"
"github.com/cockroachdb/cockroach/pkg/util/retry"
"github.com/cockroachdb/cockroach/pkg/util/stop"
- "github.com/cockroachdb/cockroach/pkg/util/timeutil"
"github.com/cockroachdb/errors"
)
@@ -585,55 +582,24 @@ func (db *DB) AdminSplit(
return getOneErr(db.Run(ctx, b), b)
}
-// SplitAndScatterResult carries wraps information about the SplitAndScatter
-// call, including how long each step took or stats for range scattered.
-type SplitAndScatterResult struct {
- // Timing indicates how long each step in this multi-step call took.
- Timing struct {
- Split time.Duration
- Scatter time.Duration
- }
- // Stats describe the scattered range, as returned by the AdminScatter call.
- ScatteredStats *enginepb.MVCCStats
- ScatteredSpan roachpb.Span
-}
-
// SplitAndScatter is a helper that wraps AdminSplit + AdminScatter.
func (db *DB) SplitAndScatter(
ctx context.Context, key roachpb.Key, expirationTime hlc.Timestamp, predicateKeys ...roachpb.Key,
-) (SplitAndScatterResult, error) {
- beforeSplit := timeutil.Now()
+) error {
b := &Batch{}
b.adminSplit(key, expirationTime, predicateKeys)
if err := getOneErr(db.Run(ctx, b), b); err != nil {
- return SplitAndScatterResult{}, err
+ return err
}
- beforeScatter := timeutil.Now()
scatterReq := &roachpb.AdminScatterRequest{
RequestHeader: roachpb.RequestHeaderFromSpan(roachpb.Span{Key: key, EndKey: key.Next()}),
RandomizeLeases: true,
}
- raw, pErr := SendWrapped(ctx, db.NonTransactionalSender(), scatterReq)
- if pErr != nil {
- return SplitAndScatterResult{}, pErr.GoError()
- }
- reply := SplitAndScatterResult{}
- reply.Timing.Split = beforeScatter.Sub(beforeSplit)
- reply.Timing.Scatter = timeutil.Since(beforeScatter)
- resp, ok := raw.(*roachpb.AdminScatterResponse)
- if !ok {
- return reply, errors.Errorf("unexpected response of type %T for AdminScatter", raw)
- }
- reply.ScatteredStats = resp.MVCCStats
- if len(resp.RangeInfos) > 0 {
- reply.ScatteredSpan = roachpb.Span{
- Key: resp.RangeInfos[0].Desc.StartKey.AsRawKey(),
- EndKey: resp.RangeInfos[0].Desc.EndKey.AsRawKey(),
- }
+ if _, pErr := SendWrapped(ctx, db.NonTransactionalSender(), scatterReq); pErr != nil {
+ return pErr.GoError()
}
-
- return reply, nil
+ return nil
}
// AdminUnsplit removes the sticky bit of the range specified by splitKey.
@@ -910,25 +876,6 @@ func (db *DB) Txn(ctx context.Context, retryable func(context.Context, *Txn) err
return runTxn(ctx, txn, retryable)
}
-// TxnWithSteppingEnabled is the same Txn, but represents a request originating
-// from SQL and has stepping enabled and quality of service set.
-func (db *DB) TxnWithSteppingEnabled(
- ctx context.Context,
- qualityOfService sessiondatapb.QoSLevel,
- retryable func(context.Context, *Txn) error,
-) error {
- // TODO(radu): we should open a tracing Span here (we need to figure out how
- // to use the correct tracer).
-
- // Observed timestamps don't work with multi-tenancy. See:
- //
- // https://github.com/cockroachdb/cockroach/issues/48008
- nodeID, _ := db.ctx.NodeID.OptionalNodeID() // zero if not available
- txn := NewTxnWithSteppingEnabled(ctx, db, nodeID, qualityOfService)
- txn.SetDebugName("unnamed")
- return runTxn(ctx, txn, retryable)
-}
-
// TxnRootKV is the same as Txn, but specifically represents a request
// originating within KV, and that is at the root of the tree of requests. For
// KV usage that should be subject to admission control. Do not use this for
diff --git a/pkg/kv/kvclient/kvcoord/BUILD.bazel b/pkg/kv/kvclient/kvcoord/BUILD.bazel
index 817e23cca3..3f22e3713e 100644
--- a/pkg/kv/kvclient/kvcoord/BUILD.bazel
+++ b/pkg/kv/kvclient/kvcoord/BUILD.bazel
@@ -134,7 +134,6 @@ go_test(
],
data = glob(["testdata/**"]),
embed = [":kvcoord"],
- shard_count = 16,
tags = ["no-remote"],
deps = [
"//build/bazelutil:noop",
diff --git a/pkg/kv/kvclient/kvcoord/dist_sender.go b/pkg/kv/kvclient/kvcoord/dist_sender.go
index 3b7229ac65..8d3bee67d1 100644
--- a/pkg/kv/kvclient/kvcoord/dist_sender.go
+++ b/pkg/kv/kvclient/kvcoord/dist_sender.go
@@ -609,7 +609,7 @@ func (ds *DistSender) getRoutingInfo(
}
if !containsFn(returnToken.Desc(), descKey) {
log.Fatalf(ctx, "programming error: range resolution returning non-matching descriptor: "+
- "desc: %s, key: %s, reverse: %t", returnToken.Desc(), descKey, redact.Safe(useReverseScan))
+ "desc: %s, key: %s, reverse: %t", returnToken.Desc(), descKey, log.Safe(useReverseScan))
}
}
@@ -764,7 +764,7 @@ func (ds *DistSender) Send(
// We already verified above that the batch contains only scan requests of the same type.
// Such a batch should never need splitting.
log.Fatalf(ctx, "batch with MaxSpanRequestKeys=%d, TargetBytes=%d needs splitting",
- redact.Safe(ba.MaxSpanRequestKeys), redact.Safe(ba.TargetBytes))
+ log.Safe(ba.MaxSpanRequestKeys), log.Safe(ba.TargetBytes))
}
var singleRplChunk [1]*roachpb.BatchResponse
rplChunks := singleRplChunk[:0:1]
@@ -2212,18 +2212,16 @@ func (ds *DistSender) sendToReplicas(
// Has the caller given up?
if ctx.Err() != nil {
+ reportedErr := errors.Wrap(ctx.Err(), "context done during DistSender.Send")
+ log.Eventf(ctx, "%v", reportedErr)
+ if ambiguousError != nil {
+ return nil, roachpb.NewAmbiguousResultErrorf(reportedErr.Error())
+ }
// Don't consider this a sendError, because sendErrors indicate that we
// were unable to reach a replica that could serve the request, and they
// cause range cache evictions. Context cancellations just mean the
// sender changed its mind or the request timed out.
-
- if ambiguousError != nil {
- err = roachpb.NewAmbiguousResultError(errors.Wrapf(ambiguousError, "context done during DistSender.Send"))
- } else {
- err = errors.Wrap(ctx.Err(), "aborted during DistSender.Send")
- }
- log.Eventf(ctx, "%v", err)
- return nil, err
+ return nil, errors.Wrap(ctx.Err(), "aborted during DistSender.Send")
}
}
}
diff --git a/pkg/kv/kvclient/kvcoord/dist_sender_test.go b/pkg/kv/kvclient/kvcoord/dist_sender_test.go
index 779ead4e98..338077d063 100644
--- a/pkg/kv/kvclient/kvcoord/dist_sender_test.go
+++ b/pkg/kv/kvclient/kvcoord/dist_sender_test.go
@@ -3151,14 +3151,14 @@ func TestParallelCommitsDetectIntentMissingCause(t *testing.T) {
queryTxnFn: func() (roachpb.TransactionStatus, bool, error) {
return roachpb.ABORTED, txnRecordSynthesized, nil
},
- expErr: "result is ambiguous: intent missing and record aborted",
+ expErr: "result is ambiguous (intent missing and record aborted)",
},
{
name: "QueryTxn error, unresolved ambiguity",
queryTxnFn: func() (roachpb.TransactionStatus, bool, error) {
return 0, false, errors.New("unable to query txn")
},
- expErr: "result is ambiguous: error=unable to query txn [intent missing]",
+ expErr: "result is ambiguous (error=unable to query txn [intent missing])",
},
}
for _, test := range testCases {
diff --git a/pkg/kv/kvclient/kvcoord/txn_coord_sender_test.go b/pkg/kv/kvclient/kvcoord/txn_coord_sender_test.go
index aa178d762c..27ada82dfd 100644
--- a/pkg/kv/kvclient/kvcoord/txn_coord_sender_test.go
+++ b/pkg/kv/kvclient/kvcoord/txn_coord_sender_test.go
@@ -670,7 +670,7 @@ func TestTxnCoordSenderGCWithAmbiguousResultErr(t *testing.T) {
testutils.RunTrueAndFalse(t, "errOnFirst", func(t *testing.T, errOnFirst bool) {
key := roachpb.Key("a")
- are := roachpb.NewAmbiguousResultErrorf("very ambiguous")
+ are := roachpb.NewAmbiguousResultError("very ambiguous")
knobs := &kvserver.StoreTestingKnobs{
TestingResponseFilter: func(ctx context.Context, ba roachpb.BatchRequest, br *roachpb.BatchResponse) *roachpb.Error {
for _, req := range ba.Requests {
diff --git a/pkg/kv/kvclient/kvtenant/connector.go b/pkg/kv/kvclient/kvtenant/connector.go
index 3db120cc51..71756dea4c 100644
--- a/pkg/kv/kvclient/kvtenant/connector.go
+++ b/pkg/kv/kvclient/kvtenant/connector.go
@@ -61,11 +61,6 @@ type Connector interface {
// primitives.
serverpb.RegionsServer
- // TenantStatusServer is the subset of the serverpb.StatusInterface that is
- // used by the SQL system to query for debug information, such as tenant-specific
- // range reports.
- serverpb.TenantStatusServer
-
// TokenBucketProvider provides access to the tenant cost control token
// bucket.
TokenBucketProvider
diff --git a/pkg/kv/kvclient/rangefeed/BUILD.bazel b/pkg/kv/kvclient/rangefeed/BUILD.bazel
index a419196ed2..55201509a2 100644
--- a/pkg/kv/kvclient/rangefeed/BUILD.bazel
+++ b/pkg/kv/kvclient/rangefeed/BUILD.bazel
@@ -32,7 +32,6 @@ go_library(
"//pkg/util/timeutil",
"@com_github_cockroachdb_errors//:errors",
"@com_github_cockroachdb_logtags//:logtags",
- "@com_github_cockroachdb_redact//:redact",
],
)
diff --git a/pkg/kv/kvclient/rangefeed/rangefeed.go b/pkg/kv/kvclient/rangefeed/rangefeed.go
index 85e9e8a7e9..fb377e484e 100644
--- a/pkg/kv/kvclient/rangefeed/rangefeed.go
+++ b/pkg/kv/kvclient/rangefeed/rangefeed.go
@@ -31,7 +31,6 @@ import (
"github.com/cockroachdb/cockroach/pkg/util/timeutil"
"github.com/cockroachdb/errors"
"github.com/cockroachdb/logtags"
- "github.com/cockroachdb/redact"
)
//go:generate mockgen -destination=mocks_generated_test.go --package=rangefeed . DB
@@ -300,7 +299,7 @@ func (f *RangeFeed) run(ctx context.Context, frontier *span.Frontier) {
}
if err != nil && ctx.Err() == nil && restartLogEvery.ShouldLog() {
log.Warningf(ctx, "rangefeed failed %d times, restarting: %v",
- redact.Safe(i), err)
+ log.Safe(i), err)
}
if ctx.Err() != nil {
log.VEventf(ctx, 1, "exiting rangefeed")
diff --git a/pkg/kv/kvnemesis/validator_test.go b/pkg/kv/kvnemesis/validator_test.go
index da549ba68b..5f8eb2db3f 100644
--- a/pkg/kv/kvnemesis/validator_test.go
+++ b/pkg/kv/kvnemesis/validator_test.go
@@ -163,32 +163,32 @@ func TestValidate(t *testing.T) {
},
{
name: "one ambiguous put with successful write",
- steps: []Step{step(withResult(put(`a`, `v1`), roachpb.NewAmbiguousResultError(errors.New("boom"))))},
+ steps: []Step{step(withResult(put(`a`, `v1`), roachpb.NewAmbiguousResultError(``)))},
kvs: kvs(kv(`a`, 1, `v1`)),
expected: nil,
},
{
name: "one ambiguous delete with successful write",
- steps: []Step{step(withResult(del(`a`), roachpb.NewAmbiguousResultError(errors.New("boom"))))},
+ steps: []Step{step(withResult(del(`a`), roachpb.NewAmbiguousResultError(``)))},
kvs: kvs(tombstone(`a`, 1)),
expected: []string{`unable to validate delete operations in ambiguous transactions: [d]"a":missing-><nil>`},
},
{
name: "one ambiguous put with failed write",
- steps: []Step{step(withResult(put(`a`, `v1`), roachpb.NewAmbiguousResultError(errors.New("boom"))))},
+ steps: []Step{step(withResult(put(`a`, `v1`), roachpb.NewAmbiguousResultError(``)))},
kvs: nil,
expected: nil,
},
{
name: "one ambiguous delete with failed write",
- steps: []Step{step(withResult(del(`a`), roachpb.NewAmbiguousResultError(errors.New("boom"))))},
+ steps: []Step{step(withResult(del(`a`), roachpb.NewAmbiguousResultError(``)))},
kvs: nil,
expected: nil,
},
{
name: "one ambiguous delete with failed write before a later committed delete",
steps: []Step{
- step(withResult(del(`a`), roachpb.NewAmbiguousResultError(errors.New("boom")))),
+ step(withResult(del(`a`), roachpb.NewAmbiguousResultError(``))),
step(withResult(del(`a`), nil)),
},
kvs: kvs(tombstone(`a`, 1)),
@@ -511,7 +511,7 @@ func TestValidate(t *testing.T) {
step(withResult(closureTxn(ClosureTxnType_Commit,
withResult(put(`a`, `v1`), nil),
withResult(put(`b`, `v2`), nil),
- ), roachpb.NewAmbiguousResultError(errors.New("boom")))),
+ ), roachpb.NewAmbiguousResultError(``))),
},
kvs: kvs(kv(`a`, 1, `v1`), kv(`b`, 1, `v2`)),
expected: nil,
@@ -522,7 +522,7 @@ func TestValidate(t *testing.T) {
step(withResult(closureTxn(ClosureTxnType_Commit,
withResult(put(`a`, `v1`), nil),
withResult(del(`b`), nil),
- ), roachpb.NewAmbiguousResultError(errors.New("boom")))),
+ ), roachpb.NewAmbiguousResultError(``))),
},
kvs: kvs(kv(`a`, 1, `v1`), tombstone(`b`, 1)),
// TODO(sarkesian): If able to determine the tombstone resulting from a
@@ -538,7 +538,7 @@ func TestValidate(t *testing.T) {
step(withResult(closureTxn(ClosureTxnType_Commit,
withResult(put(`a`, `v1`), nil),
withResult(put(`b`, `v2`), nil),
- ), roachpb.NewAmbiguousResultError(errors.New("boom")))),
+ ), roachpb.NewAmbiguousResultError(``))),
},
kvs: nil,
expected: nil,
@@ -549,7 +549,7 @@ func TestValidate(t *testing.T) {
step(withResult(closureTxn(ClosureTxnType_Commit,
withResult(put(`a`, `v1`), nil),
withResult(del(`b`), nil),
- ), roachpb.NewAmbiguousResultError(errors.New("boom")))),
+ ), roachpb.NewAmbiguousResultError(``))),
},
kvs: nil,
expected: nil,
@@ -560,7 +560,7 @@ func TestValidate(t *testing.T) {
step(withResult(closureTxn(ClosureTxnType_Commit,
withResult(put(`a`, `v1`), nil),
withResult(put(`b`, `v2`), nil),
- ), roachpb.NewAmbiguousResultError(errors.New("boom")))),
+ ), roachpb.NewAmbiguousResultError(``))),
},
kvs: kvs(kv(`a`, 1, `v1`), kv(`b`, 2, `v2`)),
expected: []string{
@@ -573,7 +573,7 @@ func TestValidate(t *testing.T) {
step(withResult(withTimestamp(closureTxn(ClosureTxnType_Commit,
withResult(put(`a`, `v1`), nil),
withResult(del(`b`), nil),
- ), 2), roachpb.NewAmbiguousResultError(errors.New("boom")))),
+ ), 2), roachpb.NewAmbiguousResultError(``))),
},
kvs: kvs(kv(`a`, 1, `v1`), tombstone(`b`, 2)),
// TODO(sarkesian): If able to determine the tombstone resulting from a
diff --git a/pkg/kv/kvserver/BUILD.bazel b/pkg/kv/kvserver/BUILD.bazel
index 8e8c388fb3..e24b2673dc 100644
--- a/pkg/kv/kvserver/BUILD.bazel
+++ b/pkg/kv/kvserver/BUILD.bazel
@@ -37,6 +37,7 @@ go_library(
"replica_backpressure.go",
"replica_batch_updates.go",
"replica_circuit_breaker.go",
+ "replica_circuit_breaker_cancelstorage.go",
"replica_closedts.go",
"replica_command.go",
"replica_consistency.go",
@@ -123,7 +124,6 @@ go_library(
"//pkg/kv/kvserver/closedts/sidetransport",
"//pkg/kv/kvserver/closedts/tracker",
"//pkg/kv/kvserver/concurrency",
- "//pkg/kv/kvserver/concurrency/poison",
"//pkg/kv/kvserver/constraint",
"//pkg/kv/kvserver/gc",
"//pkg/kv/kvserver/idalloc",
@@ -132,6 +132,8 @@ go_library(
"//pkg/kv/kvserver/kvserverpb",
"//pkg/kv/kvserver/liveness",
"//pkg/kv/kvserver/liveness/livenesspb",
+ "//pkg/kv/kvserver/protectedts",
+ "//pkg/kv/kvserver/protectedts/ptpb",
"//pkg/kv/kvserver/raftentry",
"//pkg/kv/kvserver/rangefeed",
"//pkg/kv/kvserver/rditer",
@@ -225,6 +227,7 @@ go_test(
"client_rangefeed_test.go",
"client_relocate_range_test.go",
"client_replica_backpressure_test.go",
+ "client_replica_circuit_breaker_bench_test.go",
"client_replica_circuit_breaker_test.go",
"client_replica_gc_test.go",
"client_replica_test.go",
@@ -304,8 +307,8 @@ go_test(
],
data = glob(["testdata/**"]),
embed = [":kvserver"],
- shard_count = 8,
- tags = ["cpu:2"],
+ shard_count = 4,
+ tags = ["cpu:4"],
deps = [
"//pkg/base",
"//pkg/cli/exit",
@@ -333,8 +336,10 @@ go_test(
"//pkg/kv/kvserver/kvserverpb",
"//pkg/kv/kvserver/liveness",
"//pkg/kv/kvserver/liveness/livenesspb",
+ "//pkg/kv/kvserver/protectedts",
"//pkg/kv/kvserver/protectedts/ptpb",
"//pkg/kv/kvserver/protectedts/ptstorage",
+ "//pkg/kv/kvserver/protectedts/ptverifier",
"//pkg/kv/kvserver/raftentry",
"//pkg/kv/kvserver/rditer",
"//pkg/kv/kvserver/readsummary/rspb",
@@ -355,7 +360,6 @@ go_test(
"//pkg/server/telemetry",
"//pkg/settings/cluster",
"//pkg/spanconfig",
- "//pkg/spanconfig/spanconfigptsreader",
"//pkg/spanconfig/spanconfigstore",
"//pkg/sql",
"//pkg/sql/catalog/bootstrap",
diff --git a/pkg/kv/kvserver/allocator.go b/pkg/kv/kvserver/allocator.go
index e756660ed2..7eb8af2d03 100644
--- a/pkg/kv/kvserver/allocator.go
+++ b/pkg/kv/kvserver/allocator.go
@@ -185,17 +185,6 @@ const (
nonVoterTarget
)
-// replicaStatus represents whether a replica is currently alive,
-// dead or decommissioning.
-type replicaStatus int
-
-const (
- _ replicaStatus = iota
- alive
- dead
- decommissioning
-)
-
// AddChangeType returns the roachpb.ReplicaChangeType corresponding to the
// given targetReplicaType.
//
diff --git a/pkg/kv/kvserver/batcheval/BUILD.bazel b/pkg/kv/kvserver/batcheval/BUILD.bazel
index cb85b4c9d2..bc5d66ddb7 100644
--- a/pkg/kv/kvserver/batcheval/BUILD.bazel
+++ b/pkg/kv/kvserver/batcheval/BUILD.bazel
@@ -86,7 +86,6 @@ go_library(
"//pkg/util/tracing",
"//pkg/util/uuid",
"@com_github_cockroachdb_errors//:errors",
- "@com_github_cockroachdb_redact//:redact",
"@com_github_gogo_protobuf//types",
"@com_github_kr_pretty//:pretty",
"@org_golang_x_time//rate",
diff --git a/pkg/kv/kvserver/batcheval/cmd_push_txn.go b/pkg/kv/kvserver/batcheval/cmd_push_txn.go
index ab7c60ddf6..fd8e78eac7 100644
--- a/pkg/kv/kvserver/batcheval/cmd_push_txn.go
+++ b/pkg/kv/kvserver/batcheval/cmd_push_txn.go
@@ -24,7 +24,6 @@ import (
"github.com/cockroachdb/cockroach/pkg/util/hlc"
"github.com/cockroachdb/cockroach/pkg/util/log"
"github.com/cockroachdb/errors"
- "github.com/cockroachdb/redact"
)
func init() {
@@ -269,10 +268,10 @@ func PushTxn(
s = "failed to push"
}
log.Infof(ctx, "%s %s (push type=%s) %s: %s (pushee last active: %s)",
- args.PusherTxn.Short(), redact.Safe(s),
- redact.Safe(pushType),
+ args.PusherTxn.Short(), log.Safe(s),
+ log.Safe(pushType),
args.PusheeTxn.Short(),
- redact.Safe(reason),
+ log.Safe(reason),
reply.PusheeTxn.LastActive())
}
diff --git a/pkg/kv/kvserver/client_merge_test.go b/pkg/kv/kvserver/client_merge_test.go
index c2e662ceae..c42254439d 100644
--- a/pkg/kv/kvserver/client_merge_test.go
+++ b/pkg/kv/kvserver/client_merge_test.go
@@ -417,7 +417,6 @@ func TestStoreRangeMergeTimestampCache(t *testing.T) {
defer leaktest.AfterTest(t)()
defer log.Scope(t).Close(t)
skip.UnderShort(t)
- skip.UnderRace(t)
testutils.RunTrueAndFalse(t, "disjoint-leaseholders", func(t *testing.T, disjointLeaseholders bool) {
testutils.RunTrueAndFalse(t, "through-snapshot", func(t *testing.T, throughSnapshot bool) {
@@ -4466,8 +4465,6 @@ func TestMergeQueueSeesNonVoters(t *testing.T) {
defer leaktest.AfterTest(t)()
defer log.Scope(t).Close(t)
- skip.UnderRace(t)
-
type test struct {
name string
leftVoters, rightVoters, leftNonVoters, rightNonVoters []int
diff --git a/pkg/kv/kvserver/client_protectedts_test.go b/pkg/kv/kvserver/client_protectedts_test.go
index 74eb5378c8..da28e02c3e 100644
--- a/pkg/kv/kvserver/client_protectedts_test.go
+++ b/pkg/kv/kvserver/client_protectedts_test.go
@@ -20,9 +20,8 @@ import (
"github.com/cockroachdb/cockroach/pkg/kv/kvserver"
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/protectedts/ptpb"
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/protectedts/ptstorage"
+ "github.com/cockroachdb/cockroach/pkg/kv/kvserver/protectedts/ptverifier"
"github.com/cockroachdb/cockroach/pkg/roachpb"
- "github.com/cockroachdb/cockroach/pkg/spanconfig"
- "github.com/cockroachdb/cockroach/pkg/spanconfig/spanconfigptsreader"
"github.com/cockroachdb/cockroach/pkg/sql"
"github.com/cockroachdb/cockroach/pkg/testutils"
"github.com/cockroachdb/cockroach/pkg/testutils/skip"
@@ -211,15 +210,12 @@ func TestProtectedTimestamps(t *testing.T) {
thresh := thresholdFromTrace(trace)
require.Truef(t, thresh.Less(ptsRec.Timestamp), "threshold: %v, protected %v %q", thresh, ptsRec.Timestamp, trace)
- // Verify that the record did indeed make its way down into KV where the
- // replica can read it from.
- ptsReader := tc.GetFirstStoreFromServer(t, 0).GetStoreConfig().ProtectedTimestampReader
- require.NoError(
- t,
- verifyProtectionTimestampExistsOnSpans(
- ctx, t, tc, ptsReader, ptsRec.Timestamp, ptsRec.DeprecatedSpans,
- ),
- )
+ // Verify that the record indeed did apply as far as the replica is concerned.
+ ptv := ptverifier.New(s0.DB(), pts)
+ require.NoError(t, ptv.Verify(ctx, ptsRec.ID.GetUUID()))
+ ptsRecVerified, err := ptsWithDB.GetRecord(ctx, nil /* txn */, ptsRec.ID.GetUUID())
+ require.NoError(t, err)
+ require.True(t, ptsRecVerified.Verified)
// Make a new record that is doomed to fail.
failedRec := ptsRec
@@ -230,15 +226,9 @@ func TestProtectedTimestamps(t *testing.T) {
_, err = ptsWithDB.GetRecord(ctx, nil /* txn */, failedRec.ID.GetUUID())
require.NoError(t, err)
- // Verify that the record did indeed make its way down into KV where the
- // replica can read it from. We then verify (below) that the failed record
- // does not affect the ability to GC.
- require.NoError(
- t,
- verifyProtectionTimestampExistsOnSpans(
- ctx, t, tc, ptsReader, failedRec.Timestamp, failedRec.DeprecatedSpans,
- ),
- )
+ // Verify that it indeed did fail.
+ verifyErr := ptv.Verify(ctx, failedRec.ID.GetUUID())
+ require.Regexp(t, "failed to verify protection", verifyErr)
// Add a new record that is after the old record.
laterRec := ptsRec
@@ -246,12 +236,7 @@ func TestProtectedTimestamps(t *testing.T) {
laterRec.Timestamp = afterWrites
laterRec.Timestamp.Logical = 0
require.NoError(t, ptsWithDB.Protect(ctx, nil /* txn */, &laterRec))
- require.NoError(
- t,
- verifyProtectionTimestampExistsOnSpans(
- ctx, t, tc, ptsReader, laterRec.Timestamp, laterRec.DeprecatedSpans,
- ),
- )
+ require.NoError(t, ptv.Verify(ctx, laterRec.ID.GetUUID()))
// Release the record that had succeeded and ensure that GC eventually
// happens up to the protected timestamp of the new record.
@@ -278,38 +263,3 @@ func TestProtectedTimestamps(t *testing.T) {
require.Len(t, state.Records, 0)
require.Equal(t, int(state.NumRecords), len(state.Records))
}
-
-// verifyProtectionTimestampExistsOnSpans refreshes the PTS state in KV and
-// ensures a protection at the given protectionTimestamp exists for all the
-// supplied spans.
-func verifyProtectionTimestampExistsOnSpans(
- ctx context.Context,
- t *testing.T,
- tc *testcluster.TestCluster,
- ptsReader spanconfig.ProtectedTSReader,
- protectionTimestamp hlc.Timestamp,
- spans roachpb.Spans,
-) error {
- if err := spanconfigptsreader.TestingRefreshPTSState(
- ctx, t, ptsReader, tc.Server(0).Clock().Now(),
- ); err != nil {
- return err
- }
- for _, sp := range spans {
- timestamps, _, err := ptsReader.GetProtectionTimestamps(ctx, sp)
- if err != nil {
- return err
- }
- found := false
- for _, ts := range timestamps {
- if ts.Equal(protectionTimestamp) {
- found = true
- break
- }
- }
- if !found {
- return errors.Newf("protection timestamp %s does not exist on span %s", protectionTimestamp, sp)
- }
- }
- return nil
-}
diff --git a/pkg/kv/kvserver/client_replica_circuit_breaker_bench_test.go b/pkg/kv/kvserver/client_replica_circuit_breaker_bench_test.go
new file mode 100644
index 0000000000..686a6a6c55
--- /dev/null
+++ b/pkg/kv/kvserver/client_replica_circuit_breaker_bench_test.go
@@ -0,0 +1,124 @@
+// Copyright 2022 The Cockroach Authors.
+//
+// Use of this software is governed by the Business Source License
+// included in the file licenses/BSL.txt.
+//
+// As of the Change Date specified in that file, in accordance with
+// the Business Source License, use of this software will be governed
+// by the Apache License, Version 2.0, included in the file
+// licenses/APL.txt.
+
+package kvserver_test
+
+import (
+ "context"
+ "fmt"
+ "math/rand"
+ "strconv"
+ "sync"
+ "testing"
+
+ "github.com/cockroachdb/cockroach/pkg/base"
+ "github.com/cockroachdb/cockroach/pkg/keys"
+ "github.com/cockroachdb/cockroach/pkg/kv/kvserver"
+ "github.com/cockroachdb/cockroach/pkg/roachpb"
+ "github.com/cockroachdb/cockroach/pkg/testutils/testcluster"
+ "github.com/cockroachdb/cockroach/pkg/util/encoding"
+ "github.com/cockroachdb/cockroach/pkg/util/leaktest"
+ "github.com/cockroachdb/cockroach/pkg/util/log"
+ "github.com/cockroachdb/cockroach/pkg/util/stop"
+ "github.com/stretchr/testify/require"
+)
+
+type replicaCircuitBreakerBench struct {
+ *testcluster.TestCluster
+ pool *sync.Pool // *BatchRequest
+}
+
+func (tc *replicaCircuitBreakerBench) repl(b *testing.B) *kvserver.Replica {
+ return tc.GetFirstStoreFromServer(b, 0).LookupReplica(keys.MustAddr(tc.ScratchRange(b)))
+}
+
+func setupCircuitBreakerReplicaBench(
+ b *testing.B, breakerEnabled bool, cs string,
+) (*replicaCircuitBreakerBench, *stop.Stopper) {
+ b.Helper()
+
+ var numShards int
+ {
+ _, err := fmt.Sscanf(cs, "mutexmap-%d", &numShards)
+ require.NoError(b, err)
+ }
+ sFn := func() kvserver.CancelStorage { return &kvserver.MapCancelStorage{NumShards: numShards} }
+
+ var knobs kvserver.StoreTestingKnobs
+ knobs.CancelStorageFactory = sFn
+
+ var args base.TestClusterArgs
+ args.ServerArgs.Knobs.Store = &knobs
+ tc := testcluster.StartTestCluster(b, 1, args)
+
+ stmt := `SET CLUSTER SETTING kv.replica_circuit_breaker.slow_replication_threshold = '1000s'`
+ if !breakerEnabled {
+ stmt = `SET CLUSTER SETTING kv.replica_circuit_breaker.slow_replication_threshold = '0s'`
+ }
+ _, err := tc.ServerConn(0).Exec(stmt)
+ require.NoError(b, err)
+ wtc := &replicaCircuitBreakerBench{
+ TestCluster: tc,
+ }
+ wtc.pool = &sync.Pool{
+ New: func() interface{} {
+ repl := wtc.repl(b)
+ var ba roachpb.BatchRequest
+ ba.RangeID = repl.RangeID
+ ba.Timestamp = repl.Clock().NowAsClockTimestamp().ToTimestamp()
+ var k roachpb.Key
+ k = append(k, repl.Desc().StartKey.AsRawKey()...)
+ k = encoding.EncodeUint64Ascending(k, uint64(rand.Intn(1000)))
+ ba.Add(roachpb.NewGet(k, false))
+ return &ba
+ },
+ }
+ return wtc, tc.Stopper()
+}
+
+func BenchmarkReplicaCircuitBreakerSendOverhead(b *testing.B) {
+ defer leaktest.AfterTest(b)()
+ defer log.Scope(b).Close(b)
+ ctx := context.Background()
+
+ for _, enabled := range []bool{false, true} {
+ b.Run("enabled="+strconv.FormatBool(enabled), func(b *testing.B) {
+ dss := []string{
+ "mutexmap-1", "mutexmap-2", "mutexmap-4", "mutexmap-8", "mutexmap-12", "mutexmap-16",
+ "mutexmap-20", "mutexmap-24", "mutexmap-32", "mutexmap-64",
+ }
+ if !enabled {
+ dss = dss[:1]
+ }
+
+ for _, ds := range dss {
+ b.Run(ds, func(b *testing.B) {
+ b.ReportAllocs()
+ tc, stopper := setupCircuitBreakerReplicaBench(b, enabled, ds)
+ defer stopper.Stop(ctx)
+
+ repl := tc.repl(b)
+
+ b.ResetTimer()
+ b.RunParallel(func(pb *testing.PB) {
+ for pb.Next() {
+ ba := tc.pool.Get().(*roachpb.BatchRequest)
+ _, err := repl.Send(ctx, *ba)
+ tc.pool.Put(ba)
+ if err != nil {
+ b.Fatal(err)
+ }
+ }
+ })
+ })
+ }
+ })
+ }
+}
diff --git a/pkg/kv/kvserver/client_replica_circuit_breaker_test.go b/pkg/kv/kvserver/client_replica_circuit_breaker_test.go
index de63960736..0293834831 100644
--- a/pkg/kv/kvserver/client_replica_circuit_breaker_test.go
+++ b/pkg/kv/kvserver/client_replica_circuit_breaker_test.go
@@ -36,7 +36,6 @@ import (
"github.com/cockroachdb/cockroach/pkg/util/log"
"github.com/cockroachdb/cockroach/pkg/util/stop"
"github.com/cockroachdb/cockroach/pkg/util/timeutil"
- "github.com/cockroachdb/cockroach/pkg/util/tracing"
"github.com/cockroachdb/errors"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
@@ -83,13 +82,12 @@ func TestReplicaCircuitBreaker_LeaseholderTripped(t *testing.T) {
defer log.Scope(t).Close(t)
tc := setupCircuitBreakerTest(t)
defer tc.Stopper().Stop(context.Background())
- k := tc.ScratchRange(t)
// Get lease on n1.
require.NoError(t, tc.Write(n1))
// Disable the probe so that when the breaker trips, it stays tripped.
tc.SetProbeEnabled(n1, false)
- tc.TripBreaker(n1)
+ tc.Report(n1, errors.New("boom"))
s1 := tc.GetFirstStoreFromServer(t, n1)
s2 := tc.GetFirstStoreFromServer(t, n2)
@@ -99,10 +97,10 @@ func TestReplicaCircuitBreaker_LeaseholderTripped(t *testing.T) {
require.Zero(t, s2.Metrics().ReplicaCircuitBreakerCurTripped.Value())
require.Zero(t, s2.Metrics().ReplicaCircuitBreakerCumTripped.Count())
- // n1 can still serve reads despite the breaker having tripped, as there is a
- // valid lease and no poisoned latches prevent the read.
- require.NoError(t, tc.Read(n1))
- require.NoError(t, tc.FollowerRead(n1))
+ // n1 could theoretically still serve reads (there is a valid lease
+ // and none of the latches are taken), but since it is hard to determine
+ // that upfront we currently fail all reads as well.
+ tc.RequireIsBreakerOpen(t, tc.Read(n1))
tc.RequireIsBreakerOpen(t, tc.Write(n1))
// When we go through the KV client stack, we still get the breaker error
@@ -110,29 +108,23 @@ func TestReplicaCircuitBreaker_LeaseholderTripped(t *testing.T) {
tc.RequireIsBreakerOpen(t, tc.WriteDS(n1))
tc.RequireIsBreakerOpen(t, tc.WriteDS(n2))
- // Can't transfer the lease away while breaker is tripped. (This would be
- // a bad idea, since n1 would stop serving strong reads, thus making the
- // outage worse).
- tc.RequireIsBreakerOpen(t,
- tc.TransferRangeLease(tc.LookupRangeOrFatal(t, k), tc.Target(n2)),
- )
-
// n2 does not have the lease so all it does is redirect to the leaseholder
- // n1, but it can serve follower reads.
+ // n1.
tc.RequireIsNotLeaseholderError(t, tc.Read(n2))
- require.NoError(t, tc.FollowerRead(n2))
tc.RequireIsNotLeaseholderError(t, tc.Write(n2))
// Enable the probe. Even a read should trigger the probe
// and within due time the breaker should heal.
tc.SetProbeEnabled(n1, true)
- require.NoError(t, tc.Read(n1)) // this always worked
- // Writes heal soon.
+ tc.UntripsSoon(t, tc.Read, n1)
+ // Same behavior on writes.
+ tc.Report(n1, errors.New("boom again"))
tc.UntripsSoon(t, tc.Write, n1)
- // Currently tripped drops back to zero, all-time remains at one.
+ // Currently tripped drops back to zero, all-time is two (since we tripped
+ // it twice).
require.EqualValues(t, 0, s1.Metrics().ReplicaCircuitBreakerCurTripped.Value())
- require.EqualValues(t, 1, s1.Metrics().ReplicaCircuitBreakerCumTripped.Count())
+ require.EqualValues(t, 2, s1.Metrics().ReplicaCircuitBreakerCumTripped.Count())
// s2 wasn't affected by any breaker events.
require.Zero(t, s2.Metrics().ReplicaCircuitBreakerCurTripped.Value())
require.Zero(t, s2.Metrics().ReplicaCircuitBreakerCumTripped.Count())
@@ -142,9 +134,9 @@ func TestReplicaCircuitBreaker_LeaseholderTripped(t *testing.T) {
// breaker on follower n2. Before the breaker is tripped, we see
// NotLeaseholderError. When it's tripped, those are supplanted by the breaker
// errors. Once we allow the breaker to probe, the breaker untrips. In
-// particular, this tests that the probe can succeed even when run on a follower
-// (which would not be true if it required the local Replica to execute an
-// operation that requires the lease).
+// particular, this tests that the probe can succeed even when run on a
+// follower (which would not be true if it required the local Replica to
+// execute an operation that requires the lease).
func TestReplicaCircuitBreaker_FollowerTripped(t *testing.T) {
defer leaktest.AfterTest(t)()
defer log.Scope(t).Close(t)
@@ -155,7 +147,7 @@ func TestReplicaCircuitBreaker_FollowerTripped(t *testing.T) {
require.NoError(t, tc.Write(n1))
// Disable the probe on n2 so that when the breaker trips, it stays tripped.
tc.SetProbeEnabled(n2, false)
- tc.TripBreaker(n2)
+ tc.Report(n2, errors.New("boom"))
// We didn't trip the leaseholder n1, so it is unaffected.
require.NoError(t, tc.Read(n1))
@@ -166,21 +158,31 @@ func TestReplicaCircuitBreaker_FollowerTripped(t *testing.T) {
// time of writing it would propagate it.
require.NoError(t, tc.WriteDS(n1))
- tc.RequireIsNotLeaseholderError(t, tc.Read(n2))
- tc.RequireIsNotLeaseholderError(t, tc.Write(n2))
- require.NoError(t, tc.FollowerRead(n2))
+ tc.RequireIsBreakerOpen(t, tc.Read(n2))
+ tc.RequireIsBreakerOpen(t, tc.Write(n2))
- // Enable the probe again. n2 should untrip soon.
+ // Enable the probe. Even a read should trigger the probe
+ // and within due time the breaker should heal, giving us
+ // NotLeaseholderErrors again.
+ //
+ // TODO(tbg): this test would be more meaningful with follower reads. They
+ // should succeed when the breaker is open and fail if the breaker is
+ // tripped. However knowing that the circuit breaker check sits at the top
+ // of Replica.sendWithRangeID, it's clear that it won't make a difference.
tc.SetProbeEnabled(n2, true)
- tc.RequireIsNotLeaseholderError(t, tc.Read(n2))
- tc.RequireIsNotLeaseholderError(t, tc.Write(n2))
testutils.SucceedsSoon(t, func() error {
- // NB: this is slightly contrived - the mere act of accessing Err() is what
- // triggers the probe! Regular requests on the replica wouldn'd do that,
- // since we're intentionally preferring a NotLeaseholderError over a breaker
- // error (and thus aren't ever accessing the breaker when we can't serve the
- // request).
- return tc.repls[n2].Breaker().Signal().Err()
+ if err := tc.Read(n2); !errors.HasType(err, (*roachpb.NotLeaseHolderError)(nil)) {
+ return err
+ }
+ return nil
+ })
+ // Same behavior on writes.
+ tc.Report(n2, errors.New("boom again"))
+ testutils.SucceedsSoon(t, func() error {
+ if err := tc.Write(n2); !errors.HasType(err, (*roachpb.NotLeaseHolderError)(nil)) {
+ return err
+ }
+ return nil
})
}
@@ -200,45 +202,34 @@ func TestReplicaCircuitBreaker_LeaselessTripped(t *testing.T) {
// disabled.
require.NoError(t, tc.Write(n1))
tc.SetProbeEnabled(n1, false)
- tc.TripBreaker(n1)
- resumeHeartbeats := tc.ExpireAllLeasesAndN1LivenessRecord(t, pauseHeartbeats)
-
- // On n1, run into the circuit breaker when requesting lease. We have to
- // resume heartbeats for this to not time out, as requesting the new lease
- // entails doing liveness checks which can't succeed if nobody is
- // heartbeating, and we'd get stuck in liveness before reaching the circuit
- // breaker. (In other words, replica circuit breaking doesn't fail-fast
- // requests reliably if liveness is unavailable; this is tracked in #74616).
- // We don't attempt to acquire a lease on n2 since it would try and succeed
- // (except the test harness categorically prevents n2 from getting a lease,
- // injecting an error).
- resumeHeartbeats()
- testutils.SucceedsSoon(t, func() error {
- err := tc.Read(n1)
- if errors.HasType(err, (*roachpb.NotLeaseHolderError)(nil)) {
- // Retriable errors can occur when manipulating the liveness record in
- // preparation for requesting a lease, such as:
- //
- // [NotLeaseHolderError] failed to manipulate liveness record: heartbeat
- // failed on epoch increment; r45: replica (n1,s1):1 not lease holder;
- // current lease is repl=(n1,s1):1 seq=1 start=0,0 epo=1 pro=[...]
- return err
- }
- tc.RequireIsBreakerOpen(t, err)
- tc.RequireIsBreakerOpen(t, tc.Write(n1))
- return nil
- })
+ tc.Report(n1, errors.New("boom"))
+ resumeHeartbeats := tc.ExpireAllLeases(t, pauseHeartbeats)
- // Can still perform follower reads on both nodes, as this does not rely on
- // the lease and does not consult the breaker.
- require.NoError(t, tc.FollowerRead(n1))
- require.NoError(t, tc.FollowerRead(n2))
+ // n2 (not n1) will return a NotLeaseholderError. This may be surprising -
+ // why isn't it trying and succeeding to acquire a lease - but it does
+ // not do that because it sees that the new leaseholder (n2) is not live
+ // itself. We'll revisit this after re-enabling liveness later in the test.
+ {
+ err := tc.Read(n2)
+ // At time of writing: not incrementing epoch on n1 because next
+ // leaseholder (n2) not live.
+ t.Log(err)
+ tc.RequireIsNotLeaseholderError(t, err)
+ // Same behavior for write on n2.
+ tc.RequireIsNotLeaseholderError(t, tc.Write(n2))
+ }
+ // On n1, run into the circuit breaker when requesting lease.
+ {
+ tc.RequireIsBreakerOpen(t, tc.Read(n1))
+ tc.RequireIsBreakerOpen(t, tc.Write(n1))
+ }
// Let the breaker heal and things should go back to normal. This is not a
// trivial thing to hold, as the probe needs to go through for this, and if
// we're not careful, the probe itself is held up by the breaker as well, or
// the probe will try to acquire a lease (which we're currently careful to
// avoid).
+ resumeHeartbeats()
tc.SetProbeEnabled(n1, true)
tc.UntripsSoon(t, tc.Read, n1)
tc.UntripsSoon(t, tc.Write, n1)
@@ -263,33 +254,21 @@ func TestReplicaCircuitBreaker_Leaseholder_QuorumLoss(t *testing.T) {
tc.StopServer(n2) // lose quorum
// We didn't lose the liveness range (which is only on n1).
- tc.HeartbeatNodeLiveness(t, n1)
-
- // Read still works, as we have valid lease and no poisoned latch
- // underneath.
- require.NoError(t, tc.Read(n1))
+ require.NoError(t, tc.Server(n1).HeartbeatNodeLiveness())
tc.SetSlowThreshold(10 * time.Millisecond)
{
err := tc.Write(n1)
var ae *roachpb.AmbiguousResultError
require.True(t, errors.As(err, &ae), "%+v", err)
t.Log(err)
- tc.RequireIsBreakerOpen(t, err)
- }
- // We still have a valid lease, but now the above write is holding a poisoned
- // latch (this is true despite the write itself having returned already).
- // However, can still serve follower reads because those don't check latches
- // (nor do they need the lease, though there is a valid one in this case).
- {
- tc.RequireIsBreakerOpen(t, tc.Read(n1))
- require.NoError(t, tc.FollowerRead(n1))
}
+ tc.RequireIsBreakerOpen(t, tc.Read(n1))
// Bring n2 back and service should be restored.
tc.SetSlowThreshold(0) // reset
require.NoError(t, tc.RestartServer(n2))
- tc.UntripsSoon(t, tc.Write, n1) // poisoned latch goes away
- require.NoError(t, tc.Read(n1))
+ tc.UntripsSoon(t, tc.Read, n1)
+ require.NoError(t, tc.Write(n1))
}
// In this test, the range is on n1 and n2 and we place the lease on n2 and
@@ -308,13 +287,12 @@ func TestReplicaCircuitBreaker_Follower_QuorumLoss(t *testing.T) {
// Get lease to n2 so that we can lose it without taking down the system ranges.
desc := tc.LookupRangeOrFatal(t, tc.ScratchRange(t))
tc.TransferRangeLeaseOrFatal(t, desc, tc.Target(n2))
- resumeHeartbeats := tc.ExpireAllLeasesAndN1LivenessRecord(t, keepHeartbeats)
+ resumeHeartbeats := tc.ExpireAllLeases(t, keepHeartbeats)
tc.StopServer(n2) // lose quorum and leaseholder
resumeHeartbeats()
// We didn't lose the liveness range (which is only on n1).
- tc.HeartbeatNodeLiveness(t, n1)
-
+ require.NoError(t, tc.Server(n1).HeartbeatNodeLiveness())
tc.SetSlowThreshold(10 * time.Millisecond)
tc.RequireIsBreakerOpen(t, tc.Write(n1))
tc.RequireIsBreakerOpen(t, tc.Read(n1))
@@ -322,8 +300,8 @@ func TestReplicaCircuitBreaker_Follower_QuorumLoss(t *testing.T) {
// Bring n2 back and service should be restored.
tc.SetSlowThreshold(0) // reset
require.NoError(t, tc.RestartServer(n2))
- tc.UntripsSoon(t, tc.Write, n1)
- require.NoError(t, tc.Read(n1))
+ tc.UntripsSoon(t, tc.Read, n1)
+ require.NoError(t, tc.Write(n1))
}
// This test is skipped but documents that the current circuit breakers cannot
@@ -375,7 +353,7 @@ func TestReplicaCircuitBreaker_Liveness_QuorumLoss(t *testing.T) {
// Expire all leases. We also pause all heartbeats but that doesn't really
// matter since the liveness range is unavailable anyway.
- resume := tc.ExpireAllLeasesAndN1LivenessRecord(t, pauseHeartbeats)
+ resume := tc.ExpireAllLeases(t, pauseHeartbeats)
defer resume()
// Since there isn't a lease, and the liveness range is down, the circuit
@@ -395,11 +373,6 @@ func TestReplicaCircuitBreaker_Liveness_QuorumLoss(t *testing.T) {
}
type dummyStream struct {
- name string
- t interface {
- Helper()
- Logf(string, ...interface{})
- }
ctx context.Context
recv chan *roachpb.RangeFeedEvent
}
@@ -409,8 +382,7 @@ func (s *dummyStream) Context() context.Context {
}
func (s *dummyStream) Send(ev *roachpb.RangeFeedEvent) error {
- if ev.Val == nil && ev.Error == nil {
- s.t.Logf("%s: ignoring event: %v", s.name, ev)
+ if ev.Val == nil {
return nil
}
select {
@@ -438,44 +410,39 @@ func TestReplicaCircuitBreaker_RangeFeed(t *testing.T) {
args := &roachpb.RangeFeedRequest{
Span: roachpb.Span{Key: desc.StartKey.AsRawKey(), EndKey: desc.EndKey.AsRawKey()},
}
-
- ctx, cancel := context.WithCancel(ctx)
+ // This test shouldn't take in excess of 45s even under the worst of conditions.
+ ctx, cancel := context.WithTimeout(ctx, testutils.DefaultSucceedsSoonDuration)
defer cancel()
- stream1 := &dummyStream{t: t, ctx: ctx, name: "rangefeed1", recv: make(chan *roachpb.RangeFeedEvent)}
+ stream1 := &dummyStream{ctx: ctx, recv: make(chan *roachpb.RangeFeedEvent)}
require.NoError(t, tc.Stopper().RunAsyncTask(ctx, "stream1", func(ctx context.Context) {
err := tc.repls[0].RangeFeed(args, stream1).GoError()
if ctx.Err() != nil {
- return // main goroutine stopping
+ return
}
assert.NoError(t, err) // avoid Fatal on goroutine
}))
- readOneVal := func(ctx context.Context, stream *dummyStream, timeout time.Duration) error {
+ readOneVal := func(t *testing.T, stream *dummyStream) {
for {
+ var done bool
select {
- case <-time.After(timeout):
- return errors.Errorf("%s: read timed out after %.2fs", stream.name, timeout.Seconds())
case <-ctx.Done():
- return ctx.Err()
+ t.Fatal(ctx.Err())
case ev := <-stream.recv:
- if ev.Error != nil {
- return ev.Error.Error.GoError()
- }
- if ev.Val != nil {
- t.Logf("%s: %s", stream.name, ev)
- return nil
- }
+ t.Log(ev)
+ done = true
+ }
+ if done {
+ break
}
}
}
- testutils.SucceedsSoon(t, func() error {
- require.NoError(t, tc.Write(n1))
- return readOneVal(ctx, stream1, time.Millisecond)
- })
+ require.NoError(t, tc.Write(n1))
+ readOneVal(t, stream1)
// NB: keep heartbeats because we're not trying to lose the liveness range.
- undo := tc.ExpireAllLeasesAndN1LivenessRecord(t, keepHeartbeats)
+ undo := tc.ExpireAllLeases(t, keepHeartbeats)
undo()
tc.SetSlowThreshold(10 * time.Millisecond)
tc.StopServer(n2)
@@ -483,11 +450,11 @@ func TestReplicaCircuitBreaker_RangeFeed(t *testing.T) {
// Start another stream during the "outage" to make sure it isn't rejected by
// the breaker.
- stream2 := &dummyStream{t: t, ctx: ctx, name: "rangefeed2", recv: make(chan *roachpb.RangeFeedEvent)}
+ stream2 := &dummyStream{ctx: ctx, recv: make(chan *roachpb.RangeFeedEvent)}
require.NoError(t, tc.Stopper().RunAsyncTask(ctx, "stream2", func(ctx context.Context) {
err := tc.repls[0].RangeFeed(args, stream2).GoError()
if ctx.Err() != nil {
- return // main goroutine stopping
+ return
}
assert.NoError(t, err) // avoid Fatal on goroutine
}))
@@ -496,13 +463,8 @@ func TestReplicaCircuitBreaker_RangeFeed(t *testing.T) {
require.NoError(t, tc.RestartServer(n2))
tc.UntripsSoon(t, tc.Write, n1)
- require.NoError(t, readOneVal(ctx, stream1, testutils.DefaultSucceedsSoonDuration))
- // For the stream that started mid-way through the outage, we expect it to
- // return a circuit breaker error, but in theory it could also never have
- // tried to acquire a lease, in which case it might return a value as well.
- if err := readOneVal(ctx, stream2, testutils.DefaultSucceedsSoonDuration); err != nil {
- tc.RequireIsBreakerOpen(t, err)
- }
+ readOneVal(t, stream1)
+ readOneVal(t, stream2)
}
func TestReplicaCircuitBreaker_ExemptRequests(t *testing.T) {
@@ -515,7 +477,7 @@ func TestReplicaCircuitBreaker_ExemptRequests(t *testing.T) {
// disabled, i.e. it will stay tripped.
require.NoError(t, tc.Write(n1))
tc.SetProbeEnabled(n1, false)
- tc.TripBreaker(n1)
+ tc.Report(n1, errors.New("boom"))
exemptRequests := []func() roachpb.Request{
func() roachpb.Request { return &roachpb.ExportRequest{ReturnSST: true} },
@@ -548,41 +510,31 @@ func TestReplicaCircuitBreaker_ExemptRequests(t *testing.T) {
for _, reqFn := range exemptRequests {
req := reqFn()
- tc.Run(t, fmt.Sprintf("with-existing-lease/%s", req.Method()), func(t *testing.T) {
+ t.Run(fmt.Sprintf("with-existing-lease/%s", req.Method()), func(t *testing.T) {
require.NoError(t, tc.Send(n1, req))
})
}
for _, reqFn := range exemptRequests {
req := reqFn()
- tc.Run(t, fmt.Sprintf("with-acquire-lease/%s", req.Method()), func(t *testing.T) {
- resumeHeartbeats := tc.ExpireAllLeasesAndN1LivenessRecord(t, pauseHeartbeats)
+ t.Run(fmt.Sprintf("with-acquire-lease/%s", req.Method()), func(t *testing.T) {
+ resumeHeartbeats := tc.ExpireAllLeases(t, pauseHeartbeats)
resumeHeartbeats() // intentionally resume right now so that lease can be acquired
- // NB: when looking into the traces here, we sometimes see - as expected -
- // that when the request tries to acquire a lease, the breaker is still
- // tripped. That's why there is a retry loop here.
- testutils.SucceedsSoon(t, func() error {
- err := tc.Send(n1, req)
- if errors.HasType(err, (*roachpb.NotLeaseHolderError)(nil)) {
- return err
- }
- require.NoError(t, err)
- return nil
- })
+ require.NoError(t, tc.Send(n1, req))
})
}
- resumeHeartbeats := tc.ExpireAllLeasesAndN1LivenessRecord(t, pauseHeartbeats)
+ resumeHeartbeats := tc.ExpireAllLeases(t, pauseHeartbeats)
+ defer resumeHeartbeats() // can't acquire leases until test ends
for _, reqFn := range exemptRequests {
req := reqFn()
- tc.Run(t, fmt.Sprintf("with-unavailable-lease/%s", req.Method()), func(t *testing.T) {
- if m := req.Method(); m == roachpb.Probe {
- // Probe does not require the lease, and is the most-tested of the bunch
- // already. We don't have to test it again here, which would require undue
- // amounts of special-casing below.
- skip.IgnoreLintf(t, "subtest does not apply to %s", m)
- }
-
+ if req.Method() == roachpb.Probe {
+ // Probe does not require the lease, and is the most-tested of the bunch
+ // already. We don't have to test it again here, which would require undue
+ // amounts of special-casing below.
+ continue
+ }
+ t.Run(fmt.Sprintf("with-unavailable-lease/%s", req.Method()), func(t *testing.T) {
ctx, cancel := context.WithTimeout(context.Background(), 3*time.Millisecond)
defer cancel()
const maxWait = 5 * time.Second
@@ -596,46 +548,6 @@ func TestReplicaCircuitBreaker_ExemptRequests(t *testing.T) {
require.Less(t, timeutil.Since(tBegin), maxWait)
})
}
-
- // Restore the breaker via the probe.
- resumeHeartbeats()
- tc.SetProbeEnabled(n1, true)
- tc.UntripsSoon(t, tc.Write, n1)
-
- // Lose quorum (liveness stays intact).
- tc.SetSlowThreshold(10 * time.Millisecond)
- tc.StopServer(n2)
- // Let the breaker trip. This leaves a poisoned latch behind that at least some of
- // the requests will interact with.
- tc.RequireIsBreakerOpen(t, tc.Write(n1))
- tc.RequireIsBreakerOpen(t, tc.Read(n1))
-
- for _, reqFn := range exemptRequests {
- req := reqFn()
- tc.Run(t, fmt.Sprintf("with-poisoned-latch/%s", req.Method()), func(t *testing.T) {
- if m := req.Method(); m == roachpb.GC {
- // GC without GCKeys acquires no latches and is a pure read. If we want
- // to put a key in there, we need to pick the right timestamp (since you
- // can't GC a live key); it's all rather annoying and not worth it. In
- // the long run, we also completely want to avoid acquiring latches for
- // this request (since it should only mutate keyspace that has since
- // fallen under the GCThreshold), so avoid cooking up anything special
- // here.
- skip.IgnoreLintf(t, "subtest does not apply to %s", m)
- }
- ctx, cancel := context.WithTimeout(context.Background(), 3*time.Millisecond)
- defer cancel()
- const maxWait = 5 * time.Second
- tBegin := timeutil.Now()
- err := tc.SendCtx(ctx, n1, req)
- t.Log(err)
- require.Error(t, err)
- require.Error(t, ctx.Err())
- // Make sure we didn't run into the "long" timeout inside of SendCtx but
- // actually terminated as a result of our ctx cancelling.
- require.Less(t, timeutil.Since(tBegin), maxWait)
- })
- }
}
// Test infrastructure below.
@@ -661,18 +573,19 @@ func makeBreakerToggleable(b *circuit.Breaker) (setProbeEnabled func(bool)) {
}
}
+type replWithKnob struct {
+ *kvserver.Replica
+ setProbeEnabled func(bool)
+}
+
type circuitBreakerTest struct {
- t decoT
*testcluster.TestCluster
slowThresh *atomic.Value // time.Duration
ManualClock *hlc.HybridManualClock
repls []replWithKnob // 0 -> repl on Servers[0], etc
-
- seq int
}
func setupCircuitBreakerTest(t *testing.T) *circuitBreakerTest {
- skip.UnderStressRace(t)
manualClock := hlc.NewHybridManualClock()
var rangeID int64 // atomic
slowThresh := &atomic.Value{} // supports .SetSlowThreshold(x)
@@ -743,8 +656,7 @@ func setupCircuitBreakerTest(t *testing.T) *circuitBreakerTest {
k := tc.ScratchRange(t)
atomic.StoreInt64(&rangeID, int64(tc.LookupRangeOrFatal(t, k).RangeID))
- tc.AddVotersOrFatal(t, k, tc.Target(n2))
- require.NoError(t, tc.WaitForVoters(k, tc.Target(n2)))
+ tc.AddVotersOrFatal(t, k, tc.Target(1))
var repls []replWithKnob
for i := range tc.Servers {
@@ -753,7 +665,6 @@ func setupCircuitBreakerTest(t *testing.T) *circuitBreakerTest {
repls = append(repls, replWithKnob{repl, enableProbe})
}
return &circuitBreakerTest{
- t: decoT{t},
TestCluster: tc,
ManualClock: manualClock,
repls: repls,
@@ -761,28 +672,12 @@ func setupCircuitBreakerTest(t *testing.T) *circuitBreakerTest {
}
}
-// Run is a wrapper around t.Run that allows the test harness to print traces
-// using the subtest's *testing.T.
-func (cbt *circuitBreakerTest) Run(t *testing.T, name string, f func(t *testing.T)) {
- t.Helper()
- t.Run(name, func(t *testing.T) {
- t.Helper()
- outerT := cbt.t
- cbt.t = decoT{t}
- defer func() {
- cbt.t = outerT
- }()
- f(t)
- })
-}
-
func (cbt *circuitBreakerTest) SetProbeEnabled(idx int, to bool) {
cbt.repls[idx].setProbeEnabled(to)
}
-func (cbt *circuitBreakerTest) TripBreaker(idx int) {
- repl := cbt.repls[idx].Replica
- repl.TripBreaker()
+func (cbt *circuitBreakerTest) Report(idx int, err error) {
+ cbt.repls[idx].Replica.Breaker().Report(err)
}
func (cbt *circuitBreakerTest) UntripsSoon(t *testing.T, method func(idx int) error, idx int) {
@@ -791,60 +686,31 @@ func (cbt *circuitBreakerTest) UntripsSoon(t *testing.T, method func(idx int) er
t.Helper()
err := method(idx)
// All errors coming out should be annotated as coming from
- // the circuit breaker. In rare cases, we can also see a
- // NotLeaseholderError such as this one:
- // [NotLeaseHolderError] failed to manipulate liveness record: heartbeat
- // failed on epoch increment; r45: replica (n1,s1):1 not lease holder;
- // current lease is repl=(n1,s1):1 seq=1 start=0,0 epo=1 pro=[...]
- if err != nil &&
- !errors.Is(err, circuit.ErrBreakerOpen) &&
- !errors.HasType(err, (*roachpb.NotLeaseHolderError)(nil)) {
-
+ // the circuit breaker.
+ if err != nil && !errors.Is(err, circuit.ErrBreakerOpen) {
t.Fatalf("saw unexpected error %+v", err)
}
return err
})
}
-func (cbt *circuitBreakerTest) ExpireAllLeasesAndN1LivenessRecord(
- t *testing.T, pauseHeartbeats bool,
-) (undo func()) {
+func (cbt *circuitBreakerTest) ExpireAllLeases(t *testing.T, pauseHeartbeats bool) (undo func()) {
t.Helper()
+ var maxWT int64
var fs []func()
- for idx, srv := range cbt.Servers {
+ for _, srv := range cbt.Servers {
lv := srv.NodeLiveness().(*liveness.NodeLiveness)
-
if pauseHeartbeats {
undo := lv.PauseAllHeartbeatsForTest()
fs = append(fs, undo)
}
-
self, ok := lv.Self()
require.True(t, ok)
-
- cbt.ManualClock.Forward(self.Expiration.WallTime)
- if idx == n1 {
- // Invalidate n1's liveness record, to make sure that ranges on n1 need
- // to acquire a new lease (vs waiting for a heartbeat to the liveness
- // record resuscitating the old one).
- //
- // Needing to do this is the reason for special-casing this entire method
- // around n1; if we stop heartbeats for both nodes, they can't increment
- // each others liveness records: if a node's liveness is paused, it doesn't
- // allow incrementing records neither. (This is silly).
- lv2 := cbt.Server(n2).NodeLiveness().(*liveness.NodeLiveness)
- testutils.SucceedsSoon(t, func() error {
- self, ok := lv.Self()
- require.True(t, ok)
- if self.IsLive(cbt.Server(n2).Clock().Now().GoTime()) {
- // Someone else must have incremented epoch.
- return nil
- }
- return lv2.IncrementEpoch(context.Background(), self)
- })
+ if maxWT < self.Expiration.WallTime {
+ maxWT = self.Expiration.WallTime
}
}
-
+ cbt.ManualClock.Forward(maxWT + 1)
return func() {
for _, f := range fs {
f()
@@ -853,32 +719,15 @@ func (cbt *circuitBreakerTest) ExpireAllLeasesAndN1LivenessRecord(
}
func (cbt *circuitBreakerTest) Send(idx int, req roachpb.Request) error {
- cbt.t.Helper()
return cbt.SendCtx(context.Background(), idx, req)
-}
-func (cbt *circuitBreakerTest) SendCtx(ctx context.Context, idx int, req roachpb.Request) error {
- return cbt.SendCtxTS(ctx, idx, req, cbt.repls[idx].Clock().Now())
}
-func (cbt *circuitBreakerTest) SendCtxTS(
- ctx context.Context, idx int, req roachpb.Request, ts hlc.Timestamp,
-) error {
- cbt.t.Helper()
- ctx, finishAndGet := tracing.ContextWithRecordingSpan(ctx, cbt.repls[idx].Tracer, "SendCtx("+req.Method().String()+")")
- defer time.AfterFunc(10*time.Second, func() {
- rec := tracing.SpanFromContext(ctx).GetConfiguredRecording()
- cbt.t.Logf("slow request: %s", rec)
- }).Stop()
- defer func() {
- cbt.t.Helper()
- rec := finishAndGet()
- cbt.t.Logf("%s", rec)
- }()
+func (cbt *circuitBreakerTest) SendCtx(ctx context.Context, idx int, req roachpb.Request) error {
var ba roachpb.BatchRequest
repl := cbt.repls[idx]
ba.RangeID = repl.Desc().RangeID
- ba.Timestamp = ts
+ ba.Timestamp = repl.Clock().Now()
ba.Add(req)
if h := req.Header(); len(h.Key) == 0 {
h.Key = repl.Desc().StartKey.AsRawKey()
@@ -902,20 +751,29 @@ func (cbt *circuitBreakerTest) SendCtxTS(
if err := ctx.Err(); err != nil && parCtx.Err() == nil {
pErr = roachpb.NewErrorf("timed out waiting for batch response: %v", pErr)
}
+ {
+ var err error
+ repl.VisitBreakerContexts(func(ctx context.Context) {
+ if err == nil && ctx.Value(req) != nil {
+ err = errors.Errorf(
+ "request %s returned but context still tracked in breaker", req,
+ )
+ }
+ })
+ if err != nil {
+ pErr = roachpb.NewErrorf("%s; after %v", err, pErr)
+ }
+ }
return pErr.GoError()
}
func (cbt *circuitBreakerTest) WriteDS(idx int) error {
- cbt.t.Helper()
put := roachpb.NewPut(cbt.repls[idx].Desc().StartKey.AsRawKey(), roachpb.MakeValueFromString("hello"))
return cbt.sendViaDistSender(cbt.Servers[idx].DistSender(), put)
}
-func (cbt *circuitBreakerTest) sendViaDistSender(
- ds *kvcoord.DistSender, req roachpb.Request,
-) error {
- cbt.t.Helper()
+func (*circuitBreakerTest) sendViaDistSender(ds *kvcoord.DistSender, req roachpb.Request) error {
var ba roachpb.BatchRequest
ba.Add(req)
ctx, cancel := context.WithTimeout(context.Background(), testutils.DefaultSucceedsSoonDuration)
@@ -932,17 +790,16 @@ func (cbt *circuitBreakerTest) sendViaDistSender(
func (*circuitBreakerTest) RequireIsBreakerOpen(t *testing.T, err error) {
t.Helper()
- t.Log(err)
- // NB: we will see AmbiguousResultError here when proposals are inflight while
- // the breaker trips. These are wrapping errors, so the assertions below will
- // look through them.
+ // We also accept an ambiguous result wrapping a breaker error; this occurs
+ // when the breaker trips while a write is already inflight.
+ if aErr := (*roachpb.AmbiguousResultError)(nil); errors.As(err, &aErr) && aErr.WrappedErr != nil {
+ err = aErr.WrappedErr.GoError()
+ }
require.True(t, errors.Is(err, circuit.ErrBreakerOpen), "%+v", err)
- require.True(t, errors.HasType(err, (*roachpb.ReplicaUnavailableError)(nil)), "%+v", err)
}
func (*circuitBreakerTest) RequireIsNotLeaseholderError(t *testing.T, err error) {
t.Helper()
- t.Log(err)
ok := errors.HasType(err, (*roachpb.NotLeaseHolderError)(nil))
require.True(t, ok, "%+v", err)
}
@@ -955,56 +812,13 @@ func (cbt *circuitBreakerTest) SetSlowThreshold(dur time.Duration) {
}
func (cbt *circuitBreakerTest) Write(idx int) error {
- cbt.t.Helper()
repl := cbt.repls[idx]
- cbt.seq++
- put := roachpb.NewPut(
- repl.Desc().StartKey.AsRawKey(), roachpb.MakeValueFromString(fmt.Sprintf("hello-%d", cbt.seq)),
- )
+ put := roachpb.NewPut(repl.Desc().StartKey.AsRawKey(), roachpb.MakeValueFromString("hello"))
return cbt.Send(idx, put)
}
func (cbt *circuitBreakerTest) Read(idx int) error {
- cbt.t.Helper()
repl := cbt.repls[idx]
get := roachpb.NewGet(repl.Desc().StartKey.AsRawKey(), false /* forUpdate */)
return cbt.Send(idx, get)
}
-
-func (cbt *circuitBreakerTest) FollowerRead(idx int) error {
- cbt.t.Helper()
- repl := cbt.repls[idx]
- get := roachpb.NewGet(repl.Desc().StartKey.AsRawKey(), false /* forUpdate */)
- ctx := context.Background()
- ts := repl.GetClosedTimestamp(ctx)
- return cbt.SendCtxTS(ctx, idx, get, ts)
-}
-
-func (cbt *circuitBreakerTest) HeartbeatNodeLiveness(t *testing.T, idx int) {
- // Retry loop is needed because heartbeat may race with internal heartbeat
- // loop.
- testutils.SucceedsSoon(t, func() error {
- return cbt.Server(idx).HeartbeatNodeLiveness()
- })
-}
-
-type replWithKnob struct {
- *kvserver.Replica
- setProbeEnabled func(bool)
-}
-
-type logT interface {
- Helper()
- Logf(string, ...interface{})
-}
-
-type decoT struct {
- logT
-}
-
-func (t *decoT) Logf(format string, args ...interface{}) {
- // It can be difficult to spot the actual failure among all of the
- // traces, so this is a convenient place to annotate the logging
- // (or disable it one-off).
- t.logT.Logf("info:\n"+format, args...)
-}
diff --git a/pkg/kv/kvserver/client_replica_test.go b/pkg/kv/kvserver/client_replica_test.go
index 681c1ba188..a14aa81a3a 100644
--- a/pkg/kv/kvserver/client_replica_test.go
+++ b/pkg/kv/kvserver/client_replica_test.go
@@ -38,7 +38,6 @@ import (
"github.com/cockroachdb/cockroach/pkg/server"
"github.com/cockroachdb/cockroach/pkg/settings/cluster"
"github.com/cockroachdb/cockroach/pkg/spanconfig"
- "github.com/cockroachdb/cockroach/pkg/spanconfig/spanconfigptsreader"
"github.com/cockroachdb/cockroach/pkg/sql"
"github.com/cockroachdb/cockroach/pkg/sql/catalog/bootstrap"
"github.com/cockroachdb/cockroach/pkg/storage"
@@ -3518,12 +3517,10 @@ func TestStrictGCEnforcement(t *testing.T) {
t.Helper()
testutils.SucceedsSoon(t, func() error {
for i := 0; i < tc.NumServers(); i++ {
- ptsReader := tc.GetFirstStoreFromServer(t, 0).GetStoreConfig().ProtectedTimestampReader
- _, asOf, err := ptsReader.GetProtectionTimestamps(ctx, tableSpan)
- if err != nil {
- return err
- }
- if asOf.Less(min) {
+ ptp := tc.Server(i).ExecutorConfig().(sql.ExecutorConfig).ProtectedTimestampProvider
+ if ptp.Iterate(ctx, tableKey, tableKey, func(record *ptpb.Record) (wantMore bool) {
+ return false
+ }).Less(min) {
return errors.Errorf("not yet read")
}
}
@@ -3575,27 +3572,11 @@ func TestStrictGCEnforcement(t *testing.T) {
}
refreshPastLeaseStart = func(t *testing.T) {
for i := 0; i < tc.NumServers(); i++ {
- ptsReader := tc.GetFirstStoreFromServer(t, 0).GetStoreConfig().ProtectedTimestampReader
- _, r := getFirstStoreReplica(t, tc.Server(i), tableKey)
- l, _ := r.GetLease()
- require.NoError(
- t,
- spanconfigptsreader.TestingRefreshPTSState(ctx, t, ptsReader, l.Start.ToTimestamp().Next()),
- )
- err := r.ReadProtectedTimestamps(ctx)
- require.NoError(t, err)
- }
- }
- refreshCacheAndUpdatePTSState = func(t *testing.T, nodeID roachpb.NodeID) {
- for i := 0; i < tc.NumServers(); i++ {
- if tc.Server(i).NodeID() != nodeID {
- continue
- }
ptp := tc.Server(i).ExecutorConfig().(sql.ExecutorConfig).ProtectedTimestampProvider
- require.NoError(t, ptp.Refresh(ctx, tc.Server(i).Clock().Now()))
_, r := getFirstStoreReplica(t, tc.Server(i), tableKey)
- err := r.ReadProtectedTimestamps(ctx)
- require.NoError(t, err)
+ l, _ := r.GetLease()
+ require.NoError(t, ptp.Refresh(ctx, l.Start.ToTimestamp().Next()))
+ r.ReadProtectedTimestamps(ctx)
}
}
)
@@ -3651,15 +3632,13 @@ func TestStrictGCEnforcement(t *testing.T) {
}))
assertScanRejected(t)
- desc, err := tc.LookupRange(tableKey)
- require.NoError(t, err)
- target, err := tc.FindRangeLeaseHolder(desc, nil)
- require.NoError(t, err)
- refreshCacheAndUpdatePTSState(t, target.NodeID)
+ require.NoError(t, ptp.Verify(ctx, rec.ID.GetUUID()))
assertScanOk(t)
// Transfer the lease and demonstrate that the query succeeds because we're
// cautious in the face of lease transfers.
+ desc, err := tc.LookupRange(tableKey)
+ require.NoError(t, err)
require.NoError(t, tc.TransferRangeLease(desc, tc.Target(1)))
assertScanOk(t)
})
diff --git a/pkg/kv/kvserver/client_spanconfigs_test.go b/pkg/kv/kvserver/client_spanconfigs_test.go
index 3f46b364df..c9bfd59838 100644
--- a/pkg/kv/kvserver/client_spanconfigs_test.go
+++ b/pkg/kv/kvserver/client_spanconfigs_test.go
@@ -47,7 +47,6 @@ func TestSpanConfigUpdateAppliedToReplica(t *testing.T) {
Store: &kvserver.StoreTestingKnobs{
DisableMergeQueue: true,
DisableSplitQueue: true,
- DisableGCQueue: true,
},
SpanConfig: &spanconfig.TestingKnobs{
StoreKVSubscriberOverride: mockSubscriber,
@@ -119,12 +118,6 @@ func (m *mockSpanConfigSubscriber) GetSpanConfigForKey(
return m.Store.GetSpanConfigForKey(ctx, key)
}
-func (m *mockSpanConfigSubscriber) GetProtectionTimestamps(
- context.Context, roachpb.Span,
-) ([]hlc.Timestamp, hlc.Timestamp, error) {
- panic("unimplemented")
-}
-
func (m *mockSpanConfigSubscriber) LastUpdated() hlc.Timestamp {
panic("unimplemented")
}
diff --git a/pkg/kv/kvserver/client_split_test.go b/pkg/kv/kvserver/client_split_test.go
index 64430abd59..1847bf028a 100644
--- a/pkg/kv/kvserver/client_split_test.go
+++ b/pkg/kv/kvserver/client_split_test.go
@@ -38,7 +38,6 @@ import (
"github.com/cockroachdb/cockroach/pkg/roachpb"
"github.com/cockroachdb/cockroach/pkg/rpc"
"github.com/cockroachdb/cockroach/pkg/server"
- "github.com/cockroachdb/cockroach/pkg/spanconfig"
"github.com/cockroachdb/cockroach/pkg/sql/catalog/bootstrap"
"github.com/cockroachdb/cockroach/pkg/sql/catalog/systemschema"
"github.com/cockroachdb/cockroach/pkg/storage"
@@ -2385,9 +2384,6 @@ func TestUnsplittableRange(t *testing.T) {
DefaultZoneConfigOverride: &zoneConfig,
DefaultSystemZoneConfigOverride: &zoneSystemConfig,
},
- SpanConfig: &spanconfig.TestingKnobs{
- ProtectedTSReaderOverrideFn: spanconfig.EmptyProtectedTSReader,
- },
},
})
s := serv.(*server.TestServer)
@@ -2433,7 +2429,9 @@ func TestUnsplittableRange(t *testing.T) {
manualClock.Increment(10 * ttl.Nanoseconds())
// Trigger the MVCC GC queue, which should clean up the earlier version of the
// row. Once the first version of the row is cleaned up, the range should
- // exit the split queue purgatory.
+ // exit the split queue purgatory. We need to tickle the protected timestamp
+ // subsystem to release a timestamp at which we get to actually remove the data.
+ require.NoError(t, store.GetStoreConfig().ProtectedTimestampCache.Refresh(ctx, s.Clock().Now()))
repl := store.LookupReplica(tableKey)
if err := store.ManualMVCCGC(repl); err != nil {
t.Fatal(err)
diff --git a/pkg/kv/kvserver/closed_timestamp_test.go b/pkg/kv/kvserver/closed_timestamp_test.go
index 862d12b248..55e7035853 100644
--- a/pkg/kv/kvserver/closed_timestamp_test.go
+++ b/pkg/kv/kvserver/closed_timestamp_test.go
@@ -606,8 +606,6 @@ func TestClosedTimestampFrozenAfterSubsumption(t *testing.T) {
defer leaktest.AfterTest(t)()
defer log.Scope(t).Close(t)
- skip.UnderRace(t)
-
for _, test := range []struct {
name string
// transferLease, if set, will be called while the RHS is subsumed in order
diff --git a/pkg/kv/kvserver/concurrency/BUILD.bazel b/pkg/kv/kvserver/concurrency/BUILD.bazel
index 76721b2f43..4e28adfbb8 100644
--- a/pkg/kv/kvserver/concurrency/BUILD.bazel
+++ b/pkg/kv/kvserver/concurrency/BUILD.bazel
@@ -18,7 +18,6 @@ go_library(
"//pkg/keys",
"//pkg/kv",
"//pkg/kv/kvserver/concurrency/lock",
- "//pkg/kv/kvserver/concurrency/poison",
"//pkg/kv/kvserver/intentresolver",
"//pkg/kv/kvserver/spanlatch",
"//pkg/kv/kvserver/spanset",
@@ -52,11 +51,9 @@ go_test(
],
data = glob(["testdata/**"]),
embed = [":concurrency"],
- shard_count = 16,
deps = [
"//pkg/kv/kvserver/batcheval",
"//pkg/kv/kvserver/concurrency/lock",
- "//pkg/kv/kvserver/concurrency/poison",
"//pkg/kv/kvserver/intentresolver",
"//pkg/kv/kvserver/spanlatch",
"//pkg/kv/kvserver/spanset",
diff --git a/pkg/kv/kvserver/concurrency/concurrency_control.go b/pkg/kv/kvserver/concurrency/concurrency_control.go
index c5d27ebe1f..883a83e603 100644
--- a/pkg/kv/kvserver/concurrency/concurrency_control.go
+++ b/pkg/kv/kvserver/concurrency/concurrency_control.go
@@ -18,7 +18,6 @@ import (
"time"
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/concurrency/lock"
- "github.com/cockroachdb/cockroach/pkg/kv/kvserver/concurrency/poison"
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/spanset"
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/txnwait"
"github.com/cockroachdb/cockroach/pkg/roachpb"
@@ -190,15 +189,6 @@ type RequestSequencer interface {
// does so, it will not return a request guard.
SequenceReq(context.Context, *Guard, Request, RequestEvalKind) (*Guard, Response, *Error)
- // PoisonReq idempotently marks a Guard as poisoned, indicating that its
- // latches may be held for an indefinite amount of time. Requests waiting on
- // this Guard will be notified. Latch acquisitions under poison.Policy_Error
- // react to this by failing with a poison.PoisonedError, while requests under
- // poison.Policy_Wait continue waiting, but propagate the poisoning upwards.
- //
- // See poison.Policy for details.
- PoisonReq(*Guard)
-
// FinishReq marks the request as complete, releasing any protection
// the request had against conflicting requests and allowing conflicting
// requests that are blocked on this one to proceed. The guard should not
@@ -395,9 +385,6 @@ type Request struct {
// with a WriteIntentError instead of entering the queue and waiting.
MaxLockWaitQueueLength int
- // The poison.Policy to use for this Request.
- PoisonPolicy poison.Policy
-
// The individual requests in the batch.
Requests []roachpb.RequestUnion
@@ -477,12 +464,9 @@ type latchManager interface {
// WaitFor waits for conflicting latches on the specified spans without adding
// any latches itself. Fast path for operations that only require flushing out
// old operations without blocking any new ones.
- WaitFor(ctx context.Context, spans *spanset.SpanSet, pp poison.Policy) *Error
-
- // Poison a guard's latches, allowing waiters to fail fast.
- Poison(latchGuard)
+ WaitFor(ctx context.Context, spans *spanset.SpanSet) *Error
- // Release a guard's latches, relinquish its protection from conflicting requests.
+	// Release releases a guard's latches, relinquishing their protection from conflicting requests.
Release(latchGuard)
// Metrics returns information about the state of the latchManager.
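
As an aside, the SequenceReq/FinishReq contract documented above composes into a simple sequence-evaluate-finish pattern. The following is a hedged, illustrative sketch (not code from this change): only SequenceReq, FinishReq, and the types named in their signatures come from the interface; the helper, the eval callback, and the nil initial guard are assumptions.

package example

import (
	"context"

	"github.com/cockroachdb/cockroach/pkg/kv/kvserver/concurrency"
)

// sequenceAndEval sketches the sequence -> evaluate -> finish flow implied by
// the RequestSequencer comments above. The helper and callback are illustrative.
func sequenceAndEval(
	ctx context.Context,
	m concurrency.RequestSequencer,
	req concurrency.Request,
	kind concurrency.RequestEvalKind,
	eval func(context.Context) (concurrency.Response, *concurrency.Error),
) (concurrency.Response, *concurrency.Error) {
	// A nil guard is assumed here to indicate a request sequenced for the first time.
	g, resp, pErr := m.SequenceReq(ctx, nil, req, kind)
	if pErr != nil || resp != nil {
		// Sequencing failed, or the request was answered without evaluation;
		// in either case no guard is returned.
		return resp, pErr
	}
	// Latches (and lock wait-queue slots) are now held on behalf of the request.
	defer m.FinishReq(g) // relinquish protection against conflicting requests
	return eval(ctx)
}
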
diff --git a/pkg/kv/kvserver/concurrency/concurrency_manager.go b/pkg/kv/kvserver/concurrency/concurrency_manager.go
index 5bc0d22414..c3ee4f1cd1 100644
--- a/pkg/kv/kvserver/concurrency/concurrency_manager.go
+++ b/pkg/kv/kvserver/concurrency/concurrency_manager.go
@@ -230,7 +230,7 @@ func (m *managerImpl) sequenceReqWithGuard(ctx context.Context, g *Guard) (Respo
// them.
if shouldWaitOnLatchesWithoutAcquiring(g.Req) {
log.Event(ctx, "waiting on latches without acquiring")
- return nil, m.lm.WaitFor(ctx, g.Req.LatchSpans, g.Req.PoisonPolicy)
+ return nil, m.lm.WaitFor(ctx, g.Req.LatchSpans)
}
// Provide the manager with an opportunity to intercept the request. It
@@ -382,15 +382,6 @@ func shouldWaitOnLatchesWithoutAcquiring(req Request) bool {
return req.isSingle(roachpb.Barrier)
}
-// PoisonReq implements the RequestSequencer interface.
-func (m *managerImpl) PoisonReq(g *Guard) {
- // NB: g.lg == nil is the case for requests that ignore latches, see
- // shouldIgnoreLatches.
- if g.lg != nil {
- m.lm.Poison(g.lg)
- }
-}
-
// FinishReq implements the RequestSequencer interface.
func (m *managerImpl) FinishReq(g *Guard) {
// NOTE: we release latches _before_ exiting lock wait-queues deliberately.
diff --git a/pkg/kv/kvserver/concurrency/concurrency_manager_test.go b/pkg/kv/kvserver/concurrency/concurrency_manager_test.go
index cfe06d29e9..e533b51748 100644
--- a/pkg/kv/kvserver/concurrency/concurrency_manager_test.go
+++ b/pkg/kv/kvserver/concurrency/concurrency_manager_test.go
@@ -54,10 +54,9 @@ import (
// The input files use the following DSL:
//
// new-txn name=<txn-name> ts=<int>[,<int>] epoch=<int> [uncertainty-limit=<int>[,<int>]]
-// new-request name=<req-name> txn=<txn-name>|none ts=<int>[,<int>] [priority] [inconsistent] [wait-policy=<policy>] [lock-timeout] [max-lock-wait-queue-length=<int>] [poison-policy=[err|wait]]
+// new-request name=<req-name> txn=<txn-name>|none ts=<int>[,<int>] [priority] [inconsistent] [wait-policy=<policy>] [lock-timeout] [max-lock-wait-queue-length=<int>]
// <proto-name> [<field-name>=<field-value>...] (hint: see scanSingleRequest)
// sequence req=<req-name> [eval-kind=<pess|opt|pess-after-opt>]
-// poison req=<req-name>
// finish req=<req-name>
//
// handle-write-intent-error req=<req-name> txn=<txn-name> key=<key> lease-seq=<seq>
@@ -170,8 +169,6 @@ func TestConcurrencyManagerBasic(t *testing.T) {
d.ScanArgs(t, "max-lock-wait-queue-length", &maxLockWaitQueueLength)
}
- pp := scanPoisonPolicy(t, d)
-
// Each roachpb.Request is provided on an indented line.
reqs, reqUnions := scanRequests(t, d, c)
latchSpans, lockSpans := c.collectSpans(t, txn, ts, reqs)
@@ -187,7 +184,6 @@ func TestConcurrencyManagerBasic(t *testing.T) {
Requests: reqUnions,
LatchSpans: latchSpans,
LockSpans: lockSpans,
- PoisonPolicy: pp,
}
return ""
@@ -261,20 +257,6 @@ func TestConcurrencyManagerBasic(t *testing.T) {
})
return c.waitAndCollect(t, mon)
- case "poison":
- var reqName string
- d.ScanArgs(t, "req", &reqName)
- guard, ok := c.guardsByReqName[reqName]
- if !ok {
- d.Fatalf(t, "unknown request: %s", reqName)
- }
-
- opName := fmt.Sprintf("poison %s", reqName)
- mon.runSync(opName, func(ctx context.Context) {
- log.Event(ctx, "poisoning request")
- m.PoisonReq(guard)
- })
- return c.waitAndCollect(t, mon)
case "handle-write-intent-error":
var reqName string
d.ScanArgs(t, "req", &reqName)
diff --git a/pkg/kv/kvserver/concurrency/datadriven_util_test.go b/pkg/kv/kvserver/concurrency/datadriven_util_test.go
index 11067bce44..e9e8f9483a 100644
--- a/pkg/kv/kvserver/concurrency/datadriven_util_test.go
+++ b/pkg/kv/kvserver/concurrency/datadriven_util_test.go
@@ -15,7 +15,6 @@ import (
"testing"
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/concurrency/lock"
- "github.com/cockroachdb/cockroach/pkg/kv/kvserver/concurrency/poison"
"github.com/cockroachdb/cockroach/pkg/roachpb"
"github.com/cockroachdb/cockroach/pkg/storage/enginepb"
"github.com/cockroachdb/cockroach/pkg/util/hlc"
@@ -76,24 +75,6 @@ func scanWaitPolicy(t *testing.T, d *datadriven.TestData, required bool) lock.Wa
}
}
-func scanPoisonPolicy(t *testing.T, d *datadriven.TestData) poison.Policy {
- const key = "poison-policy"
- if !d.HasArg(key) {
- return poison.Policy_Error
- }
- var policy string
- d.ScanArgs(t, key, &policy)
- switch policy {
- case "error":
- return poison.Policy_Error
- case "wait":
- return poison.Policy_Wait
- default:
- d.Fatalf(t, "unknown poison policy: %s", policy)
- return 0
- }
-}
-
func scanSingleRequest(
t *testing.T, d *datadriven.TestData, line string, txns map[string]*roachpb.Transaction,
) roachpb.Request {
diff --git a/pkg/kv/kvserver/concurrency/latch_manager.go b/pkg/kv/kvserver/concurrency/latch_manager.go
index b0f0705734..b0a4b8eb10 100644
--- a/pkg/kv/kvserver/concurrency/latch_manager.go
+++ b/pkg/kv/kvserver/concurrency/latch_manager.go
@@ -13,7 +13,6 @@ package concurrency
import (
"context"
- "github.com/cockroachdb/cockroach/pkg/kv/kvserver/concurrency/poison"
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/spanlatch"
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/spanset"
"github.com/cockroachdb/cockroach/pkg/roachpb"
@@ -25,7 +24,7 @@ type latchManagerImpl struct {
}
func (m *latchManagerImpl) Acquire(ctx context.Context, req Request) (latchGuard, *Error) {
- lg, err := m.m.Acquire(ctx, req.LatchSpans, req.PoisonPolicy)
+ lg, err := m.m.Acquire(ctx, req.LatchSpans)
if err != nil {
return nil, roachpb.NewError(err)
}
@@ -33,7 +32,7 @@ func (m *latchManagerImpl) Acquire(ctx context.Context, req Request) (latchGuard
}
func (m *latchManagerImpl) AcquireOptimistic(req Request) latchGuard {
- lg := m.m.AcquireOptimistic(req.LatchSpans, req.PoisonPolicy)
+ lg := m.m.AcquireOptimistic(req.LatchSpans)
return lg
}
@@ -51,20 +50,14 @@ func (m *latchManagerImpl) WaitUntilAcquired(
return lg, nil
}
-func (m *latchManagerImpl) WaitFor(
- ctx context.Context, ss *spanset.SpanSet, pp poison.Policy,
-) *Error {
- err := m.m.WaitFor(ctx, ss, pp)
+func (m *latchManagerImpl) WaitFor(ctx context.Context, ss *spanset.SpanSet) *Error {
+ err := m.m.WaitFor(ctx, ss)
if err != nil {
return roachpb.NewError(err)
}
return nil
}
-func (m *latchManagerImpl) Poison(lg latchGuard) {
- m.m.Poison(lg.(*spanlatch.Guard))
-}
-
func (m *latchManagerImpl) Release(lg latchGuard) {
m.m.Release(lg.(*spanlatch.Guard))
}
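
For orientation, the two-argument Acquire shown above pairs with Release in the usual acquire/work/release pattern. A minimal sketch, assuming a *spanlatch.Manager and a populated *spanset.SpanSet; the helper name and callback are illustrative, not part of this change.

package example

import (
	"context"

	"github.com/cockroachdb/cockroach/pkg/kv/kvserver/spanlatch"
	"github.com/cockroachdb/cockroach/pkg/kv/kvserver/spanset"
)

// withLatches acquires latches over spans, runs fn, and releases the latches,
// mirroring latchManagerImpl.Acquire/Release above (illustrative helper).
func withLatches(
	ctx context.Context, lm *spanlatch.Manager, spans *spanset.SpanSet, fn func() error,
) error {
	lg, err := lm.Acquire(ctx, spans)
	if err != nil {
		return err // e.g. context cancellation while waiting on conflicting latches
	}
	defer lm.Release(lg)
	return fn()
}
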
diff --git a/pkg/kv/kvserver/concurrency/lock/BUILD.bazel b/pkg/kv/kvserver/concurrency/lock/BUILD.bazel
index 0c44ef2c00..dc7ca0cb25 100644
--- a/pkg/kv/kvserver/concurrency/lock/BUILD.bazel
+++ b/pkg/kv/kvserver/concurrency/lock/BUILD.bazel
@@ -1,32 +1,21 @@
load("@rules_proto//proto:defs.bzl", "proto_library")
load("@io_bazel_rules_go//proto:def.bzl", "go_proto_library")
-load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test")
+load("@io_bazel_rules_go//go:def.bzl", "go_library")
go_library(
name = "lock",
- srcs = [
- "lock_waiter.go",
- "locking.go",
- ],
+ srcs = ["locking.go"],
embed = [":lock_go_proto"],
importpath = "github.com/cockroachdb/cockroach/pkg/kv/kvserver/concurrency/lock",
visibility = ["//visibility:public"],
- deps = ["@com_github_cockroachdb_redact//:redact"],
)
proto_library(
name = "lock_proto",
- srcs = [
- "lock_waiter.proto",
- "locking.proto",
- ],
+ srcs = ["locking.proto"],
strip_import_prefix = "/pkg",
visibility = ["//visibility:public"],
- deps = [
- "//pkg/storage/enginepb:enginepb_proto",
- "@com_github_gogo_protobuf//gogoproto:gogo_proto",
- "@com_google_protobuf//:duration_proto",
- ],
+ deps = ["@com_github_gogo_protobuf//gogoproto:gogo_proto"],
)
go_proto_library(
@@ -35,22 +24,5 @@ go_proto_library(
importpath = "github.com/cockroachdb/cockroach/pkg/kv/kvserver/concurrency/lock",
proto = ":lock_proto",
visibility = ["//visibility:public"],
- deps = [
- "//pkg/storage/enginepb",
- "@com_github_gogo_protobuf//gogoproto",
- ],
-)
-
-go_test(
- name = "lock_test",
- srcs = ["lock_waiter_test.go"],
- deps = [
- ":lock",
- "//pkg/roachpb",
- "//pkg/storage/enginepb",
- "//pkg/util/hlc",
- "//pkg/util/uuid",
- "@com_github_cockroachdb_redact//:redact",
- "@com_github_stretchr_testify//require",
- ],
+ deps = ["@com_github_gogo_protobuf//gogoproto"],
)
diff --git a/pkg/kv/kvserver/concurrency/lock/lock_waiter.go b/pkg/kv/kvserver/concurrency/lock/lock_waiter.go
deleted file mode 100644
index be334625d4..0000000000
--- a/pkg/kv/kvserver/concurrency/lock/lock_waiter.go
+++ /dev/null
@@ -1,30 +0,0 @@
-// Copyright 2022 The Cockroach Authors.
-//
-// Use of this software is governed by the Business Source License
-// included in the file licenses/BSL.txt.
-//
-// As of the Change Date specified in that file, in accordance with
-// the Business Source License, use of this software will be governed
-// by the Apache License, Version 2.0, included in the file
-// licenses/APL.txt.
-
-// Package lock provides type definitions for locking-related concepts used by
-// concurrency control in the key-value layer.
-package lock
-
-import "github.com/cockroachdb/redact"
-
-// SafeFormat implements redact.SafeFormatter.
-func (lw Waiter) SafeFormat(w redact.SafePrinter, _ rune) {
- expand := w.Flag('+')
-
- txnIDRedactableString := redact.Sprint(nil)
- if lw.WaitingTxn != nil {
- if expand {
- txnIDRedactableString = redact.Sprint(lw.WaitingTxn.ID)
- } else {
- txnIDRedactableString = redact.Sprint(lw.WaitingTxn.Short())
- }
- }
- w.Printf("waiting_txn:%s active_waiter:%t strength:%s wait_duration:%s", txnIDRedactableString, lw.ActiveWaiter, lw.Strength, lw.WaitDuration)
-}
diff --git a/pkg/kv/kvserver/concurrency/lock/lock_waiter.proto b/pkg/kv/kvserver/concurrency/lock/lock_waiter.proto
deleted file mode 100644
index e5bfce38ea..0000000000
--- a/pkg/kv/kvserver/concurrency/lock/lock_waiter.proto
+++ /dev/null
@@ -1,35 +0,0 @@
-// Copyright 2022 The Cockroach Authors.
-//
-// Use of this software is governed by the Business Source License
-// included in the file licenses/BSL.txt.
-//
-// As of the Change Date specified in that file, in accordance with
-// the Business Source License, use of this software will be governed
-// by the Apache License, Version 2.0, included in the file
-// licenses/APL.txt.
-
-syntax = "proto3";
-package cockroach.kv.kvserver.concurrency.lock;
-option go_package = "lock";
-
-import "kv/kvserver/concurrency/lock/locking.proto";
-import "storage/enginepb/mvcc3.proto";
-import "gogoproto/gogo.proto";
-import "google/protobuf/duration.proto";
-
-// Waiter represents a transaction (or non-transactional operation) that is
-// waiting in the wait queue of readers or writers on an individual lock.
-message Waiter {
- // The transaction associated with this waiter, or nil in the case of a
- // non-transactional waiter.
- storage.enginepb.TxnMeta waiting_txn = 1;
- // Represents if this operation is actively waiting on the lock. While all
- // readers are active waiters, there are some cases in which writers may not
- // be actively waiting, for instance in the case of a broken reservation.
- bool active_waiter = 2;
- // The strength at which this waiter is attempting to acquire the lock.
- Strength strength = 3;
- // The wall clock duration since this operation began waiting on the lock.
- google.protobuf.Duration wait_duration = 4 [(gogoproto.nullable) = false,
- (gogoproto.stdduration) = true];
-}
diff --git a/pkg/kv/kvserver/concurrency/lock/lock_waiter_test.go b/pkg/kv/kvserver/concurrency/lock/lock_waiter_test.go
deleted file mode 100644
index 32d01ce4e9..0000000000
--- a/pkg/kv/kvserver/concurrency/lock/lock_waiter_test.go
+++ /dev/null
@@ -1,79 +0,0 @@
-// Copyright 2022 The Cockroach Authors.
-//
-// Use of this software is governed by the Business Source License
-// included in the file licenses/BSL.txt.
-//
-// As of the Change Date specified in that file, in accordance with
-// the Business Source License, use of this software will be governed
-// by the Apache License, Version 2.0, included in the file
-// licenses/APL.txt.
-
-// Package lock provides type definitions for locking-related concepts used by
-// concurrency control in the key-value layer.
-package lock_test
-
-import (
- "testing"
- "time"
-
- "github.com/cockroachdb/cockroach/pkg/kv/kvserver/concurrency/lock"
- "github.com/cockroachdb/cockroach/pkg/roachpb"
- "github.com/cockroachdb/cockroach/pkg/storage/enginepb"
- "github.com/cockroachdb/cockroach/pkg/util/hlc"
- "github.com/cockroachdb/cockroach/pkg/util/uuid"
- "github.com/cockroachdb/redact"
- "github.com/stretchr/testify/require"
-)
-
-func TestWaiterSafeFormat(t *testing.T) {
- ts := hlc.Timestamp{Logical: 1}
- txnMeta := &enginepb.TxnMeta{
- Key: roachpb.Key("foo"),
- ID: uuid.NamespaceDNS,
- Epoch: 2,
- WriteTimestamp: ts,
- MinTimestamp: ts,
- Priority: 957356782,
- Sequence: 123,
- CoordinatorNodeID: 3,
- }
- waiter := &lock.Waiter{
- WaitingTxn: txnMeta,
- ActiveWaiter: true,
- Strength: lock.Exclusive,
- WaitDuration: 135 * time.Second,
- }
-
- require.EqualValues(t,
- "waiting_txn:6ba7b810 active_waiter:true strength:Exclusive wait_duration:2m15s",
- redact.Sprint(waiter).StripMarkers())
- require.EqualValues(t,
- "waiting_txn:6ba7b810-9dad-11d1-80b4-00c04fd430c8 active_waiter:true strength:Exclusive wait_duration:2m15s",
- redact.Sprintf("%+v", waiter).StripMarkers())
- require.EqualValues(t,
- "waiting_txn:6ba7b810 active_waiter:true strength:Exclusive wait_duration:2m15s",
- redact.Sprint(waiter).Redact())
- require.EqualValues(t,
- "waiting_txn:‹×› active_waiter:true strength:Exclusive wait_duration:2m15s",
- redact.Sprintf("%+v", waiter).Redact())
-
- nonTxnWaiter := &lock.Waiter{
- WaitingTxn: nil,
- ActiveWaiter: false,
- Strength: lock.None,
- WaitDuration: 17 * time.Millisecond,
- }
-
- require.EqualValues(t,
- "waiting_txn:<nil> active_waiter:false strength:None wait_duration:17ms",
- redact.Sprint(nonTxnWaiter).StripMarkers())
- require.EqualValues(t,
- "waiting_txn:<nil> active_waiter:false strength:None wait_duration:17ms",
- redact.Sprintf("%+v", nonTxnWaiter).StripMarkers())
- require.EqualValues(t,
- "waiting_txn:<nil> active_waiter:false strength:None wait_duration:17ms",
- redact.Sprint(nonTxnWaiter).Redact())
- require.EqualValues(t,
- "waiting_txn:<nil> active_waiter:false strength:None wait_duration:17ms",
- redact.Sprintf("%+v", nonTxnWaiter).Redact())
-}
diff --git a/pkg/kv/kvserver/concurrency/lock/locking.go b/pkg/kv/kvserver/concurrency/lock/locking.go
index f22a010d50..75d665d8f0 100644
--- a/pkg/kv/kvserver/concurrency/lock/locking.go
+++ b/pkg/kv/kvserver/concurrency/lock/locking.go
@@ -25,11 +25,5 @@ func init() {
}
}
-// SafeValue implements redact.SafeValue.
-func (Strength) SafeValue() {}
-
-// SafeValue implements redact.SafeValue.
-func (Durability) SafeValue() {}
-
// SafeValue implements redact.SafeValue.
func (WaitPolicy) SafeValue() {}
diff --git a/pkg/kv/kvserver/concurrency/lock_table_test.go b/pkg/kv/kvserver/concurrency/lock_table_test.go
index b5a1e30ce1..c78ec55fb0 100644
--- a/pkg/kv/kvserver/concurrency/lock_table_test.go
+++ b/pkg/kv/kvserver/concurrency/lock_table_test.go
@@ -21,7 +21,6 @@ import (
"time"
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/concurrency/lock"
- "github.com/cockroachdb/cockroach/pkg/kv/kvserver/concurrency/poison"
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/spanlatch"
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/spanset"
"github.com/cockroachdb/cockroach/pkg/roachpb"
@@ -871,7 +870,7 @@ func doWork(ctx context.Context, item *workItem, e *workloadExecutor) error {
// cancellation, the code makes sure to release latches when returning
// early due to error. Otherwise other requests will get stuck and
// group.Wait() will not return until the test times out.
- lg, err = e.lm.Acquire(context.Background(), item.request.LatchSpans, poison.Policy_Error)
+ lg, err = e.lm.Acquire(context.Background(), item.request.LatchSpans)
if err != nil {
return err
}
@@ -1415,7 +1414,7 @@ func doBenchWork(item *benchWorkItem, env benchEnv, doneCh chan<- error) {
var err error
firstIter := true
for {
- if lg, err = env.lm.Acquire(context.Background(), item.LatchSpans, poison.Policy_Error); err != nil {
+ if lg, err = env.lm.Acquire(context.Background(), item.LatchSpans); err != nil {
doneCh <- err
return
}
@@ -1450,7 +1449,7 @@ func doBenchWork(item *benchWorkItem, env benchEnv, doneCh chan<- error) {
return
}
// Release locks.
- if lg, err = env.lm.Acquire(context.Background(), item.LatchSpans, poison.Policy_Error); err != nil {
+ if lg, err = env.lm.Acquire(context.Background(), item.LatchSpans); err != nil {
doneCh <- err
return
}
diff --git a/pkg/kv/kvserver/concurrency/poison/BUILD.bazel b/pkg/kv/kvserver/concurrency/poison/BUILD.bazel
deleted file mode 100644
index cf282b9e15..0000000000
--- a/pkg/kv/kvserver/concurrency/poison/BUILD.bazel
+++ /dev/null
@@ -1,61 +0,0 @@
-load("@rules_proto//proto:defs.bzl", "proto_library")
-load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test")
-load("@io_bazel_rules_go//proto:def.bzl", "go_proto_library")
-
-proto_library(
- name = "poison_proto",
- srcs = [
- "error.proto",
- "policy.proto",
- ],
- strip_import_prefix = "/pkg",
- visibility = ["//visibility:public"],
- deps = [
- "//pkg/roachpb:roachpb_proto",
- "//pkg/util/hlc:hlc_proto",
- "@com_github_gogo_protobuf//gogoproto:gogo_proto",
- ],
-)
-
-go_proto_library(
- name = "poison_go_proto",
- compilers = ["//pkg/cmd/protoc-gen-gogoroach:protoc-gen-gogoroach_compiler"],
- importpath = "github.com/cockroachdb/cockroach/pkg/kv/kvserver/concurrency/poison",
- proto = ":poison_proto",
- visibility = ["//visibility:public"],
- deps = [
- "//pkg/roachpb",
- "//pkg/util/hlc",
- "@com_github_gogo_protobuf//gogoproto",
- ],
-)
-
-go_library(
- name = "poison",
- srcs = ["error.go"],
- embed = [":poison_go_proto"],
- importpath = "github.com/cockroachdb/cockroach/pkg/kv/kvserver/concurrency/poison",
- visibility = ["//visibility:public"],
- deps = [
- "//pkg/roachpb",
- "//pkg/util/hlc",
- "@com_github_cockroachdb_errors//:errors",
- ],
-)
-
-go_test(
- name = "poison_test",
- srcs = ["error_test.go"],
- data = glob(["testdata/**"]),
- deps = [
- ":poison",
- "//pkg/keys",
- "//pkg/roachpb",
- "//pkg/testutils/echotest",
- "//pkg/util/hlc",
- "//pkg/util/leaktest",
- "@com_github_cockroachdb_errors//:errors",
- "@com_github_cockroachdb_redact//:redact",
- "@com_github_stretchr_testify//require",
- ],
-)
diff --git a/pkg/kv/kvserver/concurrency/poison/error.go b/pkg/kv/kvserver/concurrency/poison/error.go
deleted file mode 100644
index 3c473fc40a..0000000000
--- a/pkg/kv/kvserver/concurrency/poison/error.go
+++ /dev/null
@@ -1,42 +0,0 @@
-// Copyright 2022 The Cockroach Authors.
-//
-// Use of this software is governed by the Business Source License
-// included in the file licenses/BSL.txt.
-//
-// As of the Change Date specified in that file, in accordance with
-// the Business Source License, use of this software will be governed
-// by the Apache License, Version 2.0, included in the file
-// licenses/APL.txt.
-
-package poison
-
-import (
- "fmt"
-
- "github.com/cockroachdb/cockroach/pkg/roachpb"
- "github.com/cockroachdb/cockroach/pkg/util/hlc"
- "github.com/cockroachdb/errors"
-)
-
-// NewPoisonedError instantiates a *PoisonedError referencing a poisoned latch
-// (as identified by span and timestamp).
-func NewPoisonedError(span roachpb.Span, ts hlc.Timestamp) *PoisonedError {
- return &PoisonedError{Span: span, Timestamp: ts}
-}
-
-var _ errors.SafeFormatter = (*PoisonedError)(nil)
-var _ fmt.Formatter = (*PoisonedError)(nil)
-
-// SafeFormatError implements errors.SafeFormatter.
-func (e *PoisonedError) SafeFormatError(p errors.Printer) error {
- p.Printf("encountered poisoned latch %s@%s", e.Span, e.Timestamp)
- return nil
-}
-
-// Format implements fmt.Formatter.
-func (e *PoisonedError) Format(s fmt.State, verb rune) { errors.FormatError(e, s, verb) }
-
-// Error implements error.
-func (e *PoisonedError) Error() string {
- return fmt.Sprint(e)
-}
diff --git a/pkg/kv/kvserver/concurrency/poison/error.proto b/pkg/kv/kvserver/concurrency/poison/error.proto
deleted file mode 100644
index 3ebe47586f..0000000000
--- a/pkg/kv/kvserver/concurrency/poison/error.proto
+++ /dev/null
@@ -1,26 +0,0 @@
-// Copyright 2022 The Cockroach Authors.
-//
-// Use of this software is governed by the Business Source License
-// included in the file licenses/BSL.txt.
-//
-// As of the Change Date specified in that file, in accordance with
-// the Business Source License, use of this software will be governed
-// by the Apache License, Version 2.0, included in the file
-// licenses/APL.txt.
-
-syntax = "proto3";
-package cockroach.kv.kvserver.concurrency.poison;
-option go_package = "poison";
-
-import "util/hlc/timestamp.proto";
-import "roachpb/data.proto";
-import "gogoproto/gogo.proto";
-
-// PoisonedError indicates that a request failed fast during sequencing as a
-// result of having encountered a poisoned latch under Policy_Error.
-//
-// See also concurrency.RequestSequencer.
-message PoisonedError {
- roachpb.Span span = 1 [(gogoproto.nullable) = false];
- util.hlc.Timestamp timestamp = 2 [(gogoproto.nullable) = false];
-}
diff --git a/pkg/kv/kvserver/concurrency/poison/error_test.go b/pkg/kv/kvserver/concurrency/poison/error_test.go
deleted file mode 100644
index 360501d4e6..0000000000
--- a/pkg/kv/kvserver/concurrency/poison/error_test.go
+++ /dev/null
@@ -1,39 +0,0 @@
-// Copyright 2022 The Cockroach Authors.
-//
-// Use of this software is governed by the Business Source License
-// included in the file licenses/BSL.txt.
-//
-// As of the Change Date specified in that file, in accordance with
-// the Business Source License, use of this software will be governed
-// by the Apache License, Version 2.0, included in the file
-// licenses/APL.txt.
-
-package poison_test
-
-import (
- "context"
- "path/filepath"
- "testing"
-
- _ "github.com/cockroachdb/cockroach/pkg/keys" // to init roachpb.PrettyPrintRange
- "github.com/cockroachdb/cockroach/pkg/kv/kvserver/concurrency/poison"
- "github.com/cockroachdb/cockroach/pkg/roachpb"
- "github.com/cockroachdb/cockroach/pkg/testutils/echotest"
- "github.com/cockroachdb/cockroach/pkg/util/hlc"
- "github.com/cockroachdb/cockroach/pkg/util/leaktest"
- "github.com/cockroachdb/errors"
- "github.com/cockroachdb/redact"
- "github.com/stretchr/testify/require"
-)
-
-func TestPoisonedError(t *testing.T) {
- defer leaktest.AfterTest(t)()
- ctx := context.Background()
- err := errors.DecodeError(ctx, errors.EncodeError(ctx, poison.NewPoisonedError(
- roachpb.Span{Key: roachpb.Key("a")}, hlc.Timestamp{WallTime: 1},
- )))
- require.True(t, errors.HasType(err, (*poison.PoisonedError)(nil)), "%+v", err)
- var buf redact.StringBuilder
- buf.Printf("%s", err)
- echotest.Require(t, string(buf.RedactableString()), filepath.Join("testdata", "poisoned_error.txt"))
-}
diff --git a/pkg/kv/kvserver/concurrency/poison/policy.proto b/pkg/kv/kvserver/concurrency/poison/policy.proto
deleted file mode 100644
index 5f3371cbf2..0000000000
--- a/pkg/kv/kvserver/concurrency/poison/policy.proto
+++ /dev/null
@@ -1,35 +0,0 @@
-// Copyright 2022 The Cockroach Authors.
-//
-// Use of this software is governed by the Business Source License
-// included in the file licenses/BSL.txt.
-//
-// As of the Change Date specified in that file, in accordance with
-// the Business Source License, use of this software will be governed
-// by the Apache License, Version 2.0, included in the file
-// licenses/APL.txt.
-
-syntax = "proto3";
-package cockroach.kv.kvserver.concurrency.poison;
-option go_package = "poison";
-
-import "gogoproto/gogo.proto";
-
-// Policy determines how a request will react to encountering a poisoned
-// latch. A poisoned latch is a latch for which the holder is unable to make
-// progress. That is, waiters of this latch should not expect to be able to
-// acquire this latch "for some time"; in practice this is the case of an
-// unavailable Replica.
-//
-// The name is inspired by Rust's mutexes, which undergo poisoning[^1] when a
-// thread panics while holding the mutex.
-//
-// [^1]: https://doc.rust-lang.org/std/sync/struct.Mutex.html#poisoning
-enum Policy {
-  // Policy_Wait instructs a request to continue waiting upon encountering
-  // a poisoned latch, while propagating the poisoning upwards.
- Wait = 0;
-
- // Policy_Error instructs a request to return an error upon encountering
- // a poisoned latch.
- Error = 1;
-}
diff --git a/pkg/kv/kvserver/concurrency/poison/testdata/poisoned_error.txt b/pkg/kv/kvserver/concurrency/poison/testdata/poisoned_error.txt
deleted file mode 100644
index b4b6bf4061..0000000000
--- a/pkg/kv/kvserver/concurrency/poison/testdata/poisoned_error.txt
+++ /dev/null
@@ -1,3 +0,0 @@
-echo
-----
-encountered poisoned latch ‹a›@0.000000001,0
diff --git a/pkg/kv/kvserver/concurrency/testdata/concurrency_manager/poison_policy_err b/pkg/kv/kvserver/concurrency/testdata/concurrency_manager/poison_policy_err
deleted file mode 100644
index 2fffae01a7..0000000000
--- a/pkg/kv/kvserver/concurrency/testdata/concurrency_manager/poison_policy_err
+++ /dev/null
@@ -1,60 +0,0 @@
-# This test sets up the following situation:
-#
-# e <- put (PoisonPolicyErr; waiting)
-# b---f <- scan (PoisonPolicyErr; waiting)
-# c <- put (PoisonPolicyErr; sequenced, poisoned)
-#
-# Since everyone uses PoisonPolicyErr, the chain unwinds. However, only `b---f`
-# gets an error, since it overlaps `c`. `e` can proceed once `c` and `b---f`
-# have finished.
-
-new-request txn=none name=putc ts=10,0
- put key=c value=hi
-----
-
-sequence req=putc
-----
-[1] sequence putc: sequencing request
-[1] sequence putc: acquiring latches
-[1] sequence putc: scanning lock table for conflicting locks
-[1] sequence putc: sequencing complete, returned guard
-
-new-request txn=none name=readbf ts=11,1
- scan key=b endkey=f
-----
-
-sequence req=readbf
-----
-[2] sequence readbf: sequencing request
-[2] sequence readbf: acquiring latches
-[2] sequence readbf: waiting to acquire read latch {b-f}@11.000000000,1, held by write latch c@10.000000000,0
-[2] sequence readbf: blocked on select in spanlatch.(*Manager).waitForSignal
-
-new-request txn=none name=pute ts=11,0
- put key=e value=hi
-----
-
-sequence req=pute
-----
-[3] sequence pute: sequencing request
-[3] sequence pute: acquiring latches
-[3] sequence pute: waiting to acquire write latch e@11.000000000,0, held by read latch {b-f}@11.000000000,1
-[3] sequence pute: blocked on select in spanlatch.(*Manager).waitForSignal
-
-poison req=putc
-----
-[-] poison putc: poisoning request
-[2] sequence readbf: sequencing complete, returned error: encountered poisoned latch c@10.000000000,0
-[3] sequence pute: scanning lock table for conflicting locks
-[3] sequence pute: sequencing complete, returned guard
-
-finish req=putc
-----
-[-] finish putc: finishing request
-
-finish req=pute
-----
-[-] finish pute: finishing request
-
-reset
-----
diff --git a/pkg/kv/kvserver/concurrency/testdata/concurrency_manager/poison_policy_err_indirect b/pkg/kv/kvserver/concurrency/testdata/concurrency_manager/poison_policy_err_indirect
deleted file mode 100644
index 2615d3524c..0000000000
--- a/pkg/kv/kvserver/concurrency/testdata/concurrency_manager/poison_policy_err_indirect
+++ /dev/null
@@ -1,61 +0,0 @@
-# This test sets up the following situation:
-#
-# e <- put (PoisonPolicyError; waiting)
-# b---f <- scan (PoisonPolicyWait; waiting)
-# c <- put (PoisonPolicyWait; sequenced, poisoned)
-#
-# When `c` gets poisoned (and continues waiting), the same
-# happens to `b---f`, which prompts `e` to fail fast.
-
-new-request txn=none name=putc ts=10,0 poison-policy=wait
- put key=c value=hi
-----
-
-sequence req=putc
-----
-[1] sequence putc: sequencing request
-[1] sequence putc: acquiring latches
-[1] sequence putc: scanning lock table for conflicting locks
-[1] sequence putc: sequencing complete, returned guard
-
-new-request txn=none name=readbf ts=11,1 poison-policy=wait
- scan key=b endkey=f
-----
-
-sequence req=readbf
-----
-[2] sequence readbf: sequencing request
-[2] sequence readbf: acquiring latches
-[2] sequence readbf: waiting to acquire read latch {b-f}@11.000000000,1, held by write latch c@10.000000000,0
-[2] sequence readbf: blocked on select in spanlatch.(*Manager).waitForSignal
-
-new-request txn=none name=pute ts=11,0
- put key=e value=hi
-----
-
-sequence req=pute
-----
-[3] sequence pute: sequencing request
-[3] sequence pute: acquiring latches
-[3] sequence pute: waiting to acquire write latch e@11.000000000,0, held by read latch {b-f}@11.000000000,1
-[3] sequence pute: blocked on select in spanlatch.(*Manager).waitForSignal
-
-poison req=putc
-----
-[-] poison putc: poisoning request
-[2] sequence readbf: encountered poisoned latch; continuing to wait
-[2] sequence readbf: blocked on select in spanlatch.(*Manager).waitForSignal
-[3] sequence pute: sequencing complete, returned error: encountered poisoned latch {b-f}@11.000000000,1
-
-finish req=putc
-----
-[-] finish putc: finishing request
-[2] sequence readbf: scanning lock table for conflicting locks
-[2] sequence readbf: sequencing complete, returned guard
-
-finish req=readbf
-----
-[-] finish readbf: finishing request
-
-reset
-----
diff --git a/pkg/kv/kvserver/concurrency/testdata/concurrency_manager/poison_policy_wait_disjoint b/pkg/kv/kvserver/concurrency/testdata/concurrency_manager/poison_policy_wait_disjoint
deleted file mode 100644
index bf7646ff84..0000000000
--- a/pkg/kv/kvserver/concurrency/testdata/concurrency_manager/poison_policy_wait_disjoint
+++ /dev/null
@@ -1,59 +0,0 @@
-# This test sets up the following situation:
-#
-# e <- put (PoisonPolicyWait; waiting)
-# b---f <- scan (PoisonPolicyError; waiting)
-# c <- put (PoisonPolicyWait; sequenced, poisoned)
-#
-# The top and bottom request use PoisonPolicyWait, so the scan returns when `c`
-# is poisoned, which in turn lets `e` through. However, `c` continues to wait.
-
-new-request txn=none name=putc ts=10,0 poison-policy=wait
- put key=c value=hi
-----
-
-sequence req=putc
-----
-[1] sequence putc: sequencing request
-[1] sequence putc: acquiring latches
-[1] sequence putc: scanning lock table for conflicting locks
-[1] sequence putc: sequencing complete, returned guard
-
-new-request txn=none name=readbf ts=11,1
- scan key=b endkey=f
-----
-
-sequence req=readbf
-----
-[2] sequence readbf: sequencing request
-[2] sequence readbf: acquiring latches
-[2] sequence readbf: waiting to acquire read latch {b-f}@11.000000000,1, held by write latch c@10.000000000,0
-[2] sequence readbf: blocked on select in spanlatch.(*Manager).waitForSignal
-
-new-request txn=none name=pute ts=11,0 poison-policy=wait
- put key=e value=hi
-----
-
-sequence req=pute
-----
-[3] sequence pute: sequencing request
-[3] sequence pute: acquiring latches
-[3] sequence pute: waiting to acquire write latch e@11.000000000,0, held by read latch {b-f}@11.000000000,1
-[3] sequence pute: blocked on select in spanlatch.(*Manager).waitForSignal
-
-poison req=putc
-----
-[-] poison putc: poisoning request
-[2] sequence readbf: sequencing complete, returned error: encountered poisoned latch c@10.000000000,0
-[3] sequence pute: scanning lock table for conflicting locks
-[3] sequence pute: sequencing complete, returned guard
-
-finish req=putc
-----
-[-] finish putc: finishing request
-
-finish req=pute
-----
-[-] finish pute: finishing request
-
-reset
-----
diff --git a/pkg/kv/kvserver/concurrency/testdata/concurrency_manager/poison_policy_wait_overlapping b/pkg/kv/kvserver/concurrency/testdata/concurrency_manager/poison_policy_wait_overlapping
deleted file mode 100644
index 4e8799dde3..0000000000
--- a/pkg/kv/kvserver/concurrency/testdata/concurrency_manager/poison_policy_wait_overlapping
+++ /dev/null
@@ -1,61 +0,0 @@
-# This test sets up the following situation:
-#
-# c <- put (PoisonPolicyWait; waiting)
-# b---f <- scan (PoisonPolicyError; waiting)
-# c <- put (PoisonPolicyWait; sequenced, poisoned)
-#
-# When the bottom `c` is poisoned, `b---f` fails fast, and
-# the top `c` poisons itself but continues to wait.
-
-new-request txn=none name=put1 ts=10,0 poison-policy=wait
- put key=c value=hi
-----
-
-sequence req=put1
-----
-[1] sequence put1: sequencing request
-[1] sequence put1: acquiring latches
-[1] sequence put1: scanning lock table for conflicting locks
-[1] sequence put1: sequencing complete, returned guard
-
-new-request txn=none name=readbf ts=11,1
- scan key=b endkey=f
-----
-
-sequence req=readbf
-----
-[2] sequence readbf: sequencing request
-[2] sequence readbf: acquiring latches
-[2] sequence readbf: waiting to acquire read latch {b-f}@11.000000000,1, held by write latch c@10.000000000,0
-[2] sequence readbf: blocked on select in spanlatch.(*Manager).waitForSignal
-
-new-request txn=none name=put2 ts=11,0 poison-policy=wait
- put key=c value=bar
-----
-
-sequence req=put2
-----
-[3] sequence put2: sequencing request
-[3] sequence put2: acquiring latches
-[3] sequence put2: waiting to acquire write latch c@11.000000000,0, held by write latch c@10.000000000,0
-[3] sequence put2: blocked on select in spanlatch.(*Manager).waitForSignal
-
-poison req=put1
-----
-[-] poison put1: poisoning request
-[2] sequence readbf: sequencing complete, returned error: encountered poisoned latch c@10.000000000,0
-[3] sequence put2: encountered poisoned latch; continuing to wait
-[3] sequence put2: blocked on select in spanlatch.(*Manager).waitForSignal
-
-finish req=put1
-----
-[-] finish put1: finishing request
-[3] sequence put2: scanning lock table for conflicting locks
-[3] sequence put2: sequencing complete, returned guard
-
-finish req=put2
-----
-[-] finish put2: finishing request
-
-reset
-----
diff --git a/pkg/kv/kvserver/helpers_test.go b/pkg/kv/kvserver/helpers_test.go
index 2d0156d689..a6e9218302 100644
--- a/pkg/kv/kvserver/helpers_test.go
+++ b/pkg/kv/kvserver/helpers_test.go
@@ -227,6 +227,13 @@ func (r *Replica) Breaker() *circuit2.Breaker {
return r.breaker.wrapped
}
+func (r *Replica) VisitBreakerContexts(fn func(ctx context.Context)) {
+ r.breaker.cancels.Visit(func(ctx context.Context, _ func()) (remove bool) {
+ fn(ctx)
+ return false // keep
+ })
+}
+
func (r *Replica) AssertState(ctx context.Context, reader storage.Reader) {
r.raftMu.Lock()
defer r.raftMu.Unlock()
@@ -476,14 +483,12 @@ func (r *Replica) MaybeUnquiesceAndWakeLeader() bool {
return r.maybeUnquiesceAndWakeLeaderLocked()
}
-func (r *Replica) ReadProtectedTimestamps(ctx context.Context) error {
+func (r *Replica) ReadProtectedTimestamps(ctx context.Context) {
var ts cachedProtectedTimestampState
defer r.maybeUpdateCachedProtectedTS(&ts)
r.mu.RLock()
defer r.mu.RUnlock()
- var err error
- ts, err = r.readProtectedTimestampsRLocked(ctx)
- return err
+ ts = r.readProtectedTimestampsRLocked(ctx, nil /* f */)
}
// ClosedTimestampPolicy returns the closed timestamp policy of the range, which
@@ -494,11 +499,6 @@ func (r *Replica) ClosedTimestampPolicy() roachpb.RangeClosedTimestampPolicy {
return r.closedTimestampPolicyRLocked()
}
-// TripBreaker synchronously trips the breaker.
-func (r *Replica) TripBreaker() {
- r.breaker.tripSync(errors.New("injected error"))
-}
-
// GetCircuitBreaker returns the circuit breaker controlling
// connection attempts to the specified node.
func (t *RaftTransport) GetCircuitBreaker(
diff --git a/pkg/kv/kvserver/kvserverbase/bulk_adder.go b/pkg/kv/kvserver/kvserverbase/bulk_adder.go
index c1a1c4470d..fc1b1f2798 100644
--- a/pkg/kv/kvserver/kvserverbase/bulk_adder.go
+++ b/pkg/kv/kvserver/kvserverbase/bulk_adder.go
@@ -25,6 +25,16 @@ type BulkAdderOptions struct {
// behalf of which it is adding data.
Name string
+ // SSTSize is the size at which an SST will be flushed and a new one started.
+ // SSTs are also split during a buffer flush to avoid spanning range bounds so
+ // they may be smaller than this limit.
+ SSTSize func() int64
+
+ // SplitAndScatterAfter is the number of bytes which if added without hitting
+ // an existing split will cause the adder to split and scatter the next span.
+ // A function returning -1 is interpreted as indicating not to split.
+ SplitAndScatterAfter func() int64
+
// MinBufferSize is the initial size of the BulkAdder buffer. It indicates the
// amount of memory we require to be able to buffer data before flushing for
// SST creation.
@@ -74,6 +84,10 @@ type BulkAdderOptions struct {
InitialSplitsIfUnordered int
}
+// DisableExplicitSplits can be returned by a SplitAndScatterAfter function to
+// indicate that the SSTBatcher should not issue explicit splits.
+const DisableExplicitSplits = -1
+
// BulkAdderFactory describes a factory function for BulkAdders.
type BulkAdderFactory func(
ctx context.Context, db *kv.DB, timestamp hlc.Timestamp, opts BulkAdderOptions,
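
The two new function-valued knobs above are easiest to read with a concrete configuration. A hedged sketch follows; the sizes are arbitrary and only the field names and DisableExplicitSplits come from this file.

package example

import "github.com/cockroachdb/cockroach/pkg/kv/kvserver/kvserverbase"

// exampleOpts shows one hypothetical way the new knobs could be populated.
var exampleOpts = kvserverbase.BulkAdderOptions{
	Name: "example-ingest",
	// Flush and start a new SST once roughly 16 MiB have been buffered
	// (illustrative size; SSTs may still be cut smaller at range bounds).
	SSTSize: func() int64 { return 16 << 20 },
	// Returning DisableExplicitSplits (-1) tells the adder not to issue
	// explicit splits at all.
	SplitAndScatterAfter: func() int64 { return kvserverbase.DisableExplicitSplits },
}
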
diff --git a/pkg/kv/kvserver/loqrecovery/loqrecoverypb/recovery.proto b/pkg/kv/kvserver/loqrecovery/loqrecoverypb/recovery.proto
index 0ec5262077..fe0427288a 100644
--- a/pkg/kv/kvserver/loqrecovery/loqrecoverypb/recovery.proto
+++ b/pkg/kv/kvserver/loqrecovery/loqrecoverypb/recovery.proto
@@ -47,7 +47,7 @@ message ReplicaInfo {
uint64 raft_applied_index = 4;
uint64 raft_committed_index = 5;
repeated DescriptorChangeInfo raft_log_descriptor_changes = 6 [(gogoproto.nullable) = false,
- (gogoproto.jsontag) = "raft_log_descriptor_changes,omitempty"];
+ (gogoproto.jsontag) = ",omitempty"];
}
// Collection of replica information gathered from a collect-info run on a single node.
diff --git a/pkg/kv/kvserver/main_test.go b/pkg/kv/kvserver/main_test.go
index 81e3d2bd7a..cb2ceefbb1 100644
--- a/pkg/kv/kvserver/main_test.go
+++ b/pkg/kv/kvserver/main_test.go
@@ -87,8 +87,7 @@ func TestMain(m *testing.M) {
delete(notBelowRaftProtos, reflect.TypeOf(&roachpb.InternalTimeSeriesData{}))
delete(notBelowRaftProtos, reflect.TypeOf(&enginepb.MVCCMetadataSubsetForMergeSerialization{}))
for typ := range notBelowRaftProtos {
- // NB: don't set failed=true. In a bazel world, we may just end up sharding in a way that
- // doesn't observe some of the protos below raft.
+ failed = true
fmt.Printf("%s: not observed below raft!\n", typ)
}
diff --git a/pkg/kv/kvserver/mvcc_gc_queue.go b/pkg/kv/kvserver/mvcc_gc_queue.go
index d218a1389a..de016745c2 100644
--- a/pkg/kv/kvserver/mvcc_gc_queue.go
+++ b/pkg/kv/kvserver/mvcc_gc_queue.go
@@ -180,11 +180,7 @@ func (mgcq *mvccGCQueue) shouldQueue(
// Consult the protected timestamp state to determine whether we can GC and
// the timestamp which can be used to calculate the score.
_, conf := repl.DescAndSpanConfig()
- canGC, _, gcTimestamp, oldThreshold, newThreshold, err := repl.checkProtectedTimestampsForGC(ctx, conf.TTL())
- if err != nil {
- log.VErrEventf(ctx, 2, "failed to check protected timestamp for gc: %v", err)
- return false, 0
- }
+ canGC, _, gcTimestamp, oldThreshold, newThreshold := repl.checkProtectedTimestampsForGC(ctx, conf.TTL())
if !canGC {
return false, 0
}
@@ -529,10 +525,7 @@ func (mgcq *mvccGCQueue) process(
// Consult the protected timestamp state to determine whether we can GC and
// the timestamp which can be used to calculate the score and updated GC
// threshold.
- canGC, cacheTimestamp, gcTimestamp, oldThreshold, newThreshold, err := repl.checkProtectedTimestampsForGC(ctx, conf.TTL())
- if err != nil {
- return false, err
- }
+ canGC, cacheTimestamp, gcTimestamp, oldThreshold, newThreshold := repl.checkProtectedTimestampsForGC(ctx, conf.TTL())
if !canGC {
return false, nil
}
diff --git a/pkg/kv/kvserver/node_liveness_test.go b/pkg/kv/kvserver/node_liveness_test.go
index f4c04407b3..8e96674258 100644
--- a/pkg/kv/kvserver/node_liveness_test.go
+++ b/pkg/kv/kvserver/node_liveness_test.go
@@ -1020,7 +1020,7 @@ func TestNodeLivenessRetryAmbiguousResultError(t *testing.T) {
if val := injectError.Load(); val != nil && val.(bool) {
atomic.AddInt32(&injectedErrorCount, 1)
injectError.Store(false)
- return roachpb.NewError(roachpb.NewAmbiguousResultErrorf("test"))
+ return roachpb.NewError(roachpb.NewAmbiguousResultError("test"))
}
return nil
}
@@ -1063,7 +1063,7 @@ func TestNodeLivenessRetryAmbiguousResultOnCreateError(t *testing.T) {
defer log.Scope(t).Close(t)
errorsToTest := []error{
- roachpb.NewAmbiguousResultErrorf("test"),
+ roachpb.NewAmbiguousResultError("test"),
roachpb.NewTransactionStatusError(roachpb.TransactionStatusError_REASON_UNKNOWN, "foo"),
kv.OnePCNotAllowedError{},
}
@@ -1207,7 +1207,8 @@ func TestNodeLivenessNoRetryOnAmbiguousResultCausedByCancellation(t *testing.T)
// Check that Heartbeat() returned an ambiguous error, and take that as proof
// that the heartbeat wasn't retried.
- require.True(t, errors.HasType(err, (*roachpb.AmbiguousResultError)(nil)), "%+v", err)
+ require.Error(t, err)
+ require.Equal(t, "result is ambiguous (context done during DistSender.Send: context canceled)", err.Error())
}
func verifyNodeIsDecommissioning(t *testing.T, tc *testcluster.TestCluster, nodeID roachpb.NodeID) {
diff --git a/pkg/kv/kvserver/protectedts/BUILD.bazel b/pkg/kv/kvserver/protectedts/BUILD.bazel
index 85d31485cc..e0b055fb23 100644
--- a/pkg/kv/kvserver/protectedts/BUILD.bazel
+++ b/pkg/kv/kvserver/protectedts/BUILD.bazel
@@ -15,7 +15,6 @@ go_library(
"//pkg/kv/kvserver/protectedts/ptpb",
"//pkg/roachpb",
"//pkg/settings",
- "//pkg/spanconfig",
"//pkg/util/hlc",
"//pkg/util/metric",
"//pkg/util/stop",
diff --git a/pkg/kv/kvserver/protectedts/protectedts.go b/pkg/kv/kvserver/protectedts/protectedts.go
index 2fedb889f3..10349f7e9a 100644
--- a/pkg/kv/kvserver/protectedts/protectedts.go
+++ b/pkg/kv/kvserver/protectedts/protectedts.go
@@ -18,7 +18,6 @@ import (
"github.com/cockroachdb/cockroach/pkg/kv"
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/protectedts/ptpb"
"github.com/cockroachdb/cockroach/pkg/roachpb"
- "github.com/cockroachdb/cockroach/pkg/spanconfig"
"github.com/cockroachdb/cockroach/pkg/util/hlc"
"github.com/cockroachdb/cockroach/pkg/util/metric"
"github.com/cockroachdb/cockroach/pkg/util/stop"
@@ -39,6 +38,7 @@ var ErrExists = errors.New("protected timestamp record already exists")
type Provider interface {
Storage
Cache
+ Verifier
Reconciler
Start(context.Context, *stop.Stopper) error
@@ -115,7 +115,6 @@ type Iterator func(*ptpb.Record) (wantMore bool)
// by any Records at a given asOf can move its GC threshold up to that
// timestamp less its GC TTL.
type Cache interface {
- spanconfig.ProtectedTSReader
// Iterate examines the records with spans which overlap with [from, to).
// Nil values for from or to are equivalent to Key{}. The order of records
@@ -133,6 +132,16 @@ type Cache interface {
Refresh(_ context.Context, asOf hlc.Timestamp) error
}
+// Verifier provides a mechanism to verify that a created Record will certainly
+// apply.
+type Verifier interface {
+
+ // Verify returns an error if the record of the provided ID cannot be
+ // verified. If nil is returned then the record has been proven to apply
+ // until it is removed.
+ Verify(context.Context, uuid.UUID) error
+}
+
// Reconciler provides a mechanism to reconcile protected timestamp records with
// external state.
type Reconciler interface {
@@ -169,9 +178,3 @@ func (c *emptyCache) QueryRecord(
func (c *emptyCache) Refresh(_ context.Context, asOf hlc.Timestamp) error {
return nil
}
-
-func (c *emptyCache) GetProtectionTimestamps(
- context.Context, roachpb.Span,
-) (protectionTimestamps []hlc.Timestamp, asOf hlc.Timestamp, err error) {
- return protectionTimestamps, (*hlc.Clock)(c).Now(), nil
-}
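
To make the Cache contract concrete, here is a hedged sketch of collecting the protection timestamps overlapping a span via Refresh and Iterate, in the style used by Iterate callers elsewhere in this change. It assumes a *ptcache.Cache that has already been started; the helper itself is illustrative.

package example

import (
	"context"

	"github.com/cockroachdb/cockroach/pkg/kv/kvserver/protectedts/ptcache"
	"github.com/cockroachdb/cockroach/pkg/kv/kvserver/protectedts/ptpb"
	"github.com/cockroachdb/cockroach/pkg/roachpb"
	"github.com/cockroachdb/cockroach/pkg/util/hlc"
)

// protectionTimestamps refreshes the cache up to now and then collects the
// timestamps of all records whose spans overlap sp (illustrative helper).
func protectionTimestamps(
	ctx context.Context, c *ptcache.Cache, now hlc.Timestamp, sp roachpb.Span,
) ([]hlc.Timestamp, error) {
	if err := c.Refresh(ctx, now); err != nil {
		return nil, err
	}
	var out []hlc.Timestamp
	c.Iterate(ctx, sp.Key, sp.EndKey, func(rec *ptpb.Record) (wantMore bool) {
		out = append(out, rec.Timestamp)
		return true
	})
	return out, nil
}
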
diff --git a/pkg/kv/kvserver/protectedts/protectedts_test.go b/pkg/kv/kvserver/protectedts/protectedts_test.go
index 8fd0ff5149..8b3b6f8752 100644
--- a/pkg/kv/kvserver/protectedts/protectedts_test.go
+++ b/pkg/kv/kvserver/protectedts/protectedts_test.go
@@ -17,6 +17,7 @@ func TestProtectedTimestamps(t *testing.T) {
var (
_ Provider
_ Cache
+ _ Verifier
_ Storage
_ = EmptyCache(nil)
_ = ErrNotExists
diff --git a/pkg/kv/kvserver/protectedts/ptcache/cache.go b/pkg/kv/kvserver/protectedts/ptcache/cache.go
index 6d277ffb65..b379cdf498 100644
--- a/pkg/kv/kvserver/protectedts/ptcache/cache.go
+++ b/pkg/kv/kvserver/protectedts/ptcache/cache.go
@@ -126,21 +126,6 @@ func (c *Cache) Refresh(ctx context.Context, asOf hlc.Timestamp) error {
return nil
}
-// GetProtectionTimestamps is part of the spanconfig.ProtectedTSReader
-// interface.
-func (c *Cache) GetProtectionTimestamps(
- ctx context.Context, sp roachpb.Span,
-) (protectionTimestamps []hlc.Timestamp, asOf hlc.Timestamp, err error) {
- readAt := c.Iterate(ctx,
- sp.Key,
- sp.EndKey,
- func(rec *ptpb.Record) (wantMore bool) {
- protectionTimestamps = append(protectionTimestamps, rec.Timestamp)
- return true
- })
- return protectionTimestamps, readAt, nil
-}
-
// Start starts the periodic fetching of the Cache. A Cache must not be used
// until after it has been started. An error will be returned if it has
// already been started.
diff --git a/pkg/kv/kvserver/protectedts/ptcache/cache_test.go b/pkg/kv/kvserver/protectedts/ptcache/cache_test.go
index 37a5dd43ad..cd251dd372 100644
--- a/pkg/kv/kvserver/protectedts/ptcache/cache_test.go
+++ b/pkg/kv/kvserver/protectedts/ptcache/cache_test.go
@@ -68,7 +68,7 @@ func TestCacheBasic(t *testing.T) {
// Then we'll add a record and make sure it gets seen.
sp := tableSpan(42)
- r, createdAt := protect(t, tc.Server(0), p, s.Clock().Now(), sp)
+ r, createdAt := protect(t, tc.Server(0), p, sp)
testutils.SucceedsSoon(t, func() error {
var coveredBy []*ptpb.Record
seenTS := c.Iterate(ctx, sp.Key, sp.EndKey,
@@ -140,7 +140,7 @@ func TestRefresh(t *testing.T) {
st.verifyCounters(t, 1, 0) // just need to scan meta
})
t.Run("needs refresh, with change", func(t *testing.T) {
- _, createdAt := protect(t, s, p, s.Clock().Now(), metaTableSpan)
+ _, createdAt := protect(t, s, p, metaTableSpan)
st.resetCounters()
require.NoError(t, c.Refresh(ctx, createdAt))
st.verifyCounters(t, 2, 1) // need to scan meta and then scan everything
@@ -177,7 +177,7 @@ func TestRefresh(t *testing.T) {
require.Regexp(t, "boom", c.Refresh(ctx, s.Clock().Now()).Error())
})
t.Run("error propagates while fetching records", func(t *testing.T) {
- protect(t, s, p, s.Clock().Now(), metaTableSpan)
+ protect(t, s, p, metaTableSpan)
st.setFilter(func(ba roachpb.BatchRequest) *roachpb.Error {
if scanReq, ok := ba.GetArg(roachpb.Scan); ok {
scan := scanReq.(*roachpb.ScanRequest)
@@ -192,7 +192,7 @@ func TestRefresh(t *testing.T) {
})
t.Run("Iterate does not hold mutex", func(t *testing.T) {
inIterate := make(chan chan struct{})
- rec, createdAt := protect(t, s, p, s.Clock().Now(), metaTableSpan)
+ rec, createdAt := protect(t, s, p, metaTableSpan)
require.NoError(t, c.Refresh(ctx, createdAt))
go c.Iterate(ctx, keys.MinKey, keys.MaxKey, func(r *ptpb.Record) (wantMore bool) {
if r.ID.GetUUID() != rec.ID.GetUUID() {
@@ -271,8 +271,8 @@ func TestQueryRecord(t *testing.T) {
waitForAsOfAfter(t, c, hlc.Timestamp{})
// Create two records.
sp42 := tableSpan(42)
- r1, createdAt1 := protect(t, s, p, s.Clock().Now(), sp42)
- r2, createdAt2 := protect(t, s, p, s.Clock().Now(), sp42)
+ r1, createdAt1 := protect(t, s, p, sp42)
+ r2, createdAt2 := protect(t, s, p, sp42)
// Ensure they both don't exist and that the read timestamps precede the
// create timestamps.
exists1, asOf := c.QueryRecord(ctx, r1.ID.GetUUID())
@@ -291,7 +291,7 @@ func TestQueryRecord(t *testing.T) {
require.True(t, !asOf.Less(createdAt2))
// Release 2 and then create 3.
require.NoError(t, p.Release(ctx, nil /* txn */, r2.ID.GetUUID()))
- r3, createdAt3 := protect(t, s, p, s.Clock().Now(), sp42)
+ r3, createdAt3 := protect(t, s, p, sp42)
exists2, asOf = c.QueryRecord(ctx, r2.ID.GetUUID())
require.True(t, exists2)
require.True(t, asOf.Less(createdAt3))
@@ -329,10 +329,10 @@ func TestIterate(t *testing.T) {
sp42 := tableSpan(42)
sp43 := tableSpan(43)
sp44 := tableSpan(44)
- r1, _ := protect(t, s, p, s.Clock().Now(), sp42)
- r2, _ := protect(t, s, p, s.Clock().Now(), sp43)
- r3, _ := protect(t, s, p, s.Clock().Now(), sp44)
- r4, _ := protect(t, s, p, s.Clock().Now(), sp42, sp43)
+ r1, _ := protect(t, s, p, sp42)
+ r2, _ := protect(t, s, p, sp43)
+ r3, _ := protect(t, s, p, sp44)
+ r4, _ := protect(t, s, p, sp42, sp43)
require.NoError(t, c.Refresh(ctx, s.Clock().Now()))
t.Run("all", func(t *testing.T) {
var recs records
@@ -373,102 +373,6 @@ func (recs *records) sorted() []*ptpb.Record {
return *recs
}
-func TestGetProtectionTimestamps(t *testing.T) {
- ctx := context.Background()
- tc := testcluster.StartTestCluster(t, 1, base.TestClusterArgs{})
- defer tc.Stopper().Stop(ctx)
- // Set the poll interval to be very long.
- s := tc.Server(0)
- protectedts.PollInterval.Override(ctx, &s.ClusterSettings().SV, 500*time.Hour)
-
- ts := func(nanos int) hlc.Timestamp {
- return hlc.Timestamp{
- WallTime: int64(nanos),
- }
- }
- sp42 := tableSpan(42)
- sp43 := tableSpan(43)
- sp44 := tableSpan(44)
- sp4243 := roachpb.Span{Key: sp42.Key, EndKey: sp43.EndKey}
-
- for _, testCase := range []struct {
- name string
- test func(t *testing.T, p protectedts.Storage, c *ptcache.Cache, cleanup func(...*ptpb.Record))
- }{
- {
- name: "multiple records apply to a single span",
- test: func(t *testing.T, p protectedts.Storage, c *ptcache.Cache, cleanup func(...*ptpb.Record)) {
- r1, _ := protect(t, s, p, ts(10), sp42)
- r2, _ := protect(t, s, p, ts(11), sp42)
- r3, _ := protect(t, s, p, ts(6), sp42)
- require.NoError(t, c.Refresh(ctx, s.Clock().Now()))
-
- protectionTimestamps, _, err := c.GetProtectionTimestamps(ctx, sp42)
- require.NoError(t, err)
- sort.Slice(protectionTimestamps, func(i, j int) bool {
- return protectionTimestamps[i].Less(protectionTimestamps[j])
- })
- require.Equal(t, []hlc.Timestamp{ts(6), ts(10), ts(11)}, protectionTimestamps)
- cleanup(r1, r2, r3)
- },
- },
- {
- name: "no records apply",
- test: func(t *testing.T, p protectedts.Storage, c *ptcache.Cache, cleanup func(...*ptpb.Record)) {
- r1, _ := protect(t, s, p, ts(5), sp43)
- r2, _ := protect(t, s, p, ts(10), sp44)
- require.NoError(t, c.Refresh(ctx, s.Clock().Now()))
- protectionTimestamps, _, err := c.GetProtectionTimestamps(ctx, sp42)
- require.NoError(t, err)
- require.Equal(t, []hlc.Timestamp(nil), protectionTimestamps)
- cleanup(r1, r2)
- },
- },
- {
- name: "multiple overlapping spans multiple records",
- test: func(t *testing.T, p protectedts.Storage, c *ptcache.Cache, cleanup func(...*ptpb.Record)) {
- r1, _ := protect(t, s, p, ts(10), sp42)
- r2, _ := protect(t, s, p, ts(15), sp42)
- r3, _ := protect(t, s, p, ts(5), sp43)
- r4, _ := protect(t, s, p, ts(6), sp43)
- r5, _ := protect(t, s, p, ts(25), keys.EverythingSpan)
- // Also add a record that doesn't overlap with the requested span and
- // ensure it isn't retrieved below.
- r6, _ := protect(t, s, p, ts(20), sp44)
- require.NoError(t, c.Refresh(ctx, s.Clock().Now()))
-
- protectionTimestamps, _, err := c.GetProtectionTimestamps(ctx, sp4243)
- require.NoError(t, err)
- sort.Slice(protectionTimestamps, func(i, j int) bool {
- return protectionTimestamps[i].Less(protectionTimestamps[j])
- })
- require.Equal(
- t, []hlc.Timestamp{ts(5), ts(6), ts(10), ts(15), ts(25)}, protectionTimestamps,
- )
- cleanup(r1, r2, r3, r4, r5, r6)
- },
- },
- } {
- t.Run(testCase.name, func(t *testing.T) {
- p := ptstorage.WithDatabase(ptstorage.New(s.ClusterSettings(),
- s.InternalExecutor().(sqlutil.InternalExecutor), nil /* knobs */), s.DB())
-
- c := ptcache.New(ptcache.Config{
- Settings: s.ClusterSettings(),
- DB: s.DB(),
- Storage: p,
- })
- require.NoError(t, c.Start(ctx, tc.Stopper()))
-
- testCase.test(t, p, c, func(records ...*ptpb.Record) {
- for _, r := range records {
- require.NoError(t, p.Release(ctx, nil, r.ID.GetUUID()))
- }
- })
- })
- }
-}
-
func TestSettingChangedLeadsToFetch(t *testing.T) {
ctx := context.Background()
tc := testcluster.StartTestCluster(t, 1, base.TestClusterArgs{})
@@ -521,12 +425,9 @@ func tableSpan(tableID uint32) roachpb.Span {
}
func protect(
- t *testing.T,
- s serverutils.TestServerInterface,
- p protectedts.Storage,
- protectTS hlc.Timestamp,
- spans ...roachpb.Span,
+ t *testing.T, s serverutils.TestServerInterface, p protectedts.Storage, spans ...roachpb.Span,
) (r *ptpb.Record, createdAt hlc.Timestamp) {
+ protectTS := s.Clock().Now()
r = &ptpb.Record{
ID: uuid.MakeV4().GetBytes(),
Timestamp: protectTS,
diff --git a/pkg/kv/kvserver/protectedts/ptprovider/BUILD.bazel b/pkg/kv/kvserver/protectedts/ptprovider/BUILD.bazel
index cd021496f2..e3620f19cb 100644
--- a/pkg/kv/kvserver/protectedts/ptprovider/BUILD.bazel
+++ b/pkg/kv/kvserver/protectedts/ptprovider/BUILD.bazel
@@ -12,6 +12,7 @@ go_library(
"//pkg/kv/kvserver/protectedts/ptcache",
"//pkg/kv/kvserver/protectedts/ptreconcile",
"//pkg/kv/kvserver/protectedts/ptstorage",
+ "//pkg/kv/kvserver/protectedts/ptverifier",
"//pkg/settings/cluster",
"//pkg/sql/sqlutil",
"//pkg/util/metric",
diff --git a/pkg/kv/kvserver/protectedts/ptprovider/provider.go b/pkg/kv/kvserver/protectedts/ptprovider/provider.go
index 377039e80c..c537151bf2 100644
--- a/pkg/kv/kvserver/protectedts/ptprovider/provider.go
+++ b/pkg/kv/kvserver/protectedts/ptprovider/provider.go
@@ -21,6 +21,7 @@ import (
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/protectedts/ptcache"
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/protectedts/ptreconcile"
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/protectedts/ptstorage"
+ "github.com/cockroachdb/cockroach/pkg/kv/kvserver/protectedts/ptverifier"
"github.com/cockroachdb/cockroach/pkg/settings/cluster"
"github.com/cockroachdb/cockroach/pkg/sql/sqlutil"
"github.com/cockroachdb/cockroach/pkg/util/metric"
@@ -38,9 +39,9 @@ type Config struct {
Knobs *protectedts.TestingKnobs
}
-// Provider is the concrete implementation of protectedts.Provider interface.
-type Provider struct {
+type provider struct {
protectedts.Storage
+ protectedts.Verifier
protectedts.Cache
protectedts.Reconciler
metric.Struct
@@ -52,16 +53,16 @@ func New(cfg Config) (protectedts.Provider, error) {
return nil, err
}
storage := ptstorage.New(cfg.Settings, cfg.InternalExecutor, cfg.Knobs)
+ verifier := ptverifier.New(cfg.DB, storage)
reconciler := ptreconcile.New(cfg.Settings, cfg.DB, storage, cfg.ReconcileStatusFuncs)
- cache := ptcache.New(ptcache.Config{
- DB: cfg.DB,
- Storage: storage,
- Settings: cfg.Settings,
- })
-
- return &Provider{
- Storage: storage,
- Cache: cache,
+ return &provider{
+ Storage: storage,
+ Cache: ptcache.New(ptcache.Config{
+ DB: cfg.DB,
+ Storage: storage,
+ Settings: cfg.Settings,
+ }),
+ Verifier: verifier,
Reconciler: reconciler,
Struct: reconciler.Metrics(),
}, nil
@@ -80,15 +81,13 @@ func validateConfig(cfg Config) error {
}
}
-// Start implements the protectedts.Provider interface.
-func (p *Provider) Start(ctx context.Context, stopper *stop.Stopper) error {
+func (p *provider) Start(ctx context.Context, stopper *stop.Stopper) error {
if cache, ok := p.Cache.(*ptcache.Cache); ok {
return cache.Start(ctx, stopper)
}
return nil
}
-// Metrics implements the protectedts.Provider interface.
-func (p *Provider) Metrics() metric.Struct {
+func (p *provider) Metrics() metric.Struct {
return p.Struct
}
diff --git a/pkg/kv/kvserver/protectedts/ptstorage/BUILD.bazel b/pkg/kv/kvserver/protectedts/ptstorage/BUILD.bazel
index 1879f9fa65..f94955048a 100644
--- a/pkg/kv/kvserver/protectedts/ptstorage/BUILD.bazel
+++ b/pkg/kv/kvserver/protectedts/ptstorage/BUILD.bazel
@@ -39,7 +39,6 @@ go_test(
"validate_test.go",
],
embed = [":ptstorage"],
- shard_count = 16,
deps = [
"//pkg/base",
"//pkg/keys",
diff --git a/pkg/kv/kvserver/protectedts/ptverifier/BUILD.bazel b/pkg/kv/kvserver/protectedts/ptverifier/BUILD.bazel
new file mode 100644
index 0000000000..969c0cba66
--- /dev/null
+++ b/pkg/kv/kvserver/protectedts/ptverifier/BUILD.bazel
@@ -0,0 +1,48 @@
+load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test")
+
+go_library(
+ name = "ptverifier",
+ srcs = ["verifier.go"],
+ importpath = "github.com/cockroachdb/cockroach/pkg/kv/kvserver/protectedts/ptverifier",
+ visibility = ["//visibility:public"],
+ deps = [
+ "//pkg/kv",
+ "//pkg/kv/kvserver/protectedts",
+ "//pkg/kv/kvserver/protectedts/ptpb",
+ "//pkg/roachpb",
+ "//pkg/util/hlc",
+ "//pkg/util/uuid",
+ "@com_github_cockroachdb_errors//:errors",
+ ],
+)
+
+go_test(
+ name = "ptverifier_test",
+ size = "small",
+ srcs = [
+ "main_test.go",
+ "verifier_test.go",
+ ],
+ deps = [
+ ":ptverifier",
+ "//pkg/base",
+ "//pkg/keys",
+ "//pkg/kv",
+ "//pkg/kv/kvclient/kvcoord",
+ "//pkg/kv/kvserver/protectedts",
+ "//pkg/kv/kvserver/protectedts/ptpb",
+ "//pkg/kv/kvserver/protectedts/ptstorage",
+ "//pkg/roachpb",
+ "//pkg/security",
+ "//pkg/security/securitytest",
+ "//pkg/server",
+ "//pkg/sql/sqlutil",
+ "//pkg/testutils/serverutils",
+ "//pkg/testutils/testcluster",
+ "//pkg/util/leaktest",
+ "//pkg/util/randutil",
+ "//pkg/util/uuid",
+ "@com_github_cockroachdb_errors//:errors",
+ "@com_github_stretchr_testify//require",
+ ],
+)
diff --git a/pkg/sql/scheduledlogging/main_test.go b/pkg/kv/kvserver/protectedts/ptverifier/main_test.go
similarity index 93%
rename from pkg/sql/scheduledlogging/main_test.go
rename to pkg/kv/kvserver/protectedts/ptverifier/main_test.go
index 6f02005f4e..8fff74647b 100644
--- a/pkg/sql/scheduledlogging/main_test.go
+++ b/pkg/kv/kvserver/protectedts/ptverifier/main_test.go
@@ -1,4 +1,4 @@
-// Copyright 2022 The Cockroach Authors.
+// Copyright 2019 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
@@ -8,7 +8,7 @@
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.
-package scheduledlogging_test
+package ptverifier_test
import (
"os"
diff --git a/pkg/kv/kvserver/protectedts/ptverifier/verifier.go b/pkg/kv/kvserver/protectedts/ptverifier/verifier.go
new file mode 100644
index 0000000000..c3f46349c8
--- /dev/null
+++ b/pkg/kv/kvserver/protectedts/ptverifier/verifier.go
@@ -0,0 +1,147 @@
+// Copyright 2019 The Cockroach Authors.
+//
+// Use of this software is governed by the Business Source License
+// included in the file licenses/BSL.txt.
+//
+// As of the Change Date specified in that file, in accordance with
+// the Business Source License, use of this software will be governed
+// by the Apache License, Version 2.0, included in the file
+// licenses/APL.txt.
+
+package ptverifier
+
+import (
+ "bytes"
+ "context"
+ "fmt"
+
+ "github.com/cockroachdb/cockroach/pkg/kv"
+ "github.com/cockroachdb/cockroach/pkg/kv/kvserver/protectedts"
+ "github.com/cockroachdb/cockroach/pkg/kv/kvserver/protectedts/ptpb"
+ "github.com/cockroachdb/cockroach/pkg/roachpb"
+ "github.com/cockroachdb/cockroach/pkg/util/hlc"
+ "github.com/cockroachdb/cockroach/pkg/util/uuid"
+ "github.com/cockroachdb/errors"
+)
+
+// verifier implements protectedts.Verifier.
+type verifier struct {
+ db *kv.DB
+ s protectedts.Storage
+}
+
+// New returns a new Verifier.
+func New(db *kv.DB, s protectedts.Storage) protectedts.Verifier {
+ return &verifier{db: db, s: s}
+}
+
+// Verify checks whether the record with the provided id has been verified.
+// If it has not, this call performs verification and marks the
+// record as verified.
+func (v *verifier) Verify(ctx context.Context, id uuid.UUID) error {
+ // First we go read the record and note the timestamp at which we read it.
+ r, ts, err := getRecordWithTimestamp(ctx, v.s, v.db, id)
+ if err != nil {
+ return errors.Wrapf(err, "failed to fetch record %s", id)
+ }
+
+ // TODO(adityamaru): Remove this once we delete all `Verify` calls. The new
+ // subsystem is not going to provide verification semantics. Until then mark
+ // the record as verified so it is a noop.
+ if r.DeprecatedSpans == nil {
+ return nil
+ }
+
+ if r.Verified { // already verified
+ return nil
+ }
+
+ b := makeVerificationBatch(r, ts)
+ if err := v.db.Run(ctx, &b); err != nil {
+ return err
+ }
+
+ // Check the responses and synthesize an error if one occurred.
+ if err := parseResponse(&b, r); err != nil {
+ return err
+ }
+ // Mark the record as verified.
+ return errors.Wrapf(v.db.Txn(ctx, func(ctx context.Context, txn *kv.Txn) error {
+ return v.s.MarkVerified(ctx, txn, id)
+ }), "failed to mark %v as verified", id)
+}
+
+// getRecordWithTimestamp fetches the record with the provided id and returns
+// the hlc timestamp at which that read occurred.
+func getRecordWithTimestamp(
+ ctx context.Context, s protectedts.Storage, db *kv.DB, id uuid.UUID,
+) (r *ptpb.Record, readAt hlc.Timestamp, err error) {
+ if err = db.Txn(ctx, func(ctx context.Context, txn *kv.Txn) error {
+ r, err = s.GetRecord(ctx, txn, id)
+ readAt = txn.ReadTimestamp()
+ return err
+ }); err != nil {
+ return nil, hlc.Timestamp{}, err
+ }
+ return r, readAt, nil
+}
+
+func makeVerificationBatch(r *ptpb.Record, aliveAt hlc.Timestamp) kv.Batch {
+ // Need to perform validation, build a batch and run it.
+ mergedSpans, _ := roachpb.MergeSpans(&r.DeprecatedSpans)
+ var b kv.Batch
+ for _, s := range mergedSpans {
+ var req roachpb.AdminVerifyProtectedTimestampRequest
+ req.RecordAliveAt = aliveAt
+ req.Protected = r.Timestamp
+ req.RecordID = r.ID.GetUUID()
+ req.Key = s.Key
+ req.EndKey = s.EndKey
+ b.AddRawRequest(&req)
+ }
+ return b
+}
+
+func parseResponse(b *kv.Batch, r *ptpb.Record) error {
+ rawResponse := b.RawResponse()
+ var errBuilder bytes.Buffer
+ for _, resp := range rawResponse.Responses {
+ resp := resp.GetInner().(*roachpb.AdminVerifyProtectedTimestampResponse)
+ if len(resp.DeprecatedFailedRanges) == 0 && len(resp.VerificationFailedRanges) == 0 {
+ continue
+ }
+
+ // Write the error header the first time we encounter failed ranges.
+ if errBuilder.Len() == 0 {
+ _, _ = errBuilder.WriteString(fmt.Sprintf("failed to verify protection record %s with ts: %s:\n",
+ r.ID.String(), r.Timestamp.String()))
+ }
+
+ useDeprecated := len(resp.VerificationFailedRanges) == 0
+ for _, failedRange := range resp.VerificationFailedRanges {
+ if failedRange.Reason != "" {
+ // Write the per range reason for failure.
+ _, _ = errBuilder.WriteString(fmt.Sprintf("range ID: %d, range span: %s - %s: %s\n",
+ failedRange.RangeID, failedRange.StartKey.String(), failedRange.EndKey.String(),
+ failedRange.Reason))
+ } else {
+ // If no reason was saved, dump relevant information.
+ _, _ = errBuilder.WriteString(fmt.Sprintf("range ID: %d, range span: %s - %s\n",
+ failedRange.RangeID, failedRange.StartKey.String(), failedRange.EndKey.String()))
+ }
+ }
+
+ if !useDeprecated {
+ continue
+ }
+
+ for _, rangeDesc := range resp.DeprecatedFailedRanges {
+ _, _ = errBuilder.WriteString(fmt.Sprintf("range ID: %d, range span: %s - %s\n",
+ rangeDesc.RangeID, rangeDesc.StartKey.String(), rangeDesc.EndKey.String()))
+ }
+ }
+ if errBuilder.Len() > 0 {
+ return errors.Newf("protected ts verification error: %s", errBuilder.String())
+ }
+ return nil
+}
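
Taking the new verifier.go as a whole, the only entry points a caller needs are New and Verify (both shown above). A minimal usage sketch, assuming the caller already holds the *kv.DB, the protectedts.Storage, and the UUID of a previously written record; record creation and error aggregation are handled inside the package as the diff shows:

package example

import (
    "context"

    "github.com/cockroachdb/cockroach/pkg/kv"
    "github.com/cockroachdb/cockroach/pkg/kv/kvserver/protectedts"
    "github.com/cockroachdb/cockroach/pkg/kv/kvserver/protectedts/ptverifier"
    "github.com/cockroachdb/cockroach/pkg/util/uuid"
)

// verifyRecord constructs a Verifier and verifies one record. The returned
// error, if any, carries the per-range failure details assembled by
// parseResponse in the diff above.
func verifyRecord(
    ctx context.Context, db *kv.DB, storage protectedts.Storage, id uuid.UUID,
) error {
    v := ptverifier.New(db, storage)
    return v.Verify(ctx, id)
}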
diff --git a/pkg/kv/kvserver/protectedts/ptverifier/verifier_test.go b/pkg/kv/kvserver/protectedts/ptverifier/verifier_test.go
new file mode 100644
index 0000000000..de87dfbd53
--- /dev/null
+++ b/pkg/kv/kvserver/protectedts/ptverifier/verifier_test.go
@@ -0,0 +1,259 @@
+// Copyright 2019 The Cockroach Authors.
+//
+// Use of this software is governed by the Business Source License
+// included in the file licenses/BSL.txt.
+//
+// As of the Change Date specified in that file, in accordance with
+// the Business Source License, use of this software will be governed
+// by the Apache License, Version 2.0, included in the file
+// licenses/APL.txt.
+
+package ptverifier_test
+
+import (
+ "context"
+ "sync/atomic"
+ "testing"
+ "time"
+
+ "github.com/cockroachdb/cockroach/pkg/base"
+ "github.com/cockroachdb/cockroach/pkg/keys"
+ "github.com/cockroachdb/cockroach/pkg/kv"
+ "github.com/cockroachdb/cockroach/pkg/kv/kvclient/kvcoord"
+ "github.com/cockroachdb/cockroach/pkg/kv/kvserver/protectedts"
+ "github.com/cockroachdb/cockroach/pkg/kv/kvserver/protectedts/ptpb"
+ "github.com/cockroachdb/cockroach/pkg/kv/kvserver/protectedts/ptstorage"
+ "github.com/cockroachdb/cockroach/pkg/kv/kvserver/protectedts/ptverifier"
+ "github.com/cockroachdb/cockroach/pkg/roachpb"
+ "github.com/cockroachdb/cockroach/pkg/sql/sqlutil"
+ "github.com/cockroachdb/cockroach/pkg/testutils/testcluster"
+ "github.com/cockroachdb/cockroach/pkg/util/leaktest"
+ "github.com/cockroachdb/cockroach/pkg/util/uuid"
+ "github.com/cockroachdb/errors"
+ "github.com/stretchr/testify/require"
+)
+
+// TestVerifier tests the business logic of verification by mocking out the
+// actual verification requests but using a real implementation of
+// protectedts.Storage.
+func TestVerifier(t *testing.T) {
+ defer leaktest.AfterTest(t)()
+
+ ctx := context.Background()
+ tc := testcluster.StartTestCluster(t, 1, base.TestClusterArgs{})
+ defer tc.Stopper().Stop(ctx)
+
+ s := tc.Server(0)
+ var senderFunc atomic.Value
+ senderFunc.Store(kv.SenderFunc(nil))
+ ds := s.DistSenderI().(*kvcoord.DistSender)
+ tsf := kvcoord.NewTxnCoordSenderFactory(
+ kvcoord.TxnCoordSenderFactoryConfig{
+ AmbientCtx: s.DB().AmbientContext,
+ HeartbeatInterval: time.Second,
+ Settings: s.ClusterSettings(),
+ Clock: s.Clock(),
+ Stopper: s.Stopper(),
+ },
+ kv.SenderFunc(func(ctx context.Context, ba roachpb.BatchRequest) (*roachpb.BatchResponse, *roachpb.Error) {
+ if f := senderFunc.Load().(kv.SenderFunc); f != nil {
+ return f(ctx, ba)
+ }
+ return ds.Send(ctx, ba)
+ }),
+ )
+
+ pts := ptstorage.New(s.ClusterSettings(), s.InternalExecutor().(sqlutil.InternalExecutor), nil /* knobs */)
+ withDB := ptstorage.WithDatabase(pts, s.DB())
+ db := kv.NewDB(s.DB().AmbientContext, tsf, s.Clock(), s.Stopper())
+ ptv := ptverifier.New(db, pts)
+ makeTableSpan := func(tableID uint32) roachpb.Span {
+ k := keys.SystemSQLCodec.TablePrefix(tableID)
+ return roachpb.Span{Key: k, EndKey: k.PrefixEnd()}
+ }
+
+ createRecord := func(t *testing.T, tables ...uint32) *ptpb.Record {
+ spans := make([]roachpb.Span, len(tables))
+ for i, tid := range tables {
+ spans[i] = makeTableSpan(tid)
+ }
+ r := ptpb.Record{
+ ID: uuid.MakeV4().GetBytes(),
+ Timestamp: s.Clock().Now(),
+ Mode: ptpb.PROTECT_AFTER,
+ DeprecatedSpans: spans,
+ }
+ require.Nil(t, s.DB().Txn(ctx, func(ctx context.Context, txn *kv.Txn) error {
+ return pts.Protect(ctx, txn, &r)
+ }))
+ return &r
+ }
+ ensureVerified := func(t *testing.T, id uuid.UUID, verified bool) {
+ got, err := withDB.GetRecord(ctx, nil, id)
+ require.NoError(t, err)
+ require.Equal(t, verified, got.Verified)
+ }
+ release := func(t *testing.T, id uuid.UUID) {
+ require.NoError(t, withDB.Release(ctx, nil, id))
+ }
+ for _, c := range []struct {
+ name string
+ test func(t *testing.T)
+ }{
+ {
+ name: "record doesn't exist",
+ test: func(t *testing.T) {
+ require.Regexp(t, protectedts.ErrNotExists.Error(),
+ ptv.Verify(ctx, uuid.MakeV4()).Error())
+ },
+ },
+ {
+ name: "verification failed with injected error",
+ test: func(t *testing.T) {
+ defer senderFunc.Store(senderFunc.Load())
+ r := createRecord(t, 42)
+ senderFunc.Store(kv.SenderFunc(func(
+ ctx context.Context, ba roachpb.BatchRequest,
+ ) (*roachpb.BatchResponse, *roachpb.Error) {
+ if _, ok := ba.GetArg(roachpb.AdminVerifyProtectedTimestamp); ok {
+ return nil, roachpb.NewError(errors.New("boom"))
+ }
+ return ds.Send(ctx, ba)
+ }))
+ require.Regexp(t, "boom", ptv.Verify(ctx, r.ID.GetUUID()).Error())
+ ensureVerified(t, r.ID.GetUUID(), false)
+ release(t, r.ID.GetUUID())
+ },
+ },
+ {
+ name: "verification failed with injected response",
+ test: func(t *testing.T) {
+ defer senderFunc.Store(senderFunc.Load())
+ r := createRecord(t, 42)
+ senderFunc.Store(kv.SenderFunc(func(
+ ctx context.Context, ba roachpb.BatchRequest,
+ ) (*roachpb.BatchResponse, *roachpb.Error) {
+ if _, ok := ba.GetArg(roachpb.AdminVerifyProtectedTimestamp); ok {
+ var resp roachpb.BatchResponse
+ resp.Add(&roachpb.AdminVerifyProtectedTimestampResponse{
+ VerificationFailedRanges: []roachpb.AdminVerifyProtectedTimestampResponse_FailedRange{{
+ RangeID: 42,
+ StartKey: roachpb.RKey(r.DeprecatedSpans[0].Key),
+ EndKey: roachpb.RKey(r.DeprecatedSpans[0].EndKey),
+ }},
+ })
+ return &resp, nil
+ }
+ return ds.Send(ctx, ba)
+ }))
+ require.Regexp(t, "protected ts verification error: failed to verify protection.*\n"+
+ "range ID: 42, range span: /Table/42 - /Table/43",
+ ptv.Verify(ctx, r.ID.GetUUID()).Error())
+ ensureVerified(t, r.ID.GetUUID(), false)
+ release(t, r.ID.GetUUID())
+ },
+ },
+ {
+ name: "verification failed with injected response over two spans",
+ test: func(t *testing.T) {
+ defer senderFunc.Store(senderFunc.Load())
+ r := createRecord(t, 42, 12)
+ senderFunc.Store(kv.SenderFunc(func(
+ ctx context.Context, ba roachpb.BatchRequest,
+ ) (*roachpb.BatchResponse, *roachpb.Error) {
+ if _, ok := ba.GetArg(roachpb.AdminVerifyProtectedTimestamp); ok {
+ var resp roachpb.BatchResponse
+ resp.Add(&roachpb.AdminVerifyProtectedTimestampResponse{
+ VerificationFailedRanges: []roachpb.AdminVerifyProtectedTimestampResponse_FailedRange{{
+ RangeID: 42,
+ StartKey: roachpb.RKey(r.DeprecatedSpans[0].Key),
+ EndKey: roachpb.RKey(r.DeprecatedSpans[0].EndKey),
+ Reason: "foo",
+ }},
+ })
+ resp.Add(&roachpb.AdminVerifyProtectedTimestampResponse{
+ VerificationFailedRanges: []roachpb.AdminVerifyProtectedTimestampResponse_FailedRange{{
+ RangeID: 12,
+ StartKey: roachpb.RKey(r.DeprecatedSpans[1].Key),
+ EndKey: roachpb.RKey(r.DeprecatedSpans[1].EndKey),
+ Reason: "bar",
+ }},
+ })
+ return &resp, nil
+ }
+ return ds.Send(ctx, ba)
+ }))
+ require.Regexp(t, "protected ts verification error: failed to verify protection.*\n"+
+ "range ID: 42, "+
+ "range span: /Table/42 - /Table/43: foo\nrange ID: 12, "+
+ "range span: /Table/12 - /Table/13: bar",
+ ptv.Verify(ctx, r.ID.GetUUID()).Error())
+ ensureVerified(t, r.ID.GetUUID(), false)
+ release(t, r.ID.GetUUID())
+ },
+ },
+ {
+ // TODO(adityamaru): Remove in 21.2.
+ name: "verification failed with deprecated failed ranges response",
+ test: func(t *testing.T) {
+ defer senderFunc.Store(senderFunc.Load())
+ r := createRecord(t, 42)
+ senderFunc.Store(kv.SenderFunc(func(
+ ctx context.Context, ba roachpb.BatchRequest,
+ ) (*roachpb.BatchResponse, *roachpb.Error) {
+ if _, ok := ba.GetArg(roachpb.AdminVerifyProtectedTimestamp); ok {
+ var resp roachpb.BatchResponse
+ resp.Add(&roachpb.AdminVerifyProtectedTimestampResponse{
+ DeprecatedFailedRanges: []roachpb.RangeDescriptor{{
+ RangeID: 42,
+ StartKey: roachpb.RKey(r.DeprecatedSpans[0].Key),
+ EndKey: roachpb.RKey(r.DeprecatedSpans[0].EndKey),
+ }},
+ })
+ return &resp, nil
+ }
+ return ds.Send(ctx, ba)
+ }))
+ require.Regexp(t, "protected ts verification error: failed to verify protection."+
+ "*\nrange ID: 42, range span: /Table/42 - /Table/43",
+ ptv.Verify(ctx, r.ID.GetUUID()).Error())
+ ensureVerified(t, r.ID.GetUUID(), false)
+ release(t, r.ID.GetUUID())
+ },
+ },
+ {
+ name: "verification succeeded",
+ test: func(t *testing.T) {
+ defer senderFunc.Store(senderFunc.Load())
+ r := createRecord(t, 42)
+ senderFunc.Store(kv.SenderFunc(func(
+ ctx context.Context, ba roachpb.BatchRequest,
+ ) (*roachpb.BatchResponse, *roachpb.Error) {
+ if _, ok := ba.GetArg(roachpb.AdminVerifyProtectedTimestamp); ok {
+ var resp roachpb.BatchResponse
+ resp.Add(&roachpb.AdminVerifyProtectedTimestampResponse{})
+ return &resp, nil
+ }
+ return ds.Send(ctx, ba)
+ }))
+ require.NoError(t, ptv.Verify(ctx, r.ID.GetUUID()))
+ ensureVerified(t, r.ID.GetUUID(), true)
+ // Show that we don't send again once we've already verified.
+ sawVerification := false
+ senderFunc.Store(kv.SenderFunc(func(
+ ctx context.Context, ba roachpb.BatchRequest,
+ ) (*roachpb.BatchResponse, *roachpb.Error) {
+ if _, ok := ba.GetArg(roachpb.AdminVerifyProtectedTimestamp); ok {
+ sawVerification = true
+ }
+ return ds.Send(ctx, ba)
+ }))
+ require.NoError(t, ptv.Verify(ctx, r.ID.GetUUID()))
+ require.False(t, sawVerification)
+ release(t, r.ID.GetUUID())
+ },
+ },
+ } {
+ t.Run(c.name, c.test)
+ }
+}
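
The test above routes every KV batch through a kv.SenderFunc stored in an atomic.Value, so each subtest can swap in its own interceptor without restarting the cluster. The same swap-an-interceptor idea, boiled down to plain Go with stand-in types (nothing below is the real kv API):

package main

import (
    "fmt"
    "sync/atomic"
)

type request string
type senderFunc func(request) string

func main() {
    var sender atomic.Value
    sender.Store(senderFunc(nil)) // typed nil: "no interceptor installed"

    send := func(r request) string {
        if f := sender.Load().(senderFunc); f != nil {
            return f(r) // a subtest's interceptor
        }
        return "real send: " + string(r) // the DistSender path in the test
    }

    fmt.Println(send("probe"))

    // A "subtest" installs an interceptor that injects a failure.
    sender.Store(senderFunc(func(request) string { return "boom" }))
    fmt.Println(send("probe"))
}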
diff --git a/pkg/kv/kvserver/queue.go b/pkg/kv/kvserver/queue.go
index e9adac48e1..40034f86a9 100644
--- a/pkg/kv/kvserver/queue.go
+++ b/pkg/kv/kvserver/queue.go
@@ -33,7 +33,6 @@ import (
"github.com/cockroachdb/cockroach/pkg/util/timeutil"
"github.com/cockroachdb/cockroach/pkg/util/tracing"
"github.com/cockroachdb/errors"
- "github.com/cockroachdb/redact"
)
const (
@@ -569,7 +568,7 @@ func (bq *baseQueue) Async(
ctx context.Context, opName string, wait bool, fn func(ctx context.Context, h queueHelper),
) {
if log.V(3) {
- log.InfofDepth(ctx, 2, "%s", redact.Safe(opName))
+ log.InfofDepth(ctx, 2, "%s", log.Safe(opName))
}
opName += " (" + bq.name + ")"
bgCtx := bq.AnnotateCtx(context.Background())
@@ -582,7 +581,7 @@ func (bq *baseQueue) Async(
func(ctx context.Context) {
fn(ctx, baseQueueHelper{bq})
}); err != nil && bq.addLogN.ShouldLog() {
- log.Infof(ctx, "rate limited in %s: %s", redact.Safe(opName), err)
+ log.Infof(ctx, "rate limited in %s: %s", log.Safe(opName), err)
}
}
diff --git a/pkg/kv/kvserver/raft.go b/pkg/kv/kvserver/raft.go
index a60f73ddfb..6d71515ab9 100644
--- a/pkg/kv/kvserver/raft.go
+++ b/pkg/kv/kvserver/raft.go
@@ -19,7 +19,6 @@ import (
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/kvserverbase"
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/kvserverpb"
"github.com/cockroachdb/cockroach/pkg/util/log"
- "github.com/cockroachdb/redact"
"go.etcd.io/etcd/raft/v3"
"go.etcd.io/etcd/raft/v3/raftpb"
)
@@ -121,29 +120,29 @@ func wrapNumbersAsSafe(v ...interface{}) {
for i := range v {
switch v[i].(type) {
case uint:
- v[i] = redact.Safe(v[i])
+ v[i] = log.Safe(v[i])
case uint8:
- v[i] = redact.Safe(v[i])
+ v[i] = log.Safe(v[i])
case uint16:
- v[i] = redact.Safe(v[i])
+ v[i] = log.Safe(v[i])
case uint32:
- v[i] = redact.Safe(v[i])
+ v[i] = log.Safe(v[i])
case uint64:
- v[i] = redact.Safe(v[i])
+ v[i] = log.Safe(v[i])
case int:
- v[i] = redact.Safe(v[i])
+ v[i] = log.Safe(v[i])
case int8:
- v[i] = redact.Safe(v[i])
+ v[i] = log.Safe(v[i])
case int16:
- v[i] = redact.Safe(v[i])
+ v[i] = log.Safe(v[i])
case int32:
- v[i] = redact.Safe(v[i])
+ v[i] = log.Safe(v[i])
case int64:
- v[i] = redact.Safe(v[i])
+ v[i] = log.Safe(v[i])
case float32:
- v[i] = redact.Safe(v[i])
+ v[i] = log.Safe(v[i])
case float64:
- v[i] = redact.Safe(v[i])
+ v[i] = log.Safe(v[i])
default:
}
}
diff --git a/pkg/kv/kvserver/raft_log_queue.go b/pkg/kv/kvserver/raft_log_queue.go
index 408009389d..9cf07ad296 100644
--- a/pkg/kv/kvserver/raft_log_queue.go
+++ b/pkg/kv/kvserver/raft_log_queue.go
@@ -28,7 +28,6 @@ import (
"github.com/cockroachdb/cockroach/pkg/util/log"
"github.com/cockroachdb/cockroach/pkg/util/timeutil"
"github.com/cockroachdb/errors"
- "github.com/cockroachdb/redact"
"go.etcd.io/etcd/raft/v3"
"go.etcd.io/etcd/raft/v3/tracker"
)
@@ -364,17 +363,6 @@ const (
truncatableIndexChosenViaLastIndex = "last index"
)
-// No assumption should be made about the relationship between
-// RaftStatus.Commit, FirstIndex, LastIndex. This is because:
-// - In some cases they are not updated or read atomically.
-// - FirstIndex is a potentially future first index, after the pending
-// truncations have been applied. Currently, pending truncations are being
-// proposed through raft, so one can be sure that these pending truncations
-// do not refer to entries that are not already in the log. However, this
-// situation may change in the future. In general, we should not make an
-// assumption on what is in the local raft log based solely on FirstIndex,
-// and should be based on whether [FirstIndex,LastIndex] is a non-empty
-// interval.
type truncateDecisionInput struct {
RaftStatus raft.Status
LogSize, MaxLogSize int64
@@ -580,24 +568,20 @@ func computeTruncateDecision(input truncateDecisionInput) truncateDecision {
}
// We've inherited the unfortunate semantics for {First,Last}Index from
- // raft.Storage: both {First,Last}Index are inclusive. The way we've
- // initialized repl.FirstIndex is to set it to the first index in the
- // possibly-empty log (TruncatedState.Index + 1), and allowing LastIndex to
- // fall behind it when the log is empty (TruncatedState.Index). The
- // initialization is done when minting a new replica from either the
- // truncated state of incoming snapshot, or using the default initial log
- // index. This makes for the confusing situation where FirstIndex >
- // LastIndex. We can detect this special empty log case by comparing
- // checking if `FirstIndex == LastIndex + 1`. Similar to this, we can have
- // the case that `FirstIndex = CommitIndex + 1` when there are no committed
- // entries. Additionally, FirstIndex adjusts for the pending log
- // truncations, which allows for FirstIndex to be greater than LastIndex and
- // commited index by more than 1 (see the comment with
- // truncateDecisionInput). So all invariant checking below is gated on first
- // ensuring that the log is not empty, i.e., FirstIndex <= LastIndex.
- //
- // If the raft log is not empty, and there are committed entries, we can
- // assert on the following invariants:
+ // raft.Storage. Specifically, both {First,Last}Index are inclusive, so
+ // there's no way to represent an empty log. The way we've initialized
+ // repl.FirstIndex is to set it to the first index in the possibly-empty log
+ // (TruncatedState.Index + 1), and allowing LastIndex to fall behind it when
+ // the log is empty (TruncatedState.Index). The initialization is done when
+ // minting a new replica from either the truncated state of incoming
+ // snapshot, or using the default initial log index. This makes for the
+ // confusing situation where FirstIndex > LastIndex. We can detect this
+ // special empty log case by checking if
+ // `FirstIndex == LastIndex + 1` (`logEmpty` below). Similar to this, we can
+ // have the case that `FirstIndex = CommitIndex + 1` when there are no
+ // committed entries (which we check for in `noCommittedEntries` below).
+ // Having done that (i.e. if the raft log is not empty, and there are
+ // committed entries), we can assert on the following invariants:
//
// FirstIndex <= LastIndex (0)
// NewFirstIndex >= FirstIndex (1)
@@ -617,8 +601,8 @@ func computeTruncateDecision(input truncateDecisionInput) truncateDecision {
// consider the empty case. Something like
// https://github.com/nvanbenschoten/optional could help us emulate an
// `option<uint64>` type if we care enough.
- logEmpty := input.FirstIndex > input.LastIndex
- noCommittedEntries := input.FirstIndex > input.RaftStatus.Commit
+ logEmpty := input.FirstIndex == input.LastIndex+1
+ noCommittedEntries := input.FirstIndex == input.RaftStatus.Commit+1
logIndexValid := logEmpty ||
(decision.NewFirstIndex >= input.FirstIndex) && (decision.NewFirstIndex <= input.LastIndex)
@@ -720,14 +704,14 @@ func (rlq *raftLogQueue) process(
// Can and should the raft logs be truncated?
if !decision.ShouldTruncate() {
- log.VEventf(ctx, 3, "%s", redact.Safe(decision.String()))
+ log.VEventf(ctx, 3, "%s", log.Safe(decision.String()))
return false, nil
}
if n := decision.NumNewRaftSnapshots(); log.V(1) || n > 0 && rlq.logSnapshots.ShouldProcess(timeutil.Now()) {
- log.Infof(ctx, "%v", redact.Safe(decision.String()))
+ log.Infof(ctx, "%v", log.Safe(decision.String()))
} else {
- log.VEventf(ctx, 1, "%v", redact.Safe(decision.String()))
+ log.VEventf(ctx, 1, "%v", log.Safe(decision.String()))
}
b := &kv.Batch{}
truncRequest := &roachpb.TruncateLogRequest{
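
The two emptiness predicates restored above are easy to check against concrete numbers; a tiny sketch with made-up indexes, only to spell out what `FirstIndex == LastIndex+1` and `FirstIndex == Commit+1` express for an inclusive [FirstIndex, LastIndex] log:

package main

import "fmt"

func main() {
    // Hypothetical raft log state right after truncating everything up to
    // and including index 10: the next entry to be appended will be 11.
    firstIndex, lastIndex, commit := uint64(11), uint64(10), uint64(10)

    logEmpty := firstIndex == lastIndex+1        // no entries left in the log
    noCommittedEntries := firstIndex == commit+1 // no committed entry is still in the log

    fmt.Println(logEmpty, noCommittedEntries) // true true
}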
diff --git a/pkg/kv/kvserver/raft_log_queue_test.go b/pkg/kv/kvserver/raft_log_queue_test.go
index d2ba4982f4..20adf8fad7 100644
--- a/pkg/kv/kvserver/raft_log_queue_test.go
+++ b/pkg/kv/kvserver/raft_log_queue_test.go
@@ -925,19 +925,12 @@ func waitForTruncationForTesting(
// Flush the engine to advance durability, which triggers truncation.
require.NoError(t, r.Engine().Flush())
}
- // FirstIndex should have changed.
+ // FirstIndex has changed.
firstIndex, err := r.GetFirstIndex()
require.NoError(t, err)
if firstIndex != newFirstIndex {
return errors.Errorf("expected firstIndex == %d, got %d", newFirstIndex, firstIndex)
}
- // Some low-level tests also look at the raftEntryCache or sideloaded
- // storage, which are updated after, and non-atomically with the change to
- // first index (latter holds Replica.mu). Since the raftLogTruncator holds Replica.raftMu
- // for the duration of its work, we can, by acquiring and releasing raftMu here, ensure
- // that we have waited for it to finish.
- r.raftMu.Lock()
- defer r.raftMu.Unlock()
return nil
})
}
diff --git a/pkg/kv/kvserver/raft_log_truncator.go b/pkg/kv/kvserver/raft_log_truncator.go
index 47c34d7e61..ad0aef0c48 100644
--- a/pkg/kv/kvserver/raft_log_truncator.go
+++ b/pkg/kv/kvserver/raft_log_truncator.go
@@ -380,10 +380,19 @@ func (r rangesByRangeID) Swap(i, j int) {
// deadlock (see storage.Engine.RegisterFlushCompletedCallback).
func (t *raftLogTruncator) durabilityAdvancedCallback() {
runTruncation := false
+ doneRunning := func() {}
t.mu.Lock()
if !t.mu.runningTruncation && len(t.mu.addRanges) > 0 {
runTruncation = true
t.mu.runningTruncation = true
+ doneRunning = func() {
+ t.mu.Lock()
+ defer t.mu.Unlock()
+ if !t.mu.runningTruncation {
+ panic("expected runningTruncation")
+ }
+ t.mu.runningTruncation = false
+ }
}
if !runTruncation && len(t.mu.addRanges) > 0 {
t.mu.queuedDurabilityCB = true
@@ -394,31 +403,20 @@ func (t *raftLogTruncator) durabilityAdvancedCallback() {
}
if err := t.stopper.RunAsyncTask(t.ambientCtx, "raft-log-truncation",
func(ctx context.Context) {
+ defer doneRunning()
for {
t.durabilityAdvanced(ctx)
- shouldReturn := false
t.mu.Lock()
queued := t.mu.queuedDurabilityCB
t.mu.queuedDurabilityCB = false
- if !queued {
- t.mu.runningTruncation = false
- shouldReturn = true
- }
t.mu.Unlock()
- if shouldReturn {
+ if !queued {
return
}
}
}); err != nil {
// Task did not run because stopper is stopped.
- func() {
- t.mu.Lock()
- defer t.mu.Unlock()
- if !t.mu.runningTruncation {
- panic("expected runningTruncation")
- }
- t.mu.runningTruncation = false
- }()
+ doneRunning()
}
}
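
The raft_log_truncator.go change above centralizes clearing of mu.runningTruncation in a doneRunning closure that runs whether the async task completes or never starts. The underlying "single runner plus queued flag" pattern, sketched with a bare mutex and illustrative names (this is a model of the shape, not the truncator itself):

package main

import (
    "fmt"
    "sync"
    "time"
)

// worker coalesces pokes the way the truncator coalesces durability
// callbacks: at most one runner at a time, with a queued flag for pokes
// that arrive while a pass is in flight.
type worker struct {
    mu struct {
        sync.Mutex
        running bool
        queued  bool
    }
}

func (w *worker) poke(pass func()) {
    w.mu.Lock()
    if w.mu.running {
        w.mu.queued = true // an in-flight runner will do another pass
        w.mu.Unlock()
        return
    }
    w.mu.running = true
    w.mu.Unlock()

    doneRunning := func() { // mirrors the closure added in the diff
        w.mu.Lock()
        defer w.mu.Unlock()
        w.mu.running = false
    }

    go func() {
        defer doneRunning() // runs regardless of how the loop exits
        for {
            pass()
            w.mu.Lock()
            queued := w.mu.queued
            w.mu.queued = false
            w.mu.Unlock()
            if !queued {
                return
            }
        }
    }()
}

func main() {
    var w worker
    w.poke(func() { fmt.Println("truncation pass") })
    time.Sleep(50 * time.Millisecond) // crude wait for the async pass in this demo
}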
diff --git a/pkg/kv/kvserver/replica.go b/pkg/kv/kvserver/replica.go
index f1e4863662..65a593eabf 100644
--- a/pkg/kv/kvserver/replica.go
+++ b/pkg/kv/kvserver/replica.go
@@ -1024,8 +1024,8 @@ func (r *Replica) getImpliedGCThresholdRLocked(
// If we have a protected timestamp record which precedes the implied
// threshold, use the threshold it implies instead.
- if !c.earliestProtectionTimestamp.IsEmpty() && c.earliestProtectionTimestamp.Less(threshold) {
- return c.earliestProtectionTimestamp.Prev()
+ if c.earliestRecord != nil && c.earliestRecord.Timestamp.Less(threshold) {
+ return c.earliestRecord.Timestamp.Prev()
}
return threshold
}
@@ -1303,7 +1303,7 @@ func (r *Replica) assertStateRaftMuLockedReplicaMuRLocked(
pretty.Diff(diskState, r.mu.state))
r.mu.state.Desc, diskState.Desc = nil, nil
log.Fatalf(ctx, "on-disk and in-memory state diverged: %s",
- redact.Safe(pretty.Diff(diskState, r.mu.state)))
+ log.Safe(pretty.Diff(diskState, r.mu.state)))
}
if r.isInitializedRLocked() {
if !r.startKey.Equal(r.mu.state.Desc.StartKey) {
diff --git a/pkg/kv/kvserver/replica_application_cmd.go b/pkg/kv/kvserver/replica_application_cmd.go
index 8d738a77ba..07c056babe 100644
--- a/pkg/kv/kvserver/replica_application_cmd.go
+++ b/pkg/kv/kvserver/replica_application_cmd.go
@@ -122,7 +122,9 @@ func (c *replicatedCmd) Ctx() context.Context {
// AckErrAndFinish implements the apply.Command interface.
func (c *replicatedCmd) AckErrAndFinish(ctx context.Context, err error) error {
if c.IsLocal() {
- c.response.Err = roachpb.NewError(roachpb.NewAmbiguousResultError(err))
+ c.response.Err = roachpb.NewError(
+ roachpb.NewAmbiguousResultError(
+ err.Error()))
}
return c.AckOutcomeAndFinish(ctx)
}
diff --git a/pkg/kv/kvserver/replica_backpressure.go b/pkg/kv/kvserver/replica_backpressure.go
index 8ac0388794..3ae46e823c 100644
--- a/pkg/kv/kvserver/replica_backpressure.go
+++ b/pkg/kv/kvserver/replica_backpressure.go
@@ -108,19 +108,17 @@ func canBackpressureBatch(ba *roachpb.BatchRequest) bool {
return false
}
-// signallerForBatch returns the signaller to use for this batch. This is the
-// Replica's breaker's signaller except if any request in the batch uses
-// poison.Policy_Wait, in which case it's a neverTripSignaller. In particular,
-// `(signaller).C() == nil` signals that the request bypasses the circuit
-// breakers.
-func (r *Replica) signallerForBatch(ba *roachpb.BatchRequest) signaller {
+// bypassReplicaCircuitBreakerForBatch returns whether the provided
+// BatchRequest bypasses the per-Replica circuit breaker. This is the
+// case if any request in the batch is requesting to do so.
+func bypassReplicaCircuitBreakerForBatch(ba *roachpb.BatchRequest) bool {
for _, ru := range ba.Requests {
req := ru.GetInner()
if roachpb.BypassesReplicaCircuitBreaker(req) {
- return neverTripSignaller{}
+ return true
}
}
- return r.breaker.Signal()
+ return false
}
// shouldBackpressureWrites returns whether writes to the range should be
diff --git a/pkg/kv/kvserver/replica_circuit_breaker.go b/pkg/kv/kvserver/replica_circuit_breaker.go
index 300f1db2fd..e64ddb75f3 100644
--- a/pkg/kv/kvserver/replica_circuit_breaker.go
+++ b/pkg/kv/kvserver/replica_circuit_breaker.go
@@ -37,8 +37,7 @@ type replicaInCircuitBreaker interface {
Desc() *roachpb.RangeDescriptor
Send(context.Context, roachpb.BatchRequest) (*roachpb.BatchResponse, *roachpb.Error)
slowReplicationThreshold(ba *roachpb.BatchRequest) (time.Duration, bool)
- replicaUnavailableError(err error) error
- poisonInflightLatches(err error)
+ replicaUnavailableError() error
}
var defaultReplicaCircuitBreakerSlowReplicationThreshold = envutil.EnvOrDefaultDuration(
@@ -75,15 +74,127 @@ type replicaCircuitBreaker struct {
stopper *stop.Stopper
r replicaInCircuitBreaker
st *cluster.Settings
+ cancels CancelStorage
wrapped *circuit.Breaker
versionIsActive int32 // atomic
}
+// Register takes a cancelable context and its cancel function (which the caller
+// must cancel when the request has finished), and registers them with the
+// circuit breaker. If the breaker is already tripped, its error is returned
+// immediately and the caller should not continue processing the request.
+// Otherwise, the cancel function is invoked if the breaker trips. The caller is
+// provided with a token and signaller for use in a call to
+// UnregisterAndAdjustError upon request completion. That method also takes the
+// error (if any) resulting from the request to ensure that in the case of a
+// tripped breaker, the error reflects this fact.
+func (br *replicaCircuitBreaker) Register(
+ ctx context.Context, cancel func(),
+) (_token interface{}, _ signaller, _ error) {
+ brSig := br.Signal()
+
+ // TODO(tbg): we may want to exclude more requests from this check, or allow
+ // requests to exclude themselves from the check (via their header). This
+ // latter mechanism could also replace hasBypassCircuitBreakerMarker.
+ if hasBypassCircuitBreakerMarker(ctx) {
+ // NB: brSig.C() == nil.
+ brSig = neverTripSignaller{}
+ }
+
+ if brSig.C() == nil {
+ // Circuit breakers are disabled and/or this is a probe request, so don't do
+ // any work registering the context. UnregisterAndAdjustError will know that we didn't
+ // since it checks the same brSig for a nil C().
+ return ctx, brSig, nil
+ }
+
+ // NB: it might be tempting to check the breaker error first to avoid the call
+ // to Set below if the breaker is tripped at this point. However, the ordering
+ // here, subtly, is required to avoid situations in which the cancel is still
+ // in the map despite the probe having shut down (in which case cancel will
+ // not be invoked until the probe is next triggered, which may be "never").
+ //
+ // To see this, consider the case in which the breaker is initially not
+ // tripped when we check, but then trips immediately and has the probe fail
+ // (and terminate). Since the probe is in charge of cancelling all tracked
+ // requests, we must ensure that this probe sees our request. Adding the
+ // request prior to calling Signal() means that if we see an untripped
+ // breaker, no probe is running - consequently should the breaker then trip,
+ // it will observe our cancel, thus avoiding a leak. If we observe a tripped
+ // breaker, we also need to remove our own cancel, as the probe may already
+ // have passed the point at which it iterates through the cancels prior to us
+ // inserting it. The cancel may be invoked twice, but that's ok.
+ //
+ // See TestReplicaCircuitBreaker_NoCancelRace.
+ tok := br.cancels.Set(ctx, cancel)
+ if err := brSig.Err(); err != nil {
+ br.cancels.Del(tok)
+ cancel()
+ return nil, nil, err
+ }
+
+ return tok, brSig, nil
+}
+
+// UnregisterAndAdjustError releases a tracked cancel function upon request
+// completion. The error resulting from the request is passed in to allow
+// decorating it in case the breaker tripped while the request was in-flight.
+//
+// See Register.
+func (br *replicaCircuitBreaker) UnregisterAndAdjustError(
+ tok interface{}, sig signaller, pErr *roachpb.Error,
+) *roachpb.Error {
+ if sig.C() == nil {
+ // Breakers were disabled and we never put the cancel in the registry.
+ return pErr
+ }
+
+ br.cancels.Del(tok)
+
+ brErr := sig.Err()
+ if pErr == nil || brErr == nil {
+ return pErr
+ }
+
+ // The breaker tripped and the command is returning an error. Make sure the
+ // error reflects the tripped breaker.
+
+ err := pErr.GoError()
+ if ae := (&roachpb.AmbiguousResultError{}); errors.As(err, &ae) {
+ // The breaker tripped while a command was inflight, so we have to
+ // propagate an ambiguous result. We don't want to replace it, but there
+ // is a way to stash an Error in it so we use that.
+ //
+ // TODO(tbg): could also wrap it; there is no other write to WrappedErr
+ // in the codebase and it might be better to remove it. Nested *Errors
+ // are not a good idea.
+ wrappedErr := brErr
+ if ae.WrappedErr != nil {
+ wrappedErr = errors.Wrapf(brErr, "%v", ae.WrappedErr)
+ }
+ ae.WrappedErr = roachpb.NewError(wrappedErr)
+ return roachpb.NewError(ae)
+ } else if le := (&roachpb.NotLeaseHolderError{}); errors.As(err, &le) {
+ // When a lease acquisition triggered by this request is short-circuited
+ // by the breaker, it will return an opaque NotLeaseholderError, which we
+ // replace with the breaker's error.
+ return roachpb.NewError(errors.CombineErrors(brErr, le))
+ }
+ return pErr
+}
+
func (br *replicaCircuitBreaker) HasMark(err error) bool {
return br.wrapped.HasMark(err)
}
+func (br *replicaCircuitBreaker) cancelAllTrackedContexts() {
+ br.cancels.Visit(func(ctx context.Context, cancel func()) (remove bool) {
+ cancel()
+ return true // remove
+ })
+}
+
func (br *replicaCircuitBreaker) canEnable() bool {
b := atomic.LoadInt32(&br.versionIsActive) == 1
if b {
@@ -102,7 +213,11 @@ func (br *replicaCircuitBreaker) enabled() bool {
return replicaCircuitBreakerSlowReplicationThreshold.Get(&br.st.SV) > 0 && br.canEnable()
}
-func (br *replicaCircuitBreaker) TripAsync(err error) {
+func (br *replicaCircuitBreaker) newError() error {
+ return br.r.replicaUnavailableError()
+}
+
+func (br *replicaCircuitBreaker) TripAsync() {
if !br.enabled() {
return
}
@@ -110,15 +225,11 @@ func (br *replicaCircuitBreaker) TripAsync(err error) {
_ = br.stopper.RunAsyncTask(
br.ambCtx.AnnotateCtx(context.Background()), "trip-breaker",
func(ctx context.Context) {
- br.tripSync(err)
+ br.wrapped.Report(br.newError())
},
)
}
-func (br *replicaCircuitBreaker) tripSync(err error) {
- br.wrapped.Report(br.r.replicaUnavailableError(err))
-}
-
type signaller interface {
Err() error
C() <-chan struct{}
@@ -141,6 +252,7 @@ func newReplicaCircuitBreaker(
stopper *stop.Stopper,
ambientCtx log.AmbientContext,
r replicaInCircuitBreaker,
+ s CancelStorage,
onTrip func(),
onReset func(),
) *replicaCircuitBreaker {
@@ -150,6 +262,8 @@ func newReplicaCircuitBreaker(
r: r,
st: cs,
}
+ br.cancels = s
+ br.cancels.Reset()
br.wrapped = circuit.NewBreaker(circuit.Options{
Name: "breaker", // log bridge has ctx tags
AsyncProbe: br.asyncProbe,
@@ -185,6 +299,16 @@ func (r replicaCircuitBreakerLogger) OnReset(br *circuit.Breaker) {
r.EventHandler.OnReset(br)
}
+type probeKey struct{}
+
+func hasBypassCircuitBreakerMarker(ctx context.Context) bool {
+ return ctx.Value(probeKey{}) != nil
+}
+
+func withBypassCircuitBreakerMarker(ctx context.Context) context.Context {
+ return context.WithValue(ctx, probeKey{}, probeKey{})
+}
+
func (br *replicaCircuitBreaker) asyncProbe(report func(error), done func()) {
bgCtx := br.ambCtx.AnnotateCtx(context.Background())
if err := br.stopper.RunAsyncTask(bgCtx, "replica-probe", func(ctx context.Context) {
@@ -195,19 +319,13 @@ func (br *replicaCircuitBreaker) asyncProbe(report func(error), done func()) {
return
}
- brErr := br.Signal().Err()
- if brErr == nil {
- // This shouldn't happen, but if we're not even tripped, don't do
- // anything.
- return
- }
-
- // Poison any inflight latches. Note that any new request that is added in
- // while the probe is running but after poisonInflightLatches has been
- // invoked will remain untouched. We rely on the replica to periodically
- // access the circuit breaker to trigger additional probes in that case.
- // (This happens in refreshProposalsLocked).
- br.r.poisonInflightLatches(brErr)
+ // First, tell all current requests to fail fast. Note that clients insert
+ // first, then check the breaker (and remove themselves if breaker already
+ // tripped then). This prevents any cancels from sneaking in after the probe
+ // gets past this point, which could otherwise leave cancels hanging until
+ // "something" triggers the next probe (which may be never if no more traffic
+ // arrives at the Replica). See Register.
+ br.cancelAllTrackedContexts()
err := sendProbe(ctx, br.r)
report(err)
}); err != nil {
@@ -216,9 +334,10 @@ func (br *replicaCircuitBreaker) asyncProbe(report func(error), done func()) {
}
func sendProbe(ctx context.Context, r replicaInCircuitBreaker) error {
- // NB: ProbeRequest has the bypassesCircuitBreaker flag. If in the future we
- // enhance the probe, we may need to allow any additional requests we send to
- // chose to bypass the circuit breaker explicitly.
+ // NB: we don't need to put this marker since ProbeRequest has the
+ // canBypassReplicaCircuitBreaker flag, but if in the future we do
+ // additional work in this method we may need it.
+ ctx = withBypassCircuitBreakerMarker(ctx)
desc := r.Desc()
if !desc.IsInitialized() {
return nil
@@ -240,13 +359,12 @@ func sendProbe(ctx context.Context, r replicaInCircuitBreaker) error {
return pErr.GoError()
},
); err != nil {
- return r.replicaUnavailableError(err)
+ return errors.CombineErrors(r.replicaUnavailableError(), err)
}
return nil
}
func replicaUnavailableError(
- err error,
desc *roachpb.RangeDescriptor,
replDesc roachpb.ReplicaDescriptor,
lm liveness.IsLiveMap,
@@ -271,22 +389,25 @@ func replicaUnavailableError(
var _ redact.SafeFormatter = desc
var _ redact.SafeFormatter = replDesc
- if len(nonLiveRepls.AsProto()) > 0 {
- err = errors.Wrapf(err, "replicas on non-live nodes: %v (lost quorum: %t)", nonLiveRepls, !canMakeProgress)
- }
-
+ err := roachpb.NewReplicaUnavailableError(desc, replDesc)
err = errors.Wrapf(
err,
"raft status: %+v", redact.Safe(rs), // raft status contains no PII
)
+ if len(nonLiveRepls.AsProto()) > 0 {
+ err = errors.Wrapf(err, "replicas on non-live nodes: %v (lost quorum: %t)", nonLiveRepls, !canMakeProgress)
+ }
- return roachpb.NewReplicaUnavailableError(err, desc, replDesc)
+ return err
}
-func (r *Replica) replicaUnavailableError(err error) error {
+func (r *Replica) replicaUnavailableError() error {
desc := r.Desc()
replDesc, _ := desc.GetReplicaDescriptor(r.store.StoreID())
- isLiveMap, _ := r.store.livenessMap.Load().(liveness.IsLiveMap)
- return replicaUnavailableError(err, desc, replDesc, isLiveMap, r.RaftStatus())
+ var isLiveMap liveness.IsLiveMap
+ if nl := r.store.cfg.NodeLiveness; nl != nil { // exclude unit test
+ isLiveMap = nl.GetIsLiveMap()
+ }
+ return replicaUnavailableError(desc, replDesc, isLiveMap, r.RaftStatus())
}
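
The ordering argument in Register above (add the cancel to the registry first, then check the breaker, and remove it again if the breaker turns out to be tripped) is what guarantees that a tripping probe sees every in-flight cancel. A stripped-down sketch of that insert-check-undo shape; a single mutex here removes the race the real code has to reason about, and none of these names are the real kvserver types:

package main

import (
    "errors"
    "fmt"
    "sync"
)

type breaker struct {
    mu      sync.Mutex
    tripped bool
    nextID  int
    cancels map[int]func()
}

var errTripped = errors.New("breaker tripped")

func (b *breaker) register(cancel func()) (int, error) {
    b.mu.Lock()
    defer b.mu.Unlock()
    id := b.nextID
    b.nextID++
    b.cancels[id] = cancel // insert first ...
    if b.tripped {         // ... then check ...
        delete(b.cancels, id) // ... and undo if already tripped.
        cancel()
        return 0, errTripped
    }
    return id, nil
}

func (b *breaker) unregister(id int) {
    b.mu.Lock()
    defer b.mu.Unlock()
    delete(b.cancels, id)
}

func (b *breaker) trip() {
    b.mu.Lock()
    defer b.mu.Unlock()
    b.tripped = true
    for id, cancel := range b.cancels { // the probe's cancelAllTrackedContexts
        cancel()
        delete(b.cancels, id)
    }
}

func main() {
    b := &breaker{cancels: map[int]func(){}}
    id, _ := b.register(func() { fmt.Println("request 1 cancelled") })
    b.trip()         // cancels request 1
    b.unregister(id) // harmless: already removed
    _, err := b.register(func() {})
    fmt.Println(err) // breaker tripped
}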
diff --git a/pkg/kv/kvserver/replica_circuit_breaker_cancelstorage.go b/pkg/kv/kvserver/replica_circuit_breaker_cancelstorage.go
new file mode 100644
index 0000000000..c8f46bcffa
--- /dev/null
+++ b/pkg/kv/kvserver/replica_circuit_breaker_cancelstorage.go
@@ -0,0 +1,115 @@
+// Copyright 2022 The Cockroach Authors.
+//
+// Use of this software is governed by the Business Source License
+// included in the file licenses/BSL.txt.
+//
+// As of the Change Date specified in that file, in accordance with
+// the Business Source License, use of this software will be governed
+// by the Apache License, Version 2.0, included in the file
+// licenses/APL.txt.
+
+package kvserver
+
+import (
+ "context"
+ "sync"
+ "unsafe"
+
+ "github.com/cockroachdb/cockroach/pkg/util/syncutil"
+)
+
+// CancelStorage implements tracking of context cancellation functions
+// for use by Replica circuit breakers.
+type CancelStorage interface {
+ // Reset initializes the storage. Not thread safe.
+ Reset()
+ // Set adds context and associated cancel func to the storage. Returns a token
+ // that can be passed to Del.
+ //
+ // Set is thread-safe.
+ Set(_ context.Context, cancel func()) (token interface{})
+ // Del removes a cancel func, as identified by the token returned from Set.
+ //
+ // Del is thread-safe.
+ Del(token interface{})
+ // Visit invokes the provided closure with each (context,cancel) pair currently
+ // present in the storage. Items for which the visitor returns true are removed
+ // from the storage.
+ //
+ // Visit is thread-safe, but it is illegal to invoke methods of the
+ // CancelStorage from within the visitor.
+ Visit(func(context.Context, func()) (remove bool))
+}
+
+type cancelToken struct {
+ ctx context.Context
+}
+
+func (tok *cancelToken) fasthash() int {
+ // From https://github.com/taylorza/go-lfsr/blob/7ec2b93980f950da1e36c6682771e6fe14c144c2/lfsr.go#L46-L48.
+ s := int(uintptr(unsafe.Pointer(tok)))
+ b := (s >> 0) ^ (s >> 2) ^ (s >> 3) ^ (s >> 4)
+ return (s >> 1) | (b << 7)
+}
+
+var cancelTokenPool = sync.Pool{
+ New: func() interface{} { return &cancelToken{} },
+}
+
+type mapCancelShard struct {
+ syncutil.Mutex
+ m map[*cancelToken]func()
+}
+
+// A MapCancelStorage implements CancelStorage via shards of mutex-protected
+// maps.
+type MapCancelStorage struct {
+ NumShards int
+ sl []*mapCancelShard
+}
+
+// Reset implements CancelStorage.
+func (m *MapCancelStorage) Reset() {
+ if m.NumShards == 0 {
+ m.NumShards = 1
+ }
+ m.sl = make([]*mapCancelShard, m.NumShards)
+ for i := range m.sl {
+ s := &mapCancelShard{}
+ s.m = map[*cancelToken]func(){}
+ m.sl[i] = s
+ }
+}
+
+// Set implements CancelStorage.
+func (m *MapCancelStorage) Set(ctx context.Context, cancel func()) interface{} {
+ tok := cancelTokenPool.Get().(*cancelToken)
+ tok.ctx = ctx
+ shard := m.sl[tok.fasthash()%len(m.sl)]
+ shard.Lock()
+ shard.m[tok] = cancel
+ shard.Unlock()
+ return tok
+}
+
+// Del implements CancelStorage.
+func (m *MapCancelStorage) Del(tok interface{}) {
+ ttok := tok.(*cancelToken)
+ shard := m.sl[ttok.fasthash()%len(m.sl)]
+ shard.Lock()
+ delete(shard.m, tok.(*cancelToken))
+ shard.Unlock()
+}
+
+// Visit implements CancelStorage.
+func (m *MapCancelStorage) Visit(fn func(context.Context, func()) (remove bool)) {
+ for _, shard := range m.sl {
+ shard.Lock()
+ for tok, cancel := range shard.m {
+ if fn(tok.ctx, cancel) {
+ delete(shard.m, tok)
+ }
+ }
+ shard.Unlock()
+ }
+}
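
cancelToken.fasthash above turns the token's own address into a cheap shard index so that concurrent Set/Del calls spread across mutexes. A standalone sketch of that address-hash idea; the non-negative guard is added here defensively, whereas the real code relies on ordinary heap addresses keeping the value non-negative:

package main

import (
    "fmt"
    "unsafe"
)

type token struct{ _ [8]byte }

// shardFor mimics cancelToken.fasthash: mix a few shifted copies of the
// pointer value and map the result onto a shard index.
func shardFor(t *token, numShards int) int {
    s := int(uintptr(unsafe.Pointer(t)))
    b := (s >> 0) ^ (s >> 2) ^ (s >> 3) ^ (s >> 4)
    h := (s >> 1) | (b << 7)
    if h < 0 {
        h = -h // defensive; see note above
    }
    return h % numShards
}

func main() {
    for i := 0; i < 8; i++ {
        fmt.Println(shardFor(&token{}, 4))
    }
}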
diff --git a/pkg/kv/kvserver/replica_circuit_breaker_test.go b/pkg/kv/kvserver/replica_circuit_breaker_test.go
index ab1f9aa32d..c7b6029b7d 100644
--- a/pkg/kv/kvserver/replica_circuit_breaker_test.go
+++ b/pkg/kv/kvserver/replica_circuit_breaker_test.go
@@ -12,14 +12,23 @@ package kvserver
import (
"context"
+ "fmt"
+ "math/rand"
+ "runtime"
+ "strconv"
"testing"
+ "time"
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/liveness"
"github.com/cockroachdb/cockroach/pkg/roachpb"
+ "github.com/cockroachdb/cockroach/pkg/settings/cluster"
"github.com/cockroachdb/cockroach/pkg/testutils"
"github.com/cockroachdb/cockroach/pkg/testutils/echotest"
+ "github.com/cockroachdb/cockroach/pkg/util/ctxgroup"
+ "github.com/cockroachdb/cockroach/pkg/util/hlc"
"github.com/cockroachdb/cockroach/pkg/util/leaktest"
"github.com/cockroachdb/cockroach/pkg/util/log"
+ "github.com/cockroachdb/cockroach/pkg/util/stop"
"github.com/cockroachdb/errors"
"github.com/cockroachdb/redact"
"github.com/stretchr/testify/require"
@@ -39,12 +48,155 @@ func TestReplicaUnavailableError(t *testing.T) {
lm := liveness.IsLiveMap{
1: liveness.IsLiveMapEntry{IsLive: true},
}
- wrappedErr := errors.New("probe failed")
rs := raft.Status{}
- ctx := context.Background()
- err := errors.DecodeError(ctx, errors.EncodeError(ctx, replicaUnavailableError(
- wrappedErr, desc, desc.Replicas().AsProto()[0], lm, &rs),
- ))
- require.True(t, errors.Is(err, wrappedErr), "%+v", err)
+ err := replicaUnavailableError(desc, desc.Replicas().AsProto()[0], lm, &rs)
echotest.Require(t, string(redact.Sprint(err)), testutils.TestDataPath(t, "replica_unavailable_error.txt"))
}
+
+type circuitBreakerReplicaMock struct {
+ clock *hlc.Clock
+}
+
+func (c *circuitBreakerReplicaMock) Clock() *hlc.Clock {
+ return c.clock
+}
+
+func (c *circuitBreakerReplicaMock) Desc() *roachpb.RangeDescriptor {
+ return &roachpb.RangeDescriptor{}
+}
+
+func (c *circuitBreakerReplicaMock) Send(
+ ctx context.Context, ba roachpb.BatchRequest,
+) (*roachpb.BatchResponse, *roachpb.Error) {
+ return ba.CreateReply(), nil
+}
+
+func (c *circuitBreakerReplicaMock) slowReplicationThreshold(
+ ba *roachpb.BatchRequest,
+) (time.Duration, bool) {
+ return 0, false
+}
+
+func (c *circuitBreakerReplicaMock) replicaUnavailableError() error {
+ return errors.New("unavailable")
+}
+
+// This test verifies that when the breaker trips and untrips again,
+// there is no scenario under which the request's cancel leaks.
+func TestReplicaCircuitBreaker_NoCancelRace(t *testing.T) {
+ defer leaktest.AfterTest(t)()
+ br, stopper := setupCircuitBreakerTest(t, "mutexmap-1")
+ ctx, cancel := context.WithCancel(context.Background())
+ defer cancel()
+ defer stopper.Stop(ctx)
+
+ g := ctxgroup.WithContext(ctx)
+ const count = 100
+ for i := 0; i < count; i++ {
+ i := i // for goroutine
+ g.GoCtx(func(ctx context.Context) error {
+ ctx, cancel := context.WithCancel(ctx)
+ tok, sig, err := br.Register(ctx, cancel)
+ if err != nil {
+ _ = err // ignoring intentionally
+ return nil
+ }
+ if i == count/2 {
+ br.TripAsync() // probe will succeed
+ }
+ runtime.Gosched()
+ time.Sleep(time.Duration(rand.Intn(int(time.Millisecond))))
+ var pErr *roachpb.Error
+ if i%2 == 0 {
+ pErr = roachpb.NewErrorf("boom")
+ }
+ _ = br.UnregisterAndAdjustError(tok, sig, pErr)
+ return nil
+ })
+ }
+ require.NoError(t, g.Wait())
+ var n int
+ br.cancels.Visit(func(ctx context.Context, _ func()) (remove bool) {
+ n++
+ return false // keep
+ })
+ require.Zero(t, n, "found tracked requests")
+}
+
+func TestReplicaCircuitBreaker_Register(t *testing.T) {
+ defer leaktest.AfterTest(t)()
+ br, stopper := setupCircuitBreakerTest(t, "mutexmap-1")
+ defer stopper.Stop(context.Background())
+ ctx := withBypassCircuitBreakerMarker(context.Background())
+ tok, sig, err := br.Register(ctx, func() {})
+ require.NoError(t, err)
+ defer br.UnregisterAndAdjustError(tok, sig, nil /* pErr */)
+ require.Zero(t, sig.C())
+ var n int
+ br.cancels.Visit(func(ctx context.Context, f func()) (remove bool) {
+ n++
+ return false // keep
+ })
+ require.Zero(t, n, "probe context got added to CancelStorage")
+}
+
+func setupCircuitBreakerTest(t testing.TB, cs string) (*replicaCircuitBreaker, *stop.Stopper) {
+ st := cluster.MakeTestingClusterSettings()
+ // Enable circuit breakers.
+ replicaCircuitBreakerSlowReplicationThreshold.Override(context.Background(), &st.SV, time.Hour)
+ r := &circuitBreakerReplicaMock{clock: hlc.NewClock(hlc.UnixNano, 500*time.Millisecond)}
+ var numShards int
+ {
+ _, err := fmt.Sscanf(cs, "mutexmap-%d", &numShards)
+ require.NoError(t, err)
+ }
+ s := &MapCancelStorage{NumShards: numShards}
+ onTrip := func() {}
+ onReset := func() {}
+ stopper := stop.NewStopper()
+ br := newReplicaCircuitBreaker(st, stopper, log.AmbientContext{}, r, s, onTrip, onReset)
+ return br, stopper
+}
+
+func BenchmarkReplicaCircuitBreaker_Register(b *testing.B) {
+ defer leaktest.AfterTest(b)()
+
+ for _, enabled := range []bool{false, true} {
+ b.Run("enabled="+strconv.FormatBool(enabled), func(b *testing.B) {
+ dss := []string{
+ "mutexmap-1", "mutexmap-2", "mutexmap-4", "mutexmap-8", "mutexmap-12", "mutexmap-16",
+ "mutexmap-20", "mutexmap-24", "mutexmap-32", "mutexmap-64",
+ }
+ if !enabled {
+ dss = dss[:1]
+ }
+ for _, ds := range dss {
+ b.Run(ds, func(b *testing.B) {
+ b.ReportAllocs()
+ br, stopper := setupCircuitBreakerTest(b, ds)
+ defer stopper.Stop(context.Background())
+
+ var dur time.Duration
+ if enabled {
+ dur = time.Hour
+ }
+ replicaCircuitBreakerSlowReplicationThreshold.Override(context.Background(), &br.st.SV, dur)
+ b.ResetTimer()
+ b.RunParallel(func(pb *testing.PB) {
+ for pb.Next() {
+ ctx, cancel := context.WithCancel(context.Background())
+ tok, sig, err := br.Register(ctx, cancel)
+ if err != nil {
+ b.Error(err)
+ }
+ if pErr := br.UnregisterAndAdjustError(tok, sig, nil); pErr != nil {
+ b.Error(pErr)
+ }
+ cancel()
+ }
+ })
+ })
+ }
+ })
+ }
+}
diff --git a/pkg/kv/kvserver/replica_closedts_test.go b/pkg/kv/kvserver/replica_closedts_test.go
index 97692e703b..6726105910 100644
--- a/pkg/kv/kvserver/replica_closedts_test.go
+++ b/pkg/kv/kvserver/replica_closedts_test.go
@@ -28,7 +28,6 @@ import (
"github.com/cockroachdb/cockroach/pkg/server"
"github.com/cockroachdb/cockroach/pkg/testutils"
"github.com/cockroachdb/cockroach/pkg/testutils/serverutils"
- "github.com/cockroachdb/cockroach/pkg/testutils/skip"
"github.com/cockroachdb/cockroach/pkg/testutils/sqlutils"
"github.com/cockroachdb/cockroach/pkg/testutils/testcluster"
"github.com/cockroachdb/cockroach/pkg/util/hlc"
@@ -46,9 +45,6 @@ import (
func TestBumpSideTransportClosed(t *testing.T) {
defer leaktest.AfterTest(t)()
defer log.Scope(t).Close(t)
-
- skip.UnderRace(t)
-
ctx := context.Background()
type setupArgs struct {
diff --git a/pkg/kv/kvserver/replica_command.go b/pkg/kv/kvserver/replica_command.go
index 70588bfa58..6ed71ebceb 100644
--- a/pkg/kv/kvserver/replica_command.go
+++ b/pkg/kv/kvserver/replica_command.go
@@ -3353,29 +3353,39 @@ func (r *Replica) adminScatter(
}
ri := r.GetRangeInfo(ctx)
- stats := r.GetMVCCStats()
return roachpb.AdminScatterResponse{
RangeInfos: []roachpb.RangeInfo{ri},
- MVCCStats: &stats,
}, nil
}
-// TODO(arul): AdminVerifyProtectedTimestampRequest can entirely go away in
-// 22.2.
func (r *Replica) adminVerifyProtectedTimestamp(
- ctx context.Context, _ roachpb.AdminVerifyProtectedTimestampRequest,
+ ctx context.Context, args roachpb.AdminVerifyProtectedTimestampRequest,
) (resp roachpb.AdminVerifyProtectedTimestampResponse, err error) {
- // AdminVerifyProtectedTimestampRequest is not supported starting from the
- // 22.1 release. We expect nodes running a 22.1 binary to still service this
- // request in a {21.2, 22.1} mixed version cluster. This can happen if the
- // request is initiated on a 21.2 node and the leaseholder of the range it is
- // trying to verify is on a 22.1 node.
- //
- // We simply return true without attempting to verify in such a case. This
- // ensures upstream jobs (backups) don't fail as a result. It is okay to
- // return true regardless even if the PTS record being verified does not apply
- // as the failure mode is non-destructive. Infact, this is the reason we're
- // no longer supporting Verification past 22.1.
- resp.Verified = true
+ var doesNotApplyReason string
+ resp.Verified, doesNotApplyReason, err = r.protectedTimestampRecordApplies(ctx, &args)
+ if err != nil {
+ return resp, err
+ }
+
+ // In certain cases we do not want to return an error even if we failed to
+ // verify the protected ts record. This ensures that executeAdminBatch adds
+ // the response to the batch, thereby allowing us to aggregate the
+ // verification failures across all AdminVerifyProtectedTimestampRequests and
+ // construct a more informative error to show to the user.
+ if doesNotApplyReason != "" {
+ if !resp.Verified {
+ desc := r.Desc()
+ failedRange := roachpb.AdminVerifyProtectedTimestampResponse_FailedRange{
+ RangeID: int64(desc.GetRangeID()),
+ StartKey: desc.GetStartKey(),
+ EndKey: desc.EndKey,
+ Reason: doesNotApplyReason,
+ }
+ resp.VerificationFailedRanges = append(resp.VerificationFailedRanges, failedRange)
+ // TODO(adityamaru): This is here for compatibility with 20.2, remove in
+ // 21.2.
+ resp.DeprecatedFailedRanges = append(resp.DeprecatedFailedRanges, *r.Desc())
+ }
+ }
return resp, nil
}
diff --git a/pkg/kv/kvserver/replica_consistency.go b/pkg/kv/kvserver/replica_consistency.go
index fd270fe5ef..3fc943c631 100644
--- a/pkg/kv/kvserver/replica_consistency.go
+++ b/pkg/kv/kvserver/replica_consistency.go
@@ -253,7 +253,7 @@ func (r *Replica) CheckConsistency(
if !haveDelta {
return resp, nil
}
- log.Fatalf(ctx, "found a delta of %+v", redact.Safe(delta))
+ log.Fatalf(ctx, "found a delta of %+v", log.Safe(delta))
}
}
diff --git a/pkg/kv/kvserver/replica_destroy.go b/pkg/kv/kvserver/replica_destroy.go
index a6a8ed9fd5..cdc32adb3e 100644
--- a/pkg/kv/kvserver/replica_destroy.go
+++ b/pkg/kv/kvserver/replica_destroy.go
@@ -209,7 +209,8 @@ func (r *Replica) disconnectReplicationRaftMuLocked(ctx context.Context) {
// share the error across proposals).
p.finishApplication(ctx, proposalResult{
Err: roachpb.NewError(
- roachpb.NewAmbiguousResultError(apply.ErrRemoved)),
+ roachpb.NewAmbiguousResultError(
+ apply.ErrRemoved.Error())),
})
}
r.mu.internalRaftGroup = nil
diff --git a/pkg/kv/kvserver/replica_init.go b/pkg/kv/kvserver/replica_init.go
index 5f7e1629d3..2545b21644 100644
--- a/pkg/kv/kvserver/replica_init.go
+++ b/pkg/kv/kvserver/replica_init.go
@@ -137,8 +137,14 @@ func newUnloadedReplica(
onReset := func() {
store.Metrics().ReplicaCircuitBreakerCurTripped.Dec(1)
}
+ var cancelStorage CancelStorage
+ if f := r.store.cfg.TestingKnobs.CancelStorageFactory; f != nil {
+ cancelStorage = f()
+ } else {
+ cancelStorage = &MapCancelStorage{}
+ }
r.breaker = newReplicaCircuitBreaker(
- store.cfg.Settings, store.stopper, r.AmbientContext, r, onTrip, onReset,
+ store.cfg.Settings, store.stopper, r.AmbientContext, r, cancelStorage, onTrip, onReset,
)
return r
}
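
The replica_init.go hunk above consults a CancelStorageFactory testing knob before falling back to MapCancelStorage. Assuming the knob is a plain func() CancelStorage field on the store's testing knobs (its declaration is not part of this excerpt), a test could supply it roughly like this hypothetical snippet:

package example

import "github.com/cockroachdb/cockroach/pkg/kv/kvserver"

// storeKnobsWithShardedCancelStorage is illustrative wiring only; the exact
// knob declaration is assumed, not shown in the diff above.
func storeKnobsWithShardedCancelStorage() *kvserver.StoreTestingKnobs {
    return &kvserver.StoreTestingKnobs{
        CancelStorageFactory: func() kvserver.CancelStorage {
            return &kvserver.MapCancelStorage{NumShards: 8}
        },
    }
}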
diff --git a/pkg/kv/kvserver/replica_learner_test.go b/pkg/kv/kvserver/replica_learner_test.go
index 3edeb38dae..9599fae010 100644
--- a/pkg/kv/kvserver/replica_learner_test.go
+++ b/pkg/kv/kvserver/replica_learner_test.go
@@ -27,7 +27,6 @@ import (
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/kvserverpb"
"github.com/cockroachdb/cockroach/pkg/roachpb"
"github.com/cockroachdb/cockroach/pkg/server"
- "github.com/cockroachdb/cockroach/pkg/server/serverpb"
"github.com/cockroachdb/cockroach/pkg/testutils"
"github.com/cockroachdb/cockroach/pkg/testutils/serverutils"
"github.com/cockroachdb/cockroach/pkg/testutils/skip"
@@ -313,7 +312,6 @@ func TestLearnerSnapshotFailsRollback(t *testing.T) {
defer log.Scope(t).Close(t)
skip.UnderShort(t) // Takes 90s.
- skip.UnderRace(t)
runTest := func(t *testing.T, replicaType roachpb.ReplicaType) {
var rejectSnapshots int64
@@ -360,7 +358,15 @@ func TestLearnerSnapshotFailsRollback(t *testing.T) {
})
}
-func testRaftSnapshotsToNonVoters(t *testing.T, drainReceivingNode bool) {
+// TestNonVoterCatchesUpViaRaftSnapshotQueue ensures that a non-voting replica
+// in need of a snapshot will receive one via the raft snapshot queue. This is
+// also meant to test that a non-voting replica that is initialized via an
+// `INITIAL` snapshot during its addition is not ignored by the raft snapshot
+// queue for future snapshots.
+func TestNonVoterCatchesUpViaRaftSnapshotQueue(t *testing.T) {
+ defer leaktest.AfterTest(t)()
+ defer log.Scope(t).Close(t)
+
skip.UnderShort(t, "this test sleeps for a few seconds")
var skipInitialSnapshot int64
@@ -381,7 +387,6 @@ func testRaftSnapshotsToNonVoters(t *testing.T, drainReceivingNode bool) {
// Disable the raft snapshot queue, we will manually queue a replica into it
// below.
ltk.storeKnobs.DisableRaftSnapshotQueue = true
-
tc := testcluster.StartTestCluster(
t, 2, base.TestClusterArgs{
ServerArgs: base.TestServerArgs{Knobs: knobs},
@@ -401,8 +406,6 @@ func testRaftSnapshotsToNonVoters(t *testing.T, drainReceivingNode bool) {
return err
})
- // Wait until we remove the lock that prevents the raft snapshot queue from
- // sending this replica a snapshot.
select {
case <-nonVoterSnapLockRemoved:
case <-time.After(testutils.DefaultSucceedsSoonDuration):
@@ -418,16 +421,6 @@ func testRaftSnapshotsToNonVoters(t *testing.T, drainReceivingNode bool) {
time.Sleep(kvserver.RaftLogQueuePendingSnapshotGracePeriod)
- if drainReceivingNode {
- // Draining nodes shouldn't reject raft snapshots, so this should have no
- // effect on the outcome of this test.
- const drainingServerIdx = 1
- const drainingNodeID = drainingServerIdx + 1
- client, err := tc.GetAdminClient(ctx, t, drainingServerIdx)
- require.NoError(t, err)
- drain(ctx, t, client, drainingNodeID)
- }
-
testutils.SucceedsSoon(t, func() error {
// Manually enqueue the leaseholder replica into its store's raft snapshot
// queue. We expect it to pick up on the fact that the non-voter on its range
@@ -453,62 +446,6 @@ func testRaftSnapshotsToNonVoters(t *testing.T, drainReceivingNode bool) {
require.NoError(t, g.Wait())
}
-func drain(ctx context.Context, t *testing.T, client serverpb.AdminClient, drainingNodeID int) {
- stream, err := client.Drain(ctx, &serverpb.DrainRequest{
- NodeId: strconv.Itoa(drainingNodeID),
- DoDrain: true,
- })
- require.NoError(t, err)
-
- // Wait until the draining node acknowledges that it's draining.
- _, err = stream.Recv()
- require.NoError(t, err)
-}
-
-// TestSnapshotsToDrainingNodes tests that rebalancing snapshots to draining
-// receivers are rejected, but Raft snapshots aren't.
-func TestSnapshotsToDrainingNodes(t *testing.T) {
- defer leaktest.AfterTest(t)()
- defer log.Scope(t).Close(t)
-
- t.Run("rebalancing snapshots", func(t *testing.T) {
- ctx := context.Background()
-
- // We set up a 2 node test cluster with the second node marked draining.
- const drainingServerIdx = 1
- const drainingNodeID = drainingServerIdx + 1
- tc := testcluster.StartTestCluster(
- t, 2, base.TestClusterArgs{
- ReplicationMode: base.ReplicationManual,
- },
- )
- defer tc.Stopper().Stop(ctx)
- client, err := tc.GetAdminClient(ctx, t, drainingServerIdx)
- require.NoError(t, err)
- drain(ctx, t, client, drainingNodeID)
-
- // Now, we try to add a replica to it, we expect that to fail.
- scratchKey := tc.ScratchRange(t)
- _, err = tc.AddVoters(scratchKey, makeReplicationTargets(drainingNodeID)...)
- require.Regexp(t, "store is draining", err)
- })
-
- t.Run("raft snapshots", func(t *testing.T) {
- testRaftSnapshotsToNonVoters(t, true /* drainReceivingNode */)
- })
-}
-
-// TestNonVoterCatchesUpViaRaftSnapshotQueue ensures that a non-voting replica
-// in need of a snapshot will receive one via the raft snapshot queue. This is
-// also meant to test that a non-voting replica that is initialized via an
-// `INITIAL` snapshot during its addition is not ignored by the raft snapshot
-// queue for future snapshots.
-func TestNonVoterCatchesUpViaRaftSnapshotQueue(t *testing.T) {
- defer leaktest.AfterTest(t)()
- defer log.Scope(t).Close(t)
- testRaftSnapshotsToNonVoters(t, false /* drainReceivingNode */)
-}
-
func TestSplitWithLearnerOrJointConfig(t *testing.T) {
defer leaktest.AfterTest(t)()
defer log.Scope(t).Close(t)
diff --git a/pkg/kv/kvserver/replica_proposal.go b/pkg/kv/kvserver/replica_proposal.go
index fa08b6fc22..bbef1c9cfa 100644
--- a/pkg/kv/kvserver/replica_proposal.go
+++ b/pkg/kv/kvserver/replica_proposal.go
@@ -35,7 +35,6 @@ import (
"github.com/cockroachdb/cockroach/pkg/util/quotapool"
"github.com/cockroachdb/cockroach/pkg/util/timeutil"
"github.com/cockroachdb/cockroach/pkg/util/tracing"
- "github.com/cockroachdb/redact"
"github.com/kr/pretty"
"golang.org/x/time/rate"
)
@@ -159,16 +158,6 @@ func (proposal *ProposalData) signalProposalResult(pr proposalResult) {
if proposal.doneCh != nil {
proposal.doneCh <- pr
proposal.doneCh = nil
- // Need to remove any span from the proposal, as the signalled caller
- // will likely finish it, and if we then end up applying this proposal
- // we'll try to make a ChildSpan off `proposal.ctx` and this will
- // trigger the Span use-after-finish assertions.
- //
- // See: https://github.com/cockroachdb/cockroach/pull/76858#issuecomment-1048179588
- //
- // NB: `proposal.ec.repl` might already have been cleared if we arrive here
- // through finishApplication.
- proposal.ctx = context.Background()
}
}
@@ -237,7 +226,7 @@ func (r *Replica) leasePostApplyLocked(
switch {
case s2 < s1:
log.Fatalf(ctx, "lease sequence inversion, prevLease=%s, newLease=%s",
- redact.Safe(prevLease), redact.Safe(newLease))
+ log.Safe(prevLease), log.Safe(newLease))
case s2 == s1:
// If the sequence numbers are the same, make sure they're actually
// the same lease. This can happen when callers are using
@@ -245,13 +234,13 @@ func (r *Replica) leasePostApplyLocked(
// splitPostApply. It can also happen during lease extensions.
if !prevLease.Equivalent(*newLease) {
log.Fatalf(ctx, "sequence identical for different leases, prevLease=%s, newLease=%s",
- redact.Safe(prevLease), redact.Safe(newLease))
+ log.Safe(prevLease), log.Safe(newLease))
}
case s2 == s1+1:
// Lease sequence incremented by 1. Expected case.
case s2 > s1+1 && jumpOpt == assertNoLeaseJump:
log.Fatalf(ctx, "lease sequence jump, prevLease=%s, newLease=%s",
- redact.Safe(prevLease), redact.Safe(newLease))
+ log.Safe(prevLease), log.Safe(newLease))
}
}
diff --git a/pkg/kv/kvserver/replica_protected_timestamp.go b/pkg/kv/kvserver/replica_protected_timestamp.go
index fb86e67198..cc23a79c78 100644
--- a/pkg/kv/kvserver/replica_protected_timestamp.go
+++ b/pkg/kv/kvserver/replica_protected_timestamp.go
@@ -12,9 +12,12 @@ package kvserver
import (
"context"
+ "fmt"
"time"
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/gc"
+ "github.com/cockroachdb/cockroach/pkg/kv/kvserver/kvserverbase"
+ "github.com/cockroachdb/cockroach/pkg/kv/kvserver/protectedts/ptpb"
"github.com/cockroachdb/cockroach/pkg/roachpb"
"github.com/cockroachdb/cockroach/pkg/util/hlc"
"github.com/cockroachdb/cockroach/pkg/util/log"
@@ -24,8 +27,8 @@ import (
// cachedProtectedTimestampState is used to cache information about the state
// of protected timestamps as they pertain to this replica. The data is
// refreshed when the replica examines protected timestamps when being
-// considered for gc. It is consulted when determining whether a request can be
-// served.
+// considered for gc or when verifying a protected timestamp record.
+// It is consulted when determining whether a request can be served.
type cachedProtectedTimestampState struct {
// readAt denotes the timestamp at which this record was read.
// It is used to coordinate updates to this field. It is also used to
@@ -34,8 +37,8 @@ type cachedProtectedTimestampState struct {
// that protected timestamps have not been observed. In this case we must
// assume that any protected timestamp could exist to provide the contract
// on verify.
- readAt hlc.Timestamp
- earliestProtectionTimestamp hlc.Timestamp
+ readAt hlc.Timestamp
+ earliestRecord *ptpb.Record
}
// clearIfNotNewer clears the state in ts if it is not newer than the passed
@@ -64,35 +67,185 @@ func (r *Replica) maybeUpdateCachedProtectedTS(ts *cachedProtectedTimestampState
}
}
+// protectedTimestampRecordApplies returns true if the record which protects
+// the `protected` timestamp will apply. It returns false if it may not. If
+// the state of the cache is not sufficiently new to determine whether the
+// record will apply, the cache is refreshed and then the check is performed
+// again. See r.protectedTimestampRecordCurrentlyApplies() for more details.
+func (r *Replica) protectedTimestampRecordApplies(
+ ctx context.Context, args *roachpb.AdminVerifyProtectedTimestampRequest,
+) (willApply bool, doesNotApplyReason string, _ error) {
+ // Check the state of the cache without a refresh.
+ willApply, cacheTooOld, doesNotApplyReason, err := r.protectedTimestampRecordCurrentlyApplies(
+ ctx, args)
+ if err != nil {
+ return false, doesNotApplyReason, err
+ }
+ if !cacheTooOld {
+ return willApply, doesNotApplyReason, nil
+ }
+ // Refresh the cache so that we know that the next time we come around we're
+ // certain to either see the record or see a timestamp for readAt that is
+ // greater than or equal to recordAliveAt.
+ if err := r.store.protectedtsCache.Refresh(ctx, args.RecordAliveAt); err != nil {
+ return false, doesNotApplyReason, err
+ }
+ willApply, cacheTooOld, doesNotApplyReason, err = r.protectedTimestampRecordCurrentlyApplies(
+ ctx, args)
+ if err != nil {
+ return false, doesNotApplyReason, err
+ }
+ if cacheTooOld {
+ return false, doesNotApplyReason, errors.AssertionFailedf(
+ "cache was not updated after being refreshed")
+ }
+ return willApply, doesNotApplyReason, nil
+}
+
func (r *Replica) readProtectedTimestampsRLocked(
- ctx context.Context,
-) (ts cachedProtectedTimestampState, _ error) {
+ ctx context.Context, f func(r *ptpb.Record),
+) (ts cachedProtectedTimestampState) {
desc := r.descRLocked()
gcThreshold := *r.mu.state.GCThreshold
- sp := roachpb.Span{
- Key: roachpb.Key(desc.StartKey),
- EndKey: roachpb.Key(desc.EndKey),
+ ts.readAt = r.store.protectedtsCache.Iterate(ctx,
+ roachpb.Key(desc.StartKey),
+ roachpb.Key(desc.EndKey),
+ func(rec *ptpb.Record) (wantMore bool) {
+ // Check if we've already GC'd past the timestamp this record was trying
+ // to protect, in which case we know that the record does not apply.
+ // Note that when we implement PROTECT_AT, we'll need to consult some
+ // replica state here to determine whether the record indeed has been
+ // applied.
+ if isValid := gcThreshold.LessEq(rec.Timestamp); !isValid {
+ return true
+ }
+ if f != nil {
+ f(rec)
+ }
+ if ts.earliestRecord == nil || rec.Timestamp.Less(ts.earliestRecord.Timestamp) {
+ ts.earliestRecord = rec
+ }
+ return true
+ })
+ return ts
+}
+
+// protectedTimestampRecordCurrentlyApplies determines whether a record with
+// the specified ID which protects `protected` and is known to exist at
+// `recordAliveAt` will apply given the current state of the cache. This method
+// is called by `r.protectedTimestampRecordApplies()`. It may be the case that
+// the current state of the cache is too old to determine whether the record
+// will apply. In such cases the cache should be refreshed to recordAliveAt and
+// then this method should be called again.
+// In certain cases we return a doesNotApplyReason explaining why the protected
+// ts record does not currently apply. We do not want to return an error so that
+// we can aggregate the reasons across multiple
+// AdminVerifyProtectedTimestampRequest, as explained in
+// adminVerifyProtectedTimestamp.
+func (r *Replica) protectedTimestampRecordCurrentlyApplies(
+ ctx context.Context, args *roachpb.AdminVerifyProtectedTimestampRequest,
+) (willApply, cacheTooOld bool, doesNotApplyReason string, _ error) {
+ // We first need to check that we're the current leaseholder.
+ // TODO(ajwerner): what other conditions with regards to time do we need to
+ // check? I don't think there are any. If the recordAliveAt is after our
+ // liveness expiration that's okay because we're either going to find the
+ // record or we're not and if we don't then we'll push the cache and re-assert
+ // that we're still the leaseholder. If somebody else becomes the leaseholder
+ // then they will have to go through the same process.
+ ls, pErr := r.redirectOnOrAcquireLease(ctx)
+ if pErr != nil {
+ return false, false, "", pErr.GoError()
}
- var protectionTimestamps []hlc.Timestamp
- var err error
- protectionTimestamps, ts.readAt, err = r.store.protectedtsReader.GetProtectionTimestamps(ctx, sp)
- if err != nil {
- return ts, err
- }
- earliestTS := hlc.Timestamp{}
- for _, protectionTimestamp := range protectionTimestamps {
- // Check if the timestamp the record was trying to protect is strictly
- // below the GCThreshold, in which case, we know the record does not apply.
- if isValid := gcThreshold.LessEq(protectionTimestamp); !isValid {
- continue
- }
- if earliestTS.IsEmpty() || protectionTimestamp.Less(earliestTS) {
- earliestTS = protectionTimestamp
+
+ // NB: It should be the case that the recordAliveAt timestamp
+ // is before the current time and that the above lease check means that
+ // the replica is the leaseholder at the current time. If recordAliveAt
+ // happened to be newer than the current time we'd need to make sure that
+ // the current Replica will be live at that time. Given that recordAliveAt
+ // has to be before the batch timestamp for this request and we should
+ // have forwarded the local clock to the batch timestamp this can't
+ // happen.
+ // TODO(ajwerner): do we need to assert that indeed the recordAliveAt precedes
+ // the batch timestamp? Probably not a bad sanity check.
+
+ // We may be reading the protected timestamp cache while we're holding
+ // the Replica.mu for reading. If we do so and find newer state in the cache
+ // then we want to update the replica's cache of its state. The guarantee
+ // we provide is that if a record is successfully verified then the Replica's
+ // cachedProtectedTS will have a readAt value high enough to include that
+ // record.
+ var read cachedProtectedTimestampState
+ defer r.maybeUpdateCachedProtectedTS(&read)
+ r.mu.RLock()
+ defer r.mu.RUnlock()
+ defer read.clearIfNotNewer(r.mu.cachedProtectedTS)
+
+ // If the key that routed this request to this range is now out of this
+ // range's bounds, return an error for the client to try again on the
+ // correct range.
+ desc := r.descRLocked()
+ if !kvserverbase.ContainsKeyRange(desc, args.Key, args.EndKey) {
+ return false, false, "", roachpb.NewRangeKeyMismatchErrorWithCTPolicy(ctx, args.Key, args.EndKey, desc,
+ r.mu.state.Lease, r.closedTimestampPolicyRLocked())
+ }
+ if args.Protected.LessEq(*r.mu.state.GCThreshold) {
+ gcReason := fmt.Sprintf("protected ts: %s is less than equal to the GCThreshold: %s for the"+
+ " range %s - %s", args.Protected.String(), r.mu.state.GCThreshold.String(),
+ desc.StartKey.String(), desc.EndKey.String())
+ return false, false, gcReason, nil
+ }
+ if args.RecordAliveAt.Less(ls.Lease.Start.ToTimestamp()) {
+ return true, false, "", nil
+ }
+
+ // Now we're in the case where it is possible that we will later attempt to
+ // set the GC threshold above our protected point, so to prevent that we add
+ // some state to the replica.
+ r.protectedTimestampMu.Lock()
+ defer r.protectedTimestampMu.Unlock()
+ if args.Protected.Less(r.protectedTimestampMu.pendingGCThreshold) {
+ gcReason := fmt.Sprintf(
+ "protected ts: %s is less than the pending GCThreshold: %s for the range %s - %s",
+ args.Protected.String(), r.protectedTimestampMu.pendingGCThreshold.String(),
+ desc.StartKey.String(), desc.EndKey.String())
+ return false, false, gcReason, nil
+ }
+
+ var seen bool
+ read = r.readProtectedTimestampsRLocked(ctx, func(r *ptpb.Record) {
+ // Comparing record ID and the timestamp ensures that we find the record
+ // that we are verifying.
+ // A PTS record can be updated with a new Timestamp to protect, and so we
+ // need to ensure that we are not seeing the old version of the record in
+ // case the cache has not been updated.
+ if r.ID.GetUUID() == args.RecordID && args.Protected.LessEq(r.Timestamp) {
+ seen = true
}
+ })
+
+ // If we observed the record in question then we know that all future attempts
+ // to run GC will observe the Record if it still exists. The one hazard we
+ // need to avoid is a race whereby an attempt to run GC first checks the
+ // protected timestamp state and then attempts to increase the GC threshold.
+ // We set the minStateReadTimestamp here to avoid such races. The MVCC GC
+ // queue will call markPendingGC just prior to sending a request to update the
+ // GC threshold which will verify the safety of the new value relative to
+ // minStateReadTimestamp.
+ if seen {
+ r.protectedTimestampMu.minStateReadTimestamp = read.readAt
+ return true, false, "", nil
+ }
+
+ isCacheTooOld := read.readAt.Less(args.RecordAliveAt)
+ // Protected timestamp state has progressed past the point at which we
+ // should see this record. This implies that the record has been removed.
+ if !isCacheTooOld {
+ recordRemovedReason := "protected ts record has been removed"
+ return false, false, recordRemovedReason, nil
}
- ts.earliestProtectionTimestamp = earliestTS
- return ts, nil
+ // Retry, since the cache is too old.
+ return false, true, "", nil
}
// checkProtectedTimestampsForGC determines whether the Replica can run GC. If
@@ -108,7 +261,7 @@ func (r *Replica) readProtectedTimestampsRLocked(
// old gc threshold, and the new gc threshold.
func (r *Replica) checkProtectedTimestampsForGC(
ctx context.Context, gcTTL time.Duration,
-) (canGC bool, cacheTimestamp, gcTimestamp, oldThreshold, newThreshold hlc.Timestamp, _ error) {
+) (canGC bool, cacheTimestamp, gcTimestamp, oldThreshold, newThreshold hlc.Timestamp) {
// We may be reading the protected timestamp cache while we're holding
// the Replica.mu for reading. If we do so and find newer state in the cache
@@ -127,16 +280,12 @@ func (r *Replica) checkProtectedTimestampsForGC(
// read.earliestRecord is the record with the earliest timestamp which is
// greater than the existing gcThreshold.
- var err error
- read, err = r.readProtectedTimestampsRLocked(ctx)
- if err != nil {
- return false, hlc.Timestamp{}, hlc.Timestamp{}, hlc.Timestamp{}, hlc.Timestamp{}, err
- }
+ read = r.readProtectedTimestampsRLocked(ctx, nil)
gcTimestamp = read.readAt
- if !read.earliestProtectionTimestamp.IsEmpty() {
+ if read.earliestRecord != nil {
// NB: we want to allow GC up to the timestamp preceding the earliest valid
- // protection timestamp.
- impliedGCTimestamp := gc.TimestampForThreshold(read.earliestProtectionTimestamp.Prev(), gcTTL)
+ // record.
+ impliedGCTimestamp := gc.TimestampForThreshold(read.earliestRecord.Timestamp.Prev(), gcTTL)
if impliedGCTimestamp.Less(gcTimestamp) {
gcTimestamp = impliedGCTimestamp
}
@@ -145,12 +294,12 @@ func (r *Replica) checkProtectedTimestampsForGC(
if gcTimestamp.Less(lease.Start.ToTimestamp()) {
log.VEventf(ctx, 1, "not gc'ing replica %v due to new lease %v started after %v",
r, lease, gcTimestamp)
- return false, hlc.Timestamp{}, hlc.Timestamp{}, hlc.Timestamp{}, hlc.Timestamp{}, nil
+ return false, hlc.Timestamp{}, hlc.Timestamp{}, hlc.Timestamp{}, hlc.Timestamp{}
}
newThreshold = gc.CalculateThreshold(gcTimestamp, gcTTL)
- return true, read.readAt, gcTimestamp, oldThreshold, newThreshold, nil
+ return true, read.readAt, gcTimestamp, oldThreshold, newThreshold
}
// markPendingGC is called just prior to sending the GC request to increase the
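Editorial aside (not part of the patch): the verification flow added in replica_protected_timestamp.go follows a check-then-refresh-then-recheck shape: consult the cached protected timestamp state, and only if the cache is too old to decide, refresh it up to RecordAliveAt and check once more. A minimal sketch of that control flow, with a hypothetical cache interface standing in for the real protectedts cache and plain int64 timestamps in place of hlc.Timestamp:

package main

import (
	"context"
	"errors"
	"fmt"
)

// ptsCache is a hypothetical stand-in for the protected timestamp cache: it
// can answer as of some read timestamp and be refreshed forward.
type ptsCache interface {
	// Check reports whether the record applies and whether the cached state is
	// too old (read before recordAliveAt) to decide.
	Check(recordAliveAt int64) (applies, tooOld bool)
	Refresh(ctx context.Context, asOf int64) error
}

// recordApplies mirrors the check/refresh/recheck control flow of
// protectedTimestampRecordApplies in the hunk above.
func recordApplies(ctx context.Context, c ptsCache, recordAliveAt int64) (bool, error) {
	// First, see if the cached state already answers the question.
	applies, tooOld := c.Check(recordAliveAt)
	if !tooOld {
		return applies, nil
	}
	// The cache predates recordAliveAt; refresh it so the next check is
	// guaranteed to either see the record or see a readAt past recordAliveAt.
	if err := c.Refresh(ctx, recordAliveAt); err != nil {
		return false, err
	}
	applies, tooOld = c.Check(recordAliveAt)
	if tooOld {
		return false, errors.New("cache was not updated after being refreshed")
	}
	return applies, nil
}

// fakeCache is a toy implementation used only to exercise the sketch.
type fakeCache struct{ readAt int64 }

func (f *fakeCache) Check(recordAliveAt int64) (bool, bool) {
	return f.readAt >= recordAliveAt, f.readAt < recordAliveAt
}

func (f *fakeCache) Refresh(_ context.Context, asOf int64) error {
	if asOf > f.readAt {
		f.readAt = asOf
	}
	return nil
}

func main() {
	ok, err := recordApplies(context.Background(), &fakeCache{readAt: 5}, 10)
	fmt.Println(ok, err)
}

The real code threads a doesNotApplyReason string through instead of returning an error because, as the patch's comments note, the reasons are aggregated across multiple AdminVerifyProtectedTimestampRequests in adminVerifyProtectedTimestamp.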
diff --git a/pkg/kv/kvserver/replica_protected_timestamp_test.go b/pkg/kv/kvserver/replica_protected_timestamp_test.go
index f02ca7af3e..31c54420f8 100644
--- a/pkg/kv/kvserver/replica_protected_timestamp_test.go
+++ b/pkg/kv/kvserver/replica_protected_timestamp_test.go
@@ -12,23 +12,397 @@ package kvserver
import (
"context"
- "math/rand"
+ "fmt"
"testing"
"time"
"github.com/cockroachdb/cockroach/pkg/keys"
+ "github.com/cockroachdb/cockroach/pkg/kv/kvserver/pro