Skip to content

Instantly share code, notes, and snippets.

@ajwerner
Created September 10, 2019 19:46
Show Gist options
  • Save ajwerner/9e98a6ed5285408b57275de60faba872 to your computer and use it in GitHub Desktop.
Save ajwerner/9e98a6ed5285408b57275de60faba872 to your computer and use it in GitHub Desktop.
Diff against commit 3ad8bda97d42fbfe978e4c5eb2ec3e3a291c1608
diff --git a/pkg/ccl/importccl/import_processor.go b/pkg/ccl/importccl/import_processor.go
index 641f7258ef..21c18d71e5 100644
--- a/pkg/ccl/importccl/import_processor.go
+++ b/pkg/ccl/importccl/import_processor.go
@@ -263,7 +263,7 @@ func (cp *readImportDataProcessor) presplitTableBoundaries(ctx context.Context)
stickyBitEnabled := cp.flowCtx.Cfg.Settings.Version.IsActive(cluster.VersionStickyBit)
expirationTime := hlc.Timestamp{}
if stickyBitEnabled {
- expirationTime = cp.flowCtx.Cfg.DB.Clock().Now().Add(time.Hour.Nanoseconds(), 0)
+ expirationTime = hlc.Timestamp{WallTime: cp.spec.WalltimeNanos + time.Hour.Nanoseconds()}
}
for _, tbl := range cp.spec.Tables {
for _, span := range tbl.Desc.AllIndexSpans() {
diff --git a/pkg/storage/replica_command.go b/pkg/storage/replica_command.go
index 64311f1ede..d3da9fc787 100644
--- a/pkg/storage/replica_command.go
+++ b/pkg/storage/replica_command.go
@@ -40,6 +40,7 @@ import (
crdberrors "github.com/cockroachdb/errors"
"github.com/cockroachdb/logtags"
"github.com/gogo/protobuf/proto"
+ "github.com/kr/pretty"
"github.com/pkg/errors"
"go.etcd.io/etcd/raft"
"go.etcd.io/etcd/raft/raftpb"
@@ -54,7 +55,7 @@ func (r *Replica) AdminSplit(
return roachpb.AdminSplitResponse{}, roachpb.NewErrorf("cannot split range with no key provided")
}
- err := r.executeAdminCommandWithDescriptor(ctx, func(desc *roachpb.RangeDescriptor) error {
+ err := r.executeAdminCommandWithDescriptor(ctx, "split", func(desc *roachpb.RangeDescriptor) error {
var err error
reply, err = r.adminSplitWithDescriptor(ctx, args, desc, true /* delayable */, reason)
return err
@@ -71,7 +72,8 @@ func maybeDescriptorChangedError(desc *roachpb.RangeDescriptor, err error) (stri
return fmt.Sprintf("descriptor changed: expected %s != [actual] nil (range subsumed)", desc), true
} else if err := detail.ActualValue.GetProto(&actualDesc); err == nil &&
desc.RangeID == actualDesc.RangeID && !desc.Equal(actualDesc) {
- return fmt.Sprintf("descriptor changed: [expected] %s != [actual] %s", desc, &actualDesc), true
+ return fmt.Sprintf("descriptor changed: [expected] %s != [actual] %s: %v",
+ desc, &actualDesc, pretty.Diff(desc, &actualDesc)), true
}
}
return "", false
@@ -434,7 +436,7 @@ func (r *Replica) AdminUnsplit(
ctx context.Context, args roachpb.AdminUnsplitRequest, reason string,
) (roachpb.AdminUnsplitResponse, *roachpb.Error) {
var reply roachpb.AdminUnsplitResponse
- err := r.executeAdminCommandWithDescriptor(ctx, func(desc *roachpb.RangeDescriptor) error {
+ err := r.executeAdminCommandWithDescriptor(ctx, "unsplit", func(desc *roachpb.RangeDescriptor) error {
var err error
reply, err = r.adminUnsplitWithDescriptor(ctx, args, desc, reason)
return err
@@ -513,12 +515,16 @@ func (r *Replica) adminUnsplitWithDescriptor(
// executeAdminCommandWithDescriptor wraps a read-modify-write operation for RangeDescriptors in a
// retry loop.
func (r *Replica) executeAdminCommandWithDescriptor(
- ctx context.Context, updateDesc func(*roachpb.RangeDescriptor) error,
+ ctx context.Context, op string, updateDesc func(*roachpb.RangeDescriptor) error,
) *roachpb.Error {
retryOpts := base.DefaultRetryOptions()
- retryOpts.MaxRetries = 10
+ retryOpts.MaxRetries = 2
var lastErr error
+ var i int
for retryable := retry.StartWithCtx(ctx, retryOpts); retryable.Next(); {
+ if i++; i > 1 {
+ log.Infof(ctx, "retrying %v for this %dth time after %v", op, i, lastErr)
+ }
// The replica may have been destroyed since the start of the retry loop.
// We need to explicitly check this condition. Having a valid lease, as we
// verify below, does not imply that the range still exists: even after a
@@ -867,6 +873,15 @@ func IsSnapshotError(err error) bool {
})
}
+// IsConditionFailedError returns true iff the error indicates that a conditional
+// put (CPut) failed, i.e. its cause is a *roachpb.ConditionFailedError.
+func IsConditionFailedError(err error) bool {
+ return causer.Visit(err, func(err error) bool {
+ _, ok := errors.Cause(err).(*roachpb.ConditionFailedError)
+ return ok
+ })
+}
+
// ChangeReplicas atomically changes the replicas that are members of a range.
// The change is performed in a distributed transaction and takes effect when
// that transaction is committed. This transaction confirms that the supplied
@@ -2279,12 +2294,16 @@ func (r *Replica) adminScatter(
// Loop until the replicate queue decides there is nothing left to do for the
// range. Note that we disable lease transfers until the final step as
// transferring the lease prevents any further action on this node.
+ every := log.Every(time.Minute)
var allowLeaseTransfer bool
canTransferLease := func() bool { return allowLeaseTransfer }
for re := retry.StartWithCtx(ctx, retryOpts); re.Next(); {
requeue, err := rq.processOneChange(ctx, r, canTransferLease, false /* dryRun */)
if err != nil {
- if IsSnapshotError(err) {
+ if IsSnapshotError(err) || IsConditionFailedError(err) {
+ if every.ShouldLog() {
+ log.Infof(ctx, "retrying admin scatter: %v", err)
+ }
continue
}
break
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment