Created
September 10, 2019 19:46
-
-
Save ajwerner/9e98a6ed5285408b57275de60faba872 to your computer and use it in GitHub Desktop.
diff off of 3ad8bda97d42fbfe978e4c5eb2ec3e3a291c1608
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
diff --git a/pkg/ccl/importccl/import_processor.go b/pkg/ccl/importccl/import_processor.go | |
index 641f7258ef..21c18d71e5 100644 | |
--- a/pkg/ccl/importccl/import_processor.go | |
+++ b/pkg/ccl/importccl/import_processor.go | |
@@ -263,7 +263,7 @@ func (cp *readImportDataProcessor) presplitTableBoundaries(ctx context.Context) | |
stickyBitEnabled := cp.flowCtx.Cfg.Settings.Version.IsActive(cluster.VersionStickyBit) | |
expirationTime := hlc.Timestamp{} | |
if stickyBitEnabled { | |
- expirationTime = cp.flowCtx.Cfg.DB.Clock().Now().Add(time.Hour.Nanoseconds(), 0) | |
+ expirationTime = hlc.Timestamp{WallTime: cp.spec.WalltimeNanos + time.Hour.Nanoseconds()} | |
} | |
for _, tbl := range cp.spec.Tables { | |
for _, span := range tbl.Desc.AllIndexSpans() { | |
diff --git a/pkg/storage/replica_command.go b/pkg/storage/replica_command.go | |
index 64311f1ede..d3da9fc787 100644 | |
--- a/pkg/storage/replica_command.go | |
+++ b/pkg/storage/replica_command.go | |
@@ -40,6 +40,7 @@ import ( | |
crdberrors "github.com/cockroachdb/errors" | |
"github.com/cockroachdb/logtags" | |
"github.com/gogo/protobuf/proto" | |
+ "github.com/kr/pretty" | |
"github.com/pkg/errors" | |
"go.etcd.io/etcd/raft" | |
"go.etcd.io/etcd/raft/raftpb" | |
@@ -54,7 +55,7 @@ func (r *Replica) AdminSplit( | |
return roachpb.AdminSplitResponse{}, roachpb.NewErrorf("cannot split range with no key provided") | |
} | |
- err := r.executeAdminCommandWithDescriptor(ctx, func(desc *roachpb.RangeDescriptor) error { | |
+ err := r.executeAdminCommandWithDescriptor(ctx, "split", func(desc *roachpb.RangeDescriptor) error { | |
var err error | |
reply, err = r.adminSplitWithDescriptor(ctx, args, desc, true /* delayable */, reason) | |
return err | |
@@ -71,7 +72,8 @@ func maybeDescriptorChangedError(desc *roachpb.RangeDescriptor, err error) (stri | |
return fmt.Sprintf("descriptor changed: expected %s != [actual] nil (range subsumed)", desc), true | |
} else if err := detail.ActualValue.GetProto(&actualDesc); err == nil && | |
desc.RangeID == actualDesc.RangeID && !desc.Equal(actualDesc) { | |
- return fmt.Sprintf("descriptor changed: [expected] %s != [actual] %s", desc, &actualDesc), true | |
+ return fmt.Sprintf("descriptor changed: [expected] %s != [actual] %s: %v", | |
+ desc, &actualDesc, pretty.Diff(desc, &actualDesc)), true | |
} | |
} | |
return "", false | |
@@ -434,7 +436,7 @@ func (r *Replica) AdminUnsplit( | |
ctx context.Context, args roachpb.AdminUnsplitRequest, reason string, | |
) (roachpb.AdminUnsplitResponse, *roachpb.Error) { | |
var reply roachpb.AdminUnsplitResponse | |
- err := r.executeAdminCommandWithDescriptor(ctx, func(desc *roachpb.RangeDescriptor) error { | |
+ err := r.executeAdminCommandWithDescriptor(ctx, "unsplit", func(desc *roachpb.RangeDescriptor) error { | |
var err error | |
reply, err = r.adminUnsplitWithDescriptor(ctx, args, desc, reason) | |
return err | |
@@ -513,12 +515,16 @@ func (r *Replica) adminUnsplitWithDescriptor( | |
// executeAdminCommandWithDescriptor wraps a read-modify-write operation for RangeDescriptors in a | |
// retry loop. | |
func (r *Replica) executeAdminCommandWithDescriptor( | |
- ctx context.Context, updateDesc func(*roachpb.RangeDescriptor) error, | |
+ ctx context.Context, op string, updateDesc func(*roachpb.RangeDescriptor) error, | |
) *roachpb.Error { | |
retryOpts := base.DefaultRetryOptions() | |
- retryOpts.MaxRetries = 10 | |
+ retryOpts.MaxRetries = 2 | |
var lastErr error | |
+ var i int | |
for retryable := retry.StartWithCtx(ctx, retryOpts); retryable.Next(); { | |
+ if i++; i > 1 { | |
+ log.Infof(ctx, "retrying %v for this %dth time after %v", op, i, lastErr) | |
+ } | |
// The replica may have been destroyed since the start of the retry loop. | |
// We need to explicitly check this condition. Having a valid lease, as we | |
// verify below, does not imply that the range still exists: even after a | |
@@ -867,6 +873,15 @@ func IsSnapshotError(err error) bool { | |
}) | |
} | |
+// IsConditionFailedError returns true iff causer.Visit finds an error whose | |
+// cause is a *roachpb.ConditionFailedError, i.e. the error indicates a CPut failed. | |
+func IsConditionFailedError(err error) bool { | |
+ return causer.Visit(err, func(err error) bool { | |
+ _, ok := errors.Cause(err).(*roachpb.ConditionFailedError) | |
+ return ok | |
+ }) | |
+} | |
+ | |
// ChangeReplicas atomically changes the replicas that are members of a range. | |
// The change is performed in a distributed transaction and takes effect when | |
// that transaction is committed. This transaction confirms that the supplied | |
@@ -2279,12 +2294,16 @@ func (r *Replica) adminScatter( | |
// Loop until the replicate queue decides there is nothing left to do for the | |
// range. Note that we disable lease transfers until the final step as | |
// transferring the lease prevents any further action on this node. | |
+ every := log.Every(time.Minute) | |
var allowLeaseTransfer bool | |
canTransferLease := func() bool { return allowLeaseTransfer } | |
for re := retry.StartWithCtx(ctx, retryOpts); re.Next(); { | |
requeue, err := rq.processOneChange(ctx, r, canTransferLease, false /* dryRun */) | |
if err != nil { | |
- if IsSnapshotError(err) { | |
+ if IsSnapshotError(err) || IsConditionFailedError(err) { | |
+ if every.ShouldLog() { | |
+ log.Infof(ctx, "retrying admin scatter: %v", err) | |
+ } | |
continue | |
} | |
break |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment