Fix panic if too many instances are unavailable#314
Conversation
| fn next_instance( | ||
| &self, | ||
| accept_health_check: HealthCheck, | ||
| ) -> (&InstanceData, ShouldHealthCheck) { | ||
| let threads_allowed = THREADS_ALLOWED.load(Relaxed); | ||
| let index = self.slot.instance_balancer.fetch_add(1, Relaxed); | ||
| let index = index % self.slot.instances.len(); | ||
| let instance = &self.slot.instances[index]; | ||
| match (instance.should_try(), threads_allowed, accept_health_check) { | ||
| (InstanceAttempt::Failed, _, _) | ||
| | (InstanceAttempt::Retry, true, _) | ||
| | (InstanceAttempt::Retry, false, HealthCheck::Avoid) => {} | ||
| (InstanceAttempt::Working, _, _) => return (instance, ShouldHealthCheck::RunDirectly), | ||
| (InstanceAttempt::Retry, false, HealthCheck::Possible) => { | ||
| return (instance, ShouldHealthCheck::HealthCheckFirst) | ||
| } | ||
| } | ||
| for i in 0..self.slot.instances.len() - 1 { | ||
| let instance = &self.slot.instances[index + i]; | ||
|
|
||
| let (head, tail) = self.slot.instances.split_at(index); | ||
| for (i, instance) in iter::chain(tail, head).enumerate() { | ||
| match (instance.should_try(), threads_allowed, accept_health_check) { |
There was a problem hiding this comment.
The fact that it skips one instance is intentional because there is a "fast path" just above that already tests the first instance. So the fix should just be adding a modulo self.slot.instances.len().
But you're right that we could remove this fast path that brings nothing (I don't really know why it was there in the first place).
fn next_instance(
&self,
accept_health_check: HealthCheck,
) -> (&InstanceData, ShouldHealthCheck) {
let threads_allowed = THREADS_ALLOWED.load(Relaxed);
let index = self.slot.instance_balancer.load(Relaxed);
let (head, tail) = self
.slot
.instances
.split_at((index + 1) % self.slot.instances.len());
for (i, instance) in tail.iter().chain(head).enumerate() {
let i = i + 1;
match (instance.should_try(), threads_allowed, accept_health_check) {
(InstanceAttempt::Failed, _, _)
| (InstanceAttempt::Retry, true, _)
| (InstanceAttempt::Retry, false, HealthCheck::Avoid) => continue,
(InstanceAttempt::Working, _, _) => {
// This not true round-robin in case of multithreaded acces
// This is degraded mode so best-effort is attempted at best
self.slot.instance_balancer.fetch_add(i, Relaxed);
return (instance, ShouldHealthCheck::RunDirectly);
}
(InstanceAttempt::Retry, false, HealthCheck::Possible) => {
// This not true round-robin in case of multithreaded acces
// This is degraded mode so best-effort is attempted at best
self.slot.instance_balancer.fetch_add(i, Relaxed);
return (instance, ShouldHealthCheck::HealthCheckFirst);
}
}
}
// No instance is valid, return a failed instance for an attempt
let index = self.slot.instance_balancer.fetch_add(1, Relaxed);
let index = index % self.slot.instances.len();
// Instance is not valid, don't try health check, it would only slow things down
(&self.slot.instances[index], ShouldHealthCheck::RunDirectly)
}There was a problem hiding this comment.
The fact that it skips one instance is intentional because there is a "fast path" just above that already tests the first instance.
But shouldn’t it skip the first instance instead of the last?
There was a problem hiding this comment.
Yeah the logic of this function didn't make the most sense, it should be simplified into just the loop like I did in the code just above.
24d0346 to
cc8d1d7
Compare
sosthene-nitrokey
left a comment
There was a problem hiding this comment.
LGTM, I think we should try to make have #315 before the release though, if I don't have something by noon we'll disable threads on windows for now.
cc8d1d7 to
739f71c
Compare
|
Okay, I’ve dropped the release commit from this PR so that we can merge it already. |
This fixes a regression from #218: The current implementation does not calculate the index correctly when looping over the available instances. This can be fixed by replacing the index with an iterator. The old loop also skipped the last instance because ranges are exclusive, so
for i in 0..self.slot.instances.len() - 1yields values withi < self.slot.instances.len() - 1. I think this was an oversight and fixed it with this PR.Fixes: #313