Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import com.slack.astra.logstore.search.SearchResultAggregator;
import com.slack.astra.logstore.search.SearchResultAggregatorImpl;
import com.slack.astra.metadata.schema.FieldType;
import com.slack.astra.util.RuntimeHalterImpl;
import java.time.Duration;
import java.time.Instant;
import java.util.ArrayList;
Expand Down Expand Up @@ -130,8 +131,13 @@ public SearchResult<T> query(SearchQuery query, Duration queryTimeout) {
// represents a parse failure ) and instead of returning an empty
// result we throw back an error to the user
throw new IllegalArgumentException(throwable);
} else if (throwable instanceof VirtualMachineError) {
// we can't recover from these, so don't.
LOG.error("Fatal Chunk Query Exception: ", throwable);
new RuntimeHalterImpl().handleFatal(throwable);
} else {
LOG.warn("Chunk Query Exception", throwable);
}
LOG.warn("Chunk Query Exception", throwable);
}
// else UNAVAILABLE (ie, timedout), return 0 snapshots
return (SearchResult<T>) SearchResult.error();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
import java.io.IOException;
import java.time.Duration;
import java.util.NoSuchElementException;
import java.util.Objects;
import java.util.Optional;
import java.util.UUID;
import java.util.concurrent.ConcurrentHashMap;
Expand All @@ -31,6 +32,7 @@
import org.apache.lucene.index.SnapshotDeletionPolicy;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.MMapDirectory;
import org.opensearch.common.lucene.index.OpenSearchDirectoryReader;
Expand Down Expand Up @@ -156,7 +158,7 @@ public LuceneIndexStoreImpl(
try {
refresh();
} catch (Exception e) {
LOG.error("Error running scheduled commit", e);
LOG.error("Error running scheduled refresh", e);
}
},
config.refreshDuration.toMillis(),
Expand Down Expand Up @@ -272,6 +274,22 @@ private void handleNonFatal(Throwable ex) {
LOG.error(String.format("Exception %s processing", ex));
}

private void handleMaybeFatal(AlreadyClosedException e) {
  // An AlreadyClosedException may carry a "tragic" (unrecoverable) exception on the
  // underlying IndexWriter. If one is present, invoke the runtime halter — we cannot
  // safely continue indexing after a tragic writer failure.
  boolean hasTragicException;
  indexWriterLock.lock();
  try {
    // Optional.map yields an empty Optional when getTragicException() returns null,
    // so isPresent() is exactly a non-null check on the tragic exception.
    hasTragicException = indexWriter.map(IndexWriter::getTragicException).isPresent();
  } finally {
    indexWriterLock.unlock();
  }
  // Halt outside the lock so we never terminate while holding it.
  if (hasTragicException) {
    new RuntimeHalterImpl().handleFatal(e);
  }
}

@Override
public void addMessage(Trace.Span message) {
try {
Expand Down Expand Up @@ -302,6 +320,9 @@ public void commit() {
LOG.debug("Indexer finished commit for: " + indexDirectory.getDirectory().toString());
} catch (IOException e) {
handleNonFatal(e);
} catch (AlreadyClosedException e) {
// already closed here may mean there was a tragic exception that is unrecoverable
handleMaybeFatal(e);
}
});
}
Expand All @@ -316,6 +337,9 @@ public void refresh() {
LOG.debug("Indexer finished refresh for: " + indexDirectory.getDirectory().toString());
} catch (IOException e) {
handleNonFatal(e);
} catch (AlreadyClosedException e) {
// already closed here may mean there was a tragic exception that is unrecoverable
handleMaybeFatal(e);
}
});
}
Expand Down