-
Notifications
You must be signed in to change notification settings - Fork 503
feat(caching): add negative entry caching to stat cache #4678
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
e823b96
7a5f03e
8dc2447
e1e862c
d7a9b1c
b131ba2
fc25812
a962ac3
90497a5
da42774
0c3b882
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -682,6 +682,8 @@ type MetadataCacheConfig struct { | |
|
|
||
| EnableMetadataPrefetch bool `yaml:"enable-metadata-prefetch"` | ||
|
|
||
| EnableNonexistentEntryCaching bool `yaml:"enable-nonexistent-entry-caching"` | ||
|
|
||
| EnableNonexistentTypeCache bool `yaml:"enable-nonexistent-type-cache"` | ||
|
|
||
| ExperimentalMetadataPrefetchOnMount string `yaml:"experimental-metadata-prefetch-on-mount"` | ||
|
|
@@ -999,6 +1001,8 @@ func BuildFlagSet(flagSet *pflag.FlagSet) error { | |
| return err | ||
| } | ||
|
|
||
| flagSet.BoolP("enable-nonexistent-entry-caching", "", false, "Cache paths confirmed to not exist (404s) in the unified stat cache.") | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. we already have a negative-stat-cache-ttl. why not put this feature also under that flag. Now we have one flag for ttl and another for enabling/disabing the feature
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should moving to use "negative-ttl-secs" alone be a good measure without needing to maintain two separate flags? I am in favor of the decision. I chose entry-caching to align with the terminology of 'entries' in the stat cache (which stores directory entries). |
||
|
|
||
| flagSet.BoolP("enable-nonexistent-type-cache", "", false, "Once set, if an inode is not found in GCS, a type cache entry with type NonexistentType will be created. This also means new file/dir created might not be seen. For example, if this flag is set, and metadata-cache-ttl-secs is set, then if we create the same file/node in the meantime using the same mount, since we are not refreshing the cache, it will still return nil. This flag has been deprecated in favour of a single unified flag metadata-cache-negative-ttl-secs.") | ||
|
|
||
| flagSet.BoolP("enable-rapid-appends", "", true, "Enables support for appends to unfinalized object using streaming writes") | ||
|
|
@@ -1608,6 +1612,10 @@ func BindFlags(v *viper.Viper, flagSet *pflag.FlagSet) error { | |
| return err | ||
| } | ||
|
|
||
| if err := v.BindPFlag("metadata-cache.enable-nonexistent-entry-caching", flagSet.Lookup("enable-nonexistent-entry-caching")); err != nil { | ||
| return err | ||
| } | ||
|
|
||
| if err := v.BindPFlag("metadata-cache.enable-nonexistent-type-cache", flagSet.Lookup("enable-nonexistent-type-cache")); err != nil { | ||
| return err | ||
| } | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -686,6 +686,7 @@ func (d *dirInode) LookUpChild(ctx context.Context, name string) (*Core, error) | |
| // 1. Try Directory FIRST (since it's the preferred return type) | ||
| var dirResult *Core | ||
| var err error | ||
| var dirCacheMiss bool | ||
| if d.Bucket().BucketType().Hierarchical { | ||
| dirResult, err = findExplicitFolder(ctx, d.Bucket(), NewDirName(d.Name(), name), true) | ||
| } else { | ||
|
|
@@ -696,19 +697,27 @@ func (d *dirInode) LookUpChild(ctx context.Context, name string) (*Core, error) | |
| if dirResult != nil { | ||
| return dirResult, nil | ||
| } | ||
| // If we hit a real error (not a cache miss), exit early. | ||
| if err != nil && !errors.As(err, &cacheMissErr) { | ||
| if errors.As(err, &cacheMissErr) { | ||
| dirCacheMiss = true | ||
| } else if err != nil { | ||
| return nil, err | ||
| } | ||
|
|
||
| // 2. Try File ONLY if directory wasn't found | ||
| var fileCacheMiss bool | ||
| fileResult, err := findExplicitInode(ctx, d.Bucket(), NewFileName(d.Name(), name), true) | ||
| if err != nil && !errors.As(err, &cacheMissErr) { | ||
| if fileResult != nil { | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. i didnt follow. why these changes are required
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
| return fileResult, nil | ||
| } | ||
| if errors.As(err, &cacheMissErr) { | ||
| fileCacheMiss = true | ||
| } else if err != nil { | ||
| return nil, err | ||
| } | ||
|
|
||
| if fileResult != nil { | ||
| return fileResult, nil | ||
| // 3. Short-circuit on confirmed negative hits for BOTH dir and file | ||
| if !dirCacheMiss && !fileCacheMiss { | ||
| return nil, nil // Definitive ENOENT | ||
| } | ||
| } | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -51,15 +51,17 @@ func NewFastStatBucket( | |
| negativeCacheTTL time.Duration, | ||
| isTypeCacheDeprecated bool, | ||
| implicitDir bool, | ||
| enableNonexistentEntryCaching bool, | ||
| ) (b gcs.Bucket) { | ||
| fsb := &fastStatBucket{ | ||
| cache: cache, | ||
| clock: clock, | ||
| wrapped: wrapped, | ||
| primaryCacheTTL: primaryCacheTTL, | ||
| negativeCacheTTL: negativeCacheTTL, | ||
| isTypeCacheDeprecated: isTypeCacheDeprecated, | ||
| implicitDir: implicitDir, | ||
| cache: cache, | ||
| clock: clock, | ||
| wrapped: wrapped, | ||
| primaryCacheTTL: primaryCacheTTL, | ||
| negativeCacheTTL: negativeCacheTTL, | ||
| isTypeCacheDeprecated: isTypeCacheDeprecated, | ||
| implicitDir: implicitDir, | ||
| enableNonexistentEntryCaching: enableNonexistentEntryCaching, | ||
| } | ||
|
|
||
| b = fsb | ||
|
|
@@ -92,6 +94,8 @@ type fastStatBucket struct { | |
| isTypeCacheDeprecated bool | ||
|
|
||
| implicitDir bool | ||
|
|
||
| enableNonexistentEntryCaching bool | ||
| } | ||
|
|
||
| //////////////////////////////////////////////////////////////////////// | ||
|
|
@@ -124,6 +128,11 @@ func (b *fastStatBucket) insertListing(ctx context.Context, listing *gcs.Listing | |
| return | ||
| } | ||
|
|
||
| if len(listing.MinObjects) == 0 && len(listing.CollapsedRuns) == 0 && b.enableNonexistentEntryCaching && strings.HasSuffix(dirName, "/") { | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can you add a comment what is this check doing. when does minObjects and collapsed Runs are empty.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
| b.cache.AddNegativeEntry(dirName, b.clock.Now().Add(b.negativeCacheTTL)) | ||
| return | ||
| } | ||
|
|
||
| expiration := b.clock.Now().Add(b.primaryCacheTTL) | ||
|
|
||
| // 1. Parent Directory Inference (Implicit Check) | ||
|
|
@@ -244,6 +253,11 @@ func (b *fastStatBucket) addNegativeEntry(name string) { | |
| b.mu.Lock() | ||
| defer b.mu.Unlock() | ||
|
|
||
| if !b.enableNonexistentEntryCaching { | ||
| b.cache.Erase(name) | ||
| return | ||
| } | ||
|
|
||
| expiration := b.clock.Now().Add(b.negativeCacheTTL) | ||
| b.cache.AddNegativeEntry(name, expiration) | ||
| } | ||
|
|
@@ -253,6 +267,11 @@ func (b *fastStatBucket) addNegativeEntryForFolder(name string) { | |
| b.mu.Lock() | ||
| defer b.mu.Unlock() | ||
|
|
||
| if !b.enableNonexistentEntryCaching { | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why did you put this check for folders?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The fastStatBucket handles caching for both files and folders. addNegativeEntryForFolder is called when a folder lookup fails. If |
||
| b.cache.Erase(name) | ||
| return | ||
| } | ||
|
|
||
| expiration := b.clock.Now().Add(b.negativeCacheTTL) | ||
| b.cache.AddNegativeEntryForFolder(name, expiration) | ||
| } | ||
|
|
@@ -271,6 +290,9 @@ func (b *fastStatBucket) lookUp(name string) (hit bool, m *gcs.MinObject) { | |
| defer b.mu.Unlock() | ||
|
|
||
| hit, m = b.cache.LookUp(name, b.clock.Now()) | ||
| if hit && m == nil && !b.enableNonexistentEntryCaching { | ||
| return false, nil | ||
| } | ||
| return | ||
| } | ||
|
|
||
|
|
@@ -279,6 +301,9 @@ func (b *fastStatBucket) lookUpFolder(name string) (bool, *gcs.Folder) { | |
| defer b.mu.Unlock() | ||
|
|
||
| hit, f := b.cache.LookUpFolder(name, b.clock.Now()) | ||
| if hit && f == nil && !b.enableNonexistentEntryCaching { | ||
| return false, nil | ||
| } | ||
| return hit, f | ||
| } | ||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: what does entry-caching mean?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It was implying "creating an entry".