Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
104 changes: 104 additions & 0 deletions packages/docs/src/content/docs/open-source/usage/search/filters.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,110 @@ You can use the `filters` interface to filter the search results.
Filters are available for numeric, boolean, string, enum, and geopoint properties.
Depending on the type of the property, you can use different operators.

## Logical Operators

Orama lets you combine multiple filters with the logical operators `and`, `or`, and `not`.

- `and`: this operator accepts an array of filters. All conditions must be true.
- `or`: this operator accepts an array of filters. At least one condition must be true.
- `not`: this operator accepts a single filter. The condition must be false.

All operators can be nested.

### Examples

This filter will return all documents that have the category `"electronics"`:

```javascript copy
const results = search(db, {
term: "phone",
where: {
category: "electronics"
}
})
```

This filter will return all documents whose category is `"electronics"` and whose price is less than `100`:

```javascript copy
const results = search(db, {
term: "phone",
where: {
category: "electronics",
price: { lt: 100 }
}
})
```

When several properties are listed in the same `where` object, as in the example above, they are implicitly combined with the `and` operator. You can also use the `and` operator explicitly:

```javascript copy
const results = search(db, {
term: "phone",
where: {
and: [
{ category: "electronics" },
{ price: { lt: 100 } }
]
}
})
```

This filter will return all documents that match:
- (category = "electronics") OR (price < 100).

```javascript copy
const results = search(db, {
term: "phone",
where: {
or: [
{ category: "electronics" },
{ price: { lt: 100 } }
]
}
})
```

This filter will return all documents that match:
- (price < 100) AND NOT (category = "electronics").

```javascript copy
const results = search(db, {
term: "phone",
where: {
and: [
{ price: { lt: 100 } },
{ not: { category: "electronics" } }
]
}
})
```

Operators can be nested. This filter will return all documents that match:
- ( (category = "electronics") AND (price < 100) ) OR ( NOT (category = "electronics") AND (price < 100) )

```javascript copy
const results = search(db, {
term: "phone",
where: {
or: [
{
and: [
{ category: "electronics" },
{ price: { lt: 100 } }
]
},
{
and: [
{ not: { category: "electronics" } },
{ price: { lt: 100 } }
]
}
]
}
})
```

## String operators

On string properties it performs an exact matching on tokens so it is advised to disable stemming for the properties
Expand Down
42 changes: 40 additions & 2 deletions packages/orama/src/components/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ import { RadixTree } from '../trees/radix.js'
import { BKDTree } from '../trees/bkd.js'
import { BoolNode } from '../trees/bool.js'

import { convertDistanceToMeters, setIntersection, setUnion } from '../utils.js'
import { convertDistanceToMeters, setIntersection, setUnion, setDifference } from '../utils.js'
import { BM25 } from './algorithms.js'
import { getInnerType, getVectorSize, isArrayType, isVectorType } from './defaults.js'
import {
Expand Down Expand Up @@ -424,7 +424,7 @@ export function calculateResultScores(
const oramaFrequencies = index.frequencies[prop]

// oramaOccurrences[term] can be undefined, 0, string, or { [k: string]: number }
const termOccurrences = typeof oramaOccurrences[term] === 'number' ? oramaOccurrences[term] ?? 0 : 0
const termOccurrences = typeof oramaOccurrences[term] === 'number' ? (oramaOccurrences[term] ?? 0) : 0

// Calculate TF-IDF value for each term, in each document, for each index.
const documentIDsLength = documentIDs.length
Expand Down Expand Up @@ -596,6 +596,44 @@ export function searchByWhereClause<T extends AnyOrama>(
filters: Partial<WhereCondition<T['schema']>>,
language: string | undefined
): Set<InternalDocumentID> {
// Handle logical operators
if ('and' in filters && filters.and && Array.isArray(filters.and)) {
const andFilters = filters.and
if (andFilters.length === 0) {
return new Set()
}

const results = andFilters.map((filter) => searchByWhereClause(index, tokenizer, filter, language))
return setIntersection(...results)
}

if ('or' in filters && filters.or && Array.isArray(filters.or)) {
const orFilters = filters.or
if (orFilters.length === 0) {
return new Set()
}

const results = orFilters.map((filter) => searchByWhereClause(index, tokenizer, filter, language))
// Use reduce to union all sets
return results.reduce((acc, set) => setUnion(acc, set), new Set<InternalDocumentID>())
}

if ('not' in filters && filters.not) {
const notFilter = filters.not
// Get all document IDs from the internal document store
const allDocs = new Set<InternalDocumentID>()

// Get all document IDs from the internal document store
const docsStore = index.sharedInternalDocumentStore
for (let i = 1; i <= docsStore.internalIdToId.length; i++) {
allDocs.add(i)
}

const notResult = searchByWhereClause(index, tokenizer, notFilter, language)
return setDifference(allDocs, notResult)
}

// Handle regular property filters (existing logic)
const filterKeys = Object.keys(filters)

const filtersMap: Record<string, Set<InternalDocumentID>> = filterKeys.reduce(
Expand Down
3 changes: 2 additions & 1 deletion packages/orama/src/internals.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,5 +8,6 @@ export {
safeArrayPush,
setIntersection,
setUnion,
setDifference
} from './utils.js'
export { normalizeToken } from './components/tokenizer/index.js'
export { normalizeToken } from './components/tokenizer/index.js'
16 changes: 13 additions & 3 deletions packages/orama/src/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -240,9 +240,19 @@ export type Operator<Value> = Value extends 'string'
: Value extends 'geopoint'
? GeosearchOperation
: never
export type WhereCondition<TSchema> = {
[key in keyof TSchema]?: Operator<TSchema[key]>
}
/**
 * Shape of the `where` clause accepted by a search.
 *
 * A condition is one of:
 * - a map from schema property names to the {@link Operator} allowed for that
 *   property's type;
 * - `{ and: [...] }`: an array of nested conditions that must all be true;
 * - `{ or: [...] }`: an array of nested conditions of which at least one must be true;
 * - `{ not: ... }`: a single nested condition that must be false.
 *
 * The type is recursive, so logical operators can be nested inside each other.
 */
export type WhereCondition<TSchema> =
| {
[key in keyof TSchema]?: Operator<TSchema[key]>
}
| {
and?: WhereCondition<TSchema>[]
}
| {
or?: WhereCondition<TSchema>[]
}
| {
not?: WhereCondition<TSchema>
}

/**
* A custom sorter function item as [id, score, document].
Expand Down
83 changes: 47 additions & 36 deletions packages/orama/src/utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -339,116 +339,126 @@ export function isPromise(obj: any): obj is Promise<unknown> {
}

/**
 * Tells whether a value is an `async` function, or an array that holds at
 * least one `async` function.
 *
 * Arrays are inspected recursively, so nested arrays are searched too.
 * Detection relies on the value's constructor being named `AsyncFunction`.
 *
 * @param func - A single function or an array of functions to inspect.
 * Anything that is not an async function (including `null` and `undefined`)
 * counts as "not async".
 * @returns `true` when `func` is an async function or an array containing at
 * least one async function, otherwise `false`.
 */
export function isAsyncFunction(func: any): boolean {
  if (!Array.isArray(func)) {
    return func?.constructor?.name === 'AsyncFunction'
  }

  for (const candidate of func) {
    if (isAsyncFunction(candidate)) {
      return true
    }
  }

  return false
}

// Feature-detect the native `Set.prototype.intersection` once at module load.
const withIntersection = 'intersection' in new Set()

/**
 * Computes the intersection of any number of sets.
 *
 * Input sets are never mutated. Apart from the single-set fast path, which
 * returns its argument as-is, the result is always a newly created set.
 *
 * @param sets - The sets to intersect.
 * @returns A set holding the values present in every input set. With no
 * arguments an empty set is returned; with exactly one argument that same
 * set instance is returned (no copy is made).
 */
export function setIntersection<V>(...sets: Set<V>[]): Set<V> {
  // Fast path 1: nothing to intersect.
  if (sets.length === 0) {
    return new Set()
  }
  // Fast path 2: a single set is its own intersection.
  if (sets.length === 1) {
    return sets[0]
  }
  // Fast path 3: two sets.
  if (sets.length === 2) {
    const set1 = sets[0]
    const set2 = sets[1]

    if (withIntersection) {
      return set1.intersection(set2)
    }
    // Iterate the smaller set and probe the larger one.
    const result = new Set<V>()
    const base = set1.size < set2.size ? set1 : set2
    const other = base === set1 ? set2 : set1
    for (const value of base) {
      if (other.has(value)) {
        result.add(value)
      }
    }
    return result
  }

  // Slow path: start from the smallest set, since the result can never be
  // larger than it.
  let minIndex = 0
  for (let i = 1; i < sets.length; i++) {
    if (sets[i].size < sets[minIndex].size) {
      minIndex = i
    }
  }

  if (withIntersection) {
    let base = sets[minIndex]
    for (let i = 0; i < sets.length; i++) {
      if (i === minIndex) {
        continue
      }
      base = base.intersection(sets[i])
    }

    return base
  }

  // Manual implementation: work on a copy of the smallest set so the caller's
  // set is left untouched, then prune every value missing from another set.
  const result = new Set<V>(sets[minIndex])
  for (let i = 0; i < sets.length; i++) {
    if (i === minIndex) {
      continue
    }
    const other = sets[i]
    for (const value of result) {
      if (!other.has(value)) {
        result.delete(value)
      }
    }
    // Nothing left to prune.
    if (result.size === 0) {
      break
    }
  }

  return result
}

// Feature-detect the native `Set.prototype.union` once at module load.
const withUnion = 'union' in new Set()

/**
 * Computes the union of two sets without mutating either of them.
 *
 * @param set1 - The first set. May be `undefined`, which is convenient when
 * accumulating a union in a loop that starts without an initial value.
 * @param set2 - The second set.
 * @returns A new set with every value from `set1` (when given) and `set2`.
 * The result is always a fresh instance, never `set2` itself, so callers can
 * mutate it safely whether or not the runtime has a native `union`.
 */
export function setUnion<V>(set1: Set<V> | undefined, set2: Set<V>): Set<V> {
  if (!set1) {
    // Copy in both the native and fallback paths so the result never aliases set2.
    return new Set(set2)
  }

  if (withUnion) {
    return set1.union(set2)
  }

  return new Set([...set1, ...set2])
}

/**
 * Computes the difference between two sets.
 *
 * @param set1 - The set whose values are kept.
 * @param set2 - The set whose values are excluded.
 * @returns A new set with every value of `set1` that is not in `set2`, in
 * `set1`'s iteration order. Neither input is modified.
 */
export function setDifference<V>(set1: Set<V>, set2: Set<V>): Set<V> {
  const kept = [...set1].filter((candidate) => !set2.has(candidate))
  return new Set<V>(kept)
}

// This code is taken from https://github.com/davidmarkclements/atomic-sleep, MIT licensed at the time of commit b8149d3ca276c84a54fa8fa1478f9cc79aabc15a.
// All credits go to the original author (David Mark Clements, https://github.com/davidmarkclements).
export function sleep(ms: number) {
if (typeof SharedArrayBuffer !== 'undefined' && typeof Atomics !== 'undefined') {
const nil = new Int32Array(new SharedArrayBuffer(4))
const valid = ms > 0 && ms < Infinity
const valid = ms > 0 && ms < Infinity
if (valid === false) {
if (typeof ms !== 'number' && typeof ms !== 'bigint') {
throw TypeError('sleep: ms must be a number')
Expand All @@ -457,16 +467,17 @@ export function sleep(ms: number) {
}

Atomics.wait(nil, 0, 0, Number(ms))

} else {
const valid = ms > 0 && ms < Infinity
const valid = ms > 0 && ms < Infinity
if (valid === false) {
if (typeof ms !== 'number' && typeof ms !== 'bigint') {
throw TypeError('sleep: ms must be a number')
}
throw RangeError('sleep: ms must be a number that is greater than 0 but less than Infinity')
}
const target = Date.now() + Number(ms)
while (target > Date.now()){ /* empty */ }
while (target > Date.now()) {
/* empty */
}
}
}
}
Loading