Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 25 additions & 1 deletion udata/api_fields.py
Original file line number Diff line number Diff line change
Expand Up @@ -546,12 +546,16 @@ def field_constructor(**kwargs):
parser.add_argument("q", type=str, location="args")

for filterable in filterables:
extra_kwargs = {}
if filterable.get("is_list"):
extra_kwargs["action"] = "append"
parser.add_argument(
# Use the custom label from `nested_filters` if there's one.
filterable.get("label", filterable["key"]),
type=filterable["type"],
location="args",
choices=filterable.get("choices", None),
**extra_kwargs,
)

cls.__index_parser__ = parser
Expand Down Expand Up @@ -596,9 +600,12 @@ def apply_sort_filters(base_query) -> UDataQuerySet:
if query:
base_query = filterable["query"](base_query, filter)
else:
column = filterable["column"]
if filterable.get("is_list"):
column = f"{column}__all"
base_query = base_query.filter(
**{
filterable["column"]: filter,
column: filter,
}
)

Expand Down Expand Up @@ -992,6 +999,23 @@ def compute_filter(column: str, field, info, filterable) -> dict:
if "key" not in filterable:
filterable["key"] = column

# For simple list fields (e.g. tags), allow multiple filter values via
# action="append" and use __all to match documents containing all values.
# Excluded: EmbeddedDocumentListField (filtered on a sub-field like
# badges__kind, where __all semantics don't apply).
# Excluded: ListField(ReferenceField) (e.g. Reuse.datasets,
# Dataservice.contact_points) — these are filtered by a single ObjectId
# and nobody needs multi-ID filtering (?dataset=id1&dataset=id2) today.
# Supporting it would also require updating the ObjectId validation above.
if (
isinstance(field, mongo_fields.ListField)
and not isinstance(field, mongo_fields.EmbeddedDocumentListField)
and not isinstance(
field.field, mongo_fields.ReferenceField | mongo_fields.LazyReferenceField
)
):
filterable["is_list"] = True

# If we do a filter on a embed document, get the class info
# of this document to see if there is a default filter value
embed_info = None
Expand Down
2 changes: 1 addition & 1 deletion udata/core/dataservices/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ def parse_filters(dataservices, args):
phrase_query = " ".join([f'"{elem}"' for elem in args["q"].split(" ")])
dataservices = dataservices.search_text(phrase_query)
if args.get("tag"):
dataservices = dataservices.filter(tags=args["tag"])
dataservices = dataservices.filter(tags__all=args["tag"])
if args.get("organization"):
if not ObjectId.is_valid(args["organization"]):
api.abort(400, "Organization arg must be an identifier")
Expand Down
2 changes: 1 addition & 1 deletion udata/core/reuse/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ def parse_filters(reuses, args):
if args.get("type"):
reuses = reuses.filter(type=args["type"])
if args.get("tag"):
reuses = reuses.filter(tags=args["tag"])
reuses = reuses.filter(tags__all=args["tag"])
if args.get("organization"):
if not ObjectId.is_valid(args["organization"]):
api.abort(400, "Organization arg must be an identifier")
Expand Down
7 changes: 2 additions & 5 deletions udata/core/site/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
from udata.core.organization.api import OrgApiParser
from udata.core.organization.csv import OrganizationCsvAdapter
from udata.core.organization.models import Organization
from udata.core.reuse.api import ReuseApiParser
from udata.core.reuse.csv import ReuseCsvAdapter
from udata.core.tags.csv import TagCsvAdapter
from udata.core.tags.models import Tag
Expand Down Expand Up @@ -135,13 +134,11 @@ def get(self):
@api.route("/site/reuses.csv", endpoint="site_reuses_csv")
class SiteReusesCsv(API):
def get(self):
params = multi_to_dict(request.args)
# redirect to EXPORT_CSV dataset if feature is enabled and no filter is set
exported_models = current_app.config.get("EXPORT_CSV_MODELS", [])
if not params and "reuse" in exported_models:
if not request.args and "reuse" in exported_models:
return redirect(get_export_url("reuse"))
params["facets"] = False
reuses = ReuseApiParser.parse_filters(get_csv_queryset(Reuse), params)
reuses = Reuse.apply_sort_filters(get_csv_queryset(Reuse))
return csv.stream(ReuseCsvAdapter(reuses), "reuses")


Expand Down
9 changes: 8 additions & 1 deletion udata/tests/api/test_dataservices_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,13 +83,20 @@ def test_dataservices_api_list_with_filters(self):
response = self.get(url_for("api.dataservices", organization_badge="bad-badge"))
assert400(response)

# filter on tag
# filter on single tag
tag_dataservice = DataserviceFactory(tags=["my-tag", "other"])
response = self.get(url_for("api.dataservices", tag="my-tag"))
assert200(response)
assert len(response.json["data"]) == 1
assert response.json["data"][0]["id"] == str(tag_dataservice.id)

# filter on multiple tags should exclude partial matches
DataserviceFactory(tags=["my-tag"])
response = self.get("/api/1/dataservices/?tag=my-tag&tag=other")
assert200(response)
assert len(response.json["data"]) == 1
assert response.json["data"][0]["id"] == str(tag_dataservice.id)

# filter on topic
topic_dataservice = DataserviceFactory()
topic = TopicFactory()
Expand Down
9 changes: 8 additions & 1 deletion udata/tests/api/test_reuses_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,12 +81,19 @@ def test_reuse_api_list_with_filters(self):
topic_reuse = ReuseFactory(topic="transport_and_mobility", type="api")
type_reuse = ReuseFactory(topic="health", type="application")

# filter on tag
# filter on single tag
response = self.get(url_for("api.reuses", tag="my-tag"))
assert200(response)
assert len(response.json["data"]) == 1
assert response.json["data"][0]["id"] == str(tag_reuse.id)

# filter on multiple tags should exclude partial matches
ReuseFactory(tags=["my-tag"], topic="health", type="api")
response = self.get("/api/1/reuses/?tag=my-tag&tag=other")
assert200(response)
assert len(response.json["data"]) == 1
assert response.json["data"][0]["id"] == str(tag_reuse.id)

# filter on featured
response = self.get(url_for("api.reuses", featured="true"))
assert200(response)
Expand Down
18 changes: 18 additions & 0 deletions udata/tests/apiv2/test_dataservices.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,21 @@ def test_dataservice_search_with_model_query_param(self):

response = self.get("/api/2/dataservices/search/?model=malicious")
self.assert200(response)

def test_dataservice_search_single_tag(self):
    """A single-tag search only returns dataservices carrying that tag."""
    # One dataservice carries the searched tag, the other does not.
    matching = DataserviceFactory(tags=["my-tag", "other"])
    DataserviceFactory(tags=["unrelated"])

    resp = self.get("/api/2/dataservices/search/?tag=my-tag")
    self.assert200(resp)

    data = resp.json["data"]
    assert len(data) == 1
    assert data[0]["id"] == str(matching.id)

def test_dataservice_search_multiple_tags(self):
    """Repeating the tag parameter ANDs the tags: partial matches are excluded."""
    # Only the first dataservice carries both requested tags.
    matching = DataserviceFactory(tags=["my-tag", "other"])
    DataserviceFactory(tags=["my-tag"])

    resp = self.get("/api/2/dataservices/search/?tag=my-tag&tag=other")
    self.assert200(resp)

    data = resp.json["data"]
    assert len(data) == 1
    assert data[0]["id"] == str(matching.id)
22 changes: 20 additions & 2 deletions udata/tests/apiv2/test_reuses.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,29 @@
from udata.core.reuse.factories import ReuseFactory
from udata.core.reuse.factories import VisibleReuseFactory
from udata.tests.api import APITestCase


class ReuseSearchAPIV2Test(APITestCase):
def test_reuse_search_with_model_query_param(self):
"""Searching reuses with 'model' as query param should not crash."""
ReuseFactory.create_batch(3)
VisibleReuseFactory.create_batch(3)

response = self.get("/api/2/reuses/search/?model=malicious")
self.assert200(response)

def test_reuse_search_single_tag(self):
    """A single-tag search only returns reuses carrying that tag."""
    # One reuse carries the searched tag, the other does not.
    matching = VisibleReuseFactory(tags=["my-tag", "other"])
    VisibleReuseFactory(tags=["unrelated"])

    resp = self.get("/api/2/reuses/search/?tag=my-tag")
    self.assert200(resp)

    data = resp.json["data"]
    assert len(data) == 1
    assert data[0]["id"] == str(matching.id)

def test_reuse_search_multiple_tags(self):
    """Repeating the tag parameter ANDs the tags: partial matches are excluded."""
    # Only the first reuse carries both requested tags.
    matching = VisibleReuseFactory(tags=["my-tag", "other"])
    VisibleReuseFactory(tags=["my-tag"])

    resp = self.get("/api/2/reuses/search/?tag=my-tag&tag=other")
    self.assert200(resp)

    data = resp.json["data"]
    assert len(data) == 1
    assert data[0]["id"] == str(matching.id)
99 changes: 99 additions & 0 deletions udata/tests/site/test_site_csv_exports.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,24 @@ def test_datasets_csv_with_filters(self):
self.assertNotIn(str(dataset.id), ids)
self.assertNotIn(str(hidden_dataset.id), ids)

def test_datasets_csv_with_multiple_tags_filter(self):
    """The datasets CSV export ANDs repeated tag filters (no partial matches)."""
    # Disable the pre-generated export so the endpoint filters live data.
    self.app.config["EXPORT_CSV_MODELS"] = []
    matching = DatasetFactory(resources=[ResourceFactory()], tags=["tag-a", "tag-b"])
    partial = DatasetFactory(resources=[ResourceFactory()], tags=["tag-a"])

    response = self.get("/api/1/site/datasets.csv?tag=tag-a&tag=tag-b")
    self.assert200(response)

    reader = csv.get_reader(StringIO(response.data.decode("utf8")))
    next(reader)  # skip the header row
    rows = list(reader)
    # First column of each row is the dataset id.
    exported_ids = [row[0] for row in rows]

    self.assertEqual(len(rows), 1)
    self.assertIn(str(matching.id), exported_ids)
    self.assertNotIn(str(partial.id), exported_ids)

def test_datasets_csv_with_badge_filter(self):
self.app.config["EXPORT_CSV_MODELS"] = []
dataset_with_badge = DatasetFactory(resources=[ResourceFactory()])
Expand Down Expand Up @@ -209,6 +227,24 @@ def test_resources_csv_with_filters(self):
for resource in dataset.resources:
self.assertIn((str(dataset.id), str(resource.id)), ids)

def test_resources_csv_with_multiple_tags_filter(self):
    """The resources CSV export ANDs repeated tag filters on the parent dataset."""
    # Disable the pre-generated export so the endpoint filters live data.
    self.app.config["EXPORT_CSV_MODELS"] = []
    matching = DatasetFactory(resources=[ResourceFactory()], tags=["tag-a", "tag-b"])
    partial = DatasetFactory(resources=[ResourceFactory()], tags=["tag-a"])

    response = self.get("/api/1/site/resources.csv?tag=tag-a&tag=tag-b")
    self.assert200(response)

    reader = csv.get_reader(StringIO(response.data.decode("utf8")))
    next(reader)  # skip the header row
    rows = list(reader)
    # First column of each row is the parent dataset id.
    exported_ids = [row[0] for row in rows]

    self.assertEqual(len(rows), 1)
    self.assertIn(str(matching.id), exported_ids)
    self.assertNotIn(str(partial.id), exported_ids)

def test_organizations_csv(self):
self.app.config["EXPORT_CSV_MODELS"] = []
orgs = [OrganizationFactory() for _ in range(5)]
Expand Down Expand Up @@ -334,6 +370,47 @@ def test_reuses_csv_with_filters(self):
self.assertNotIn(str(reuse.id), ids)
self.assertNotIn(str(hidden_reuse.id), ids)

def test_reuses_csv_with_dataset_filter(self):
    """The reuses CSV export can be filtered by a referenced dataset id."""
    # Disable the pre-generated export so the endpoint filters live data.
    self.app.config["EXPORT_CSV_MODELS"] = []
    dataset = DatasetFactory()
    matching_reuse = ReuseFactory(datasets=[dataset])
    other_reuse = ReuseFactory(datasets=[DatasetFactory()])

    response = self.get(f"/api/1/site/reuses.csv?dataset={dataset.id}")
    self.assert200(response)

    reader = csv.get_reader(StringIO(response.data.decode("utf8")))
    next(reader)  # skip the header row
    rows = list(reader)
    # First column of each row is the reuse id.
    exported_ids = [row[0] for row in rows]

    self.assertEqual(len(rows), 1)
    self.assertIn(str(matching_reuse.id), exported_ids)
    self.assertNotIn(str(other_reuse.id), exported_ids)

def test_reuses_csv_with_multiple_tags_filter(self):
    """The reuses CSV export ANDs repeated tag filters; extra tags are allowed."""
    # Disable the pre-generated export so the endpoint filters live data.
    self.app.config["EXPORT_CSV_MODELS"] = []
    both_tags = ReuseFactory(datasets=[DatasetFactory()], tags=["tag-a", "tag-b"])
    both_tags_and_more = ReuseFactory(
        datasets=[DatasetFactory()], tags=["tag-a", "tag-b", "tag-c"]
    )
    partial = ReuseFactory(datasets=[DatasetFactory()], tags=["tag-a"])

    response = self.get("/api/1/site/reuses.csv?tag=tag-a&tag=tag-b")
    self.assert200(response)

    reader = csv.get_reader(StringIO(response.data.decode("utf8")))
    next(reader)  # skip the header row
    rows = list(reader)
    # First column of each row is the reuse id.
    exported_ids = [row[0] for row in rows]

    self.assertEqual(len(rows), 2)
    self.assertIn(str(both_tags.id), exported_ids)
    self.assertIn(str(both_tags_and_more.id), exported_ids)
    self.assertNotIn(str(partial.id), exported_ids)

def test_dataservices_csv(self):
self.app.config["EXPORT_CSV_MODELS"] = []
dataservices = [DataserviceFactory(datasets=[DatasetFactory()]) for _ in range(5)]
Expand Down Expand Up @@ -417,6 +494,28 @@ def test_dataservices_csv_with_filters(self):
for dataservice in dataservices:
self.assertNotIn(str(dataservice.id), ids)

def test_dataservices_csv_with_multiple_tags_filter(self):
    """The dataservices CSV export ANDs repeated tag filters; extra tags are allowed."""
    # Disable the pre-generated export so the endpoint filters live data.
    self.app.config["EXPORT_CSV_MODELS"] = []
    both_tags = DataserviceFactory(datasets=[DatasetFactory()], tags=["tag-a", "tag-b"])
    both_tags_and_more = DataserviceFactory(
        datasets=[DatasetFactory()], tags=["tag-a", "tag-b", "tag-c"]
    )
    partial = DataserviceFactory(datasets=[DatasetFactory()], tags=["tag-a"])

    response = self.get("/api/1/site/dataservices.csv?tag=tag-a&tag=tag-b")
    self.assert200(response)

    reader = csv.get_reader(StringIO(response.data.decode("utf8")))
    next(reader)  # skip the header row
    rows = list(reader)
    # First column of each row is the dataservice id.
    exported_ids = [row[0] for row in rows]

    self.assertEqual(len(rows), 2)
    self.assertIn(str(both_tags.id), exported_ids)
    self.assertIn(str(both_tags_and_more.id), exported_ids)
    self.assertNotIn(str(partial.id), exported_ids)

def test_harvest_csv(self):
self.app.config["EXPORT_CSV_MODELS"] = []
organization = OrganizationFactory()
Expand Down
13 changes: 13 additions & 0 deletions udata/tests/site/test_site_rdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -305,6 +305,19 @@ def test_catalog_rdf_filter(self, client):
datasets = list(graph.subjects(RDF.type, DCAT.Dataset))
assert len(datasets) == 5

def test_catalog_rdf_filter_multiple_tags(self, client):
    """The RDF catalog ANDs repeated tag filters: only fully-matching datasets appear."""
    # Two datasets carry both tags; three only carry the first one.
    DatasetFactory.create_batch(2, tags=["tag-a", "tag-b"])
    DatasetFactory.create_batch(3, tags=["tag-a"])

    catalog_url = url_for("api.site_rdf_catalog_format", _format="xml", tag=["tag-a", "tag-b"])
    response = client.get(catalog_url, headers={"Accept": "application/xml"})
    assert200(response)

    graph = Graph().parse(data=response.data, format="xml")
    # Only the two datasets carrying both tags should be serialized.
    assert len(list(graph.subjects(RDF.type, DCAT.Dataset))) == 2

def test_catalog_rdf_dataservices(self, client):
dataset_a = DatasetFactory.create()
dataset_b = DatasetFactory.create()
Expand Down