Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions udata/core/dataservices/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
from udata.core.metrics.helpers import get_stock_metrics
from udata.core.metrics.models import WithMetrics
from udata.core.owned import Owned, OwnedQuerySet
from udata.core.spam.models import SpamMixin
from udata.i18n import lazy_gettext as _
from udata.mongo.document import UDataDocument as Document
from udata.mongo.extras_fields import ExtrasField
Expand Down Expand Up @@ -187,6 +188,7 @@ def filter_by_reuse(base_query, filter_value):
)
class Dataservice(
Auditable,
SpamMixin,
WithMetrics,
WithAccessType,
DataserviceBadgeMixin,
Expand All @@ -212,6 +214,9 @@ class Dataservice(

verbose_name = _("dataservice")

def fields_to_check_for_spam(self):
    """Return the dataservice text fields to inspect, keyed by field name.

    The mapping holds the current ``title`` and ``description`` values,
    in that order.
    """
    checked_fields = ("title", "description")
    return {field_name: getattr(self, field_name) for field_name in checked_fields}

def __str__(self):
    """Return the title, or an empty string when the title is missing or empty."""
    title = self.title
    if title:
        return title
    return ""

Expand Down Expand Up @@ -389,3 +394,4 @@ def count_followers(self):


# Register two post_save receivers for Dataservice: the model's own handler first,
# then SpamMixin.post_save.
post_save.connect(Dataservice.post_save, sender=Dataservice)
post_save.connect(SpamMixin.post_save, sender=Dataservice)
6 changes: 6 additions & 0 deletions udata/core/dataset/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
from udata.core.metrics.helpers import get_stock_metrics
from udata.core.metrics.models import WithMetrics
from udata.core.owned import Owned, OwnedQuerySet
from udata.core.spam.models import SpamMixin
from udata.core.spatial.api_fields import spatial_coverage_fields
from udata.core.spatial.models import SpatialCoverage
from udata.frontend.markdown import mdstrip
Expand Down Expand Up @@ -565,6 +566,7 @@ class DatasetBadgeMixin(BadgeMixin):
@generate_fields()
class Dataset(
Auditable,
SpamMixin,
WithMetrics,
WithAccessType,
DatasetBadgeMixin,
Expand Down Expand Up @@ -692,6 +694,9 @@ def __str__(self):

missing_resources = False

def fields_to_check_for_spam(self):
    """Return the dataset text fields to inspect, keyed by field name.

    The mapping holds the current ``title`` and ``description`` values,
    in that order.
    """
    checked_fields = ("title", "description")
    return {field_name: getattr(self, field_name) for field_name in checked_fields}

@cached_property
def resources_len(self):
# :ResourcesLengthProperty
Expand Down Expand Up @@ -1142,6 +1147,7 @@ def count_followers(self):
# Register the Dataset signal receivers: the model's own pre_init, pre_save and
# post_save handlers, followed by SpamMixin.post_save as a second post_save receiver.
pre_init.connect(Dataset.pre_init, sender=Dataset)
pre_save.connect(Dataset.pre_save, sender=Dataset)
post_save.connect(Dataset.post_save, sender=Dataset)
post_save.connect(SpamMixin.post_save, sender=Dataset)


class CommunityResource(ResourceMixin, WithMetrics, Owned, Document[OwnedQuerySet]):
Expand Down
6 changes: 6 additions & 0 deletions udata/core/organization/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
from udata.core.linkable import Linkable
from udata.core.metrics.helpers import get_stock_metrics
from udata.core.metrics.models import WithMetrics
from udata.core.spam.models import SpamMixin
from udata.core.storages import avatars, default_image_basename
from udata.frontend.markdown import mdstrip
from udata.i18n import lazy_gettext as _
Expand Down Expand Up @@ -167,6 +168,7 @@ class OrganizationBadgeMixin(BadgeMixin):
@generate_fields()
class Organization(
Auditable,
SpamMixin,
WithMetrics,
OrganizationBadgeMixin,
Linkable,
Expand Down Expand Up @@ -271,6 +273,9 @@ def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.compute_aggregate_metrics = True

def fields_to_check_for_spam(self):
    """Return the organization text fields to inspect, keyed by field name.

    The mapping holds the current ``name`` and ``description`` values,
    in that order.
    """
    checked_fields = ("name", "description")
    return {field_name: getattr(self, field_name) for field_name in checked_fields}

@classmethod
def pre_save(cls, sender, document, **kwargs):
cls.before_save.send(document)
Expand Down Expand Up @@ -450,3 +455,4 @@ def count_followers(self):

# Register the Organization signal receivers: the model's own pre_save and post_save
# handlers, followed by SpamMixin.post_save as a second post_save receiver.
pre_save.connect(Organization.pre_save, sender=Organization)
post_save.connect(Organization.post_save, sender=Organization)
post_save.connect(SpamMixin.post_save, sender=Organization)
3 changes: 2 additions & 1 deletion udata/core/reports/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from udata.core.discussions.models import Discussion
from udata.core.organization.models import Organization
from udata.core.reuse.models import Reuse
from udata.core.user.models import User
from udata.i18n import lazy_gettext as _

REASON_PERSONAL_DATA = "personal_data"
Expand Down Expand Up @@ -31,4 +32,4 @@ def reports_reasons_translations() -> list:


# The "value" entry of every item returned by reports_reasons_translations().
REPORT_REASONS_CHOICES: list[str] = [item["value"] for item in reports_reasons_translations()]
# NOTE(review): REPORTABLE_MODELS is assigned twice in a row, so only the second
# assignment (the one that adds User) takes effect. This looks like the removed and
# added sides of a diff shown together — confirm before treating it as real code.
REPORTABLE_MODELS = [Dataset, Reuse, Discussion, Organization, Dataservice]
REPORTABLE_MODELS = [Dataset, Reuse, Discussion, Organization, Dataservice, User]
14 changes: 13 additions & 1 deletion udata/core/reuse/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
from udata.core.metrics.models import WithMetrics
from udata.core.owned import Owned, OwnedQuerySet
from udata.core.reuse.api_fields import BIGGEST_IMAGE_SIZE, reuse_permissions_fields
from udata.core.spam.models import SpamMixin
from udata.core.storages import default_image_basename, images
from udata.frontend.markdown import mdstrip
from udata.i18n import lazy_gettext as _
Expand Down Expand Up @@ -88,7 +89,14 @@ class ReuseBadgeMixin(BadgeMixin):
mask="*,datasets{id,title,uri,page}",
)
class Reuse(
Datetimed, Auditable, WithMetrics, ReuseBadgeMixin, Linkable, Owned, Document[ReuseQuerySet]
Datetimed,
Auditable,
SpamMixin,
WithMetrics,
ReuseBadgeMixin,
Linkable,
Owned,
Document[ReuseQuerySet],
):
title = field(
StringField(required=True),
Expand Down Expand Up @@ -215,6 +223,9 @@ def __str__(self):

verbose_name = _("reuse")

def fields_to_check_for_spam(self):
    """Return the reuse text fields to inspect, keyed by field name.

    The mapping holds the current ``title`` and ``description`` values,
    in that order.
    """
    checked_fields = ("title", "description")
    return {field_name: getattr(self, field_name) for field_name in checked_fields}

@classmethod
def pre_save(cls, sender, document, **kwargs):
# Emit before_save
Expand Down Expand Up @@ -338,3 +349,4 @@ def count_followers(self):

# Register the Reuse signal receivers: the model's own pre_save and post_save
# handlers, followed by SpamMixin.post_save as a second post_save receiver.
pre_save.connect(Reuse.pre_save, sender=Reuse)
post_save.connect(Reuse.post_save, sender=Reuse)
post_save.connect(SpamMixin.post_save, sender=Reuse)
95 changes: 95 additions & 0 deletions udata/core/spam/tests/test_spam.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
import pytest

from udata.core.dataservices.factories import DataserviceFactory
from udata.core.dataset.factories import DatasetFactory
from udata.core.discussions.models import Discussion, Message
from udata.core.organization.factories import OrganizationFactory
from udata.core.reports.constants import REASON_AUTO_SPAM
from udata.core.reports.models import Report
from udata.core.reuse.factories import ReuseFactory
from udata.core.user.factories import UserFactory
from udata.tests.api import APITestCase

Expand Down Expand Up @@ -119,3 +122,95 @@ def test_dismissed_spam_in_embed_not_reflagged(self):
Report.objects(subject=discussion, reason=REASON_AUTO_SPAM, dismissed_at=None).count(),
0,
)

@pytest.mark.options(SPAM_WORDS=["spam"])
def test_dataset_spam_in_title(self):
    """A dataset created with the spam word in its title has a spam report."""
    flagged = DatasetFactory(title="This is spam content")
    reported = self.has_spam_report(flagged)
    self.assertTrue(reported)

@pytest.mark.options(SPAM_WORDS=["spam"])
def test_dataset_spam_in_description(self):
    """A dataset with a clean title but the spam word in its description has a spam report."""
    flagged = DatasetFactory(
        title="Normal title",
        description="Buy spam products now",
    )
    reported = self.has_spam_report(flagged)
    self.assertTrue(reported)

@pytest.mark.options(SPAM_WORDS=["spam"])
def test_dataset_no_spam(self):
    """A dataset whose title and description lack the spam word has no spam report."""
    clean = DatasetFactory(
        title="Normal title",
        description="Normal description",
    )
    reported = self.has_spam_report(clean)
    self.assertFalse(reported)

@pytest.mark.options(SPAM_WORDS=["spam"])
def test_dataset_spam_not_reflagged_after_dismiss(self):
    """Saving a dataset again after its spam report was dismissed must not report it again."""
    from datetime import datetime

    spammy = DatasetFactory(title="This is spam content")
    self.assertTrue(self.has_spam_report(spammy))

    # Dismiss the report that was filed with the auto-spam reason.
    auto_report = Report.objects(subject=spammy, reason=REASON_AUTO_SPAM).first()
    auto_report.dismissed_at = datetime.utcnow()
    auto_report.save()

    # Change an unrelated field and save; the title still contains the spam word.
    spammy.reload()
    spammy.description = "Updated description"
    spammy.save()

    still_reported = self.has_spam_report(spammy)
    self.assertFalse(still_reported)

@pytest.mark.options(SPAM_WORDS=["spam"])
def test_reuse_spam_in_title(self):
    """A reuse created with the spam word in its title has a spam report."""
    flagged = ReuseFactory(title="This is spam content")
    reported = self.has_spam_report(flagged)
    self.assertTrue(reported)

@pytest.mark.options(SPAM_WORDS=["spam"])
def test_reuse_spam_in_description(self):
    """A reuse with a clean title but the spam word in its description has a spam report."""
    flagged = ReuseFactory(
        title="Normal title",
        description="Buy spam products now",
    )
    reported = self.has_spam_report(flagged)
    self.assertTrue(reported)

@pytest.mark.options(SPAM_WORDS=["spam"])
def test_reuse_no_spam(self):
    """A reuse whose title and description lack the spam word has no spam report."""
    clean = ReuseFactory(
        title="Normal title",
        description="Normal description",
    )
    reported = self.has_spam_report(clean)
    self.assertFalse(reported)

@pytest.mark.options(SPAM_WORDS=["spam"])
def test_organization_spam_in_name(self):
    """An organization named "Spam Organization" has a spam report for the word "spam"."""
    flagged = OrganizationFactory(name="Spam Organization")
    reported = self.has_spam_report(flagged)
    self.assertTrue(reported)

@pytest.mark.options(SPAM_WORDS=["spam"])
def test_organization_spam_in_description(self):
    """An organization with a clean name but the spam word in its description has a spam report."""
    flagged = OrganizationFactory(
        name="Normal Org",
        description="Buy spam products now",
    )
    reported = self.has_spam_report(flagged)
    self.assertTrue(reported)

@pytest.mark.options(SPAM_WORDS=["spam"])
def test_organization_no_spam(self):
    """An organization whose name and description lack the spam word has no spam report."""
    clean = OrganizationFactory(
        name="Normal Org",
        description="Normal description",
    )
    reported = self.has_spam_report(clean)
    self.assertFalse(reported)

@pytest.mark.options(SPAM_WORDS=["spam"])
def test_dataservice_spam_in_title(self):
    """A dataservice created with the spam word in its title has a spam report."""
    flagged = DataserviceFactory(title="This is spam content")
    reported = self.has_spam_report(flagged)
    self.assertTrue(reported)

@pytest.mark.options(SPAM_WORDS=["spam"])
def test_dataservice_spam_in_description(self):
    """A dataservice with a clean title but the spam word in its description has a spam report."""
    flagged = DataserviceFactory(
        title="Normal title",
        description="Buy spam products now",
    )
    reported = self.has_spam_report(flagged)
    self.assertTrue(reported)

@pytest.mark.options(SPAM_WORDS=["spam"])
def test_dataservice_no_spam(self):
    """A dataservice whose title and description lack the spam word has no spam report."""
    clean = DataserviceFactory(
        title="Normal title",
        description="Normal description",
    )
    reported = self.has_spam_report(clean)
    self.assertFalse(reported)

@pytest.mark.options(SPAM_WORDS=["spam"])
def test_user_spam_in_about(self):
    """A user created with the spam word in the ``about`` text has a spam report."""
    flagged = UserFactory(about="Buy spam products now")
    reported = self.has_spam_report(flagged)
    self.assertTrue(reported)

@pytest.mark.options(SPAM_WORDS=["spam"])
def test_user_spam_in_website(self):
    """A user created with the spam word inside the ``website`` URL has a spam report."""
    flagged = UserFactory(website="https://spam.example.com")
    reported = self.has_spam_report(flagged)
    self.assertTrue(reported)

@pytest.mark.options(SPAM_WORDS=["spam"])
def test_user_no_spam(self):
    """A user whose ``about`` and ``website`` lack the spam word has no spam report."""
    clean = UserFactory(
        about="Normal bio",
        website="https://example.com",
    )
    reported = self.has_spam_report(clean)
    self.assertFalse(reported)
7 changes: 6 additions & 1 deletion udata/core/user/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
from udata.core.followers.models import Follow
from udata.core.linkable import Linkable
from udata.core.metrics.models import WithMetrics
from udata.core.spam.models import SpamMixin
from udata.core.storages import avatars, default_image_basename
from udata.frontend.markdown import mdstrip
from udata.i18n import lazy_gettext as _
Expand Down Expand Up @@ -61,7 +62,7 @@ class UserSettings(EmbeddedDocument):


@generate_fields()
class User(WithMetrics, UserMixin, Linkable, Document):
class User(SpamMixin, WithMetrics, UserMixin, Linkable, Document):
slug = field(
SlugField(max_length=255, required=True, populate_from="fullname"),
auditable=False,
Expand Down Expand Up @@ -147,6 +148,9 @@ class User(WithMetrics, UserMixin, Linkable, Document):

verbose_name = _("account")

def fields_to_check_for_spam(self):
    """Return the user text fields to inspect, keyed by field name.

    The mapping holds the current ``about`` and ``website`` values,
    in that order.
    """
    # NOTE(review): first and last name are not included here; a reviewer
    # suggested adding them — confirm whether that is wanted.
    checked_fields = ("about", "website")
    return {field_name: getattr(self, field_name) for field_name in checked_fields}
Comment on lines +151 to +152
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe add first and last name as well?

I think we may want custom spam detection for users.
For example, a website link in the `self.about` field could raise a report.


__metrics_keys__ = [
"datasets",
"reuses",
Expand Down Expand Up @@ -372,6 +376,7 @@ def count_following(self):

# Register the User signal receivers: the model's own pre_save and post_save
# handlers, followed by SpamMixin.post_save as a second post_save receiver.
pre_save.connect(User.pre_save, sender=User)
post_save.connect(User.post_save, sender=User)
post_save.connect(SpamMixin.post_save, sender=User)


def match_email_invitations(sender, **kwargs):
Expand Down