Browse Source

Adde new rules

master
Lars Chr. Duus Hausmann 4 years ago
parent
commit
c0567be20b
  1. 4
      lists/high-spammy-domains.txt
  2. 28
      lists/high-spammy-tlds.txt
  3. 69
      lists/spam-links.txt
  4. 9
      lists/spammy-tlds.txt

4
lists/high-spammy-domains.txt

@ -0,0 +1,4 @@
# List of domains with a high probability of sending spam.
/mungaikihanya\.com$/i
/onlyone\-invest\.com$/i

28
lists/high-spammy-tlds.txt

@ -0,0 +1,28 @@
# List of TLDs (or SLDs) with a high probability of hosting spam domains.
# rspamd doesn't provide TLD extraction for mail addresses. The eSLD (effective second level domain) is matched against this list.
/\.bid$/i
/\.club$/i
/\.date$/i
/\.download$/i
/\.faith$/i
/\.fun$/i
/\.host$/i
/\.icu$/i
/\.kim$/i
/\.loan$/i
/\.online$/i
/\.pet$/i
/\.pro$/i
/\.red$/i
/\.review$/i
/\.site$/i
/\.space$/i
/\.stream$/i
/\.top$/i
/\.trade$/i
/\.vip$/i
/\.website$/i
/\.win$/i
/\.world$/i
/\.xyz$/i

69
lists/spam-links.txt

@ -0,0 +1,69 @@
# SPAM_LINK_1
# Something like .... /plugin.php?t=147&SeBJYnc8AzD8YLd4kvf4uNR=Fqz&12i=Cwb&4f=cL4g
# the common parts are:
# - the first parameter name is one char long
# - at least two more parameter follow
/\/[a-z]+\.php\?\w=[a-zA-Z0-9]+(&[\w\d]+=[a-zA-Z0-9]+){2,}/i SPAM_LINK_1:0.4
# SPAM_LINK_3
# Something like ..../l/lt2K2240EH14R/1014LP2140G4657WU60A33287012SM1334722588
# Common parts:
# - first part is always one character
# - three parts in total
/\/\w\/\w{10,}\/\w{10,}/i SPAM_LINK_3:0.4
# SPAM_LINK_4
# /pass.php?utm_source=6900l3njtv&utm_medium=nc6600mc98&utm_campaign=a1q4sxq0wo&utm_term=tvec4xo652&utm_content=403g22e07g
# Common parts
# - always a .php file in the root of the domain
# - only GA tracking parameters
# - values for utm_source, utm_medium and utm_campaign are always the same (at least between 2017-07 and 2017-10),
# utm_term varies slightly and utm_content is random
# - all tracking parameters have 10 chars
/\/[a-z]+\.php\?utm_source=[a-zA-Z0-9]{10}&utm_medium=[a-zA-Z0-9]{10}&utm_campaign=[a-zA-Z0-9]{10}&utm_term=[a-zA-Z0-9]{10}&utm_content=[a-zA-Z0-9]{10}/i SPAM_LINK_4:0.4
# SPAM_LINK_5
# sth. /mw/index.php/campaigns/pc118pw7p78bf/track-url/eo948g9ba3535/955e46674ff54a5792d9fa1782e483d77e4fdfc8
/\/campaigns\/[a-zA-Z0-9]{13}\/track-url\/[a-zA-Z0-9]{13}\/[a-zA-Z0-9]{40}/i SPAM_LINK_5:0.4
# SPAM_LINK_6
/\/[a-zA-Z0-9]{13,18}\/[a-zA-Z0-9-_]{43}\/[a-zA-Z0-9-_]{107,128}/i SPAM_LINK_6:0.4
# SPAM_LINK_7
# looks almost the same as SPAM_LINK_6
# characteristics:
# - TLD: .date or .trade
# - Domain always with leading www.
# - path:
# First part between 7 and 10 chars
# Second part between 16 and 22 chars
# Third part always(?) 43 chars
# Fourth part > 80 chars but varying in length
/www\.[a-zA-Z0-9]+\.(date|trade)\/[a-zA-Z0-9-_]{6,13}\/[a-zA-Z0-9-_]{13,24}\/[a-zA-Z0-9-_]{40,65}\/[a-zA-Z0-9-_]{80,999}/i SPAM_LINK_7:0.4
# SPAM_LINK_8
# Primarily used in bitcoin/crypto currency spam
# characteristics:
# - emails contain at least one link to .../?xtl=<random{120-300}>&eih=<random{,40}>
# - emails contain at least one link to .../?xul=<random>
/\?xtl=[a-zA-Z0-9]{100,300}&eih=[a-zA-Z0-9]{20,40}/i SPAM_LINK_8a:0.1
/\?xul=[a-zA-Z0-9]{70,120}&eih=[a-zA-Z0-9]{20,40}/i SPAM_LINK_8b:0.1
# SPAM_LINK_9
# Primarily used in bitcoin/crypto currency spam
# - Subdomain of .page.link
# - Very short path (typically less than 5 chars)
/\.page\.link\/[a-zA-Z0-9]{1,8}/i SPAM_LINK_9:0.4
# Special rule for this persistent spammer
/onlyone\-invest\.com/.*/i ONLYONE_INVEST:0.4
# MY_SHOPIFY
# - Subdomain of myshopify.com
# - Path is /apps/<random>/indexjs.html or apps/<random>/<random>>/index_js.html
# - The first url parameter *might* always start with "sum"
/\.myshopify\.com\/apps\/[a-zA-Z0-9]+\/index_?js.html/i MY_SHOPIFY_a:0.2
/\.myshopify\.com\/apps\/[a-zA-Z0-9]+\/[a-zA-Z0-9]+\/index_?js.html/i MY_SHOPIFY_b:0.2

9
lists/spammy-tlds.txt

@ -0,0 +1,9 @@
# List of TLDs (or SLDs) with a high probability of hosting spam domains *AND A HIGH RISK OF FALSE POSITIVES*.
# rspamd doesn't provide TLD extraction for mail addresses. The eSLD (effective second level domain) is matched against this list.
/\.br$/i
/\.cn$/i
/\.co$/i
/\.com\.cn$/i
/\.jp$/i
/\.us$/i
Loading…
Cancel
Save