From 9808d18fa6cd19400f08897014a9948f168927df Mon Sep 17 00:00:00 2001 From: Aqua-sama Date: Mon, 9 Jul 2018 20:19:30 +0200 Subject: AdBlock rule testing --- lib/web/urlfilter/adblockrule.cpp | 120 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 120 insertions(+) create mode 100644 lib/web/urlfilter/adblockrule.cpp (limited to 'lib/web/urlfilter/adblockrule.cpp') diff --git a/lib/web/urlfilter/adblockrule.cpp b/lib/web/urlfilter/adblockrule.cpp new file mode 100644 index 0000000..d707783 --- /dev/null +++ b/lib/web/urlfilter/adblockrule.cpp @@ -0,0 +1,120 @@ +#include "adblockrule.h" + +bool isMatchingDomain(const QString &domain, const QString &filter) +{ + // domain and filter are the same + if(domain == filter) { + return true; + } + + // domain can't be matched by filter if it doesn't end with filter + // ex. example2.com isn't matched by example.com + if(!domain.endsWith(filter)) { + return false; + } + + // match with subdomains + // ex. subdomain.example.com is matched by example.com + int index = domain.indexOf(filter); + + // match if (domain ends with filter) && (filter has been found) and (character before filter is '.') + return index > 0 && domain[index - 1] == QLatin1Char('.'); +} + +// adblock format documentation +// https://adblockplus.org/filters + +// QString::mid(pos, len) - Returns a string starting at the specified position index. +// QString::chop(len) - Removes n characters from the end of the string. +// QString::remove(pos, len) - Removes n characters from the string, starting at the given position index. + +AdBlockRule::AdBlockRule(const QString &filter) +{ + QString parsedLine = filter.trimmed(); + + // there is no rule, or it"s a comment + if(parsedLine.isEmpty() || parsedLine.startsWith("!")) { + return; + } + + // css rule - ignore for now + if(parsedLine.contains(QLatin1Literal("##")) || parsedLine.contains(QLatin1Literal("#@#"))) { + return; + } + + m_isEnabled = true; + + // exception rules + if(parsedLine.startsWith(QLatin1Literal("@@"))) { + m_isException = true; + parsedLine.remove(0, 2); + } + + // regular expression rules + if(parsedLine.startsWith(QLatin1Literal("/")) && parsedLine.endsWith(QLatin1Literal("/"))) { + parsedLine = parsedLine.mid(1, parsedLine.length() - 2); + + matchType = RegularExpressionMatch; + regexp.setPattern(parsedLine); + return; + } + + // basic filter rules + if(parsedLine.startsWith(QLatin1Literal("|")) && parsedLine.endsWith(QLatin1Literal("|"))) { + matchType = StringEquals; + match = parsedLine.mid(1, parsedLine.length() - 2); + return; + } + + // Basic filter rules can use wildcards, which were supported by QRegExp, + // but were deprecated in QRegularExpression. + + // remove beginning and ending wildcards + if(parsedLine.startsWith(QLatin1Literal("*"))) + parsedLine = parsedLine.mid(1); + + if(parsedLine.endsWith(QLatin1Literal("*"))) + parsedLine.chop(1); + + if(parsedLine.startsWith(QLatin1Literal("||")) && parsedLine.endsWith(QLatin1Literal("^"))) { + matchType = DomainMatch; + match = parsedLine.mid(2, parsedLine.length() - 3); + return; + } + + // check for wildcards and translate to regexp + // wildcard "*" - any number of characters + // separator "^" - end, ? or / + if(parsedLine.contains(QLatin1Literal("*")) || parsedLine.contains(QLatin1Literal("^"))) { + matchType = RegularExpressionMatch; + parsedLine.replace(QLatin1Literal("*"), QLatin1Literal(".*")); + parsedLine.replace(QLatin1Literal("^"), QLatin1Literal("($|\\?|\\/)")); + regexp.setPattern(parsedLine); + return; + } + + matcher.setPattern(parsedLine); +} + +bool AdBlockRule::isEnabled() const +{ + return m_isEnabled; +} + +bool AdBlockRule::shouldBlock(const QUrl &url) const +{ + switch (matchType) { + case RegularExpressionMatch: + if(regexp.match(url.toString()).hasMatch()) + return !m_isException; + + case StringEquals: + return url.toString() == match; + + case DomainMatch: + return isMatchingDomain(url.host(), match); + + default: + return false; + } +} -- cgit v1.2.1