aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAqua-sama <aqua@iserlohn-fortress.net>2018-07-09 20:19:30 +0200
committerAqua-sama <aqua@iserlohn-fortress.net>2018-07-09 20:19:30 +0200
commit9808d18fa6cd19400f08897014a9948f168927df (patch)
treeb4224f737ec01391e7d443ad142c94ec4c0bb17f
parentRemove PageMenu (diff)
downloadsmolbote-9808d18fa6cd19400f08897014a9948f168927df.tar.xz
AdBlock rule testing
-rw-r--r--lib/web/CMakeLists.txt3
-rw-r--r--lib/web/urlfilter/adblockrule.cpp120
-rw-r--r--lib/web/urlfilter/adblockrule.h43
-rw-r--r--test/CMakeLists.txt9
-rw-r--r--test/urlfilter/adblocktest.cpp40
-rw-r--r--test/urlfilter/adblocktest.h15
6 files changed, 230 insertions, 0 deletions
diff --git a/lib/web/CMakeLists.txt b/lib/web/CMakeLists.txt
index 601c1d5..9515566 100644
--- a/lib/web/CMakeLists.txt
+++ b/lib/web/CMakeLists.txt
@@ -12,6 +12,9 @@ add_library(web
urlfilter/filterdomain.h
urlfilter/filterrule.cpp
urlfilter/filterrule.h
+
+ urlfilter/adblockrule.cpp
+ urlfilter/adblockrule.h
)
target_link_libraries(web Qt5::WebEngineWidgets)
diff --git a/lib/web/urlfilter/adblockrule.cpp b/lib/web/urlfilter/adblockrule.cpp
new file mode 100644
index 0000000..d707783
--- /dev/null
+++ b/lib/web/urlfilter/adblockrule.cpp
@@ -0,0 +1,120 @@
+#include "adblockrule.h"
+
+/**
+ * Check whether a host name is covered by a domain filter.
+ *
+ * A domain is covered when it is identical to the filter, or when it is a
+ * subdomain of it, i.e. it ends with the filter and the character right
+ * before that suffix is a '.'.
+ *
+ * @param domain host name to test (ex. "sub.example.com")
+ * @param filter domain taken from the rule (ex. "example.com")
+ * @return true if domain equals filter or is a subdomain of filter
+ */
+bool isMatchingDomain(const QString &domain, const QString &filter)
+{
+    // domain and filter are the same
+    if(domain == filter) {
+        return true;
+    }
+
+    // an empty filter names no domain, so it can only "match" through the equality check above
+    if(filter.isEmpty()) {
+        return false;
+    }
+
+    // domain can't be matched by filter if it doesn't end with filter
+    // ex. example2.com isn't matched by example.com
+    if(!domain.endsWith(filter)) {
+        return false;
+    }
+
+    // match with subdomains
+    // ex. subdomain.example.com is matched by example.com
+    // The boundary is the start of the *suffix*; searching for the first occurrence instead
+    // would look at the wrong character for hosts such as example.com.example.com.
+    const int suffixStart = domain.length() - filter.length();
+
+    // match if (domain ends with filter) and (character before the suffix is '.')
+    return suffixStart > 0 && domain.at(suffixStart - 1) == QLatin1Char('.');
+}
+
+// adblock format documentation
+// https://adblockplus.org/filters
+
+// QString::mid(pos, len) - Returns a string of len characters, starting at position pos.
+// QString::chop(len) - Removes len characters from the end of the string.
+// QString::remove(pos, len) - Removes len characters from the string, starting at position pos.
+
+/**
+ * Parse one line of an AdBlock filter list.
+ *
+ * Empty lines, comments ('!') and element hiding rules ("##", "#@#") leave the
+ * rule disabled. Otherwise the line is classified as one of:
+ *  - "/regex/"        -> RegularExpressionMatch using the text between the slashes
+ *  - "|address|"      -> StringEquals on the text between the pipes
+ *  - "||domain^"      -> DomainMatch on the text between "||" and "^"
+ *  - contains * or ^  -> RegularExpressionMatch built from the wildcard pattern
+ * A leading "@@" marks the rule as an exception.
+ *
+ * @param filter raw line from the filter list
+ */
+AdBlockRule::AdBlockRule(const QString &filter)
+{
+    QString parsedLine = filter.trimmed();
+
+    // there is no rule, or it's a comment
+    if(parsedLine.isEmpty() || parsedLine.startsWith(QLatin1Literal("!"))) {
+        return;
+    }
+
+    // css rule - ignore for now
+    if(parsedLine.contains(QLatin1Literal("##")) || parsedLine.contains(QLatin1Literal("#@#"))) {
+        return;
+    }
+
+    m_isEnabled = true;
+
+    // exception rules
+    if(parsedLine.startsWith(QLatin1Literal("@@"))) {
+        m_isException = true;
+        parsedLine.remove(0, 2);
+    }
+
+    // regular expression rules
+    // length > 2 guarantees a non-empty pattern: "/" and "//" would otherwise
+    // produce an empty expression, which matches every url
+    if(parsedLine.length() > 2 && parsedLine.startsWith(QLatin1Literal("/")) && parsedLine.endsWith(QLatin1Literal("/"))) {
+        matchType = RegularExpressionMatch;
+        regexp.setPattern(parsedLine.mid(1, parsedLine.length() - 2));
+
+        // a malformed expression can never match, so don't report the rule as usable
+        if(!regexp.isValid()) {
+            matchType = InvalidMatch;
+            m_isEnabled = false;
+        }
+        return;
+    }
+
+    // basic filter rules
+    // length > 2 guarantees there is an address between the two anchors
+    if(parsedLine.length() > 2 && parsedLine.startsWith(QLatin1Literal("|")) && parsedLine.endsWith(QLatin1Literal("|"))) {
+        matchType = StringEquals;
+        match = parsedLine.mid(1, parsedLine.length() - 2);
+        return;
+    }
+
+    // Basic filter rules can use wildcards, which were supported by QRegExp,
+    // but were deprecated in QRegularExpression.
+
+    // remove beginning and ending wildcards
+    if(parsedLine.startsWith(QLatin1Literal("*")))
+        parsedLine = parsedLine.mid(1);
+
+    if(parsedLine.endsWith(QLatin1Literal("*")))
+        parsedLine.chop(1);
+
+    if(parsedLine.startsWith(QLatin1Literal("||")) && parsedLine.endsWith(QLatin1Literal("^"))) {
+        matchType = DomainMatch;
+        match = parsedLine.mid(2, parsedLine.length() - 3);
+        return;
+    }
+
+    // check for wildcards and translate to regexp
+    // wildcard "*" - any number of characters
+    // separator "^" - end, ? or /
+    if(parsedLine.contains(QLatin1Literal("*")) || parsedLine.contains(QLatin1Literal("^"))) {
+        matchType = RegularExpressionMatch;
+
+        // Escape the whole line first so that '.', '?', '+', '|', ... in the filter are
+        // matched literally (an unescaped leading "||" would be an empty alternative
+        // that matches every url). escape() turns '*' into "\*" and '^' into "\^",
+        // which are then translated to their regexp equivalents.
+        QString pattern = QRegularExpression::escape(parsedLine);
+        pattern.replace(QLatin1Literal("\\*"), QLatin1Literal(".*"));
+        pattern.replace(QLatin1Literal("\\^"), QLatin1Literal("($|\\?|\\/)"));
+        regexp.setPattern(pattern);
+        return;
+    }
+
+    // plain substring rule: only the matcher is prepared here
+    // NOTE(review): matchType stays InvalidMatch on this path - confirm whether
+    // substring rules are meant to be evaluated yet
+    matcher.setPattern(parsedLine);
+}
+
+/**
+ * @return the m_isEnabled flag, which the constructor sets once the line is
+ *         neither empty, a comment, nor an element hiding (css) rule
+ */
+bool AdBlockRule::isEnabled() const
+{
+ return m_isEnabled;
+}
+
+/**
+ * Decide whether a request to the given url should be blocked by this rule.
+ *
+ * The url is tested according to the rule's match type:
+ *  - RegularExpressionMatch: the full url string is searched with regexp
+ *  - StringEquals: the full url string must equal match
+ *  - DomainMatch: the url host must equal match or be a subdomain of it
+ * Any other match type never matches.
+ *
+ * @param url url of the request
+ * @return true if the url matches and the rule is not an exception ("@@") rule
+ */
+bool AdBlockRule::shouldBlock(const QUrl &url) const
+{
+    bool isMatch = false;
+
+    // every case ends in break: a regexp rule that doesn't match must not
+    // fall through and be evaluated as a StringEquals rule
+    switch (matchType) {
+    case RegularExpressionMatch:
+        isMatch = regexp.match(url.toString()).hasMatch();
+        break;
+
+    case StringEquals:
+        isMatch = (url.toString() == match);
+        break;
+
+    case DomainMatch:
+        isMatch = isMatchingDomain(url.host(), match);
+        break;
+
+    default:
+        break;
+    }
+
+    // exception rules never block, regardless of how they matched
+    return isMatch && !m_isException;
+}
diff --git a/lib/web/urlfilter/adblockrule.h b/lib/web/urlfilter/adblockrule.h
new file mode 100644
index 0000000..e1cabae
--- /dev/null
+++ b/lib/web/urlfilter/adblockrule.h
@@ -0,0 +1,43 @@
+#ifndef ADBLOCKRULE_H
+#define ADBLOCKRULE_H
+
+#include <QObject>
+#include <QString>
+#include <QRegularExpression>
+#include <QUrl>
+#include <QWebEngineUrlRequestInfo>
+
+/**
+ * A single rule parsed from one line of an AdBlock filter list.
+ *
+ * The constructor classifies the line and stores what is needed to test a
+ * url against it; shouldBlock() performs that test.
+ */
+class AdBlockRule
+{
+public:
+
+    /// How a rule is compared against a url.
+    enum MatchType {
+        InvalidMatch,           ///< nothing to match against; never blocks
+        RegularExpressionMatch, ///< url is searched with regexp
+        StringContains,
+        StringStartsWith,
+        StringEndsWith,
+        StringEquals,           ///< url must be equal to match
+        DomainMatch             ///< url host must be match or one of its subdomains
+    };
+
+    /**
+     * Parse a filter list line.
+     * Explicit, so that a string is never silently converted into a rule.
+     * @param filter raw line from the filter list
+     */
+    explicit AdBlockRule(const QString &filter);
+
+    /// @return false for lines that carry no usable rule (empty, comment, css rule)
+    bool isEnabled() const;
+
+    /// @return true if a request to url should be blocked by this rule
+    bool shouldBlock(const QUrl &url) const;
+
+private:
+    Q_DISABLE_COPY(AdBlockRule)
+
+    bool m_isEnabled = false;   // set once the line has been accepted as a rule
+    bool m_isException = false; // set for rules starting with "@@"
+
+    MatchType matchType = InvalidMatch;
+    // NOTE(review): not read or written by the visible implementation - presumably
+    // reserved for per-resource-type rule options; confirm
+    QHash<QWebEngineUrlRequestInfo::ResourceType, bool> m_resourceTypeOptions;
+
+    QString match;             // text compared by StringEquals and DomainMatch
+    QRegularExpression regexp; // expression used by RegularExpressionMatch
+    QStringMatcher matcher;    // pattern stored for plain substring rules
+};
+
+#endif // ADBLOCKRULE_H
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index a44da07..414d616 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -12,3 +12,12 @@ target_include_directories(UrlFilterTest
)
target_link_libraries(UrlFilterTest Qt5::Test web)
+
+# Test executable for the AdBlock rule tests
+add_executable(AdBlockTest
+ urlfilter/adblocktest.cpp
+ urlfilter/adblocktest.h
+)
+
+# adblocktest.cpp includes "urlfilter/adblockrule.h", which lives under lib/web
+target_include_directories(AdBlockTest PRIVATE ../lib/web)
+
+# Qt5::Test provides the test framework, web provides AdBlockRule
+target_link_libraries(AdBlockTest Qt5::Test web)
diff --git a/test/urlfilter/adblocktest.cpp b/test/urlfilter/adblocktest.cpp
new file mode 100644
index 0000000..d1060f1
--- /dev/null
+++ b/test/urlfilter/adblocktest.cpp
@@ -0,0 +1,40 @@
+#include "adblocktest.h"
+#include <QtTest/QtTest>
+#include "urlfilter/adblockrule.h"
+
+// Rule with a wildcard inside the path and a separator at the end.
+void AdBlockTest::blockByAddressPart()
+{
+    AdBlockRule rule("/banner/*/img^");
+
+    // addresses the rule has to block
+    const char *const blockedAddresses[] = {
+        "http://example.com/banner/foo/img",
+        "http://example.com/banner/foo/bar/img?param",
+        "http://example.com/banner//img/foo",
+    };
+    for(const char *address : blockedAddresses) {
+        QCOMPARE(rule.shouldBlock(QUrl(address)), true);
+    }
+
+    // addresses the rule has to let through
+    const char *const allowedAddresses[] = {
+        "http://example.com/banner/img",
+        "http://example.com/banner/foo/imgraph",
+        "http://example.com/banner/foo/img.gif",
+    };
+    for(const char *address : allowedAddresses) {
+        QCOMPARE(rule.shouldBlock(QUrl(address)), false);
+    }
+}
+
+// Rule anchored to a domain name: the domain itself and its subdomains are blocked.
+void AdBlockTest::blockByDomain()
+{
+    AdBlockRule rule("||ads.example.com^");
+
+    // addresses the rule has to block
+    const char *const blockedAddresses[] = {
+        "http://ads.example.com/foo.gif",
+        "http://server1.ads.example.com/foo.gif",
+        "https://ads.example.com:8000/",
+    };
+    for(const char *address : blockedAddresses) {
+        QCOMPARE(rule.shouldBlock(QUrl(address)), true);
+    }
+
+    // addresses the rule has to let through
+    const char *const allowedAddresses[] = {
+        "http://ads.example.com.ua/foo.gif",
+        "http://example.com/redirect/http://ads.example.com/",
+    };
+    for(const char *address : allowedAddresses) {
+        QCOMPARE(rule.shouldBlock(QUrl(address)), false);
+    }
+}
+
+// Rule anchored at both ends: only the exact address is blocked.
+void AdBlockTest::blockExactAddress()
+{
+    AdBlockRule rule("|http://example.com/|");
+
+    // the one address the rule has to block
+    const QUrl exactAddress("http://example.com/");
+    QCOMPARE(rule.shouldBlock(exactAddress), true);
+
+    // addresses the rule has to let through
+    const char *const allowedAddresses[] = {
+        "http://example.com/foo.gif",
+        "http://example.info/redirect/http://example.com/",
+    };
+    for(const char *address : allowedAddresses) {
+        QCOMPARE(rule.shouldBlock(QUrl(address)), false);
+    }
+}
+
+QTEST_GUILESS_MAIN(AdBlockTest)
diff --git a/test/urlfilter/adblocktest.h b/test/urlfilter/adblocktest.h
new file mode 100644
index 0000000..a7b9f12
--- /dev/null
+++ b/test/urlfilter/adblocktest.h
@@ -0,0 +1,15 @@
+#ifndef ADBLOCKTEST_H
+#define ADBLOCKTEST_H
+
+#include <QObject>
+// Qt Test fixture for AdBlockRule; every private slot is one test function.
+class AdBlockTest : public QObject
+{
+    Q_OBJECT
+
+private Q_SLOTS:
+    // wildcard/separator rule matched against part of the address
+    void blockByAddressPart();
+    // "||domain^" rule matched against the host
+    void blockByDomain();
+    // "|address|" rule matched against the whole address
+    void blockExactAddress();
+};
+
+#endif // ADBLOCKTEST_H