about summary refs log tree commit diff
path: root/lib/adblock
diff options
context:
space:
mode:
author Aqua-sama <aqua@iserlohn-fortress.net> 2017-12-18 14:32:45 +0100
committer Aqua-sama <aqua@iserlohn-fortress.net> 2017-12-18 14:32:45 +0100
commit e2b1cc628b304e3f153abc17fb350aa781e26b36 (patch)
tree 269fc864b57b9356dd19c78b2965cb595371e0b7 /lib/adblock
parent ProfileView dialog (diff)
download smolbote-e2b1cc628b304e3f153abc17fb350aa781e26b36.tar.xz
Basic adblock FilterRule
Diffstat (limited to 'lib/adblock')
-rw-r--r-- lib/adblock/adblock.qbs 39
-rw-r--r-- lib/adblock/filterrule.cpp 102
-rw-r--r-- lib/adblock/filterrule.h 62
-rw-r--r-- lib/adblock/test/filtertest.cpp 63
-rw-r--r-- lib/adblock/test/filtertest.h 28
-rw-r--r-- lib/adblock/test/main.cpp 4
6 files changed, 298 insertions, 0 deletions
diff --git a/lib/adblock/adblock.qbs b/lib/adblock/adblock.qbs
new file mode 100644
index 0000000..7a59462
--- /dev/null
+++ b/lib/adblock/adblock.qbs
@@ -0,0 +1,39 @@
+import qbs 1.0
+
+// Qbs project for the adblock filter code: a static library plus its unit test runner.
+Project {
+ name: "Filter"
+
+ // Static library built from the FilterRule sources; requires Qt Core 5.9.0 or newer.
+ StaticLibrary {
+ name: "adblock"
+
+ Depends { name: "cpp" }
+ Depends {
+ name: "Qt"
+ versionAtLeast: "5.9.0"
+ submodules: ["core"]
+ }
+
+ files: [
+ "filterrule.h",
+ "filterrule.cpp",
+ ]
+ }
+
+ // Test executable: depends on the adblock library above and on the Qt Test module.
+ CppApplication {
+ name: "adblock-test"
+
+ Depends {
+ name: "Qt"
+ versionAtLeast: "5.9.0"
+ submodules: ["core", "test"]
+ }
+
+ Depends { name: "adblock" }
+
+ files: [
+ "test/main.cpp",
+ "test/filtertest.cpp",
+ "test/filtertest.h",
+ ]
+ }
+}
diff --git a/lib/adblock/filterrule.cpp b/lib/adblock/filterrule.cpp
new file mode 100644
index 0000000..147e5fd
--- /dev/null
+++ b/lib/adblock/filterrule.cpp
@@ -0,0 +1,102 @@
+/*******************************************************************************
+ **
+ ** smolbote: yet another qute browser
+ ** Copyright (C) 2017 Xian Nox
+ **
+ ** This program is free software: you can redistribute it and/or modify
+ ** it under the terms of the GNU General Public License as published by
+ ** the Free Software Foundation, either version 3 of the License, or
+ ** (at your option) any later version.
+ **
+ ** This program is distributed in the hope that it will be useful,
+ ** but WITHOUT ANY WARRANTY; without even the implied warranty of
+ ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ ** GNU General Public License for more details.
+ **
+ ** You should have received a copy of the GNU General Public License
+ ** along with this program. If not, see <http://www.gnu.org/licenses/>.
+ **
+ ******************************************************************************/
+
+#include "filterrule.h"
+#include <QUrl>
+
+/* TODO
+ * - split this class into two: FilterRule that only deals with matching, and AdBlockRule, which only deals with parsing
+ */
+
+/* The AdBlock standard is an incredible mess, vaguely explained on:
+ * - https://adblockplus.org/filters
+ * - https://adblockplus.org/filter-cheatsheet
+ */
+
+// Construct a rule from one line of an AdBlock filter list.
+//
+// Both flags get a defined value before parsing starts: parse() does not
+// assign `exception` on every path, so without this isException() could read
+// an uninitialized bool.
+//
+// line: the raw filter line; whether it was accepted is reported by isValid().
+FilterRule::FilterRule(const QString &line)
+    : valid(false)
+    , exception(false)
+{
+    valid = parse(line);
+}
+
+// Nothing to release by hand: every member cleans up after itself.
+FilterRule::~FilterRule() = default;
+
+// Reports the result of the parse() call made by the constructor:
+// true when the filter line was accepted, false otherwise.
+bool FilterRule::isValid() const
+{
+    return this->valid;
+}
+
+// Reports the exception flag, which parse() raises for filter lines
+// that start with "@@".
+bool FilterRule::isException() const
+{
+    return this->exception;
+}
+
+// Test a request URL against this rule's compiled regular expression.
+// Only reports whether the pattern matches somewhere in the URL string;
+// the exception flag is not consulted here.
+bool FilterRule::shouldBlock(const QUrl &requestUrl) const
+{
+    const QString url = requestUrl.toString();
+    return rule.match(url).hasMatch();
+}
+
+// Parse one line of an AdBlock filter list and compile it into `rule`.
+//
+// Returns false for blank lines, for comment lines (starting with '!'), for
+// an exception marker with nothing after it, and for filters whose translated
+// pattern is not a valid regular expression. Returns true otherwise.
+// `exception` is assigned for every line that gets past the comment/blank check.
+bool FilterRule::parse(const QString &line)
+{
+    // work on a trimmed copy: surrounding whitespace or a stray line ending
+    // must neither hide the comment marker nor end up inside the regex;
+    // the copy also lets us snap off the parts we've already parsed
+    QString pattern = line.trimmed();
+
+    // skip comments and empty rules
+    if(pattern.isEmpty() || pattern.startsWith('!')) {
+        return false;
+    }
+
+    // exception rules are prefixed with @@; assign the flag in both cases so
+    // a regular rule never keeps a stale or uninitialized value
+    exception = pattern.startsWith("@@");
+    if(exception) {
+        pattern = pattern.mid(2); // remove @@
+    }
+
+    // a bare "@@" leaves an empty pattern, which would match every URL
+    if(pattern.isEmpty()) {
+        return false;
+    }
+
+    rule.setPattern(createRegExpPattern(pattern));
+
+    // a filter that did not translate into a usable regex is not a valid rule
+    return rule.isValid();
+}
+
+// Translate the address part of an AdBlock filter into the pattern string of
+// an equivalent regular expression.
+//
+// AdBlock syntax that is recognised:
+//   "||" at the start   matches at the beginning of a domain name
+//   "|"  at the start   matches at the beginning of the address
+//   "|"  at the end     matches at the end of the address
+//   "*"                 wildcard: any run of characters
+//   "^"                 separator: end of address, '?', '/' or ":number"
+// Everything else is literal text. The filter is translated one character at
+// a time, so the regex fragment inserted for one token can never be rewritten
+// while handling another, and regex metacharacters that appear in the literal
+// text ('?', '+', '(', '[', '$', ...) are escaped instead of interpreted.
+QString createRegExpPattern(const QString &line)
+{
+    QString filter = line;
+    QString pattern;
+    bool anchoredAtEnd = false;
+
+    // anchors only have a meaning at the two ends of the filter; each end is
+    // handled on its own so that "|http://" or "swf|" work without the other
+    if(filter.startsWith("||")) {
+        // ^\w+://([\w,\d,\.]+)? - any scheme, then optional leading subdomains
+        pattern = "^\\w+://([\\w,\\d,\\.]+)?";
+        filter = filter.mid(2);
+    } else if(filter.startsWith('|')) {
+        // ^ (start of string)
+        pattern = "^";
+        filter = filter.mid(1);
+    }
+
+    if(filter.endsWith('|')) {
+        anchoredAtEnd = true;
+        filter.chop(1);
+    }
+
+    for(int i = 0; i < filter.size(); ++i) {
+        const QChar c = filter.at(i);
+
+        switch(c.unicode()) {
+        case '*':
+            // wildcard: zero or more of any character
+            pattern += ".*";
+            break;
+        case '^':
+            // separator: end of string, or ?, or /, or :number
+            pattern += "($|\\?|\\/|:\\d+)";
+            break;
+        case '\\': case '.': case '+': case '?': case '$': case '|':
+        case '(': case ')': case '[': case ']': case '{': case '}':
+            // regex metacharacter inside literal text (this includes a '|'
+            // that is not at either end of the filter): match it verbatim
+            pattern += '\\';
+            pattern += c;
+            break;
+        default:
+            pattern += c;
+            break;
+        }
+    }
+
+    if(anchoredAtEnd) {
+        // $ (end of string)
+        pattern += '$';
+    }
+
+    return pattern;
+}
diff --git a/lib/adblock/filterrule.h b/lib/adblock/filterrule.h
new file mode 100644
index 0000000..66731e2
--- /dev/null
+++ b/lib/adblock/filterrule.h
@@ -0,0 +1,62 @@
+/*******************************************************************************
+ **
+ ** smolbote: yet another qute browser
+ ** Copyright (C) 2017 Xian Nox
+ **
+ ** This program is free software: you can redistribute it and/or modify
+ ** it under the terms of the GNU General Public License as published by
+ ** the Free Software Foundation, either version 3 of the License, or
+ ** (at your option) any later version.
+ **
+ ** This program is distributed in the hope that it will be useful,
+ ** but WITHOUT ANY WARRANTY; without even the implied warranty of
+ ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ ** GNU General Public License for more details.
+ **
+ ** You should have received a copy of the GNU General Public License
+ ** along with this program. If not, see <http://www.gnu.org/licenses/>.
+ **
+ ******************************************************************************/
+
+#ifndef FILTERRULE_H
+#define FILTERRULE_H
+
+#include <QRegularExpression>
+
+class QUrl;
+// A single filter parsed from one line of an AdBlock filter list.
+// The line is translated into a regular expression at construction time;
+// shouldBlock() then tests request URLs against that expression.
+class FilterRule
+{
+public:
+
+    // Parses `line`; call isValid() afterwards to see whether it was accepted.
+    FilterRule(const QString &line);
+
+    // delete the copy constructor and assignment operator
+    FilterRule(const FilterRule&) = delete;
+    FilterRule& operator=(const FilterRule&) = delete;
+
+    // move constructor: member-wise, so the flags and the compiled rule are
+    // taken over directly rather than default-constructed and then reassigned
+    FilterRule(FilterRule&& other) = default;
+
+    ~FilterRule();
+
+    // true if the line given to the constructor was accepted by parse()
+    bool isValid() const;
+    // true if the rule is an exception rule (line prefixed with "@@")
+    bool isException() const;
+    // true if requestUrl matches this rule's pattern
+    bool shouldBlock(const QUrl &requestUrl) const;
+
+private:
+    // Fills in `exception` and `rule` from `line`; the result is stored in `valid`.
+    bool parse(const QString &line);
+
+    // Both flags default to false so neither is ever read uninitialized,
+    // whichever path parse() takes.
+    bool valid = false;
+    bool exception = false;
+
+    // regular expression compiled from the filter line
+    QRegularExpression rule;
+
+};
+
+QString createRegExpPattern(const QString &line);
+
+#endif // FILTERRULE_H
diff --git a/lib/adblock/test/filtertest.cpp b/lib/adblock/test/filtertest.cpp
new file mode 100644
index 0000000..c778bc7
--- /dev/null
+++ b/lib/adblock/test/filtertest.cpp
@@ -0,0 +1,63 @@
+#include "filtertest.h"
+#include <QtTest/QtTest>
+#include "../filterrule.h"
+#include <QUrl>
+
+// The rule pointers start out null so that deleting them in cleanupTestCase()
+// is always well-defined, whichever of them initTestCase() managed to create.
+FilterTest::FilterTest(QObject *parent)
+    : QObject(parent)
+    , addressRule(nullptr)
+    , domainRule(nullptr)
+    , exactAddressRule(nullptr)
+{
+}
+
+// Create the three rules shared by the tests below and check that each one
+// parsed as a valid, non-exception rule. QVERIFY returns from this function
+// on the first failed check.
+void FilterTest::initTestCase()
+{
+    addressRule = new FilterRule("/banner/*/img^");
+    QVERIFY(addressRule->isValid());
+    QVERIFY(!addressRule->isException());
+
+    domainRule = new FilterRule("||ads.example.com^");
+    QVERIFY(domainRule->isValid());
+    QVERIFY(!domainRule->isException());
+
+    exactAddressRule = new FilterRule("|http://example.com/|");
+    QVERIFY(exactAddressRule->isValid());
+    QVERIFY(!exactAddressRule->isException());
+}
+
+// Address-part rule with a wildcard and a trailing separator.
+void FilterTest::testAddressBlock()
+{
+    // This rule blocks:
+    QVERIFY(addressRule->shouldBlock(QUrl("http://example.com/banner/foo/img")));
+    QVERIFY(addressRule->shouldBlock(QUrl("http://example.com/banner/foo/bar/img?param")));
+    QVERIFY(addressRule->shouldBlock(QUrl("http://example.com/banner//img/foo")));
+
+    // This rule doesn't block:
+    QVERIFY(!addressRule->shouldBlock(QUrl("http://example.com/banner/img")));
+    QVERIFY(!addressRule->shouldBlock(QUrl("http://example.com/banner/foo/imgraph")));
+    QVERIFY(!addressRule->shouldBlock(QUrl("http://example.com/banner/foo/img.gif")));
+}
+
+// Domain-anchored rule ("||" prefix) with a trailing separator.
+void FilterTest::testDomainBlock()
+{
+    // This rule blocks:
+    QVERIFY(domainRule->shouldBlock(QUrl("http://ads.example.com/foo.gif")));
+    QVERIFY(domainRule->shouldBlock(QUrl("http://server1.ads.example.com/foo.gif")));
+    QVERIFY(domainRule->shouldBlock(QUrl("https://ads.example.com:8000/")));
+
+    // This rule doesn't block:
+    QVERIFY(!domainRule->shouldBlock(QUrl("http://ads.example.com.ua/foo.gif")));
+    QVERIFY(!domainRule->shouldBlock(QUrl("http://example.com/redirect/http://ads.example.com/")));
+}
+
+// Rule anchored at both the start and the end of the address.
+void FilterTest::testExactAddressBlock()
+{
+    // This rule blocks:
+    QVERIFY(exactAddressRule->shouldBlock(QUrl("http://example.com/")));
+
+    // This rule doesn't block:
+    QVERIFY(!exactAddressRule->shouldBlock(QUrl("http://example.com/foo.gif")));
+    QVERIFY(!exactAddressRule->shouldBlock(QUrl("http://example.info/redirect/http://example.com/")));
+}
+
+// Release every rule created in initTestCase(), not just the first one.
+// The pointers are reset afterwards so nothing is left dangling.
+void FilterTest::cleanupTestCase()
+{
+    delete addressRule;
+    addressRule = nullptr;
+
+    delete domainRule;
+    domainRule = nullptr;
+
+    delete exactAddressRule;
+    exactAddressRule = nullptr;
+}
diff --git a/lib/adblock/test/filtertest.h b/lib/adblock/test/filtertest.h
new file mode 100644
index 0000000..45cdde1
--- /dev/null
+++ b/lib/adblock/test/filtertest.h
@@ -0,0 +1,28 @@
+#ifndef FILTERTEST_H
+#define FILTERTEST_H
+
+#include <QObject>
+
+class FilterRule;
+// Qt Test fixture for FilterRule. The private slots are the test functions;
+// their declaration order is kept as-is because it is the order they run in.
+class FilterTest : public QObject
+{
+    Q_OBJECT
+public:
+    explicit FilterTest(QObject *parent = nullptr);
+
+signals:
+
+private slots:
+    // creates the rules used by the tests
+    void initTestCase();
+    void testAddressBlock();
+    void testDomainBlock();
+    void testExactAddressBlock();
+    // releases what initTestCase() created
+    void cleanupTestCase();
+
+private:
+    // Rules under test, allocated in initTestCase(). They default to null so
+    // they never hold an indeterminate address before that point.
+    FilterRule *addressRule = nullptr;
+    FilterRule *domainRule = nullptr;
+    FilterRule *exactAddressRule = nullptr;
+};
+
+#endif // FILTERTEST_H
diff --git a/lib/adblock/test/main.cpp b/lib/adblock/test/main.cpp
new file mode 100644
index 0000000..3284e47
--- /dev/null
+++ b/lib/adblock/test/main.cpp
@@ -0,0 +1,4 @@
+#include <QtTest/QtTest>
+#include "filtertest.h"
+
+// Expands to a main() that runs the test functions of FilterTest without
+// instantiating a QApplication.
+QTEST_APPLESS_MAIN(FilterTest)