diff options
author | Aqua-sama <aqua@iserlohn-fortress.net> | 2017-12-18 14:32:45 +0100 |
---|---|---|
committer | Aqua-sama <aqua@iserlohn-fortress.net> | 2017-12-18 14:32:45 +0100 |
commit | e2b1cc628b304e3f153abc17fb350aa781e26b36 (patch) | |
tree | 269fc864b57b9356dd19c78b2965cb595371e0b7 /lib/adblock | |
parent | ProfileView dialog (diff) | |
download | smolbote-e2b1cc628b304e3f153abc17fb350aa781e26b36.tar.xz |
Basic adblock FilterRule
Diffstat (limited to 'lib/adblock')
-rw-r--r-- | lib/adblock/adblock.qbs | 39 | ||||
-rw-r--r-- | lib/adblock/filterrule.cpp | 102 | ||||
-rw-r--r-- | lib/adblock/filterrule.h | 62 | ||||
-rw-r--r-- | lib/adblock/test/filtertest.cpp | 63 | ||||
-rw-r--r-- | lib/adblock/test/filtertest.h | 28 | ||||
-rw-r--r-- | lib/adblock/test/main.cpp | 4 |
6 files changed, 298 insertions, 0 deletions
diff --git a/lib/adblock/adblock.qbs b/lib/adblock/adblock.qbs new file mode 100644 index 0000000..7a59462 --- /dev/null +++ b/lib/adblock/adblock.qbs @@ -0,0 +1,39 @@ +import qbs 1.0 + +Project { + name: "Filter" + + StaticLibrary { + name: "adblock" + + Depends { name: "cpp" } + Depends { + name: "Qt" + versionAtLeast: "5.9.0" + submodules: ["core"] + } + + files: [ + "filterrule.h", + "filterrule.cpp", + ] + } + + CppApplication { + name: "adblock-test" + + Depends { + name: "Qt" + versionAtLeast: "5.9.0" + submodules: ["core", "test"] + } + + Depends { name: "adblock" } + + files: [ + "test/main.cpp", + "test/filtertest.cpp", + "test/filtertest.h", + ] + } +} diff --git a/lib/adblock/filterrule.cpp b/lib/adblock/filterrule.cpp new file mode 100644 index 0000000..147e5fd --- /dev/null +++ b/lib/adblock/filterrule.cpp @@ -0,0 +1,102 @@ +/******************************************************************************* + ** + ** smolbote: yet another qute browser + ** Copyright (C) 2017 Xian Nox + ** + ** This program is free software: you can redistribute it and/or modify + ** it under the terms of the GNU General Public License as published by + ** the Free Software Foundation, either version 3 of the License, or + ** (at your option) any later version. + ** + ** This program is distributed in the hope that it will be useful, + ** but WITHOUT ANY WARRANTY; without even the implied warranty of + ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + ** GNU General Public License for more details. + ** + ** You should have received a copy of the GNU General Public License + ** along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ ** + ******************************************************************************/ + +#include "filterrule.h" +#include <QUrl> + +/* TODO + * - split this class into two: FilterRule that only deals with matching, and AdBlockRule, which only deals with parsing + */ + +/* The AdBlock standard is an incredible mess, vaguely explained on: + * - https://adblockplus.org/filters + * - https://adblockplus.org/filter-cheatsheet + */ + +FilterRule::FilterRule(const QString &line) +{ + valid = parse(line); +} + +FilterRule::~FilterRule() +{ +} + +bool FilterRule::isValid() const +{ + return valid; +} + +bool FilterRule::isException() const +{ + return exception; +} + +bool FilterRule::shouldBlock(const QUrl &requestUrl) const +{ + QRegularExpressionMatch match = rule.match(requestUrl.toString()); + return match.hasMatch(); +} + +// Parses a single filter list line: strips the @@ exception prefix and compiles the +// remainder into the regular expression used by shouldBlock(). +// Returns false for comment lines (leading !), blank lines, and patterns whose +// generated regular expression is not valid. +bool FilterRule::parse(const QString &line) +{ + // a rule blocks unless it is explicitly marked as an exception; set this before + // any early return so isException() never reads an indeterminate value + exception = false; + + // skip for comments and empty rules + if(line.startsWith('!') || line.trimmed().isEmpty()) { + return false; + } + + // make a copy of the pattern so we can snap off the parts we've already parsed + QString pattern = line; + + if(pattern.startsWith("@@")) { + exception = true; + pattern = pattern.mid(2); // remove @@ + } + + rule.setPattern(createRegExpPattern(pattern)); + + // createRegExpPattern() does not escape every regex metacharacter, so the result + // may fail to compile; report such rules as invalid instead of silently never matching + return rule.isValid(); +} + +QString createRegExpPattern(const QString &line) +{ + QString pattern = line; + + // replace . (any character) with \. (a dot) + pattern.replace('.', "\\."); + + // translate adblock special characters into regex + // replace wildcard (*) with '.*' (zero or more of any element) + pattern.replace('*', ".*"); + + // replace separator (^) with '($|\?|\/|:\d+)' (end of string, or ?, or /, or :number) + pattern.replace('^', "($|\\?|\\/|:\\d+)"); + + // replace || with ^\w+://([\w,\d,\.]+)? 
+ pattern.replace("||", "^\\w+://([\\w,\\d,\\.]+)?"); + + // a leading | and a trailing | are independent anchors, so each is handled on its own; + // only the first / last character is touched, because the separator group inserted + // above contains | as regex alternation and must be left intact + if(pattern.startsWith('|')) { + // replace | at start with ^ (start of string) + pattern.replace(0, 1, '^'); + } + if(pattern.endsWith('|')) { + // replace | at end with $ (end of string) + pattern.replace(pattern.length() - 1, 1, '$'); + } + + return pattern; +} diff --git a/lib/adblock/filterrule.h b/lib/adblock/filterrule.h new file mode 100644 index 0000000..66731e2 --- /dev/null +++ b/lib/adblock/filterrule.h @@ -0,0 +1,62 @@ +/******************************************************************************* + ** + ** smolbote: yet another qute browser + ** Copyright (C) 2017 Xian Nox + ** + ** This program is free software: you can redistribute it and/or modify + ** it under the terms of the GNU General Public License as published by + ** the Free Software Foundation, either version 3 of the License, or + ** (at your option) any later version. + ** + ** This program is distributed in the hope that it will be useful, + ** but WITHOUT ANY WARRANTY; without even the implied warranty of + ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + ** GNU General Public License for more details. + ** + ** You should have received a copy of the GNU General Public License + ** along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ ** + ******************************************************************************/ + +#ifndef FILTERRULE_H +#define FILTERRULE_H + +#include <QRegularExpression> + +class QUrl; +class FilterRule +{ +public: + + FilterRule(const QString &line); + + // delete the copy constructor and assignment operator + FilterRule(const FilterRule&) = delete; + FilterRule& operator=(const FilterRule&) = delete; + + // move constructor + FilterRule(FilterRule&& other) { + valid = other.valid; + exception = other.exception; + rule = other.rule; + } + + ~FilterRule(); + + bool isValid() const; + bool isException() const; + bool shouldBlock(const QUrl &requestUrl) const; + +private: + bool parse(const QString &line); + + bool valid; + bool exception; + + QRegularExpression rule; + +}; + +QString createRegExpPattern(const QString &line); + +#endif // FILTERRULE_H diff --git a/lib/adblock/test/filtertest.cpp b/lib/adblock/test/filtertest.cpp new file mode 100644 index 0000000..c778bc7 --- /dev/null +++ b/lib/adblock/test/filtertest.cpp @@ -0,0 +1,63 @@ +#include "filtertest.h" +#include <QtTest/QtTest> +#include "../filterrule.h" +#include <QUrl> + +FilterTest::FilterTest(QObject *parent) + : QObject(parent) + , addressRule(nullptr) + , domainRule(nullptr) + , exactAddressRule(nullptr) +{ + // the rules are created in initTestCase(); starting from null keeps the deletes in + // cleanupTestCase() safe even if initTestCase() stops before allocating all of them +} + +void FilterTest::initTestCase() +{ + addressRule = new FilterRule("/banner/*/img^"); + QVERIFY(addressRule->isValid() == true); + QVERIFY(addressRule->isException() == false); + + domainRule = new FilterRule("||ads.example.com^"); + QVERIFY(domainRule->isValid() == true); + QVERIFY(domainRule->isException() == false); + + exactAddressRule = new FilterRule("|http://example.com/|"); + QVERIFY(exactAddressRule->isValid() == true); + QVERIFY(exactAddressRule->isException() == false); +} + +void FilterTest::testAddressBlock() +{ + // This rule blocks: + QVERIFY(addressRule->shouldBlock(QUrl("http://example.com/banner/foo/img")) == true); + QVERIFY(addressRule->shouldBlock(QUrl("http://example.com/banner/foo/bar/img?param")) == true); + 
QVERIFY(addressRule->shouldBlock(QUrl("http://example.com/banner//img/foo")) == true); + + // This rule doesn't block: + QVERIFY(addressRule->shouldBlock(QUrl("http://example.com/banner/img")) == false); + QVERIFY(addressRule->shouldBlock(QUrl("http://example.com/banner/foo/imgraph")) == false); + QVERIFY(addressRule->shouldBlock(QUrl("http://example.com/banner/foo/img.gif")) == false); +} + +void FilterTest::testDomainBlock() +{ + // This rule blocks: + QVERIFY(domainRule->shouldBlock(QUrl("http://ads.example.com/foo.gif")) == true); + QVERIFY(domainRule->shouldBlock(QUrl("http://server1.ads.example.com/foo.gif")) == true); + QVERIFY(domainRule->shouldBlock(QUrl("https://ads.example.com:8000/")) == true); + + // This rule doesn't block: + QVERIFY(domainRule->shouldBlock(QUrl("http://ads.example.com.ua/foo.gif")) == false); + QVERIFY(domainRule->shouldBlock(QUrl("http://example.com/redirect/http://ads.example.com/")) == false); +} + +void FilterTest::testExactAddressBlock() +{ + // This rule blocks: + QVERIFY(exactAddressRule->shouldBlock(QUrl("http://example.com/")) == true); + + // This rule doesn't block: + QVERIFY(exactAddressRule->shouldBlock(QUrl("http://example.com/foo.gif")) == false); + QVERIFY(exactAddressRule->shouldBlock(QUrl("http://example.info/redirect/http://example.com/")) == false); +} + +void FilterTest::cleanupTestCase() +{ + // release every rule allocated in initTestCase(), not just the first one + delete addressRule; + delete domainRule; + delete exactAddressRule; +} diff --git a/lib/adblock/test/filtertest.h b/lib/adblock/test/filtertest.h new file mode 100644 index 0000000..45cdde1 --- /dev/null +++ b/lib/adblock/test/filtertest.h @@ -0,0 +1,28 @@ +#ifndef FILTERTEST_H +#define FILTERTEST_H + +#include <QObject> + +class FilterRule; +class FilterTest : public QObject +{ + Q_OBJECT +public: + explicit FilterTest(QObject *parent = nullptr); + +signals: + +private slots: + void initTestCase(); + void testAddressBlock(); + void testDomainBlock(); + void testExactAddressBlock(); + void cleanupTestCase(); + +private: + FilterRule *addressRule; + FilterRule 
*domainRule; + FilterRule *exactAddressRule; +}; + +#endif // FILTERTEST_H diff --git a/lib/adblock/test/main.cpp b/lib/adblock/test/main.cpp new file mode 100644 index 0000000..3284e47 --- /dev/null +++ b/lib/adblock/test/main.cpp @@ -0,0 +1,4 @@ +#include <QtTest/QtTest> +#include "filtertest.h" + +QTEST_APPLESS_MAIN(FilterTest) |