From e2b1cc628b304e3f153abc17fb350aa781e26b36 Mon Sep 17 00:00:00 2001 From: Aqua-sama Date: Mon, 18 Dec 2017 14:32:45 +0100 Subject: Basic adblock FilterRule --- lib/adblock/adblock.qbs | 39 +++++++++++++++ lib/adblock/filterrule.cpp | 102 ++++++++++++++++++++++++++++++++++++++++ lib/adblock/filterrule.h | 62 ++++++++++++++++++++++++ lib/adblock/test/filtertest.cpp | 63 +++++++++++++++++++++++++ lib/adblock/test/filtertest.h | 28 +++++++++++ lib/adblock/test/main.cpp | 4 ++ smolbote.qbs | 1 + 7 files changed, 299 insertions(+) create mode 100644 lib/adblock/adblock.qbs create mode 100644 lib/adblock/filterrule.cpp create mode 100644 lib/adblock/filterrule.h create mode 100644 lib/adblock/test/filtertest.cpp create mode 100644 lib/adblock/test/filtertest.h create mode 100644 lib/adblock/test/main.cpp diff --git a/lib/adblock/adblock.qbs b/lib/adblock/adblock.qbs new file mode 100644 index 0000000..7a59462 --- /dev/null +++ b/lib/adblock/adblock.qbs @@ -0,0 +1,39 @@ +import qbs 1.0 + +Project { + name: "Filter" + + StaticLibrary { + name: "adblock" + + Depends { name: "cpp" } + Depends { + name: "Qt" + versionAtLeast: "5.9.0" + submodules: ["core"] + } + + files: [ + "filterrule.h", + "filterrule.cpp", + ] + } + + CppApplication { + name: "adblock-test" + + Depends { + name: "Qt" + versionAtLeast: "5.9.0" + submodules: ["core", "test"] + } + + Depends { name: "adblock" } + + files: [ + "test/main.cpp", + "test/filtertest.cpp", + "test/filtertest.h", + ] + } +} diff --git a/lib/adblock/filterrule.cpp b/lib/adblock/filterrule.cpp new file mode 100644 index 0000000..147e5fd --- /dev/null +++ b/lib/adblock/filterrule.cpp @@ -0,0 +1,102 @@ +/******************************************************************************* + ** + ** smolbote: yet another qute browser + ** Copyright (C) 2017 Xian Nox + ** + ** This program is free software: you can redistribute it and/or modify + ** it under the terms of the GNU General Public License as published by + ** the Free Software Foundation, either version 3 of the License, or + ** (at your option) any later version. + ** + ** This program is distributed in the hope that it will be useful, + ** but WITHOUT ANY WARRANTY; without even the implied warranty of + ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + ** GNU General Public License for more details. + ** + ** You should have received a copy of the GNU General Public License + ** along with this program. If not, see . + ** + ******************************************************************************/ + +#include "filterrule.h" +#include + +/* TODO + * - split this class into two: FilterRule that only deals with matching, and AdBlockRule, which only deals with parsing + */ + +/* The AdBlock standard is an incredible mess, vaguely explaiend on: + * - https://adblockplus.org/filters + * - https://adblockplus.org/filter-cheatsheet + */ + +FilterRule::FilterRule(const QString &line) +{ + valid = parse(line); +} + +FilterRule::~FilterRule() +{ +} + +bool FilterRule::isValid() const +{ + return valid; +} + +bool FilterRule::isException() const +{ + return exception; +} + +bool FilterRule::shouldBlock(const QUrl &requestUrl) const +{ + QRegularExpressionMatch match = rule.match(requestUrl.toString()); + return match.hasMatch(); +} + +bool FilterRule::parse(const QString &line) +{ + // skip for comments and empty rules + if(line.startsWith('!') || line.trimmed().isEmpty()) { + return false; + } + + // make a copy of the pattern so we can snap off the parts we've already parsed + QString pattern = line; + + if(pattern.startsWith("@@")) { + exception = true; + pattern = pattern.mid(2); // remove @@ + } + + rule.setPattern(createRegExpPattern(pattern)); + return true; +} + +QString createRegExpPattern(const QString &line) +{ + QString pattern = line; + + // replace . (any character) with \. (a dot) + pattern.replace('.', "\\."); + + // translate adblock special characters into regex + // replace wildcard (*) with '.*' (zero or more of any element) + pattern.replace('*', ".*"); + + // replace separator (^) with '($|\?|\/)' (end of string, or ?, or /, or :number) + pattern.replace('^', "($|\\?|\\/|:\\d+)"); + + // replace || with ^\w+://([\w,\d,\.]+)? + pattern.replace("||", "^\\w+://([\\w,\\d,\\.]+)?"); + + if(pattern.startsWith('|') && pattern.endsWith('|')) { + // replace | at start with ^ (start of string) + pattern.replace(0, 1, '^'); + // replace | at end with $ (end of string) + pattern.replace('|', '$'); + } + + return pattern; +} diff --git a/lib/adblock/filterrule.h b/lib/adblock/filterrule.h new file mode 100644 index 0000000..66731e2 --- /dev/null +++ b/lib/adblock/filterrule.h @@ -0,0 +1,62 @@ +/******************************************************************************* + ** + ** smolbote: yet another qute browser + ** Copyright (C) 2017 Xian Nox + ** + ** This program is free software: you can redistribute it and/or modify + ** it under the terms of the GNU General Public License as published by + ** the Free Software Foundation, either version 3 of the License, or + ** (at your option) any later version. + ** + ** This program is distributed in the hope that it will be useful, + ** but WITHOUT ANY WARRANTY; without even the implied warranty of + ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + ** GNU General Public License for more details. + ** + ** You should have received a copy of the GNU General Public License + ** along with this program. If not, see . + ** + ******************************************************************************/ + +#ifndef FILTERRULE_H +#define FILTERRULE_H + +#include + +class QUrl; +class FilterRule +{ +public: + + FilterRule(const QString &line); + + // delete the copy constructor and assignment operator + FilterRule(const FilterRule&) = delete; + FilterRule& operator=(const FilterRule&) = delete; + + // move constructor + FilterRule(FilterRule&& other) { + valid = other.valid; + exception = other.exception; + rule = other.rule; + } + + ~FilterRule(); + + bool isValid() const; + bool isException() const; + bool shouldBlock(const QUrl &requestUrl) const; + +private: + bool parse(const QString &line); + + bool valid; + bool exception; + + QRegularExpression rule; + +}; + +QString createRegExpPattern(const QString &line); + +#endif // FILTERRULE_H diff --git a/lib/adblock/test/filtertest.cpp b/lib/adblock/test/filtertest.cpp new file mode 100644 index 0000000..c778bc7 --- /dev/null +++ b/lib/adblock/test/filtertest.cpp @@ -0,0 +1,63 @@ +#include "filtertest.h" +#include +#include "../filterrule.h" +#include + +FilterTest::FilterTest(QObject *parent) : QObject(parent) +{ +} + +void FilterTest::initTestCase() +{ + addressRule = new FilterRule("/banner/*/img^"); + QVERIFY(addressRule->isValid() == true); + QVERIFY(addressRule->isException() == false); + + domainRule = new FilterRule("||ads.example.com^"); + QVERIFY(domainRule->isValid() == true); + QVERIFY(domainRule->isException() == false); + + exactAddressRule = new FilterRule("|http://example.com/|"); + QVERIFY(exactAddressRule->isValid() == true); + QVERIFY(exactAddressRule->isException() == false); +} + +void FilterTest::testAddressBlock() +{ + // This rule blocks: + QVERIFY(addressRule->shouldBlock(QUrl("http://example.com/banner/foo/img")) == true); + QVERIFY(addressRule->shouldBlock(QUrl("http://example.com/banner/foo/bar/img?param")) == true); + QVERIFY(addressRule->shouldBlock(QUrl("http://example.com/banner//img/foo")) == true); + + // This rule doesn't block: + QVERIFY(addressRule->shouldBlock(QUrl("http://example.com/banner/img")) == false); + QVERIFY(addressRule->shouldBlock(QUrl("http://example.com/banner/foo/imgraph")) == false); + QVERIFY(addressRule->shouldBlock(QUrl("http://example.com/banner/foo/img.gif")) == false); +} + +void FilterTest::testDomainBlock() +{ + // This rule blocks: + QVERIFY(domainRule->shouldBlock(QUrl("http://ads.example.com/foo.gif")) == true); + QVERIFY(domainRule->shouldBlock(QUrl("http://server1.ads.example.com/foo.gif")) == true); + QVERIFY(domainRule->shouldBlock(QUrl("https://ads.example.com:8000/")) == true); + + // This rule doesn't block: + QVERIFY(domainRule->shouldBlock(QUrl("http://ads.example.com.ua/foo.gif")) == false); + QVERIFY(domainRule->shouldBlock(QUrl("http://example.com/redirect/http://ads.example.com/")) == false); +} + +void FilterTest::testExactAddressBlock() +{ + // This rule blocks: + QVERIFY(exactAddressRule->shouldBlock(QUrl("http://example.com/")) == true); + + // This rule doesn't block: + QVERIFY(exactAddressRule->shouldBlock(QUrl("http://example.com/foo.gif")) == false); + QVERIFY(exactAddressRule->shouldBlock(QUrl("http://example.info/redirect/http://example.com/")) == false); +} + +void FilterTest::cleanupTestCase() +{ + delete addressRule; +} diff --git a/lib/adblock/test/filtertest.h b/lib/adblock/test/filtertest.h new file mode 100644 index 0000000..45cdde1 --- /dev/null +++ b/lib/adblock/test/filtertest.h @@ -0,0 +1,28 @@ +#ifndef FILTERTEST_H +#define FILTERTEST_H + +#include + +class FilterRule; +class FilterTest : public QObject +{ + Q_OBJECT +public: + explicit FilterTest(QObject *parent = nullptr); + +signals: + +private slots: + void initTestCase(); + void testAddressBlock(); + void testDomainBlock(); + void testExactAddressBlock(); + void cleanupTestCase(); + +private: + FilterRule *addressRule; + FilterRule *domainRule; + FilterRule *exactAddressRule; +}; + +#endif // FILTERTEST_H diff --git a/lib/adblock/test/main.cpp b/lib/adblock/test/main.cpp new file mode 100644 index 0000000..3284e47 --- /dev/null +++ b/lib/adblock/test/main.cpp @@ -0,0 +1,4 @@ +#include +#include "filtertest.h" + +QTEST_APPLESS_MAIN(FilterTest) diff --git a/smolbote.qbs b/smolbote.qbs index 0fb4000..d74dffe 100644 --- a/smolbote.qbs +++ b/smolbote.qbs @@ -21,6 +21,7 @@ Project { "src/lib/downloads/downloads.qbs", "src/lib/navigation/navigation.qbs", "lib/settings/settings.qbs", + "lib/adblock/adblock.qbs", "test/test.qbs", ] -- cgit v1.2.1