diff options
-rw-r--r-- | lib/urlfilter/CMakeLists.txt | 9 | ||||
-rw-r--r-- | lib/urlfilter/filterleaf.cpp | 34 | ||||
-rw-r--r-- | lib/urlfilter/filterleaf.h | 51 | ||||
-rw-r--r-- | lib/urlfilter/filtertree.cpp | 120 | ||||
-rw-r--r-- | lib/urlfilter/filtertree.h | 56 | ||||
-rw-r--r-- | lib/urlfilter/formats/hostlistrule.cpp | 23 | ||||
-rw-r--r-- | lib/urlfilter/formats/hostlistrule.h | 17 | ||||
-rw-r--r-- | linux/.config | 2 | ||||
-rw-r--r-- | test/CMakeLists.txt | 15 | ||||
-rw-r--r-- | test/adblock/adblocktest.cpp (renamed from test/urlfilter/adblocktest.cpp) | 0 | ||||
-rw-r--r-- | test/adblock/adblocktest.h (renamed from test/urlfilter/adblocktest.h) | 0 | ||||
-rw-r--r-- | test/hostlist.txt | 9 | ||||
-rw-r--r-- | test/hostlist/hostlisttest.cpp | 47 | ||||
-rw-r--r-- | test/hostlist/hostlisttest.h | 21 |
14 files changed, 400 insertions, 4 deletions
diff --git a/lib/urlfilter/CMakeLists.txt b/lib/urlfilter/CMakeLists.txt
index 842f18f..375ffa7 100644
--- a/lib/urlfilter/CMakeLists.txt
+++ b/lib/urlfilter/CMakeLists.txt
@@ -10,6 +10,15 @@ add_library(urlfilter
 
     formats/adblockrule.cpp
     formats/adblockrule.h
+
+    formats/hostlistrule.cpp
+    formats/hostlistrule.h
+
+    # filter tree
+    filtertree.cpp
+    filtertree.h
+    filterleaf.cpp
+    filterleaf.h
 )
 
 target_link_libraries(urlfilter Qt5::WebEngineWidgets)
diff --git a/lib/urlfilter/filterleaf.cpp b/lib/urlfilter/filterleaf.cpp
new file mode 100644
index 0000000..3bd10bf
--- /dev/null
+++ b/lib/urlfilter/filterleaf.cpp
@@ -0,0 +1,34 @@
+#include "filterleaf.h"
+
+#include <utility>
+
+// Move construction: copies the blocking flag and moves both rule strings.
+FilterLeaf::FilterLeaf(FilterLeaf &&other) noexcept
+    : m_isBlocking(other.m_isBlocking)
+    , m_request(std::move(other.m_request))
+    , m_redirect(std::move(other.m_redirect))
+{
+}
+
+// Move assignment: assigning an object to itself leaves it unchanged.
+FilterLeaf &FilterLeaf::operator=(FilterLeaf &&other) noexcept
+{
+    if(this != &other) {
+        m_isBlocking = other.m_isBlocking;
+        m_request = std::move(other.m_request);
+        m_redirect = std::move(other.m_redirect);
+    }
+    return *this;
+}
+
+// Returns m_request converted to a QString.
+const QString FilterLeaf::request() const
+{
+    return QString::fromStdString(m_request);
+}
+
+// Returns m_redirect converted to a QString.
+const QString FilterLeaf::redirect() const
+{
+    return QString::fromStdString(m_redirect);
+}
diff --git a/lib/urlfilter/filterleaf.h b/lib/urlfilter/filterleaf.h
new file mode 100644
index 0000000..6d9caae
--- /dev/null
+++ b/lib/urlfilter/filterleaf.h
@@ -0,0 +1,51 @@
+/*
+ * This file is part of smolbote. It's copyrighted by the contributors recorded
+ * in the version control history of the file, available from its original
+ * location: https://neueland.iserlohn-fortress.net/gitea/aqua/smolbote
+ *
+ * SPDX-License-Identifier: GPL-3.0
+ */
+
+#ifndef SMOLBOTE_FILTERLEAF_H
+#define SMOLBOTE_FILTERLEAF_H
+
+#include <QObject>
+#include <QString>
+#include <string>
+
+/** FilterLeaf: abstract base class of a single filter rule.
+ * Concrete rule formats implement match() and action().
+ */
+class FilterLeaf
+{
+public:
+    /// Possible outcomes a rule can report from action().
+    enum Action {
+        NotMatched,
+        Allow,
+        Block,
+        Redirect
+    };
+
+    FilterLeaf(FilterLeaf &&other) noexcept;
+    FilterLeaf &operator=(FilterLeaf &&other) noexcept;
+    // Virtual: FilterTree owns and destroys rules through FilterLeaf pointers.
+    virtual ~FilterLeaf() = default;
+
+    /// Returns true if this rule applies to requestUrl.
+    virtual bool match(const QString &requestUrl) const = 0;
+    /// Returns the action this rule stands for.
+    virtual Action action() const = 0;
+
+    const QString request() const;
+    const QString redirect() const;
+
+protected:
+    explicit FilterLeaf() = default;
+
+    bool m_isBlocking = false;
+    std::string m_request;
+    std::string m_redirect;
+};
+
+#endif // SMOLBOTE_FILTERLEAF_H
diff --git a/lib/urlfilter/filtertree.cpp b/lib/urlfilter/filtertree.cpp
new file mode 100644
index 0000000..8844a76
--- /dev/null
+++ b/lib/urlfilter/filtertree.cpp
@@ -0,0 +1,120 @@
+/*
+ * This file is part of smolbote. It's copyrighted by the contributors recorded
+ * in the version control history of the file, available from its original
+ * location: https://neueland.iserlohn-fortress.net/gitea/aqua/smolbote
+ *
+ * SPDX-License-Identifier: GPL-3.0
+ */
+
+#include "filtertree.h"
+#include "filterleaf.h"
+#include "formats/hostlistrule.h"
+#include <QTextStream>
+#include <memory>
+#include <string>
+#include <utility>
+
+/** Parse a hosts-format list from `from` and add one HostlistRule per listed
+ * domain to `tree`. Each rule line is "<address> <domain> [<domain>...]";
+ * blank lines are skipped and everything from a '#' to the end of a line is
+ * ignored.
+ * Returns false if tree is null, the device is not readable, or a line holds
+ * an address without any domain. Rules added before the failing line stay in
+ * the tree.
+ */
+bool loadHostlist(QIODevice &from, FilterTree *tree)
+{
+    Q_ASSERT(tree != nullptr);
+    Q_ASSERT(from.isReadable());
+    // Q_ASSERT is compiled out of release builds, so check at runtime as well
+    if(tree == nullptr || !from.isReadable())
+        return false;
+
+    QTextStream stream(&from);
+    while(!stream.atEnd()) {
+        QString line = stream.readLine();
+
+        // strip comments, both whole-line and trailing ones
+        const int commentStart = line.indexOf(QLatin1Char('#'));
+        if(commentStart >= 0)
+            line.truncate(commentStart);
+
+        // simplified() trims the line and collapses tabs and repeated spaces
+        // into single spaces, so the split below never yields empty parts
+        line = line.simplified();
+        if(line.isEmpty())
+            continue;
+
+        const QStringList parts = line.split(QLatin1Char(' '));
+        if(parts.length() < 2) {
+#ifdef QT_DEBUG
+            qDebug("Cannot parse: %s", qUtf8Printable(line));
+#endif
+            return false;
+        }
+
+        for(int i = 1; i < parts.length(); ++i) {
+            // HostlistRule(domain, redirect); addRule takes ownership of it
+            auto *rule = new HostlistRule(parts.at(i), parts.constFirst());
+            // addRule(rule, enable_on_domain)
+            if(!tree->addRule(rule, QString()))
+                return false;
+        }
+    }
+    return true;
+}
+
+// Lists the domain of every branch, in the order the branches were created.
+const QStringList FilterTree::branches() const
+{
+    QStringList branches;
+    for(const auto &branch : m_branches) {
+        branches.append(QString::fromStdString(branch.domain));
+    }
+    return branches;
+}
+
+// Collects the rules on the branch for `domain` that match requestUrl.
+QVector<const FilterLeaf *> FilterTree::match(const QString &domain, const QString &requestUrl) const
+{
+    // convert once instead of once per branch
+    const std::string key = domain.toStdString();
+
+    QVector<const FilterLeaf *> leaves;
+    for(const auto &branch : m_branches) {
+        if(branch.domain != key)
+            continue;
+
+        for(const auto &leaf : branch.leaves) {
+            if(leaf->match(requestUrl)) {
+                leaves.append(leaf.get());
+            }
+        }
+    }
+    return leaves;
+}
+
+// Adds `rule` to the branch for `domain`, creating the branch if needed.
+bool FilterTree::addRule(FilterLeaf *rule, const QString &domain)
+{
+    if(rule == nullptr)
+        return false;
+
+    // take ownership right away so the rule cannot leak on any path below
+    std::unique_ptr<FilterLeaf> leaf(rule);
+    const std::string key = domain.toStdString();
+
+    for(auto &branch : m_branches) {
+        if(branch.domain == key) {
+            branch.leaves.push_back(std::move(leaf));
+            return true;
+        }
+    }
+
+    // no branch was found: create one that already holds the rule
+    Branch branch;
+    branch.domain = key;
+    branch.leaves.push_back(std::move(leaf));
+    m_branches.push_back(std::move(branch));
+    return true;
+}
diff --git a/lib/urlfilter/filtertree.h b/lib/urlfilter/filtertree.h
new file mode 100644
index 0000000..8cecf50
--- /dev/null
+++ b/lib/urlfilter/filtertree.h
@@ -0,0 +1,56 @@
+/*
+ * This file is part of smolbote. It's copyrighted by the contributors recorded
+ * in the version control history of the file, available from its original
+ * location: https://neueland.iserlohn-fortress.net/gitea/aqua/smolbote
+ *
+ * SPDX-License-Identifier: GPL-3.0
+ */
+
+#ifndef SMOLBOTE_FILTERTREE_H
+#define SMOLBOTE_FILTERTREE_H
+
+#include "filterleaf.h"
+#include <QIODevice>
+#include <QObject>
+#include <QString>
+#include <QStringList>
+#include <QVector>
+#include <memory>
+#include <string>
+#include <vector>
+
+/** FilterTree: two-level tree of filter rules
+ * The root of the tree contains branches that represent domains, on which their rules are to be applied.
+ * Each branch contains leaves - rules
+ */
+class FilterTree : public QObject
+{
+    Q_OBJECT
+
+public:
+    /// Domains of all branches currently in the tree.
+    const QStringList branches() const;
+    /// Rules stored on the branch for `domain` that match requestUrl. The
+    /// returned pointers stay owned by the tree.
+    QVector<const FilterLeaf *> match(const QString &domain, const QString &requestUrl) const;
+
+    /// Adds rule to the branch for `domain` and takes ownership of it.
+    /// Returns false (and adds nothing) if rule is null.
+    bool addRule(FilterLeaf *rule, const QString &domain);
+
+private:
+    struct Branch {
+        // TODO: replace domain type with domain-matching class
+        std::string domain;
+        // unique_ptr makes Branch move-only, so growing m_branches moves the
+        // leaves instead of copying raw pointers and deleting them twice
+        std::vector<std::unique_ptr<FilterLeaf>> leaves;
+    };
+
+    std::vector<Branch> m_branches;
+};
+
+/// Loads a hosts-format list from `from` into tree; returns false on failure.
+bool loadHostlist(QIODevice &from, FilterTree *tree);
+
+#endif // SMOLBOTE_FILTERTREE_H
diff --git a/lib/urlfilter/formats/hostlistrule.cpp b/lib/urlfilter/formats/hostlistrule.cpp
new file mode 100644
index 0000000..1df6b3e
--- /dev/null
+++ b/lib/urlfilter/formats/hostlistrule.cpp
@@ -0,0 +1,23 @@
+#include "hostlistrule.h"
+
+// A redirect of 0.0.0.0 marks the rule as blocking; with any other value
+// action() reports a redirect.
+HostlistRule::HostlistRule(const QString &domain, const QString &redirect)
+{
+    this->m_isBlocking = (redirect == QLatin1Literal("0.0.0.0"));
+    this->m_request = domain.toStdString();
+    this->m_redirect = redirect.toStdString();
+}
+
+// Matches only when requestUrl equals the domain given at construction.
+bool HostlistRule::match(const QString &requestUrl) const
+{
+    return (m_request == requestUrl.toStdString());
+}
+
+FilterLeaf::Action HostlistRule::action() const
+{
+    if(m_isBlocking)
+        return FilterLeaf::Block;
+    return FilterLeaf::Redirect;
+}
diff --git a/lib/urlfilter/formats/hostlistrule.h b/lib/urlfilter/formats/hostlistrule.h
new file mode 100644
index 0000000..764a2e2
--- /dev/null
+++ b/lib/urlfilter/formats/hostlistrule.h
@@ -0,0 +1,17 @@
+#ifndef SMOLBOTE_HOSTLIST_RULE_H
+#define SMOLBOTE_HOSTLIST_RULE_H
+
+#include "../filterleaf.h"
+#include <QString>
+
+/** HostlistRule: rule created from one "<address> <domain>" hostlist entry */
+class HostlistRule : public FilterLeaf
+{
+public:
+    explicit HostlistRule(const QString &domain, const QString &redirect);
+
+    bool match(const QString &requestUrl) const override;
+    FilterLeaf::Action action() const override;
+};
+
+#endif // SMOLBOTE_HOSTLIST_RULE_H
diff --git a/linux/.config b/linux/.config
index ea2d51c..eb0fb92 100644
--- a/linux/.config
+++ b/linux/.config
@@ -48,7 +48,7 @@ CONFIG_USEPLASMA=y
 # Devel
 #
 CONFIG_QTWARNINGS=y
-# CONFIG_TESTS is not set
+CONFIG_TESTS=y
 # CONFIG_LLVMLIBCPP is not set
 
 #
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index 4302ab2..6b740d7 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -3,10 +3,19 @@ set(CMAKE_AUTOMOC ON)
 set(CMAKE_AUTORCC ON)
 
 add_executable(AdBlockTest
-    urlfilter/adblocktest.cpp
-    urlfilter/adblocktest.h
+    adblock/adblocktest.cpp
+    adblock/adblocktest.h
 )
 
 target_include_directories(AdBlockTest PRIVATE ../lib/urlfilter)
-
 target_link_libraries(AdBlockTest Qt5::Test urlfilter)
+
+add_executable(HostlistTest
+    hostlist/hostlisttest.cpp
+    hostlist/hostlisttest.h
+)
+target_include_directories(HostlistTest PRIVATE ../lib/urlfilter/)
+target_link_libraries(HostlistTest Qt5::Test urlfilter)
+
+add_test(NAME urlfilter-adblock COMMAND AdBlockTest WORKING_DIRECTORY ${CMAKE_CURRENT_LIST_DIR})
+add_test(NAME urlfilter-hostlist COMMAND HostlistTest WORKING_DIRECTORY ${CMAKE_CURRENT_LIST_DIR})
diff --git a/test/urlfilter/adblocktest.cpp b/test/adblock/adblocktest.cpp
index b31d965..b31d965 100644
--- a/test/urlfilter/adblocktest.cpp
+++ b/test/adblock/adblocktest.cpp
diff --git a/test/urlfilter/adblocktest.h b/test/adblock/adblocktest.h
index 95cb7e2..95cb7e2 100644
--- a/test/urlfilter/adblocktest.h
+++ b/test/adblock/adblocktest.h
diff --git a/test/hostlist.txt b/test/hostlist.txt
new file mode 100644
index 0000000..a0b4e5c
--- /dev/null
+++ b/test/hostlist.txt
@@ -0,0 +1,9 @@
+# This is a comment, and after it comes a blank line
+
+127.0.0.1 localhost.localdomain
+
+0.0.0.0 blockeddomain.com
+0.0.0.0 blockeddomain.first blockeddomain.second
+
+# repeated separators and trailing comments are accepted
+0.0.0.0   blockeddomain.spaced   # trailing comment
diff --git a/test/hostlist/hostlisttest.cpp b/test/hostlist/hostlisttest.cpp
new file mode 100644
index 0000000..46f6a85
--- /dev/null
+++ b/test/hostlist/hostlisttest.cpp
@@ -0,0 +1,47 @@
+#include "hostlisttest.h"
+#include <QtTest/QtTest>
+
+void HostlistTest::parseList()
+{
+    // load filters
+    QFile hostlist("hostlist.txt");
+    QVERIFY(hostlist.open(QIODevice::ReadOnly | QIODevice::Text));
+    QVERIFY(loadHostlist(hostlist, &tree));
+
+    // every hostlist rule is added to the branch of the empty domain
+    QCOMPARE(tree.branches().length(), 1);
+}
+
+void HostlistTest::checkRules()
+{
+    // test block
+    QVector<const FilterLeaf *> block = tree.match(QString(), "blockeddomain.com");
+    QCOMPARE(block.length(), 1);
+    QCOMPARE(block.constFirst()->action(), FilterLeaf::Block);
+
+    // test redirect
+    QVector<const FilterLeaf *> redirectResult = tree.match(QString(), "localhost.localdomain");
+    QCOMPARE(redirectResult.length(), 1);
+    QCOMPARE(redirectResult.at(0)->action(), FilterLeaf::Redirect);
+    QCOMPARE(redirectResult.at(0)->redirect(), "127.0.0.1");
+
+    // two domains on one line
+    QVector<const FilterLeaf *> blockFirst = tree.match(QString(), "blockeddomain.first");
+    QCOMPARE(blockFirst.length(), 1);
+    QCOMPARE(blockFirst.constFirst()->action(), FilterLeaf::Block);
+    QVector<const FilterLeaf *> blockSecond = tree.match(QString(), "blockeddomain.second");
+    QCOMPARE(blockSecond.length(), 1);
+    QCOMPARE(blockSecond.constFirst()->action(), FilterLeaf::Block);
+
+    // repeated separators and a trailing comment
+    QVector<const FilterLeaf *> blockSpaced = tree.match(QString(), "blockeddomain.spaced");
+    QCOMPARE(blockSpaced.length(), 1);
+    QCOMPARE(blockSpaced.constFirst()->action(), FilterLeaf::Block);
+    QCOMPARE(tree.match(QString(), "trailing").length(), 0);
+
+    // domain not on list
+    QVector<const FilterLeaf *> missing = tree.match(QString(), "other.domain");
+    QCOMPARE(missing.length(), 0);
+}
+
+QTEST_GUILESS_MAIN(HostlistTest)
diff --git a/test/hostlist/hostlisttest.h b/test/hostlist/hostlisttest.h
new file mode 100644
index 0000000..9a87e0d
--- /dev/null
+++ b/test/hostlist/hostlisttest.h
@@ -0,0 +1,21 @@
+#ifndef HOSTLIST_TEST
+#define HOSTLIST_TEST
+
+#include <QObject>
+#include "filtertree.h"
+
+class HostlistTest : public QObject
+{
+    Q_OBJECT
+
+private slots:
+    // QtTest runs the slots in declaration order: parseList() fills the tree
+    // that checkRules() then queries
+    void parseList();
+    void checkRules();
+
+private:
+    FilterTree tree;
+};
+
+#endif