From c74367d82c1c7bec393548d2e5014c794333822f Mon Sep 17 00:00:00 2001
From: Aqua-sama
Date: Tue, 16 Oct 2018 17:25:40 +0200
Subject: urlfilter: Add FilterTree class

FilterTree is a class that holds filter rules, sorted by the domain they
are to be applied on. The rules are to follow FilterLeaf as interface.

- Add a hostlist rule format to FilterTree.
- Add a test for hostlist format.
---
 lib/urlfilter/CMakeLists.txt           |   9 +++
 lib/urlfilter/filterleaf.cpp           |  30 ++++++++++
 lib/urlfilter/filterleaf.h             |  45 +++++++++++++++
 lib/urlfilter/filtertree.cpp           | 108 ++++++++++++++++++++++++++++++++++
 lib/urlfilter/filtertree.h             |  52 +++++++++++++++++
 lib/urlfilter/formats/hostlistrule.cpp |  23 ++++++++
 lib/urlfilter/formats/hostlistrule.h   |  17 ++++++
 7 files changed, 284 insertions(+)
 create mode 100644 lib/urlfilter/filterleaf.cpp
 create mode 100644 lib/urlfilter/filterleaf.h
 create mode 100644 lib/urlfilter/filtertree.cpp
 create mode 100644 lib/urlfilter/filtertree.h
 create mode 100644 lib/urlfilter/formats/hostlistrule.cpp
 create mode 100644 lib/urlfilter/formats/hostlistrule.h

(limited to 'lib')

diff --git a/lib/urlfilter/CMakeLists.txt b/lib/urlfilter/CMakeLists.txt
index 842f18f..375ffa7 100644
--- a/lib/urlfilter/CMakeLists.txt
+++ b/lib/urlfilter/CMakeLists.txt
@@ -10,6 +10,15 @@ add_library(urlfilter
 
     formats/adblockrule.cpp
     formats/adblockrule.h
+
+    formats/hostlistrule.cpp
+    formats/hostlistrule.h
+
+    # filter tree
+    filtertree.cpp
+    filtertree.h
+    filterleaf.cpp
+    filterleaf.h
 )
 
 target_link_libraries(urlfilter Qt5::WebEngineWidgets)
diff --git a/lib/urlfilter/filterleaf.cpp b/lib/urlfilter/filterleaf.cpp
new file mode 100644
index 0000000..3bd10bf
--- /dev/null
+++ b/lib/urlfilter/filterleaf.cpp
@@ -0,0 +1,30 @@
+#include "filterleaf.h"
+#include <utility>
+
+// Moving never throws: it copies one bool and moves two std::strings.
+FilterLeaf::FilterLeaf(FilterLeaf &&other) noexcept
+{
+    m_isBlocking = other.m_isBlocking;
+    m_request = std::move(other.m_request);
+    m_redirect = std::move(other.m_redirect);
+}
+
+FilterLeaf &FilterLeaf::operator=(FilterLeaf &&other) noexcept
+{
+    m_isBlocking = other.m_isBlocking;
+    m_request = std::move(other.m_request);
+    m_redirect = std::move(other.m_redirect);
+    return *this;
+}
+
+// Returns the stored request string converted to a QString.
+const QString FilterLeaf::request() const
+{
+    return QString::fromStdString(m_request);
+}
+
+// Returns the stored redirect string converted to a QString.
+const QString FilterLeaf::redirect() const
+{
+    return QString::fromStdString(m_redirect);
+}
diff --git a/lib/urlfilter/filterleaf.h b/lib/urlfilter/filterleaf.h
new file mode 100644
index 0000000..6d9caae
--- /dev/null
+++ b/lib/urlfilter/filterleaf.h
@@ -0,0 +1,45 @@
+/*
+ * This file is part of smolbote. It's copyrighted by the contributors recorded
+ * in the version control history of the file, available from its original
+ * location: https://neueland.iserlohn-fortress.net/gitea/aqua/smolbote
+ *
+ * SPDX-License-Identifier: GPL-3.0
+ */
+
+#ifndef SMOLBOTE_FILTERLEAF_H
+#define SMOLBOTE_FILTERLEAF_H
+
+#include <QString>
+#include <string>
+
+/** FilterLeaf: interface implemented by every rule stored in a FilterTree. */
+class FilterLeaf
+{
+public:
+    enum Action {
+        NotMatched,
+        Allow,
+        Block,
+        Redirect
+    };
+
+    FilterLeaf(FilterLeaf &&other) noexcept;
+    FilterLeaf &operator=(FilterLeaf &&other) noexcept;
+    // Virtual: FilterTree destroys rules through FilterLeaf pointers.
+    virtual ~FilterLeaf() = default;
+
+    virtual bool match(const QString &requestUrl) const = 0;
+    virtual Action action() const = 0;
+
+    const QString request() const;
+    const QString redirect() const;
+
+protected:
+    explicit FilterLeaf() = default;
+
+    bool m_isBlocking = false;
+    std::string m_request;
+    std::string m_redirect;
+};
+
+#endif // SMOLBOTE_FILTERLEAF_H
diff --git a/lib/urlfilter/filtertree.cpp b/lib/urlfilter/filtertree.cpp
new file mode 100644
index 0000000..8844a76
--- /dev/null
+++ b/lib/urlfilter/filtertree.cpp
@@ -0,0 +1,108 @@
+/*
+ * This file is part of smolbote. It's copyrighted by the contributors recorded
+ * in the version control history of the file, available from its original
+ * location: https://neueland.iserlohn-fortress.net/gitea/aqua/smolbote
+ *
+ * SPDX-License-Identifier: GPL-3.0
+ */
+
+#include "filtertree.h"
+#include "filterleaf.h"
+#include <QTextStream>
+#include <utility>
+#include "formats/hostlistrule.h"
+
+/** Load a hosts-style list: each line is "<address> <domain> [<domain>...]".
+ * Adds one HostlistRule per domain to the branch with an empty domain.
+ * Returns false at the first line that cannot be parsed or rule that cannot
+ * be added; rules added before that point stay in the tree.
+ */
+bool loadHostlist(QIODevice &from, FilterTree *tree)
+{
+    Q_ASSERT(from.isReadable());
+    if(tree == nullptr)
+        return false;
+
+    QTextStream stream(&from);
+    while(!stream.atEnd()) {
+        // simplified() trims the line and folds tabs and repeated spaces into
+        // single spaces, so the split below never yields empty fields
+        const QString line = stream.readLine().simplified();
+        if(line.isEmpty() || line.startsWith(QLatin1Char('#')))
+            continue;
+
+        const QStringList parts = line.split(QLatin1Char(' '));
+        if(parts.length() < 2) {
+#ifdef QT_DEBUG
+            qDebug("Cannot parse: %s", qUtf8Printable(line));
+#endif
+            return false;
+        }
+
+        for(int i = 1; i < parts.length(); ++i) {
+            // a field starting with '#' begins a trailing comment
+            if(parts.at(i).startsWith(QLatin1Char('#')))
+                break;
+
+            // HostlistRule(domain, redirect)
+            auto *rule = new HostlistRule(parts.at(i), parts.constFirst());
+            // addRule(rule, enable_on_domain) takes ownership of rule
+            const bool added = tree->addRule(rule, QString());
+            if(!added)
+                return false;
+        }
+    }
+    return true;
+}
+
+const QStringList FilterTree::branches() const
+{
+    QStringList branches;
+    for(const auto &branch : m_branches) {
+        branches.append(QString::fromStdString(branch.domain));
+    }
+    return branches;
+}
+
+QVector<const FilterLeaf *> FilterTree::match(const QString &domain, const QString &requestUrl) const
+{
+    // convert once instead of once per branch
+    const std::string key = domain.toStdString();
+
+    QVector<const FilterLeaf *> leaves;
+    for(const auto &branch : m_branches) {
+        if(branch.domain != key)
+            continue;
+
+        for(const auto &leaf : branch.leaves) {
+            if(leaf->match(requestUrl)) {
+                leaves.append(leaf.get());
+            }
+        }
+    }
+    return leaves;
+}
+
+bool FilterTree::addRule(FilterLeaf *rule, const QString &domain)
+{
+    if(rule == nullptr)
+        return false;
+
+    // take ownership right away so the rule is released on every path
+    std::unique_ptr<FilterLeaf> leaf(rule);
+    const std::string key = domain.toStdString();
+
+    for(auto &branch : m_branches) {
+        if(branch.domain == key) {
+            branch.leaves.push_back(std::move(leaf));
+            return true;
+        }
+    }
+
+    // no branch was found: create it with the rule as its first leaf
+    Branch branch;
+    branch.domain = key;
+    branch.leaves.push_back(std::move(leaf));
+    m_branches.push_back(std::move(branch));
+    return true;
+}
diff --git a/lib/urlfilter/filtertree.h b/lib/urlfilter/filtertree.h
new file mode 100644
index 0000000..8cecf50
--- /dev/null
+++ b/lib/urlfilter/filtertree.h
@@ -0,0 +1,52 @@
+/*
+ * This file is part of smolbote. It's copyrighted by the contributors recorded
+ * in the version control history of the file, available from its original
+ * location: https://neueland.iserlohn-fortress.net/gitea/aqua/smolbote
+ *
+ * SPDX-License-Identifier: GPL-3.0
+ */
+
+#ifndef SMOLBOTE_FILTERTREE_H
+#define SMOLBOTE_FILTERTREE_H
+
+#include <QIODevice>
+#include <QObject>
+#include <QStringList>
+#include <QVector>
+#include <memory>
+#include <string>
+#include <vector>
+#include "filterleaf.h"
+
+/** FilterTree: two-level tree of filter rules
+ * The root of the tree contains branches that represent domains, on which their rules are to be applied.
+ * Each branch contains leaves - rules
+ */
+class FilterTree : public QObject
+{
+    Q_OBJECT
+
+public:
+    const QStringList branches() const;
+    QVector<const FilterLeaf *> match(const QString &domain, const QString &requestUrl) const;
+
+    /** Add rule to the branch for domain, creating the branch if needed.
+     * The tree takes ownership of rule. Returns false if rule is null.
+     */
+    bool addRule(FilterLeaf *rule, const QString &domain);
+
+private:
+    struct Branch {
+        // TODO: replace domain type with domain-matching class
+        std::string domain;
+        // owning pointers: a branch can be moved (e.g. when m_branches grows)
+        // without deleting or duplicating its rules
+        std::vector<std::unique_ptr<FilterLeaf>> leaves;
+    };
+
+    std::vector<Branch> m_branches;
+};
+
+bool loadHostlist(QIODevice &from, FilterTree *tree);
+
+#endif // SMOLBOTE_FILTERTREE_H
diff --git a/lib/urlfilter/formats/hostlistrule.cpp b/lib/urlfilter/formats/hostlistrule.cpp
new file mode 100644
index 0000000..1df6b3e
--- /dev/null
+++ b/lib/urlfilter/formats/hostlistrule.cpp
@@ -0,0 +1,23 @@
+#include "hostlistrule.h"
+
+HostlistRule::HostlistRule(const QString &domain, const QString &redirect)
+{
+    // an entry pointing at 0.0.0.0 blocks the domain; any other address redirects it
+    this->m_isBlocking = (redirect == QLatin1String("0.0.0.0"));
+    this->m_request = domain.toStdString();
+    this->m_redirect = redirect.toStdString();
+}
+
+bool HostlistRule::match(const QString &requestUrl) const
+{
+    // exact, byte-wise comparison with the stored domain
+    return (m_request == requestUrl.toStdString());
+}
+
+FilterLeaf::Action HostlistRule::action() const
+{
+    if(m_isBlocking)
+        return FilterLeaf::Block;
+    return FilterLeaf::Redirect;
+}
+
diff --git a/lib/urlfilter/formats/hostlistrule.h b/lib/urlfilter/formats/hostlistrule.h
new file mode 100644
index 0000000..764a2e2
--- /dev/null
+++ b/lib/urlfilter/formats/hostlistrule.h
@@ -0,0 +1,17 @@
+#ifndef SMOLBOTE_HOSTLIST_RULE_H
+#define SMOLBOTE_HOSTLIST_RULE_H
+
+#include "../filterleaf.h"
+#include <QString>
+
+/** HostlistRule: one (redirect address, domain) entry of a hosts-style list. */
+class HostlistRule : public FilterLeaf
+{
+public:
+    explicit HostlistRule(const QString &domain, const QString &redirect);
+
+    bool match(const QString &requestUrl) const override;
+    FilterLeaf::Action action() const override;
+};
+
+#endif // SMOLBOTE_HOSTLIST_RULE_H
-- 
cgit v1.2.1