aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAqua-sama <aqua@iserlohn-fortress.net>2018-10-16 17:25:40 +0200
committerAqua-sama <aqua@iserlohn-fortress.net>2018-10-16 17:25:40 +0200
commitc74367d82c1c7bec393548d2e5014c794333822f (patch)
tree909bcde935c84e566db528b1ab25d81778e13036
parentAdd workaround for QTBUG-62511 (diff)
downloadsmolbote-c74367d82c1c7bec393548d2e5014c794333822f.tar.xz
urlfilter: Add FilterTree class
FilterTree is a class that holds filter rules, grouped by the domain they are to be applied on. Rules must implement the FilterLeaf interface. - Add a hostlist rule format to FilterTree. - Add a test for the hostlist format.
-rw-r--r--lib/urlfilter/CMakeLists.txt9
-rw-r--r--lib/urlfilter/filterleaf.cpp26
-rw-r--r--lib/urlfilter/filterleaf.h43
-rw-r--r--lib/urlfilter/filtertree.cpp86
-rw-r--r--lib/urlfilter/filtertree.h46
-rw-r--r--lib/urlfilter/formats/hostlistrule.cpp21
-rw-r--r--lib/urlfilter/formats/hostlistrule.h16
-rw-r--r--linux/.config2
-rw-r--r--test/CMakeLists.txt15
-rw-r--r--test/adblock/adblocktest.cpp (renamed from test/urlfilter/adblocktest.cpp)0
-rw-r--r--test/adblock/adblocktest.h (renamed from test/urlfilter/adblocktest.h)0
-rw-r--r--test/hostlist.txt6
-rw-r--r--test/hostlist/hostlisttest.cpp42
-rw-r--r--test/hostlist/hostlisttest.h19
14 files changed, 327 insertions, 4 deletions
diff --git a/lib/urlfilter/CMakeLists.txt b/lib/urlfilter/CMakeLists.txt
index 842f18f..375ffa7 100644
--- a/lib/urlfilter/CMakeLists.txt
+++ b/lib/urlfilter/CMakeLists.txt
@@ -10,6 +10,15 @@ add_library(urlfilter
formats/adblockrule.cpp
formats/adblockrule.h
+
+ formats/hostlistrule.cpp
+ formats/hostlistrule.h
+
+ # filter tree
+ filtertree.cpp
+ filtertree.h
+ filterleaf.cpp
+ filterleaf.h
)
target_link_libraries(urlfilter Qt5::WebEngineWidgets)
diff --git a/lib/urlfilter/filterleaf.cpp b/lib/urlfilter/filterleaf.cpp
new file mode 100644
index 0000000..3bd10bf
--- /dev/null
+++ b/lib/urlfilter/filterleaf.cpp
@@ -0,0 +1,26 @@
+#include "filterleaf.h"
+
+// Move constructor: copies the blocking flag and moves both strings out of `other`.
+FilterLeaf::FilterLeaf(FilterLeaf && other)
+    : m_isBlocking(other.m_isBlocking)
+    , m_request(std::move(other.m_request))
+    , m_redirect(std::move(other.m_redirect))
+{
+}
+
+// Move assignment: replaces this leaf's state with the state of `other`.
+// The two strings are moved out of `other`; the blocking flag is copied.
+FilterLeaf & FilterLeaf::operator=(FilterLeaf && other)
+{
+    m_redirect = std::move(other.m_redirect);
+    m_request = std::move(other.m_request);
+    m_isBlocking = other.m_isBlocking;
+    return *this;
+}
+
+// Returns the stored request string, converted from std::string to QString.
+const QString FilterLeaf::request() const
+{
+    const QString requestAsQString = QString::fromStdString(m_request);
+    return requestAsQString;
+}
+
+// Returns the stored redirect string, converted from std::string to QString.
+const QString FilterLeaf::redirect() const
+{
+    const QString redirectAsQString = QString::fromStdString(m_redirect);
+    return redirectAsQString;
+}
diff --git a/lib/urlfilter/filterleaf.h b/lib/urlfilter/filterleaf.h
new file mode 100644
index 0000000..6d9caae
--- /dev/null
+++ b/lib/urlfilter/filterleaf.h
@@ -0,0 +1,43 @@
+/*
+ * This file is part of smolbote. It's copyrighted by the contributors recorded
+ * in the version control history of the file, available from its original
+ * location: https://neueland.iserlohn-fortress.net/gitea/aqua/smolbote
+ *
+ * SPDX-License-Identifier: GPL-3.0
+ */
+
+#ifndef SMOLBOTE_FILTERLEAF_H
+#define SMOLBOTE_FILTERLEAF_H
+
+#include <QObject>
+#include <QString>
+
+/**
+ * FilterLeaf: abstract base class for a single filter rule.
+ *
+ * Concrete rule formats derive from this class, fill in the protected
+ * members and implement match() and action(). Instances are movable but
+ * not copyable (only the move operations are declared).
+ */
+class FilterLeaf
+{
+public:
+    /// Result a rule reports through action().
+    enum Action {
+        NotMatched,
+        Allow,
+        Block,
+        Redirect
+    };
+
+    FilterLeaf(FilterLeaf &&other);
+    FilterLeaf& operator=(FilterLeaf &&other);
+    // Virtual because rules are stored and deleted through FilterLeaf pointers;
+    // deleting a derived rule through a non-virtual base destructor is undefined behaviour.
+    virtual ~FilterLeaf() = default;
+
+    /// Returns true if this rule matches requestUrl; implemented by each rule format.
+    virtual bool match(const QString &requestUrl) const = 0;
+    /// Returns the action of this rule; implemented by each rule format.
+    virtual Action action() const = 0;
+
+    /// The stored request string as a QString.
+    const QString request() const;
+    /// The stored redirect string as a QString.
+    const QString redirect() const;
+
+protected:
+    explicit FilterLeaf() = default;
+
+    // Initialised so that a subclass that forgets to set it never reads an indeterminate value.
+    bool m_isBlocking = false;
+    std::string m_request;
+    std::string m_redirect;
+};
+
+#endif // SMOLBOTE_FILTERLEAF_H
diff --git a/lib/urlfilter/filtertree.cpp b/lib/urlfilter/filtertree.cpp
new file mode 100644
index 0000000..8844a76
--- /dev/null
+++ b/lib/urlfilter/filtertree.cpp
@@ -0,0 +1,86 @@
+/*
+ * This file is part of smolbote. It's copyrighted by the contributors recorded
+ * in the version control history of the file, available from its original
+ * location: https://neueland.iserlohn-fortress.net/gitea/aqua/smolbote
+ *
+ * SPDX-License-Identifier: GPL-3.0
+ */
+
+#include "filtertree.h"
+#include "filterleaf.h"
+#include <QTextStream>
+#include "formats/hostlistrule.h"
+
+/**
+ * Parses a hostlist and adds one HostlistRule per listed domain to `tree`.
+ *
+ * Each rule line has the form `<redirect> <domain> [<domain> ...]`. Fields may
+ * be separated by any run of whitespace (spaces or tabs). Everything from a
+ * `#` to the end of the line is treated as a comment; lines that are empty
+ * after comment removal are skipped. All rules are added to the branch with
+ * the empty domain.
+ *
+ * @param from device to read the list from; must be readable
+ * @param tree tree that receives the rules
+ * @return true if the whole input was loaded; false if `tree` is null, `from`
+ *         is not readable, a line has fewer than two fields, or a rule could
+ *         not be added. Rules added before a failure stay in the tree.
+ */
+bool loadHostlist(QIODevice &from, FilterTree* tree)
+{
+    // Checked at runtime: a Q_ASSERT would disappear in release builds and an
+    // unreadable device would then be reported as a successfully loaded list.
+    if(tree == nullptr || !from.isReadable())
+        return false;
+
+    QTextStream stream(&from);
+    while(!stream.atEnd()) {
+        QString line = stream.readLine();
+
+        // strip comments, including ones that trail a rule
+        const auto commentStart = line.indexOf(QLatin1Char('#'));
+        if(commentStart >= 0)
+            line.truncate(commentStart);
+
+        // simplified() trims the line and collapses every whitespace run
+        // (tabs, repeated spaces) into one space, so split() yields no empty fields
+        line = line.simplified();
+        if(line.isEmpty())
+            continue;
+
+        const QStringList parts = line.split(QLatin1Char(' '));
+        if(parts.length() < 2) {
+#ifdef QT_DEBUG
+            qDebug("Cannot parse: %s", qUtf8Printable(line));
+#endif
+            return false;
+        }
+
+        // first field is the redirect target, every following field is a domain
+        const QString &redirect = parts.constFirst();
+        for(int i = 1; i < parts.length(); ++i) {
+            // HostlistRule(domain, redirect)
+            auto *rule = new HostlistRule(parts.at(i), redirect);
+            // addRule(rule, enable_on_domain)
+            if(!tree->addRule(rule, QString()))
+                return false;
+        }
+    }
+    return true;
+}
+
+// Returns the domain of every branch, in the order the branches are stored.
+const QStringList FilterTree::branches() const
+{
+    QStringList domains;
+    for(auto it = m_branches.cbegin(); it != m_branches.cend(); ++it) {
+        domains << QString::fromStdString(it->domain);
+    }
+    return domains;
+}
+
+/**
+ * Collects the rules that match a request.
+ *
+ * @param domain     only branches whose domain equals this string are searched
+ * @param requestUrl passed to FilterLeaf::match() of every rule in those branches
+ * @return the matching rules; the pointers remain owned by the tree
+ */
+QVector<const FilterLeaf *> FilterTree::match(const QString& domain, const QString& requestUrl) const
+{
+    // convert once, not once per branch
+    const std::string wantedDomain = domain.toStdString();
+
+    QVector<const FilterLeaf *> leaves;
+    for(const auto &branch : m_branches) {
+        if(branch.domain != wantedDomain)
+            continue;
+
+        for(const FilterLeaf *leaf : branch.leaves) {
+            if(leaf->match(requestUrl))
+                leaves.append(leaf);
+        }
+    }
+    return leaves;
+}
+
+/**
+ * Adds a rule to the branch for `domain`, creating the branch if needed.
+ *
+ * @param rule   rule to add; the branch deletes it in its destructor, so the
+ *               caller hands over ownership on success
+ * @param domain domain of the branch the rule belongs to
+ * @return true if the rule was stored, false if `rule` is null
+ */
+bool FilterTree::addRule(FilterLeaf *rule, const QString& domain)
+{
+    // a null rule would be dereferenced later in match()
+    if(rule == nullptr)
+        return false;
+
+    // convert once, not once per branch
+    const std::string key = domain.toStdString();
+
+    for(auto &branch : m_branches) {
+        if(branch.domain == key) {
+            branch.leaves.emplace_back(rule);
+            return true;
+        }
+    }
+
+    // No branch was found: construct an empty one in place and attach the rule
+    // to the stored element. No temporary Branch ever holds the pointer, so
+    // exactly one Branch owns it regardless of how Branch is copied or moved.
+    m_branches.emplace_back();
+    Branch &created = m_branches.back();
+    created.domain = key;
+    created.leaves.emplace_back(rule);
+    return true;
+}
diff --git a/lib/urlfilter/filtertree.h b/lib/urlfilter/filtertree.h
new file mode 100644
index 0000000..8cecf50
--- /dev/null
+++ b/lib/urlfilter/filtertree.h
@@ -0,0 +1,46 @@
+/*
+ * This file is part of smolbote. It's copyrighted by the contributors recorded
+ * in the version control history of the file, available from its original
+ * location: https://neueland.iserlohn-fortress.net/gitea/aqua/smolbote
+ *
+ * SPDX-License-Identifier: GPL-3.0
+ */
+
+#ifndef SMOLBOTE_FILTERTREE_H
+#define SMOLBOTE_FILTERTREE_H
+
+#include <QObject>
+#include <QVector>
+#include <vector>
+#include <QIODevice>
+#include "filterleaf.h"
+
+/** FilterTree: two-level tree of filter rules
+ * The root of the tree contains branches that represent domains, on which their rules are to be applied.
+ * Each branch contains leaves - rules. A branch owns its leaves and deletes them when it is destroyed.
+ */
+class FilterTree : public QObject
+{
+    Q_OBJECT
+
+public:
+    /// Domains of all branches currently in the tree.
+    const QStringList branches() const;
+    /// Rules in the branch for `domain` that match `requestUrl`; the tree keeps ownership.
+    QVector<const FilterLeaf *> match(const QString &domain, const QString &requestUrl) const;
+
+    /// Adds `rule` to the branch for `domain`; the tree takes ownership of the rule.
+    bool addRule(FilterLeaf *rule, const QString &domain);
+
+private:
+    struct Branch {
+        Branch() = default;
+        ~Branch() { qDeleteAll(leaves); }
+
+        // A branch owns its leaves through raw pointers. A copy would make two
+        // branches delete the same rules (e.g. when std::vector reallocates),
+        // so copying is disabled and moving transfers ownership.
+        Branch(const Branch &) = delete;
+        Branch &operator=(const Branch &) = delete;
+
+        Branch(Branch &&other) noexcept
+            : domain(std::move(other.domain))
+            , leaves(std::move(other.leaves))
+        {
+            // make sure the moved-from branch has nothing left to delete
+            other.leaves.clear();
+        }
+
+        Branch &operator=(Branch &&other) noexcept
+        {
+            if(this != &other) {
+                // release the rules this branch owned before taking over the new ones
+                qDeleteAll(leaves);
+                domain = std::move(other.domain);
+                leaves = std::move(other.leaves);
+                other.leaves.clear();
+            }
+            return *this;
+        }
+
+        // TODO: replace domain type with domain-matching class
+        std::string domain;
+        std::vector<FilterLeaf *> leaves;
+    };
+
+    std::vector<Branch> m_branches;
+};
+
+// Reads hostlist-format rules from `from` and adds them to `tree`.
+// Returns true if the whole input was loaded, false otherwise.
+bool loadHostlist(QIODevice &from, FilterTree *tree);
+
+#endif // SMOLBOTE_FILTERTREE_H
diff --git a/lib/urlfilter/formats/hostlistrule.cpp b/lib/urlfilter/formats/hostlistrule.cpp
new file mode 100644
index 0000000..1df6b3e
--- /dev/null
+++ b/lib/urlfilter/formats/hostlistrule.cpp
@@ -0,0 +1,21 @@
+#include "hostlistrule.h"
+
+// Creates a rule for `domain` with the given redirect target.
+// A redirect of "0.0.0.0" marks the rule as blocking.
+HostlistRule::HostlistRule(const QString &domain, const QString& redirect)
+{
+    const bool blocksRequest = (redirect == QLatin1Literal("0.0.0.0"));
+
+    m_request = domain.toStdString();
+    m_redirect = redirect.toStdString();
+    m_isBlocking = blocksRequest;
+}
+
+// A hostlist rule matches only when requestUrl is exactly equal to the stored domain.
+bool HostlistRule::match(const QString& requestUrl) const
+{
+    const std::string candidate = requestUrl.toStdString();
+    return candidate == m_request;
+}
+
+// Blocking rules report Block; every other hostlist rule reports Redirect.
+FilterLeaf::Action HostlistRule::action() const
+{
+    return m_isBlocking ? FilterLeaf::Block : FilterLeaf::Redirect;
+}
+
diff --git a/lib/urlfilter/formats/hostlistrule.h b/lib/urlfilter/formats/hostlistrule.h
new file mode 100644
index 0000000..764a2e2
--- /dev/null
+++ b/lib/urlfilter/formats/hostlistrule.h
@@ -0,0 +1,16 @@
+#ifndef SMOLBOTE_HOSTLIST_RULE_H
+#define SMOLBOTE_HOSTLIST_RULE_H
+
+#include "../filterleaf.h"
+#include <QString>
+
+// Filter rule created from one hostlist entry: a domain and the address it is redirected to.
+class HostlistRule : public FilterLeaf
+{
+public:
+ // Stores `domain` as the request and `redirect` as the redirect target;
+ // a redirect of "0.0.0.0" makes the rule blocking.
+ explicit HostlistRule(const QString &domain, const QString &redirect);
+
+ // True only if requestUrl is exactly equal to the stored domain.
+ bool match(const QString &requestUrl) const override;
+ // FilterLeaf::Block for blocking rules, FilterLeaf::Redirect otherwise.
+ FilterLeaf::Action action() const override;
+};
+
+#endif // SMOLBOTE_HOSTLIST_RULE_H
diff --git a/linux/.config b/linux/.config
index ea2d51c..eb0fb92 100644
--- a/linux/.config
+++ b/linux/.config
@@ -48,7 +48,7 @@ CONFIG_USEPLASMA=y
# Devel
#
CONFIG_QTWARNINGS=y
-# CONFIG_TESTS is not set
+CONFIG_TESTS=y
# CONFIG_LLVMLIBCPP is not set
#
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index 4302ab2..6b740d7 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -3,10 +3,19 @@ set(CMAKE_AUTOMOC ON)
set(CMAKE_AUTORCC ON)
add_executable(AdBlockTest
- urlfilter/adblocktest.cpp
- urlfilter/adblocktest.h
+ adblock/adblocktest.cpp
+ adblock/adblocktest.h
)
target_include_directories(AdBlockTest PRIVATE ../lib/urlfilter)
-
target_link_libraries(AdBlockTest Qt5::Test urlfilter)
+
+# Test executable for the hostlist rule format
+add_executable(HostlistTest
+ hostlist/hostlisttest.cpp
+ hostlist/hostlisttest.h
+)
+target_include_directories(HostlistTest PRIVATE ../lib/urlfilter/)
+target_link_libraries(HostlistTest Qt5::Test urlfilter)
+
+# Both tests run with this directory as working directory;
+# HostlistTest opens hostlist.txt through a relative path.
+add_test(NAME urlfilter-adblock COMMAND AdBlockTest WORKING_DIRECTORY ${CMAKE_CURRENT_LIST_DIR})
+add_test(NAME urlfilter-hostlist COMMAND HostlistTest WORKING_DIRECTORY ${CMAKE_CURRENT_LIST_DIR})
diff --git a/test/urlfilter/adblocktest.cpp b/test/adblock/adblocktest.cpp
index b31d965..b31d965 100644
--- a/test/urlfilter/adblocktest.cpp
+++ b/test/adblock/adblocktest.cpp
diff --git a/test/urlfilter/adblocktest.h b/test/adblock/adblocktest.h
index 95cb7e2..95cb7e2 100644
--- a/test/urlfilter/adblocktest.h
+++ b/test/adblock/adblocktest.h
diff --git a/test/hostlist.txt b/test/hostlist.txt
new file mode 100644
index 0000000..a0b4e5c
--- /dev/null
+++ b/test/hostlist.txt
@@ -0,0 +1,6 @@
+# This is a comment, and after it comes a blank line
+
+127.0.0.1 localhost.localdomain
+
+0.0.0.0 blockeddomain.com
+0.0.0.0 blockeddomain.first blockeddomain.second
diff --git a/test/hostlist/hostlisttest.cpp b/test/hostlist/hostlisttest.cpp
new file mode 100644
index 0000000..46f6a85
--- /dev/null
+++ b/test/hostlist/hostlisttest.cpp
@@ -0,0 +1,42 @@
+#include "hostlisttest.h"
+#include <QtTest/QtTest>
+
+// Loads hostlist.txt into the shared tree and checks the resulting branch count.
+void HostlistTest::parseList()
+{
+    // load filters; the relative path is resolved against the working directory of the test
+    QFile hostlist("hostlist.txt");
+    // QVERIFY2 reports why the file could not be opened instead of a bare "false != true"
+    QVERIFY2(hostlist.open(QIODevice::ReadOnly | QIODevice::Text), qPrintable(hostlist.errorString()));
+    QVERIFY(loadHostlist(hostlist, &tree));
+
+    // loadHostlist adds every rule to the branch with the empty domain
+    QCOMPARE(tree.branches().length(), 1);
+}
+
+// Queries the tree filled by parseList() for each entry of hostlist.txt.
+void HostlistTest::checkRules()
+{
+    // hostlist rules are stored in the branch with the empty domain
+    const QString anyDomain;
+
+    // an entry pointing at 0.0.0.0 blocks
+    const QVector<const FilterLeaf *> blocked = tree.match(anyDomain, "blockeddomain.com");
+    QCOMPARE(blocked.length(), 1);
+    QCOMPARE(blocked.at(0)->action(), FilterLeaf::Block);
+
+    // an entry pointing at any other address redirects to it
+    const QVector<const FilterLeaf *> redirected = tree.match(anyDomain, "localhost.localdomain");
+    QCOMPARE(redirected.length(), 1);
+    QCOMPARE(redirected.at(0)->action(), FilterLeaf::Redirect);
+    QCOMPARE(redirected.at(0)->redirect(), "127.0.0.1");
+
+    // a line listing two domains yields one rule per domain
+    const QVector<const FilterLeaf *> firstOnLine = tree.match(anyDomain, "blockeddomain.first");
+    QCOMPARE(firstOnLine.length(), 1);
+    QCOMPARE(firstOnLine.at(0)->action(), FilterLeaf::Block);
+    const QVector<const FilterLeaf *> secondOnLine = tree.match(anyDomain, "blockeddomain.second");
+    QCOMPARE(secondOnLine.length(), 1);
+    QCOMPARE(secondOnLine.at(0)->action(), FilterLeaf::Block);
+
+    // a domain that is not on the list matches nothing
+    const QVector<const FilterLeaf *> unlisted = tree.match(anyDomain, "other.domain");
+    QCOMPARE(unlisted.length(), 0);
+}
+
+// Qt Test macro that provides main() for this test class (non-GUI variant).
+QTEST_GUILESS_MAIN(HostlistTest)
diff --git a/test/hostlist/hostlisttest.h b/test/hostlist/hostlisttest.h
new file mode 100644
index 0000000..9a87e0d
--- /dev/null
+++ b/test/hostlist/hostlisttest.h
@@ -0,0 +1,19 @@
+#ifndef HOSTLIST_TEST
+#define HOSTLIST_TEST
+
+#include <QObject>
+#include "filtertree.h"
+
+// Qt Test class for the hostlist rule format of FilterTree.
+class HostlistTest : public QObject
+{
+ Q_OBJECT
+
+private slots:
+ // Loads hostlist.txt into `tree` and checks the number of branches.
+ void parseList();
+ // Queries `tree` for the entries of hostlist.txt; relies on parseList() having filled it.
+ void checkRules();
+
+private:
+ // Shared by both test functions: filled in parseList(), queried in checkRules().
+ FilterTree tree;
+};
+
+#endif