aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAqua-sama <aqua@iserlohn-fortress.net>2019-01-09 19:38:58 +0100
committerAqua-sama <aqua@iserlohn-fortress.net>2019-01-09 19:38:58 +0100
commit3d2ae07c455c0e423c64f19e445518427a5684fa (patch)
tree58f6b47c3db33658a6f2e605fd021f08d1fa9964
parentAdd assorted unfished doc files to repo (diff)
downloadsmolbote-3d2ae07c455c0e423c64f19e445518427a5684fa.tar.xz
Rewrite lib/urlfilter
- Make HostList and AdBlockList implementations independent from each other - Move urlfilter tests to lib/urlfilter
-rw-r--r--doc/Bugs.asciidoc3
-rw-r--r--lib/urlfilter/adblock/adblocklist.cpp188
-rw-r--r--lib/urlfilter/adblock/adblocklist.h42
-rw-r--r--lib/urlfilter/adblock/parser.cpp75
-rw-r--r--lib/urlfilter/adblock/parser.h (renamed from lib/urlfilter/formats/adblockrule_parse.h)13
-rw-r--r--lib/urlfilter/domain.cpp65
-rw-r--r--lib/urlfilter/domain.h33
-rw-r--r--lib/urlfilter/filterleaf.cpp14
-rw-r--r--lib/urlfilter/filterleaf.h60
-rw-r--r--lib/urlfilter/filtertree.cpp94
-rw-r--r--lib/urlfilter/filtertree.h61
-rw-r--r--lib/urlfilter/formats/adblocklist.cpp95
-rw-r--r--lib/urlfilter/formats/adblocklist.h32
-rw-r--r--lib/urlfilter/formats/adblockrule.cpp63
-rw-r--r--lib/urlfilter/formats/adblockrule_parse.cpp181
-rw-r--r--lib/urlfilter/formats/hostlistrule.cpp29
-rw-r--r--lib/urlfilter/formats/hostlistrule.h27
-rw-r--r--lib/urlfilter/hostlist/hostlist.cpp79
-rw-r--r--lib/urlfilter/hostlist/hostlist.h44
-rw-r--r--lib/urlfilter/matcher.h (renamed from lib/urlfilter/formats/adblockrule.h)88
-rw-r--r--lib/urlfilter/meson.build31
-rw-r--r--lib/urlfilter/test/adblock.cpp88
-rw-r--r--lib/urlfilter/test/adblock.txt26
-rw-r--r--lib/urlfilter/test/hostlist.cpp34
-rw-r--r--lib/urlfilter/test/hostlist.txt (renamed from test/hostlist.txt)0
-rw-r--r--lib/urlfilter/test/matcher.cpp42
-rw-r--r--lib/urlfilter/urlfilter.h43
-rw-r--r--meson.build6
-rw-r--r--src/browser.cpp30
-rw-r--r--src/browser.h4
-rw-r--r--src/meson.build3
-rw-r--r--src/webengine/filter.cpp70
-rw-r--r--src/webengine/filter.h43
-rw-r--r--src/webengine/urlinterceptor.cpp53
-rw-r--r--src/webengine/urlinterceptor.h16
-rw-r--r--test/adblock.txt4
-rw-r--r--test/adblock/adblocktest.cpp83
-rw-r--r--test/hostlist/hostlisttest.cpp61
-rw-r--r--test/hostlist/hostlisttest.h23
-rw-r--r--test/matcherbenchmark/matcherbenchmark.cpp91
-rw-r--r--test/matcherbenchmark/matcherbenchmark.h20
-rw-r--r--test/meson.build34
-rw-r--r--test/urlfilter/urlfiltertest.cpp61
43 files changed, 811 insertions, 1341 deletions
diff --git a/doc/Bugs.asciidoc b/doc/Bugs.asciidoc
index e6ba9dd..d4ab1ec 100644
--- a/doc/Bugs.asciidoc
+++ b/doc/Bugs.asciidoc
@@ -40,6 +40,9 @@ Only affects Qt 5.11.0. Set __QTBUG_68224_WORKAROUND__.
=== rcc embeds time in output
https://bugreports.qt.io/browse/QTBUG-62511
+=== QWebEngineUrlRequestInterceptor does not allow to pass the Referer header
+https://bugreports.qt.io/browse/QTBUG-60203
+
=== Wayland bugs
=== mainwindow.maximized doesn't work
diff --git a/lib/urlfilter/adblock/adblocklist.cpp b/lib/urlfilter/adblock/adblocklist.cpp
new file mode 100644
index 0000000..c749e9e
--- /dev/null
+++ b/lib/urlfilter/adblock/adblocklist.cpp
@@ -0,0 +1,188 @@
+/*
+ * This file is part of smolbote. It's copyrighted by the contributors recorded
+ * in the version control history of the file, available from its original
+ * location: https://neueland.iserlohn-fortress.net/gitea/aqua/smolbote
+ *
+ * SPDX-License-Identifier: GPL-3.0
+ */
+
+#include "adblocklist.h"
+#include "parser.h"
+#include <QIODevice>
+#include <QTextStream>
+#include <QDebug>
+
+/**
+ * Build the list by reading @p device line by line and handing every line to
+ * parseLine(). The device must already be open (asserted in debug builds).
+ * The collected metadata is dumped to the debug log once reading finishes.
+ */
+AdBlockList::AdBlockList(QIODevice *device)
+{
+    Q_ASSERT(device->isOpen());
+
+    QTextStream stream(device);
+    for(; !stream.atEnd();) {
+        const QString currentLine = stream.readLine();
+        parseLine(currentLine);
+    }
+
+    qDebug() << m_metadata;
+}
+
+/**
+ * Release the matcher of every rule; the rules only hold raw pointers to them.
+ */
+AdBlockList::~AdBlockList()
+{
+    for(auto it = rules.begin(); it != rules.end(); ++it) {
+        delete it->matcher;
+    }
+}
+
+/**
+ * Look up a metadata entry collected from the list's comment lines.
+ * @return the stored value, or an empty QString when @p key is unknown
+ */
+QString AdBlockList::metadata(const QString& key) const
+{
+    return m_metadata.value(key, QString());
+}
+
+/**
+ * @return the number of rules that were parsed into this list
+ */
+int AdBlockList::ruleCount() const
+{
+    const auto count = rules.size();
+    return static_cast<int>(count);
+}
+
+/**
+ * Match a request against the rules of this list, in the order they were parsed.
+ *
+ * @param firstParty  URL of the page issuing the request; only its host is used
+ * @param requestUrl  URL being requested; matched in its string form
+ * @param type        resource type of the request
+ * @return the action of the first applicable rule whose matcher accepts the
+ *         request, or UrlFilter::NotMatched; the QString part is always empty
+ */
+std::pair<UrlFilter::MatchResult, QString> AdBlockList::match(const QUrl& firstParty, const QUrl& requestUrl, QWebEngineUrlRequestInfo::ResourceType type) const
+{
+    const QString domain = firstParty.host();
+    const QString request = requestUrl.toString();
+
+    for(const Rule &r : rules) {
+        // Resource type options: the stored bool is true for a plain option
+        // ("script") and false for an inverse option ("~script").
+        if(!r.options.isEmpty()) {
+            const auto option = r.options.constFind(type);
+
+            if(option != r.options.constEnd()) {
+                // the type is listed explicitly; an inverse option excludes it
+                if(!option.value())
+                    continue;
+
+            } else {
+                // the type is not listed: the rule is limited to other types
+                // only if it carries at least one plain (non-inverse) option
+                bool limitedToOtherTypes = false;
+                for(auto it = r.options.constBegin(); it != r.options.constEnd(); ++it) {
+                    if(it.value()) {
+                        limitedToOtherTypes = true;
+                        break;
+                    }
+                }
+                if(limitedToOtherTypes)
+                    continue;
+            }
+        }
+
+        // rule is explicitly disabled on this first-party host
+        if(r.disabledOn.contains(domain))
+            continue;
+
+        // rule is restricted to a set of hosts and this is not one of them
+        if(!r.enabledOn.isEmpty() && !r.enabledOn.contains(domain))
+            continue;
+
+        if(r.matcher->hasMatch(request))
+            return std::make_pair(r.action, QString());
+    }
+
+    return std::make_pair(UrlFilter::NotMatched, QString());
+}
+
+/**
+ * Parse a single line of an AdBlock-format list.
+ *
+ * Empty lines are ignored. Comment lines ("!") may contribute a metadata entry
+ * when their key is one of `keys`. Element hiding rules ("##", "#@#") are
+ * logged and skipped. Every other line becomes a Rule appended to `rules`.
+ *
+ * @param line raw line as read from the list; surrounding whitespace is trimmed
+ */
+void AdBlockList::parseLine(const QString& line)
+{
+    QString parsedLine = line.trimmed();
+
+    if(parsedLine.isEmpty())
+        return;
+
+    if(parsedLine.startsWith(QLatin1Literal("!"))) {
+        const auto comment = parseComment(parsedLine);
+
+        // only keep the metadata keys this class knows about
+        if(comment) {
+            const auto key = comment.value().first;
+            if(keys.contains(key))
+                m_metadata[key] = comment.value().second;
+        }
+
+        return;
+    }
+
+    // css rule -> filterleaves cannot do element blocking
+    if(parsedLine.contains(QLatin1Literal("##")) || parsedLine.contains(QLatin1Literal("#@#"))) {
+        qDebug("TODO: %s", qUtf8Printable(parsedLine));
+        return;
+    }
+
+    Rule r;
+    r.action = UrlFilter::Block;
+
+    // exception rules
+    if(parsedLine.startsWith(QLatin1Literal("@@"))) {
+        r.action = UrlFilter::Allow;
+        parsedLine.remove(0, 2);
+    }
+
+    bool matchCase = false;
+
+    // parse options: everything after the first "$" is a comma-separated list
+    // NOTE(review): a regular expression rule that itself contains "$" is cut
+    // at that character as well - confirm whether the last "$" should be used.
+    {
+        const int sepPos = parsedLine.indexOf(QLatin1Literal("$"));
+        if(sepPos != -1) {
+            const auto options = parsedLine.mid(sepPos + 1).split(QLatin1Literal(","));
+            parsedLine = parsedLine.mid(0, sepPos);
+
+            for(const QString &option : options) {
+                if(option.startsWith(QLatin1Literal("domain"))) {
+                    // skip the 7 characters of "domain=" and split the "|"-separated hosts
+                    const auto domainList = option.mid(7).split(QLatin1Literal("|"));
+
+                    for(const QString &domain : domainList) {
+                        if(domain.startsWith(QLatin1Literal("~"))) {
+                            r.disabledOn.append(domain.mid(1));
+                        } else {
+                            r.enabledOn.append(domain);
+                        }
+                    }
+                } else if(option.endsWith(QLatin1Literal("match-case"))) {
+                    matchCase = !option.startsWith(QLatin1Literal("~"));
+
+                } else {
+                    const auto pair = parseResourceOption(option);
+                    if(pair)
+                        r.options.insert(pair.value().first, pair.value().second);
+                }
+            }
+        }
+    }
+
+    // pick the matcher; the order of these checks decides which kind of rule a line becomes
+    if(parsedLine.startsWith(QLatin1Literal("/")) && parsedLine.endsWith(QLatin1Literal("/"))) {
+        // regular expression rule
+        parsedLine = parsedLine.mid(1, parsedLine.length() - 2);
+        r.matcher = new ContentsMatcher<QRegularExpression>(parsedLine, UrlFilter::RegularExpressionMatch);
+
+    } else if(parsedLine.startsWith(QLatin1Literal("||")) && parsedLine.endsWith(QLatin1Literal("^"))) {
+        // domain rule: strip the leading "||" and the trailing "^"
+        parsedLine = parsedLine.mid(2, parsedLine.length() - 3);
+        r.matcher = new ContentsMatcher<QString>(parsedLine, UrlFilter::DomainMatch);
+
+    } else if(parsedLine.startsWith(QLatin1Literal("|")) && parsedLine.endsWith(QLatin1Literal("|"))) {
+        // string equals rule
+        parsedLine = parsedLine.mid(1, parsedLine.length() - 2);
+        r.matcher = new ContentsMatcher<QStringMatcher>(parsedLine, UrlFilter::StringEquals);
+
+    } else if(parsedLine.startsWith(QLatin1Literal("||"))) {
+        // string starts with rule
+        parsedLine = parsedLine.mid(2);
+        r.matcher = new ContentsMatcher<QStringMatcher>(parsedLine, UrlFilter::StringStartsWith);
+
+    } else if(parsedLine.endsWith(QLatin1Literal("|"))) {
+        // string ends with rule
+        parsedLine.chop(1);
+        r.matcher = new ContentsMatcher<QStringMatcher>(parsedLine, UrlFilter::StringEndsWith);
+
+    } else {
+        // generic contains rule
+
+        // remove beginning and ending wildcards
+        if(parsedLine.startsWith(QLatin1Literal("*")))
+            parsedLine = parsedLine.mid(1);
+
+        if(parsedLine.endsWith(QLatin1Literal("*")))
+            parsedLine.chop(1);
+
+        if(parsedLine.contains(QLatin1Literal("*")) || parsedLine.contains(QLatin1Literal("^"))) {
+            // check for wildcards and translate to regexp
+            // wildcard "*" - any number of characters
+            // separator "^" - end, ? or /
+            // NOTE(review): the "^" inserted by the "||" replacement is itself
+            // rewritten by the separator replacement in the last step.
+            parsedLine.replace(QLatin1Literal("||"), QLatin1Literal("^\\w+://"));
+            parsedLine.replace(QLatin1Literal("|"), QLatin1Literal("\\|"));
+            parsedLine.replace(QLatin1Literal("*"), QLatin1Literal(".*"));
+            parsedLine.replace(QLatin1Literal("^"), QLatin1Literal("($|\\?|\\/)"));
+
+            r.matcher = new ContentsMatcher<QRegularExpression>(parsedLine, UrlFilter::RegularExpressionMatch);
+
+        } else {
+            r.matcher = new ContentsMatcher<QStringMatcher>(parsedLine, UrlFilter::StringContains);
+        }
+    }
+
+    // verify the allocation before the matcher is used for the first time
+    Q_CHECK_PTR(r.matcher);
+    r.matcher->setCaseSensitive(matchCase);
+
+    rules.emplace_back(std::move(r));
+}
+
diff --git a/lib/urlfilter/adblock/adblocklist.h b/lib/urlfilter/adblock/adblocklist.h
new file mode 100644
index 0000000..ee41e11
--- /dev/null
+++ b/lib/urlfilter/adblock/adblocklist.h
@@ -0,0 +1,42 @@
+/*
+ * This file is part of smolbote. It's copyrighted by the contributors recorded
+ * in the version control history of the file, available from its original
+ * location: https://neueland.iserlohn-fortress.net/gitea/aqua/smolbote
+ *
+ * SPDX-License-Identifier: GPL-3.0
+ */
+
+#include "urlfilter.h"
+#include "matcher.h"
+#include <QHash>
+#include <QWebEngineUrlRequestInfo>
+
+class QIODevice;
+/**
+ * UrlFilter implementation backed by an AdBlock-format list.
+ *
+ * The list is parsed once, in the constructor, from an already opened device.
+ * Each rule holds a raw Matcher pointer that the destructor deletes.
+ * NOTE(review): no copy operations are declared here, so a copied instance
+ * would delete the same matchers twice - confirm instances are never copied.
+ */
+class AdBlockList : public UrlFilter
+{
+public:
+    // metadata keys that are picked up from the comment lines of a list
+    // TODO: check if all keys are listed
+    const QStringList keys = { "Version", "Title", "Last modified", "Expires", "Homepage", "Licence", "Redirect" };
+
+    /// Parse the list available on @p device, which has to be open already.
+    AdBlockList(QIODevice *device);
+    ~AdBlockList();
+
+    /// Value of a metadata entry, or an empty string if @p key was not found.
+    QString metadata(const QString &key) const override;
+    /// Number of rules parsed from the list.
+    int ruleCount() const;
+    /// Action of the first rule that applies to and matches the request.
+    std::pair<MatchResult, QString> match(const QUrl &firstParty, const QUrl &requestUrl, QWebEngineUrlRequestInfo::ResourceType type) const override;
+
+protected:
+    /// Parse one line of the list: comment/metadata, element hiding rule or filter rule.
+    void parseLine(const QString &line);
+
+private:
+    QHash<QString, QString> m_metadata;
+
+    struct Rule {
+        UrlFilter::MatchResult action = UrlFilter::NotMatched;
+        // owned by the list and deleted in ~AdBlockList(); starts out as
+        // nullptr so a rule without a matcher never deletes a garbage pointer
+        Matcher *matcher = nullptr;
+        // first-party hosts the rule is limited to / excluded from
+        QStringList enabledOn, disabledOn;
+        // resource type -> false if the option was given in inverse form ("~type")
+        QHash<QWebEngineUrlRequestInfo::ResourceType, bool> options;
+    };
+
+    std::vector<Rule> rules;
+};
diff --git a/lib/urlfilter/adblock/parser.cpp b/lib/urlfilter/adblock/parser.cpp
new file mode 100644
index 0000000..1e7f0bc
--- /dev/null
+++ b/lib/urlfilter/adblock/parser.cpp
@@ -0,0 +1,75 @@
+/*
+ * This file is part of smolbote. It's copyrighted by the contributors recorded
+ * in the version control history of the file, available from its original
+ * location: https://neueland.iserlohn-fortress.net/gitea/aqua/smolbote
+ *
+ * SPDX-License-Identifier: GPL-3.0
+ */
+
+#include "parser.h"
+
+/**
+ * Split a comment line of the form "! Key: Value" into its key and value.
+ *
+ * The line is split at the first ": " only, so a value that contains ": "
+ * itself is returned in full. The first character of the line (the "!"
+ * marker) is not part of the key.
+ *
+ * @param line comment line, including the leading "!"
+ * @return the trimmed (key, value) pair, or std::nullopt when the line has no
+ *         ": " separator after its first character
+ */
+std::optional<std::pair<QString, QString>> parseComment(QString &line)
+{
+    const QLatin1Literal separator(": ");
+
+    // index 0 is the comment marker, so a usable separator starts at 1 or later
+    const int sepPos = line.indexOf(separator);
+    if(sepPos < 1)
+        return std::nullopt;
+
+    const QString key = line.mid(1, sepPos - 1).trimmed();
+    const QString value = line.mid(sepPos + separator.size()).trimmed();
+    return std::make_pair(key, value);
+}
+
+/**
+ * Translate a resource type option of an AdBlock rule into the matching
+ * QWebEngineUrlRequestInfo resource type.
+ *
+ * @param option a single option, optionally prefixed with "~"
+ * @return the resource type paired with a flag that is false when the option
+ *         starts with "~", or std::nullopt for options that are not handled
+ */
+std::optional<std::pair<QWebEngineUrlRequestInfo::ResourceType, bool>> parseResourceOption(const QString &option)
+{
+    const bool enabled = !option.startsWith(QLatin1Literal("~"));
+
+    // option suffix -> resource type, checked in this order
+    static const struct {
+        const char *suffix;
+        QWebEngineUrlRequestInfo::ResourceType type;
+    } resourceTypes[] = {
+        // external scripts loaded via HTML script tag
+        { "script", QWebEngineUrlRequestInfo::ResourceTypeScript },
+        // regular images, typically loaded via HTML img tag
+        { "image", QWebEngineUrlRequestInfo::ResourceTypeImage },
+        // external CSS stylesheet files
+        { "stylesheet", QWebEngineUrlRequestInfo::ResourceTypeStylesheet },
+        // content handled by browser plugins, e.g. Flash or Java
+        { "object", QWebEngineUrlRequestInfo::ResourceTypeObject },
+        // requests started using the XMLHttpRequest object or fetch() API
+        { "xmlhttprequest", QWebEngineUrlRequestInfo::ResourceTypeXhr },
+        // requests started by plugins like Flash
+        { "object-subrequest", QWebEngineUrlRequestInfo::ResourceTypePluginResource },
+        // embedded pages, usually included via HTML frames
+        { "subdocument", QWebEngineUrlRequestInfo::ResourceTypeSubFrame },
+        // requests started by <a ping> or navigator.sendBeacon()
+        { "ping", QWebEngineUrlRequestInfo::ResourceTypePing },
+    };
+
+    for(const auto &entry : resourceTypes) {
+        if(option.endsWith(QLatin1Literal(entry.suffix)))
+            return std::make_pair(entry.type, enabled);
+    }
+
+    if(option.endsWith(QLatin1Literal("websocket"))) {
+        // requests initiated via WebSocket object
+        qDebug("Resource type 'websocket' not available");
+
+    } else if(option.endsWith(QLatin1Literal("webrtc"))) {
+        // connections opened via RTCPeerConnection instances to ICE servers
+        qDebug("Resource type 'webrtc' not available");
+
+    } else if(option.endsWith(QLatin1Literal("document"))) {
+        // the page itself
+        return std::make_pair(QWebEngineUrlRequestInfo::ResourceTypeMainFrame, enabled);
+
+    } else if(option.endsWith(QLatin1Literal("other"))) {
+        return std::make_pair(QWebEngineUrlRequestInfo::ResourceTypeUnknown, enabled);
+    }
+
+    // unavailable and unknown options both end up here
+    qDebug("TODO: %s", qUtf8Printable(option));
+    return std::nullopt;
+}
diff --git a/lib/urlfilter/formats/adblockrule_parse.h b/lib/urlfilter/adblock/parser.h
index 01255ca..c73a9cf 100644
--- a/lib/urlfilter/formats/adblockrule_parse.h
+++ b/lib/urlfilter/adblock/parser.h
@@ -6,12 +6,9 @@
* SPDX-License-Identifier: GPL-3.0
*/
-#ifndef ADBLOCKRULE_PARSE_H
-#define ADBLOCKRULE_PARSE_H
+#include <QWebEngineUrlRequestInfo>
+#include <optional>
+#include <utility>
-class AdBlockRule;
-
-AdBlockRule *parseRule_adblock(const QString &filter);
-std::optional<QPair<QWebEngineUrlRequestInfo::ResourceType, bool>> parseOption(const QString &option);
-
-#endif // ADBLOCKRULE_PARSE_H
+std::optional<std::pair<QString, QString>> parseComment(QString &line);
+std::optional<std::pair<QWebEngineUrlRequestInfo::ResourceType, bool>> parseResourceOption(const QString &option);
diff --git a/lib/urlfilter/domain.cpp b/lib/urlfilter/domain.cpp
deleted file mode 100644
index 2bfd524..0000000
--- a/lib/urlfilter/domain.cpp
+++ /dev/null
@@ -1,65 +0,0 @@
-/*
- * This file is part of smolbote. It's copyrighted by the contributors recorded
- * in the version control history of the file, available from its original
- * location: https://neueland.iserlohn-fortress.net/gitea/aqua/smolbote
- *
- * SPDX-License-Identifier: GPL-3.0
- */
-
-#include "domain.h"
-
-Domain::Domain(const QString &domain)
- : m_domain(domain)
- , m_hash(qHash(domain, 0))
-{
-}
-
-Domain::Domain(Domain &&other)
- : m_domain(std::move(other.m_domain))
- , m_hash(std::move(other.m_hash))
-{
-}
-
-Domain &Domain::operator=(Domain &&other)
-{
- m_domain = std::move(other.m_domain);
- m_hash = other.m_hash;
- return *this;
-}
-
-bool Domain::matches(const QUrl &url) const
-{
- // empty domain matches all
- if(m_domain.isEmpty() || url.isEmpty())
- return true;
-
- const QString domain = url.host();
-
- // domain and filter are the same
- if(domain == m_domain) {
- return true;
- }
-
- // domain cannot be matched if it doesn't end with filter
- // ex. example2.com isn't matched by example.com
- if(!domain.endsWith(m_domain)) {
- return false;
- }
-
- // match with subdomains
- // ex. subdomain.example.com is matched by example.com
- int index = domain.indexOf(m_domain);
-
- // match if (domain ends with filter) && (filter has been found) and (character before filter is '.')
- return index > 0 && domain[index - 1] == QLatin1Char('.');
-}
-
-bool Domain::matchesExactly(uint hash) const
-{
- return (m_hash == hash);
-}
-
-QString Domain::host() const
-{
- return m_domain;
-}
diff --git a/lib/urlfilter/domain.h b/lib/urlfilter/domain.h
deleted file mode 100644
index 0406f0f..0000000
--- a/lib/urlfilter/domain.h
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- * This file is part of smolbote. It's copyrighted by the contributors recorded
- * in the version control history of the file, available from its original
- * location: https://neueland.iserlohn-fortress.net/gitea/aqua/smolbote
- *
- * SPDX-License-Identifier: GPL-3.0
- */
-
-#ifndef SMOLBOTE_DOMAIN_H
-#define SMOLBOTE_DOMAIN_H
-
-#include <QString>
-#include <QUrl>
-
-class Domain
-{
-public:
- explicit Domain(const QString &domain);
- explicit Domain(Domain &&other);
- Domain &operator=(Domain &&other);
-
- // match domain and subdomains of domain
- bool matches(const QUrl &url) const;
- // exact match of domain
- bool matchesExactly(uint hash) const;
- QString host() const;
-
-private:
- QString m_domain;
- uint m_hash;
-};
-
-#endif // SMOLBOTE_DOMAIN_H
diff --git a/lib/urlfilter/filterleaf.cpp b/lib/urlfilter/filterleaf.cpp
deleted file mode 100644
index 5797718..0000000
--- a/lib/urlfilter/filterleaf.cpp
+++ /dev/null
@@ -1,14 +0,0 @@
-#include "filterleaf.h"
-
-const QString FilterLeaf::request() const
-{
- return m_request;
-}
-
-std::optional<bool> FilterLeaf::option(QWebEngineUrlRequestInfo::ResourceType opt) const
-{
- if(resourceTypeOptions.contains(opt))
- return resourceTypeOptions.value(opt);
- else
- return std::nullopt;
-}
diff --git a/lib/urlfilter/filterleaf.h b/lib/urlfilter/filterleaf.h
deleted file mode 100644
index 64f465d..0000000
--- a/lib/urlfilter/filterleaf.h
+++ /dev/null
@@ -1,60 +0,0 @@
-/*
- * This file is part of smolbote. It's copyrighted by the contributors recorded
- * in the version control history of the file, available from its original
- * location: https://neueland.iserlohn-fortress.net/gitea/aqua/smolbote
- *
- * SPDX-License-Identifier: GPL-3.0
- */
-
-#ifndef SMOLBOTE_FILTERLEAF_H
-#define SMOLBOTE_FILTERLEAF_H
-
-#include <QHash>
-#include <QObject>
-#include <QString>
-#include <QWebEngineUrlRequestInfo>
-#include <optional>
-#include <utility>
-#include <QVariant>
-
-class FilterLeaf
-{
-public:
- enum Action {
- NotMatched,
- Allow,
- Block,
- Redirect
- };
-
- enum UrlMatchType {
- InvalidMatch,
- RegularExpressionMatch,
- StringContains,
- StringStartsWith,
- StringEndsWith,
- StringEquals,
- DomainMatch
- };
-
- virtual ~FilterLeaf() = default;
-
- virtual bool match(const QUrl &requestUrl) const = 0;
- virtual std::pair<Action, QVariant> action() const = 0;
-
- const QString request() const;
- std::optional<bool> option(QWebEngineUrlRequestInfo::ResourceType opt) const;
-
-protected:
- // rule matching
- UrlMatchType matchType = InvalidMatch;
- QHash<QWebEngineUrlRequestInfo::ResourceType, bool> resourceTypeOptions;
- QString m_request;
-
- // rule action
- bool m_isBlocking;
-};
-
-Q_DECLARE_METATYPE(FilterLeaf::Action)
-
-#endif // SMOLBOTE_FILTERLEAF_H
diff --git a/lib/urlfilter/filtertree.cpp b/lib/urlfilter/filtertree.cpp
deleted file mode 100644
index 2cdd6d0..0000000
--- a/lib/urlfilter/filtertree.cpp
+++ /dev/null
@@ -1,94 +0,0 @@
-/*
- * This file is part of smolbote. It's copyrighted by the contributors recorded
- * in the version control history of the file, available from its original
- * location: https://neueland.iserlohn-fortress.net/gitea/aqua/smolbote
- *
- * SPDX-License-Identifier: GPL-3.0
- */
-
-#include "filtertree.h"
-#include "filterleaf.h"
-#include "formats/hostlistrule.h"
-#include <QTextStream>
-
-bool loadHostlist(QIODevice &from, FilterTree *tree)
-{
- Q_ASSERT(from.isReadable());
- QTextStream stream(&from);
- while(!stream.atEnd()) {
- const QString line = stream.readLine().trimmed();
- if(line.isEmpty() || line.startsWith(QLatin1Literal("#")))
- continue;
-
- const QStringList &parts = line.split(QLatin1Literal(" "));
- if(parts.length() < 2) {
-#ifdef QT_DEBUG
- qDebug("Cannot parse: %s", qUtf8Printable(line));
-#endif
- return false;
- }
-
- for(int i = 1; i < parts.length(); ++i) {
- // HostlistRule(domain, redirect)
- auto *rule = new HostlistRule(parts.at(i), parts.constFirst());
- // addRule(rule, enable_on_domain)
- const bool added = tree->addRule(rule, QString());
- if(!added)
- return false;
- }
- }
- return true;
-}
-
-FilterTree::~FilterTree()
-{
- for(auto &branch : m_branches) {
- qDeleteAll(branch.leaves);
- branch.leaves.clear();
- }
-}
-
-const QStringList FilterTree::branches() const
-{
- QStringList branches;
- for(auto &branch : m_branches) {
- branches.append(branch.domain.host());
- }
- return branches;
-}
-
-QVector<const FilterLeaf *> FilterTree::match(const QUrl &domain, const QUrl &requestUrl) const
-{
- QVector<const FilterLeaf *> leaves;
- for(const auto &branch : m_branches) {
- if(branch.domain.matches(domain)) {
-
- for(const auto leaf : branch.leaves) {
- if(leaf->match(requestUrl)) {
- leaves.append(leaf);
- }
- }
- }
- }
- return leaves;
-}
-
-bool FilterTree::addRule(FilterLeaf *rule, const QString &domain)
-{
- branchLock.lock();
- this->branch(domain).leaves.emplace_back(rule);
- branchLock.unlock();
- return true;
-}
-
-FilterTree::Branch & FilterTree::branch(const QString& domain)
-{
- for(auto &branch : m_branches) {
- if(branch.domain.matches(QUrl(domain)))
- return branch;
- }
-
- // no branch was found
- Branch branch(domain);
- return m_branches.emplace_back(std::move(branch));
-}
diff --git a/lib/urlfilter/filtertree.h b/lib/urlfilter/filtertree.h
deleted file mode 100644
index f453a3d..0000000
--- a/lib/urlfilter/filtertree.h
+++ /dev/null
@@ -1,61 +0,0 @@
-/*
- * This file is part of smolbote. It's copyrighted by the contributors recorded
- * in the version control history of the file, available from its original
- * location: https://neueland.iserlohn-fortress.net/gitea/aqua/smolbote
- *
- * SPDX-License-Identifier: GPL-3.0
- */
-
-#ifndef SMOLBOTE_FILTERTREE_H
-#define SMOLBOTE_FILTERTREE_H
-
-#include "domain.h"
-#include "filterleaf.h"
-#include <QIODevice>
-#include <QObject>
-#include <QVector>
-#include <vector>
-#include <QMutex>
-
-/** FilterTree: B+ tree of filter rules
- * The tree contains branches that represent domains
- * Each domain-branch contains leaves (rules) that are to be applied to it.
- * Rules may be applied to multiple branches.
- */
-class FilterTree : public QObject
-{
- Q_OBJECT
-
-public:
- ~FilterTree();
-
- const QStringList branches() const;
- QVector<const FilterLeaf *> match(const QUrl &domain, const QUrl &requestUrl) const;
-
- bool addRule(FilterLeaf *rule, const QString &domain);
-
-private:
- struct Branch {
- explicit Branch(const QString &host)
- : domain(host)
- {
- }
- explicit Branch(Branch &&other)
- : domain(std::move(other.domain))
- , leaves(std::move(other.leaves))
- {
- }
-
- Domain domain;
- std::vector<FilterLeaf *> leaves;
- };
-
- Branch& branch(const QString &domain);
-
- QMutex branchLock;
- std::vector<Branch> m_branches;
-};
-
-bool loadHostlist(QIODevice &from, FilterTree *tree);
-
-#endif // SMOLBOTE_FILTERTREE_H
diff --git a/lib/urlfilter/formats/adblocklist.cpp b/lib/urlfilter/formats/adblocklist.cpp
deleted file mode 100644
index 772c252..0000000
--- a/lib/urlfilter/formats/adblocklist.cpp
+++ /dev/null
@@ -1,95 +0,0 @@
-#include "adblocklist.h"
-
-AdBlockList::AdBlockList()
-{
-}
-
-QString AdBlockList::metadata(const QString &key) const
-{
- return m_metadata.value(key, QString());
-}
-
-FilterLeaf::Action AdBlockList::match(const QUrl &firstParty, const QUrl &requestUrl, QWebEngineUrlRequestInfo::ResourceType type) const
-{
- const QString request = requestUrl.toString();
-
- for(auto &filter : m_rules) {
- if(filter.matcher->hasMatch(request))
- return filter.action;
- }
- return FilterLeaf::NotMatched;
-}
-
-bool AdBlockList::parseLine(const QString &line)
-{
- // remove whitespace from start/end of the line
- QString parsedLine = line.trimmed();
-
- // check if the line is empty
- if(parsedLine.isEmpty())
- return false;
-
- // parse comment
- if(parsedLine.startsWith(QLatin1Literal("!")))
- return parseComment(parsedLine);
-
- Filter filter;
-
- // exception rules
- if(parsedLine.startsWith(QLatin1Literal("@@"))) {
- filter.action = FilterLeaf::Allow;
- parsedLine.remove(0, 2);
- }
-
- // remove '*' at the beginning and the end
- if(parsedLine.startsWith(QLatin1Literal("*")))
- parsedLine = parsedLine.mid(1);
- if(parsedLine.endsWith(QLatin1Literal("*")))
- parsedLine.chop(1);
-
- if(parsedLine.startsWith(QLatin1Literal("/")) && parsedLine.endsWith(QLatin1Literal("/"))) {
- // regular expression rule
- parsedLine = parsedLine.mid(1, parsedLine.length() - 2);
- filter.matcher = new ContentsMatcher<QRegularExpression>(parsedLine, FilterLeaf::RegularExpressionMatch);
-
- } else if(parsedLine.contains(QLatin1Literal("*"))) {
- parsedLine = QRegularExpression::wildcardToRegularExpression(parsedLine);
- filter.matcher = new ContentsMatcher<QRegularExpression>(parsedLine, FilterLeaf::RegularExpressionMatch);
-
- } else if(parsedLine.startsWith(QLatin1Literal("||")) && parsedLine.endsWith(QLatin1Literal("^"))) {
-// matchType = FilterLeaf::DomainMatch;
- parsedLine = parsedLine.mid(2, parsedLine.length() - 3);
- filter.matcher = new ContentsMatcher<QString>(parsedLine, FilterLeaf::DomainMatch);
-
- } else if(parsedLine.startsWith(QLatin1Literal("|")) && parsedLine.endsWith(QLatin1Literal("|"))) {
- // string equals rule
- parsedLine = parsedLine.mid(1, parsedLine.length() - 2);
- filter.matcher = new ContentsMatcher<QStringMatcher>(parsedLine, FilterLeaf::StringEquals);
-
- } else if(parsedLine.startsWith(QLatin1Literal("||"))) {
- // string starts with rule
- parsedLine = parsedLine.mid(2);
- filter.matcher = new ContentsMatcher<QStringMatcher>(parsedLine, FilterLeaf::StringStartsWith);
-
- } else if(parsedLine.endsWith(QLatin1Literal("|"))) {
- // string ends with rule
- parsedLine.chop(1);
- filter.matcher = new ContentsMatcher<QStringMatcher>(parsedLine, FilterLeaf::StringEndsWith);
-
- } else {
- // generic contains rule
- filter.matcher = new ContentsMatcher<QStringMatcher>(parsedLine, FilterLeaf::StringContains);
- }
-
-
- Q_CHECK_PTR(filter.matcher);
- m_rules.emplace_back(std::move(filter));
- return true;
-}
-
-bool AdBlockList::parseComment(const QString &commentLine)
-{
- const QStringList comment = commentLine.mid(1).split(QLatin1Literal(": "));
- m_metadata[comment.at(0).trimmed()] = comment.at(1).trimmed();
- return true;
-}
diff --git a/lib/urlfilter/formats/adblocklist.h b/lib/urlfilter/formats/adblocklist.h
deleted file mode 100644
index 34a2120..0000000
--- a/lib/urlfilter/formats/adblocklist.h
+++ /dev/null
@@ -1,32 +0,0 @@
-#ifndef ADBLOCKLIST_H
-#define ADBLOCKLIST_H
-
-#include <QHash>
-#include "adblockrule.h"
-
-class AdBlockList
-{
-public:
- AdBlockList();
-
- QString metadata(const QString &key) const;
- FilterLeaf::Action match(const QUrl &firstParty, const QUrl &requestUrl, QWebEngineUrlRequestInfo::ResourceType type = QWebEngineUrlRequestInfo::ResourceTypeUnknown) const;
-
- bool parseLine(const QString &line);
-
-protected:
- bool parseComment(const QString &commentLine);
-
-private:
- struct Filter
- {
- FilterLeaf::Action action = FilterLeaf::Block;
- Matcher *matcher;
- };
-
- QHash<QString, QString> m_metadata;
- //QMap<QString, Filter> m_rules;
- std::vector<Filter> m_rules;
-};
-
-#endif // ADBLOCKLIST_H
diff --git a/lib/urlfilter/formats/adblockrule.cpp b/lib/urlfilter/formats/adblockrule.cpp
deleted file mode 100644
index 60e817f..0000000
--- a/lib/urlfilter/formats/adblockrule.cpp
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * This file is part of smolbote. It's copyrighted by the contributors recorded
- * in the version control history of the file, available from its original
- * location: https://neueland.iserlohn-fortress.net/gitea/aqua/smolbote
- *
- * SPDX-License-Identifier: GPL-3.0
- */
-
-#include "adblockrule.h"
-#include <QRegExp>
-#include <QStringMatcher>
-
-AdBlockRule::AdBlockRule(FilterLeaf::UrlMatchType matchType, const QString &filter, FilterLeaf::Action action)
-{
- this->matchType = matchType;
- this->m_request = filter;
- this->m_isBlocking = (action == FilterLeaf::Block);
- //matcher.setPattern(filter);
- if(matchType == FilterLeaf::RegularExpressionMatch)
- regExp = new QRegExp(filter);
- else
- stringMatcher = new QStringMatcher(filter);
-}
-
-void AdBlockRule::mergeOptions(const QHash<QWebEngineUrlRequestInfo::ResourceType, bool> &options)
-{
- this->resourceTypeOptions.unite(options);
-}
-
-bool AdBlockRule::match(const QUrl &requestUrl) const
-{
- switch(matchType) {
- case FilterLeaf::RegularExpressionMatch:
- return (regExp->indexIn(requestUrl.toString()) != -1);
- default:
- return false;
- }
-}
-
-bool AdBlockRule::match(const QUrl &requestUrl, QWebEngineUrlRequestInfo::ResourceType type) const
-{
- // if request is of the required type, or there are no types set (== apply to all requests)
- if(this->resourceTypeOptions.contains(type) || this->resourceTypeOptions.isEmpty()) {
- switch(matchType) {
- case FilterLeaf::RegularExpressionMatch:
- return (regExp->indexIn(requestUrl.toString()) != -1);
- default:
- qWarning("Match type not implemented, returning false!");
- return false;
- }
- }
-
- // request type is not matched
- return false;
-}
-
-std::pair<FilterLeaf::Action, QVariant> AdBlockRule::action() const
-{
- if(m_isBlocking)
- return std::make_pair(FilterLeaf::Block, QVariant());
- else
- return std::make_pair(FilterLeaf::Allow, QVariant());
-}
diff --git a/lib/urlfilter/formats/adblockrule_parse.cpp b/lib/urlfilter/formats/adblockrule_parse.cpp
deleted file mode 100644
index c01ddfd..0000000
--- a/lib/urlfilter/formats/adblockrule_parse.cpp
+++ /dev/null
@@ -1,181 +0,0 @@
-/*
- * This file is part of smolbote. It's copyrighted by the contributors recorded
- * in the version control history of the file, available from its original
- * location: https://neueland.iserlohn-fortress.net/gitea/aqua/smolbote
- *
- * SPDX-License-Identifier: GPL-3.0
- */
-
-#include "adblockrule.h"
-#include "adblockrule_parse.h"
-
-// adblock format documentation
-// https://adblockplus.org/filters
-
-// QString::mid(pos, len) const - Returns a string starting at the specified position index.
-// QString::chop(len) - Removes n characters from the end of the string.
-// QString::remove(pos, len) - Removes n characters from the string, starting at the given position index.
-// QString::trimmed() const - Remove whitespace from start and end
-
-AdBlockRule *parseRule_adblock(const QString &filter)
-{
- QString parsedLine = filter.trimmed();
-
- // there is no rule, or it's a comment
- if(parsedLine.isEmpty() || parsedLine.startsWith("!")) {
- return nullptr;
- }
-
- // css rule -> filterleaves cannot do element blocking
- if(parsedLine.contains(QLatin1Literal("##")) || parsedLine.contains(QLatin1Literal("#@#"))) {
- return nullptr;
- }
-
- // exception rules
- FilterLeaf::Action action = FilterLeaf::Block;
- if(parsedLine.startsWith(QLatin1Literal("@@"))) {
- action = FilterLeaf::Allow;
- parsedLine.remove(0, 2);
- }
-
- // parse options
- QStringList enabledOn, disabledOn;
- QHash<QWebEngineUrlRequestInfo::ResourceType, bool> optionsHash;
- {
- const int sepPos = parsedLine.indexOf(QLatin1Literal("$"));
- if(sepPos != -1) {
- const auto options = parsedLine.mid(sepPos + 1).split(QLatin1Literal(","));
- parsedLine = parsedLine.mid(0, sepPos);
-
- for(const QString &option : options) {
- if(option.startsWith(QLatin1Literal("domain"))) {
- const auto domainList = option.mid(7).split(QLatin1Literal("|"));
-
- for(const QString &domain : domainList) {
- if(domain.startsWith(QLatin1Literal("~"))) {
- disabledOn.append(domain.mid(1));
- } else {
- enabledOn.append(domain);
- }
- }
- } else {
- const auto pair = parseOption(option);
- if(pair)
- optionsHash.insert(pair.value().first, pair.value().second);
- }
- }
- }
- }
-
- FilterLeaf::UrlMatchType matchType = FilterLeaf::InvalidMatch;
-
- if(parsedLine.startsWith(QLatin1Literal("/")) && parsedLine.endsWith(QLatin1Literal("/"))) {
- // regular expression rule
- matchType = FilterLeaf::RegularExpressionMatch;
- parsedLine = parsedLine.mid(1, parsedLine.length() - 2);
-
- } else if(parsedLine.startsWith(QLatin1Literal("||")) && parsedLine.endsWith(QLatin1Literal("^"))) {
- matchType = FilterLeaf::DomainMatch;
- parsedLine = parsedLine.mid(2, parsedLine.length() - 3);
-
- } else if(parsedLine.startsWith(QLatin1Literal("|")) && parsedLine.endsWith(QLatin1Literal("|"))) {
- // string equals rule
- matchType = FilterLeaf::StringEquals;
- parsedLine = parsedLine.mid(1, parsedLine.length() - 2);
-
- } else if(parsedLine.startsWith(QLatin1Literal("||"))) {
- // string starts with rule
- matchType = FilterLeaf::StringStartsWith;
- parsedLine = parsedLine.mid(2);
-
- } else if(parsedLine.endsWith(QLatin1Literal("|"))) {
- // string ends with rule
- matchType = FilterLeaf::StringEndsWith;
- parsedLine.chop(1);
-
- } else {
- // generic contains rule
- matchType = FilterLeaf::StringContains;
-
- // Basic filter rules can use wildcards, which were supported by QRegExp,
- // but were deprecated in QRegularExpression.
-
- // remove beginning and ending wildcards
- if(parsedLine.startsWith(QLatin1Literal("*")))
- parsedLine = parsedLine.mid(1);
-
- if(parsedLine.endsWith(QLatin1Literal("*")))
- parsedLine.chop(1);
-
- if(parsedLine.contains(QLatin1Literal("*")) || parsedLine.contains(QLatin1Literal("^"))) {
- // check for wildcards and translate to regexp
- // wildcard "*" - any number of characters
- // separator "^" - end, ? or /
- parsedLine.replace(QLatin1Literal("||"), QLatin1Literal("^\\w+://"));
- parsedLine.replace(QLatin1Literal("|"), QLatin1Literal("\\|"));
- parsedLine.replace(QLatin1Literal("*"), QLatin1Literal(".*"));
- parsedLine.replace(QLatin1Literal("^"), QLatin1Literal("($|\\?|\\/)"));
-
- matchType = FilterLeaf::RegularExpressionMatch;
- }
- }
-
- AdBlockRule *rule = new AdBlockRule(matchType, parsedLine, action);
- rule->mergeOptions(optionsHash);
- return rule;
-}
-
-std::optional<QPair<QWebEngineUrlRequestInfo::ResourceType, bool>> parseOption(const QString &option)
-{
- const bool exception = !option.startsWith(QLatin1Literal("~"));
-
- if(option.endsWith(QLatin1Literal("script"))) {
- // external scripts loaded via HTML script tag
- return qMakePair(QWebEngineUrlRequestInfo::ResourceTypeScript, exception);
-
- } else if(option.endsWith(QLatin1Literal("image"))) {
- // regular images, typically loaded via HTML img tag
- return qMakePair(QWebEngineUrlRequestInfo::ResourceTypeImage, exception);
-
- } else if(option.endsWith(QLatin1Literal("stylesheet"))) {
- // external CSS stylesheet files
- return qMakePair(QWebEngineUrlRequestInfo::ResourceTypeStylesheet, exception);
-
- } else if(option.endsWith(QLatin1Literal("object"))) {
- // content handled by browser plugins, e.g. Flash or Java
- return qMakePair(QWebEngineUrlRequestInfo::ResourceTypeObject, exception);
-
- } else if(option.endsWith(QLatin1Literal("xmlhttprequest"))) {
- // requests started using the XMLHttpRequest object or fetch() API
- return qMakePair(QWebEngineUrlRequestInfo::ResourceTypeXhr, exception);
-
- } else if(option.endsWith(QLatin1Literal("object-subrequest"))) {
- // requests started by plugins like Flash
- return qMakePair(QWebEngineUrlRequestInfo::ResourceTypePluginResource, exception);
-
- } else if(option.endsWith(QLatin1Literal("subdocument"))) {
- // embedded pages, usually included via HTML frames
- return qMakePair(QWebEngineUrlRequestInfo::ResourceTypeSubFrame, exception);
-
- } else if(option.endsWith(QLatin1Literal("ping"))) {
- // requests started by <a ping> or navigator.sendBeacon()
- return qMakePair(QWebEngineUrlRequestInfo::ResourceTypePing, exception);
-
- } else if(option.endsWith(QLatin1Literal("websocket"))) {
- // requests initiated via WebSocket object
- qDebug("Resource type 'websocket' not available");
-
- } else if(option.endsWith(QLatin1Literal("webrtc"))) {
- // connections opened via RTCPeerConnection instances to ICE servers
- qDebug("Resource type 'webrtc' not available");
-
- } else if(option.endsWith(QLatin1Literal("document"))) {
- // the page itself
- return qMakePair(QWebEngineUrlRequestInfo::ResourceTypeMainFrame, exception);
-
- } else if(option.endsWith(QLatin1Literal("other"))) {
- return qMakePair(QWebEngineUrlRequestInfo::ResourceTypeUnknown, exception);
- }
-
- return std::nullopt;
-}
diff --git a/lib/urlfilter/formats/hostlistrule.cpp b/lib/urlfilter/formats/hostlistrule.cpp
deleted file mode 100644
index ad2c2a6..0000000
--- a/lib/urlfilter/formats/hostlistrule.cpp
+++ /dev/null
@@ -1,29 +0,0 @@
-/*
- * This file is part of smolbote. It's copyrighted by the contributors recorded
- * in the version control history of the file, available from its original
- * location: https://neueland.iserlohn-fortress.net/gitea/aqua/smolbote
- *
- * SPDX-License-Identifier: GPL-3.0
- */
-
-#include "hostlistrule.h"
-
-HostlistRule::HostlistRule(const QString &domain, const QString &redirect)
-{
- this->m_isBlocking = (redirect == QLatin1Literal("0.0.0.0"));
- this->m_request = domain;
- this->m_redirect = redirect;
-}
-
-bool HostlistRule::match(const QUrl &requestUrl) const
-{
- //qDebug("checking [%s] against [%s]", qUtf8Printable(requestUrl.host()), qUtf8Printable(m_request));
- return (m_request == requestUrl.host());
-}
-
-std::pair<FilterLeaf::Action, QVariant> HostlistRule::action() const
-{
- if(m_isBlocking)
- return std::make_pair(FilterLeaf::Block, QVariant());
- return std::make_pair(FilterLeaf::Redirect, QVariant(m_redirect));
-}
diff --git a/lib/urlfilter/formats/hostlistrule.h b/lib/urlfilter/formats/hostlistrule.h
deleted file mode 100644
index 58ec690..0000000
--- a/lib/urlfilter/formats/hostlistrule.h
+++ /dev/null
@@ -1,27 +0,0 @@
-/*
- * This file is part of smolbote. It's copyrighted by the contributors recorded
- * in the version control history of the file, available from its original
- * location: https://neueland.iserlohn-fortress.net/gitea/aqua/smolbote
- *
- * SPDX-License-Identifier: GPL-3.0
- */
-
-#ifndef SMOLBOTE_HOSTLIST_RULE_H
-#define SMOLBOTE_HOSTLIST_RULE_H
-
-#include "../filterleaf.h"
-#include <QString>
-
-class HostlistRule : public FilterLeaf
-{
-public:
- explicit HostlistRule(const QString &domain, const QString &redirect);
-
- bool match(const QUrl &requestUrl) const override;
- std::pair<FilterLeaf::Action, QVariant> action() const override;
-
-private:
- QString m_redirect;
-};
-
-#endif // SMOLBOTE_HOSTLIST_RULE_H
diff --git a/lib/urlfilter/hostlist/hostlist.cpp b/lib/urlfilter/hostlist/hostlist.cpp
new file mode 100644
index 0000000..ec0b214
--- /dev/null
+++ b/lib/urlfilter/hostlist/hostlist.cpp
@@ -0,0 +1,79 @@
+/*
+ * This file is part of smolbote. It's copyrighted by the contributors recorded
+ * in the version control history of the file, available from its original
+ * location: https://neueland.iserlohn-fortress.net/gitea/aqua/smolbote
+ *
+ * SPDX-License-Identifier: GPL-3.0
+ */
+
+#include "hostlist.h"
+#include <QIODevice>
+#include <QTextStream>
+#include <QDebug>
+
+/**
+ * Build the list by handing every line readable from @p device to parseLine().
+ * The device must already be open; this is only verified through Q_ASSERT.
+ */
+HostList::HostList(QIODevice *device)
+{
+    Q_ASSERT(device->isOpen());
+
+    for(QTextStream input(device); !input.atEnd();) {
+        const QString nextLine = input.readLine();
+        parseLine(nextLine);
+    }
+
+    // dump whatever metadata is held once parsing is finished
+    qDebug() << m_metadata;
+}
+
+/// Look up a metadata entry; an unknown @p key yields a default-constructed QString.
+QString HostList::metadata(const QString& key) const
+{
+    return m_metadata.value(key, QString());
+}
+
+/// Number of rules currently stored (one per domain seen by parseLine()).
+int HostList::ruleCount() const
+{
+    const auto stored = rules.size();
+    return static_cast<int>(stored);
+}
+
+/**
+ * Check the host of @p requestUrl against the stored rules.
+ *
+ * @p firstParty and @p type are ignored. Rules are compared by the qHash()
+ * of the host name only; the first rule with an equal hash decides.
+ *
+ * @return the rule's action together with its redirect string, or
+ *         (NotMatched, empty QString) when no rule has the same hash.
+ */
+std::pair<UrlFilter::MatchResult, QString> HostList::match(const QUrl& firstParty, const QUrl& requestUrl, QWebEngineUrlRequestInfo::ResourceType type) const
+{
+    Q_UNUSED(firstParty);
+    Q_UNUSED(type);
+
+    const uint wanted = qHash(requestUrl.host());
+
+    for(auto it = rules.cbegin(); it != rules.cend(); ++it) {
+        if(it->domainHash != wanted)
+            continue;
+        return std::make_pair(it->action, it->redirect);
+    }
+
+    return std::make_pair(UrlFilter::NotMatched, QString());
+}
+
+/**
+ * Parse one line of a hosts-format list ("<address> <domain> [<domain>...]")
+ * and append one Rule per domain.
+ *
+ * - Everything from the first '#' on is treated as a comment and dropped, so
+ *   indented comments and trailing comments never become rules.
+ * - Fields may be separated by any run of whitespace (spaces or tabs).
+ * - Lines that do not contain an address followed by at least one domain are
+ *   ignored.
+ * - The address "0.0.0.0" produces Block rules; any other address produces
+ *   Redirect rules carrying that address.
+ */
+void HostList::parseLine(const QString& line)
+{
+    // strip the comment part first, it may follow leading whitespace or a rule
+    QString parsedLine = line;
+    const int commentPos = parsedLine.indexOf(QLatin1Literal("#"));
+    if(commentPos != -1)
+        parsedLine.truncate(commentPos);
+
+    // simplified() trims both ends and collapses each whitespace run (tabs
+    // included) into a single space, so the split below cannot yield empty
+    // fields that would otherwise be stored as a rule for the empty host
+    parsedLine = parsedLine.simplified();
+
+    // blank line or malformed rule (address without any domain)
+    if(!parsedLine.contains(QLatin1Literal(" ")))
+        return;
+
+    const QStringList parts = parsedLine.split(QLatin1Literal(" "));
+    const QString redirect = parts.at(0);
+    const auto action = (redirect == QLatin1Literal("0.0.0.0")) ? UrlFilter::Block : UrlFilter::Redirect;
+
+    // every remaining field is a domain sharing the same action
+    for(int i = 1; i < parts.size(); i++) {
+        Rule r;
+        r.action = action;
+        r.domainHash = qHash(parts.at(i));
+        if(action == UrlFilter::Redirect)
+            r.redirect = redirect;
+
+        rules.emplace_back(std::move(r));
+    }
+}
+
diff --git a/lib/urlfilter/hostlist/hostlist.h b/lib/urlfilter/hostlist/hostlist.h
new file mode 100644
index 0000000..d4a8d87
--- /dev/null
+++ b/lib/urlfilter/hostlist/hostlist.h
@@ -0,0 +1,44 @@
+/*
+ * This file is part of smolbote. It's copyrighted by the contributors recorded
+ * in the version control history of the file, available from its original
+ * location: https://neueland.iserlohn-fortress.net/gitea/aqua/smolbote
+ *
+ * SPDX-License-Identifier: GPL-3.0
+ */
+
+#ifndef SMOLBOTE_URLFILTER_HOSTLIST
+#define SMOLBOTE_URLFILTER_HOSTLIST
+
+#include "urlfilter.h"
+#include <QHash>
+#include <vector>
+#include <QWebEngineUrlRequestInfo>
+
+class QIODevice;
+/**
+ * UrlFilter implementation backed by a hosts-format list: each rule maps the
+ * hash of a domain name to an action (and a redirect string for Redirect rules).
+ */
+class HostList : public UrlFilter
+{
+public:
+    /// Parses all rules from @p device, which must already be open.
+    explicit HostList(QIODevice *device);
+    ~HostList() override = default;
+
+    /// Returns the metadata value stored under @p key.
+    QString metadata(const QString &key) const override;
+    /// Returns how many rules have been parsed.
+    int ruleCount() const;
+    /// Matches the host of @p requestUrl; @p firstParty and @p type are not used.
+    std::pair<MatchResult, QString> match(const QUrl &firstParty, const QUrl &requestUrl, QWebEngineUrlRequestInfo::ResourceType type) const override;
+
+protected:
+    /// Parses a single line of the list and appends the rules found on it.
+    void parseLine(const QString &line);
+
+private:
+    QHash<QString, QString> m_metadata;
+
+    struct Rule {
+        UrlFilter::MatchResult action = UrlFilter::NotMatched;
+        // qHash() of the domain name; initialised so a default Rule is well-defined
+        uint domainHash = 0;
+        // only filled in for Redirect rules
+        QString redirect;
+    };
+
+    std::vector<Rule> rules;
+};
+
+#endif // SMOLBOTE_URLFILTER_HOSTLIST
diff --git a/lib/urlfilter/formats/adblockrule.h b/lib/urlfilter/matcher.h
index 6be3cdf..6696958 100644
--- a/lib/urlfilter/formats/adblockrule.h
+++ b/lib/urlfilter/matcher.h
@@ -6,17 +6,24 @@
* SPDX-License-Identifier: GPL-3.0
*/
-#ifndef SMOLBOTE_ADBLOCKRULE_H
-#define SMOLBOTE_ADBLOCKRULE_H
+#ifndef SMOLBOTE_URLFILTER_MATCHER
+#define SMOLBOTE_URLFILTER_MATCHER
-#include "../filterleaf.h"
-#include <optional>
+#include <QUrl>
+#include <QString>
+#include <utility>
#include <QRegularExpression>
#include <QStringMatcher>
+#include <QWebEngineUrlRequestInfo>
+/** An interface class so we can use templated ContentsMatcher interchangeably
+ */
class Matcher
{
public:
+ virtual ~Matcher() = default;
+
+ virtual void setCaseSensitive(bool matchCase) = 0;
virtual bool hasMatch(const QString &where) const = 0;
};
@@ -24,12 +31,10 @@ template <typename T>
class ContentsMatcher : public Matcher
{
public:
- ContentsMatcher(const QString &pattern, FilterLeaf::UrlMatchType matchType)
+ ContentsMatcher(const QString &pattern, UrlFilter::MatchType type)
+ : patternLength(pattern.length())
+ , matchType(type)
{
- this->matchType = matchType;
- patternLength = pattern.length();
-
-
if constexpr(std::is_same_v<T, QRegularExpression>) {
matcher.setPatternOptions(matcher.patternOptions() | QRegularExpression::CaseInsensitiveOption);
matcher.setPattern(pattern);
@@ -38,7 +43,19 @@ public:
matcher.setPattern(pattern);
} else if constexpr(std::is_same_v<T, QString>) {
matcher = QUrl::fromUserInput(pattern).host();
-// qDebug("matcher: %s", qUtf8Printable(matcher));
+ }
+ }
+ ~ContentsMatcher() = default;
+
+ void setCaseSensitive(bool matchCase) override
+ {
+ if constexpr(std::is_same_v<T, QRegularExpression>) {
+ auto options = matcher.patternOptions();
+ options.setFlag(QRegularExpression::CaseInsensitiveOption, !matchCase);
+ matcher.setPatternOptions(options);
+
+ } else if constexpr(std::is_same_v<T, QStringMatcher>) {
+ matcher.setCaseSensitivity(matchCase ? Qt::CaseSensitive : Qt::CaseInsensitive);
}
}
@@ -46,68 +63,47 @@ public:
{
if constexpr(std::is_same_v<T, QStringMatcher>) {
switch (matchType) {
- case FilterLeaf::InvalidMatch:
- case FilterLeaf::RegularExpressionMatch:
- case FilterLeaf::DomainMatch:
+ case UrlFilter::InvalidMatch:
+ case UrlFilter::RegularExpressionMatch:
+ case UrlFilter::DomainMatch:
qWarning("ContentsMatcher is a String Matcher, but not doing string matching!");
return false;
- case FilterLeaf::StringContains:
+ case UrlFilter::StringContains:
return (matcher.indexIn(where) != -1);
- case FilterLeaf::StringStartsWith:
+ case UrlFilter::StringStartsWith:
return (matcher.indexIn(where) == 0);
- case FilterLeaf::StringEndsWith:
+ case UrlFilter::StringEndsWith:
return (matcher.indexIn(where) == where.length() - patternLength);
- case FilterLeaf::StringEquals:
+ case UrlFilter::StringEquals:
return (matcher.indexIn(where) == 0) && (patternLength == where.length());
}
} else if constexpr(std::is_same_v<T, QRegularExpression>) {
- if(matchType != FilterLeaf::RegularExpressionMatch)
+ if(matchType != UrlFilter::RegularExpressionMatch)
qWarning("ContentsMatcher is a regular expression, but not doing a regular expression match!");
return matcher.match(where).hasMatch();
} else if constexpr(std::is_same_v<T, QString>) {
// TODO: fix
- if(matchType == FilterLeaf::DomainMatch) {
+ if(matchType == UrlFilter::DomainMatch) {
// qDebug("matching %s", qUtf8Printable(QUrl(where).host()));
return QUrl(where).host().endsWith(matcher);
} else
return matcher == where;
- } else {
- qWarning("Matcher has no backend, returning false");
- return false;
}
+
+ qWarning("Matcher has no backend, returning false");
+ return false;
}
private:
- int patternLength;
+ const int patternLength;
+ const UrlFilter::MatchType matchType;
T matcher;
- FilterLeaf::UrlMatchType matchType;
};
-class AdBlockRule : public FilterLeaf
-{
-public:
- explicit AdBlockRule(FilterLeaf::UrlMatchType matchType, const QString &filter, FilterLeaf::Action action);
- ~AdBlockRule()
- {
- delete stringMatcher;
- delete regExp;
- };
-
- void mergeOptions(const QHash<QWebEngineUrlRequestInfo::ResourceType, bool> &options);
-
- bool match(const QUrl &requestUrl) const override;
- bool match(const QUrl &requestUrl, QWebEngineUrlRequestInfo::ResourceType type) const;
- std::pair<FilterLeaf::Action, QVariant> action() const override;
-
-private:
- /* Once C++20 comes out, perhaps this can be replaced with a concept template */
- QStringMatcher *stringMatcher = nullptr;
- QRegExp *regExp = nullptr;
-};
+#endif // SMOLBOTE_URLFILTER_MATCHER
-#endif // SMOLBOTE_ADBLOCKRULE_H
diff --git a/lib/urlfilter/meson.build b/lib/urlfilter/meson.build
index 1f4f47c..b017eb5 100644
--- a/lib/urlfilter/meson.build
+++ b/lib/urlfilter/meson.build
@@ -1,19 +1,26 @@
-urlfilter_inc = include_directories('.')
-
-urlfilter_moc = qt5.preprocess(
- moc_headers: 'filtertree.h',
- dependencies: dep_qt5
-)
-
urlfilter_lib = static_library('urlfilter',
- ['filtertree.cpp', 'filterleaf.cpp', urlfilter_moc,
- 'domain.cpp', 'domain.h',
- 'formats/adblockrule.cpp', 'formats/adblockrule_parse.cpp', 'formats/hostlistrule.cpp',
- 'formats/adblocklist.cpp'],
+ ['urlfilter.h', 'matcher.h',
+ 'hostlist/hostlist.cpp', 'hostlist/hostlist.h',
+ 'adblock/adblocklist.cpp', 'adblock/adblocklist.h', 'adblock/parser.cpp', 'adblock/parser.h'],
dependencies: dep_qt5
)
dep_urlfilter = declare_dependency(
- include_directories: urlfilter_inc,
+ include_directories: include_directories('.'),
link_with: urlfilter_lib
)
+
+if get_option('testing').enabled()
+ test('urlfilter: matcher',
+ executable('urlfilter-matcher', dependencies: [dep_qt5, dep_gtest, dep_urlfilter], sources: ['test/matcher.cpp']),
+ workdir: meson.current_source_dir() / 'test'
+ )
+ test('urlfilter: host list',
+ executable('urlfilter-hostlist', dependencies: [dep_qt5, dep_gtest, dep_urlfilter], sources: ['test/hostlist.cpp']),
+ workdir: meson.current_source_dir() / 'test'
+ )
+ test('urlfilter: adblock list',
+ executable('urlfilter-adblocklist', dependencies: [dep_qt5, dep_gtest, dep_urlfilter], sources: ['test/adblock.cpp']),
+ workdir: meson.current_source_dir() / 'test'
+ )
+endif
diff --git a/lib/urlfilter/test/adblock.cpp b/lib/urlfilter/test/adblock.cpp
new file mode 100644
index 0000000..ecb94ee
--- /dev/null
+++ b/lib/urlfilter/test/adblock.cpp
@@ -0,0 +1,88 @@
+#include "urlfilter.h"
+#include "adblock/adblocklist.h"
+#include <gtest/gtest.h>
+#include <QFile>
+
+AdBlockList *list = nullptr;
+
+// The "! Key: value" header lines of the fixture must be exposed via metadata().
+TEST(AdBlockList, MetaData) {
+    // keep the UTF-8 bytes alive for the duration of each comparison
+    const auto meta = [](const char *key) {
+        return list->metadata(key).toUtf8();
+    };
+
+    EXPECT_STREQ(meta("Homepage").constData(), "http://example.com/");
+    EXPECT_STREQ(meta("Title").constData(), "FooList");
+    EXPECT_STREQ(meta("Expires").constData(), "5 days");
+    EXPECT_STREQ(meta("Redirect").constData(), "http://example.com/list.txt");
+    EXPECT_STREQ(meta("Version").constData(), "1234");
+}
+
+// Fixture rule: /banner/*/img^
+TEST(AdBlockList, BasicFilter) {
+    const auto verdictFor = [](const char *url, QWebEngineUrlRequestInfo::ResourceType type) {
+        return list->match(QUrl(), QUrl(url), type).first;
+    };
+    const auto image = QWebEngineUrlRequestInfo::ResourceTypeImage;
+
+    // URLs the rule is expected to block
+    EXPECT_EQ(verdictFor("http://example.com/banner/foo/img", image), UrlFilter::Block);
+    EXPECT_EQ(verdictFor("http://example.com/banner/foo/bar/img?param", image), UrlFilter::Block);
+    EXPECT_EQ(verdictFor("http://example.com/banner//img/foo", image), UrlFilter::Block);
+
+    // URLs the rule is expected to leave alone
+    EXPECT_EQ(verdictFor("http://example.com/banner/foo.png", image), UrlFilter::NotMatched);
+    EXPECT_EQ(verdictFor("http://example.com/banner/img", image), UrlFilter::NotMatched);
+    EXPECT_EQ(verdictFor("http://example.com/banner/foo/imgraph", image), UrlFilter::NotMatched);
+    EXPECT_EQ(verdictFor("http://example.com/banner/foo/img.gif", image), UrlFilter::NotMatched);
+
+    // same kind of URL requested as a main frame
+    EXPECT_EQ(verdictFor("http://example.com/banner/ads/img.png", QWebEngineUrlRequestInfo::ResourceTypeMainFrame), UrlFilter::NotMatched);
+}
+
+// Anchored patterns: one rule tied to the start of the URL, one to its end.
+TEST(AdBlockList, MatchBeginningEnd) {
+    const auto verdictFor = [](const char *url) {
+        return list->match(QUrl(), QUrl(url), QWebEngineUrlRequestInfo::ResourceTypeMainFrame).first;
+    };
+
+    // Fixture rule: ||http://beginning-pattern.com
+    EXPECT_EQ(verdictFor("http://beginning-pattern.com"), UrlFilter::Block);
+    EXPECT_EQ(verdictFor("https://beginning-pattern.com"), UrlFilter::NotMatched);
+
+    // Fixture rule: end-pattern|
+    EXPECT_EQ(verdictFor("https://endpattern.com/end-pattern"), UrlFilter::Block);
+    EXPECT_EQ(verdictFor("https://endpattern.com/end-pattern/foo"), UrlFilter::NotMatched);
+}
+
+// Fixture rule: ||ads.example.com^
+TEST(AdBlockList, Domain) {
+    const auto verdictFor = [](const char *url) {
+        return list->match(QUrl(), QUrl(url), QWebEngineUrlRequestInfo::ResourceTypeMainFrame).first;
+    };
+
+    // the domain itself, a subdomain of it, and the domain with a port
+    EXPECT_EQ(verdictFor("http://ads.example.com/foo.gif"), UrlFilter::Block);
+    EXPECT_EQ(verdictFor("http://server1.ads.example.com/foo.gif"), UrlFilter::Block);
+    EXPECT_EQ(verdictFor("https://ads.example.com:8000/"), UrlFilter::Block);
+
+    // a longer host name, and the domain appearing only inside the path
+    EXPECT_EQ(verdictFor("http://ads.example.com.ua/foo.gif"), UrlFilter::NotMatched);
+    EXPECT_EQ(verdictFor("http://example.com/redirect/http://ads.example.com/"), UrlFilter::NotMatched);
+}
+
+// Fixture rule: /banner\d+/
+TEST(AdBlockList, RegularExpression) {
+    const auto verdictFor = [](const char *url) {
+        return list->match(QUrl(), QUrl(url), QWebEngineUrlRequestInfo::ResourceTypeMainFrame).first;
+    };
+
+    EXPECT_EQ(verdictFor("http://example.com/banner123"), UrlFilter::Block);
+    EXPECT_EQ(verdictFor("http://example.com/banner321"), UrlFilter::Block);
+    // no digits after "banner"
+    EXPECT_EQ(verdictFor("http://example.com/banners"), UrlFilter::NotMatched);
+}
+
+// Fixture rule: matchThisCase$match-case
+TEST(AdBlockList, MatchCase) {
+    const auto verdictFor = [](const char *url) {
+        return list->match(QUrl(), QUrl(url), QWebEngineUrlRequestInfo::ResourceTypeMainFrame).first;
+    };
+
+    // identical casing is blocked, a differently cased path is not
+    EXPECT_EQ(verdictFor("http://matchcase.com/matchThisCase"), UrlFilter::Block);
+    EXPECT_EQ(verdictFor("http://matchcase.com/MatchThisCase"), UrlFilter::NotMatched);
+}
+
+// Rules restricted by the $domain= option, checked against the first-party URL.
+TEST(AdBlockList, DomainOption) {
+    const auto verdictFor = [](const char *firstParty, const char *url) {
+        return list->match(QUrl(firstParty), QUrl(url), QWebEngineUrlRequestInfo::ResourceTypeMainFrame).first;
+    };
+
+    // Fixture rule: domain-limited-string$domain=example.com
+    EXPECT_EQ(verdictFor("https://example.com", "https://example.com/domain-limited-string/foo"), UrlFilter::Block);
+    EXPECT_EQ(verdictFor("https://example.com", "https://example.com/another-domain-string/foo"), UrlFilter::NotMatched);
+    EXPECT_EQ(verdictFor("https://another.com", "https://example.com/domain-limited-string/foo"), UrlFilter::NotMatched);
+
+    // Fixture rule: exception-limited-string$domain=~example.com
+    EXPECT_EQ(verdictFor("https://another.com", "https://example.com/exception-limited-string/foo"), UrlFilter::Block);
+    EXPECT_EQ(verdictFor("https://example.com", "https://example.com/exception-limited-string/foo"), UrlFilter::NotMatched);
+}
+
+/**
+ * Test entry point: parses the "adblock.txt" fixture (path relative to the
+ * working directory) into the global list, runs all tests and releases the
+ * list again. Returns -1 if the fixture cannot be opened, otherwise the
+ * result of RUN_ALL_TESTS().
+ */
+int main(int argc, char **argv) {
+    QFile f("adblock.txt");
+    if(!f.open(QIODevice::ReadOnly | QIODevice::Text)) {
+        qDebug("Could not open list");
+        return -1;
+    }
+
+    list = new AdBlockList(&f);
+    f.close();
+
+    qDebug("Parsed %i rules", list->ruleCount());
+
+    testing::InitGoogleTest(&argc, argv);
+    const int result = RUN_ALL_TESTS();
+
+    // free the fixture instead of leaking it, so leak checkers stay quiet
+    delete list;
+    list = nullptr;
+
+    return result;
+}
+
diff --git a/lib/urlfilter/test/adblock.txt b/lib/urlfilter/test/adblock.txt
new file mode 100644
index 0000000..635ce09
--- /dev/null
+++ b/lib/urlfilter/test/adblock.txt
@@ -0,0 +1,26 @@
+! Homepage: http://example.com/
+! Title: FooList
+! Expires: 5 days
+! Redirect: http://example.com/list.txt
+! Version: 1234
+
+/banner/*/img^
+||ads.example.com^
+|http://example.com/|
+/banner\d+/
+
+! match beginning
+||http://beginning-pattern.com
+! match end
+end-pattern|
+
+! options
+! match-case
+matchThisCase$match-case
+
+! domain limiting
+! only apply this filter on this domain
+domain-limited-string$domain=example.com
+! apply this filter to all domains but the listed one
+exception-limited-string$domain=~example.com
+
diff --git a/lib/urlfilter/test/hostlist.cpp b/lib/urlfilter/test/hostlist.cpp
new file mode 100644
index 0000000..041cd5f
--- /dev/null
+++ b/lib/urlfilter/test/hostlist.cpp
@@ -0,0 +1,34 @@
+#include <gtest/gtest.h>
+#include "hostlist/hostlist.h"
+#include <QFile>
+
+HostList *list = nullptr;
+
+// Exercises HostList::match() against the hostlist.txt fixture. The suite is
+// named after the class under test (it used to be labelled AdBlockList).
+TEST(HostList, Block) {
+    const auto resultFor = [](const char *host) {
+        return list->match(QUrl(), QUrl::fromUserInput(host), QWebEngineUrlRequestInfo::ResourceTypeMainFrame);
+    };
+
+    // domains expected to be blocked
+    EXPECT_EQ(resultFor("blockeddomain.com").first, UrlFilter::Block);
+    EXPECT_EQ(resultFor("blockeddomain.first").first, UrlFilter::Block);
+    EXPECT_EQ(resultFor("blockeddomain.second").first, UrlFilter::Block);
+
+    // a redirect rule must also report the address to redirect to
+    const auto r = resultFor("localhost.localdomain");
+    EXPECT_EQ(r.first, UrlFilter::Redirect);
+    EXPECT_EQ(r.second, QString("127.0.0.1"));
+
+    // a domain that is expected to match no rule
+    EXPECT_EQ(resultFor("other.domain").first, UrlFilter::NotMatched);
+}
+
+/**
+ * Test entry point: parses the "hostlist.txt" fixture (path relative to the
+ * working directory) into the global list, runs all tests and releases the
+ * list again. Returns -1 if the fixture cannot be opened, otherwise the
+ * result of RUN_ALL_TESTS().
+ */
+int main(int argc, char **argv) {
+    QFile f("hostlist.txt");
+    if(!f.open(QIODevice::ReadOnly | QIODevice::Text)) {
+        qDebug("Could not open list");
+        return -1;
+    }
+
+    list = new HostList(&f);
+    f.close();
+
+    qDebug("Parsed %i rules", list->ruleCount());
+
+    testing::InitGoogleTest(&argc, argv);
+    const int result = RUN_ALL_TESTS();
+
+    // free the fixture instead of leaking it, so leak checkers stay quiet
+    delete list;
+    list = nullptr;
+
+    return result;
+}
+
diff --git a/test/hostlist.txt b/lib/urlfilter/test/hostlist.txt
index a0b4e5c..a0b4e5c 100644
--- a/test/hostlist.txt
+++ b/lib/urlfilter/test/hostlist.txt
diff --git a/lib/urlfilter/test/matcher.cpp b/lib/urlfilter/test/matcher.cpp
new file mode 100644
index 0000000..1c1efbf
--- /dev/null
+++ b/lib/urlfilter/test/matcher.cpp
@@ -0,0 +1,42 @@
+#include "urlfilter.h"
+#include "matcher.h"
+#include <gtest/gtest.h>
+
+// StringContains: the pattern may appear anywhere in the subject.
+TEST(Matcher, StringContains) {
+    const ContentsMatcher<QStringMatcher> contains("spam-pattern", UrlFilter::StringContains);
+
+    EXPECT_TRUE(contains.hasMatch("this string contains a spam-pattern"));
+    EXPECT_FALSE(contains.hasMatch("this string does not contain the pattern"));
+}
+
+// StringStartsWith: the pattern has to sit at the very start of the subject.
+TEST(Matcher, StringStartsWith) {
+    const ContentsMatcher<QStringMatcher> startsWith("beginning", UrlFilter::StringStartsWith);
+
+    EXPECT_TRUE(startsWith.hasMatch("beginning this string is the pattern"));
+    // pattern present, but not at the start
+    EXPECT_FALSE(startsWith.hasMatch("ending this string is the pattern, the word beginning"));
+    // pattern absent
+    EXPECT_FALSE(startsWith.hasMatch("this would be a string where the pattern cannot be found"));
+}
+
+// StringEndsWith: the pattern has to be the tail of the subject.
+TEST(Matcher, StringEndsWith) {
+    const ContentsMatcher<QStringMatcher> endsWith("ending", UrlFilter::StringEndsWith);
+
+    EXPECT_TRUE(endsWith.hasMatch("this string has the proper ending"));
+    EXPECT_FALSE(endsWith.hasMatch("and this string doesn't"));
+}
+
+// StringEquals: subject and pattern have to be the same string.
+TEST(Matcher, StringEquals) {
+    const ContentsMatcher<QStringMatcher> equals("string-to-match", UrlFilter::StringEquals);
+
+    EXPECT_TRUE(equals.hasMatch("string-to-match"));
+    // same length, different content
+    EXPECT_FALSE(equals.hasMatch("same-len-string"));
+    // pattern contained, but with extra text in front
+    EXPECT_FALSE(equals.hasMatch("not the string-to-match"));
+}
+
+// RegularExpressionMatch: "banner" followed by at least one digit.
+TEST(Matcher, RegularExpression) {
+    const ContentsMatcher<QRegularExpression> bannerWithDigits("banner\\d+", UrlFilter::RegularExpressionMatch);
+
+    EXPECT_TRUE(bannerWithDigits.hasMatch("http://another.com/banner123"));
+    EXPECT_TRUE(bannerWithDigits.hasMatch("http://another.com/banner321"));
+    EXPECT_FALSE(bannerWithDigits.hasMatch("http://another.com/banners"));
+}
+
+// Test entry point: lets GoogleTest consume its command line flags, then runs every test.
+int main(int argc, char **argv) {
+    ::testing::InitGoogleTest(&argc, argv);
+    const int exitCode = RUN_ALL_TESTS();
+    return exitCode;
+}
diff --git a/lib/urlfilter/urlfilter.h b/lib/urlfilter/urlfilter.h
new file mode 100644
index 0000000..e15122a
--- /dev/null
+++ b/lib/urlfilter/urlfilter.h
@@ -0,0 +1,43 @@
+/*
+ * This file is part of smolbote. It's copyrighted by the contributors recorded
+ * in the version control history of the file, available from its original
+ * location: https://neueland.iserlohn-fortress.net/gitea/aqua/smolbote
+ *
+ * SPDX-License-Identifier: GPL-3.0
+ */
+
+#include <QUrl>
+#include <QString>
+#include <utility>
+#include <QWebEngineUrlRequestInfo>
+
+#ifndef SMOLBOTE_URLFILTER_FILTER
+#define SMOLBOTE_URLFILTER_FILTER
+
+class UrlFilter // Abstract interface implemented by the individual filter list formats.
+{
+public:
+ enum MatchResult { // Outcome reported as the first element of match()'s result.
+ NotMatched, // no rule applied to the request
+ Allow,
+ Block,
+ Redirect // the second element of match()'s result carries the redirect string
+ };
+
+ enum MatchType { // How a pattern is compared against the subject by a matcher.
+ InvalidMatch,
+ RegularExpressionMatch,
+ StringContains,
+ StringStartsWith,
+ StringEndsWith,
+ StringEquals,
+ DomainMatch
+ };
+
+ virtual ~UrlFilter() = default; // virtual, so filters can be destroyed through a UrlFilter pointer
+
+ virtual QString metadata(const QString &key) const = 0; // value stored under key
+ virtual std::pair<MatchResult, QString> match(const QUrl &firstParty, const QUrl &requestUrl, QWebEngineUrlRequestInfo::ResourceType type) const = 0; // verdict for one request, plus an accompanying string (the redirect string for Redirect)
+};
+
+#endif // SMOLBOTE_URLFILTER_FILTER
diff --git a/meson.build b/meson.build
index d903910..1d333ca 100644
--- a/meson.build
+++ b/meson.build
@@ -72,6 +72,8 @@ interfaces_moc = qt5.preprocess(
dependencies: dep_qt5
)
+dep_gtest = dependency('gtest', required: get_option('testing'))
+
subdir('lib/about')
subdir('lib/addressbar')
subdir('lib/bookmarks')
@@ -89,7 +91,3 @@ subdir('doc')
subdir('plugins/ConfigurationEditor')
subdir('plugins/ProfileEditor')
-if get_option('testing').enabled()
- subdir('test')
-endif
-
diff --git a/src/browser.cpp b/src/browser.cpp
index 42bbc5d..3a23eeb 100644
--- a/src/browser.cpp
+++ b/src/browser.cpp
@@ -18,7 +18,6 @@
#include "profilemanager.h"
#include "subwindow/subwindow.h"
#include "util.h"
-#include "webengine/filter.h"
#include "webengine/urlinterceptor.h"
#include "webprofile.h"
#include <QAction>
@@ -35,6 +34,9 @@
#include <version.h>
#include "mainwindow/menubar.h"
#include "webengine/webview.h"
+#include "urlfilter.h"
+#include "adblock/adblocklist.h"
+#include "hostlist/hostlist.h"
Browser::Browser(int &argc, char *argv[], bool allowSecondary)
: SingleApplication(argc, argv, allowSecondary, SingleApplication::User | SingleApplication::SecondaryNotification | SingleApplication::ExcludeAppVersion)
@@ -99,7 +101,16 @@ QPair<QString, Profile *> Browser::loadProfile(const QString &id, bool isOffTheR
profile = m_profileManager->createProfile(id, isOffTheRecord);
}
connect(profile, &WebProfile::downloadRequested, m_downloads.get(), &DownloadsWidget::addDownload);
- auto *interceptor = new UrlRequestInterceptor(m_urlFilter.get(), profile, profile);
+ auto *interceptor = new UrlRequestInterceptor(profile, profile);
+ for(UrlFilter *filter : m_filters) {
+ interceptor->addFilter(filter);
+ }
+ const auto headers = m_config->value<QStringList>("filter.header").value_or(QStringList());
+ for(const QString &header : headers) {
+ const auto h = header.split(QLatin1Literal(":"));
+ if(h.length() == 2)
+ interceptor->addHttpHeader(h.at(0).toLatin1(), h.at(1).toLatin1());
+ }
profile->setRequestInterceptor(interceptor);
return QPair<QString, WebProfile *>(m_profileManager->id(profile), profile);
@@ -164,7 +175,20 @@ void Browser::setup(QVector<QPluginLoader *> plugins)
// downloads
m_downloads = std::make_unique<DownloadsWidget>(m_config->value<QString>("downloads.path").value());
// url request filter
- m_urlFilter = std::make_unique<Filter>(m_config);
+ for(const QString &hostlist : Util::files(m_config->value<QString>("filter.hosts").value_or(QString()))) {
+ QFile f(hostlist);
+ if(f.open(QIODevice::ReadOnly | QIODevice::Text)) {
+ m_filters.append(new HostList(&f));
+ f.close();
+ }
+ }
+ for(const QString &adblock : Util::files(m_config->value<QString>("filter.adblock").value_or(QString()))) {
+ QFile f(adblock);
+ if(f.open(QIODevice::ReadOnly | QIODevice::Text)) {
+ m_filters.append(new AdBlockList(&f));
+ f.close();
+ }
+ }
// cookie request filter
// load profiles
diff --git a/src/browser.h b/src/browser.h
index 53ee521..8a40152 100644
--- a/src/browser.h
+++ b/src/browser.h
@@ -19,10 +19,10 @@
#include <QMenu>
#include <QPluginLoader>
+class UrlFilter;
class Configuration;
class BookmarksWidget;
class DownloadsWidget;
-class Filter;
class MainWindow;
class ProfileManager;
class Browser : public SingleApplication, public BrowserInterface
@@ -91,7 +91,7 @@ private:
std::shared_ptr<BookmarksWidget> m_bookmarks;
std::unique_ptr<DownloadsWidget> m_downloads;
ProfileManager *m_profileManager;
- std::unique_ptr<Filter> m_urlFilter;
+ QVector<UrlFilter *> m_filters;
QVector<MainWindow *> m_windows;
QVector<PluginInfo*> m_plugins;
diff --git a/src/meson.build b/src/meson.build
index f07a2ec..fb338d8 100644
--- a/src/meson.build
+++ b/src/meson.build
@@ -4,7 +4,7 @@ poi_moc = qt5.preprocess(
'mainwindow/mainwindow.h', 'mainwindow/menubar.h', 'mainwindow/widgets/dockwidget.h', 'mainwindow/widgets/menusearch.h', 'mainwindow/widgets/navigationbar.h', 'mainwindow/widgets/searchform.h',
'session/savesessiondialog.h', 'session/sessiondialog.h', 'session/sessionform.h',
'subwindow/subwindow.h', 'subwindow/tabwidget.h',
- 'webengine/filter.h', 'webengine/urlinterceptor.h', 'webengine/webpage.h', 'webengine/webview.h'],
+ 'webengine/urlinterceptor.h', 'webengine/webpage.h', 'webengine/webview.h'],
ui_files: ['mainwindow/widgets/searchform.ui', 'session/savesessiondialog.ui', 'session/sessiondialog.ui', 'session/sessionform.ui'],
qresources: '../data/resources.qrc',
rcc_extra_arguments: ['--format-version=1'],
@@ -35,7 +35,6 @@ poi = executable(get_option('poiName'), install: true,
'subwindow/subwindow.cpp',
'subwindow/tabwidget.cpp',
- 'webengine/filter.cpp',
'webengine/urlinterceptor.cpp',
'webengine/webpage.cpp',
'webengine/webview.cpp',
diff --git a/src/webengine/filter.cpp b/src/webengine/filter.cpp
deleted file mode 100644
index f1a38af..0000000
--- a/src/webengine/filter.cpp
+++ /dev/null
@@ -1,70 +0,0 @@
-/*
- * This file is part of smolbote. It's copyrighted by the contributors recorded
- * in the version control history of the file, available from its original
- * location: https://neueland.iserlohn-fortress.net/gitea/aqua/smolbote
- *
- * SPDX-License-Identifier: GPL-3.0
- */
-
-#include "filter.h"
-#include "configuration.h"
-#include "urlinterceptor.h"
-#include "util.h"
-#include <QDir>
-#include <QJsonArray>
-#include <QJsonDocument>
-#include <QTextStream>
-
-Filter::Filter::Filter(const std::unique_ptr<Configuration> &config, QObject *parent)
- : QObject(parent)
-{
- // parse headers
- if(config->exists("filter.header")) {
- const auto headers = config->value<QStringList>("filter.header").value();
- for(const QString header : headers) {
- const auto list = header.split(QLatin1Literal(":"));
- if(list.length() == 2)
- m_headers.insert(list.at(0).toLatin1(), list.at(1).toLatin1());
- }
-#ifdef QT_DEBUG
- qDebug("Added %i custom http headers", m_headers.size());
-#endif
- }
-
- const QStringList hostfiles = Util::files(config->value<QString>("filter.hosts").value());
- //qDebug("filter.path=[%s]", qUtf8Printable(config->value<QString>("filter.hosts").value()));
- for(const QString &hostfile : hostfiles) {
- QFile f(hostfile);
- if(f.open(QIODevice::ReadOnly | QIODevice::Text)) {
-#ifdef QT_DEBUG
- qDebug("Loading hostlist filters [%s]", qUtf8Printable(hostfile));
-#endif
- loadHostlist(f, &filters);
- f.close();
- }
- }
-}
-
-void Filter::filterRequest(QWebEngineUrlRequestInfo &info) const
-{
- auto matches = filters.match(info.firstPartyUrl().toString(), info.requestUrl().toString());
- for(const auto &rule : matches) {
- switch(rule->action().first) {
- case FilterLeaf::NotMatched:
-#ifdef QT_DEBUG
- qDebug("Paradoxical match: request matched, but not matched.");
- qDebug(" - %s", qUtf8Printable(info.requestUrl().toString()));
-#endif
- break;
- case FilterLeaf::Block:
- //qDebug("block %s", qUtf8Printable(info.requestUrl().toString()));
- info.block(true);
- break;
- case FilterLeaf::Allow:
- info.block(false);
- break;
- //case FilterLeaf::Redirect:
- // break;
- }
- }
-}
diff --git a/src/webengine/filter.h b/src/webengine/filter.h
deleted file mode 100644
index c49bed9..0000000
--- a/src/webengine/filter.h
+++ /dev/null
@@ -1,43 +0,0 @@
-/*
- * This file is part of smolbote. It's copyrighted by the contributors recorded
- * in the version control history of the file, available from its original
- * location: https://neueland.iserlohn-fortress.net/gitea/aqua/smolbote
- *
- * SPDX-License-Identifier: GPL-3.0
- */
-
-#ifndef SMOLBOTE_FILTER_H
-#define SMOLBOTE_FILTER_H
-
-#include <QByteArray>
-#include <QMap>
-#include <QVector>
-#include <optional>
-#include <memory>
-#include "filtertree.h"
-
-class Configuration;
-class Filter : public QObject
-{
- Q_OBJECT
-public:
- struct HostRule {
- bool isBlocking;
- };
-
- explicit Filter(const std::unique_ptr<Configuration> &config, QObject *parent = nullptr);
- ~Filter() override = default;
-
- void filterRequest(QWebEngineUrlRequestInfo &info) const;
-
- const QMap<QByteArray, QByteArray> headers() const
- {
- return qAsConst(m_headers);
- }
-
-private:
- FilterTree filters;
- QMap<QByteArray, QByteArray> m_headers;
-};
-
-#endif // SMOLBOTE_FILTER_H
diff --git a/src/webengine/urlinterceptor.cpp b/src/webengine/urlinterceptor.cpp
index 7e5630f..490dea6 100644
--- a/src/webengine/urlinterceptor.cpp
+++ b/src/webengine/urlinterceptor.cpp
@@ -7,36 +7,61 @@
*/
#include "urlinterceptor.h"
-#include "formats/adblockrule.h"
-#include <QDir>
-#include <QJsonArray>
-#include <QJsonDocument>
-#include <QTextStream>
-#include <boost/algorithm/string.hpp>
-#include "configuration.h"
-#include "filter.h"
#include "webprofile.h"
+#include "urlfilter.h"
// test DNT on https://browserleaks.com/donottrack
-UrlRequestInterceptor::UrlRequestInterceptor(Filter* filter, WebProfile* profile, QObject* parent)
+UrlRequestInterceptor::UrlRequestInterceptor(WebProfile* profile, QObject* parent)
: QWebEngineUrlRequestInterceptor(parent)
{
- Q_CHECK_PTR(filter);
- m_filter = filter;
Q_CHECK_PTR(profile);
m_profile = profile;
}
+void UrlRequestInterceptor::addHttpHeader(const QByteArray &key, const QByteArray &value)
+{
+ headers.append(qMakePair(key, value));
+}
+
+void UrlRequestInterceptor::addFilter(UrlFilter *filter)
+{
+ if(filter != nullptr)
+ filters.append(filter);
+}
+void UrlRequestInterceptor::removeFilter(UrlFilter *filter)
+{
+ if(filter != nullptr)
+ filters.removeOne(filter);
+}
+
void UrlRequestInterceptor::interceptRequest(QWebEngineUrlRequestInfo &info)
{
- m_filter->filterRequest(info);
+ for(const auto *filter : filters) {
+ const auto match = filter->match(info.firstPartyUrl(), info.requestUrl(), info.resourceType());
+
+ // skip if no match
+ if(match.first == UrlFilter::NotMatched)
+ continue;
+
+ else {
+ if(match.first == UrlFilter::Allow)
+ info.block(false);
+ else if(match.first == UrlFilter::Block)
+ info.block(true);
+ else if(match.first == UrlFilter::Redirect)
+ info.redirect(QUrl::fromUserInput(match.second));
+ // we found a match, skip the rest
+ break;
+ }
+ }
// set headers
- for(auto i = m_filter->headers().constBegin(); i != m_filter->headers().constEnd(); ++i) {
- info.setHttpHeader(i.key(), i.value());
+ for(const auto &header : headers) {
+ info.setHttpHeader(header.first, header.second);
}
for(auto i = m_profile->headers().constBegin(); i != m_profile->headers().constEnd(); ++i) {
info.setHttpHeader(i.key(), i.value());
}
}
+
diff --git a/src/webengine/urlinterceptor.h b/src/webengine/urlinterceptor.h
index 62fd683..4909586 100644
--- a/src/webengine/urlinterceptor.h
+++ b/src/webengine/urlinterceptor.h
@@ -9,26 +9,30 @@
#ifndef SMOLBOTE_URLREQUESTINTERCEPTOR_H
#define SMOLBOTE_URLREQUESTINTERCEPTOR_H
-#include <QByteArray>
#include <QVector>
#include <QWebEngineUrlRequestInterceptor>
-#include <memory>
+#include <QByteArray>
-class Filter;
+class UrlFilter;
class WebProfile;
-class Configuration;
class UrlRequestInterceptor : public QWebEngineUrlRequestInterceptor
{
Q_OBJECT
public:
- explicit UrlRequestInterceptor(Filter *filter, WebProfile *profile, QObject *parent = nullptr);
+ explicit UrlRequestInterceptor(WebProfile *profile, QObject *parent = nullptr);
~UrlRequestInterceptor() override = default;
+ void addHttpHeader(const QByteArray &key, const QByteArray &value);
+
+ void addFilter(UrlFilter *filter);
+ void removeFilter(UrlFilter *filter);
+
void interceptRequest(QWebEngineUrlRequestInfo &info) override;
private:
- Filter *m_filter;
WebProfile *m_profile;
+ QVector<QPair<QByteArray, QByteArray>> headers;
+ QVector<UrlFilter*> filters;
};
#endif // SMOLBOTE_URLREQUESTINTERCEPTOR_H
diff --git a/test/adblock.txt b/test/adblock.txt
deleted file mode 100644
index cd284e8..0000000
--- a/test/adblock.txt
+++ /dev/null
@@ -1,4 +0,0 @@
-/banner/*/img^
-||ads.example.com^
-|http://example.com/|
-/banner\d+/
diff --git a/test/adblock/adblocktest.cpp b/test/adblock/adblocktest.cpp
deleted file mode 100644
index bbcaf0e..0000000
--- a/test/adblock/adblocktest.cpp
+++ /dev/null
@@ -1,83 +0,0 @@
-#include "formats/adblockrule.h"
-#include "formats/adblockrule_parse.h"
-#include <gtest/gtest.h>
-
-TEST(Matcher, StringContains) {
- ContentsMatcher<QStringMatcher> matcher("spam-pattern", FilterLeaf::StringContains);
- EXPECT_TRUE(matcher.hasMatch("this string contains a spam-pattern"));
- EXPECT_FALSE(matcher.hasMatch("this string does not contain the pattern"));
-}
-
-TEST(Matcher, StringStartsWith) {
- ContentsMatcher<QStringMatcher> matcher("beginning", FilterLeaf::StringStartsWith);
- EXPECT_TRUE(matcher.hasMatch("beginning this string is the pattern"));
- EXPECT_FALSE(matcher.hasMatch("ending this string is the pattern, the word beginning"));
- EXPECT_FALSE(matcher.hasMatch("this would be a string where the pattern cannot be found"));
-}
-
-TEST(Matcher, StringEndsWith) {
- ContentsMatcher<QStringMatcher> matcher("ending", FilterLeaf::StringEndsWith);
- EXPECT_TRUE(matcher.hasMatch("this string has the proper ending"));
- EXPECT_FALSE(matcher.hasMatch("and this string doesn't"));
-}
-
-TEST(Matcher, StringEquals) {
- ContentsMatcher<QStringMatcher> matcher("string-to-match", FilterLeaf::StringEquals);
- EXPECT_TRUE(matcher.hasMatch("string-to-match"));
- EXPECT_FALSE(matcher.hasMatch("same-len-string"));
- EXPECT_FALSE(matcher.hasMatch("not the string-to-match"));
-}
-
-TEST(Matcher, RegularExpression) {
- ContentsMatcher<QRegularExpression> matcher("banner\\d+", FilterLeaf::RegularExpressionMatch);
- EXPECT_TRUE(matcher.hasMatch("http://another.com/banner123"));
- EXPECT_TRUE(matcher.hasMatch("http://another.com/banner321"));
- EXPECT_FALSE(matcher.hasMatch("http://another.com/banners"));
-
-}
-
-TEST(AdBlockRule, SimpleRule) {
- AdBlockRule *rule = parseRule_adblock("/spamdomain/$domain=spamdomain.com,image");
- EXPECT_TRUE(rule->match(QUrl("subdomain.spamdomain.com")));
-// QCOMPARE(rule->action().first == FilterLeaf::Block, true);
-// QCOMPARE(rule->option(QWebEngineUrlRequestInfo::ResourceTypeImage).value(), true);
-}
-
-TEST(AdBlockRule, AddressPart) {
- AdBlockRule *rule = parseRule_adblock("/banner/*/img^");
- EXPECT_TRUE(rule->match(QUrl("http://example.com/banner/foo/img")));
- EXPECT_TRUE(rule->match(QUrl("http://example.com/banner/foo/bar/img?param")));
- EXPECT_TRUE(rule->match(QUrl("http://example.com/banner//img/foo")));
- EXPECT_FALSE(rule->match(QUrl("http://example.com/banner/img")));
- EXPECT_FALSE(rule->match(QUrl("http://example.com/banner/foo/imgraph")));
- EXPECT_FALSE(rule->match(QUrl("http://example.com/banner/foo/img.gif")));
-}
-
-TEST(AdBlockRule, Domain){
- AdBlockRule *rule = parseRule_adblock("||ads.example.com^");
- EXPECT_TRUE(rule->match(QUrl("http://ads.example.com/foo.gif")));
- EXPECT_TRUE(rule->match(QUrl("http://server1.ads.example.com/foo.gif")));
- EXPECT_TRUE(rule->match(QUrl("https://ads.example.com:8000/")));
- EXPECT_FALSE(rule->match(QUrl("http://ads.example.com.ua/foo.gif")));
- EXPECT_FALSE(rule->match(QUrl("http://example.com/redirect/http://ads.example.com/")));
-}
-
-
-TEST(AdBlockRule, ExactAddress){
- AdBlockRule *rule = parseRule_adblock("|http://example.com/|");
- EXPECT_TRUE(rule->match(QUrl("http://example.com/")));
- EXPECT_FALSE(rule->match(QUrl("http://example.com/foo.gif")));
- EXPECT_FALSE(rule->match(QUrl("http://example.info/redirect/http://example.com/")));
-}
-
-TEST(AdBlockRule, RegularExpression) {
- AdBlockRule *rule = parseRule_adblock("/banner\\d+/");
- EXPECT_TRUE(rule->match(QUrl("http://another.com/banner123")));
- EXPECT_TRUE(rule->match(QUrl("http://another.com/banner321")));
- EXPECT_FALSE(rule->match(QUrl("http://another.com/banners")));
-}
-
-int main(int argc, char **argv) {
- testing::InitGoogleTest(&argc, argv);
- return RUN_ALL_TESTS();
-}
diff --git a/test/hostlist/hostlisttest.cpp b/test/hostlist/hostlisttest.cpp
deleted file mode 100644
index 7f5c954..0000000
--- a/test/hostlist/hostlisttest.cpp
+++ /dev/null
@@ -1,61 +0,0 @@
-#include "hostlisttest.h"
-#include <QtTest/QtTest>
-
-void HostlistTest::parseList()
-{
- // load filters
- QFile hostlist("hostlist.txt");
- QCOMPARE(hostlist.open(QIODevice::ReadOnly | QIODevice::Text), true);
- QCOMPARE(loadHostlist(hostlist, &tree), true);
-
- // hostlist filters are applied to all domains, so there should only be one branch
- QCOMPARE(tree.branches().length(), 1);
-}
-
-void HostlistTest::checkRules_data()
-{
- QTest::addColumn<QUrl>("domain");
- QTest::addColumn<QUrl>("request");
- QTest::addColumn<int>("matches");
- QTest::addColumn<FilterLeaf::Action>("action");
-
- const QVector<QUrl> domains{ QUrl(), QUrl::fromUserInput("testdomain.host") };
- for(const QUrl &domain : domains) {
- QTest::newRow("block (1 domain per line)") << domain << QUrl::fromUserInput("blockeddomain.com") << 1 << FilterLeaf::Block;
- QTest::newRow("block (2 domains per line #1)") << domain << QUrl::fromUserInput("blockeddomain.first") << 1 << FilterLeaf::Block;
- QTest::newRow("block (2 domains per line #2)") << domain << QUrl::fromUserInput("blockeddomain.second") << 1 << FilterLeaf::Block;
- QTest::newRow("redirect") << domain << QUrl::fromUserInput("localhost.localdomain") << 1 << FilterLeaf::Redirect;
- QTest::newRow("domain not in hostlist") << domain << QUrl::fromUserInput("other.domain") << 0 << FilterLeaf::NotMatched;
- }
-}
-
-void HostlistTest::checkRules()
-{
- QFETCH(QUrl, domain);
- QFETCH(QUrl, request);
- QFETCH(int, matches);
- QFETCH(FilterLeaf::Action, action);
-
- auto result = tree.match(domain, request);
- QCOMPARE(result.length(), matches);
- if(matches > 0)
- QCOMPARE(result.constFirst()->action().first, action);
- if(action == FilterLeaf::Redirect)
- QCOMPARE(result.constFirst()->action().second, QLatin1Literal("127.0.0.1"));
-}
-
-void HostlistTest::benchmark_parse()
-{
- QFile hostlist("hostlist-benchmark.txt");
- if(hostlist.open(QIODevice::ReadOnly | QIODevice::Text)) {
- FilterTree benchmarkTree;
- bool loaded;
- QBENCHMARK {
- loaded = loadHostlist(hostlist, &benchmarkTree);
- }
- QCOMPARE(loaded, true);
- hostlist.close();
- }
-}
-
-QTEST_GUILESS_MAIN(HostlistTest)
diff --git a/test/hostlist/hostlisttest.h b/test/hostlist/hostlisttest.h
deleted file mode 100644
index 96051a9..0000000
--- a/test/hostlist/hostlisttest.h
+++ /dev/null
@@ -1,23 +0,0 @@
-#ifndef HOSTLIST_TEST
-#define HOSTLIST_TEST
-
-#include "filtertree.h"
-#include <QObject>
-
-class HostlistTest : public QObject
-{
- Q_OBJECT
-
-private slots:
- void parseList();
-
- void checkRules_data();
- void checkRules();
-
- void benchmark_parse();
-
-private:
- FilterTree tree;
-};
-
-#endif
diff --git a/test/matcherbenchmark/matcherbenchmark.cpp b/test/matcherbenchmark/matcherbenchmark.cpp
deleted file mode 100644
index 84406d5..0000000
--- a/test/matcherbenchmark/matcherbenchmark.cpp
+++ /dev/null
@@ -1,91 +0,0 @@
-#include "matcherbenchmark.h"
-#include <string>
-#include <regex>
-#include <regex.h>
-#include <QtTest/QTest>
-#include <QRegExp>
-#include <QRegularExpression>
-#include <QStringMatcher>
-#include <boost/regex.hpp>
-
-void MatcherBenchmark::qstringcontains()
-{
- const QString pattern("spamdomain");
- const QString request("subdomain.spamdomain.com");
-
- QCOMPARE(request.contains(pattern), true);
- QBENCHMARK {
- request.contains(pattern);
- }
-}
-
-void MatcherBenchmark::qstringmatcher()
-{
- const QStringMatcher pattern("spamdomain");
- const QString request("subdomain.spamdomain.com");
-
- QCOMPARE(pattern.indexIn(request) != -1, true);
- QBENCHMARK {
- pattern.indexIn(request);
- }
-}
-
-void MatcherBenchmark::qregexp()
-{
- const QRegExp pattern("spamdomain");
- const QString request("subdomain.spamdomain.com");
-
- QCOMPARE(pattern.indexIn(request) != -1, true);
- QBENCHMARK {
- pattern.indexIn(request);
- }
-}
-
-void MatcherBenchmark::qregularexpressionmatch()
-{
- const QRegularExpression pattern("spamdomain");
- const QString request("subdomain.spamdomain.com");
-
- QCOMPARE(pattern.match(request).hasMatch(), true);
- QBENCHMARK {
- pattern.match(request).hasMatch();
- }
-}
-
-void MatcherBenchmark::stdregex()
-{
- const std::regex pattern("spamdomain");
- const std::string request("subdomain.spamdomain.com");
-
- QCOMPARE(std::regex_search(request, pattern), true);
- QBENCHMARK {
- std::regex_search(request, pattern);
- }
-}
-
-void MatcherBenchmark::cregex()
-{
- regex_t pattern;
- QCOMPARE(regcomp(&pattern, "spamdomain", 0), 0);
- const std::string request("subdomain.spamdomain.com");
-
- QCOMPARE(regexec(&pattern, request.c_str(), 0, NULL, 0), false);
- QBENCHMARK {
- regexec(&pattern, request.c_str(), 0, NULL, 0);
- }
-
- regfree(&pattern);
-}
-
-void MatcherBenchmark::boostregex()
-{
- const boost::regex pattern("spamdomain");
- const std::string request("subdomain.spamdomain.com");
-
- QCOMPARE(boost::regex_search(request, pattern), true);
- QBENCHMARK {
- boost::regex_search(request, pattern);
- }
-}
-
-QTEST_GUILESS_MAIN(MatcherBenchmark)
diff --git a/test/matcherbenchmark/matcherbenchmark.h b/test/matcherbenchmark/matcherbenchmark.h
deleted file mode 100644
index deb4495..0000000
--- a/test/matcherbenchmark/matcherbenchmark.h
+++ /dev/null
@@ -1,20 +0,0 @@
-#ifndef MATCHER_BENCHMARK
-#define MATCHER_BENCHMARK
-
-#include <QObject>
-
-class MatcherBenchmark : public QObject
-{
- Q_OBJECT
-
-private slots:
- void qstringcontains();
- void qstringmatcher();
- void qregexp();
- void qregularexpressionmatch();
- void stdregex();
- void cregex();
- void boostregex();
-};
-
-#endif
diff --git a/test/meson.build b/test/meson.build
deleted file mode 100644
index 75e38ed..0000000
--- a/test/meson.build
+++ /dev/null
@@ -1,34 +0,0 @@
-dep_gtest = dependency('gtest')
-
-test('urlfilter-AdBlockList', executable('AdBlockList',
- dependencies: [dep_gtest, dep_qt5, dep_urlfilter],
- sources: ['urlfilter/urlfiltertest.cpp']
-))
-
-# Adblock parsing test
-adblock = executable('AdblockTest',
- dependencies: [dep_gtest, dep_qt5, dep_urlfilter],
- sources: ['adblock/adblocktest.cpp']
-)
-test('urlfilter-adblock', adblock, workdir: meson.current_source_dir())
-
-# Hostlist parsing test
-hostlist = executable('HostlistTest',
- dependencies: [dep_qt5, dep_urlfilter],
- sources: ['hostlist/hostlisttest.cpp', qt5.preprocess(moc_headers: 'hostlist/hostlisttest.h', dependencies: dep_qt5)]
-)
-test('urlfilter-hostlist', hostlist, workdir: meson.current_source_dir())
-
-# matching algorithms benchmark
-matcherbenchmark = executable('MatcherBenchmark',
- dependencies: [dep_qt5, dependency('boost', modules: 'regex')],
- sources: ['matcherbenchmark/matcherbenchmark.cpp', qt5.preprocess(moc_headers: 'matcherbenchmark/matcherbenchmark.h', dependencies: dep_qt5)]
-)
-
-# SingleApplication issue#40 test app
-singleapp = executable('SingleApplication',
- cpp_args: ['-DQAPPLICATION_CLASS=QApplication'],
- dependencies: [dep_qt5, dep_SingleApplication],
- sources: ['singleapplication-40/main.cpp']
-)
-
diff --git a/test/urlfilter/urlfiltertest.cpp b/test/urlfilter/urlfiltertest.cpp
deleted file mode 100644
index f6cdbd4..0000000
--- a/test/urlfilter/urlfiltertest.cpp
+++ /dev/null
@@ -1,61 +0,0 @@
-#include "formats/adblockrule.h"
-#include "formats/adblockrule_parse.h"
-#include "formats/adblocklist.h"
-#include <gtest/gtest.h>
-
-AdBlockList list;
-
-TEST(AdBlockList, MetaData) {
- EXPECT_STREQ(qUtf8Printable(list.metadata("Homepage")), "http://example.com/");
- EXPECT_STREQ(qUtf8Printable(list.metadata("Title")), "FooList");
- EXPECT_STREQ(qUtf8Printable(list.metadata("Expires")), "5 days");
- EXPECT_STREQ(qUtf8Printable(list.metadata("Redirect")), "http://example.com/list.txt");
- EXPECT_STREQ(qUtf8Printable(list.metadata("Version")), "1234");
-}
-
-TEST(AdBlockList, Contains) {
- EXPECT_TRUE(list.match(QUrl(), QUrl("http://example.com/banner/foo.png")));
- EXPECT_FALSE(list.match(QUrl(), QUrl("http://example.com/banner/foo/img")));
-
-// AdBlockRule *rule = parseRule_adblock("/banner/*/img^");
-// EXPECT_TRUE(rule->match(QUrl("http://example.com/banner/foo/img")));
-// EXPECT_TRUE(rule->match(QUrl("http://example.com/banner/foo/bar/img?param")));
-// EXPECT_TRUE(rule->match(QUrl("http://example.com/banner//img/foo")));
-// EXPECT_FALSE(rule->match(QUrl("http://example.com/banner/img")));
-// EXPECT_FALSE(rule->match(QUrl("http://example.com/banner/foo/imgraph")));
-// EXPECT_FALSE(rule->match(QUrl("http://example.com/banner/foo/img.gif")));
-}
-
-TEST(AdBlockList, ContainsWildcard) {
- EXPECT_TRUE(list.match(QUrl(), QUrl("http://example.com/banner/ads/img.png")));
-}
-
-TEST(AdBlockList, Domain) {
- EXPECT_TRUE(list.match(QUrl(), QUrl("http://ads.example.com/foo.gif")));
- EXPECT_TRUE(list.match(QUrl(), QUrl("http://server1.ads.example.com/foo.gif")));
- EXPECT_TRUE(list.match(QUrl(), QUrl("https://ads.example.com:8000/")));
- EXPECT_FALSE(list.match(QUrl(), QUrl("http://ads.example.com.ua/foo.gif")));
- EXPECT_FALSE(list.match(QUrl(), QUrl("http://example.com/redirect/http://ads.example.com/")));
-}
-
-TEST(AdBlockList, RegularExpression) {
- EXPECT_TRUE(list.match(QUrl(), QUrl("http://example.com/banner123")));
- EXPECT_TRUE(list.match(QUrl(), QUrl("http://example.com/banner321")));
- EXPECT_FALSE(list.match(QUrl(), QUrl("http://example.com/banners")));
-}
-
-int main(int argc, char **argv) {
- list.parseLine("! Homepage: http://example.com/");
- list.parseLine("! Title: FooList");
- list.parseLine("! Expires: 5 days");
- list.parseLine("! Redirect: http://example.com/list.txt");
- list.parseLine("! Version: 1234");
-
- EXPECT_TRUE(list.parseLine("/banner/foo.png"));
- EXPECT_TRUE(list.parseLine("/banner/*/img.png"));
- EXPECT_TRUE(list.parseLine("||ads.example.com^"));
- EXPECT_TRUE(list.parseLine("/banner\\d+/"));
-
- testing::InitGoogleTest(&argc, argv);
- return RUN_ALL_TESTS();
-}