diff options
43 files changed, 811 insertions, 1341 deletions
diff --git a/doc/Bugs.asciidoc b/doc/Bugs.asciidoc index e6ba9dd..d4ab1ec 100644 --- a/doc/Bugs.asciidoc +++ b/doc/Bugs.asciidoc @@ -40,6 +40,9 @@ Only affects Qt 5.11.0. Set __QTBUG_68224_WORKAROUND__. === rcc embeds time in output https://bugreports.qt.io/browse/QTBUG-62511 +=== QWebEngineUrlRequestInterceptor does not allow to pass the Referer header +https://bugreports.qt.io/browse/QTBUG-60203 + === Wayland bugs === mainwindow.maximized doesn't work diff --git a/lib/urlfilter/adblock/adblocklist.cpp b/lib/urlfilter/adblock/adblocklist.cpp new file mode 100644 index 0000000..c749e9e --- /dev/null +++ b/lib/urlfilter/adblock/adblocklist.cpp @@ -0,0 +1,188 @@ +/* + * This file is part of smolbote. It's copyrighted by the contributors recorded + * in the version control history of the file, available from its original + * location: https://neueland.iserlohn-fortress.net/gitea/aqua/smolbote + * + * SPDX-License-Identifier: GPL-3.0 + */ + +#include "adblocklist.h" +#include "parser.h" +#include <QIODevice> +#include <QTextStream> +#include <QDebug> + +AdBlockList::AdBlockList(QIODevice *device) +{ + Q_ASSERT(device->isOpen()); + + QTextStream list(device); + while (!list.atEnd()) { + parseLine(list.readLine()); + } + + qDebug() << m_metadata; +} + +AdBlockList::~AdBlockList() +{ + for(Rule &r : rules) { + delete r.matcher; + } +} + +QString AdBlockList::metadata(const QString& key) const +{ + return m_metadata.value(key); +} + +int AdBlockList::ruleCount() const +{ + return rules.size(); +} + +std::pair<UrlFilter::MatchResult, QString> AdBlockList::match(const QUrl& firstParty, const QUrl& requestUrl, QWebEngineUrlRequestInfo::ResourceType type) const +{ + const QString domain = firstParty.host(); + const QString request = requestUrl.toString(); + + for(const Rule &r : rules) { + // if there are options specified, but not the one we need + if(!r.options.isEmpty() && !r.options.contains(type)) + continue; + + if(r.disabledOn.contains(domain)) + continue; + + if(!r.enabledOn.isEmpty() && !r.enabledOn.contains(domain)) + continue; + + if(r.matcher->hasMatch(request)) + return std::make_pair(r.action, QString()); + } + + return std::make_pair(UrlFilter::NotMatched, QString()); +} + +void AdBlockList::parseLine(const QString& line) +{ + QString parsedLine = line.trimmed(); + + if(parsedLine.isEmpty()) + return; + + if(parsedLine.startsWith(QLatin1Literal("!"))) { + const auto comment = parseComment(parsedLine); + + if(comment) { + const auto key = comment.value().first; + if(keys.contains(key)) + m_metadata[key] = comment.value().second; + } + + return; + } + + // css rule -> filterleaves cannot do element blocking + if(parsedLine.contains(QLatin1Literal("##")) || parsedLine.contains(QLatin1Literal("#@#"))) { + qDebug("TODO: %s", qUtf8Printable(parsedLine)); + return; + } + + Rule r; + r.action = UrlFilter::Block; + + // exception rules + if(parsedLine.startsWith(QLatin1Literal("@@"))) { + r.action = UrlFilter::Allow; + parsedLine.remove(0, 2); + } + + bool matchCase = false; + + // parse options + { + const int sepPos = parsedLine.indexOf(QLatin1Literal("$")); + if(sepPos != -1) { + const auto options = parsedLine.mid(sepPos + 1).split(QLatin1Literal(",")); + parsedLine = parsedLine.mid(0, sepPos); + + for(const QString &option : options) { + if(option.startsWith(QLatin1Literal("domain"))) { + const auto domainList = option.mid(7).split(QLatin1Literal("|")); + + for(const QString &domain : domainList) { + if(domain.startsWith(QLatin1Literal("~"))) { + r.disabledOn.append(domain.mid(1)); + } else { + r.enabledOn.append(domain); + } + } + } else if(option.endsWith(QLatin1Literal("match-case"))) { + matchCase = !option.startsWith(QLatin1Literal("~")); + + } else { + const auto pair = parseResourceOption(option); + if(pair) + r.options.insert(pair.value().first, pair.value().second); + } + } + } + } + + if(parsedLine.startsWith(QLatin1Literal("/")) && parsedLine.endsWith(QLatin1Literal("/"))) { + // regular expression rule + parsedLine = parsedLine.mid(1, parsedLine.length() - 2); + r.matcher = new ContentsMatcher<QRegularExpression>(parsedLine, UrlFilter::RegularExpressionMatch); + + } else if(parsedLine.startsWith(QLatin1Literal("||")) && parsedLine.endsWith(QLatin1Literal("^"))) { + parsedLine = parsedLine.mid(2, parsedLine.length() - 3); + r.matcher = new ContentsMatcher<QString>(parsedLine, UrlFilter::DomainMatch); + + } else if(parsedLine.startsWith(QLatin1Literal("|")) && parsedLine.endsWith(QLatin1Literal("|"))) { + // string equals rule + parsedLine = parsedLine.mid(1, parsedLine.length() - 2); + r.matcher = new ContentsMatcher<QStringMatcher>(parsedLine, UrlFilter::StringEquals); + + } else if(parsedLine.startsWith(QLatin1Literal("||"))) { + // string starts with rule + parsedLine = parsedLine.mid(2); + r.matcher = new ContentsMatcher<QStringMatcher>(parsedLine, UrlFilter::StringStartsWith); + + } else if(parsedLine.endsWith(QLatin1Literal("|"))) { + // string ends with rule + parsedLine.chop(1); + r.matcher = new ContentsMatcher<QStringMatcher>(parsedLine, UrlFilter::StringEndsWith); + + } else { + // generic contains rule + + // remove beginning and ending wildcards + if(parsedLine.startsWith(QLatin1Literal("*"))) + parsedLine = parsedLine.mid(1); + + if(parsedLine.endsWith(QLatin1Literal("*"))) + parsedLine.chop(1); + + if(parsedLine.contains(QLatin1Literal("*")) || parsedLine.contains(QLatin1Literal("^"))) { + // check for wildcards and translate to regexp + // wildcard "*" - any number of characters + // separator "^" - end, ? or / + parsedLine.replace(QLatin1Literal("||"), QLatin1Literal("^\\w+://")); + parsedLine.replace(QLatin1Literal("|"), QLatin1Literal("\\|")); + parsedLine.replace(QLatin1Literal("*"), QLatin1Literal(".*")); + parsedLine.replace(QLatin1Literal("^"), QLatin1Literal("($|\\?|\\/)")); + + r.matcher = new ContentsMatcher<QRegularExpression>(parsedLine, UrlFilter::RegularExpressionMatch); + + } else { + r.matcher = new ContentsMatcher<QStringMatcher>(parsedLine, UrlFilter::StringContains); + } + } + + r.matcher->setCaseSensitive(matchCase); + + Q_CHECK_PTR(r.matcher); + rules.emplace_back(std::move(r)); +} + diff --git a/lib/urlfilter/adblock/adblocklist.h b/lib/urlfilter/adblock/adblocklist.h new file mode 100644 index 0000000..ee41e11 --- /dev/null +++ b/lib/urlfilter/adblock/adblocklist.h @@ -0,0 +1,42 @@ +/* + * This file is part of smolbote. It's copyrighted by the contributors recorded + * in the version control history of the file, available from its original + * location: https://neueland.iserlohn-fortress.net/gitea/aqua/smolbote + * + * SPDX-License-Identifier: GPL-3.0 + */ + +#include "urlfilter.h" +#include "matcher.h" +#include <QHash> +#include <QWebEngineUrlRequestInfo> + +class QIODevice; +class AdBlockList : public UrlFilter +{ +public: + // TODO: check if all keys are listed + const QStringList keys = { "Version", "Title", "Last modified", "Expires", "Homepage", "Licence", "Redirect" }; + + AdBlockList(QIODevice *device); + ~AdBlockList(); + + QString metadata(const QString &key) const override; + int ruleCount() const; + std::pair<MatchResult, QString> match(const QUrl &firstParty, const QUrl &requestUrl, QWebEngineUrlRequestInfo::ResourceType type) const override; + +protected: + void parseLine(const QString &line); + +private: + QHash<QString, QString> m_metadata; + + struct Rule { + UrlFilter::MatchResult action = UrlFilter::NotMatched; + Matcher *matcher; + QStringList enabledOn, disabledOn; + QHash<QWebEngineUrlRequestInfo::ResourceType, bool> options; + }; + + std::vector<Rule> rules; +}; diff --git a/lib/urlfilter/adblock/parser.cpp b/lib/urlfilter/adblock/parser.cpp new file mode 100644 index 0000000..1e7f0bc --- /dev/null +++ b/lib/urlfilter/adblock/parser.cpp @@ -0,0 +1,75 @@ +/* + * This file is part of smolbote. It's copyrighted by the contributors recorded + * in the version control history of the file, available from its original + * location: https://neueland.iserlohn-fortress.net/gitea/aqua/smolbote + * + * SPDX-License-Identifier: GPL-3.0 + */ + +#include "parser.h" + +std::optional<std::pair<QString, QString>> parseComment(QString &line) +{ + const QLatin1Literal separator(": "); + if(line.contains(separator)) { + const QStringList comment = line.mid(1).split(QLatin1Literal(": ")); + return std::make_pair(comment.at(0).trimmed(), comment.at(1).trimmed()); + } else + return std::nullopt; +} + +std::optional<std::pair<QWebEngineUrlRequestInfo::ResourceType, bool>> parseResourceOption(const QString &option) +{ + const bool exception = !option.startsWith(QLatin1Literal("~")); + + if(option.endsWith(QLatin1Literal("script"))) { + // external scripts loaded via HTML script tag + return std::make_pair(QWebEngineUrlRequestInfo::ResourceTypeScript, exception); + + } else if(option.endsWith(QLatin1Literal("image"))) { + // regular images, typically loaded via HTML img tag + return std::make_pair(QWebEngineUrlRequestInfo::ResourceTypeImage, exception); + + } else if(option.endsWith(QLatin1Literal("stylesheet"))) { + // external CSS stylesheet files + return std::make_pair(QWebEngineUrlRequestInfo::ResourceTypeStylesheet, exception); + + } else if(option.endsWith(QLatin1Literal("object"))) { + // content handled by browser plugins, e.g. Flash or Java + return std::make_pair(QWebEngineUrlRequestInfo::ResourceTypeObject, exception); + + } else if(option.endsWith(QLatin1Literal("xmlhttprequest"))) { + // requests started using the XMLHttpRequest object or fetch() API + return std::make_pair(QWebEngineUrlRequestInfo::ResourceTypeXhr, exception); + + } else if(option.endsWith(QLatin1Literal("object-subrequest"))) { + // requests started by plugins like Flash + return std::make_pair(QWebEngineUrlRequestInfo::ResourceTypePluginResource, exception); + + } else if(option.endsWith(QLatin1Literal("subdocument"))) { + // embedded pages, usually included via HTML frames + return std::make_pair(QWebEngineUrlRequestInfo::ResourceTypeSubFrame, exception); + + } else if(option.endsWith(QLatin1Literal("ping"))) { + // requests started by <a ping> or navigator.sendBeacon() + return std::make_pair(QWebEngineUrlRequestInfo::ResourceTypePing, exception); + + } else if(option.endsWith(QLatin1Literal("websocket"))) { + // requests initiated via WebSocket object + qDebug("Resource type 'websocket' not available"); + + } else if(option.endsWith(QLatin1Literal("webrtc"))) { + // connections opened via RTCPeerConnection instances to ICE servers + qDebug("Resource type 'webrtc' not available"); + + } else if(option.endsWith(QLatin1Literal("document"))) { + // the page itself + return std::make_pair(QWebEngineUrlRequestInfo::ResourceTypeMainFrame, exception); + + } else if(option.endsWith(QLatin1Literal("other"))) { + return std::make_pair(QWebEngineUrlRequestInfo::ResourceTypeUnknown, exception); + } + + qDebug("TODO: %s", qUtf8Printable(option)); + return std::nullopt; +} diff --git a/lib/urlfilter/formats/adblockrule_parse.h b/lib/urlfilter/adblock/parser.h index 01255ca..c73a9cf 100644 --- a/lib/urlfilter/formats/adblockrule_parse.h +++ b/lib/urlfilter/adblock/parser.h @@ -6,12 +6,9 @@ * SPDX-License-Identifier: GPL-3.0 */ -#ifndef ADBLOCKRULE_PARSE_H -#define ADBLOCKRULE_PARSE_H +#include <QWebEngineUrlRequestInfo> +#include <optional> +#include <utility> -class AdBlockRule; - -AdBlockRule *parseRule_adblock(const QString &filter); -std::optional<QPair<QWebEngineUrlRequestInfo::ResourceType, bool>> parseOption(const QString &option); - -#endif // ADBLOCKRULE_PARSE_H +std::optional<std::pair<QString, QString>> parseComment(QString &line); +std::optional<std::pair<QWebEngineUrlRequestInfo::ResourceType, bool>> parseResourceOption(const QString &option); diff --git a/lib/urlfilter/domain.cpp b/lib/urlfilter/domain.cpp deleted file mode 100644 index 2bfd524..0000000 --- a/lib/urlfilter/domain.cpp +++ /dev/null @@ -1,65 +0,0 @@ -/* - * This file is part of smolbote. It's copyrighted by the contributors recorded - * in the version control history of the file, available from its original - * location: https://neueland.iserlohn-fortress.net/gitea/aqua/smolbote - * - * SPDX-License-Identifier: GPL-3.0 - */ - -#include "domain.h" - -Domain::Domain(const QString &domain) - : m_domain(domain) - , m_hash(qHash(domain, 0)) -{ -} - -Domain::Domain(Domain &&other) - : m_domain(std::move(other.m_domain)) - , m_hash(std::move(other.m_hash)) -{ -} - -Domain &Domain::operator=(Domain &&other) -{ - m_domain = std::move(other.m_domain); - m_hash = other.m_hash; - return *this; -} - -bool Domain::matches(const QUrl &url) const -{ - // empty domain matches all - if(m_domain.isEmpty() || url.isEmpty()) - return true; - - const QString domain = url.host(); - - // domain and filter are the same - if(domain == m_domain) { - return true; - } - - // domain cannot be matched if it doesn't end with filter - // ex. example2.com isn't matched by example.com - if(!domain.endsWith(m_domain)) { - return false; - } - - // match with subdomains - // ex. subdomain.example.com is matched by example.com - int index = domain.indexOf(m_domain); - - // match if (domain ends with filter) && (filter has been found) and (character before filter is '.') - return index > 0 && domain[index - 1] == QLatin1Char('.'); -} - -bool Domain::matchesExactly(uint hash) const -{ - return (m_hash == hash); -} - -QString Domain::host() const -{ - return m_domain; -} diff --git a/lib/urlfilter/domain.h b/lib/urlfilter/domain.h deleted file mode 100644 index 0406f0f..0000000 --- a/lib/urlfilter/domain.h +++ /dev/null @@ -1,33 +0,0 @@ -/* - * This file is part of smolbote. It's copyrighted by the contributors recorded - * in the version control history of the file, available from its original - * location: https://neueland.iserlohn-fortress.net/gitea/aqua/smolbote - * - * SPDX-License-Identifier: GPL-3.0 - */ - -#ifndef SMOLBOTE_DOMAIN_H -#define SMOLBOTE_DOMAIN_H - -#include <QString> -#include <QUrl> - -class Domain -{ -public: - explicit Domain(const QString &domain); - explicit Domain(Domain &&other); - Domain &operator=(Domain &&other); - - // match domain and subdomains of domain - bool matches(const QUrl &url) const; - // exact match of domain - bool matchesExactly(uint hash) const; - QString host() const; - -private: - QString m_domain; - uint m_hash; -}; - -#endif // SMOLBOTE_DOMAIN_H diff --git a/lib/urlfilter/filterleaf.cpp b/lib/urlfilter/filterleaf.cpp deleted file mode 100644 index 5797718..0000000 --- a/lib/urlfilter/filterleaf.cpp +++ /dev/null @@ -1,14 +0,0 @@ -#include "filterleaf.h" - -const QString FilterLeaf::request() const -{ - return m_request; -} - -std::optional<bool> FilterLeaf::option(QWebEngineUrlRequestInfo::ResourceType opt) const -{ - if(resourceTypeOptions.contains(opt)) - return resourceTypeOptions.value(opt); - else - return std::nullopt; -} diff --git a/lib/urlfilter/filterleaf.h b/lib/urlfilter/filterleaf.h deleted file mode 100644 index 64f465d..0000000 --- a/lib/urlfilter/filterleaf.h +++ /dev/null @@ -1,60 +0,0 @@ -/* - * This file is part of smolbote. It's copyrighted by the contributors recorded - * in the version control history of the file, available from its original - * location: https://neueland.iserlohn-fortress.net/gitea/aqua/smolbote - * - * SPDX-License-Identifier: GPL-3.0 - */ - -#ifndef SMOLBOTE_FILTERLEAF_H -#define SMOLBOTE_FILTERLEAF_H - -#include <QHash> -#include <QObject> -#include <QString> -#include <QWebEngineUrlRequestInfo> -#include <optional> -#include <utility> -#include <QVariant> - -class FilterLeaf -{ -public: - enum Action { - NotMatched, - Allow, - Block, - Redirect - }; - - enum UrlMatchType { - InvalidMatch, - RegularExpressionMatch, - StringContains, - StringStartsWith, - StringEndsWith, - StringEquals, - DomainMatch - }; - - virtual ~FilterLeaf() = default; - - virtual bool match(const QUrl &requestUrl) const = 0; - virtual std::pair<Action, QVariant> action() const = 0; - - const QString request() const; - std::optional<bool> option(QWebEngineUrlRequestInfo::ResourceType opt) const; - -protected: - // rule matching - UrlMatchType matchType = InvalidMatch; - QHash<QWebEngineUrlRequestInfo::ResourceType, bool> resourceTypeOptions; - QString m_request; - - // rule action - bool m_isBlocking; -}; - -Q_DECLARE_METATYPE(FilterLeaf::Action) - -#endif // SMOLBOTE_FILTERLEAF_H diff --git a/lib/urlfilter/filtertree.cpp b/lib/urlfilter/filtertree.cpp deleted file mode 100644 index 2cdd6d0..0000000 --- a/lib/urlfilter/filtertree.cpp +++ /dev/null @@ -1,94 +0,0 @@ -/* - * This file is part of smolbote. It's copyrighted by the contributors recorded - * in the version control history of the file, available from its original - * location: https://neueland.iserlohn-fortress.net/gitea/aqua/smolbote - * - * SPDX-License-Identifier: GPL-3.0 - */ - -#include "filtertree.h" -#include "filterleaf.h" -#include "formats/hostlistrule.h" -#include <QTextStream> - -bool loadHostlist(QIODevice &from, FilterTree *tree) -{ - Q_ASSERT(from.isReadable()); - QTextStream stream(&from); - while(!stream.atEnd()) { - const QString line = stream.readLine().trimmed(); - if(line.isEmpty() || line.startsWith(QLatin1Literal("#"))) - continue; - - const QStringList &parts = line.split(QLatin1Literal(" ")); - if(parts.length() < 2) { -#ifdef QT_DEBUG - qDebug("Cannot parse: %s", qUtf8Printable(line)); -#endif - return false; - } - - for(int i = 1; i < parts.length(); ++i) { - // HostlistRule(domain, redirect) - auto *rule = new HostlistRule(parts.at(i), parts.constFirst()); - // addRule(rule, enable_on_domain) - const bool added = tree->addRule(rule, QString()); - if(!added) - return false; - } - } - return true; -} - -FilterTree::~FilterTree() -{ - for(auto &branch : m_branches) { - qDeleteAll(branch.leaves); - branch.leaves.clear(); - } -} - -const QStringList FilterTree::branches() const -{ - QStringList branches; - for(auto &branch : m_branches) { - branches.append(branch.domain.host()); - } - return branches; -} - -QVector<const FilterLeaf *> FilterTree::match(const QUrl &domain, const QUrl &requestUrl) const -{ - QVector<const FilterLeaf *> leaves; - for(const auto &branch : m_branches) { - if(branch.domain.matches(domain)) { - - for(const auto leaf : branch.leaves) { - if(leaf->match(requestUrl)) { - leaves.append(leaf); - } - } - } - } - return leaves; -} - -bool FilterTree::addRule(FilterLeaf *rule, const QString &domain) -{ - branchLock.lock(); - this->branch(domain).leaves.emplace_back(rule); - branchLock.unlock(); - return true; -} - -FilterTree::Branch & FilterTree::branch(const QString& domain) -{ - for(auto &branch : m_branches) { - if(branch.domain.matches(QUrl(domain))) - return branch; - } - - // no branch was found - Branch branch(domain); - return m_branches.emplace_back(std::move(branch)); -} diff --git a/lib/urlfilter/filtertree.h b/lib/urlfilter/filtertree.h deleted file mode 100644 index f453a3d..0000000 --- a/lib/urlfilter/filtertree.h +++ /dev/null @@ -1,61 +0,0 @@ -/* - * This file is part of smolbote. It's copyrighted by the contributors recorded - * in the version control history of the file, available from its original - * location: https://neueland.iserlohn-fortress.net/gitea/aqua/smolbote - * - * SPDX-License-Identifier: GPL-3.0 - */ - -#ifndef SMOLBOTE_FILTERTREE_H -#define SMOLBOTE_FILTERTREE_H - -#include "domain.h" -#include "filterleaf.h" -#include <QIODevice> -#include <QObject> -#include <QVector> -#include <vector> -#include <QMutex> - -/** FilterTree: B+ tree of filter rules - * The tree contains branches that represent domains - * Each domain-branch contains leaves (rules) that are to be applied to it. - * Rules may be applied to multiple branches. - */ -class FilterTree : public QObject -{ - Q_OBJECT - -public: - ~FilterTree(); - - const QStringList branches() const; - QVector<const FilterLeaf *> match(const QUrl &domain, const QUrl &requestUrl) const; - - bool addRule(FilterLeaf *rule, const QString &domain); - -private: - struct Branch { - explicit Branch(const QString &host) - : domain(host) - { - } - explicit Branch(Branch &&other) - : domain(std::move(other.domain)) - , leaves(std::move(other.leaves)) - { - } - - Domain domain; - std::vector<FilterLeaf *> leaves; - }; - - Branch& branch(const QString &domain); - - QMutex branchLock; - std::vector<Branch> m_branches; -}; - -bool loadHostlist(QIODevice &from, FilterTree *tree); - -#endif // SMOLBOTE_FILTERTREE_H diff --git a/lib/urlfilter/formats/adblocklist.cpp b/lib/urlfilter/formats/adblocklist.cpp deleted file mode 100644 index 772c252..0000000 --- a/lib/urlfilter/formats/adblocklist.cpp +++ /dev/null @@ -1,95 +0,0 @@ -#include "adblocklist.h" - -AdBlockList::AdBlockList() -{ -} - -QString AdBlockList::metadata(const QString &key) const -{ - return m_metadata.value(key, QString()); -} - -FilterLeaf::Action AdBlockList::match(const QUrl &firstParty, const QUrl &requestUrl, QWebEngineUrlRequestInfo::ResourceType type) const -{ - const QString request = requestUrl.toString(); - - for(auto &filter : m_rules) { - if(filter.matcher->hasMatch(request)) - return filter.action; - } - return FilterLeaf::NotMatched; -} - -bool AdBlockList::parseLine(const QString &line) -{ - // remove whitespace from start/end of the line - QString parsedLine = line.trimmed(); - - // check if the line is empty - if(parsedLine.isEmpty()) - return false; - - // parse comment - if(parsedLine.startsWith(QLatin1Literal("!"))) - return parseComment(parsedLine); - - Filter filter; - - // exception rules - if(parsedLine.startsWith(QLatin1Literal("@@"))) { - filter.action = FilterLeaf::Allow; - parsedLine.remove(0, 2); - } - - // remove '*' at the beginning and the end - if(parsedLine.startsWith(QLatin1Literal("*"))) - parsedLine = parsedLine.mid(1); - if(parsedLine.endsWith(QLatin1Literal("*"))) - parsedLine.chop(1); - - if(parsedLine.startsWith(QLatin1Literal("/")) && parsedLine.endsWith(QLatin1Literal("/"))) { - // regular expression rule - parsedLine = parsedLine.mid(1, parsedLine.length() - 2); - filter.matcher = new ContentsMatcher<QRegularExpression>(parsedLine, FilterLeaf::RegularExpressionMatch); - - } else if(parsedLine.contains(QLatin1Literal("*"))) { - parsedLine = QRegularExpression::wildcardToRegularExpression(parsedLine); - filter.matcher = new ContentsMatcher<QRegularExpression>(parsedLine, FilterLeaf::RegularExpressionMatch); - - } else if(parsedLine.startsWith(QLatin1Literal("||")) && parsedLine.endsWith(QLatin1Literal("^"))) { -// matchType = FilterLeaf::DomainMatch; - parsedLine = parsedLine.mid(2, parsedLine.length() - 3); - filter.matcher = new ContentsMatcher<QString>(parsedLine, FilterLeaf::DomainMatch); - - } else if(parsedLine.startsWith(QLatin1Literal("|")) && parsedLine.endsWith(QLatin1Literal("|"))) { - // string equals rule - parsedLine = parsedLine.mid(1, parsedLine.length() - 2); - filter.matcher = new ContentsMatcher<QStringMatcher>(parsedLine, FilterLeaf::StringEquals); - - } else if(parsedLine.startsWith(QLatin1Literal("||"))) { - // string starts with rule - parsedLine = parsedLine.mid(2); - filter.matcher = new ContentsMatcher<QStringMatcher>(parsedLine, FilterLeaf::StringStartsWith); - - } else if(parsedLine.endsWith(QLatin1Literal("|"))) { - // string ends with rule - parsedLine.chop(1); - filter.matcher = new ContentsMatcher<QStringMatcher>(parsedLine, FilterLeaf::StringEndsWith); - - } else { - // generic contains rule - filter.matcher = new ContentsMatcher<QStringMatcher>(parsedLine, FilterLeaf::StringContains); - } - - - Q_CHECK_PTR(filter.matcher); - m_rules.emplace_back(std::move(filter)); - return true; -} - -bool AdBlockList::parseComment(const QString &commentLine) -{ - const QStringList comment = commentLine.mid(1).split(QLatin1Literal(": ")); - m_metadata[comment.at(0).trimmed()] = comment.at(1).trimmed(); - return true; -} diff --git a/lib/urlfilter/formats/adblocklist.h b/lib/urlfilter/formats/adblocklist.h deleted file mode 100644 index 34a2120..0000000 --- a/lib/urlfilter/formats/adblocklist.h +++ /dev/null @@ -1,32 +0,0 @@ -#ifndef ADBLOCKLIST_H -#define ADBLOCKLIST_H - -#include <QHash> -#include "adblockrule.h" - -class AdBlockList -{ -public: - AdBlockList(); - - QString metadata(const QString &key) const; - FilterLeaf::Action match(const QUrl &firstParty, const QUrl &requestUrl, QWebEngineUrlRequestInfo::ResourceType type = QWebEngineUrlRequestInfo::ResourceTypeUnknown) const; - - bool parseLine(const QString &line); - -protected: - bool parseComment(const QString &commentLine); - -private: - struct Filter - { - FilterLeaf::Action action = FilterLeaf::Block; - Matcher *matcher; - }; - - QHash<QString, QString> m_metadata; - //QMap<QString, Filter> m_rules; - std::vector<Filter> m_rules; -}; - -#endif // ADBLOCKLIST_H diff --git a/lib/urlfilter/formats/adblockrule.cpp b/lib/urlfilter/formats/adblockrule.cpp deleted file mode 100644 index 60e817f..0000000 --- a/lib/urlfilter/formats/adblockrule.cpp +++ /dev/null @@ -1,63 +0,0 @@ -/* - * This file is part of smolbote. It's copyrighted by the contributors recorded - * in the version control history of the file, available from its original - * location: https://neueland.iserlohn-fortress.net/gitea/aqua/smolbote - * - * SPDX-License-Identifier: GPL-3.0 - */ - -#include "adblockrule.h" -#include <QRegExp> -#include <QStringMatcher> - -AdBlockRule::AdBlockRule(FilterLeaf::UrlMatchType matchType, const QString &filter, FilterLeaf::Action action) -{ - this->matchType = matchType; - this->m_request = filter; - this->m_isBlocking = (action == FilterLeaf::Block); - //matcher.setPattern(filter); - if(matchType == FilterLeaf::RegularExpressionMatch) - regExp = new QRegExp(filter); - else - stringMatcher = new QStringMatcher(filter); -} - -void AdBlockRule::mergeOptions(const QHash<QWebEngineUrlRequestInfo::ResourceType, bool> &options) -{ - this->resourceTypeOptions.unite(options); -} - -bool AdBlockRule::match(const QUrl &requestUrl) const -{ - switch(matchType) { - case FilterLeaf::RegularExpressionMatch: - return (regExp->indexIn(requestUrl.toString()) != -1); - default: - return false; - } -} - -bool AdBlockRule::match(const QUrl &requestUrl, QWebEngineUrlRequestInfo::ResourceType type) const -{ - // if request is of the required type, or there are no types set (== apply to all requests) - if(this->resourceTypeOptions.contains(type) || this->resourceTypeOptions.isEmpty()) { - switch(matchType) { - case FilterLeaf::RegularExpressionMatch: - return (regExp->indexIn(requestUrl.toString()) != -1); - default: - qWarning("Match type not implemented, returning false!"); - return false; - } - } - - // request type is not matched - return false; -} - -std::pair<FilterLeaf::Action, QVariant> AdBlockRule::action() const -{ - if(m_isBlocking) - return std::make_pair(FilterLeaf::Block, QVariant()); - else - return std::make_pair(FilterLeaf::Allow, QVariant()); -} diff --git a/lib/urlfilter/formats/adblockrule_parse.cpp b/lib/urlfilter/formats/adblockrule_parse.cpp deleted file mode 100644 index c01ddfd..0000000 --- a/lib/urlfilter/formats/adblockrule_parse.cpp +++ /dev/null @@ -1,181 +0,0 @@ -/* - * This file is part of smolbote. It's copyrighted by the contributors recorded - * in the version control history of the file, available from its original - * location: https://neueland.iserlohn-fortress.net/gitea/aqua/smolbote - * - * SPDX-License-Identifier: GPL-3.0 - */ - -#include "adblockrule.h" -#include "adblockrule_parse.h" - -// adblock format documentation -// https://adblockplus.org/filters - -// QString::mid(pos, len) const - Returns a string starting at the specified position index. -// QString::chop(len) - Removes n characters from the end of the string. -// QString::remove(pos, len) - Removes n characters from the string, starting at the given position index. -// QString::trimmed() const - Remove whitespace from start and end - -AdBlockRule *parseRule_adblock(const QString &filter) -{ - QString parsedLine = filter.trimmed(); - - // there is no rule, or it's a comment - if(parsedLine.isEmpty() || parsedLine.startsWith("!")) { - return nullptr; - } - - // css rule -> filterleaves cannot do element blocking - if(parsedLine.contains(QLatin1Literal("##")) || parsedLine.contains(QLatin1Literal("#@#"))) { - return nullptr; - } - - // exception rules - FilterLeaf::Action action = FilterLeaf::Block; - if(parsedLine.startsWith(QLatin1Literal("@@"))) { - action = FilterLeaf::Allow; - parsedLine.remove(0, 2); - } - - // parse options - QStringList enabledOn, disabledOn; - QHash<QWebEngineUrlRequestInfo::ResourceType, bool> optionsHash; - { - const int sepPos = parsedLine.indexOf(QLatin1Literal("$")); - if(sepPos != -1) { - const auto options = parsedLine.mid(sepPos + 1).split(QLatin1Literal(",")); - parsedLine = parsedLine.mid(0, sepPos); - - for(const QString &option : options) { - if(option.startsWith(QLatin1Literal("domain"))) { - const auto domainList = option.mid(7).split(QLatin1Literal("|")); - - for(const QString &domain : domainList) { - if(domain.startsWith(QLatin1Literal("~"))) { - disabledOn.append(domain.mid(1)); - } else { - enabledOn.append(domain); - } - } - } else { - const auto pair = parseOption(option); - if(pair) - optionsHash.insert(pair.value().first, pair.value().second); - } - } - } - } - - FilterLeaf::UrlMatchType matchType = FilterLeaf::InvalidMatch; - - if(parsedLine.startsWith(QLatin1Literal("/")) && parsedLine.endsWith(QLatin1Literal("/"))) { - // regular expression rule - matchType = FilterLeaf::RegularExpressionMatch; - parsedLine = parsedLine.mid(1, parsedLine.length() - 2); - - } else if(parsedLine.startsWith(QLatin1Literal("||")) && parsedLine.endsWith(QLatin1Literal("^"))) { - matchType = FilterLeaf::DomainMatch; - parsedLine = parsedLine.mid(2, parsedLine.length() - 3); - - } else if(parsedLine.startsWith(QLatin1Literal("|")) && parsedLine.endsWith(QLatin1Literal("|"))) { - // string equals rule - matchType = FilterLeaf::StringEquals; - parsedLine = parsedLine.mid(1, parsedLine.length() - 2); - - } else if(parsedLine.startsWith(QLatin1Literal("||"))) { - // string starts with rule - matchType = FilterLeaf::StringStartsWith; - parsedLine = parsedLine.mid(2); - - } else if(parsedLine.endsWith(QLatin1Literal("|"))) { - // string ends with rule - matchType = FilterLeaf::StringEndsWith; - parsedLine.chop(1); - - } else { - // generic contains rule - matchType = FilterLeaf::StringContains; - - // Basic filter rules can use wildcards, which were supported by QRegExp, - // but were deprecated in QRegularExpression. - - // remove beginning and ending wildcards - if(parsedLine.startsWith(QLatin1Literal("*"))) - parsedLine = parsedLine.mid(1); - - if(parsedLine.endsWith(QLatin1Literal("*"))) - parsedLine.chop(1); - - if(parsedLine.contains(QLatin1Literal("*")) || parsedLine.contains(QLatin1Literal("^"))) { - // check for wildcards and translate to regexp - // wildcard "*" - any number of characters - // separator "^" - end, ? or / - parsedLine.replace(QLatin1Literal("||"), QLatin1Literal("^\\w+://")); - parsedLine.replace(QLatin1Literal("|"), QLatin1Literal("\\|")); - parsedLine.replace(QLatin1Literal("*"), QLatin1Literal(".*")); - parsedLine.replace(QLatin1Literal("^"), QLatin1Literal("($|\\?|\\/)")); - - matchType = FilterLeaf::RegularExpressionMatch; - } - } - - AdBlockRule *rule = new AdBlockRule(matchType, parsedLine, action); - rule->mergeOptions(optionsHash); - return rule; -} - -std::optional<QPair<QWebEngineUrlRequestInfo::ResourceType, bool>> parseOption(const QString &option) -{ - const bool exception = !option.startsWith(QLatin1Literal("~")); - - if(option.endsWith(QLatin1Literal("script"))) { - // external scripts loaded via HTML script tag - return qMakePair(QWebEngineUrlRequestInfo::ResourceTypeScript, exception); - - } else if(option.endsWith(QLatin1Literal("image"))) { - // regular images, typically loaded via HTML img tag - return qMakePair(QWebEngineUrlRequestInfo::ResourceTypeImage, exception); - - } else if(option.endsWith(QLatin1Literal("stylesheet"))) { - // external CSS stylesheet files - return qMakePair(QWebEngineUrlRequestInfo::ResourceTypeStylesheet, exception); - - } else if(option.endsWith(QLatin1Literal("object"))) { - // content handled by browser plugins, e.g. Flash or Java - return qMakePair(QWebEngineUrlRequestInfo::ResourceTypeObject, exception); - - } else if(option.endsWith(QLatin1Literal("xmlhttprequest"))) { - // requests started using the XMLHttpRequest object or fetch() API - return qMakePair(QWebEngineUrlRequestInfo::ResourceTypeXhr, exception); - - } else if(option.endsWith(QLatin1Literal("object-subrequest"))) { - // requests started by plugins like Flash - return qMakePair(QWebEngineUrlRequestInfo::ResourceTypePluginResource, exception); - - } else if(option.endsWith(QLatin1Literal("subdocument"))) { - // embedded pages, usually included via HTML frames - return qMakePair(QWebEngineUrlRequestInfo::ResourceTypeSubFrame, exception); - - } else if(option.endsWith(QLatin1Literal("ping"))) { - // requests started by <a ping> or navigator.sendBeacon() - return qMakePair(QWebEngineUrlRequestInfo::ResourceTypePing, exception); - - } else if(option.endsWith(QLatin1Literal("websocket"))) { - // requests initiated via WebSocket object - qDebug("Resource type 'websocket' not available"); - - } else if(option.endsWith(QLatin1Literal("webrtc"))) { - // connections opened via RTCPeerConnection instances to ICE servers - qDebug("Resource type 'webrtc' not available"); - - } else if(option.endsWith(QLatin1Literal("document"))) { - // the page itself - return qMakePair(QWebEngineUrlRequestInfo::ResourceTypeMainFrame, exception); - - } else if(option.endsWith(QLatin1Literal("other"))) { - return qMakePair(QWebEngineUrlRequestInfo::ResourceTypeUnknown, exception); - } - - return std::nullopt; -} diff --git a/lib/urlfilter/formats/hostlistrule.cpp b/lib/urlfilter/formats/hostlistrule.cpp deleted file mode 100644 index ad2c2a6..0000000 --- a/lib/urlfilter/formats/hostlistrule.cpp +++ /dev/null @@ -1,29 +0,0 @@ -/* - * This file is part of smolbote. It's copyrighted by the contributors recorded - * in the version control history of the file, available from its original - * location: https://neueland.iserlohn-fortress.net/gitea/aqua/smolbote - * - * SPDX-License-Identifier: GPL-3.0 - */ - -#include "hostlistrule.h" - -HostlistRule::HostlistRule(const QString &domain, const QString &redirect) -{ - this->m_isBlocking = (redirect == QLatin1Literal("0.0.0.0")); - this->m_request = domain; - this->m_redirect = redirect; -} - -bool HostlistRule::match(const QUrl &requestUrl) const -{ - //qDebug("checking [%s] against [%s]", qUtf8Printable(requestUrl.host()), qUtf8Printable(m_request)); - return (m_request == requestUrl.host()); -} - -std::pair<FilterLeaf::Action, QVariant> HostlistRule::action() const -{ - if(m_isBlocking) - return std::make_pair(FilterLeaf::Block, QVariant()); - return std::make_pair(FilterLeaf::Redirect, QVariant(m_redirect)); -} diff --git a/lib/urlfilter/formats/hostlistrule.h b/lib/urlfilter/formats/hostlistrule.h deleted file mode 100644 index 58ec690..0000000 --- a/lib/urlfilter/formats/hostlistrule.h +++ /dev/null @@ -1,27 +0,0 @@ -/* - * This file is part of smolbote. It's copyrighted by the contributors recorded - * in the version control history of the file, available from its original - * location: https://neueland.iserlohn-fortress.net/gitea/aqua/smolbote - * - * SPDX-License-Identifier: GPL-3.0 - */ - -#ifndef SMOLBOTE_HOSTLIST_RULE_H -#define SMOLBOTE_HOSTLIST_RULE_H - -#include "../filterleaf.h" -#include <QString> - -class HostlistRule : public FilterLeaf -{ -public: - explicit HostlistRule(const QString &domain, const QString &redirect); - - bool match(const QUrl &requestUrl) const override; - std::pair<FilterLeaf::Action, QVariant> action() const override; - -private: - QString m_redirect; -}; - -#endif // SMOLBOTE_HOSTLIST_RULE_H diff --git a/lib/urlfilter/hostlist/hostlist.cpp b/lib/urlfilter/hostlist/hostlist.cpp new file mode 100644 index 0000000..ec0b214 --- /dev/null +++ b/lib/urlfilter/hostlist/hostlist.cpp @@ -0,0 +1,79 @@ +/* + * This file is part of smolbote. It's copyrighted by the contributors recorded + * in the version control history of the file, available from its original + * location: https://neueland.iserlohn-fortress.net/gitea/aqua/smolbote + * + * SPDX-License-Identifier: GPL-3.0 + */ + +#include "hostlist.h" +#include <QIODevice> +#include <QTextStream> +#include <QDebug> + +HostList::HostList(QIODevice *device) +{ + Q_ASSERT(device->isOpen()); + + QTextStream list(device); + while (!list.atEnd()) { + parseLine(list.readLine()); + } + + qDebug() << m_metadata; +} + +QString HostList::metadata(const QString& key) const +{ + return m_metadata.value(key); +} + +int HostList::ruleCount() const +{ + return rules.size(); +} + +std::pair<UrlFilter::MatchResult, QString> HostList::match(const QUrl& firstParty, const QUrl& requestUrl, QWebEngineUrlRequestInfo::ResourceType type) const +{ + Q_UNUSED(firstParty); + Q_UNUSED(type); + + const QString domain = requestUrl.host(); + const uint domainHash = qHash(domain); + + for(const Rule &r : rules) { + if(r.domainHash == domainHash) + return std::make_pair(r.action, r.redirect); + } + + return std::make_pair(UrlFilter::NotMatched, QString()); +} + +void HostList::parseLine(const QString& line) +{ + // check comment + if(line.startsWith(QLatin1Literal("#"))) + return; + + QString parsedLine = line.trimmed(); + + // malformed rule + if(!parsedLine.contains(QLatin1Literal(" "))) + return; + + const QStringList parts = parsedLine.split(QLatin1Literal(" ")); + const QString redirect = parts.at(0); + const auto action = (redirect == QLatin1Literal("0.0.0.0")) ? UrlFilter::Block : UrlFilter::Redirect; + + for(int i = 1; i < parts.size(); i++) { + const QString domain = parts.at(i); + Rule r; + r.action = action; + r.domainHash = qHash(domain); + if(action == UrlFilter::Redirect) + r.redirect = redirect; + + rules.emplace_back(std::move(r)); + } +} + diff --git a/lib/urlfilter/hostlist/hostlist.h b/lib/urlfilter/hostlist/hostlist.h new file mode 100644 index 0000000..d4a8d87 --- /dev/null +++ b/lib/urlfilter/hostlist/hostlist.h @@ -0,0 +1,44 @@ +/* + * This file is part of smolbote. It's copyrighted by the contributors recorded + * in the version control history of the file, available from its original + * location: https://neueland.iserlohn-fortress.net/gitea/aqua/smolbote + * + * SPDX-License-Identifier: GPL-3.0 + */ + +#ifndef SMOLBOTE_URLFILTER_HOSTLIST +#define SMOLBOTE_URLFILTER_HOSTLIST + +#include "urlfilter.h" +#include <QHash> +#include <vector> +#include <QWebEngineUrlRequestInfo> + +class QIODevice; +class HostList : public UrlFilter +{ +public: + + HostList(QIODevice *device); + ~HostList() = default; + + QString metadata(const QString &key) const override; + int ruleCount() const; + std::pair<MatchResult, QString> match(const QUrl &firstParty, const QUrl &requestUrl, QWebEngineUrlRequestInfo::ResourceType type) const override; + +protected: + void parseLine(const QString &line); + +private: + QHash<QString, QString> m_metadata; + + struct Rule { + UrlFilter::MatchResult action = UrlFilter::NotMatched; + uint domainHash; + QString redirect; + }; + + std::vector<Rule> rules; +}; + +#endif // SMOLBOTE_URLFILTER_HOSTLIST diff --git a/lib/urlfilter/formats/adblockrule.h b/lib/urlfilter/matcher.h index 6be3cdf..6696958 100644 --- a/lib/urlfilter/formats/adblockrule.h +++ b/lib/urlfilter/matcher.h @@ -6,17 +6,24 @@ * SPDX-License-Identifier: GPL-3.0 */ -#ifndef SMOLBOTE_ADBLOCKRULE_H -#define SMOLBOTE_ADBLOCKRULE_H +#ifndef SMOLBOTE_URLFILTER_MATCHER +#define SMOLBOTE_URLFILTER_MATCHER -#include "../filterleaf.h" -#include <optional> +#include <QUrl> +#include <QString> +#include <utility> #include <QRegularExpression> #include <QStringMatcher> +#include <QWebEngineUrlRequestInfo> +/** An interface class so we can use templated ContentsMatcher interchangeably + */ class Matcher { public: + virtual ~Matcher() = default; + + virtual void setCaseSensitive(bool matchCase) = 0; virtual bool hasMatch(const QString &where) const = 0; }; @@ -24,12 +31,10 @@ template <typename T> class ContentsMatcher : public Matcher { public: - ContentsMatcher(const QString &pattern, FilterLeaf::UrlMatchType matchType) + ContentsMatcher(const QString &pattern, UrlFilter::MatchType type) + : patternLength(pattern.length()) + , matchType(type) { - this->matchType = matchType; - patternLength = pattern.length(); - - if constexpr(std::is_same_v<T, QRegularExpression>) { matcher.setPatternOptions(matcher.patternOptions() | QRegularExpression::CaseInsensitiveOption); matcher.setPattern(pattern); @@ -38,7 +43,19 @@ public: matcher.setPattern(pattern); } else if constexpr(std::is_same_v<T, QString>) { matcher = QUrl::fromUserInput(pattern).host(); -// qDebug("matcher: %s", qUtf8Printable(matcher)); + } + } + ~ContentsMatcher() = default; + + void setCaseSensitive(bool matchCase) override + { + if constexpr(std::is_same_v<T, QRegularExpression>) { + auto options = matcher.patternOptions(); + options.setFlag(QRegularExpression::CaseInsensitiveOption, !matchCase); + matcher.setPatternOptions(options); + + } else if constexpr(std::is_same_v<T, QStringMatcher>) { + matcher.setCaseSensitivity(matchCase ? Qt::CaseSensitive : Qt::CaseInsensitive); } } @@ -46,68 +63,47 @@ public: { if constexpr(std::is_same_v<T, QStringMatcher>) { switch (matchType) { - case FilterLeaf::InvalidMatch: - case FilterLeaf::RegularExpressionMatch: - case FilterLeaf::DomainMatch: + case UrlFilter::InvalidMatch: + case UrlFilter::RegularExpressionMatch: + case UrlFilter::DomainMatch: qWarning("ContentsMatcher is a String Matcher, but not doing string matching!"); return false; - case FilterLeaf::StringContains: + case UrlFilter::StringContains: return (matcher.indexIn(where) != -1); - case FilterLeaf::StringStartsWith: + case UrlFilter::StringStartsWith: return (matcher.indexIn(where) == 0); - case FilterLeaf::StringEndsWith: + case UrlFilter::StringEndsWith: return (matcher.indexIn(where) == where.length() - patternLength); - case FilterLeaf::StringEquals: + case UrlFilter::StringEquals: return (matcher.indexIn(where) == 0) && (patternLength == where.length()); } } else if constexpr(std::is_same_v<T, QRegularExpression>) { - if(matchType != FilterLeaf::RegularExpressionMatch) + if(matchType != UrlFilter::RegularExpressionMatch) qWarning("ContentsMatcher is a regular expression, but not doing a regular expression match!"); return matcher.match(where).hasMatch(); } else if constexpr(std::is_same_v<T, QString>) { // TODO: fix - if(matchType == FilterLeaf::DomainMatch) { + if(matchType == UrlFilter::DomainMatch) { // qDebug("matching %s", qUtf8Printable(QUrl(where).host())); return QUrl(where).host().endsWith(matcher); } else return matcher == where; - } else { - qWarning("Matcher has no backend, returning false"); - return false; } + + qWarning("Matcher has no backend, returning false"); + return false; } private: - int patternLength; + const int patternLength; + const UrlFilter::MatchType matchType; T matcher; - FilterLeaf::UrlMatchType matchType; }; -class AdBlockRule : public FilterLeaf -{ -public: - explicit AdBlockRule(FilterLeaf::UrlMatchType matchType, const QString &filter, FilterLeaf::Action action); - ~AdBlockRule() - { - delete stringMatcher; - delete regExp; - }; - - void mergeOptions(const QHash<QWebEngineUrlRequestInfo::ResourceType, bool> &options); - - bool match(const QUrl &requestUrl) const override; - bool match(const QUrl &requestUrl, QWebEngineUrlRequestInfo::ResourceType type) const; - std::pair<FilterLeaf::Action, QVariant> action() const override; - -private: - /* Once C++20 comes out, perhaps this can be replaced with a concept template */ - QStringMatcher *stringMatcher = nullptr; - QRegExp *regExp = nullptr; -}; +#endif // SMOLBOTE_URLFILTER_MATCHER -#endif // SMOLBOTE_ADBLOCKRULE_H diff --git a/lib/urlfilter/meson.build b/lib/urlfilter/meson.build index 1f4f47c..b017eb5 100644 --- a/lib/urlfilter/meson.build +++ b/lib/urlfilter/meson.build @@ -1,19 +1,26 @@ -urlfilter_inc = include_directories('.') - -urlfilter_moc = qt5.preprocess( - moc_headers: 'filtertree.h', - dependencies: dep_qt5 -) - urlfilter_lib = static_library('urlfilter', - ['filtertree.cpp', 'filterleaf.cpp', urlfilter_moc, - 'domain.cpp', 'domain.h', - 'formats/adblockrule.cpp', 'formats/adblockrule_parse.cpp', 'formats/hostlistrule.cpp', - 'formats/adblocklist.cpp'], + ['urlfilter.h', 'matcher.h', + 'hostlist/hostlist.cpp', 'hostlist/hostlist.h', + 'adblock/adblocklist.cpp', 'adblock/adblocklist.h', 'adblock/parser.cpp', 'adblock/parser.h'], dependencies: dep_qt5 ) dep_urlfilter = declare_dependency( - include_directories: urlfilter_inc, + include_directories: include_directories('.'), link_with: urlfilter_lib ) + +if get_option('testing').enabled() + test('urlfilter: matcher', + executable('urlfilter-matcher', dependencies: [dep_qt5, dep_gtest, dep_urlfilter], sources: ['test/matcher.cpp']), + workdir: meson.current_source_dir() / 'test' + ) + test('urlfilter: host list', + executable('urlfilter-hostlist', dependencies: [dep_qt5, dep_gtest, dep_urlfilter], sources: ['test/hostlist.cpp']), + workdir: meson.current_source_dir() / 'test' + ) + test('urlfilter: adblock list', + executable('urlfilter-adblocklist', dependencies: [dep_qt5, dep_gtest, dep_urlfilter], sources: ['test/adblock.cpp']), + workdir: meson.current_source_dir() / 'test' + ) +endif diff --git a/lib/urlfilter/test/adblock.cpp b/lib/urlfilter/test/adblock.cpp new file mode 100644 index 0000000..ecb94ee --- /dev/null +++ b/lib/urlfilter/test/adblock.cpp @@ -0,0 +1,88 @@ +#include "urlfilter.h" +#include "adblock/adblocklist.h" +#include <gtest/gtest.h> +#include <QFile> + +AdBlockList *list = nullptr; + +TEST(AdBlockList, MetaData) { + EXPECT_STREQ(qUtf8Printable(list->metadata("Homepage")), "http://example.com/"); + EXPECT_STREQ(qUtf8Printable(list->metadata("Title")), "FooList"); + EXPECT_STREQ(qUtf8Printable(list->metadata("Expires")), "5 days"); + EXPECT_STREQ(qUtf8Printable(list->metadata("Redirect")), "http://example.com/list.txt"); + EXPECT_STREQ(qUtf8Printable(list->metadata("Version")), "1234"); +} + +TEST(AdBlockList, BasicFilter) { + // Rule: /banner/*/img^ + EXPECT_EQ(list->match(QUrl(), QUrl("http://example.com/banner/foo/img"), QWebEngineUrlRequestInfo::ResourceTypeImage).first, UrlFilter::Block); + EXPECT_EQ(list->match(QUrl(), QUrl("http://example.com/banner/foo/bar/img?param"), QWebEngineUrlRequestInfo::ResourceTypeImage).first, UrlFilter::Block); + EXPECT_EQ(list->match(QUrl(), QUrl("http://example.com/banner//img/foo"), QWebEngineUrlRequestInfo::ResourceTypeImage).first, UrlFilter::Block); + + EXPECT_EQ(list->match(QUrl(), QUrl("http://example.com/banner/foo.png"), QWebEngineUrlRequestInfo::ResourceTypeImage).first, UrlFilter::NotMatched); + EXPECT_EQ(list->match(QUrl(), QUrl("http://example.com/banner/img"), QWebEngineUrlRequestInfo::ResourceTypeImage).first, UrlFilter::NotMatched); + EXPECT_EQ(list->match(QUrl(), QUrl("http://example.com/banner/foo/imgraph"), QWebEngineUrlRequestInfo::ResourceTypeImage).first, UrlFilter::NotMatched); + EXPECT_EQ(list->match(QUrl(), QUrl("http://example.com/banner/foo/img.gif"), QWebEngineUrlRequestInfo::ResourceTypeImage).first, UrlFilter::NotMatched); + + EXPECT_EQ(list->match(QUrl(), QUrl("http://example.com/banner/ads/img.png"), QWebEngineUrlRequestInfo::ResourceTypeMainFrame).first, UrlFilter::NotMatched); +} + +TEST(AdBlockList, MatchBeginningEnd) { + // Rule: |http://beginning-pattern.com + EXPECT_EQ(list->match(QUrl(), QUrl("http://beginning-pattern.com"), QWebEngineUrlRequestInfo::ResourceTypeMainFrame).first, UrlFilter::Block); + EXPECT_EQ(list->match(QUrl(), QUrl("https://beginning-pattern.com"), QWebEngineUrlRequestInfo::ResourceTypeMainFrame).first, UrlFilter::NotMatched); + // Rule: end-pattern| + EXPECT_EQ(list->match(QUrl(), QUrl("https://endpattern.com/end-pattern"), QWebEngineUrlRequestInfo::ResourceTypeMainFrame).first, UrlFilter::Block); + EXPECT_EQ(list->match(QUrl(), QUrl("https://endpattern.com/end-pattern/foo"), QWebEngineUrlRequestInfo::ResourceTypeMainFrame).first, UrlFilter::NotMatched); +} + +TEST(AdBlockList, Domain) { + // Rule: ||ads.example.com^ + EXPECT_EQ(list->match(QUrl(), QUrl("http://ads.example.com/foo.gif"), QWebEngineUrlRequestInfo::ResourceTypeMainFrame).first, UrlFilter::Block); + EXPECT_EQ(list->match(QUrl(), QUrl("http://server1.ads.example.com/foo.gif"), QWebEngineUrlRequestInfo::ResourceTypeMainFrame).first, UrlFilter::Block); + EXPECT_EQ(list->match(QUrl(), QUrl("https://ads.example.com:8000/"), QWebEngineUrlRequestInfo::ResourceTypeMainFrame).first, UrlFilter::Block); + + EXPECT_EQ(list->match(QUrl(), QUrl("http://ads.example.com.ua/foo.gif"), QWebEngineUrlRequestInfo::ResourceTypeMainFrame).first, UrlFilter::NotMatched); + EXPECT_EQ(list->match(QUrl(), QUrl("http://example.com/redirect/http://ads.example.com/"), QWebEngineUrlRequestInfo::ResourceTypeMainFrame).first, UrlFilter::NotMatched); +} + +TEST(AdBlockList, RegularExpression) { + // Rule: /banner\d+/ + EXPECT_EQ(list->match(QUrl(), QUrl("http://example.com/banner123"), QWebEngineUrlRequestInfo::ResourceTypeMainFrame).first, UrlFilter::Block); + EXPECT_EQ(list->match(QUrl(), QUrl("http://example.com/banner321"), QWebEngineUrlRequestInfo::ResourceTypeMainFrame).first, UrlFilter::Block); + EXPECT_EQ(list->match(QUrl(), QUrl("http://example.com/banners"), QWebEngineUrlRequestInfo::ResourceTypeMainFrame).first, UrlFilter::NotMatched); +} + +TEST(AdBlockList, MatchCase) { + // Rule: matchThisCase$match-case + EXPECT_EQ(list->match(QUrl(), QUrl("http://matchcase.com/matchThisCase"), QWebEngineUrlRequestInfo::ResourceTypeMainFrame).first, UrlFilter::Block); + EXPECT_EQ(list->match(QUrl(), QUrl("http://matchcase.com/MatchThisCase"), QWebEngineUrlRequestInfo::ResourceTypeMainFrame).first, UrlFilter::NotMatched); +} + +TEST(AdBlockList, DomainOption) { + // Rule: domain-limited-string$domain=example.com + EXPECT_EQ(list->match(QUrl("https://example.com"), QUrl("https://example.com/domain-limited-string/foo"), QWebEngineUrlRequestInfo::ResourceTypeMainFrame).first, UrlFilter::Block); + EXPECT_EQ(list->match(QUrl("https://example.com"), QUrl("https://example.com/another-domain-string/foo"), QWebEngineUrlRequestInfo::ResourceTypeMainFrame).first, UrlFilter::NotMatched); + EXPECT_EQ(list->match(QUrl("https://another.com"), QUrl("https://example.com/domain-limited-string/foo"), QWebEngineUrlRequestInfo::ResourceTypeMainFrame).first, UrlFilter::NotMatched); + + //Rule: exception-limited-string$domain=~example.com + EXPECT_EQ(list->match(QUrl("https://another.com"), QUrl("https://example.com/exception-limited-string/foo"), QWebEngineUrlRequestInfo::ResourceTypeMainFrame).first, UrlFilter::Block); + EXPECT_EQ(list->match(QUrl("https://example.com"), QUrl("https://example.com/exception-limited-string/foo"), QWebEngineUrlRequestInfo::ResourceTypeMainFrame).first, UrlFilter::NotMatched); +} + +int main(int argc, char **argv) { + QFile f("adblock.txt"); + if(!f.open(QIODevice::ReadOnly | QIODevice::Text)) { + qDebug("Could not open list"); + return -1; + } + + list = new AdBlockList(&f); + f.close(); + + qDebug("Parsed %i rules", list->ruleCount()); + + testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} + diff --git a/lib/urlfilter/test/adblock.txt b/lib/urlfilter/test/adblock.txt new file mode 100644 index 0000000..635ce09 --- /dev/null +++ b/lib/urlfilter/test/adblock.txt @@ -0,0 +1,26 @@ +! Homepage: http://example.com/ +! Title: FooList +! Expires: 5 days +! Redirect: http://example.com/list.txt +! Version: 1234 + +/banner/*/img^ +||ads.example.com^ +|http://example.com/| +/banner\d+/ + +! match beginning +||http://beginning-pattern.com +! match end +end-pattern| + +! options +! match-case +matchThisCase$match-case + +! domain limiting +! only apply this filter on this domain +domain-limited-string$domain=example.com +! apply this filter to all domains but the listed one +exception-limited-string$domain=~example.com + diff --git a/lib/urlfilter/test/hostlist.cpp b/lib/urlfilter/test/hostlist.cpp new file mode 100644 index 0000000..041cd5f --- /dev/null +++ b/lib/urlfilter/test/hostlist.cpp @@ -0,0 +1,34 @@ +#include <gtest/gtest.h> +#include "hostlist/hostlist.h" +#include <QFile> + +HostList *list = nullptr; + +TEST(AdBlockList, Block) { + EXPECT_EQ(list->match(QUrl(), QUrl::fromUserInput("blockeddomain.com"), QWebEngineUrlRequestInfo::ResourceTypeMainFrame).first, UrlFilter::Block); + EXPECT_EQ(list->match(QUrl(), QUrl::fromUserInput("blockeddomain.first"), QWebEngineUrlRequestInfo::ResourceTypeMainFrame).first, UrlFilter::Block); + EXPECT_EQ(list->match(QUrl(), QUrl::fromUserInput("blockeddomain.second"), QWebEngineUrlRequestInfo::ResourceTypeMainFrame).first, UrlFilter::Block); + + const auto r = list->match(QUrl(), QUrl::fromUserInput("localhost.localdomain"), QWebEngineUrlRequestInfo::ResourceTypeMainFrame); + EXPECT_EQ(r.first, UrlFilter::Redirect); + EXPECT_EQ(r.second, QString("127.0.0.1")); + + EXPECT_EQ(list->match(QUrl(), QUrl::fromUserInput("other.domain"), QWebEngineUrlRequestInfo::ResourceTypeMainFrame).first, UrlFilter::NotMatched); +} + +int main(int argc, char **argv) { + QFile f("hostlist.txt"); + if(!f.open(QIODevice::ReadOnly | QIODevice::Text)) { + qDebug("Could not open list"); + return -1; + } + + list = new HostList(&f); + f.close(); + + qDebug("Parsed %i rules", list->ruleCount()); + + testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} + diff --git a/test/hostlist.txt b/lib/urlfilter/test/hostlist.txt index a0b4e5c..a0b4e5c 100644 --- a/test/hostlist.txt +++ b/lib/urlfilter/test/hostlist.txt diff --git a/lib/urlfilter/test/matcher.cpp b/lib/urlfilter/test/matcher.cpp new file mode 100644 index 0000000..1c1efbf --- /dev/null +++ b/lib/urlfilter/test/matcher.cpp @@ -0,0 +1,42 @@ +#include "urlfilter.h" +#include "matcher.h" +#include <gtest/gtest.h> + +TEST(Matcher, StringContains) { + ContentsMatcher<QStringMatcher> matcher("spam-pattern", UrlFilter::StringContains); + EXPECT_TRUE(matcher.hasMatch("this string contains a spam-pattern")); + EXPECT_FALSE(matcher.hasMatch("this string does not contain the pattern")); +} + +TEST(Matcher, StringStartsWith) { + ContentsMatcher<QStringMatcher> matcher("beginning", UrlFilter::StringStartsWith); + EXPECT_TRUE(matcher.hasMatch("beginning this string is the pattern")); + EXPECT_FALSE(matcher.hasMatch("ending this string is the pattern, the word beginning")); + EXPECT_FALSE(matcher.hasMatch("this would be a string where the pattern cannot be found")); +} + +TEST(Matcher, StringEndsWith) { + ContentsMatcher<QStringMatcher> matcher("ending", UrlFilter::StringEndsWith); + EXPECT_TRUE(matcher.hasMatch("this string has the proper ending")); + EXPECT_FALSE(matcher.hasMatch("and this string doesn't")); +} + +TEST(Matcher, StringEquals) { + ContentsMatcher<QStringMatcher> matcher("string-to-match", UrlFilter::StringEquals); + EXPECT_TRUE(matcher.hasMatch("string-to-match")); + EXPECT_FALSE(matcher.hasMatch("same-len-string")); + EXPECT_FALSE(matcher.hasMatch("not the string-to-match")); +} + +TEST(Matcher, RegularExpression) { + ContentsMatcher<QRegularExpression> matcher("banner\\d+", UrlFilter::RegularExpressionMatch); + EXPECT_TRUE(matcher.hasMatch("http://another.com/banner123")); + EXPECT_TRUE(matcher.hasMatch("http://another.com/banner321")); + EXPECT_FALSE(matcher.hasMatch("http://another.com/banners")); + +} + +int main(int argc, char **argv) { + testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/lib/urlfilter/urlfilter.h b/lib/urlfilter/urlfilter.h new file mode 100644 index 0000000..e15122a --- /dev/null +++ b/lib/urlfilter/urlfilter.h @@ -0,0 +1,43 @@ +/* + * This file is part of smolbote. It's copyrighted by the contributors recorded + * in the version control history of the file, available from its original + * location: https://neueland.iserlohn-fortress.net/gitea/aqua/smolbote + * + * SPDX-License-Identifier: GPL-3.0 + */ + +#include <QUrl> +#include <QString> +#include <utility> +#include <QWebEngineUrlRequestInfo> + +#ifndef SMOLBOTE_URLFILTER_FILTER +#define SMOLBOTE_URLFILTER_FILTER + +class UrlFilter +{ +public: + enum MatchResult { + NotMatched, + Allow, + Block, + Redirect + }; + + enum MatchType { + InvalidMatch, + RegularExpressionMatch, + StringContains, + StringStartsWith, + StringEndsWith, + StringEquals, + DomainMatch + }; + + virtual ~UrlFilter() = default; + + virtual QString metadata(const QString &key) const = 0; + virtual std::pair<MatchResult, QString> match(const QUrl &firstParty, const QUrl &requestUrl, QWebEngineUrlRequestInfo::ResourceType type) const = 0; +}; + +#endif // SMOLBOTE_URLFILTER_FILTER diff --git a/meson.build b/meson.build index d903910..1d333ca 100644 --- a/meson.build +++ b/meson.build @@ -72,6 +72,8 @@ interfaces_moc = qt5.preprocess( dependencies: dep_qt5 ) +dep_gtest = dependency('gtest', required: get_option('testing')) + subdir('lib/about') subdir('lib/addressbar') subdir('lib/bookmarks') @@ -89,7 +91,3 @@ subdir('doc') subdir('plugins/ConfigurationEditor') subdir('plugins/ProfileEditor') -if get_option('testing').enabled() - subdir('test') -endif - diff --git a/src/browser.cpp b/src/browser.cpp index 42bbc5d..3a23eeb 100644 --- a/src/browser.cpp +++ b/src/browser.cpp @@ -18,7 +18,6 @@ #include "profilemanager.h" #include "subwindow/subwindow.h" #include "util.h" -#include "webengine/filter.h" #include "webengine/urlinterceptor.h" #include "webprofile.h" #include <QAction> @@ -35,6 +34,9 @@ #include <version.h> #include "mainwindow/menubar.h" #include "webengine/webview.h" +#include "urlfilter.h" +#include "adblock/adblocklist.h" +#include "hostlist/hostlist.h" Browser::Browser(int &argc, char *argv[], bool allowSecondary) : SingleApplication(argc, argv, allowSecondary, SingleApplication::User | SingleApplication::SecondaryNotification | SingleApplication::ExcludeAppVersion) @@ -99,7 +101,16 @@ QPair<QString, Profile *> Browser::loadProfile(const QString &id, bool isOffTheR profile = m_profileManager->createProfile(id, isOffTheRecord); } connect(profile, &WebProfile::downloadRequested, m_downloads.get(), &DownloadsWidget::addDownload); - auto *interceptor = new UrlRequestInterceptor(m_urlFilter.get(), profile, profile); + auto *interceptor = new UrlRequestInterceptor(profile, profile); + for(UrlFilter *filter : m_filters) { + interceptor->addFilter(filter); + } + const auto headers = m_config->value<QStringList>("filter.header").value_or(QStringList()); + for(const QString &header : headers) { + const auto h = header.split(QLatin1Literal(":")); + if(h.length() == 2) + interceptor->addHttpHeader(h.at(0).toLatin1(), h.at(1).toLatin1()); + } profile->setRequestInterceptor(interceptor); return QPair<QString, WebProfile *>(m_profileManager->id(profile), profile); @@ -164,7 +175,20 @@ void Browser::setup(QVector<QPluginLoader *> plugins) // downloads m_downloads = std::make_unique<DownloadsWidget>(m_config->value<QString>("downloads.path").value()); // url request filter - m_urlFilter = std::make_unique<Filter>(m_config); + for(const QString &hostlist : Util::files(m_config->value<QString>("filter.hosts").value_or(QString()))) { + QFile f(hostlist); + if(f.open(QIODevice::ReadOnly | QIODevice::Text)) { + m_filters.append(new HostList(&f)); + f.close(); + } + } + for(const QString &adblock : Util::files(m_config->value<QString>("filter.adblock").value_or(QString()))) { + QFile f(adblock); + if(f.open(QIODevice::ReadOnly | QIODevice::Text)) { + m_filters.append(new AdBlockList(&f)); + f.close(); + } + } // cookie request filter // load profiles diff --git a/src/browser.h b/src/browser.h index 53ee521..8a40152 100644 --- a/src/browser.h +++ b/src/browser.h @@ -19,10 +19,10 @@ #include <QMenu> #include <QPluginLoader> +class UrlFilter; class Configuration; class BookmarksWidget; class DownloadsWidget; -class Filter; class MainWindow; class ProfileManager; class Browser : public SingleApplication, public BrowserInterface @@ -91,7 +91,7 @@ private: std::shared_ptr<BookmarksWidget> m_bookmarks; std::unique_ptr<DownloadsWidget> m_downloads; ProfileManager *m_profileManager; - std::unique_ptr<Filter> m_urlFilter; + QVector<UrlFilter *> m_filters; QVector<MainWindow *> m_windows; QVector<PluginInfo*> m_plugins; diff --git a/src/meson.build b/src/meson.build index f07a2ec..fb338d8 100644 --- a/src/meson.build +++ b/src/meson.build @@ -4,7 +4,7 @@ poi_moc = qt5.preprocess( 'mainwindow/mainwindow.h', 'mainwindow/menubar.h', 'mainwindow/widgets/dockwidget.h', 'mainwindow/widgets/menusearch.h', 'mainwindow/widgets/navigationbar.h', 'mainwindow/widgets/searchform.h', 'session/savesessiondialog.h', 'session/sessiondialog.h', 'session/sessionform.h', 'subwindow/subwindow.h', 'subwindow/tabwidget.h', - 'webengine/filter.h', 'webengine/urlinterceptor.h', 'webengine/webpage.h', 'webengine/webview.h'], + 'webengine/urlinterceptor.h', 'webengine/webpage.h', 'webengine/webview.h'], ui_files: ['mainwindow/widgets/searchform.ui', 'session/savesessiondialog.ui', 'session/sessiondialog.ui', 'session/sessionform.ui'], qresources: '../data/resources.qrc', rcc_extra_arguments: ['--format-version=1'], @@ -35,7 +35,6 @@ poi = executable(get_option('poiName'), install: true, 'subwindow/subwindow.cpp', 'subwindow/tabwidget.cpp', - 'webengine/filter.cpp', 'webengine/urlinterceptor.cpp', 'webengine/webpage.cpp', 'webengine/webview.cpp', diff --git a/src/webengine/filter.cpp b/src/webengine/filter.cpp deleted file mode 100644 index f1a38af..0000000 --- a/src/webengine/filter.cpp +++ /dev/null @@ -1,70 +0,0 @@ -/* - * This file is part of smolbote. It's copyrighted by the contributors recorded - * in the version control history of the file, available from its original - * location: https://neueland.iserlohn-fortress.net/gitea/aqua/smolbote - * - * SPDX-License-Identifier: GPL-3.0 - */ - -#include "filter.h" -#include "configuration.h" -#include "urlinterceptor.h" -#include "util.h" -#include <QDir> -#include <QJsonArray> -#include <QJsonDocument> -#include <QTextStream> - -Filter::Filter::Filter(const std::unique_ptr<Configuration> &config, QObject *parent) - : QObject(parent) -{ - // parse headers - if(config->exists("filter.header")) { - const auto headers = config->value<QStringList>("filter.header").value(); - for(const QString header : headers) { - const auto list = header.split(QLatin1Literal(":")); - if(list.length() == 2) - m_headers.insert(list.at(0).toLatin1(), list.at(1).toLatin1()); - } -#ifdef QT_DEBUG - qDebug("Added %i custom http headers", m_headers.size()); -#endif - } - - const QStringList hostfiles = Util::files(config->value<QString>("filter.hosts").value()); - //qDebug("filter.path=[%s]", qUtf8Printable(config->value<QString>("filter.hosts").value())); - for(const QString &hostfile : hostfiles) { - QFile f(hostfile); - if(f.open(QIODevice::ReadOnly | QIODevice::Text)) { -#ifdef QT_DEBUG - qDebug("Loading hostlist filters [%s]", qUtf8Printable(hostfile)); -#endif - loadHostlist(f, &filters); - f.close(); - } - } -} - -void Filter::filterRequest(QWebEngineUrlRequestInfo &info) const -{ - auto matches = filters.match(info.firstPartyUrl().toString(), info.requestUrl().toString()); - for(const auto &rule : matches) { - switch(rule->action().first) { - case FilterLeaf::NotMatched: -#ifdef QT_DEBUG - qDebug("Paradoxical match: request matched, but not matched."); - qDebug(" - %s", qUtf8Printable(info.requestUrl().toString())); -#endif - break; - case FilterLeaf::Block: - //qDebug("block %s", qUtf8Printable(info.requestUrl().toString())); - info.block(true); - break; - case FilterLeaf::Allow: - info.block(false); - break; - //case FilterLeaf::Redirect: - // break; - } - } -} diff --git a/src/webengine/filter.h b/src/webengine/filter.h deleted file mode 100644 index c49bed9..0000000 --- a/src/webengine/filter.h +++ /dev/null @@ -1,43 +0,0 @@ -/* - * This file is part of smolbote. It's copyrighted by the contributors recorded - * in the version control history of the file, available from its original - * location: https://neueland.iserlohn-fortress.net/gitea/aqua/smolbote - * - * SPDX-License-Identifier: GPL-3.0 - */ - -#ifndef SMOLBOTE_FILTER_H -#define SMOLBOTE_FILTER_H - -#include <QByteArray> -#include <QMap> -#include <QVector> -#include <optional> -#include <memory> -#include "filtertree.h" - -class Configuration; -class Filter : public QObject -{ - Q_OBJECT -public: - struct HostRule { - bool isBlocking; - }; - - explicit Filter(const std::unique_ptr<Configuration> &config, QObject *parent = nullptr); - ~Filter() override = default; - - void filterRequest(QWebEngineUrlRequestInfo &info) const; - - const QMap<QByteArray, QByteArray> headers() const - { - return qAsConst(m_headers); - } - -private: - FilterTree filters; - QMap<QByteArray, QByteArray> m_headers; -}; - -#endif // SMOLBOTE_FILTER_H diff --git a/src/webengine/urlinterceptor.cpp b/src/webengine/urlinterceptor.cpp index 7e5630f..490dea6 100644 --- a/src/webengine/urlinterceptor.cpp +++ b/src/webengine/urlinterceptor.cpp @@ -7,36 +7,61 @@ */ #include "urlinterceptor.h" -#include "formats/adblockrule.h" -#include <QDir> -#include <QJsonArray> -#include <QJsonDocument> -#include <QTextStream> -#include <boost/algorithm/string.hpp> -#include "configuration.h" -#include "filter.h" #include "webprofile.h" +#include "urlfilter.h" // test DNT on https://browserleaks.com/donottrack -UrlRequestInterceptor::UrlRequestInterceptor(Filter* filter, WebProfile* profile, QObject* parent) +UrlRequestInterceptor::UrlRequestInterceptor(WebProfile* profile, QObject* parent) : QWebEngineUrlRequestInterceptor(parent) { - Q_CHECK_PTR(filter); - m_filter = filter; Q_CHECK_PTR(profile); m_profile = profile; } +void UrlRequestInterceptor::addHttpHeader(const QByteArray &key, const QByteArray &value) +{ + headers.append(qMakePair(key, value)); +} + +void UrlRequestInterceptor::addFilter(UrlFilter *filter) +{ + if(filter != nullptr) + filters.append(filter); +} +void UrlRequestInterceptor::removeFilter(UrlFilter *filter) +{ + if(filter != nullptr) + filters.removeOne(filter); +} + void UrlRequestInterceptor::interceptRequest(QWebEngineUrlRequestInfo &info) { - m_filter->filterRequest(info); + for(const auto *filter : filters) { + const auto match = filter->match(info.firstPartyUrl(), info.requestUrl(), info.resourceType()); + + // skip if no match + if(match.first == UrlFilter::NotMatched) + continue; + + else { + if(match.first == UrlFilter::Allow) + info.block(false); + else if(match.first == UrlFilter::Block) + info.block(true); + else if(match.first == UrlFilter::Redirect) + info.redirect(QUrl::fromUserInput(match.second)); + // we found a match, skip the rest + break; + } + } // set headers - for(auto i = m_filter->headers().constBegin(); i != m_filter->headers().constEnd(); ++i) { - info.setHttpHeader(i.key(), i.value()); + for(const auto &header : headers) { + info.setHttpHeader(header.first, header.second); } for(auto i = m_profile->headers().constBegin(); i != m_profile->headers().constEnd(); ++i) { info.setHttpHeader(i.key(), i.value()); } } + diff --git a/src/webengine/urlinterceptor.h b/src/webengine/urlinterceptor.h index 62fd683..4909586 100644 --- a/src/webengine/urlinterceptor.h +++ b/src/webengine/urlinterceptor.h @@ -9,26 +9,30 @@ #ifndef SMOLBOTE_URLREQUESTINTERCEPTOR_H #define SMOLBOTE_URLREQUESTINTERCEPTOR_H -#include <QByteArray> #include <QVector> #include <QWebEngineUrlRequestInterceptor> -#include <memory> +#include <QByteArray> -class Filter; +class UrlFilter; class WebProfile; -class Configuration; class UrlRequestInterceptor : public QWebEngineUrlRequestInterceptor { Q_OBJECT public: - explicit UrlRequestInterceptor(Filter *filter, WebProfile *profile, QObject *parent = nullptr); + explicit UrlRequestInterceptor(WebProfile *profile, QObject *parent = nullptr); ~UrlRequestInterceptor() override = default; + void addHttpHeader(const QByteArray &key, const QByteArray &value); + + void addFilter(UrlFilter *filter); + void removeFilter(UrlFilter *filter); + void interceptRequest(QWebEngineUrlRequestInfo &info) override; private: - Filter *m_filter; WebProfile *m_profile; + QVector<QPair<QByteArray, QByteArray>> headers; + QVector<UrlFilter*> filters; }; #endif // SMOLBOTE_URLREQUESTINTERCEPTOR_H diff --git a/test/adblock.txt b/test/adblock.txt deleted file mode 100644 index cd284e8..0000000 --- a/test/adblock.txt +++ /dev/null @@ -1,4 +0,0 @@ -/banner/*/img^ -||ads.example.com^ -|http://example.com/| -/banner\d+/ diff --git a/test/adblock/adblocktest.cpp b/test/adblock/adblocktest.cpp deleted file mode 100644 index bbcaf0e..0000000 --- a/test/adblock/adblocktest.cpp +++ /dev/null @@ -1,83 +0,0 @@ -#include "formats/adblockrule.h" -#include "formats/adblockrule_parse.h" -#include <gtest/gtest.h> - -TEST(Matcher, StringContains) { - ContentsMatcher<QStringMatcher> matcher("spam-pattern", FilterLeaf::StringContains); - EXPECT_TRUE(matcher.hasMatch("this string contains a spam-pattern")); - EXPECT_FALSE(matcher.hasMatch("this string does not contain the pattern")); -} - -TEST(Matcher, StringStartsWith) { - ContentsMatcher<QStringMatcher> matcher("beginning", FilterLeaf::StringStartsWith); - EXPECT_TRUE(matcher.hasMatch("beginning this string is the pattern")); - EXPECT_FALSE(matcher.hasMatch("ending this string is the pattern, the word beginning")); - EXPECT_FALSE(matcher.hasMatch("this would be a string where the pattern cannot be found")); -} - -TEST(Matcher, StringEndsWith) { - ContentsMatcher<QStringMatcher> matcher("ending", FilterLeaf::StringEndsWith); - EXPECT_TRUE(matcher.hasMatch("this string has the proper ending")); - EXPECT_FALSE(matcher.hasMatch("and this string doesn't")); -} - -TEST(Matcher, StringEquals) { - ContentsMatcher<QStringMatcher> matcher("string-to-match", FilterLeaf::StringEquals); - EXPECT_TRUE(matcher.hasMatch("string-to-match")); - EXPECT_FALSE(matcher.hasMatch("same-len-string")); - EXPECT_FALSE(matcher.hasMatch("not the string-to-match")); -} - -TEST(Matcher, RegularExpression) { - ContentsMatcher<QRegularExpression> matcher("banner\\d+", FilterLeaf::RegularExpressionMatch); - EXPECT_TRUE(matcher.hasMatch("http://another.com/banner123")); - EXPECT_TRUE(matcher.hasMatch("http://another.com/banner321")); - EXPECT_FALSE(matcher.hasMatch("http://another.com/banners")); - -} - -TEST(AdBlockRule, SimpleRule) { - AdBlockRule *rule = parseRule_adblock("/spamdomain/$domain=spamdomain.com,image"); - EXPECT_TRUE(rule->match(QUrl("subdomain.spamdomain.com"))); -// QCOMPARE(rule->action().first == FilterLeaf::Block, true); -// QCOMPARE(rule->option(QWebEngineUrlRequestInfo::ResourceTypeImage).value(), true); -} - -TEST(AdBlockRule, AddressPart) { - AdBlockRule *rule = parseRule_adblock("/banner/*/img^"); - EXPECT_TRUE(rule->match(QUrl("http://example.com/banner/foo/img"))); - EXPECT_TRUE(rule->match(QUrl("http://example.com/banner/foo/bar/img?param"))); - EXPECT_TRUE(rule->match(QUrl("http://example.com/banner//img/foo"))); - EXPECT_FALSE(rule->match(QUrl("http://example.com/banner/img"))); - EXPECT_FALSE(rule->match(QUrl("http://example.com/banner/foo/imgraph"))); - EXPECT_FALSE(rule->match(QUrl("http://example.com/banner/foo/img.gif"))); -} - -TEST(AdBlockRule, Domain){ - AdBlockRule *rule = parseRule_adblock("||ads.example.com^"); - EXPECT_TRUE(rule->match(QUrl("http://ads.example.com/foo.gif"))); - EXPECT_TRUE(rule->match(QUrl("http://server1.ads.example.com/foo.gif"))); - EXPECT_TRUE(rule->match(QUrl("https://ads.example.com:8000/"))); - EXPECT_FALSE(rule->match(QUrl("http://ads.example.com.ua/foo.gif"))); - EXPECT_FALSE(rule->match(QUrl("http://example.com/redirect/http://ads.example.com/"))); -} - - -TEST(AdBlockRule, ExactAddress){ - AdBlockRule *rule = parseRule_adblock("|http://example.com/|"); - EXPECT_TRUE(rule->match(QUrl("http://example.com/"))); - EXPECT_FALSE(rule->match(QUrl("http://example.com/foo.gif"))); - EXPECT_FALSE(rule->match(QUrl("http://example.info/redirect/http://example.com/"))); -} - -TEST(AdBlockRule, RegularExpression) { - AdBlockRule *rule = parseRule_adblock("/banner\\d+/"); - EXPECT_TRUE(rule->match(QUrl("http://another.com/banner123"))); - EXPECT_TRUE(rule->match(QUrl("http://another.com/banner321"))); - EXPECT_FALSE(rule->match(QUrl("http://another.com/banners"))); -} - -int main(int argc, char **argv) { - testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/test/hostlist/hostlisttest.cpp b/test/hostlist/hostlisttest.cpp deleted file mode 100644 index 7f5c954..0000000 --- a/test/hostlist/hostlisttest.cpp +++ /dev/null @@ -1,61 +0,0 @@ -#include "hostlisttest.h" -#include <QtTest/QtTest> - -void HostlistTest::parseList() -{ - // load filters - QFile hostlist("hostlist.txt"); - QCOMPARE(hostlist.open(QIODevice::ReadOnly | QIODevice::Text), true); - QCOMPARE(loadHostlist(hostlist, &tree), true); - - // hostlist filters are applied to all domains, so there should only be one branch - QCOMPARE(tree.branches().length(), 1); -} - -void HostlistTest::checkRules_data() -{ - QTest::addColumn<QUrl>("domain"); - QTest::addColumn<QUrl>("request"); - QTest::addColumn<int>("matches"); - QTest::addColumn<FilterLeaf::Action>("action"); - - const QVector<QUrl> domains{ QUrl(), QUrl::fromUserInput("testdomain.host") }; - for(const QUrl &domain : domains) { - QTest::newRow("block (1 domain per line)") << domain << QUrl::fromUserInput("blockeddomain.com") << 1 << FilterLeaf::Block; - QTest::newRow("block (2 domains per line #1)") << domain << QUrl::fromUserInput("blockeddomain.first") << 1 << FilterLeaf::Block; - QTest::newRow("block (2 domains per line #2)") << domain << QUrl::fromUserInput("blockeddomain.second") << 1 << FilterLeaf::Block; - QTest::newRow("redirect") << domain << QUrl::fromUserInput("localhost.localdomain") << 1 << FilterLeaf::Redirect; - QTest::newRow("domain not in hostlist") << domain << QUrl::fromUserInput("other.domain") << 0 << FilterLeaf::NotMatched; - } -} - -void HostlistTest::checkRules() -{ - QFETCH(QUrl, domain); - QFETCH(QUrl, request); - QFETCH(int, matches); - QFETCH(FilterLeaf::Action, action); - - auto result = tree.match(domain, request); - QCOMPARE(result.length(), matches); - if(matches > 0) - QCOMPARE(result.constFirst()->action().first, action); - if(action == FilterLeaf::Redirect) - QCOMPARE(result.constFirst()->action().second, QLatin1Literal("127.0.0.1")); -} - -void HostlistTest::benchmark_parse() -{ - QFile hostlist("hostlist-benchmark.txt"); - if(hostlist.open(QIODevice::ReadOnly | QIODevice::Text)) { - FilterTree benchmarkTree; - bool loaded; - QBENCHMARK { - loaded = loadHostlist(hostlist, &benchmarkTree); - } - QCOMPARE(loaded, true); - hostlist.close(); - } -} - -QTEST_GUILESS_MAIN(HostlistTest) diff --git a/test/hostlist/hostlisttest.h b/test/hostlist/hostlisttest.h deleted file mode 100644 index 96051a9..0000000 --- a/test/hostlist/hostlisttest.h +++ /dev/null @@ -1,23 +0,0 @@ -#ifndef HOSTLIST_TEST -#define HOSTLIST_TEST - -#include "filtertree.h" -#include <QObject> - -class HostlistTest : public QObject -{ - Q_OBJECT - -private slots: - void parseList(); - - void checkRules_data(); - void checkRules(); - - void benchmark_parse(); - -private: - FilterTree tree; -}; - -#endif diff --git a/test/matcherbenchmark/matcherbenchmark.cpp b/test/matcherbenchmark/matcherbenchmark.cpp deleted file mode 100644 index 84406d5..0000000 --- a/test/matcherbenchmark/matcherbenchmark.cpp +++ /dev/null @@ -1,91 +0,0 @@ -#include "matcherbenchmark.h" -#include <string> -#include <regex> -#include <regex.h> -#include <QtTest/QTest> -#include <QRegExp> -#include <QRegularExpression> -#include <QStringMatcher> -#include <boost/regex.hpp> - -void MatcherBenchmark::qstringcontains() -{ - const QString pattern("spamdomain"); - const QString request("subdomain.spamdomain.com"); - - QCOMPARE(request.contains(pattern), true); - QBENCHMARK { - request.contains(pattern); - } -} - -void MatcherBenchmark::qstringmatcher() -{ - const QStringMatcher pattern("spamdomain"); - const QString request("subdomain.spamdomain.com"); - - QCOMPARE(pattern.indexIn(request) != -1, true); - QBENCHMARK { - pattern.indexIn(request); - } -} - -void MatcherBenchmark::qregexp() -{ - const QRegExp pattern("spamdomain"); - const QString request("subdomain.spamdomain.com"); - - QCOMPARE(pattern.indexIn(request) != -1, true); - QBENCHMARK { - pattern.indexIn(request); - } -} - -void MatcherBenchmark::qregularexpressionmatch() -{ - const QRegularExpression pattern("spamdomain"); - const QString request("subdomain.spamdomain.com"); - - QCOMPARE(pattern.match(request).hasMatch(), true); - QBENCHMARK { - pattern.match(request).hasMatch(); - } -} - -void MatcherBenchmark::stdregex() -{ - const std::regex pattern("spamdomain"); - const std::string request("subdomain.spamdomain.com"); - - QCOMPARE(std::regex_search(request, pattern), true); - QBENCHMARK { - std::regex_search(request, pattern); - } -} - -void MatcherBenchmark::cregex() -{ - regex_t pattern; - QCOMPARE(regcomp(&pattern, "spamdomain", 0), 0); - const std::string request("subdomain.spamdomain.com"); - - QCOMPARE(regexec(&pattern, request.c_str(), 0, NULL, 0), false); - QBENCHMARK { - regexec(&pattern, request.c_str(), 0, NULL, 0); - } - - regfree(&pattern); -} - -void MatcherBenchmark::boostregex() -{ - const boost::regex pattern("spamdomain"); - const std::string request("subdomain.spamdomain.com"); - - QCOMPARE(boost::regex_search(request, pattern), true); - QBENCHMARK { - boost::regex_search(request, pattern); - } -} - -QTEST_GUILESS_MAIN(MatcherBenchmark) diff --git a/test/matcherbenchmark/matcherbenchmark.h b/test/matcherbenchmark/matcherbenchmark.h deleted file mode 100644 index deb4495..0000000 --- a/test/matcherbenchmark/matcherbenchmark.h +++ /dev/null @@ -1,20 +0,0 @@ -#ifndef MATCHER_BENCHMARK -#define MATCHER_BENCHMARK - -#include <QObject> - -class MatcherBenchmark : public QObject -{ - Q_OBJECT - -private slots: - void qstringcontains(); - void qstringmatcher(); - void qregexp(); - void qregularexpressionmatch(); - void stdregex(); - void cregex(); - void boostregex(); -}; - -#endif diff --git a/test/meson.build b/test/meson.build deleted file mode 100644 index 75e38ed..0000000 --- a/test/meson.build +++ /dev/null @@ -1,34 +0,0 @@ -dep_gtest = dependency('gtest') - -test('urlfilter-AdBlockList', executable('AdBlockList', - dependencies: [dep_gtest, dep_qt5, dep_urlfilter], - sources: ['urlfilter/urlfiltertest.cpp'] -)) - -# Adblock parsing test -adblock = executable('AdblockTest', - dependencies: [dep_gtest, dep_qt5, dep_urlfilter], - sources: ['adblock/adblocktest.cpp'] -) -test('urlfilter-adblock', adblock, workdir: meson.current_source_dir()) - -# Hostlist parsing test -hostlist = executable('HostlistTest', - dependencies: [dep_qt5, dep_urlfilter], - sources: ['hostlist/hostlisttest.cpp', qt5.preprocess(moc_headers: 'hostlist/hostlisttest.h', dependencies: dep_qt5)] -) -test('urlfilter-hostlist', hostlist, workdir: meson.current_source_dir()) - -# matching algorithms benchmark -matcherbenchmark = executable('MatcherBenchmark', - dependencies: [dep_qt5, dependency('boost', modules: 'regex')], - sources: ['matcherbenchmark/matcherbenchmark.cpp', qt5.preprocess(moc_headers: 'matcherbenchmark/matcherbenchmark.h', dependencies: dep_qt5)] -) - -# SingleApplication issue#40 test app -singleapp = executable('SingleApplication', - cpp_args: ['-DQAPPLICATION_CLASS=QApplication'], - dependencies: [dep_qt5, dep_SingleApplication], - sources: ['singleapplication-40/main.cpp'] -) - diff --git a/test/urlfilter/urlfiltertest.cpp b/test/urlfilter/urlfiltertest.cpp deleted file mode 100644 index f6cdbd4..0000000 --- a/test/urlfilter/urlfiltertest.cpp +++ /dev/null @@ -1,61 +0,0 @@ -#include "formats/adblockrule.h" -#include "formats/adblockrule_parse.h" -#include "formats/adblocklist.h" -#include <gtest/gtest.h> - -AdBlockList list; - -TEST(AdBlockList, MetaData) { - EXPECT_STREQ(qUtf8Printable(list.metadata("Homepage")), "http://example.com/"); - EXPECT_STREQ(qUtf8Printable(list.metadata("Title")), "FooList"); - EXPECT_STREQ(qUtf8Printable(list.metadata("Expires")), "5 days"); - EXPECT_STREQ(qUtf8Printable(list.metadata("Redirect")), "http://example.com/list.txt"); - EXPECT_STREQ(qUtf8Printable(list.metadata("Version")), "1234"); -} - -TEST(AdBlockList, Contains) { - EXPECT_TRUE(list.match(QUrl(), QUrl("http://example.com/banner/foo.png"))); - EXPECT_FALSE(list.match(QUrl(), QUrl("http://example.com/banner/foo/img"))); - -// AdBlockRule *rule = parseRule_adblock("/banner/*/img^"); -// EXPECT_TRUE(rule->match(QUrl("http://example.com/banner/foo/img"))); -// EXPECT_TRUE(rule->match(QUrl("http://example.com/banner/foo/bar/img?param"))); -// EXPECT_TRUE(rule->match(QUrl("http://example.com/banner//img/foo"))); -// EXPECT_FALSE(rule->match(QUrl("http://example.com/banner/img"))); -// EXPECT_FALSE(rule->match(QUrl("http://example.com/banner/foo/imgraph"))); -// EXPECT_FALSE(rule->match(QUrl("http://example.com/banner/foo/img.gif"))); -} - -TEST(AdBlockList, ContainsWildcard) { - EXPECT_TRUE(list.match(QUrl(), QUrl("http://example.com/banner/ads/img.png"))); -} - -TEST(AdBlockList, Domain) { - EXPECT_TRUE(list.match(QUrl(), QUrl("http://ads.example.com/foo.gif"))); - EXPECT_TRUE(list.match(QUrl(), QUrl("http://server1.ads.example.com/foo.gif"))); - EXPECT_TRUE(list.match(QUrl(), QUrl("https://ads.example.com:8000/"))); - EXPECT_FALSE(list.match(QUrl(), QUrl("http://ads.example.com.ua/foo.gif"))); - EXPECT_FALSE(list.match(QUrl(), QUrl("http://example.com/redirect/http://ads.example.com/"))); -} - -TEST(AdBlockList, RegularExpression) { - EXPECT_TRUE(list.match(QUrl(), QUrl("http://example.com/banner123"))); - EXPECT_TRUE(list.match(QUrl(), QUrl("http://example.com/banner321"))); - EXPECT_FALSE(list.match(QUrl(), QUrl("http://example.com/banners"))); -} - -int main(int argc, char **argv) { - list.parseLine("! Homepage: http://example.com/"); - list.parseLine("! Title: FooList"); - list.parseLine("! Expires: 5 days"); - list.parseLine("! Redirect: http://example.com/list.txt"); - list.parseLine("! Version: 1234"); - - EXPECT_TRUE(list.parseLine("/banner/foo.png")); - EXPECT_TRUE(list.parseLine("/banner/*/img.png")); - EXPECT_TRUE(list.parseLine("||ads.example.com^")); - EXPECT_TRUE(list.parseLine("/banner\\d+/")); - - testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} |