diff options
Diffstat (limited to 'lib/urlfilter/formats')
-rw-r--r-- | lib/urlfilter/formats/adblocklist.cpp | 95 | ||||
-rw-r--r-- | lib/urlfilter/formats/adblocklist.h | 32 | ||||
-rw-r--r-- | lib/urlfilter/formats/adblockrule.cpp | 63 | ||||
-rw-r--r-- | lib/urlfilter/formats/adblockrule.h | 113 | ||||
-rw-r--r-- | lib/urlfilter/formats/adblockrule_parse.cpp | 181 | ||||
-rw-r--r-- | lib/urlfilter/formats/adblockrule_parse.h | 17 | ||||
-rw-r--r-- | lib/urlfilter/formats/hostlistrule.cpp | 29 | ||||
-rw-r--r-- | lib/urlfilter/formats/hostlistrule.h | 27 |
8 files changed, 0 insertions, 557 deletions
diff --git a/lib/urlfilter/formats/adblocklist.cpp b/lib/urlfilter/formats/adblocklist.cpp deleted file mode 100644 index 772c252..0000000 --- a/lib/urlfilter/formats/adblocklist.cpp +++ /dev/null @@ -1,95 +0,0 @@ -#include "adblocklist.h" - -AdBlockList::AdBlockList() -{ -} - -QString AdBlockList::metadata(const QString &key) const -{ - return m_metadata.value(key, QString()); -} - -FilterLeaf::Action AdBlockList::match(const QUrl &firstParty, const QUrl &requestUrl, QWebEngineUrlRequestInfo::ResourceType type) const -{ - const QString request = requestUrl.toString(); - - for(auto &filter : m_rules) { - if(filter.matcher->hasMatch(request)) - return filter.action; - } - return FilterLeaf::NotMatched; -} - -bool AdBlockList::parseLine(const QString &line) -{ - // remove whitespace from start/end of the line - QString parsedLine = line.trimmed(); - - // check if the line is empty - if(parsedLine.isEmpty()) - return false; - - // parse comment - if(parsedLine.startsWith(QLatin1Literal("!"))) - return parseComment(parsedLine); - - Filter filter; - - // exception rules - if(parsedLine.startsWith(QLatin1Literal("@@"))) { - filter.action = FilterLeaf::Allow; - parsedLine.remove(0, 2); - } - - // remove '*' at the beginning and the end - if(parsedLine.startsWith(QLatin1Literal("*"))) - parsedLine = parsedLine.mid(1); - if(parsedLine.endsWith(QLatin1Literal("*"))) - parsedLine.chop(1); - - if(parsedLine.startsWith(QLatin1Literal("/")) && parsedLine.endsWith(QLatin1Literal("/"))) { - // regular expression rule - parsedLine = parsedLine.mid(1, parsedLine.length() - 2); - filter.matcher = new ContentsMatcher<QRegularExpression>(parsedLine, FilterLeaf::RegularExpressionMatch); - - } else if(parsedLine.contains(QLatin1Literal("*"))) { - parsedLine = QRegularExpression::wildcardToRegularExpression(parsedLine); - filter.matcher = new ContentsMatcher<QRegularExpression>(parsedLine, FilterLeaf::RegularExpressionMatch); - - } else if(parsedLine.startsWith(QLatin1Literal("||")) && parsedLine.endsWith(QLatin1Literal("^"))) { -// matchType = FilterLeaf::DomainMatch; - parsedLine = parsedLine.mid(2, parsedLine.length() - 3); - filter.matcher = new ContentsMatcher<QString>(parsedLine, FilterLeaf::DomainMatch); - - } else if(parsedLine.startsWith(QLatin1Literal("|")) && parsedLine.endsWith(QLatin1Literal("|"))) { - // string equals rule - parsedLine = parsedLine.mid(1, parsedLine.length() - 2); - filter.matcher = new ContentsMatcher<QStringMatcher>(parsedLine, FilterLeaf::StringEquals); - - } else if(parsedLine.startsWith(QLatin1Literal("||"))) { - // string starts with rule - parsedLine = parsedLine.mid(2); - filter.matcher = new ContentsMatcher<QStringMatcher>(parsedLine, FilterLeaf::StringStartsWith); - - } else if(parsedLine.endsWith(QLatin1Literal("|"))) { - // string ends with rule - parsedLine.chop(1); - filter.matcher = new ContentsMatcher<QStringMatcher>(parsedLine, FilterLeaf::StringEndsWith); - - } else { - // generic contains rule - filter.matcher = new ContentsMatcher<QStringMatcher>(parsedLine, FilterLeaf::StringContains); - } - - - Q_CHECK_PTR(filter.matcher); - m_rules.emplace_back(std::move(filter)); - return true; -} - -bool AdBlockList::parseComment(const QString &commentLine) -{ - const QStringList comment = commentLine.mid(1).split(QLatin1Literal(": ")); - m_metadata[comment.at(0).trimmed()] = comment.at(1).trimmed(); - return true; -} diff --git a/lib/urlfilter/formats/adblocklist.h b/lib/urlfilter/formats/adblocklist.h deleted file mode 100644 index 34a2120..0000000 --- a/lib/urlfilter/formats/adblocklist.h +++ /dev/null @@ -1,32 +0,0 @@ -#ifndef ADBLOCKLIST_H -#define ADBLOCKLIST_H - -#include <QHash> -#include "adblockrule.h" - -class AdBlockList -{ -public: - AdBlockList(); - - QString metadata(const QString &key) const; - FilterLeaf::Action match(const QUrl &firstParty, const QUrl &requestUrl, QWebEngineUrlRequestInfo::ResourceType type = QWebEngineUrlRequestInfo::ResourceTypeUnknown) const; - - bool parseLine(const QString &line); - -protected: - bool parseComment(const QString &commentLine); - -private: - struct Filter - { - FilterLeaf::Action action = FilterLeaf::Block; - Matcher *matcher; - }; - - QHash<QString, QString> m_metadata; - //QMap<QString, Filter> m_rules; - std::vector<Filter> m_rules; -}; - -#endif // ADBLOCKLIST_H diff --git a/lib/urlfilter/formats/adblockrule.cpp b/lib/urlfilter/formats/adblockrule.cpp deleted file mode 100644 index 60e817f..0000000 --- a/lib/urlfilter/formats/adblockrule.cpp +++ /dev/null @@ -1,63 +0,0 @@ -/* - * This file is part of smolbote. It's copyrighted by the contributors recorded - * in the version control history of the file, available from its original - * location: https://neueland.iserlohn-fortress.net/gitea/aqua/smolbote - * - * SPDX-License-Identifier: GPL-3.0 - */ - -#include "adblockrule.h" -#include <QRegExp> -#include <QStringMatcher> - -AdBlockRule::AdBlockRule(FilterLeaf::UrlMatchType matchType, const QString &filter, FilterLeaf::Action action) -{ - this->matchType = matchType; - this->m_request = filter; - this->m_isBlocking = (action == FilterLeaf::Block); - //matcher.setPattern(filter); - if(matchType == FilterLeaf::RegularExpressionMatch) - regExp = new QRegExp(filter); - else - stringMatcher = new QStringMatcher(filter); -} - -void AdBlockRule::mergeOptions(const QHash<QWebEngineUrlRequestInfo::ResourceType, bool> &options) -{ - this->resourceTypeOptions.unite(options); -} - -bool AdBlockRule::match(const QUrl &requestUrl) const -{ - switch(matchType) { - case FilterLeaf::RegularExpressionMatch: - return (regExp->indexIn(requestUrl.toString()) != -1); - default: - return false; - } -} - -bool AdBlockRule::match(const QUrl &requestUrl, QWebEngineUrlRequestInfo::ResourceType type) const -{ - // if request is of the required type, or there are no types set (== apply to all requests) - if(this->resourceTypeOptions.contains(type) || this->resourceTypeOptions.isEmpty()) { - switch(matchType) { - case FilterLeaf::RegularExpressionMatch: - return (regExp->indexIn(requestUrl.toString()) != -1); - default: - qWarning("Match type not implemented, returning false!"); - return false; - } - } - - // request type is not matched - return false; -} - -std::pair<FilterLeaf::Action, QVariant> AdBlockRule::action() const -{ - if(m_isBlocking) - return std::make_pair(FilterLeaf::Block, QVariant()); - else - return std::make_pair(FilterLeaf::Allow, QVariant()); -} diff --git a/lib/urlfilter/formats/adblockrule.h b/lib/urlfilter/formats/adblockrule.h deleted file mode 100644 index 6be3cdf..0000000 --- a/lib/urlfilter/formats/adblockrule.h +++ /dev/null @@ -1,113 +0,0 @@ -/* - * This file is part of smolbote. It's copyrighted by the contributors recorded - * in the version control history of the file, available from its original - * location: https://neueland.iserlohn-fortress.net/gitea/aqua/smolbote - * - * SPDX-License-Identifier: GPL-3.0 - */ - -#ifndef SMOLBOTE_ADBLOCKRULE_H -#define SMOLBOTE_ADBLOCKRULE_H - -#include "../filterleaf.h" -#include <optional> -#include <QRegularExpression> -#include <QStringMatcher> - -class Matcher -{ -public: - virtual bool hasMatch(const QString &where) const = 0; -}; - -template <typename T> -class ContentsMatcher : public Matcher -{ -public: - ContentsMatcher(const QString &pattern, FilterLeaf::UrlMatchType matchType) - { - this->matchType = matchType; - patternLength = pattern.length(); - - - if constexpr(std::is_same_v<T, QRegularExpression>) { - matcher.setPatternOptions(matcher.patternOptions() | QRegularExpression::CaseInsensitiveOption); - matcher.setPattern(pattern); - } else if constexpr(std::is_same_v<T, QStringMatcher>) { - matcher.setCaseSensitivity(Qt::CaseInsensitive); - matcher.setPattern(pattern); - } else if constexpr(std::is_same_v<T, QString>) { - matcher = QUrl::fromUserInput(pattern).host(); -// qDebug("matcher: %s", qUtf8Printable(matcher)); - } - } - - bool hasMatch(const QString &where) const override - { - if constexpr(std::is_same_v<T, QStringMatcher>) { - switch (matchType) { - case FilterLeaf::InvalidMatch: - case FilterLeaf::RegularExpressionMatch: - case FilterLeaf::DomainMatch: - qWarning("ContentsMatcher is a String Matcher, but not doing string matching!"); - return false; - - case FilterLeaf::StringContains: - return (matcher.indexIn(where) != -1); - - case FilterLeaf::StringStartsWith: - return (matcher.indexIn(where) == 0); - - case FilterLeaf::StringEndsWith: - return (matcher.indexIn(where) == where.length() - patternLength); - - case FilterLeaf::StringEquals: - return (matcher.indexIn(where) == 0) && (patternLength == where.length()); - } - - } else if constexpr(std::is_same_v<T, QRegularExpression>) { - if(matchType != FilterLeaf::RegularExpressionMatch) - qWarning("ContentsMatcher is a regular expression, but not doing a regular expression match!"); - return matcher.match(where).hasMatch(); - } else if constexpr(std::is_same_v<T, QString>) { - // TODO: fix - if(matchType == FilterLeaf::DomainMatch) { -// qDebug("matching %s", qUtf8Printable(QUrl(where).host())); - return QUrl(where).host().endsWith(matcher); - } else - return matcher == where; - } else { - qWarning("Matcher has no backend, returning false"); - return false; - } - } - -private: - int patternLength; - T matcher; - FilterLeaf::UrlMatchType matchType; -}; - -class AdBlockRule : public FilterLeaf -{ -public: - explicit AdBlockRule(FilterLeaf::UrlMatchType matchType, const QString &filter, FilterLeaf::Action action); - ~AdBlockRule() - { - delete stringMatcher; - delete regExp; - }; - - void mergeOptions(const QHash<QWebEngineUrlRequestInfo::ResourceType, bool> &options); - - bool match(const QUrl &requestUrl) const override; - bool match(const QUrl &requestUrl, QWebEngineUrlRequestInfo::ResourceType type) const; - std::pair<FilterLeaf::Action, QVariant> action() const override; - -private: - /* Once C++20 comes out, perhaps this can be replaced with a concept template */ - QStringMatcher *stringMatcher = nullptr; - QRegExp *regExp = nullptr; -}; - -#endif // SMOLBOTE_ADBLOCKRULE_H diff --git a/lib/urlfilter/formats/adblockrule_parse.cpp b/lib/urlfilter/formats/adblockrule_parse.cpp deleted file mode 100644 index c01ddfd..0000000 --- a/lib/urlfilter/formats/adblockrule_parse.cpp +++ /dev/null @@ -1,181 +0,0 @@ -/* - * This file is part of smolbote. It's copyrighted by the contributors recorded - * in the version control history of the file, available from its original - * location: https://neueland.iserlohn-fortress.net/gitea/aqua/smolbote - * - * SPDX-License-Identifier: GPL-3.0 - */ - -#include "adblockrule.h" -#include "adblockrule_parse.h" - -// adblock format documentation -// https://adblockplus.org/filters - -// QString::mid(pos, len) const - Returns a string starting at the specified position index. -// QString::chop(len) - Removes n characters from the end of the string. -// QString::remove(pos, len) - Removes n characters from the string, starting at the given position index. -// QString::trimmed() const - Remove whitespace from start and end - -AdBlockRule *parseRule_adblock(const QString &filter) -{ - QString parsedLine = filter.trimmed(); - - // there is no rule, or it's a comment - if(parsedLine.isEmpty() || parsedLine.startsWith("!")) { - return nullptr; - } - - // css rule -> filterleaves cannot do element blocking - if(parsedLine.contains(QLatin1Literal("##")) || parsedLine.contains(QLatin1Literal("#@#"))) { - return nullptr; - } - - // exception rules - FilterLeaf::Action action = FilterLeaf::Block; - if(parsedLine.startsWith(QLatin1Literal("@@"))) { - action = FilterLeaf::Allow; - parsedLine.remove(0, 2); - } - - // parse options - QStringList enabledOn, disabledOn; - QHash<QWebEngineUrlRequestInfo::ResourceType, bool> optionsHash; - { - const int sepPos = parsedLine.indexOf(QLatin1Literal("$")); - if(sepPos != -1) { - const auto options = parsedLine.mid(sepPos + 1).split(QLatin1Literal(",")); - parsedLine = parsedLine.mid(0, sepPos); - - for(const QString &option : options) { - if(option.startsWith(QLatin1Literal("domain"))) { - const auto domainList = option.mid(7).split(QLatin1Literal("|")); - - for(const QString &domain : domainList) { - if(domain.startsWith(QLatin1Literal("~"))) { - disabledOn.append(domain.mid(1)); - } else { - enabledOn.append(domain); - } - } - } else { - const auto pair = parseOption(option); - if(pair) - optionsHash.insert(pair.value().first, pair.value().second); - } - } - } - } - - FilterLeaf::UrlMatchType matchType = FilterLeaf::InvalidMatch; - - if(parsedLine.startsWith(QLatin1Literal("/")) && parsedLine.endsWith(QLatin1Literal("/"))) { - // regular expression rule - matchType = FilterLeaf::RegularExpressionMatch; - parsedLine = parsedLine.mid(1, parsedLine.length() - 2); - - } else if(parsedLine.startsWith(QLatin1Literal("||")) && parsedLine.endsWith(QLatin1Literal("^"))) { - matchType = FilterLeaf::DomainMatch; - parsedLine = parsedLine.mid(2, parsedLine.length() - 3); - - } else if(parsedLine.startsWith(QLatin1Literal("|")) && parsedLine.endsWith(QLatin1Literal("|"))) { - // string equals rule - matchType = FilterLeaf::StringEquals; - parsedLine = parsedLine.mid(1, parsedLine.length() - 2); - - } else if(parsedLine.startsWith(QLatin1Literal("||"))) { - // string starts with rule - matchType = FilterLeaf::StringStartsWith; - parsedLine = parsedLine.mid(2); - - } else if(parsedLine.endsWith(QLatin1Literal("|"))) { - // string ends with rule - matchType = FilterLeaf::StringEndsWith; - parsedLine.chop(1); - - } else { - // generic contains rule - matchType = FilterLeaf::StringContains; - - // Basic filter rules can use wildcards, which were supported by QRegExp, - // but were deprecated in QRegularExpression. - - // remove beginning and ending wildcards - if(parsedLine.startsWith(QLatin1Literal("*"))) - parsedLine = parsedLine.mid(1); - - if(parsedLine.endsWith(QLatin1Literal("*"))) - parsedLine.chop(1); - - if(parsedLine.contains(QLatin1Literal("*")) || parsedLine.contains(QLatin1Literal("^"))) { - // check for wildcards and translate to regexp - // wildcard "*" - any number of characters - // separator "^" - end, ? or / - parsedLine.replace(QLatin1Literal("||"), QLatin1Literal("^\\w+://")); - parsedLine.replace(QLatin1Literal("|"), QLatin1Literal("\\|")); - parsedLine.replace(QLatin1Literal("*"), QLatin1Literal(".*")); - parsedLine.replace(QLatin1Literal("^"), QLatin1Literal("($|\\?|\\/)")); - - matchType = FilterLeaf::RegularExpressionMatch; - } - } - - AdBlockRule *rule = new AdBlockRule(matchType, parsedLine, action); - rule->mergeOptions(optionsHash); - return rule; -} - -std::optional<QPair<QWebEngineUrlRequestInfo::ResourceType, bool>> parseOption(const QString &option) -{ - const bool exception = !option.startsWith(QLatin1Literal("~")); - - if(option.endsWith(QLatin1Literal("script"))) { - // external scripts loaded via HTML script tag - return qMakePair(QWebEngineUrlRequestInfo::ResourceTypeScript, exception); - - } else if(option.endsWith(QLatin1Literal("image"))) { - // regular images, typically loaded via HTML img tag - return qMakePair(QWebEngineUrlRequestInfo::ResourceTypeImage, exception); - - } else if(option.endsWith(QLatin1Literal("stylesheet"))) { - // external CSS stylesheet files - return qMakePair(QWebEngineUrlRequestInfo::ResourceTypeStylesheet, exception); - - } else if(option.endsWith(QLatin1Literal("object"))) { - // content handled by browser plugins, e.g. Flash or Java - return qMakePair(QWebEngineUrlRequestInfo::ResourceTypeObject, exception); - - } else if(option.endsWith(QLatin1Literal("xmlhttprequest"))) { - // requests started using the XMLHttpRequest object or fetch() API - return qMakePair(QWebEngineUrlRequestInfo::ResourceTypeXhr, exception); - - } else if(option.endsWith(QLatin1Literal("object-subrequest"))) { - // requests started by plugins like Flash - return qMakePair(QWebEngineUrlRequestInfo::ResourceTypePluginResource, exception); - - } else if(option.endsWith(QLatin1Literal("subdocument"))) { - // embedded pages, usually included via HTML frames - return qMakePair(QWebEngineUrlRequestInfo::ResourceTypeSubFrame, exception); - - } else if(option.endsWith(QLatin1Literal("ping"))) { - // requests started by <a ping> or navigator.sendBeacon() - return qMakePair(QWebEngineUrlRequestInfo::ResourceTypePing, exception); - - } else if(option.endsWith(QLatin1Literal("websocket"))) { - // requests initiated via WebSocket object - qDebug("Resource type 'websocket' not available"); - - } else if(option.endsWith(QLatin1Literal("webrtc"))) { - // connections opened via RTCPeerConnection instances to ICE servers - qDebug("Resource type 'webrtc' not available"); - - } else if(option.endsWith(QLatin1Literal("document"))) { - // the page itself - return qMakePair(QWebEngineUrlRequestInfo::ResourceTypeMainFrame, exception); - - } else if(option.endsWith(QLatin1Literal("other"))) { - return qMakePair(QWebEngineUrlRequestInfo::ResourceTypeUnknown, exception); - } - - return std::nullopt; -} diff --git a/lib/urlfilter/formats/adblockrule_parse.h b/lib/urlfilter/formats/adblockrule_parse.h deleted file mode 100644 index 01255ca..0000000 --- a/lib/urlfilter/formats/adblockrule_parse.h +++ /dev/null @@ -1,17 +0,0 @@ -/* - * This file is part of smolbote. It's copyrighted by the contributors recorded - * in the version control history of the file, available from its original - * location: https://neueland.iserlohn-fortress.net/gitea/aqua/smolbote - * - * SPDX-License-Identifier: GPL-3.0 - */ - -#ifndef ADBLOCKRULE_PARSE_H -#define ADBLOCKRULE_PARSE_H - -class AdBlockRule; - -AdBlockRule *parseRule_adblock(const QString &filter); -std::optional<QPair<QWebEngineUrlRequestInfo::ResourceType, bool>> parseOption(const QString &option); - -#endif // ADBLOCKRULE_PARSE_H diff --git a/lib/urlfilter/formats/hostlistrule.cpp b/lib/urlfilter/formats/hostlistrule.cpp deleted file mode 100644 index ad2c2a6..0000000 --- a/lib/urlfilter/formats/hostlistrule.cpp +++ /dev/null @@ -1,29 +0,0 @@ -/* - * This file is part of smolbote. It's copyrighted by the contributors recorded - * in the version control history of the file, available from its original - * location: https://neueland.iserlohn-fortress.net/gitea/aqua/smolbote - * - * SPDX-License-Identifier: GPL-3.0 - */ - -#include "hostlistrule.h" - -HostlistRule::HostlistRule(const QString &domain, const QString &redirect) -{ - this->m_isBlocking = (redirect == QLatin1Literal("0.0.0.0")); - this->m_request = domain; - this->m_redirect = redirect; -} - -bool HostlistRule::match(const QUrl &requestUrl) const -{ - //qDebug("checking [%s] against [%s]", qUtf8Printable(requestUrl.host()), qUtf8Printable(m_request)); - return (m_request == requestUrl.host()); -} - -std::pair<FilterLeaf::Action, QVariant> HostlistRule::action() const -{ - if(m_isBlocking) - return std::make_pair(FilterLeaf::Block, QVariant()); - return std::make_pair(FilterLeaf::Redirect, QVariant(m_redirect)); -} diff --git a/lib/urlfilter/formats/hostlistrule.h b/lib/urlfilter/formats/hostlistrule.h deleted file mode 100644 index 58ec690..0000000 --- a/lib/urlfilter/formats/hostlistrule.h +++ /dev/null @@ -1,27 +0,0 @@ -/* - * This file is part of smolbote. It's copyrighted by the contributors recorded - * in the version control history of the file, available from its original - * location: https://neueland.iserlohn-fortress.net/gitea/aqua/smolbote - * - * SPDX-License-Identifier: GPL-3.0 - */ - -#ifndef SMOLBOTE_HOSTLIST_RULE_H -#define SMOLBOTE_HOSTLIST_RULE_H - -#include "../filterleaf.h" -#include <QString> - -class HostlistRule : public FilterLeaf -{ -public: - explicit HostlistRule(const QString &domain, const QString &redirect); - - bool match(const QUrl &requestUrl) const override; - std::pair<FilterLeaf::Action, QVariant> action() const override; - -private: - QString m_redirect; -}; - -#endif // SMOLBOTE_HOSTLIST_RULE_H |