From 19137b3233c31da75973a3217558ea6bd6570bad Mon Sep 17 00:00:00 2001 From: Aqua-sama Date: Sun, 21 Oct 2018 13:48:23 +0200 Subject: urlfilter: base AdBlockRule off FilterLeaf --- lib/urlfilter/CMakeLists.txt | 14 ++-- lib/urlfilter/filterleaf.h | 20 ++--- lib/urlfilter/filterrule.cpp | 96 ------------------------ lib/urlfilter/filterrule.h | 54 -------------- lib/urlfilter/formats/adblockrule.cpp | 136 +++++++++++++++++----------------- lib/urlfilter/formats/adblockrule.h | 13 +++- 6 files changed, 92 insertions(+), 241 deletions(-) delete mode 100644 lib/urlfilter/filterrule.cpp delete mode 100644 lib/urlfilter/filterrule.h (limited to 'lib') diff --git a/lib/urlfilter/CMakeLists.txt b/lib/urlfilter/CMakeLists.txt index 9940222..20466d3 100644 --- a/lib/urlfilter/CMakeLists.txt +++ b/lib/urlfilter/CMakeLists.txt @@ -5,15 +5,6 @@ set(CMAKE_INCLUDE_CURRENT_DIR ON) set(CMAKE_AUTOMOC ON) add_library(urlfilter - filterrule.cpp - filterrule.h - - formats/adblockrule.cpp - formats/adblockrule.h - - formats/hostlistrule.cpp - formats/hostlistrule.h - # filter tree filtertree.cpp filtertree.h @@ -21,6 +12,11 @@ add_library(urlfilter filterleaf.h domain.cpp domain.h + + formats/adblockrule.cpp + formats/adblockrule.h + formats/hostlistrule.cpp + formats/hostlistrule.h ) target_link_libraries(urlfilter Qt5::WebEngineWidgets) diff --git a/lib/urlfilter/filterleaf.h b/lib/urlfilter/filterleaf.h index 2c552a6..a565f92 100644 --- a/lib/urlfilter/filterleaf.h +++ b/lib/urlfilter/filterleaf.h @@ -24,6 +24,16 @@ public: Redirect }; + enum UrlMatchType { + InvalidMatch, + RegularExpressionMatch, + StringContains, + StringStartsWith, + StringEndsWith, + StringEquals, + DomainMatch + }; + FilterLeaf(FilterLeaf &&other); FilterLeaf &operator=(FilterLeaf &&other); ~FilterLeaf() = default; @@ -35,16 +45,6 @@ public: const QString redirect() const; protected: - enum UrlMatchType { - InvalidMatch, - RegularExpressionMatch, - StringContains, - StringStartsWith, - StringEndsWith, - StringEquals, - DomainMatch - }; - explicit FilterLeaf() = default; // rule matching diff --git a/lib/urlfilter/filterrule.cpp b/lib/urlfilter/filterrule.cpp deleted file mode 100644 index 66a46f1..0000000 --- a/lib/urlfilter/filterrule.cpp +++ /dev/null @@ -1,96 +0,0 @@ -/* - * This file is part of smolbote. It's copyrighted by the contributors recorded - * in the version control history of the file, available from its original - * location: https://neueland.iserlohn-fortress.net/gitea/aqua/smolbote - * - * SPDX-License-Identifier: GPL-3.0 - */ - -#include "filterrule.h" - -inline bool isMatchingDomain(const QString &domain, const QString &filter) -{ - // domain and filter are the same - if(domain == filter) { - return true; - } - - // domain can't be matched by filter if it doesn't end with filter - // ex. example2.com isn't matched by example.com - if(!domain.endsWith(filter)) { - return false; - } - - // match with subdomains - // ex. subdomain.example.com is matched by example.com - int index = domain.indexOf(filter); - - // match if (domain ends with filter) && (filter has been found) and (character before filter is '.') - return index > 0 && domain[index - 1] == QLatin1Char('.'); -} - -bool FilterRule::isEnabled() const -{ - return m_isEnabled; -} - -bool FilterRule::isBlocking() const -{ - return m_isBlocking; -} - -bool FilterRule::matchesDomain(uint domainHash) const -{ - // no domains have been allowed or blocked -> allow on all domains - if(allowedDomains_hashes.isEmpty() && blockedDomains_hashes.isEmpty()) { - return true; - } - - // blockedDomains prevents the rules from being matched on those domains - if(blockedDomains_hashes.contains(domainHash)) { - return false; - } - - // allowedDomains means the rule should only be matched on those domains - return allowedDomains_hashes.contains(domainHash); -} - -bool FilterRule::matchesType(QWebEngineUrlRequestInfo::ResourceType type) const -{ - // no options have been specified -> match all resource types - if(m_resourceTypeOptions.isEmpty()) - return true; - - // this resource type has not been specified -> reject it - if(!m_resourceTypeOptions.contains(type)) - return false; - - // resource type has been specified; true to match, false to exception - return m_resourceTypeOptions.value(type); -} - -bool FilterRule::matchesUrl(const QUrl &url) const -{ - switch(urlMatchType) { - case InvalidMatch: - return false; - - case RegularExpressionMatch: - return regexp.match(url.toString()).hasMatch(); - - case StringContains: - return url.toString().contains(match); - - case StringStartsWith: - return url.toString().startsWith(match); - - case StringEndsWith: - return url.toString().endsWith(match); - - case StringEquals: - return url.toString() == match; - - case DomainMatch: - return isMatchingDomain(url.host(), match); - } -} diff --git a/lib/urlfilter/filterrule.h b/lib/urlfilter/filterrule.h deleted file mode 100644 index 6afe3c6..0000000 --- a/lib/urlfilter/filterrule.h +++ /dev/null @@ -1,54 +0,0 @@ -/* - * This file is part of smolbote. It's copyrighted by the contributors recorded - * in the version control history of the file, available from its original - * location: https://neueland.iserlohn-fortress.net/gitea/aqua/smolbote - * - * SPDX-License-Identifier: GPL-3.0 - */ - -#ifndef SMOLBOTE_FILTERRULE_H -#define SMOLBOTE_FILTERRULE_H - -#include -#include -#include -#include -#include -#include -#include -#include - -class FilterRule -{ -public: - enum UrlMatchType { - InvalidMatch, - RegularExpressionMatch, - StringContains, - StringStartsWith, - StringEndsWith, - StringEquals, - DomainMatch - }; - - bool isEnabled() const; - bool isBlocking() const; - - bool matchesDomain(uint domainHash) const; - bool matchesType(QWebEngineUrlRequestInfo::ResourceType type) const; - bool matchesUrl(const QUrl &url) const; - -protected: - bool m_isEnabled = false; - bool m_isBlocking = true; - - UrlMatchType urlMatchType = InvalidMatch; - QHash m_resourceTypeOptions; - - QVector allowedDomains_hashes, blockedDomains_hashes; - - QString match; - QRegularExpression regexp; -}; - -#endif // SMOLBOTE_FILTERRULE_H diff --git a/lib/urlfilter/formats/adblockrule.cpp b/lib/urlfilter/formats/adblockrule.cpp index c5d6b58..79a6dc8 100644 --- a/lib/urlfilter/formats/adblockrule.cpp +++ b/lib/urlfilter/formats/adblockrule.cpp @@ -16,30 +16,28 @@ // QString::chop(len) - Removes n characters from the end of the string. // QString::remove(pos, len) - Removes n characters from the string, starting at the given position index. -AdBlockRule::AdBlockRule(const QString &filter) +AdBlockRule *loadRule(const QString &filter) { QString parsedLine = filter.trimmed(); // there is no rule, or it's a comment if(parsedLine.isEmpty() || parsedLine.startsWith("!")) { - return; + return nullptr; } - // css rule - ignore for now + // css rule -> filterleaves cannot do element blocking if(parsedLine.contains(QLatin1Literal("##")) || parsedLine.contains(QLatin1Literal("#@#"))) { - return; + return nullptr; } - m_isEnabled = true; - // exception rules - if(parsedLine.startsWith(QLatin1Literal("@@"))) { - m_isBlocking = false; + const bool isBlocking = parsedLine.startsWith(QLatin1Literal("@@")); + if(isBlocking) parsedLine.remove(0, 2); - } else - m_isBlocking = true; // parse options + QStringList enabledOn, disabledOn; + QHash optionsHash; { const int sepPos = parsedLine.indexOf(QLatin1Literal("$")); if(sepPos != -1) { @@ -49,102 +47,102 @@ AdBlockRule::AdBlockRule(const QString &filter) for(const QString &option : options) { if(option.startsWith(QLatin1Literal("domain"))) { const auto domainList = option.mid(7).split(QLatin1Literal("|")); + for(const QString &domain : domainList) { if(domain.startsWith(QLatin1Literal("~"))) { - blockedDomains_hashes.append(qHash(domain.mid(1))); + disabledOn.append(domain.mid(1)); } else { - allowedDomains_hashes.append(qHash(domain)); + enabledOn.append(domain); } } } else { - parseOption(option); + const auto pair = parseOption(option); + if(pair) + optionsHash.insert(pair.value().first, pair.value().second); } } } } - // regular expression rule - if(parsedLine.startsWith(QLatin1Literal("/")) && parsedLine.endsWith(QLatin1Literal("/"))) { - parsedLine = parsedLine.mid(1, parsedLine.length() - 2); - - urlMatchType = RegularExpressionMatch; - regexp.setPattern(parsedLine); - return; - } - - // string equals rule - if(parsedLine.startsWith(QLatin1Literal("|")) && parsedLine.endsWith(QLatin1Literal("|"))) { - urlMatchType = StringEquals; - match = parsedLine.mid(1, parsedLine.length() - 2); - return; - } - - // Basic filter rules can use wildcards, which were supported by QRegExp, - // but were deprecated in QRegularExpression. - - // remove beginning and ending wildcards - if(parsedLine.startsWith(QLatin1Literal("*"))) - parsedLine = parsedLine.mid(1); + FilterLeaf::UrlMatchType matchType; + QString pattern; - if(parsedLine.endsWith(QLatin1Literal("*"))) - parsedLine.chop(1); - - if(parsedLine.startsWith(QLatin1Literal("||")) && parsedLine.endsWith(QLatin1Literal("^"))) { - urlMatchType = DomainMatch; - match = parsedLine.mid(2, parsedLine.length() - 3); - return; - } - - // check for wildcards and translate to regexp - // wildcard "*" - any number of characters - // separator "^" - end, ? or / - if(parsedLine.contains(QLatin1Literal("*")) || parsedLine.contains(QLatin1Literal("^"))) { - urlMatchType = RegularExpressionMatch; - parsedLine.replace(QLatin1Literal("||"), QLatin1Literal("^\\w+://")); - parsedLine.replace(QLatin1Literal("|"), QLatin1Literal("\\|")); - parsedLine.replace(QLatin1Literal("*"), QLatin1Literal(".*")); - parsedLine.replace(QLatin1Literal("^"), QLatin1Literal("($|\\?|\\/)")); - regexp.setPattern(parsedLine); - return; + if(parsedLine.startsWith(QLatin1Literal("/")) && parsedLine.endsWith(QLatin1Literal("/"))) { + // regular expression rule + matchType = FilterLeaf::RegularExpressionMatch; + pattern = parsedLine.mid(1, parsedLine.length() - 2); + + } else if(parsedLine.startsWith(QLatin1Literal("|")) && parsedLine.endsWith(QLatin1Literal("|"))) { + // string equals rule + matchType = FilterLeaf::StringEquals; + pattern = parsedLine.mid(1, parsedLine.length() - 2); + + } else { + + // Basic filter rules can use wildcards, which were supported by QRegExp, + // but were deprecated in QRegularExpression. + + // remove beginning and ending wildcards + if(parsedLine.startsWith(QLatin1Literal("*"))) + parsedLine = parsedLine.mid(1); + + if(parsedLine.endsWith(QLatin1Literal("*"))) + parsedLine.chop(1); + + if(parsedLine.startsWith(QLatin1Literal("||")) && parsedLine.endsWith(QLatin1Literal("^"))) { + matchType = FilterLeaf::DomainMatch; + pattern = parsedLine.mid(2, parsedLine.length() - 3); + + } else if(parsedLine.contains(QLatin1Literal("*")) || parsedLine.contains(QLatin1Literal("^"))) { + // check for wildcards and translate to regexp + // wildcard "*" - any number of characters + // separator "^" - end, ? or / + matchType = FilterLeaf::RegularExpressionMatch; + parsedLine.replace(QLatin1Literal("||"), QLatin1Literal("^\\w+://")); + parsedLine.replace(QLatin1Literal("|"), QLatin1Literal("\\|")); + parsedLine.replace(QLatin1Literal("*"), QLatin1Literal(".*")); + parsedLine.replace(QLatin1Literal("^"), QLatin1Literal("($|\\?|\\/)")); + pattern = parsedLine; + } } - - match = parsedLine; + return nullptr; } -void AdBlockRule::parseOption(const QString &option) + +std::optional> parseOption(const QString &option) { const bool exception = !option.startsWith(QLatin1Literal("~")); if(option.endsWith(QLatin1Literal("script"))) { // external scripts loaded via HTML script tag - m_resourceTypeOptions.insert(QWebEngineUrlRequestInfo::ResourceTypeScript, exception); + return qMakePair(QWebEngineUrlRequestInfo::ResourceTypeScript, exception); } else if(option.endsWith(QLatin1Literal("image"))) { // regular images, typically loaded via HTML img tag - m_resourceTypeOptions.insert(QWebEngineUrlRequestInfo::ResourceTypeImage, exception); + return qMakePair(QWebEngineUrlRequestInfo::ResourceTypeImage, exception); } else if(option.endsWith(QLatin1Literal("stylesheet"))) { // external CSS stylesheet files - m_resourceTypeOptions.insert(QWebEngineUrlRequestInfo::ResourceTypeStylesheet, exception); + return qMakePair(QWebEngineUrlRequestInfo::ResourceTypeStylesheet, exception); } else if(option.endsWith(QLatin1Literal("object"))) { // content handled by browser plugins, e.g. Flash or Java - m_resourceTypeOptions.insert(QWebEngineUrlRequestInfo::ResourceTypeObject, exception); + return qMakePair(QWebEngineUrlRequestInfo::ResourceTypeObject, exception); } else if(option.endsWith(QLatin1Literal("xmlhttprequest"))) { // requests started using the XMLHttpRequest object or fetch() API - m_resourceTypeOptions.insert(QWebEngineUrlRequestInfo::ResourceTypeXhr, exception); + return qMakePair(QWebEngineUrlRequestInfo::ResourceTypeXhr, exception); } else if(option.endsWith(QLatin1Literal("object-subrequest"))) { // requests started by plugins like Flash - m_resourceTypeOptions.insert(QWebEngineUrlRequestInfo::ResourceTypePluginResource, exception); + return qMakePair(QWebEngineUrlRequestInfo::ResourceTypePluginResource, exception); } else if(option.endsWith(QLatin1Literal("subdocument"))) { // embedded pages, usually included via HTML frames - m_resourceTypeOptions.insert(QWebEngineUrlRequestInfo::ResourceTypeSubFrame, exception); + return qMakePair(QWebEngineUrlRequestInfo::ResourceTypeSubFrame, exception); } else if(option.endsWith(QLatin1Literal("ping"))) { // requests started by or navigator.sendBeacon() - m_resourceTypeOptions.insert(QWebEngineUrlRequestInfo::ResourceTypePing, exception); + return qMakePair(QWebEngineUrlRequestInfo::ResourceTypePing, exception); } else if(option.endsWith(QLatin1Literal("websocket"))) { // requests initiated via WebSocket object @@ -156,9 +154,11 @@ void AdBlockRule::parseOption(const QString &option) } else if(option.endsWith(QLatin1Literal("document"))) { // the page itself - m_resourceTypeOptions.insert(QWebEngineUrlRequestInfo::ResourceTypeMainFrame, exception); + return qMakePair(QWebEngineUrlRequestInfo::ResourceTypeMainFrame, exception); } else if(option.endsWith(QLatin1Literal("other"))) { - m_resourceTypeOptions.insert(QWebEngineUrlRequestInfo::ResourceTypeUnknown, exception); + return qMakePair(QWebEngineUrlRequestInfo::ResourceTypeUnknown, exception); } + + return std::nullopt; } diff --git a/lib/urlfilter/formats/adblockrule.h b/lib/urlfilter/formats/adblockrule.h index 3c8edb1..0f0873c 100644 --- a/lib/urlfilter/formats/adblockrule.h +++ b/lib/urlfilter/formats/adblockrule.h @@ -9,14 +9,19 @@ #ifndef SMOLBOTE_ADBLOCKRULE_H #define SMOLBOTE_ADBLOCKRULE_H -#include "../filterrule.h" +#include "../filterleaf.h" +#include -class AdBlockRule : public FilterRule +class AdBlockRule : public FilterLeaf { public: - explicit AdBlockRule(const QString &filter); +// explicit AdBlockRule(const QString &filter); - void parseOption(const QString &option); +// bool match(const QUrl &requestUrl) const override; +// FilterLeaf::Action action() const override; }; +std::optional> parseOption(const QString &option); +AdBlockRule *loadRule(const QString &filter); + #endif // SMOLBOTE_ADBLOCKRULE_H -- cgit v1.2.1