diff options
author | Aqua-sama <aqua@iserlohn-fortress.net> | 2018-07-18 10:07:51 +0200 |
---|---|---|
committer | Aqua-sama <aqua@iserlohn-fortress.net> | 2018-07-18 10:07:51 +0200 |
commit | f507a7a5557c7cca9570c684920e055c5251b08e (patch) | |
tree | 0fad83fc99053006b375608e2a2d04388776ad89 /lib/web/urlfilter | |
parent | AdBlockTest: loading subscription (diff) | |
download | smolbote-f507a7a5557c7cca9570c684920e055c5251b08e.tar.xz |
AdBlockRule: move matching logic to FilterRule
Diffstat (limited to 'lib/web/urlfilter')
-rw-r--r-- | lib/web/urlfilter/adblockrule.cpp | 68 | ||||
-rw-r--r-- | lib/web/urlfilter/adblockrule.h | 35 | ||||
-rw-r--r-- | lib/web/urlfilter/filterdomain.cpp | 68 | ||||
-rw-r--r-- | lib/web/urlfilter/filterdomain.h | 37 | ||||
-rw-r--r-- | lib/web/urlfilter/filterrule.cpp | 168 | ||||
-rw-r--r-- | lib/web/urlfilter/filterrule.h | 78 |
6 files changed, 104 insertions, 350 deletions
diff --git a/lib/web/urlfilter/adblockrule.cpp b/lib/web/urlfilter/adblockrule.cpp index b20ed8f..60262b7 100644 --- a/lib/web/urlfilter/adblockrule.cpp +++ b/lib/web/urlfilter/adblockrule.cpp @@ -1,26 +1,5 @@ #include "adblockrule.h" -bool isMatchingDomain(const QString &domain, const QString &filter) -{ - // domain and filter are the same - if(domain == filter) { - return true; - } - - // domain can't be matched by filter if it doesn't end with filter - // ex. example2.com isn't matched by example.com - if(!domain.endsWith(filter)) { - return false; - } - - // match with subdomains - // ex. subdomain.example.com is matched by example.com - int index = domain.indexOf(filter); - - // match if (domain ends with filter) && (filter has been found) and (character before filter is '.') - return index > 0 && domain[index - 1] == QLatin1Char('.'); -} - inline std::pair<QWebEngineUrlRequestInfo::ResourceType, bool> parseOption(const QString &option) { if(option.endsWith(QLatin1Literal("script"))) { @@ -105,14 +84,14 @@ AdBlockRule::AdBlockRule(const QString &filter) if(parsedLine.startsWith(QLatin1Literal("/")) && parsedLine.endsWith(QLatin1Literal("/"))) { parsedLine = parsedLine.mid(1, parsedLine.length() - 2); - matchType = RegularExpressionMatch; + urlMatchType = RegularExpressionMatch; regexp.setPattern(parsedLine); return; } // basic filter rules if(parsedLine.startsWith(QLatin1Literal("|")) && parsedLine.endsWith(QLatin1Literal("|"))) { - matchType = StringEquals; + urlMatchType = StringEquals; match = parsedLine.mid(1, parsedLine.length() - 2); return; } @@ -128,7 +107,7 @@ AdBlockRule::AdBlockRule(const QString &filter) parsedLine.chop(1); if(parsedLine.startsWith(QLatin1Literal("||")) && parsedLine.endsWith(QLatin1Literal("^"))) { - matchType = DomainMatch; + urlMatchType = DomainMatch; match = parsedLine.mid(2, parsedLine.length() - 3); return; } @@ -137,49 +116,12 @@ AdBlockRule::AdBlockRule(const QString &filter) // wildcard "*" - any number of characters // separator "^" - end, ? or / if(parsedLine.contains(QLatin1Literal("*")) || parsedLine.contains(QLatin1Literal("^"))) { - matchType = RegularExpressionMatch; + urlMatchType = RegularExpressionMatch; parsedLine.replace(QLatin1Literal("*"), QLatin1Literal(".*")); parsedLine.replace(QLatin1Literal("^"), QLatin1Literal("($|\\?|\\/)")); regexp.setPattern(parsedLine); return; } - matcher.setPattern(parsedLine); -} - -bool AdBlockRule::isEnabled() const -{ - return m_isEnabled; -} - -bool AdBlockRule::matchesType(QWebEngineUrlRequestInfo::ResourceType type) const -{ - // no options have been specified -> match all resource types - if(m_resourceTypeOptions.isEmpty()) - return true; - - // this resource type has not been specified -> reject it - if(!m_resourceTypeOptions.contains(type)) - return false; - - // resource type has been specified; true to match, false to exception - return m_resourceTypeOptions.value(type); -} - -bool AdBlockRule::matchesUrl(const QUrl &url) const -{ - switch (matchType) { - case RegularExpressionMatch: - if(regexp.match(url.toString()).hasMatch()) - return !m_isException; - - case StringEquals: - return url.toString() == match; - - case DomainMatch: - return isMatchingDomain(url.host(), match); - - default: - return false; - } + match = parsedLine; } diff --git a/lib/web/urlfilter/adblockrule.h b/lib/web/urlfilter/adblockrule.h index 76484c3..aeabf4f 100644 --- a/lib/web/urlfilter/adblockrule.h +++ b/lib/web/urlfilter/adblockrule.h @@ -1,43 +1,18 @@ -#ifndef ADBLOCKRULE_H -#define ADBLOCKRULE_H +#ifndef SMOLBOTE_ADBLOCKRULE_H +#define SMOLBOTE_ADBLOCKRULE_H #include <QObject> #include <QString> #include <QRegularExpression> #include <QUrl> #include <QWebEngineUrlRequestInfo> +#include "filterrule.h" -class AdBlockRule +class AdBlockRule : public FilterRule { public: - - enum MatchType { - InvalidMatch, - RegularExpressionMatch, - StringContains, - StringStartsWith, - StringEndsWith, - StringEquals, - DomainMatch - }; - AdBlockRule(const QString &filter); - bool isEnabled() const; - bool matchesType(QWebEngineUrlRequestInfo::ResourceType type) const; - bool matchesUrl(const QUrl &url) const; - -private: - bool m_isEnabled = false; - bool m_isException = false; - - MatchType matchType = InvalidMatch; - QHash<QWebEngineUrlRequestInfo::ResourceType, bool> m_resourceTypeOptions; - QStringList allowedDomains, blockedDomains; - - QString match; - QRegularExpression regexp; - QStringMatcher matcher; }; -#endif // ADBLOCKRULE_H +#endif // SMOLBOTE_ADBLOCKRULE_H diff --git a/lib/web/urlfilter/filterdomain.cpp b/lib/web/urlfilter/filterdomain.cpp deleted file mode 100644 index 99cc71c..0000000 --- a/lib/web/urlfilter/filterdomain.cpp +++ /dev/null @@ -1,68 +0,0 @@ -#include "filterdomain.h" -#include <QVector> - -bool isMatchingDomain(const QString &domain, const QString &filter) -{ - // domain and filter are the same - if(domain == filter) { - return true; - } - - // domain can't be matched by filter if it doesn't end with filter - // ex. example2.com isn't matched by example.com - if(!domain.endsWith(filter)) { - return false; - } - - // match with subdomains - // ex. subdomain.example.com is matched by example.com - int index = domain.indexOf(filter); - - // match if (domain ends with filter) && (filter has been found) and (character before filter is '.') - return index > 0 && domain[index - 1] == QLatin1Char('.'); -} - -FilterDomain::FilterDomain(QObject *parent) - : QObject(parent) -{ -} - -void FilterDomain::addDomain(const QString &domain, bool isException) -{ - if(domain.isEmpty()) - return; - - if(!isException) - m_allowedOnDomains.append(domain); - else - m_blockedOnDomains.append(domain); -} - -void FilterDomain::addRule(FilterRule_ptr &rule) -{ - Q_ASSERT(rule); - m_rules.emplace_back(std::move(rule)); -} - -bool FilterDomain::hasMatch(const QString &host) const -{ - for(const auto &domain : qAsConst(m_blockedOnDomains)) { - if(isMatchingDomain(host, domain)) - return false; - } - - for(const auto &domain : qAsConst(m_allowedOnDomains)) { - if(isMatchingDomain(host, domain)) - return true; - } - - return false; -} - -void FilterDomain::process(QWebEngineUrlRequestInfo &info) const -{ - for(const FilterRule_ptr &rule : m_rules) { - if(rule->matchRequestUrl(info.requestUrl().toString(), info.resourceType())) - rule->process(info); - } -} diff --git a/lib/web/urlfilter/filterdomain.h b/lib/web/urlfilter/filterdomain.h deleted file mode 100644 index b356a32..0000000 --- a/lib/web/urlfilter/filterdomain.h +++ /dev/null @@ -1,37 +0,0 @@ -#ifndef FILTERDOMAIN_H -#define FILTERDOMAIN_H - -#include <QObject> -#include <QVector> -#include <memory> -#include "filterrule.h" - -typedef std::unique_ptr<FilterRule> FilterRule_ptr; -typedef QVector<QString> QStringVector; - -class FilterDomain : public QObject -{ - Q_OBJECT -public: - explicit FilterDomain(QObject *parent = nullptr); - - void addDomain(const QString &domain, bool isException = false); - void addRule(FilterRule_ptr &rule); - - bool hasMatch(const QString &host) const; - void process(QWebEngineUrlRequestInfo &info) const; - -private: - Q_DISABLE_COPY(FilterDomain) - - // lists of domains this rule group? is allowed on and blocked on - QStringVector m_allowedOnDomains; - QStringVector m_blockedOnDomains; - - std::vector<FilterRule_ptr> m_rules; -}; - -// function taken from KDE/Falkon -bool isMatchingDomain(const QString &domain, const QString &filter); - -#endif // FILTERDOMAIN_H diff --git a/lib/web/urlfilter/filterrule.cpp b/lib/web/urlfilter/filterrule.cpp index 73b357b..ba2181f 100644 --- a/lib/web/urlfilter/filterrule.cpp +++ b/lib/web/urlfilter/filterrule.cpp @@ -1,139 +1,89 @@ #include "filterrule.h" -#include <QWebEngineUrlRequestInfo> -#include <QJsonArray> -void parseJson(std::unique_ptr<FilterRule> &rule, const QJsonObject &filter) +inline bool isMatchingDomain(const QString &domain, const QString &filter) { - // set action - { - if(!filter.value("whitelist").isUndefined()) { - rule->setActionType(FilterRule::Whitelist); - - } else if(!filter.value("blacklist").isUndefined()) { - rule->setActionType(FilterRule::Blacklist); - - } else if(!filter.value("redirect").isUndefined()) { - rule->setActionType(FilterRule::Redirect); - rule->setRedirectUrl(QUrl::fromUserInput(filter.value("redirect").toString())); - - } else if(!filter.value("setheader").isUndefined()) { - rule->setActionType(FilterRule::SetHeader); - for(const QJsonValue v : filter.value("setheader").toArray()) { - QStringList h = v.toString().split(':'); - rule->addHeaderRule(h.at(0).toLatin1(), h.at(1).toLatin1()); - } - } - + // domain and filter are the same + if(domain == filter) { + return true; } - // set match type - { - const QJsonValue regexpValue = filter.value("regexp"); - const QJsonValue endswithValue = filter.value("endswith"); - const QJsonValue containsValue = filter.value("contains"); - - if(!regexpValue.isUndefined()) { - rule->setMatchType(FilterRule::RegExpMatchRule, regexpValue.toString()); - - } else if(!endswithValue.isUndefined()) { - rule->setMatchType(FilterRule::StringEndsMatchRule, endswithValue.toString()); - - } else if(!containsValue.isUndefined()) { - rule->setMatchType(FilterRule::StringContainsMatchRule, containsValue.toString()); - - } else - rule->setMatchType(FilterRule::MatchAllUrlsRule); + // domain can't be matched by filter if it doesn't end with filter + // ex. example2.com isn't matched by example.com + if(!domain.endsWith(filter)) { + return false; } -} + // match with subdomains + // ex. subdomain.example.com is matched by example.com + int index = domain.indexOf(filter); -FilterRule::FilterRule(const QJsonObject &filter) -{ - m_matcher.setCaseSensitivity(Qt::CaseInsensitive); + // match if (domain ends with filter) && (filter has been found) and (character before filter is '.') + return index > 0 && domain[index - 1] == QLatin1Char('.'); } -void FilterRule::setActionType(ActionType type) +bool FilterRule::isEnabled() const { - m_actionType = type; + return m_isEnabled; } -void FilterRule::setMatchType(MatchType type, const QString &pattern) +bool FilterRule::matchesDomain(const QString &domain) const { - m_matchType = type; - switch (type) { - case RegExpMatchRule: - m_regexp.setPattern(pattern); - break; - case StringEndsMatchRule: - m_pattern = pattern; - break; - case StringContainsMatchRule: - m_matcher.setPattern(pattern); - default: - break; + // no domains have been allowed or blocked -> allow on all domains + if(allowedDomains.isEmpty() && blockedDomains.isEmpty()) + return true; + + if(!blockedDomains.isEmpty()) { + // do not match rule if the domain has been blocked + if(blockedDomains.contains(domain)) + return false; } -} -void FilterRule::setRedirectUrl(const QUrl &url) -{ - m_redirectUrl = url; -} + if(!allowedDomains.isEmpty()) { + if(allowedDomains.contains(domain)) + return true; + } -void FilterRule::addHeaderRule(const QByteArray &header, const QByteArray &value) -{ - m_headers.insert(header, value); + return false; } -bool FilterRule::isValid() const +bool FilterRule::matchesType(QWebEngineUrlRequestInfo::ResourceType type) const { - return (m_matchType != MatchType::InvalidMatch) && (m_actionType != ActionType::InvalidAction); + // no options have been specified -> match all resource types + if(m_resourceTypeOptions.isEmpty()) + return true; + + // this resource type has not been specified -> reject it + if(!m_resourceTypeOptions.contains(type)) + return false; + + // resource type has been specified; true to match, false to exception + return m_resourceTypeOptions.value(type); } -bool FilterRule::process(QWebEngineUrlRequestInfo &info) const +bool FilterRule::matchesUrl(const QUrl &url) const { - Q_ASSERT(m_actionType != ActionType::InvalidAction); + switch (urlMatchType) { + case InvalidMatch: + return false; - if(matchRequestUrl(info.requestUrl().toString(), info.resourceType())) { - switch (m_actionType) { - case ActionType::Whitelist: - info.block(false); - return true; - case ActionType::Blacklist: - info.block(true); - return true; - case ActionType::Redirect: - info.redirect(m_redirectUrl); - return true; - case ActionType::SetHeader: - for(auto it = m_headers.constBegin(); it != m_headers.constEnd(); ++it) { - info.setHttpHeader(it.key(), it.value()); - } - return true; - case ActionType::InvalidAction: - break; - } - } + case RegularExpressionMatch: + if(regexp.match(url.toString()).hasMatch()) + return !m_isException; - return false; -} + case StringContains: + return url.toString().contains(match); -bool FilterRule::matchRequestUrl(const QString &requestUrl, const QWebEngineUrlRequestInfo::ResourceType type) const -{ - Q_ASSERT(m_matchType != MatchType::InvalidMatch); + case StringStartsWith: + return url.toString().startsWith(match); - if(!m_resourceTypeOptions.isEmpty() && !m_resourceTypeOptions.contains(type)) - return false; + case StringEndsWith: + return url.toString().endsWith(match); + + case StringEquals: + return url.toString() == match; + + case DomainMatch: + return isMatchingDomain(url.host(), match); - switch (m_matchType) { - case MatchType::RegExpMatchRule: - return m_regexp.match(requestUrl).hasMatch(); - case MatchType::StringEndsMatchRule: - return requestUrl.endsWith(m_pattern); - case MatchType::StringContainsMatchRule: - return m_matcher.indexIn(requestUrl) != -1; - case MatchType::MatchAllUrlsRule: - return true; - default: - return false; } } diff --git a/lib/web/urlfilter/filterrule.h b/lib/web/urlfilter/filterrule.h index f20ab15..8a622fe 100644 --- a/lib/web/urlfilter/filterrule.h +++ b/lib/web/urlfilter/filterrule.h @@ -1,70 +1,62 @@ #ifndef SMOLBOTE_FILTERRULE_H #define SMOLBOTE_FILTERRULE_H -#include <QObject> #include <QRegularExpression> #include <QStringList> #include <QStringMatcher> -#include <QJsonObject> #include <QUrl> #include <QWebEngineUrlRequestInfo> #include <memory> -class QUrl; class FilterRule { public: - enum ActionType { - Whitelist, - Blacklist, - Redirect, - SetHeader, - InvalidAction + enum UrlMatchType { + InvalidMatch, + RegularExpressionMatch, + StringContains, + StringStartsWith, + StringEndsWith, + StringEquals, + DomainMatch }; - enum MatchType { -// CssRule = 0, // -// DomainMatchRule = 1, // - RegExpMatchRule = 2, // match request url with regexp - StringEndsMatchRule = 3, // request url ends with string - StringContainsMatchRule = 4, // request url contains string - MatchAllUrlsRule = 5, // - InvalidMatch = 6, - }; - - FilterRule(const QJsonObject &filter); - ~FilterRule() = default; + FilterRule() = default; - void setActionType(ActionType type); - void setMatchType(MatchType type, const QString &pattern = QString()); - void setRedirectUrl(const QUrl &url); - void addHeaderRule(const QByteArray &header, const QByteArray &value); + bool isEnabled() const; - bool isValid() const; - bool process(QWebEngineUrlRequestInfo &info) const; - bool matchRequestUrl(const QString &requestUrl, const QWebEngineUrlRequestInfo::ResourceType type) const; + /** + * @brief matchesDomain + * @param domain + * @return + */ + bool matchesDomain(const QString &domain) const; -private: - Q_DISABLE_COPY(FilterRule) + /** + * @brief matchesType + * @param type + * @return true if type matches, false otherwise + */ + bool matchesType(QWebEngineUrlRequestInfo::ResourceType type) const; + /** + * @brief matchesUrl + * @param url + * @return + */ + bool matchesUrl(const QUrl &url) const; - ActionType m_actionType = ActionType::InvalidAction; - MatchType m_matchType = MatchType::InvalidMatch; +protected: + bool m_isEnabled = false; + bool m_isException = false; + UrlMatchType urlMatchType = InvalidMatch; QHash<QWebEngineUrlRequestInfo::ResourceType, bool> m_resourceTypeOptions; - QHash<QByteArray, QByteArray> m_headers; + QStringList allowedDomains, blockedDomains; - // Parsed rule for string matching (CSS Selector for CSS rules) - QString m_matchString; - // Case sensitivity for string matching - Qt::CaseSensitivity m_caseSensitivity = Qt::CaseInsensitive; + QString match; + QRegularExpression regexp; - QUrl m_redirectUrl; - QRegularExpression m_regexp; - QStringMatcher m_matcher; - QString m_pattern; }; -void parseJson(std::unique_ptr<FilterRule> &rule, const QJsonObject &filter); - #endif // SMOLBOTE_FILTERRULE_H |