From 0e4ee2ed1c64130262e42ddfc87f2d8944c5a10c Mon Sep 17 00:00:00 2001 From: Aqua-sama Date: Sat, 7 Jul 2018 19:45:45 +0200 Subject: Integrate urlfilter with urlrequestinterceptor --- lib/configuration/configuration.cpp | 1 + lib/web/urlfilter/filterdomain.cpp | 48 ++++++------ lib/web/urlfilter/filterdomain.h | 28 ++++--- lib/web/urlfilter/filterrule.cpp | 145 ++++++++++++++++++++++++------------ lib/web/urlfilter/filterrule.h | 60 +++++++-------- 5 files changed, 173 insertions(+), 109 deletions(-) (limited to 'lib') diff --git a/lib/configuration/configuration.cpp b/lib/configuration/configuration.cpp index 70be7b1..d31b4f2 100644 --- a/lib/configuration/configuration.cpp +++ b/lib/configuration/configuration.cpp @@ -96,6 +96,7 @@ Configuration::Configuration(QObject *parent) // Filter settings ("filter.path", po::value()->default_value(filter_path)) + ("filter.json-path", po::value()->default_value("~/.config/smolbote/filter.json")) ("filter.header", po::value>()) // ("filter.cookies.block.all", po::value()->default_value(false)) // ("filter.cookies.block.thirdParty", po::value()->default_value(true)) diff --git a/lib/web/urlfilter/filterdomain.cpp b/lib/web/urlfilter/filterdomain.cpp index 53bc7db..99cc71c 100644 --- a/lib/web/urlfilter/filterdomain.cpp +++ b/lib/web/urlfilter/filterdomain.cpp @@ -22,41 +22,47 @@ bool isMatchingDomain(const QString &domain, const QString &filter) return index > 0 && domain[index - 1] == QLatin1Char('.'); } -FilterDomain::FilterDomain(MatchType type, QObject *parent) +FilterDomain::FilterDomain(QObject *parent) : QObject(parent) { - setType(type); } -void FilterDomain::setType(MatchType type) +void FilterDomain::addDomain(const QString &domain, bool isException) { - m_type = type; + if(domain.isEmpty()) + return; + + if(!isException) + m_allowedOnDomains.append(domain); + else + m_blockedOnDomains.append(domain); } -void FilterDomain::addDomain(const QString &domain) +void FilterDomain::addRule(FilterRule_ptr &rule) { - if(!domain.isEmpty()) - m_domains.append(domain); + Q_ASSERT(rule); + m_rules.emplace_back(std::move(rule)); } bool FilterDomain::hasMatch(const QString &host) const { - // match all domains -> this rule applies to all domains - if(m_type == WhitelistAll) - return true; - - // match no domains -> this rule applies to no domains - if(m_type == BlacklistAll) - return false; - - // is this a whitelist or blacklist domain rule? - // should it apply to added domains or not - const bool whitelist = (m_type == Whitelist); + for(const auto &domain : qAsConst(m_blockedOnDomains)) { + if(isMatchingDomain(host, domain)) + return false; + } - for(const auto &domain : qAsConst(m_domains)) { + for(const auto &domain : qAsConst(m_allowedOnDomains)) { if(isMatchingDomain(host, domain)) - return whitelist; + return true; } - return !whitelist; + return false; +} + +void FilterDomain::process(QWebEngineUrlRequestInfo &info) const +{ + for(const FilterRule_ptr &rule : m_rules) { + if(rule->matchRequestUrl(info.requestUrl().toString(), info.resourceType())) + rule->process(info); + } } diff --git a/lib/web/urlfilter/filterdomain.h b/lib/web/urlfilter/filterdomain.h index 2173bfc..b356a32 100644 --- a/lib/web/urlfilter/filterdomain.h +++ b/lib/web/urlfilter/filterdomain.h @@ -3,28 +3,32 @@ #include #include +#include +#include "filterrule.h" + +typedef std::unique_ptr FilterRule_ptr; +typedef QVector QStringVector; class FilterDomain : public QObject { Q_OBJECT public: - enum MatchType { - Whitelist, // only match added domains - Blacklist, // only match domains not added - WhitelistAll, // match all domains - BlacklistAll // match no domains - }; - - explicit FilterDomain(MatchType type = Whitelist, QObject *parent = nullptr); + explicit FilterDomain(QObject *parent = nullptr); - void setType(MatchType type); - void addDomain(const QString &domain); + void addDomain(const QString &domain, bool isException = false); + void addRule(FilterRule_ptr &rule); bool hasMatch(const QString &host) const; + void process(QWebEngineUrlRequestInfo &info) const; private: - MatchType m_type; - QVector m_domains; + Q_DISABLE_COPY(FilterDomain) + + // lists of domains this rule group? is allowed on and blocked on + QStringVector m_allowedOnDomains; + QStringVector m_blockedOnDomains; + + std::vector m_rules; }; // function taken from KDE/Falkon diff --git a/lib/web/urlfilter/filterrule.cpp b/lib/web/urlfilter/filterrule.cpp index 5a9310e..73b357b 100644 --- a/lib/web/urlfilter/filterrule.cpp +++ b/lib/web/urlfilter/filterrule.cpp @@ -2,57 +2,99 @@ #include #include +void parseJson(std::unique_ptr &rule, const QJsonObject &filter) +{ + // set action + { + if(!filter.value("whitelist").isUndefined()) { + rule->setActionType(FilterRule::Whitelist); + + } else if(!filter.value("blacklist").isUndefined()) { + rule->setActionType(FilterRule::Blacklist); + + } else if(!filter.value("redirect").isUndefined()) { + rule->setActionType(FilterRule::Redirect); + rule->setRedirectUrl(QUrl::fromUserInput(filter.value("redirect").toString())); + + } else if(!filter.value("setheader").isUndefined()) { + rule->setActionType(FilterRule::SetHeader); + for(const QJsonValue v : filter.value("setheader").toArray()) { + QStringList h = v.toString().split(':'); + rule->addHeaderRule(h.at(0).toLatin1(), h.at(1).toLatin1()); + } + } + + } + + // set match type + { + const QJsonValue regexpValue = filter.value("regexp"); + const QJsonValue endswithValue = filter.value("endswith"); + const QJsonValue containsValue = filter.value("contains"); + + if(!regexpValue.isUndefined()) { + rule->setMatchType(FilterRule::RegExpMatchRule, regexpValue.toString()); + + } else if(!endswithValue.isUndefined()) { + rule->setMatchType(FilterRule::StringEndsMatchRule, endswithValue.toString()); + + } else if(!containsValue.isUndefined()) { + rule->setMatchType(FilterRule::StringContainsMatchRule, containsValue.toString()); + + } else + rule->setMatchType(FilterRule::MatchAllUrlsRule); + } + +} + FilterRule::FilterRule(const QJsonObject &filter) { - const QString action = filter.value("action").toString(); - - // there is no action specified => this rule is invalid - if(action.isEmpty()) - return; - - if(action == "Whitelist") - m_action = ActionType::Whitelist; - else if (action == "Blacklist") - m_action = ActionType::Blacklist; - else if (action == "Redirect") { - m_action = ActionType::Redirect; - m_redirectUrl = QUrl::fromUserInput(filter.value("url").toString()); - } else if (action == "SetHeader") - m_action = ActionType::SetHeader; - else // invalid action - return; - - QJsonValue regexp = filter.value("regexp"); - QJsonValue endswith = filter.value("endswith"); - QJsonValue contains = filter.value("contains"); - - if(!regexp.isUndefined()) { - m_type = RuleType::RegExpMatchRule; - this->regexp.setPattern(regexp.toString()); - } else if(!endswith.isUndefined()) { - m_type = RuleType::StringEndsMatchRule; - pattern = endswith.toString(); - } else if(!contains.isUndefined()) { - m_type = RuleType::StringContainsMatchRule; - this->matcher.setPattern(contains.toString()); - this->matcher.setCaseSensitivity(Qt::CaseInsensitive); - } else // invalid rule - return; - - m_options.insert(QWebEngineUrlRequestInfo::ResourceTypeImage, true); + m_matcher.setCaseSensitivity(Qt::CaseInsensitive); +} + +void FilterRule::setActionType(ActionType type) +{ + m_actionType = type; +} + +void FilterRule::setMatchType(MatchType type, const QString &pattern) +{ + m_matchType = type; + switch (type) { + case RegExpMatchRule: + m_regexp.setPattern(pattern); + break; + case StringEndsMatchRule: + m_pattern = pattern; + break; + case StringContainsMatchRule: + m_matcher.setPattern(pattern); + default: + break; + } +} + +void FilterRule::setRedirectUrl(const QUrl &url) +{ + m_redirectUrl = url; +} + +void FilterRule::addHeaderRule(const QByteArray &header, const QByteArray &value) +{ + m_headers.insert(header, value); } bool FilterRule::isValid() const { - return m_type != RuleType::Invalid; + return (m_matchType != MatchType::InvalidMatch) && (m_actionType != ActionType::InvalidAction); } bool FilterRule::process(QWebEngineUrlRequestInfo &info) const { - Q_ASSERT(m_type != RuleType::Invalid); + Q_ASSERT(m_actionType != ActionType::InvalidAction); if(matchRequestUrl(info.requestUrl().toString(), info.resourceType())) { - switch (m_action) { + switch (m_actionType) { case ActionType::Whitelist: info.block(false); return true; @@ -63,6 +105,11 @@ bool FilterRule::process(QWebEngineUrlRequestInfo &info) const info.redirect(m_redirectUrl); return true; case ActionType::SetHeader: + for(auto it = m_headers.constBegin(); it != m_headers.constEnd(); ++it) { + info.setHttpHeader(it.key(), it.value()); + } + return true; + case ActionType::InvalidAction: break; } } @@ -72,16 +119,20 @@ bool FilterRule::process(QWebEngineUrlRequestInfo &info) const bool FilterRule::matchRequestUrl(const QString &requestUrl, const QWebEngineUrlRequestInfo::ResourceType type) const { - if(!m_options.contains(type)) + Q_ASSERT(m_matchType != MatchType::InvalidMatch); + + if(!m_resourceTypeOptions.isEmpty() && !m_resourceTypeOptions.contains(type)) return false; - switch (m_type) { - case RuleType::RegExpMatchRule: - return regexp.match(requestUrl).hasMatch(); - case RuleType::StringEndsMatchRule: - return requestUrl.endsWith(pattern); - case RuleType::StringContainsMatchRule: - return matcher.indexIn(requestUrl) != -1; + switch (m_matchType) { + case MatchType::RegExpMatchRule: + return m_regexp.match(requestUrl).hasMatch(); + case MatchType::StringEndsMatchRule: + return requestUrl.endsWith(m_pattern); + case MatchType::StringContainsMatchRule: + return m_matcher.indexIn(requestUrl) != -1; + case MatchType::MatchAllUrlsRule: + return true; default: return false; } diff --git a/lib/web/urlfilter/filterrule.h b/lib/web/urlfilter/filterrule.h index 46690b1..f20ab15 100644 --- a/lib/web/urlfilter/filterrule.h +++ b/lib/web/urlfilter/filterrule.h @@ -8,61 +8,63 @@ #include #include #include +#include class QUrl; class FilterRule { public: - FilterRule(const QJsonObject &filter); - ~FilterRule() = default; - - bool isValid() const; - bool process(QWebEngineUrlRequestInfo &info) const; - bool matchRequestUrl(const QString &requestUrl, const QWebEngineUrlRequestInfo::ResourceType type) const; - -private: - Q_DISABLE_COPY(FilterRule) - enum ActionType { Whitelist, Blacklist, Redirect, - SetHeader + SetHeader, + InvalidAction }; - enum RuleType { - CssRule = 0, // - DomainMatchRule = 1, // + enum MatchType { +// CssRule = 0, // +// DomainMatchRule = 1, // RegExpMatchRule = 2, // match request url with regexp StringEndsMatchRule = 3, // request url ends with string StringContainsMatchRule = 4, // request url contains string MatchAllUrlsRule = 5, // - Invalid = 6 + InvalidMatch = 6, }; - ActionType m_action; - RuleType m_type = RuleType::Invalid; + FilterRule(const QJsonObject &filter); + ~FilterRule() = default; - QHash m_options; + void setActionType(ActionType type); + void setMatchType(MatchType type, const QString &pattern = QString()); + void setRedirectUrl(const QUrl &url); + void addHeaderRule(const QByteArray &header, const QByteArray &value); + + bool isValid() const; + bool process(QWebEngineUrlRequestInfo &info) const; + bool matchRequestUrl(const QString &requestUrl, const QWebEngineUrlRequestInfo::ResourceType type) const; + +private: + Q_DISABLE_COPY(FilterRule) + + + ActionType m_actionType = ActionType::InvalidAction; + MatchType m_matchType = MatchType::InvalidMatch; + + QHash m_resourceTypeOptions; + QHash m_headers; // Parsed rule for string matching (CSS Selector for CSS rules) QString m_matchString; // Case sensitivity for string matching Qt::CaseSensitivity m_caseSensitivity = Qt::CaseInsensitive; - bool m_isException = false; - - // domains this rule is allowed or blocked on - QStringList m_allowedForDomains; - QStringList m_blockedForDomains; - QUrl m_redirectUrl; - - QRegularExpression regexp; - QStringMatcher matcher; - QString pattern; + QRegularExpression m_regexp; + QStringMatcher m_matcher; + QString m_pattern; }; -//bool isMatchingDomain(const QString &domain, const QString &filter); +void parseJson(std::unique_ptr &rule, const QJsonObject &filter); #endif // SMOLBOTE_FILTERRULE_H -- cgit v1.2.1