From 0e4ee2ed1c64130262e42ddfc87f2d8944c5a10c Mon Sep 17 00:00:00 2001 From: Aqua-sama Date: Sat, 7 Jul 2018 19:45:45 +0200 Subject: Integrate urlfilter with urlrequestinterceptor --- doc/UrlFilter.md | 10 ++- lib/configuration/configuration.cpp | 1 + lib/web/urlfilter/filterdomain.cpp | 48 ++++++------ lib/web/urlfilter/filterdomain.h | 28 ++++--- lib/web/urlfilter/filterrule.cpp | 145 ++++++++++++++++++++++++------------ lib/web/urlfilter/filterrule.h | 60 +++++++-------- src/webengine/urlinterceptor.cpp | 40 ++++++++-- src/webengine/urlinterceptor.h | 4 + test/urlfilter/urlfiltertest.cpp | 7 -- 9 files changed, 218 insertions(+), 125 deletions(-) diff --git a/doc/UrlFilter.md b/doc/UrlFilter.md index cea2397..b521228 100644 --- a/doc/UrlFilter.md +++ b/doc/UrlFilter.md @@ -19,13 +19,17 @@ There are 4 Filter Domain types: ## FilterRule Filter rules contain information on how a request should be modified. -### "action" +### Action - Whitelist - allow this request - Blacklist - block this request - Redirect - redirect this request - SetHeader - apply a list of headers -### "regexp", "contains", "endswith" +### Match Type +- "regexp" +- "contains" +- "endswith" +- if none is specified, all URLs are matched ### JSON Allow all URLs that contain "waifu.png" @@ -56,7 +60,7 @@ Redirect URLs containing "ads/annoying-spam.gif" to "waifu.tld/waifu.gif" Set some headers { "action" : "SetHeader" - "header" : [ "DNT" : "1" ] + "headers" : [ "DNT:1" ] } ## QWebEngineUrlRequestInterceptor diff --git a/lib/configuration/configuration.cpp b/lib/configuration/configuration.cpp index 70be7b1..d31b4f2 100644 --- a/lib/configuration/configuration.cpp +++ b/lib/configuration/configuration.cpp @@ -96,6 +96,7 @@ Configuration::Configuration(QObject *parent) // Filter settings ("filter.path", po::value()->default_value(filter_path)) + ("filter.json-path", po::value()->default_value("~/.config/smolbote/filter.json")) ("filter.header", po::value>()) // ("filter.cookies.block.all", po::value()->default_value(false)) // ("filter.cookies.block.thirdParty", po::value()->default_value(true)) diff --git a/lib/web/urlfilter/filterdomain.cpp b/lib/web/urlfilter/filterdomain.cpp index 53bc7db..99cc71c 100644 --- a/lib/web/urlfilter/filterdomain.cpp +++ b/lib/web/urlfilter/filterdomain.cpp @@ -22,41 +22,47 @@ bool isMatchingDomain(const QString &domain, const QString &filter) return index > 0 && domain[index - 1] == QLatin1Char('.'); } -FilterDomain::FilterDomain(MatchType type, QObject *parent) +FilterDomain::FilterDomain(QObject *parent) : QObject(parent) { - setType(type); } -void FilterDomain::setType(MatchType type) +void FilterDomain::addDomain(const QString &domain, bool isException) { - m_type = type; + if(domain.isEmpty()) + return; + + if(!isException) + m_allowedOnDomains.append(domain); + else + m_blockedOnDomains.append(domain); } -void FilterDomain::addDomain(const QString &domain) +void FilterDomain::addRule(FilterRule_ptr &rule) { - if(!domain.isEmpty()) - m_domains.append(domain); + Q_ASSERT(rule); + m_rules.emplace_back(std::move(rule)); } bool FilterDomain::hasMatch(const QString &host) const { - // match all domains -> this rule applies to all domains - if(m_type == WhitelistAll) - return true; - - // match no domains -> this rule applies to no domains - if(m_type == BlacklistAll) - return false; - - // is this a whitelist or blacklist domain rule? - // should it apply to added domains or not - const bool whitelist = (m_type == Whitelist); + for(const auto &domain : qAsConst(m_blockedOnDomains)) { + if(isMatchingDomain(host, domain)) + return false; + } - for(const auto &domain : qAsConst(m_domains)) { + for(const auto &domain : qAsConst(m_allowedOnDomains)) { if(isMatchingDomain(host, domain)) - return whitelist; + return true; } - return !whitelist; + return false; +} + +void FilterDomain::process(QWebEngineUrlRequestInfo &info) const +{ + for(const FilterRule_ptr &rule : m_rules) { + if(rule->matchRequestUrl(info.requestUrl().toString(), info.resourceType())) + rule->process(info); + } } diff --git a/lib/web/urlfilter/filterdomain.h b/lib/web/urlfilter/filterdomain.h index 2173bfc..b356a32 100644 --- a/lib/web/urlfilter/filterdomain.h +++ b/lib/web/urlfilter/filterdomain.h @@ -3,28 +3,32 @@ #include #include +#include +#include "filterrule.h" + +typedef std::unique_ptr FilterRule_ptr; +typedef QVector QStringVector; class FilterDomain : public QObject { Q_OBJECT public: - enum MatchType { - Whitelist, // only match added domains - Blacklist, // only match domains not added - WhitelistAll, // match all domains - BlacklistAll // match no domains - }; - - explicit FilterDomain(MatchType type = Whitelist, QObject *parent = nullptr); + explicit FilterDomain(QObject *parent = nullptr); - void setType(MatchType type); - void addDomain(const QString &domain); + void addDomain(const QString &domain, bool isException = false); + void addRule(FilterRule_ptr &rule); bool hasMatch(const QString &host) const; + void process(QWebEngineUrlRequestInfo &info) const; private: - MatchType m_type; - QVector m_domains; + Q_DISABLE_COPY(FilterDomain) + + // lists of domains this rule group? is allowed on and blocked on + QStringVector m_allowedOnDomains; + QStringVector m_blockedOnDomains; + + std::vector m_rules; }; // function taken from KDE/Falkon diff --git a/lib/web/urlfilter/filterrule.cpp b/lib/web/urlfilter/filterrule.cpp index 5a9310e..73b357b 100644 --- a/lib/web/urlfilter/filterrule.cpp +++ b/lib/web/urlfilter/filterrule.cpp @@ -2,57 +2,99 @@ #include #include +void parseJson(std::unique_ptr &rule, const QJsonObject &filter) +{ + // set action + { + if(!filter.value("whitelist").isUndefined()) { + rule->setActionType(FilterRule::Whitelist); + + } else if(!filter.value("blacklist").isUndefined()) { + rule->setActionType(FilterRule::Blacklist); + + } else if(!filter.value("redirect").isUndefined()) { + rule->setActionType(FilterRule::Redirect); + rule->setRedirectUrl(QUrl::fromUserInput(filter.value("redirect").toString())); + + } else if(!filter.value("setheader").isUndefined()) { + rule->setActionType(FilterRule::SetHeader); + for(const QJsonValue v : filter.value("setheader").toArray()) { + QStringList h = v.toString().split(':'); + rule->addHeaderRule(h.at(0).toLatin1(), h.at(1).toLatin1()); + } + } + + } + + // set match type + { + const QJsonValue regexpValue = filter.value("regexp"); + const QJsonValue endswithValue = filter.value("endswith"); + const QJsonValue containsValue = filter.value("contains"); + + if(!regexpValue.isUndefined()) { + rule->setMatchType(FilterRule::RegExpMatchRule, regexpValue.toString()); + + } else if(!endswithValue.isUndefined()) { + rule->setMatchType(FilterRule::StringEndsMatchRule, endswithValue.toString()); + + } else if(!containsValue.isUndefined()) { + rule->setMatchType(FilterRule::StringContainsMatchRule, containsValue.toString()); + + } else + rule->setMatchType(FilterRule::MatchAllUrlsRule); + } + +} + FilterRule::FilterRule(const QJsonObject &filter) { - const QString action = filter.value("action").toString(); - - // there is no action specified => this rule is invalid - if(action.isEmpty()) - return; - - if(action == "Whitelist") - m_action = ActionType::Whitelist; - else if (action == "Blacklist") - m_action = ActionType::Blacklist; - else if (action == "Redirect") { - m_action = ActionType::Redirect; - m_redirectUrl = QUrl::fromUserInput(filter.value("url").toString()); - } else if (action == "SetHeader") - m_action = ActionType::SetHeader; - else // invalid action - return; - - QJsonValue regexp = filter.value("regexp"); - QJsonValue endswith = filter.value("endswith"); - QJsonValue contains = filter.value("contains"); - - if(!regexp.isUndefined()) { - m_type = RuleType::RegExpMatchRule; - this->regexp.setPattern(regexp.toString()); - } else if(!endswith.isUndefined()) { - m_type = RuleType::StringEndsMatchRule; - pattern = endswith.toString(); - } else if(!contains.isUndefined()) { - m_type = RuleType::StringContainsMatchRule; - this->matcher.setPattern(contains.toString()); - this->matcher.setCaseSensitivity(Qt::CaseInsensitive); - } else // invalid rule - return; - - m_options.insert(QWebEngineUrlRequestInfo::ResourceTypeImage, true); + m_matcher.setCaseSensitivity(Qt::CaseInsensitive); +} + +void FilterRule::setActionType(ActionType type) +{ + m_actionType = type; +} + +void FilterRule::setMatchType(MatchType type, const QString &pattern) +{ + m_matchType = type; + switch (type) { + case RegExpMatchRule: + m_regexp.setPattern(pattern); + break; + case StringEndsMatchRule: + m_pattern = pattern; + break; + case StringContainsMatchRule: + m_matcher.setPattern(pattern); + default: + break; + } +} + +void FilterRule::setRedirectUrl(const QUrl &url) +{ + m_redirectUrl = url; +} + +void FilterRule::addHeaderRule(const QByteArray &header, const QByteArray &value) +{ + m_headers.insert(header, value); } bool FilterRule::isValid() const { - return m_type != RuleType::Invalid; + return (m_matchType != MatchType::InvalidMatch) && (m_actionType != ActionType::InvalidAction); } bool FilterRule::process(QWebEngineUrlRequestInfo &info) const { - Q_ASSERT(m_type != RuleType::Invalid); + Q_ASSERT(m_actionType != ActionType::InvalidAction); if(matchRequestUrl(info.requestUrl().toString(), info.resourceType())) { - switch (m_action) { + switch (m_actionType) { case ActionType::Whitelist: info.block(false); return true; @@ -63,6 +105,11 @@ bool FilterRule::process(QWebEngineUrlRequestInfo &info) const info.redirect(m_redirectUrl); return true; case ActionType::SetHeader: + for(auto it = m_headers.constBegin(); it != m_headers.constEnd(); ++it) { + info.setHttpHeader(it.key(), it.value()); + } + return true; + case ActionType::InvalidAction: break; } } @@ -72,16 +119,20 @@ bool FilterRule::process(QWebEngineUrlRequestInfo &info) const bool FilterRule::matchRequestUrl(const QString &requestUrl, const QWebEngineUrlRequestInfo::ResourceType type) const { - if(!m_options.contains(type)) + Q_ASSERT(m_matchType != MatchType::InvalidMatch); + + if(!m_resourceTypeOptions.isEmpty() && !m_resourceTypeOptions.contains(type)) return false; - switch (m_type) { - case RuleType::RegExpMatchRule: - return regexp.match(requestUrl).hasMatch(); - case RuleType::StringEndsMatchRule: - return requestUrl.endsWith(pattern); - case RuleType::StringContainsMatchRule: - return matcher.indexIn(requestUrl) != -1; + switch (m_matchType) { + case MatchType::RegExpMatchRule: + return m_regexp.match(requestUrl).hasMatch(); + case MatchType::StringEndsMatchRule: + return requestUrl.endsWith(m_pattern); + case MatchType::StringContainsMatchRule: + return m_matcher.indexIn(requestUrl) != -1; + case MatchType::MatchAllUrlsRule: + return true; default: return false; } diff --git a/lib/web/urlfilter/filterrule.h b/lib/web/urlfilter/filterrule.h index 46690b1..f20ab15 100644 --- a/lib/web/urlfilter/filterrule.h +++ b/lib/web/urlfilter/filterrule.h @@ -8,61 +8,63 @@ #include #include #include +#include class QUrl; class FilterRule { public: - FilterRule(const QJsonObject &filter); - ~FilterRule() = default; - - bool isValid() const; - bool process(QWebEngineUrlRequestInfo &info) const; - bool matchRequestUrl(const QString &requestUrl, const QWebEngineUrlRequestInfo::ResourceType type) const; - -private: - Q_DISABLE_COPY(FilterRule) - enum ActionType { Whitelist, Blacklist, Redirect, - SetHeader + SetHeader, + InvalidAction }; - enum RuleType { - CssRule = 0, // - DomainMatchRule = 1, // + enum MatchType { +// CssRule = 0, // +// DomainMatchRule = 1, // RegExpMatchRule = 2, // match request url with regexp StringEndsMatchRule = 3, // request url ends with string StringContainsMatchRule = 4, // request url contains string MatchAllUrlsRule = 5, // - Invalid = 6 + InvalidMatch = 6, }; - ActionType m_action; - RuleType m_type = RuleType::Invalid; + FilterRule(const QJsonObject &filter); + ~FilterRule() = default; - QHash m_options; + void setActionType(ActionType type); + void setMatchType(MatchType type, const QString &pattern = QString()); + void setRedirectUrl(const QUrl &url); + void addHeaderRule(const QByteArray &header, const QByteArray &value); + + bool isValid() const; + bool process(QWebEngineUrlRequestInfo &info) const; + bool matchRequestUrl(const QString &requestUrl, const QWebEngineUrlRequestInfo::ResourceType type) const; + +private: + Q_DISABLE_COPY(FilterRule) + + + ActionType m_actionType = ActionType::InvalidAction; + MatchType m_matchType = MatchType::InvalidMatch; + + QHash m_resourceTypeOptions; + QHash m_headers; // Parsed rule for string matching (CSS Selector for CSS rules) QString m_matchString; // Case sensitivity for string matching Qt::CaseSensitivity m_caseSensitivity = Qt::CaseInsensitive; - bool m_isException = false; - - // domains this rule is allowed or blocked on - QStringList m_allowedForDomains; - QStringList m_blockedForDomains; - QUrl m_redirectUrl; - - QRegularExpression regexp; - QStringMatcher matcher; - QString pattern; + QRegularExpression m_regexp; + QStringMatcher m_matcher; + QString m_pattern; }; -//bool isMatchingDomain(const QString &domain, const QString &filter); +void parseJson(std::unique_ptr &rule, const QJsonObject &filter); #endif // SMOLBOTE_FILTERRULE_H diff --git a/src/webengine/urlinterceptor.cpp b/src/webengine/urlinterceptor.cpp index 4e1b2f1..1b44c47 100644 --- a/src/webengine/urlinterceptor.cpp +++ b/src/webengine/urlinterceptor.cpp @@ -11,6 +11,8 @@ #include #include #include +#include +#include UrlRequestInterceptor::UrlRequestInterceptor(const std::unique_ptr &config, QObject *parent) : QWebEngineUrlRequestInterceptor(parent) @@ -36,8 +38,29 @@ UrlRequestInterceptor::UrlRequestInterceptor(const std::unique_ptrvalue("filter.json-path").value()); + if(rules.open(QIODevice::ReadOnly | QIODevice::Text)) { + auto doc = QJsonDocument::fromJson(rules.readAll()).object(); + + Q_ASSERT(doc.value("domains").isArray()); + for(QJsonValue d : doc.value("domains").toArray()) { + domain.addDomain(d.toString()); + } + + Q_ASSERT(doc.value("rules").isArray()); + for(QJsonValue rule : doc.value("rules").toArray()) { + auto p = std::make_unique(rule.toObject()); + parseJson(p, rule.toObject()); + domain.addRule(p); + } + + rules.close(); + } + } +// test DNT on https://browserleaks.com/donottrack void UrlRequestInterceptor::interceptRequest(QWebEngineUrlRequestInfo &info) { for(const Header &header : m_headers) { @@ -46,15 +69,20 @@ void UrlRequestInterceptor::interceptRequest(QWebEngineUrlRequestInfo &info) if(rules.contains(info.requestUrl().host())) { info.block(rules.value(info.requestUrl().host()).isBlocking); + return; + } + + if(domain.hasMatch(info.requestUrl().host())) { + domain.process(info); } #ifdef QT_DEBUG - qDebug("request>>>"); - qDebug("firstParty url=%s", qUtf8Printable(info.firstPartyUrl().toString())); - qDebug("firstParty host=%s", qUtf8Printable(info.firstPartyUrl().host())); - qDebug("request url=%s", qUtf8Printable(info.requestUrl().toString())); - qDebug("request host=%s", qUtf8Printable(info.requestUrl().host())); - qDebug("<<<"); +// qDebug("request>>>"); +// qDebug("firstParty url=%s", qUtf8Printable(info.firstPartyUrl().toString())); +// qDebug("firstParty host=%s", qUtf8Printable(info.firstPartyUrl().host())); +// qDebug("request url=%s", qUtf8Printable(info.requestUrl().toString())); +// qDebug("request host=%s", qUtf8Printable(info.requestUrl().host())); +// qDebug("<<<"); #endif } diff --git a/src/webengine/urlinterceptor.h b/src/webengine/urlinterceptor.h index 2f91e30..06a4b97 100644 --- a/src/webengine/urlinterceptor.h +++ b/src/webengine/urlinterceptor.h @@ -14,6 +14,8 @@ #include #include +#include "web/urlfilter/filterdomain.h" + typedef std::pair Header; class Configuration; @@ -33,6 +35,8 @@ public: private: QHash rules; std::vector
m_headers; + + FilterDomain domain; }; QHash parse(const QString &filename); diff --git a/test/urlfilter/urlfiltertest.cpp b/test/urlfilter/urlfiltertest.cpp index f7ae0fb..eb12421 100644 --- a/test/urlfilter/urlfiltertest.cpp +++ b/test/urlfilter/urlfiltertest.cpp @@ -47,14 +47,7 @@ void UrlFilterTest::matchingDomain() QFETCH(QString, domain); QFETCH(bool, result); - filterDomain.setType(FilterDomain::Whitelist); QCOMPARE(filterDomain.hasMatch(domain), result); - filterDomain.setType(FilterDomain::Blacklist); - QCOMPARE(filterDomain.hasMatch(domain), !result); - filterDomain.setType(FilterDomain::WhitelistAll); - QCOMPARE(filterDomain.hasMatch(domain), true); - filterDomain.setType(FilterDomain::BlacklistAll); - QCOMPARE(filterDomain.hasMatch(domain), false); } void UrlFilterTest::matchingType_data() -- cgit v1.2.1