From aaed4ebc642d95dfb3cddab4fad196e870077bcf Mon Sep 17 00:00:00 2001 From: Aqua-sama Date: Thu, 5 Jul 2018 19:37:05 +0200 Subject: Add web/urlfilter --- lib/web/CMakeLists.txt | 10 ++--- lib/web/urlfilter/filterdomain.cpp | 62 +++++++++++++++++++++++++++ lib/web/urlfilter/filterdomain.h | 33 ++++++++++++++ lib/web/urlfilter/filterrule.cpp | 88 ++++++++++++++++++++++++++++++++++++++ lib/web/urlfilter/filterrule.h | 68 +++++++++++++++++++++++++++++ 5 files changed, 256 insertions(+), 5 deletions(-) create mode 100644 lib/web/urlfilter/filterdomain.cpp create mode 100644 lib/web/urlfilter/filterdomain.h create mode 100644 lib/web/urlfilter/filterrule.cpp create mode 100644 lib/web/urlfilter/filterrule.h (limited to 'lib/web') diff --git a/lib/web/CMakeLists.txt b/lib/web/CMakeLists.txt index c5d5eba..601c1d5 100644 --- a/lib/web/CMakeLists.txt +++ b/lib/web/CMakeLists.txt @@ -7,11 +7,11 @@ set(CMAKE_AUTOMOC ON) add_library(web webprofile.cpp webprofile.h -) -#target_include_directories(web -# PRIVATE ${Boost_INCLUDE_DIRS} -# PRIVATE .. -#) + urlfilter/filterdomain.cpp + urlfilter/filterdomain.h + urlfilter/filterrule.cpp + urlfilter/filterrule.h +) target_link_libraries(web Qt5::WebEngineWidgets) diff --git a/lib/web/urlfilter/filterdomain.cpp b/lib/web/urlfilter/filterdomain.cpp new file mode 100644 index 0000000..53bc7db --- /dev/null +++ b/lib/web/urlfilter/filterdomain.cpp @@ -0,0 +1,62 @@ +#include "filterdomain.h" +#include + +bool isMatchingDomain(const QString &domain, const QString &filter) +{ + // domain and filter are the same + if(domain == filter) { + return true; + } + + // domain can't be matched by filter if it doesn't end with filter + // ex. example2.com isn't matched by example.com + if(!domain.endsWith(filter)) { + return false; + } + + // match with subdomains + // ex. subdomain.example.com is matched by example.com + int index = domain.indexOf(filter); + + // match if (domain ends with filter) && (filter has been found) and (character before filter is '.') + return index > 0 && domain[index - 1] == QLatin1Char('.'); +} + +FilterDomain::FilterDomain(MatchType type, QObject *parent) + : QObject(parent) +{ + setType(type); +} + +void FilterDomain::setType(MatchType type) +{ + m_type = type; +} + +void FilterDomain::addDomain(const QString &domain) +{ + if(!domain.isEmpty()) + m_domains.append(domain); +} + +bool FilterDomain::hasMatch(const QString &host) const +{ + // match all domains -> this rule applies to all domains + if(m_type == WhitelistAll) + return true; + + // match no domains -> this rule applies to no domains + if(m_type == BlacklistAll) + return false; + + // is this a whitelist or blacklist domain rule? + // should it apply to added domains or not + const bool whitelist = (m_type == Whitelist); + + for(const auto &domain : qAsConst(m_domains)) { + if(isMatchingDomain(host, domain)) + return whitelist; + } + + return !whitelist; +} diff --git a/lib/web/urlfilter/filterdomain.h b/lib/web/urlfilter/filterdomain.h new file mode 100644 index 0000000..2173bfc --- /dev/null +++ b/lib/web/urlfilter/filterdomain.h @@ -0,0 +1,33 @@ +#ifndef FILTERDOMAIN_H +#define FILTERDOMAIN_H + +#include +#include + +class FilterDomain : public QObject +{ + Q_OBJECT +public: + enum MatchType { + Whitelist, // only match added domains + Blacklist, // only match domains not added + WhitelistAll, // match all domains + BlacklistAll // match no domains + }; + + explicit FilterDomain(MatchType type = Whitelist, QObject *parent = nullptr); + + void setType(MatchType type); + void addDomain(const QString &domain); + + bool hasMatch(const QString &host) const; + +private: + MatchType m_type; + QVector m_domains; +}; + +// function taken from KDE/Falkon +bool isMatchingDomain(const QString &domain, const QString &filter); + +#endif // FILTERDOMAIN_H diff --git a/lib/web/urlfilter/filterrule.cpp b/lib/web/urlfilter/filterrule.cpp new file mode 100644 index 0000000..5a9310e --- /dev/null +++ b/lib/web/urlfilter/filterrule.cpp @@ -0,0 +1,88 @@ +#include "filterrule.h" +#include +#include + +FilterRule::FilterRule(const QJsonObject &filter) +{ + const QString action = filter.value("action").toString(); + + // there is no action specified => this rule is invalid + if(action.isEmpty()) + return; + + if(action == "Whitelist") + m_action = ActionType::Whitelist; + else if (action == "Blacklist") + m_action = ActionType::Blacklist; + else if (action == "Redirect") { + m_action = ActionType::Redirect; + m_redirectUrl = QUrl::fromUserInput(filter.value("url").toString()); + } else if (action == "SetHeader") + m_action = ActionType::SetHeader; + else // invalid action + return; + + QJsonValue regexp = filter.value("regexp"); + QJsonValue endswith = filter.value("endswith"); + QJsonValue contains = filter.value("contains"); + + if(!regexp.isUndefined()) { + m_type = RuleType::RegExpMatchRule; + this->regexp.setPattern(regexp.toString()); + } else if(!endswith.isUndefined()) { + m_type = RuleType::StringEndsMatchRule; + pattern = endswith.toString(); + } else if(!contains.isUndefined()) { + m_type = RuleType::StringContainsMatchRule; + this->matcher.setPattern(contains.toString()); + this->matcher.setCaseSensitivity(Qt::CaseInsensitive); + } else // invalid rule + return; + + m_options.insert(QWebEngineUrlRequestInfo::ResourceTypeImage, true); +} + +bool FilterRule::isValid() const +{ + return m_type != RuleType::Invalid; +} + +bool FilterRule::process(QWebEngineUrlRequestInfo &info) const +{ + Q_ASSERT(m_type != RuleType::Invalid); + + if(matchRequestUrl(info.requestUrl().toString(), info.resourceType())) { + switch (m_action) { + case ActionType::Whitelist: + info.block(false); + return true; + case ActionType::Blacklist: + info.block(true); + return true; + case ActionType::Redirect: + info.redirect(m_redirectUrl); + return true; + case ActionType::SetHeader: + break; + } + } + + return false; +} + +bool FilterRule::matchRequestUrl(const QString &requestUrl, const QWebEngineUrlRequestInfo::ResourceType type) const +{ + if(!m_options.contains(type)) + return false; + + switch (m_type) { + case RuleType::RegExpMatchRule: + return regexp.match(requestUrl).hasMatch(); + case RuleType::StringEndsMatchRule: + return requestUrl.endsWith(pattern); + case RuleType::StringContainsMatchRule: + return matcher.indexIn(requestUrl) != -1; + default: + return false; + } +} diff --git a/lib/web/urlfilter/filterrule.h b/lib/web/urlfilter/filterrule.h new file mode 100644 index 0000000..46690b1 --- /dev/null +++ b/lib/web/urlfilter/filterrule.h @@ -0,0 +1,68 @@ +#ifndef SMOLBOTE_FILTERRULE_H +#define SMOLBOTE_FILTERRULE_H + +#include +#include +#include +#include +#include +#include +#include + +class QUrl; +class FilterRule +{ +public: + FilterRule(const QJsonObject &filter); + ~FilterRule() = default; + + bool isValid() const; + bool process(QWebEngineUrlRequestInfo &info) const; + bool matchRequestUrl(const QString &requestUrl, const QWebEngineUrlRequestInfo::ResourceType type) const; + +private: + Q_DISABLE_COPY(FilterRule) + + enum ActionType { + Whitelist, + Blacklist, + Redirect, + SetHeader + }; + + enum RuleType { + CssRule = 0, // + DomainMatchRule = 1, // + RegExpMatchRule = 2, // match request url with regexp + StringEndsMatchRule = 3, // request url ends with string + StringContainsMatchRule = 4, // request url contains string + MatchAllUrlsRule = 5, // + Invalid = 6 + }; + + ActionType m_action; + RuleType m_type = RuleType::Invalid; + + QHash m_options; + + // Parsed rule for string matching (CSS Selector for CSS rules) + QString m_matchString; + // Case sensitivity for string matching + Qt::CaseSensitivity m_caseSensitivity = Qt::CaseInsensitive; + + bool m_isException = false; + + // domains this rule is allowed or blocked on + QStringList m_allowedForDomains; + QStringList m_blockedForDomains; + + QUrl m_redirectUrl; + + QRegularExpression regexp; + QStringMatcher matcher; + QString pattern; +}; + +//bool isMatchingDomain(const QString &domain, const QString &filter); + +#endif // SMOLBOTE_FILTERRULE_H -- cgit v1.2.1