about summary refs log tree commit diff
path: root/lib/web/urlfilter
diff options
context:
space:
mode:
author Aqua-sama <aqua@iserlohn-fortress.net> 2018-07-18 10:07:51 +0200
committer Aqua-sama <aqua@iserlohn-fortress.net> 2018-07-18 10:07:51 +0200
commit f507a7a5557c7cca9570c684920e055c5251b08e (patch)
tree 0fad83fc99053006b375608e2a2d04388776ad89 /lib/web/urlfilter
parent AdBlockTest: loading subscription (diff)
download smolbote-f507a7a5557c7cca9570c684920e055c5251b08e.tar.xz
AdBlockRule: move matching logic to FilterRule
Diffstat (limited to 'lib/web/urlfilter')
-rw-r--r-- lib/web/urlfilter/adblockrule.cpp 68
-rw-r--r-- lib/web/urlfilter/adblockrule.h 35
-rw-r--r-- lib/web/urlfilter/filterdomain.cpp 68
-rw-r--r-- lib/web/urlfilter/filterdomain.h 37
-rw-r--r-- lib/web/urlfilter/filterrule.cpp 168
-rw-r--r-- lib/web/urlfilter/filterrule.h 78
6 files changed, 104 insertions, 350 deletions
diff --git a/lib/web/urlfilter/adblockrule.cpp b/lib/web/urlfilter/adblockrule.cpp
index b20ed8f..60262b7 100644
--- a/lib/web/urlfilter/adblockrule.cpp
+++ b/lib/web/urlfilter/adblockrule.cpp
@@ -1,26 +1,5 @@
#include "adblockrule.h"
-bool isMatchingDomain(const QString &domain, const QString &filter)
-{
- // domain and filter are the same
- if(domain == filter) {
- return true;
- }
-
- // domain can't be matched by filter if it doesn't end with filter
- // ex. example2.com isn't matched by example.com
- if(!domain.endsWith(filter)) {
- return false;
- }
-
- // match with subdomains
- // ex. subdomain.example.com is matched by example.com
- int index = domain.indexOf(filter);
-
- // match if (domain ends with filter) && (filter has been found) and (character before filter is '.')
- return index > 0 && domain[index - 1] == QLatin1Char('.');
-}
-
inline std::pair<QWebEngineUrlRequestInfo::ResourceType, bool> parseOption(const QString &option)
{
if(option.endsWith(QLatin1Literal("script"))) {
@@ -105,14 +84,14 @@ AdBlockRule::AdBlockRule(const QString &filter)
if(parsedLine.startsWith(QLatin1Literal("/")) && parsedLine.endsWith(QLatin1Literal("/"))) {
parsedLine = parsedLine.mid(1, parsedLine.length() - 2);
- matchType = RegularExpressionMatch;
+ urlMatchType = RegularExpressionMatch;
regexp.setPattern(parsedLine);
return;
}
// basic filter rules
if(parsedLine.startsWith(QLatin1Literal("|")) && parsedLine.endsWith(QLatin1Literal("|"))) {
- matchType = StringEquals;
+ urlMatchType = StringEquals;
match = parsedLine.mid(1, parsedLine.length() - 2);
return;
}
@@ -128,7 +107,7 @@ AdBlockRule::AdBlockRule(const QString &filter)
parsedLine.chop(1);
if(parsedLine.startsWith(QLatin1Literal("||")) && parsedLine.endsWith(QLatin1Literal("^"))) {
- matchType = DomainMatch;
+ urlMatchType = DomainMatch;
match = parsedLine.mid(2, parsedLine.length() - 3);
return;
}
@@ -137,49 +116,12 @@ AdBlockRule::AdBlockRule(const QString &filter)
// wildcard "*" - any number of characters
// separator "^" - end, ? or /
if(parsedLine.contains(QLatin1Literal("*")) || parsedLine.contains(QLatin1Literal("^"))) {
- matchType = RegularExpressionMatch;
+ urlMatchType = RegularExpressionMatch;
parsedLine.replace(QLatin1Literal("*"), QLatin1Literal(".*"));
parsedLine.replace(QLatin1Literal("^"), QLatin1Literal("($|\\?|\\/)"));
regexp.setPattern(parsedLine);
return;
}
- matcher.setPattern(parsedLine);
-}
-
-bool AdBlockRule::isEnabled() const
-{
- return m_isEnabled;
-}
-
-bool AdBlockRule::matchesType(QWebEngineUrlRequestInfo::ResourceType type) const
-{
- // no options have been specified -> match all resource types
- if(m_resourceTypeOptions.isEmpty())
- return true;
-
- // this resource type has not been specified -> reject it
- if(!m_resourceTypeOptions.contains(type))
- return false;
-
- // resource type has been specified; true to match, false to exception
- return m_resourceTypeOptions.value(type);
-}
-
-bool AdBlockRule::matchesUrl(const QUrl &url) const
-{
- switch (matchType) {
- case RegularExpressionMatch:
- if(regexp.match(url.toString()).hasMatch())
- return !m_isException;
-
- case StringEquals:
- return url.toString() == match;
-
- case DomainMatch:
- return isMatchingDomain(url.host(), match);
-
- default:
- return false;
- }
+ match = parsedLine;
}
diff --git a/lib/web/urlfilter/adblockrule.h b/lib/web/urlfilter/adblockrule.h
index 76484c3..aeabf4f 100644
--- a/lib/web/urlfilter/adblockrule.h
+++ b/lib/web/urlfilter/adblockrule.h
@@ -1,43 +1,18 @@
-#ifndef ADBLOCKRULE_H
-#define ADBLOCKRULE_H
+#ifndef SMOLBOTE_ADBLOCKRULE_H
+#define SMOLBOTE_ADBLOCKRULE_H
#include <QObject>
#include <QString>
#include <QRegularExpression>
#include <QUrl>
#include <QWebEngineUrlRequestInfo>
+#include "filterrule.h"
-class AdBlockRule
+class AdBlockRule : public FilterRule
{
public:
-
- enum MatchType {
- InvalidMatch,
- RegularExpressionMatch,
- StringContains,
- StringStartsWith,
- StringEndsWith,
- StringEquals,
- DomainMatch
- };
-
AdBlockRule(const QString &filter);
- bool isEnabled() const;
- bool matchesType(QWebEngineUrlRequestInfo::ResourceType type) const;
- bool matchesUrl(const QUrl &url) const;
-
-private:
- bool m_isEnabled = false;
- bool m_isException = false;
-
- MatchType matchType = InvalidMatch;
- QHash<QWebEngineUrlRequestInfo::ResourceType, bool> m_resourceTypeOptions;
- QStringList allowedDomains, blockedDomains;
-
- QString match;
- QRegularExpression regexp;
- QStringMatcher matcher;
};
-#endif // ADBLOCKRULE_H
+#endif // SMOLBOTE_ADBLOCKRULE_H
diff --git a/lib/web/urlfilter/filterdomain.cpp b/lib/web/urlfilter/filterdomain.cpp
deleted file mode 100644
index 99cc71c..0000000
--- a/lib/web/urlfilter/filterdomain.cpp
+++ /dev/null
@@ -1,68 +0,0 @@
-#include "filterdomain.h"
-#include <QVector>
-
-bool isMatchingDomain(const QString &domain, const QString &filter)
-{
- // domain and filter are the same
- if(domain == filter) {
- return true;
- }
-
- // domain can't be matched by filter if it doesn't end with filter
- // ex. example2.com isn't matched by example.com
- if(!domain.endsWith(filter)) {
- return false;
- }
-
- // match with subdomains
- // ex. subdomain.example.com is matched by example.com
- int index = domain.indexOf(filter);
-
- // match if (domain ends with filter) && (filter has been found) and (character before filter is '.')
- return index > 0 && domain[index - 1] == QLatin1Char('.');
-}
-
-FilterDomain::FilterDomain(QObject *parent)
- : QObject(parent)
-{
-}
-
-void FilterDomain::addDomain(const QString &domain, bool isException)
-{
- if(domain.isEmpty())
- return;
-
- if(!isException)
- m_allowedOnDomains.append(domain);
- else
- m_blockedOnDomains.append(domain);
-}
-
-void FilterDomain::addRule(FilterRule_ptr &rule)
-{
- Q_ASSERT(rule);
- m_rules.emplace_back(std::move(rule));
-}
-
-bool FilterDomain::hasMatch(const QString &host) const
-{
- for(const auto &domain : qAsConst(m_blockedOnDomains)) {
- if(isMatchingDomain(host, domain))
- return false;
- }
-
- for(const auto &domain : qAsConst(m_allowedOnDomains)) {
- if(isMatchingDomain(host, domain))
- return true;
- }
-
- return false;
-}
-
-void FilterDomain::process(QWebEngineUrlRequestInfo &info) const
-{
- for(const FilterRule_ptr &rule : m_rules) {
- if(rule->matchRequestUrl(info.requestUrl().toString(), info.resourceType()))
- rule->process(info);
- }
-}
diff --git a/lib/web/urlfilter/filterdomain.h b/lib/web/urlfilter/filterdomain.h
deleted file mode 100644
index b356a32..0000000
--- a/lib/web/urlfilter/filterdomain.h
+++ /dev/null
@@ -1,37 +0,0 @@
-#ifndef FILTERDOMAIN_H
-#define FILTERDOMAIN_H
-
-#include <QObject>
-#include <QVector>
-#include <memory>
-#include "filterrule.h"
-
-typedef std::unique_ptr<FilterRule> FilterRule_ptr;
-typedef QVector<QString> QStringVector;
-
-class FilterDomain : public QObject
-{
- Q_OBJECT
-public:
- explicit FilterDomain(QObject *parent = nullptr);
-
- void addDomain(const QString &domain, bool isException = false);
- void addRule(FilterRule_ptr &rule);
-
- bool hasMatch(const QString &host) const;
- void process(QWebEngineUrlRequestInfo &info) const;
-
-private:
- Q_DISABLE_COPY(FilterDomain)
-
- // lists of domains this rule group? is allowed on and blocked on
- QStringVector m_allowedOnDomains;
- QStringVector m_blockedOnDomains;
-
- std::vector<FilterRule_ptr> m_rules;
-};
-
-// function taken from KDE/Falkon
-bool isMatchingDomain(const QString &domain, const QString &filter);
-
-#endif // FILTERDOMAIN_H
diff --git a/lib/web/urlfilter/filterrule.cpp b/lib/web/urlfilter/filterrule.cpp
index 73b357b..ba2181f 100644
--- a/lib/web/urlfilter/filterrule.cpp
+++ b/lib/web/urlfilter/filterrule.cpp
@@ -1,139 +1,89 @@
#include "filterrule.h"
-#include <QWebEngineUrlRequestInfo>
-#include <QJsonArray>
-void parseJson(std::unique_ptr<FilterRule> &rule, const QJsonObject &filter)
+inline bool isMatchingDomain(const QString &domain, const QString &filter)
{
- // set action
- {
- if(!filter.value("whitelist").isUndefined()) {
- rule->setActionType(FilterRule::Whitelist);
-
- } else if(!filter.value("blacklist").isUndefined()) {
- rule->setActionType(FilterRule::Blacklist);
-
- } else if(!filter.value("redirect").isUndefined()) {
- rule->setActionType(FilterRule::Redirect);
- rule->setRedirectUrl(QUrl::fromUserInput(filter.value("redirect").toString()));
-
- } else if(!filter.value("setheader").isUndefined()) {
- rule->setActionType(FilterRule::SetHeader);
- for(const QJsonValue v : filter.value("setheader").toArray()) {
- QStringList h = v.toString().split(':');
- rule->addHeaderRule(h.at(0).toLatin1(), h.at(1).toLatin1());
- }
- }
-
+ // domain and filter are the same
+ if(domain == filter) {
+ return true;
}
- // set match type
- {
- const QJsonValue regexpValue = filter.value("regexp");
- const QJsonValue endswithValue = filter.value("endswith");
- const QJsonValue containsValue = filter.value("contains");
-
- if(!regexpValue.isUndefined()) {
- rule->setMatchType(FilterRule::RegExpMatchRule, regexpValue.toString());
-
- } else if(!endswithValue.isUndefined()) {
- rule->setMatchType(FilterRule::StringEndsMatchRule, endswithValue.toString());
-
- } else if(!containsValue.isUndefined()) {
- rule->setMatchType(FilterRule::StringContainsMatchRule, containsValue.toString());
-
- } else
- rule->setMatchType(FilterRule::MatchAllUrlsRule);
+ // domain can't be matched by filter if it doesn't end with filter
+ // ex. example2.com isn't matched by example.com
+ if(!domain.endsWith(filter)) {
+ return false;
}
-}
+ // match with subdomains
+ // ex. subdomain.example.com is matched by example.com
+ int index = domain.indexOf(filter);
-FilterRule::FilterRule(const QJsonObject &filter)
-{
- m_matcher.setCaseSensitivity(Qt::CaseInsensitive);
+ // match if (domain ends with filter) && (filter has been found) and (character before filter is '.')
+ return index > 0 && domain[index - 1] == QLatin1Char('.');
}
-void FilterRule::setActionType(ActionType type)
+bool FilterRule::isEnabled() const
{
- m_actionType = type;
+ return m_isEnabled;
}
-void FilterRule::setMatchType(MatchType type, const QString &pattern)
+bool FilterRule::matchesDomain(const QString &domain) const
{
- m_matchType = type;
- switch (type) {
- case RegExpMatchRule:
- m_regexp.setPattern(pattern);
- break;
- case StringEndsMatchRule:
- m_pattern = pattern;
- break;
- case StringContainsMatchRule:
- m_matcher.setPattern(pattern);
- default:
- break;
+ // no domains have been allowed or blocked -> allow on all domains
+ if(allowedDomains.isEmpty() && blockedDomains.isEmpty())
+ return true;
+
+ if(!blockedDomains.isEmpty()) {
+ // do not match rule if the domain has been blocked
+ if(blockedDomains.contains(domain))
+ return false;
}
-}
-void FilterRule::setRedirectUrl(const QUrl &url)
-{
- m_redirectUrl = url;
-}
+ if(!allowedDomains.isEmpty()) {
+ if(allowedDomains.contains(domain))
+ return true;
+ }
-void FilterRule::addHeaderRule(const QByteArray &header, const QByteArray &value)
-{
- m_headers.insert(header, value);
+ return false;
}
-bool FilterRule::isValid() const
+bool FilterRule::matchesType(QWebEngineUrlRequestInfo::ResourceType type) const
{
- return (m_matchType != MatchType::InvalidMatch) && (m_actionType != ActionType::InvalidAction);
+ // no options have been specified -> match all resource types
+ if(m_resourceTypeOptions.isEmpty())
+ return true;
+
+ // this resource type has not been specified -> reject it
+ if(!m_resourceTypeOptions.contains(type))
+ return false;
+
+ // resource type has been specified; true to match, false to exception
+ return m_resourceTypeOptions.value(type);
}
-bool FilterRule::process(QWebEngineUrlRequestInfo &info) const
+bool FilterRule::matchesUrl(const QUrl &url) const
{
- Q_ASSERT(m_actionType != ActionType::InvalidAction);
+ switch (urlMatchType) {
+ case InvalidMatch:
+ return false;
- if(matchRequestUrl(info.requestUrl().toString(), info.resourceType())) {
- switch (m_actionType) {
- case ActionType::Whitelist:
- info.block(false);
- return true;
- case ActionType::Blacklist:
- info.block(true);
- return true;
- case ActionType::Redirect:
- info.redirect(m_redirectUrl);
- return true;
- case ActionType::SetHeader:
- for(auto it = m_headers.constBegin(); it != m_headers.constEnd(); ++it) {
- info.setHttpHeader(it.key(), it.value());
- }
- return true;
- case ActionType::InvalidAction:
- break;
- }
- }
+ case RegularExpressionMatch:
+ if(regexp.match(url.toString()).hasMatch())
+ return !m_isException;
- return false;
-}
+ case StringContains:
+ return url.toString().contains(match);
-bool FilterRule::matchRequestUrl(const QString &requestUrl, const QWebEngineUrlRequestInfo::ResourceType type) const
-{
- Q_ASSERT(m_matchType != MatchType::InvalidMatch);
+ case StringStartsWith:
+ return url.toString().startsWith(match);
- if(!m_resourceTypeOptions.isEmpty() && !m_resourceTypeOptions.contains(type))
- return false;
+ case StringEndsWith:
+ return url.toString().endsWith(match);
+
+ case StringEquals:
+ return url.toString() == match;
+
+ case DomainMatch:
+ return isMatchingDomain(url.host(), match);
- switch (m_matchType) {
- case MatchType::RegExpMatchRule:
- return m_regexp.match(requestUrl).hasMatch();
- case MatchType::StringEndsMatchRule:
- return requestUrl.endsWith(m_pattern);
- case MatchType::StringContainsMatchRule:
- return m_matcher.indexIn(requestUrl) != -1;
- case MatchType::MatchAllUrlsRule:
- return true;
- default:
- return false;
}
}
diff --git a/lib/web/urlfilter/filterrule.h b/lib/web/urlfilter/filterrule.h
index f20ab15..8a622fe 100644
--- a/lib/web/urlfilter/filterrule.h
+++ b/lib/web/urlfilter/filterrule.h
@@ -1,70 +1,62 @@
#ifndef SMOLBOTE_FILTERRULE_H
#define SMOLBOTE_FILTERRULE_H
-#include <QObject>
#include <QRegularExpression>
#include <QStringList>
#include <QStringMatcher>
-#include <QJsonObject>
#include <QUrl>
#include <QWebEngineUrlRequestInfo>
#include <memory>
-class QUrl;
class FilterRule
{
public:
- enum ActionType {
- Whitelist,
- Blacklist,
- Redirect,
- SetHeader,
- InvalidAction
+ enum UrlMatchType {
+ InvalidMatch,
+ RegularExpressionMatch,
+ StringContains,
+ StringStartsWith,
+ StringEndsWith,
+ StringEquals,
+ DomainMatch
};
- enum MatchType {
-// CssRule = 0, //
-// DomainMatchRule = 1, //
- RegExpMatchRule = 2, // match request url with regexp
- StringEndsMatchRule = 3, // request url ends with string
- StringContainsMatchRule = 4, // request url contains string
- MatchAllUrlsRule = 5, //
- InvalidMatch = 6,
- };
-
- FilterRule(const QJsonObject &filter);
- ~FilterRule() = default;
+ FilterRule() = default;
- void setActionType(ActionType type);
- void setMatchType(MatchType type, const QString &pattern = QString());
- void setRedirectUrl(const QUrl &url);
- void addHeaderRule(const QByteArray &header, const QByteArray &value);
+ bool isEnabled() const;
- bool isValid() const;
- bool process(QWebEngineUrlRequestInfo &info) const;
- bool matchRequestUrl(const QString &requestUrl, const QWebEngineUrlRequestInfo::ResourceType type) const;
+ /**
+ * @brief matchesDomain
+ * @param domain
+ * @return
+ */
+ bool matchesDomain(const QString &domain) const;
-private:
- Q_DISABLE_COPY(FilterRule)
+ /**
+ * @brief matchesType
+ * @param type
+ * @return true if type matches, false otherwise
+ */
+ bool matchesType(QWebEngineUrlRequestInfo::ResourceType type) const;
+ /**
+ * @brief matchesUrl
+ * @param url
+ * @return
+ */
+ bool matchesUrl(const QUrl &url) const;
- ActionType m_actionType = ActionType::InvalidAction;
- MatchType m_matchType = MatchType::InvalidMatch;
+protected:
+ bool m_isEnabled = false;
+ bool m_isException = false;
+ UrlMatchType urlMatchType = InvalidMatch;
QHash<QWebEngineUrlRequestInfo::ResourceType, bool> m_resourceTypeOptions;
- QHash<QByteArray, QByteArray> m_headers;
+ QStringList allowedDomains, blockedDomains;
- // Parsed rule for string matching (CSS Selector for CSS rules)
- QString m_matchString;
- // Case sensitivity for string matching
- Qt::CaseSensitivity m_caseSensitivity = Qt::CaseInsensitive;
+ QString match;
+ QRegularExpression regexp;
- QUrl m_redirectUrl;
- QRegularExpression m_regexp;
- QStringMatcher m_matcher;
- QString m_pattern;
};
-void parseJson(std::unique_ptr<FilterRule> &rule, const QJsonObject &filter);
-
#endif // SMOLBOTE_FILTERRULE_H