about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- doc/UrlFilter.md 10
-rw-r--r-- lib/configuration/configuration.cpp 1
-rw-r--r-- lib/web/urlfilter/filterdomain.cpp 48
-rw-r--r-- lib/web/urlfilter/filterdomain.h 28
-rw-r--r-- lib/web/urlfilter/filterrule.cpp 145
-rw-r--r-- lib/web/urlfilter/filterrule.h 60
-rw-r--r-- src/webengine/urlinterceptor.cpp 40
-rw-r--r-- src/webengine/urlinterceptor.h 4
-rw-r--r-- test/urlfilter/urlfiltertest.cpp 7
9 files changed, 218 insertions, 125 deletions
diff --git a/doc/UrlFilter.md b/doc/UrlFilter.md
index cea2397..b521228 100644
--- a/doc/UrlFilter.md
+++ b/doc/UrlFilter.md
@@ -19,13 +19,17 @@ There are 4 Filter Domain types:
## FilterRule
Filter rules contain information on how a request should be modified.
-### "action"
+### Action
- Whitelist - allow this request
- Blacklist - block this request
- Redirect - redirect this request
- SetHeader - apply a list of headers
-### "regexp", "contains", "endswith"
+### Match Type
+- "regexp"
+- "contains"
+- "endswith"
+- if none is specified, all URLs are matched
### JSON
Allow all URLs that contain "waifu.png"
@@ -56,7 +60,7 @@ Redirect URLs containing "ads/annoying-spam.gif" to "waifu.tld/waifu.gif"
Set some headers
{
"action" : "SetHeader"
- "header" : [ "DNT" : "1" ]
+ "headers" : [ "DNT:1" ]
}
## QWebEngineUrlRequestInterceptor
diff --git a/lib/configuration/configuration.cpp b/lib/configuration/configuration.cpp
index 70be7b1..d31b4f2 100644
--- a/lib/configuration/configuration.cpp
+++ b/lib/configuration/configuration.cpp
@@ -96,6 +96,7 @@ Configuration::Configuration(QObject *parent)
// Filter settings
("filter.path", po::value<std::string>()->default_value(filter_path))
+ ("filter.json-path", po::value<std::string>()->default_value("~/.config/smolbote/filter.json"))
("filter.header", po::value<std::vector<std::string>>())
// ("filter.cookies.block.all", po::value<bool>()->default_value(false))
// ("filter.cookies.block.thirdParty", po::value<bool>()->default_value(true))
diff --git a/lib/web/urlfilter/filterdomain.cpp b/lib/web/urlfilter/filterdomain.cpp
index 53bc7db..99cc71c 100644
--- a/lib/web/urlfilter/filterdomain.cpp
+++ b/lib/web/urlfilter/filterdomain.cpp
@@ -22,41 +22,47 @@ bool isMatchingDomain(const QString &domain, const QString &filter)
return index > 0 && domain[index - 1] == QLatin1Char('.');
}
-FilterDomain::FilterDomain(MatchType type, QObject *parent)
+FilterDomain::FilterDomain(QObject *parent)
: QObject(parent)
{
- setType(type);
}
-void FilterDomain::setType(MatchType type)
+void FilterDomain::addDomain(const QString &domain, bool isException)
{
- m_type = type;
+ if(domain.isEmpty())
+ return;
+
+ if(!isException)
+ m_allowedOnDomains.append(domain);
+ else
+ m_blockedOnDomains.append(domain);
}
-void FilterDomain::addDomain(const QString &domain)
+void FilterDomain::addRule(FilterRule_ptr &rule)
{
- if(!domain.isEmpty())
- m_domains.append(domain);
+ Q_ASSERT(rule);
+ m_rules.emplace_back(std::move(rule));
}
bool FilterDomain::hasMatch(const QString &host) const
{
- // match all domains -> this rule applies to all domains
- if(m_type == WhitelistAll)
- return true;
-
- // match no domains -> this rule applies to no domains
- if(m_type == BlacklistAll)
- return false;
-
- // is this a whitelist or blacklist domain rule?
- // should it apply to added domains or not
- const bool whitelist = (m_type == Whitelist);
+ for(const auto &domain : qAsConst(m_blockedOnDomains)) {
+ if(isMatchingDomain(host, domain))
+ return false;
+ }
- for(const auto &domain : qAsConst(m_domains)) {
+ for(const auto &domain : qAsConst(m_allowedOnDomains)) {
if(isMatchingDomain(host, domain))
- return whitelist;
+ return true;
}
- return !whitelist;
+ return false;
+}
+
+void FilterDomain::process(QWebEngineUrlRequestInfo &info) const
+{
+ for(const FilterRule_ptr &rule : m_rules) {
+ if(rule->matchRequestUrl(info.requestUrl().toString(), info.resourceType()))
+ rule->process(info);
+ }
}
diff --git a/lib/web/urlfilter/filterdomain.h b/lib/web/urlfilter/filterdomain.h
index 2173bfc..b356a32 100644
--- a/lib/web/urlfilter/filterdomain.h
+++ b/lib/web/urlfilter/filterdomain.h
@@ -3,28 +3,32 @@
#include <QObject>
#include <QVector>
+#include <memory>
+#include "filterrule.h"
+
+typedef std::unique_ptr<FilterRule> FilterRule_ptr;
+typedef QVector<QString> QStringVector;
class FilterDomain : public QObject
{
Q_OBJECT
public:
- enum MatchType {
- Whitelist, // only match added domains
- Blacklist, // only match domains not added
- WhitelistAll, // match all domains
- BlacklistAll // match no domains
- };
-
- explicit FilterDomain(MatchType type = Whitelist, QObject *parent = nullptr);
+ explicit FilterDomain(QObject *parent = nullptr);
- void setType(MatchType type);
- void addDomain(const QString &domain);
+ void addDomain(const QString &domain, bool isException = false);
+ void addRule(FilterRule_ptr &rule);
bool hasMatch(const QString &host) const;
+ void process(QWebEngineUrlRequestInfo &info) const;
private:
- MatchType m_type;
- QVector<QString> m_domains;
+ Q_DISABLE_COPY(FilterDomain)
+
+ // domains on which this group of rules is allowed, and domains on which it is blocked
+ QStringVector m_allowedOnDomains;
+ QStringVector m_blockedOnDomains;
+
+ std::vector<FilterRule_ptr> m_rules;
};
// function taken from KDE/Falkon
diff --git a/lib/web/urlfilter/filterrule.cpp b/lib/web/urlfilter/filterrule.cpp
index 5a9310e..73b357b 100644
--- a/lib/web/urlfilter/filterrule.cpp
+++ b/lib/web/urlfilter/filterrule.cpp
@@ -2,57 +2,99 @@
#include <QWebEngineUrlRequestInfo>
#include <QJsonArray>
+void parseJson(std::unique_ptr<FilterRule> &rule, const QJsonObject &filter)
+{
+ // set action
+ {
+ if(!filter.value("whitelist").isUndefined()) {
+ rule->setActionType(FilterRule::Whitelist);
+
+ } else if(!filter.value("blacklist").isUndefined()) {
+ rule->setActionType(FilterRule::Blacklist);
+
+ } else if(!filter.value("redirect").isUndefined()) {
+ rule->setActionType(FilterRule::Redirect);
+ rule->setRedirectUrl(QUrl::fromUserInput(filter.value("redirect").toString()));
+
+ } else if(!filter.value("setheader").isUndefined()) {
+ rule->setActionType(FilterRule::SetHeader);
+ for(const QJsonValue v : filter.value("setheader").toArray()) {
+ QStringList h = v.toString().split(':');
+ rule->addHeaderRule(h.at(0).toLatin1(), h.at(1).toLatin1());
+ }
+ }
+
+ }
+
+ // set match type
+ {
+ const QJsonValue regexpValue = filter.value("regexp");
+ const QJsonValue endswithValue = filter.value("endswith");
+ const QJsonValue containsValue = filter.value("contains");
+
+ if(!regexpValue.isUndefined()) {
+ rule->setMatchType(FilterRule::RegExpMatchRule, regexpValue.toString());
+
+ } else if(!endswithValue.isUndefined()) {
+ rule->setMatchType(FilterRule::StringEndsMatchRule, endswithValue.toString());
+
+ } else if(!containsValue.isUndefined()) {
+ rule->setMatchType(FilterRule::StringContainsMatchRule, containsValue.toString());
+
+ } else
+ rule->setMatchType(FilterRule::MatchAllUrlsRule);
+ }
+
+}
+
FilterRule::FilterRule(const QJsonObject &filter)
{
- const QString action = filter.value("action").toString();
-
- // there is no action specified => this rule is invalid
- if(action.isEmpty())
- return;
-
- if(action == "Whitelist")
- m_action = ActionType::Whitelist;
- else if (action == "Blacklist")
- m_action = ActionType::Blacklist;
- else if (action == "Redirect") {
- m_action = ActionType::Redirect;
- m_redirectUrl = QUrl::fromUserInput(filter.value("url").toString());
- } else if (action == "SetHeader")
- m_action = ActionType::SetHeader;
- else // invalid action
- return;
-
- QJsonValue regexp = filter.value("regexp");
- QJsonValue endswith = filter.value("endswith");
- QJsonValue contains = filter.value("contains");
-
- if(!regexp.isUndefined()) {
- m_type = RuleType::RegExpMatchRule;
- this->regexp.setPattern(regexp.toString());
- } else if(!endswith.isUndefined()) {
- m_type = RuleType::StringEndsMatchRule;
- pattern = endswith.toString();
- } else if(!contains.isUndefined()) {
- m_type = RuleType::StringContainsMatchRule;
- this->matcher.setPattern(contains.toString());
- this->matcher.setCaseSensitivity(Qt::CaseInsensitive);
- } else // invalid rule
- return;
-
- m_options.insert(QWebEngineUrlRequestInfo::ResourceTypeImage, true);
+ m_matcher.setCaseSensitivity(Qt::CaseInsensitive);
+}
+
+void FilterRule::setActionType(ActionType type)
+{
+ m_actionType = type;
+}
+
+void FilterRule::setMatchType(MatchType type, const QString &pattern)
+{
+ m_matchType = type;
+ switch (type) {
+ case RegExpMatchRule:
+ m_regexp.setPattern(pattern);
+ break;
+ case StringEndsMatchRule:
+ m_pattern = pattern;
+ break;
+ case StringContainsMatchRule:
+ m_matcher.setPattern(pattern);
+ default:
+ break;
+ }
+}
+
+void FilterRule::setRedirectUrl(const QUrl &url)
+{
+ m_redirectUrl = url;
+}
+
+void FilterRule::addHeaderRule(const QByteArray &header, const QByteArray &value)
+{
+ m_headers.insert(header, value);
}
bool FilterRule::isValid() const
{
- return m_type != RuleType::Invalid;
+ return (m_matchType != MatchType::InvalidMatch) && (m_actionType != ActionType::InvalidAction);
}
bool FilterRule::process(QWebEngineUrlRequestInfo &info) const
{
- Q_ASSERT(m_type != RuleType::Invalid);
+ Q_ASSERT(m_actionType != ActionType::InvalidAction);
if(matchRequestUrl(info.requestUrl().toString(), info.resourceType())) {
- switch (m_action) {
+ switch (m_actionType) {
case ActionType::Whitelist:
info.block(false);
return true;
@@ -63,6 +105,11 @@ bool FilterRule::process(QWebEngineUrlRequestInfo &info) const
info.redirect(m_redirectUrl);
return true;
case ActionType::SetHeader:
+ for(auto it = m_headers.constBegin(); it != m_headers.constEnd(); ++it) {
+ info.setHttpHeader(it.key(), it.value());
+ }
+ return true;
+ case ActionType::InvalidAction:
break;
}
}
@@ -72,16 +119,20 @@ bool FilterRule::process(QWebEngineUrlRequestInfo &info) const
bool FilterRule::matchRequestUrl(const QString &requestUrl, const QWebEngineUrlRequestInfo::ResourceType type) const
{
- if(!m_options.contains(type))
+ Q_ASSERT(m_matchType != MatchType::InvalidMatch);
+
+ if(!m_resourceTypeOptions.isEmpty() && !m_resourceTypeOptions.contains(type))
return false;
- switch (m_type) {
- case RuleType::RegExpMatchRule:
- return regexp.match(requestUrl).hasMatch();
- case RuleType::StringEndsMatchRule:
- return requestUrl.endsWith(pattern);
- case RuleType::StringContainsMatchRule:
- return matcher.indexIn(requestUrl) != -1;
+ switch (m_matchType) {
+ case MatchType::RegExpMatchRule:
+ return m_regexp.match(requestUrl).hasMatch();
+ case MatchType::StringEndsMatchRule:
+ return requestUrl.endsWith(m_pattern);
+ case MatchType::StringContainsMatchRule:
+ return m_matcher.indexIn(requestUrl) != -1;
+ case MatchType::MatchAllUrlsRule:
+ return true;
default:
return false;
}
diff --git a/lib/web/urlfilter/filterrule.h b/lib/web/urlfilter/filterrule.h
index 46690b1..f20ab15 100644
--- a/lib/web/urlfilter/filterrule.h
+++ b/lib/web/urlfilter/filterrule.h
@@ -8,61 +8,63 @@
#include <QJsonObject>
#include <QUrl>
#include <QWebEngineUrlRequestInfo>
+#include <memory>
class QUrl;
class FilterRule
{
public:
- FilterRule(const QJsonObject &filter);
- ~FilterRule() = default;
-
- bool isValid() const;
- bool process(QWebEngineUrlRequestInfo &info) const;
- bool matchRequestUrl(const QString &requestUrl, const QWebEngineUrlRequestInfo::ResourceType type) const;
-
-private:
- Q_DISABLE_COPY(FilterRule)
-
enum ActionType {
Whitelist,
Blacklist,
Redirect,
- SetHeader
+ SetHeader,
+ InvalidAction
};
- enum RuleType {
- CssRule = 0, //
- DomainMatchRule = 1, //
+ enum MatchType {
+// CssRule = 0, //
+// DomainMatchRule = 1, //
RegExpMatchRule = 2, // match request url with regexp
StringEndsMatchRule = 3, // request url ends with string
StringContainsMatchRule = 4, // request url contains string
MatchAllUrlsRule = 5, //
- Invalid = 6
+ InvalidMatch = 6,
};
- ActionType m_action;
- RuleType m_type = RuleType::Invalid;
+ FilterRule(const QJsonObject &filter);
+ ~FilterRule() = default;
- QHash<QWebEngineUrlRequestInfo::ResourceType, bool> m_options;
+ void setActionType(ActionType type);
+ void setMatchType(MatchType type, const QString &pattern = QString());
+ void setRedirectUrl(const QUrl &url);
+ void addHeaderRule(const QByteArray &header, const QByteArray &value);
+
+ bool isValid() const;
+ bool process(QWebEngineUrlRequestInfo &info) const;
+ bool matchRequestUrl(const QString &requestUrl, const QWebEngineUrlRequestInfo::ResourceType type) const;
+
+private:
+ Q_DISABLE_COPY(FilterRule)
+
+
+ ActionType m_actionType = ActionType::InvalidAction;
+ MatchType m_matchType = MatchType::InvalidMatch;
+
+ QHash<QWebEngineUrlRequestInfo::ResourceType, bool> m_resourceTypeOptions;
+ QHash<QByteArray, QByteArray> m_headers;
// Parsed rule for string matching (CSS Selector for CSS rules)
QString m_matchString;
// Case sensitivity for string matching
Qt::CaseSensitivity m_caseSensitivity = Qt::CaseInsensitive;
- bool m_isException = false;
-
- // domains this rule is allowed or blocked on
- QStringList m_allowedForDomains;
- QStringList m_blockedForDomains;
-
QUrl m_redirectUrl;
-
- QRegularExpression regexp;
- QStringMatcher matcher;
- QString pattern;
+ QRegularExpression m_regexp;
+ QStringMatcher m_matcher;
+ QString m_pattern;
};
-//bool isMatchingDomain(const QString &domain, const QString &filter);
+void parseJson(std::unique_ptr<FilterRule> &rule, const QJsonObject &filter);
#endif // SMOLBOTE_FILTERRULE_H
diff --git a/src/webengine/urlinterceptor.cpp b/src/webengine/urlinterceptor.cpp
index 4e1b2f1..1b44c47 100644
--- a/src/webengine/urlinterceptor.cpp
+++ b/src/webengine/urlinterceptor.cpp
@@ -11,6 +11,8 @@
#include <QTextStream>
#include <configuration/configuration.h>
#include <boost/algorithm/string.hpp>
+#include <QJsonDocument>
+#include <QJsonArray>
UrlRequestInterceptor::UrlRequestInterceptor(const std::unique_ptr<Configuration> &config, QObject *parent)
: QWebEngineUrlRequestInterceptor(parent)
@@ -36,8 +38,29 @@ UrlRequestInterceptor::UrlRequestInterceptor(const std::unique_ptr<Configuration
m_headers.emplace_back(pair);
}
}
+
+ QFile rules(config->value<QString>("filter.json-path").value());
+ if(rules.open(QIODevice::ReadOnly | QIODevice::Text)) {
+ auto doc = QJsonDocument::fromJson(rules.readAll()).object();
+
+ Q_ASSERT(doc.value("domains").isArray());
+ for(QJsonValue d : doc.value("domains").toArray()) {
+ domain.addDomain(d.toString());
+ }
+
+ Q_ASSERT(doc.value("rules").isArray());
+ for(QJsonValue rule : doc.value("rules").toArray()) {
+ auto p = std::make_unique<FilterRule>(rule.toObject());
+ parseJson(p, rule.toObject());
+ domain.addRule(p);
+ }
+
+ rules.close();
+ }
+
}
+// test DNT on https://browserleaks.com/donottrack
void UrlRequestInterceptor::interceptRequest(QWebEngineUrlRequestInfo &info)
{
for(const Header &header : m_headers) {
@@ -46,15 +69,20 @@ void UrlRequestInterceptor::interceptRequest(QWebEngineUrlRequestInfo &info)
if(rules.contains(info.requestUrl().host())) {
info.block(rules.value(info.requestUrl().host()).isBlocking);
+ return;
+ }
+
+ if(domain.hasMatch(info.requestUrl().host())) {
+ domain.process(info);
}
#ifdef QT_DEBUG
- qDebug("request>>>");
- qDebug("firstParty url=%s", qUtf8Printable(info.firstPartyUrl().toString()));
- qDebug("firstParty host=%s", qUtf8Printable(info.firstPartyUrl().host()));
- qDebug("request url=%s", qUtf8Printable(info.requestUrl().toString()));
- qDebug("request host=%s", qUtf8Printable(info.requestUrl().host()));
- qDebug("<<<");
+// qDebug("request>>>");
+// qDebug("firstParty url=%s", qUtf8Printable(info.firstPartyUrl().toString()));
+// qDebug("firstParty host=%s", qUtf8Printable(info.firstPartyUrl().host()));
+// qDebug("request url=%s", qUtf8Printable(info.requestUrl().toString()));
+// qDebug("request host=%s", qUtf8Printable(info.requestUrl().host()));
+// qDebug("<<<");
#endif
}
diff --git a/src/webengine/urlinterceptor.h b/src/webengine/urlinterceptor.h
index 2f91e30..06a4b97 100644
--- a/src/webengine/urlinterceptor.h
+++ b/src/webengine/urlinterceptor.h
@@ -14,6 +14,8 @@
#include <QVector>
#include <QByteArray>
+#include "web/urlfilter/filterdomain.h"
+
typedef std::pair<std::string, std::string> Header;
class Configuration;
@@ -33,6 +35,8 @@ public:
private:
QHash<QString, HostRule> rules;
std::vector<Header> m_headers;
+
+ FilterDomain domain;
};
QHash<QString, UrlRequestInterceptor::HostRule> parse(const QString &filename);
diff --git a/test/urlfilter/urlfiltertest.cpp b/test/urlfilter/urlfiltertest.cpp
index f7ae0fb..eb12421 100644
--- a/test/urlfilter/urlfiltertest.cpp
+++ b/test/urlfilter/urlfiltertest.cpp
@@ -47,14 +47,7 @@ void UrlFilterTest::matchingDomain()
QFETCH(QString, domain);
QFETCH(bool, result);
- filterDomain.setType(FilterDomain::Whitelist);
QCOMPARE(filterDomain.hasMatch(domain), result);
- filterDomain.setType(FilterDomain::Blacklist);
- QCOMPARE(filterDomain.hasMatch(domain), !result);
- filterDomain.setType(FilterDomain::WhitelistAll);
- QCOMPARE(filterDomain.hasMatch(domain), true);
- filterDomain.setType(FilterDomain::BlacklistAll);
- QCOMPARE(filterDomain.hasMatch(domain), false);
}
void UrlFilterTest::matchingType_data()