From e53906ccae7610b00ee12c3c0c45710907d7ff81 Mon Sep 17 00:00:00 2001 From: Aqua-sama Date: Tue, 24 Jul 2018 12:09:07 +0200 Subject: UrlRequestInterceptor: add filter rules --- lib/configuration/configuration.cpp | 2 +- lib/web/urlfilter/adblockrule.cpp | 70 ++++++++++++++++++------------------- lib/web/urlfilter/adblockrule.h | 8 +++++ lib/web/urlfilter/filterrule.cpp | 21 +++++++++-- lib/web/urlfilter/filterrule.h | 15 +++++++- src/webengine/urlinterceptor.cpp | 47 +++++++++++++++++++------ src/webengine/urlinterceptor.h | 9 +++-- 7 files changed, 119 insertions(+), 53 deletions(-) diff --git a/lib/configuration/configuration.cpp b/lib/configuration/configuration.cpp index d31b4f2..c83e743 100644 --- a/lib/configuration/configuration.cpp +++ b/lib/configuration/configuration.cpp @@ -96,7 +96,7 @@ Configuration::Configuration(QObject *parent) // Filter settings ("filter.path", po::value()->default_value(filter_path)) - ("filter.json-path", po::value()->default_value("~/.config/smolbote/filter.json")) + ("filter.adblock", po::value()) ("filter.header", po::value>()) // ("filter.cookies.block.all", po::value()->default_value(false)) // ("filter.cookies.block.thirdParty", po::value()->default_value(true)) diff --git a/lib/web/urlfilter/adblockrule.cpp b/lib/web/urlfilter/adblockrule.cpp index 60262b7..58b1941 100644 --- a/lib/web/urlfilter/adblockrule.cpp +++ b/lib/web/urlfilter/adblockrule.cpp @@ -1,30 +1,13 @@ -#include "adblockrule.h" - -inline std::pair parseOption(const QString &option) -{ - if(option.endsWith(QLatin1Literal("script"))) { - return std::make_pair(QWebEngineUrlRequestInfo::ResourceTypeScript, !option.startsWith(QLatin1Literal("~"))); - - } else if(option.endsWith(QLatin1Literal("image"))) { - return std::make_pair(QWebEngineUrlRequestInfo::ResourceTypeImage, !option.startsWith(QLatin1Literal("~"))); - - } else if(option.endsWith(QLatin1Literal("stylesheet"))) { - return std::make_pair(QWebEngineUrlRequestInfo::ResourceTypeStylesheet, 
!option.startsWith(QLatin1Literal("~"))); +/* + * This file is part of smolbote. It's copyrighted by the contributors recorded + * in the version control history of the file, available from its original + * location: https://neueland.iserlohn-fortress.net/smolbote.hg + * + * SPDX-License-Identifier: GPL-3.0 + */ +// Based on Falkon's AdBlockRule class - } else if(option.endsWith(QLatin1Literal("object"))) { - return std::make_pair(QWebEngineUrlRequestInfo::ResourceTypeObject, !option.startsWith(QLatin1Literal("~"))); - - } else if(option.endsWith(QLatin1Literal("xmlhttprequest"))) { - return std::make_pair(QWebEngineUrlRequestInfo::ResourceTypeXhr, !option.startsWith(QLatin1Literal("~"))); - - } else if(option.endsWith(QLatin1Literal("other"))) { - return std::make_pair(QWebEngineUrlRequestInfo::ResourceTypeUnknown, !option.startsWith(QLatin1Literal("~"))); - - } else { - // unhandled pair - Q_ASSERT(false); - } -} +#include "adblockrule.h" // adblock format documentation // https://adblockplus.org/filters @@ -35,9 +18,10 @@ inline std::pair parseOption(const AdBlockRule::AdBlockRule(const QString &filter) { + originalFilter = filter; QString parsedLine = filter.trimmed(); - // there is no rule, or it"s a comment + // there is no rule, or it's a comment if(parsedLine.isEmpty() || parsedLine.startsWith("!")) { return; } @@ -51,9 +35,10 @@ AdBlockRule::AdBlockRule(const QString &filter) // exception rules if(parsedLine.startsWith(QLatin1Literal("@@"))) { - m_isException = true; + m_isBlocking = false; parsedLine.remove(0, 2); - } + } else + m_isBlocking = true; // parse options { @@ -71,16 +56,29 @@ AdBlockRule::AdBlockRule(const QString &filter) else allowedDomains.append(domain); } - } else { - auto optPair = parseOption(option); - m_resourceTypeOptions.insert(optPair.first, optPair.second); + } else if(option.endsWith(QLatin1Literal("script"))) { + m_resourceTypeOptions.insert(QWebEngineUrlRequestInfo::ResourceTypeScript, !option.startsWith(QLatin1Literal("~"))); 
+ + } else if(option.endsWith(QLatin1Literal("image"))) { + m_resourceTypeOptions.insert(QWebEngineUrlRequestInfo::ResourceTypeImage, !option.startsWith(QLatin1Literal("~"))); + + } else if(option.endsWith(QLatin1Literal("stylesheet"))) { + m_resourceTypeOptions.insert(QWebEngineUrlRequestInfo::ResourceTypeStylesheet, !option.startsWith(QLatin1Literal("~"))); + + } else if(option.endsWith(QLatin1Literal("object"))) { + m_resourceTypeOptions.insert(QWebEngineUrlRequestInfo::ResourceTypeObject, !option.startsWith(QLatin1Literal("~"))); + + } else if(option.endsWith(QLatin1Literal("xmlhttprequest"))) { + m_resourceTypeOptions.insert(QWebEngineUrlRequestInfo::ResourceTypeXhr, !option.startsWith(QLatin1Literal("~"))); + + } else if(option.endsWith(QLatin1Literal("other"))) { + m_resourceTypeOptions.insert(QWebEngineUrlRequestInfo::ResourceTypeUnknown, !option.startsWith(QLatin1Literal("~"))); } } - } } - // regular expression rules + // regular expression rule if(parsedLine.startsWith(QLatin1Literal("/")) && parsedLine.endsWith(QLatin1Literal("/"))) { parsedLine = parsedLine.mid(1, parsedLine.length() - 2); @@ -89,7 +87,7 @@ AdBlockRule::AdBlockRule(const QString &filter) return; } - // basic filter rules + // string equals rule if(parsedLine.startsWith(QLatin1Literal("|")) && parsedLine.endsWith(QLatin1Literal("|"))) { urlMatchType = StringEquals; match = parsedLine.mid(1, parsedLine.length() - 2); @@ -117,6 +115,8 @@ AdBlockRule::AdBlockRule(const QString &filter) // separator "^" - end, ? 
or / if(parsedLine.contains(QLatin1Literal("*")) || parsedLine.contains(QLatin1Literal("^"))) { urlMatchType = RegularExpressionMatch; + parsedLine.replace(QLatin1Literal("||"), QLatin1Literal("^\\w+://")); + parsedLine.replace(QLatin1Literal("|"), QLatin1Literal("\\|")); parsedLine.replace(QLatin1Literal("*"), QLatin1Literal(".*")); parsedLine.replace(QLatin1Literal("^"), QLatin1Literal("($|\\?|\\/)")); regexp.setPattern(parsedLine); diff --git a/lib/web/urlfilter/adblockrule.h b/lib/web/urlfilter/adblockrule.h index aeabf4f..5c03f91 100644 --- a/lib/web/urlfilter/adblockrule.h +++ b/lib/web/urlfilter/adblockrule.h @@ -1,3 +1,11 @@ +/* + * This file is part of smolbote. It's copyrighted by the contributors recorded + * in the version control history of the file, available from its original + * location: https://neueland.iserlohn-fortress.net/smolbote.hg + * + * SPDX-License-Identifier: GPL-3.0 + */ + #ifndef SMOLBOTE_ADBLOCKRULE_H #define SMOLBOTE_ADBLOCKRULE_H diff --git a/lib/web/urlfilter/filterrule.cpp b/lib/web/urlfilter/filterrule.cpp index ba2181f..67ff4d7 100644 --- a/lib/web/urlfilter/filterrule.cpp +++ b/lib/web/urlfilter/filterrule.cpp @@ -1,3 +1,11 @@ +/* + * This file is part of smolbote. 
It's copyrighted by the contributors recorded + * in the version control history of the file, available from its original + * location: https://neueland.iserlohn-fortress.net/smolbote.hg + * + * SPDX-License-Identifier: GPL-3.0 + */ + #include "filterrule.h" inline bool isMatchingDomain(const QString &domain, const QString &filter) @@ -26,6 +34,11 @@ bool FilterRule::isEnabled() const return m_isEnabled; } +bool FilterRule::isBlocking() const +{ + return m_isBlocking; +} + bool FilterRule::matchesDomain(const QString &domain) const { // no domains have been allowed or blocked -> allow on all domains @@ -67,8 +80,7 @@ bool FilterRule::matchesUrl(const QUrl &url) const return false; case RegularExpressionMatch: - if(regexp.match(url.toString()).hasMatch()) - return !m_isException; + return regexp.match(url.toString()).hasMatch(); case StringContains: return url.toString().contains(match); @@ -87,3 +99,8 @@ bool FilterRule::matchesUrl(const QUrl &url) const } } + +QString FilterRule::toString() const +{ + return originalFilter; +} diff --git a/lib/web/urlfilter/filterrule.h b/lib/web/urlfilter/filterrule.h index 8a622fe..5b9a6cf 100644 --- a/lib/web/urlfilter/filterrule.h +++ b/lib/web/urlfilter/filterrule.h @@ -1,3 +1,11 @@ +/* + * This file is part of smolbote. 
It's copyrighted by the contributors recorded + * in the version control history of the file, available from its original + * location: https://neueland.iserlohn-fortress.net/smolbote.hg + * + * SPDX-License-Identifier: GPL-3.0 + */ + #ifndef SMOLBOTE_FILTERRULE_H #define SMOLBOTE_FILTERRULE_H @@ -24,6 +32,7 @@ public: FilterRule() = default; bool isEnabled() const; + bool isBlocking() const; /** * @brief matchesDomain @@ -46,9 +55,13 @@ public: */ bool matchesUrl(const QUrl &url) const; + QString toString() const; + protected: bool m_isEnabled = false; - bool m_isException = false; + bool m_isBlocking = true; + + QString originalFilter; UrlMatchType urlMatchType = InvalidMatch; QHash m_resourceTypeOptions; diff --git a/src/webengine/urlinterceptor.cpp b/src/webengine/urlinterceptor.cpp index a66ab57..cf50e59 100644 --- a/src/webengine/urlinterceptor.cpp +++ b/src/webengine/urlinterceptor.cpp @@ -7,12 +7,34 @@ */ #include "urlinterceptor.h" +#include "web/urlfilter/adblockrule.h" #include +#include +#include #include -#include #include -#include -#include +#include + +inline std::vector parseAdBlockList(const QString &filename) +{ /* Reads the list at filename line by line, builds an AdBlockRule from each line and keeps only rules whose isEnabled() is true; returns an empty vector when the file cannot be opened. */ + std::vector rules; + QFile list(filename); + + if(list.open(QIODevice::ReadOnly | QIODevice::Text)) { + QTextStream l(&list); + QString line; + while(l.readLineInto(&line)) { + AdBlockRule rule(line); + if(rule.isEnabled()) { + rules.emplace_back(std::move(rule)); + //qDebug("added rule: %s", qUtf8Printable(line)); + } + } + list.close(); + } + + return rules; +} UrlRequestInterceptor::UrlRequestInterceptor(const std::unique_ptr &config, QObject *parent) : QWebEngineUrlRequestInterceptor(parent) @@ -39,8 +61,9 @@ UrlRequestInterceptor::UrlRequestInterceptor(const std::unique_ptrvalue("filter.adblock"); + if(filtersPath) + filters = std::move(parseAdBlockList(filtersPath.value())); } // test DNT on https://browserleaks.com/donottrack @@ -55,14 +78,16 @@ void UrlRequestInterceptor::interceptRequest(QWebEngineUrlRequestInfo &info)
return; } + for(const FilterRule &rule : filters) { + if(rule.matchesDomain(info.firstPartyUrl().host()) && rule.matchesType(info.resourceType()) && rule.matchesUrl(info.requestUrl())) { + info.block(rule.isBlocking()); #ifdef QT_DEBUG -// qDebug("request>>>"); -// qDebug("firstParty url=%s", qUtf8Printable(info.firstPartyUrl().toString())); -// qDebug("firstParty host=%s", qUtf8Printable(info.firstPartyUrl().host())); -// qDebug("request url=%s", qUtf8Printable(info.requestUrl().toString())); -// qDebug("request host=%s", qUtf8Printable(info.requestUrl().host())); -// qDebug("<<<"); + qDebug("--> blocked %s", qUtf8Printable(info.requestUrl().toString())); + qDebug("- %s", qUtf8Printable(rule.toString())); #endif + break; + } + } } QHash parse(const QString &filename) diff --git a/src/webengine/urlinterceptor.h b/src/webengine/urlinterceptor.h index 2f91e30..a4a1b6e 100644 --- a/src/webengine/urlinterceptor.h +++ b/src/webengine/urlinterceptor.h @@ -6,13 +6,14 @@ * SPDX-License-Identifier: GPL-3.0 */ -#ifndef URLREQUESTINTERCEPTOR_H -#define URLREQUESTINTERCEPTOR_H +#ifndef SMOLBOTE_URLREQUESTINTERCEPTOR_H +#define SMOLBOTE_URLREQUESTINTERCEPTOR_H #include #include #include #include +#include "web/urlfilter/filterrule.h" typedef std::pair Header; @@ -32,9 +33,11 @@ public: private: QHash rules; + std::vector filters; std::vector
m_headers; }; QHash parse(const QString &filename); +inline std::vector parseAdBlockList(const QString &filename); -#endif // URLREQUESTINTERCEPTOR_H +#endif // SMOLBOTE_URLREQUESTINTERCEPTOR_H -- cgit v1.2.1