aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--lib/configuration/configuration.cpp2
-rw-r--r--lib/web/urlfilter/adblockrule.cpp70
-rw-r--r--lib/web/urlfilter/adblockrule.h8
-rw-r--r--lib/web/urlfilter/filterrule.cpp21
-rw-r--r--lib/web/urlfilter/filterrule.h15
-rw-r--r--src/webengine/urlinterceptor.cpp47
-rw-r--r--src/webengine/urlinterceptor.h9
7 files changed, 119 insertions, 53 deletions
diff --git a/lib/configuration/configuration.cpp b/lib/configuration/configuration.cpp
index d31b4f2..c83e743 100644
--- a/lib/configuration/configuration.cpp
+++ b/lib/configuration/configuration.cpp
@@ -96,7 +96,7 @@ Configuration::Configuration(QObject *parent)
// Filter settings
("filter.path", po::value<std::string>()->default_value(filter_path))
- ("filter.json-path", po::value<std::string>()->default_value("~/.config/smolbote/filter.json"))
+ ("filter.adblock", po::value<std::string>())
("filter.header", po::value<std::vector<std::string>>())
// ("filter.cookies.block.all", po::value<bool>()->default_value(false))
// ("filter.cookies.block.thirdParty", po::value<bool>()->default_value(true))
diff --git a/lib/web/urlfilter/adblockrule.cpp b/lib/web/urlfilter/adblockrule.cpp
index 60262b7..58b1941 100644
--- a/lib/web/urlfilter/adblockrule.cpp
+++ b/lib/web/urlfilter/adblockrule.cpp
@@ -1,30 +1,13 @@
-#include "adblockrule.h"
-
-inline std::pair<QWebEngineUrlRequestInfo::ResourceType, bool> parseOption(const QString &option)
-{
- if(option.endsWith(QLatin1Literal("script"))) {
- return std::make_pair(QWebEngineUrlRequestInfo::ResourceTypeScript, !option.startsWith(QLatin1Literal("~")));
-
- } else if(option.endsWith(QLatin1Literal("image"))) {
- return std::make_pair(QWebEngineUrlRequestInfo::ResourceTypeImage, !option.startsWith(QLatin1Literal("~")));
-
- } else if(option.endsWith(QLatin1Literal("stylesheet"))) {
- return std::make_pair(QWebEngineUrlRequestInfo::ResourceTypeStylesheet, !option.startsWith(QLatin1Literal("~")));
+/*
+ * This file is part of smolbote. It's copyrighted by the contributors recorded
+ * in the version control history of the file, available from its original
+ * location: https://neueland.iserlohn-fortress.net/smolbote.hg
+ *
+ * SPDX-License-Identifier: GPL-3.0
+ */
+// Based on Falkon's AdBlockRule class
- } else if(option.endsWith(QLatin1Literal("object"))) {
- return std::make_pair(QWebEngineUrlRequestInfo::ResourceTypeObject, !option.startsWith(QLatin1Literal("~")));
-
- } else if(option.endsWith(QLatin1Literal("xmlhttprequest"))) {
- return std::make_pair(QWebEngineUrlRequestInfo::ResourceTypeXhr, !option.startsWith(QLatin1Literal("~")));
-
- } else if(option.endsWith(QLatin1Literal("other"))) {
- return std::make_pair(QWebEngineUrlRequestInfo::ResourceTypeUnknown, !option.startsWith(QLatin1Literal("~")));
-
- } else {
- // unhandled pair
- Q_ASSERT(false);
- }
-}
+#include "adblockrule.h"
// adblock format documentation
// https://adblockplus.org/filters
@@ -35,9 +18,10 @@ inline std::pair<QWebEngineUrlRequestInfo::ResourceType, bool> parseOption(const
AdBlockRule::AdBlockRule(const QString &filter)
{
+ originalFilter = filter;
QString parsedLine = filter.trimmed();
- // there is no rule, or it"s a comment
+ // there is no rule, or it's a comment
if(parsedLine.isEmpty() || parsedLine.startsWith("!")) {
return;
}
@@ -51,9 +35,10 @@ AdBlockRule::AdBlockRule(const QString &filter)
// exception rules
if(parsedLine.startsWith(QLatin1Literal("@@"))) {
- m_isException = true;
+ m_isBlocking = false;
parsedLine.remove(0, 2);
- }
+ } else
+ m_isBlocking = true;
// parse options
{
@@ -71,16 +56,29 @@ AdBlockRule::AdBlockRule(const QString &filter)
else
allowedDomains.append(domain);
}
- } else {
- auto optPair = parseOption(option);
- m_resourceTypeOptions.insert(optPair.first, optPair.second);
+ } else if(option.endsWith(QLatin1Literal("script"))) {
+ m_resourceTypeOptions.insert(QWebEngineUrlRequestInfo::ResourceTypeScript, !option.startsWith(QLatin1Literal("~")));
+
+ } else if(option.endsWith(QLatin1Literal("image"))) {
+ m_resourceTypeOptions.insert(QWebEngineUrlRequestInfo::ResourceTypeImage, !option.startsWith(QLatin1Literal("~")));
+
+ } else if(option.endsWith(QLatin1Literal("stylesheet"))) {
+ m_resourceTypeOptions.insert(QWebEngineUrlRequestInfo::ResourceTypeStylesheet, !option.startsWith(QLatin1Literal("~")));
+
+ } else if(option.endsWith(QLatin1Literal("object"))) {
+ m_resourceTypeOptions.insert(QWebEngineUrlRequestInfo::ResourceTypeObject, !option.startsWith(QLatin1Literal("~")));
+
+ } else if(option.endsWith(QLatin1Literal("xmlhttprequest"))) {
+ m_resourceTypeOptions.insert(QWebEngineUrlRequestInfo::ResourceTypeXhr, !option.startsWith(QLatin1Literal("~")));
+
+ } else if(option.endsWith(QLatin1Literal("other"))) {
+ m_resourceTypeOptions.insert(QWebEngineUrlRequestInfo::ResourceTypeUnknown, !option.startsWith(QLatin1Literal("~")));
}
}
-
}
}
- // regular expression rules
+ // regular expression rule
if(parsedLine.startsWith(QLatin1Literal("/")) && parsedLine.endsWith(QLatin1Literal("/"))) {
parsedLine = parsedLine.mid(1, parsedLine.length() - 2);
@@ -89,7 +87,7 @@ AdBlockRule::AdBlockRule(const QString &filter)
return;
}
- // basic filter rules
+ // string equals rule
if(parsedLine.startsWith(QLatin1Literal("|")) && parsedLine.endsWith(QLatin1Literal("|"))) {
urlMatchType = StringEquals;
match = parsedLine.mid(1, parsedLine.length() - 2);
@@ -117,6 +115,8 @@ AdBlockRule::AdBlockRule(const QString &filter)
// separator "^" - end, ? or /
if(parsedLine.contains(QLatin1Literal("*")) || parsedLine.contains(QLatin1Literal("^"))) {
urlMatchType = RegularExpressionMatch;
+ parsedLine.replace(QLatin1Literal("||"), QLatin1Literal("^\\w+://"));
+ parsedLine.replace(QLatin1Literal("|"), QLatin1Literal("\\|"));
parsedLine.replace(QLatin1Literal("*"), QLatin1Literal(".*"));
parsedLine.replace(QLatin1Literal("^"), QLatin1Literal("($|\\?|\\/)"));
regexp.setPattern(parsedLine);
diff --git a/lib/web/urlfilter/adblockrule.h b/lib/web/urlfilter/adblockrule.h
index aeabf4f..5c03f91 100644
--- a/lib/web/urlfilter/adblockrule.h
+++ b/lib/web/urlfilter/adblockrule.h
@@ -1,3 +1,11 @@
+/*
+ * This file is part of smolbote. It's copyrighted by the contributors recorded
+ * in the version control history of the file, available from its original
+ * location: https://neueland.iserlohn-fortress.net/smolbote.hg
+ *
+ * SPDX-License-Identifier: GPL-3.0
+ */
+
#ifndef SMOLBOTE_ADBLOCKRULE_H
#define SMOLBOTE_ADBLOCKRULE_H
diff --git a/lib/web/urlfilter/filterrule.cpp b/lib/web/urlfilter/filterrule.cpp
index ba2181f..67ff4d7 100644
--- a/lib/web/urlfilter/filterrule.cpp
+++ b/lib/web/urlfilter/filterrule.cpp
@@ -1,3 +1,11 @@
+/*
+ * This file is part of smolbote. It's copyrighted by the contributors recorded
+ * in the version control history of the file, available from its original
+ * location: https://neueland.iserlohn-fortress.net/smolbote.hg
+ *
+ * SPDX-License-Identifier: GPL-3.0
+ */
+
#include "filterrule.h"
inline bool isMatchingDomain(const QString &domain, const QString &filter)
@@ -26,6 +34,11 @@ bool FilterRule::isEnabled() const
return m_isEnabled;
}
+bool FilterRule::isBlocking() const // Getter for the rule's blocking flag.
+{
+ return m_isBlocking; // AdBlockRule's constructor sets this to false for "@@" exception rules, true otherwise
+}
+
bool FilterRule::matchesDomain(const QString &domain) const
{
// no domains have been allowed or blocked -> allow on all domains
@@ -67,8 +80,7 @@ bool FilterRule::matchesUrl(const QUrl &url) const
return false;
case RegularExpressionMatch:
- if(regexp.match(url.toString()).hasMatch())
- return !m_isException;
+ return regexp.match(url.toString()).hasMatch();
case StringContains:
return url.toString().contains(match);
@@ -87,3 +99,8 @@ bool FilterRule::matchesUrl(const QUrl &url) const
}
}
+
+QString FilterRule::toString() const // Returns the textual form of the rule.
+{
+ return originalFilter; // the filter line as AdBlockRule's constructor received it, before trimming
+}
diff --git a/lib/web/urlfilter/filterrule.h b/lib/web/urlfilter/filterrule.h
index 8a622fe..5b9a6cf 100644
--- a/lib/web/urlfilter/filterrule.h
+++ b/lib/web/urlfilter/filterrule.h
@@ -1,3 +1,11 @@
+/*
+ * This file is part of smolbote. It's copyrighted by the contributors recorded
+ * in the version control history of the file, available from its original
+ * location: https://neueland.iserlohn-fortress.net/smolbote.hg
+ *
+ * SPDX-License-Identifier: GPL-3.0
+ */
+
#ifndef SMOLBOTE_FILTERRULE_H
#define SMOLBOTE_FILTERRULE_H
@@ -24,6 +32,7 @@ public:
FilterRule() = default;
bool isEnabled() const;
+ bool isBlocking() const;
/**
* @brief matchesDomain
@@ -46,9 +55,13 @@ public:
*/
bool matchesUrl(const QUrl &url) const;
+ QString toString() const;
+
protected:
bool m_isEnabled = false;
- bool m_isException = false;
+ bool m_isBlocking = true;
+
+ QString originalFilter;
UrlMatchType urlMatchType = InvalidMatch;
QHash<QWebEngineUrlRequestInfo::ResourceType, bool> m_resourceTypeOptions;
diff --git a/src/webengine/urlinterceptor.cpp b/src/webengine/urlinterceptor.cpp
index a66ab57..cf50e59 100644
--- a/src/webengine/urlinterceptor.cpp
+++ b/src/webengine/urlinterceptor.cpp
@@ -7,12 +7,34 @@
*/
#include "urlinterceptor.h"
+#include "web/urlfilter/adblockrule.h"
#include <QDir>
+#include <QJsonArray>
+#include <QJsonDocument>
#include <QTextStream>
-#include <configuration/configuration.h>
#include <boost/algorithm/string.hpp>
-#include <QJsonDocument>
-#include <QJsonArray>
+#include <configuration/configuration.h>
+
+/**
+ * @brief Load an AdBlock-format filter list from a file.
+ *
+ * Each line of the file is passed to the AdBlockRule constructor; only rules
+ * that report isEnabled() are kept.
+ *
+ * @param filename path of the filter list to read
+ * @return the enabled rules, in the order they appear in the file; empty when
+ *         the file cannot be opened
+ */
+inline std::vector<FilterRule> parseAdBlockList(const QString &filename)
+{
+    std::vector<FilterRule> rules;
+    QFile list(filename);
+
+    // Test the result of open() itself. Writing `open(...), true` applies the
+    // comma operator, which discards the result and makes the check always pass.
+    if(!list.open(QIODevice::ReadOnly | QIODevice::Text)) {
+        return rules;
+    }
+
+    QTextStream l(&list);
+    QString line;
+    while(l.readLineInto(&line)) {
+        AdBlockRule rule(line);
+        if(rule.isEnabled()) {
+            // NOTE(review): the rule is stored as a FilterRule value; presumably
+            // AdBlockRule derives from FilterRule and adds no state of its own - confirm.
+            rules.emplace_back(std::move(rule));
+            //qDebug("added rule: %s", qUtf8Printable(line));
+        }
+    }
+    list.close();
+
+    return rules;
+}
UrlRequestInterceptor::UrlRequestInterceptor(const std::unique_ptr<Configuration> &config, QObject *parent)
: QWebEngineUrlRequestInterceptor(parent)
@@ -39,8 +61,9 @@ UrlRequestInterceptor::UrlRequestInterceptor(const std::unique_ptr<Configuration
}
}
-
-
+ auto filtersPath = config->value<QString>("filter.adblock");
+ if(filtersPath)
+ filters = std::move(parseAdBlockList(filtersPath.value()));
}
// test DNT on https://browserleaks.com/donottrack
@@ -55,14 +78,16 @@ void UrlRequestInterceptor::interceptRequest(QWebEngineUrlRequestInfo &info)
return;
}
+ for(const FilterRule &rule : filters) {
+ if(rule.matchesDomain(info.firstPartyUrl().host()) && rule.matchesType(info.resourceType()) && rule.matchesUrl(info.requestUrl())) {
+ info.block(rule.isBlocking());
#ifdef QT_DEBUG
-// qDebug("request>>>");
-// qDebug("firstParty url=%s", qUtf8Printable(info.firstPartyUrl().toString()));
-// qDebug("firstParty host=%s", qUtf8Printable(info.firstPartyUrl().host()));
-// qDebug("request url=%s", qUtf8Printable(info.requestUrl().toString()));
-// qDebug("request host=%s", qUtf8Printable(info.requestUrl().host()));
-// qDebug("<<<");
+ qDebug("--> blocked %s", qUtf8Printable(info.requestUrl().toString()));
+ qDebug("- %s", qUtf8Printable(rule.toString()));
#endif
+ break;
+ }
+ }
}
QHash<QString, UrlRequestInterceptor::HostRule> parse(const QString &filename)
diff --git a/src/webengine/urlinterceptor.h b/src/webengine/urlinterceptor.h
index 2f91e30..a4a1b6e 100644
--- a/src/webengine/urlinterceptor.h
+++ b/src/webengine/urlinterceptor.h
@@ -6,13 +6,14 @@
* SPDX-License-Identifier: GPL-3.0
*/
-#ifndef URLREQUESTINTERCEPTOR_H
-#define URLREQUESTINTERCEPTOR_H
+#ifndef SMOLBOTE_URLREQUESTINTERCEPTOR_H
+#define SMOLBOTE_URLREQUESTINTERCEPTOR_H
#include <QWebEngineUrlRequestInterceptor>
#include <memory>
#include <QVector>
#include <QByteArray>
+#include "web/urlfilter/filterrule.h"
typedef std::pair<std::string, std::string> Header;
@@ -32,9 +33,11 @@ public:
private:
QHash<QString, HostRule> rules;
+ std::vector<FilterRule> filters;
std::vector<Header> m_headers;
};
QHash<QString, UrlRequestInterceptor::HostRule> parse(const QString &filename);
+inline std::vector<FilterRule> parseAdBlockList(const QString &filename);
-#endif // URLREQUESTINTERCEPTOR_H
+#endif // SMOLBOTE_URLREQUESTINTERCEPTOR_H