about summary refs log tree commit diff
diff options
context:
space:
mode:
author Aqua-sama <aqua@iserlohn-fortress.net> 2018-10-01 16:43:18 +0200
committer Aqua-sama <aqua@iserlohn-fortress.net> 2018-10-02 11:47:49 +0200
commit 7d8cbdb9941532cd5bf560b21395f6ed371d1ab5 (patch)
tree 9c5a2d72a3882050f2c3c95ec2d15ad21ff98a93
parent updater: windows fixes (diff)
download smolbote-7d8cbdb9941532cd5bf560b21395f6ed371d1ab5.tar.xz
Split off UrlFilter into library
- add more adblock filter options
-rw-r--r-- CMakeLists.txt 1
-rw-r--r-- lib/urlfilter/CMakeLists.txt 15
-rw-r--r-- lib/urlfilter/filterrule.cpp (renamed from lib/web/urlfilter/filterrule.cpp) 27
-rw-r--r-- lib/urlfilter/filterrule.h (renamed from lib/web/urlfilter/filterrule.h) 30
-rw-r--r-- lib/urlfilter/formats/adblockrule.cpp (renamed from lib/web/urlfilter/adblockrule.cpp) 83
-rw-r--r-- lib/urlfilter/formats/adblockrule.h (renamed from lib/web/urlfilter/adblockrule.h) 9
-rw-r--r-- lib/web/CMakeLists.txt 6
-rw-r--r-- src/CMakeLists.txt 2
-rw-r--r-- src/webengine/urlinterceptor.cpp 11
-rw-r--r-- src/webengine/urlinterceptor.h 4
-rw-r--r-- test/CMakeLists.txt 4
-rw-r--r-- test/adblock.txt 2
-rw-r--r-- test/urlfilter/adblocktest.cpp 21
13 files changed, 119 insertions, 96 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 90aa0ef..ca345fb 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -51,6 +51,7 @@ add_subdirectory(lib/bookmarks)
add_subdirectory(lib/downloads)
add_subdirectory(lib/configuration)
add_subdirectory(lib/web)
+add_subdirectory(lib/urlfilter)
add_subdirectory(plugins/ProfileEditor)
add_subdirectory(plugins/ConfigurationEditor)
diff --git a/lib/urlfilter/CMakeLists.txt b/lib/urlfilter/CMakeLists.txt
new file mode 100644
index 0000000..842f18f
--- /dev/null
+++ b/lib/urlfilter/CMakeLists.txt
@@ -0,0 +1,15 @@
+# Find includes in corresponding build directories
+set(CMAKE_INCLUDE_CURRENT_DIR ON)
+
+# Instruct CMake to run moc automatically when needed.
+set(CMAKE_AUTOMOC ON)
+
+add_library(urlfilter
+ filterrule.cpp
+ filterrule.h
+
+ formats/adblockrule.cpp
+ formats/adblockrule.h
+)
+
+target_link_libraries(urlfilter Qt5::WebEngineWidgets)
diff --git a/lib/web/urlfilter/filterrule.cpp b/lib/urlfilter/filterrule.cpp
index 67ff4d7..22a2f06 100644
--- a/lib/web/urlfilter/filterrule.cpp
+++ b/lib/urlfilter/filterrule.cpp
@@ -39,24 +39,21 @@ bool FilterRule::isBlocking() const
return m_isBlocking;
}
-bool FilterRule::matchesDomain(const QString &domain) const
+bool FilterRule::matchesDomain(uint domainHash) const
{
// no domains have been allowed or blocked -> allow on all domains
- if(allowedDomains.isEmpty() && blockedDomains.isEmpty())
+ if(allowedDomains_hashes.isEmpty() && blockedDomains_hashes.isEmpty()) {
return true;
-
- if(!blockedDomains.isEmpty()) {
- // do not match rule if the domain has been blocked
- if(blockedDomains.contains(domain))
- return false;
}
- if(!allowedDomains.isEmpty()) {
- if(allowedDomains.contains(domain))
- return true;
+ // blockedDomains prevents the rules from being matched on those domains
+ if(blockedDomains_hashes.contains(domainHash)) {
+ return false;
}
- return false;
+ // allowedDomains means the rule should only be matched on those domains
+ return allowedDomains_hashes.contains(domainHash);
+
}
bool FilterRule::matchesType(QWebEngineUrlRequestInfo::ResourceType type) const
@@ -75,7 +72,7 @@ bool FilterRule::matchesType(QWebEngineUrlRequestInfo::ResourceType type) const
bool FilterRule::matchesUrl(const QUrl &url) const
{
- switch (urlMatchType) {
+ switch(urlMatchType) {
case InvalidMatch:
return false;
@@ -96,11 +93,5 @@ bool FilterRule::matchesUrl(const QUrl &url) const
case DomainMatch:
return isMatchingDomain(url.host(), match);
-
}
}
-
-QString FilterRule::toString() const
-{
- return originalFilter;
-}
diff --git a/lib/web/urlfilter/filterrule.h b/lib/urlfilter/filterrule.h
index 5b9a6cf..95fff6a 100644
--- a/lib/web/urlfilter/filterrule.h
+++ b/lib/urlfilter/filterrule.h
@@ -9,12 +9,14 @@
#ifndef SMOLBOTE_FILTERRULE_H
#define SMOLBOTE_FILTERRULE_H
+#include <QObject>
#include <QRegularExpression>
#include <QStringList>
#include <QStringMatcher>
#include <QUrl>
#include <QWebEngineUrlRequestInfo>
#include <memory>
+#include <QVector>
class FilterRule
{
@@ -29,43 +31,21 @@ public:
DomainMatch
};
- FilterRule() = default;
-
bool isEnabled() const;
bool isBlocking() const;
- /**
- * @brief matchesDomain
- * @param domain
- * @return
- */
- bool matchesDomain(const QString &domain) const;
-
- /**
- * @brief matchesType
- * @param type
- * @return true if type matches, false otherwise
- */
+ bool matchesDomain(uint domainHash) const;
bool matchesType(QWebEngineUrlRequestInfo::ResourceType type) const;
-
- /**
- * @brief matchesUrl
- * @param url
- * @return
- */
bool matchesUrl(const QUrl &url) const;
- QString toString() const;
-
protected:
bool m_isEnabled = false;
bool m_isBlocking = true;
- QString originalFilter;
-
UrlMatchType urlMatchType = InvalidMatch;
QHash<QWebEngineUrlRequestInfo::ResourceType, bool> m_resourceTypeOptions;
- QStringList allowedDomains, blockedDomains;
+
+ QVector<uint> allowedDomains_hashes, blockedDomains_hashes;
QString match;
QRegularExpression regexp;
diff --git a/lib/web/urlfilter/adblockrule.cpp b/lib/urlfilter/formats/adblockrule.cpp
index 58b1941..ef7bec1 100644
--- a/lib/web/urlfilter/adblockrule.cpp
+++ b/lib/urlfilter/formats/adblockrule.cpp
@@ -18,7 +18,6 @@
AdBlockRule::AdBlockRule(const QString &filter)
{
- originalFilter = filter;
QString parsedLine = filter.trimmed();
// there is no rule, or it's a comment
@@ -50,29 +49,15 @@ AdBlockRule::AdBlockRule(const QString &filter)
for(const QString &option : options) {
if(option.startsWith(QLatin1Literal("domain"))) {
const auto domainList = option.mid(7).split(QLatin1Literal("|"));
- for(const QString &domain : domainList) {
- if(domain.startsWith(QLatin1Literal("~")))
- blockedDomains.append(domain.mid(1));
- else
- allowedDomains.append(domain);
+ for (const QString &domain : domainList) {
+ if (domain.startsWith(QLatin1Literal("~"))) {
+ blockedDomains_hashes.append(qHash(domain.mid(1)));
+ } else {
+ allowedDomains_hashes.append(qHash(domain));
+ }
}
- } else if(option.endsWith(QLatin1Literal("script"))) {
- m_resourceTypeOptions.insert(QWebEngineUrlRequestInfo::ResourceTypeScript, !option.startsWith(QLatin1Literal("~")));
-
- } else if(option.endsWith(QLatin1Literal("image"))) {
- m_resourceTypeOptions.insert(QWebEngineUrlRequestInfo::ResourceTypeImage, !option.startsWith(QLatin1Literal("~")));
-
- } else if(option.endsWith(QLatin1Literal("stylesheet"))) {
- m_resourceTypeOptions.insert(QWebEngineUrlRequestInfo::ResourceTypeStylesheet, !option.startsWith(QLatin1Literal("~")));
-
- } else if(option.endsWith(QLatin1Literal("object"))) {
- m_resourceTypeOptions.insert(QWebEngineUrlRequestInfo::ResourceTypeObject, !option.startsWith(QLatin1Literal("~")));
-
- } else if(option.endsWith(QLatin1Literal("xmlhttprequest"))) {
- m_resourceTypeOptions.insert(QWebEngineUrlRequestInfo::ResourceTypeXhr, !option.startsWith(QLatin1Literal("~")));
-
- } else if(option.endsWith(QLatin1Literal("other"))) {
- m_resourceTypeOptions.insert(QWebEngineUrlRequestInfo::ResourceTypeUnknown, !option.startsWith(QLatin1Literal("~")));
+ } else {
+ parseOption(option);
}
}
}
@@ -125,3 +110,55 @@ AdBlockRule::AdBlockRule(const QString &filter)
match = parsedLine;
}
+void AdBlockRule::parseOption(const QString &option)
+{
+ const bool exception = !option.startsWith(QLatin1Literal("~"));
+
+ if(option.endsWith(QLatin1Literal("script"))) {
+ // external scripts loaded via HTML script tag
+ m_resourceTypeOptions.insert(QWebEngineUrlRequestInfo::ResourceTypeScript, exception);
+
+ } else if(option.endsWith(QLatin1Literal("image"))) {
+ // regular images, typically loaded via HTML img tag
+ m_resourceTypeOptions.insert(QWebEngineUrlRequestInfo::ResourceTypeImage, exception);
+
+ } else if(option.endsWith(QLatin1Literal("stylesheet"))) {
+ // external CSS stylesheet files
+ m_resourceTypeOptions.insert(QWebEngineUrlRequestInfo::ResourceTypeStylesheet, exception);
+
+ } else if(option.endsWith(QLatin1Literal("object"))) {
+ // content handled by browser plugins, e.g. Flash or Java
+ m_resourceTypeOptions.insert(QWebEngineUrlRequestInfo::ResourceTypeObject, exception);
+
+ } else if(option.endsWith(QLatin1Literal("xmlhttprequest"))) {
+ // requests started using the XMLHttpRequest object or fetch() API
+ m_resourceTypeOptions.insert(QWebEngineUrlRequestInfo::ResourceTypeXhr, exception);
+
+ } else if(option.endsWith(QLatin1Literal("object-subrequest"))) {
+ // requests started by plugins like Flash
+ m_resourceTypeOptions.insert(QWebEngineUrlRequestInfo::ResourceTypePluginResource, exception);
+
+ } else if(option.endsWith(QLatin1Literal("subdocument"))) {
+ // embedded pages, usually included via HTML frames
+ m_resourceTypeOptions.insert(QWebEngineUrlRequestInfo::ResourceTypeSubFrame, exception);
+
+ } else if(option.endsWith(QLatin1Literal("ping"))) {
+ // requests started by <a ping> or navigator.sendBeacon()
+ m_resourceTypeOptions.insert(QWebEngineUrlRequestInfo::ResourceTypePing, exception);
+
+ } else if(option.endsWith(QLatin1Literal("websocket"))) {
+ // requests initiated via WebSocket object
+ qDebug("Resource type 'websocket' not available");
+
+ } else if(option.endsWith(QLatin1Literal("webrtc"))) {
+ // connections opened via RTCPeerConnection instances to ICE servers
+ qDebug("Resource type 'webrtc' not available");
+
+ } else if(option.endsWith(QLatin1Literal("document"))) {
+ // the page itself
+ m_resourceTypeOptions.insert(QWebEngineUrlRequestInfo::ResourceTypeMainFrame, exception);
+
+ } else if(option.endsWith(QLatin1Literal("other"))) {
+ m_resourceTypeOptions.insert(QWebEngineUrlRequestInfo::ResourceTypeUnknown, exception);
+ }
+}
diff --git a/lib/web/urlfilter/adblockrule.h b/lib/urlfilter/formats/adblockrule.h
index 7b6f683..8677c2c 100644
--- a/lib/web/urlfilter/adblockrule.h
+++ b/lib/urlfilter/formats/adblockrule.h
@@ -9,18 +9,15 @@
#ifndef SMOLBOTE_ADBLOCKRULE_H
#define SMOLBOTE_ADBLOCKRULE_H
-#include <QObject>
-#include <QString>
-#include <QRegularExpression>
-#include <QUrl>
-#include <QWebEngineUrlRequestInfo>
-#include "filterrule.h"
+#include "../filterrule.h"
class AdBlockRule : public FilterRule
{
public:
explicit AdBlockRule(const QString &filter);
+ void parseOption(const QString &option);
+
};
#endif // SMOLBOTE_ADBLOCKRULE_H
diff --git a/lib/web/CMakeLists.txt b/lib/web/CMakeLists.txt
index b2d6c82..86f7f1c 100644
--- a/lib/web/CMakeLists.txt
+++ b/lib/web/CMakeLists.txt
@@ -9,12 +9,6 @@ add_library(web
profilemanager.h
webprofile.cpp
webprofile.h
-
- urlfilter/filterrule.cpp
- urlfilter/filterrule.h
-
- urlfilter/adblockrule.cpp
- urlfilter/adblockrule.h
)
target_link_libraries(web Qt5::WebEngineWidgets)
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 3263bbb..6162cd4 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -72,7 +72,7 @@ target_link_libraries(${poi_exe}
about
addressbar
configuration
- bookmarks downloads web
+ bookmarks downloads web urlfilter
)
if(Breakpad)
diff --git a/src/webengine/urlinterceptor.cpp b/src/webengine/urlinterceptor.cpp
index 06464ae..db4aea9 100644
--- a/src/webengine/urlinterceptor.cpp
+++ b/src/webengine/urlinterceptor.cpp
@@ -7,7 +7,7 @@
*/
#include "urlinterceptor.h"
-#include "web/urlfilter/adblockrule.h"
+#include "urlfilter/formats/adblockrule.h"
#include <QDir>
#include <QJsonArray>
#include <QJsonDocument>
@@ -27,7 +27,6 @@ inline std::vector<FilterRule> parseAdBlockList(const QString &filename)
AdBlockRule rule(line);
if(rule.isEnabled()) {
rules.emplace_back(std::move(rule));
- //qDebug("added rule: %s", qUtf8Printable(line));
}
}
list.close();
@@ -63,7 +62,7 @@ UrlRequestInterceptor::UrlRequestInterceptor(const std::unique_ptr<Configuration
auto filtersPath = config->value<QString>("filter.adblock");
if(filtersPath)
- filters = std::move(parseAdBlockList(filtersPath.value()));
+ filters = parseAdBlockList(filtersPath.value());
}
// test DNT on https://browserleaks.com/donottrack
@@ -78,12 +77,14 @@ void UrlRequestInterceptor::interceptRequest(QWebEngineUrlRequestInfo &info)
return;
}
+ const uint domainHash = qHash(info.firstPartyUrl().host());
+ const QWebEngineUrlRequestInfo::ResourceType type = info.resourceType();
+ const QUrl requestUrl = info.requestUrl();
for(const FilterRule &rule : filters) {
- if(rule.matchesDomain(info.firstPartyUrl().host()) && rule.matchesType(info.resourceType()) && rule.matchesUrl(info.requestUrl())) {
+ if(rule.matchesDomain(domainHash) && rule.matchesType(type) && rule.matchesUrl(requestUrl)) {
info.block(rule.isBlocking());
#ifdef QT_DEBUG
qDebug("--> blocked %s", qUtf8Printable(info.requestUrl().toString()));
- qDebug("- %s", qUtf8Printable(rule.toString()));
#endif
break;
}
diff --git a/src/webengine/urlinterceptor.h b/src/webengine/urlinterceptor.h
index 5c78b62..575e0c9 100644
--- a/src/webengine/urlinterceptor.h
+++ b/src/webengine/urlinterceptor.h
@@ -9,7 +9,7 @@
#ifndef SMOLBOTE_URLREQUESTINTERCEPTOR_H
#define SMOLBOTE_URLREQUESTINTERCEPTOR_H
-#include "web/urlfilter/filterrule.h"
+#include "urlfilter/filterrule.h"
#include <QByteArray>
#include <QVector>
#include <QWebEngineUrlRequestInterceptor>
@@ -27,7 +27,7 @@ public:
};
explicit UrlRequestInterceptor(const std::unique_ptr<Configuration> &config, QObject *parent = nullptr);
- ~UrlRequestInterceptor() = default;
+ ~UrlRequestInterceptor() override = default;
void interceptRequest(QWebEngineUrlRequestInfo &info) override;
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index a026ad6..4302ab2 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -7,6 +7,6 @@ add_executable(AdBlockTest
urlfilter/adblocktest.h
)
-target_include_directories(AdBlockTest PRIVATE ../lib/web)
+target_include_directories(AdBlockTest PRIVATE ../lib/urlfilter)
-target_link_libraries(AdBlockTest Qt5::Test web)
+target_link_libraries(AdBlockTest Qt5::Test urlfilter)
diff --git a/test/adblock.txt b/test/adblock.txt
index 38d4688..cd284e8 100644
--- a/test/adblock.txt
+++ b/test/adblock.txt
@@ -1,4 +1,4 @@
/banner/*/img^
||ads.example.com^
|http://example.com/|
-
+/banner\d+/
diff --git a/test/urlfilter/adblocktest.cpp b/test/urlfilter/adblocktest.cpp
index 416bc20..b31d965 100644
--- a/test/urlfilter/adblocktest.cpp
+++ b/test/urlfilter/adblocktest.cpp
@@ -1,11 +1,12 @@
#include "adblocktest.h"
#include <QtTest/QtTest>
-#include "urlfilter/adblockrule.h"
+#include "filterrule.h"
+#include "formats/adblockrule.h"
-inline bool check(const std::vector<FilterRule> rules, const QUrl &url)
+inline bool check(const std::vector<AdBlockRule> rules, const QUrl &url)
{
- for(const FilterRule &rule : rules) {
- if(rule.matchesDomain(url.host()) && rule.matchesUrl(url))
+ for(const AdBlockRule &rule : rules) {
+ if(rule.matchesDomain(qHash(url.host())) && rule.matchesUrl(url))
return true;
}
return false;
@@ -13,9 +14,10 @@ inline bool check(const std::vector<FilterRule> rules, const QUrl &url)
void AdBlockTest::parseList()
{
- std::vector<FilterRule> rules;
+ std::vector<AdBlockRule> rules;
QFile list("adblock.txt");
+ int ruleCount = 0;
QCOMPARE(list.open(QIODevice::ReadOnly | QIODevice::Text), true);
{
QTextStream l(&list);
@@ -24,6 +26,7 @@ void AdBlockTest::parseList()
AdBlockRule rule(line);
if(rule.isEnabled()) {
rules.emplace_back(std::move(rule));
+ ruleCount++;
qDebug("added rule: %s", qUtf8Printable(line));
}
}
@@ -31,7 +34,7 @@ void AdBlockTest::parseList()
list.close();
// there should be 3 rules
- QCOMPARE(rules.size(), 3);
+ QCOMPARE(rules.size(), ruleCount);
// block by address part
QCOMPARE(check(rules, QUrl("http://example.com/banner/foo/img")), true);
@@ -48,11 +51,15 @@ void AdBlockTest::parseList()
QCOMPARE(check(rules, QUrl("http://ads.example.com.ua/foo.gif")), false);
QCOMPARE(check(rules, QUrl("http://example.com/redirect/http://ads.example.com/")), false);
-
// block exact address
QCOMPARE(check(rules, QUrl("http://example.com/")), true);
QCOMPARE(check(rules, QUrl("http://example.com/foo.gif")), false);
QCOMPARE(check(rules, QUrl("http://example.info/redirect/http://example.com/")), false);
+
+ // regular expression
+ QCOMPARE(check(rules, QUrl("http://another.com/banner123")), true);
+ QCOMPARE(check(rules, QUrl("http://another.com/banner321")), true);
+ QCOMPARE(check(rules, QUrl("http://another.com/banners")), false);
}
QTEST_GUILESS_MAIN(AdBlockTest)