diff options
author | Aqua-sama <aqua@iserlohn-fortress.net> | 2018-10-01 16:43:18 +0200 |
---|---|---|
committer | Aqua-sama <aqua@iserlohn-fortress.net> | 2018-10-02 11:47:49 +0200 |
commit | 7d8cbdb9941532cd5bf560b21395f6ed371d1ab5 (patch) | |
tree | 9c5a2d72a3882050f2c3c95ec2d15ad21ff98a93 | |
parent | updater: windows fixes (diff) | |
download | smolbote-7d8cbdb9941532cd5bf560b21395f6ed371d1ab5.tar.xz |
Split off UrlFilter into library
- add more adblock filter options
-rw-r--r-- | CMakeLists.txt | 1 | ||||
-rw-r--r-- | lib/urlfilter/CMakeLists.txt | 15 | ||||
-rw-r--r-- | lib/urlfilter/filterrule.cpp (renamed from lib/web/urlfilter/filterrule.cpp) | 27 | ||||
-rw-r--r-- | lib/urlfilter/filterrule.h (renamed from lib/web/urlfilter/filterrule.h) | 30 | ||||
-rw-r--r-- | lib/urlfilter/formats/adblockrule.cpp (renamed from lib/web/urlfilter/adblockrule.cpp) | 83 | ||||
-rw-r--r-- | lib/urlfilter/formats/adblockrule.h (renamed from lib/web/urlfilter/adblockrule.h) | 9 | ||||
-rw-r--r-- | lib/web/CMakeLists.txt | 6 | ||||
-rw-r--r-- | src/CMakeLists.txt | 2 | ||||
-rw-r--r-- | src/webengine/urlinterceptor.cpp | 11 | ||||
-rw-r--r-- | src/webengine/urlinterceptor.h | 4 | ||||
-rw-r--r-- | test/CMakeLists.txt | 4 | ||||
-rw-r--r-- | test/adblock.txt | 2 | ||||
-rw-r--r-- | test/urlfilter/adblocktest.cpp | 21 |
13 files changed, 119 insertions, 96 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt index 90aa0ef..ca345fb 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -51,6 +51,7 @@ add_subdirectory(lib/bookmarks) add_subdirectory(lib/downloads) add_subdirectory(lib/configuration) add_subdirectory(lib/web) +add_subdirectory(lib/urlfilter) add_subdirectory(plugins/ProfileEditor) add_subdirectory(plugins/ConfigurationEditor) diff --git a/lib/urlfilter/CMakeLists.txt b/lib/urlfilter/CMakeLists.txt new file mode 100644 index 0000000..842f18f --- /dev/null +++ b/lib/urlfilter/CMakeLists.txt @@ -0,0 +1,15 @@ +# Find includes in corresponding build directories +set(CMAKE_INCLUDE_CURRENT_DIR ON) + +# Instruct CMake to run moc automatically when needed. +set(CMAKE_AUTOMOC ON) + +add_library(urlfilter + filterrule.cpp + filterrule.h + + formats/adblockrule.cpp + formats/adblockrule.h +) + +target_link_libraries(urlfilter Qt5::WebEngineWidgets) diff --git a/lib/web/urlfilter/filterrule.cpp b/lib/urlfilter/filterrule.cpp index 67ff4d7..22a2f06 100644 --- a/lib/web/urlfilter/filterrule.cpp +++ b/lib/urlfilter/filterrule.cpp @@ -39,24 +39,21 @@ bool FilterRule::isBlocking() const return m_isBlocking; } -bool FilterRule::matchesDomain(const QString &domain) const +bool FilterRule::matchesDomain(uint domainHash) const { // no domains have been allowed or blocked -> allow on all domains - if(allowedDomains.isEmpty() && blockedDomains.isEmpty()) + if(allowedDomains_hashes.isEmpty() && blockedDomains_hashes.isEmpty()) { return true; - - if(!blockedDomains.isEmpty()) { - // do not match rule if the domain has been blocked - if(blockedDomains.contains(domain)) - return false; } - if(!allowedDomains.isEmpty()) { - if(allowedDomains.contains(domain)) - return true; + // blockedDomains prevents the rules from being matched on those domains + if(blockedDomains_hashes.contains(domainHash)) { + return false; } - return false; + // allowedDomains means the rule should only be matched on those domains + return allowedDomains_hashes.contains(domainHash); + } bool FilterRule::matchesType(QWebEngineUrlRequestInfo::ResourceType type) const @@ -75,7 +72,7 @@ bool FilterRule::matchesType(QWebEngineUrlRequestInfo::ResourceType type) const bool FilterRule::matchesUrl(const QUrl &url) const { - switch (urlMatchType) { + switch(urlMatchType) { case InvalidMatch: return false; @@ -96,11 +93,5 @@ bool FilterRule::matchesUrl(const QUrl &url) const case DomainMatch: return isMatchingDomain(url.host(), match); - } } - -QString FilterRule::toString() const -{ - return originalFilter; -} diff --git a/lib/web/urlfilter/filterrule.h b/lib/urlfilter/filterrule.h index 5b9a6cf..95fff6a 100644 --- a/lib/web/urlfilter/filterrule.h +++ b/lib/urlfilter/filterrule.h @@ -9,12 +9,14 @@ #ifndef SMOLBOTE_FILTERRULE_H #define SMOLBOTE_FILTERRULE_H +#include <QObject> #include <QRegularExpression> #include <QStringList> #include <QStringMatcher> #include <QUrl> #include <QWebEngineUrlRequestInfo> #include <memory> +#include <QVector> class FilterRule { @@ -29,43 +31,21 @@ public: DomainMatch }; - FilterRule() = default; - bool isEnabled() const; bool isBlocking() const; - /** - * @brief matchesDomain - * @param domain - * @return - */ - bool matchesDomain(const QString &domain) const; - - /** - * @brief matchesType - * @param type - * @return true if type matches, false otherwise - */ + bool matchesDomain(uint domainHash) const; bool matchesType(QWebEngineUrlRequestInfo::ResourceType type) const; - - /** - * @brief matchesUrl - * @param url - * @return - */ bool matchesUrl(const QUrl &url) const; - QString toString() const; - protected: bool m_isEnabled = false; bool m_isBlocking = true; - QString originalFilter; - UrlMatchType urlMatchType = InvalidMatch; QHash<QWebEngineUrlRequestInfo::ResourceType, bool> m_resourceTypeOptions; - QStringList allowedDomains, blockedDomains; + + QVector<uint> allowedDomains_hashes, blockedDomains_hashes; QString match; QRegularExpression regexp; diff --git a/lib/web/urlfilter/adblockrule.cpp b/lib/urlfilter/formats/adblockrule.cpp index 58b1941..ef7bec1 100644 --- a/lib/web/urlfilter/adblockrule.cpp +++ b/lib/urlfilter/formats/adblockrule.cpp @@ -18,7 +18,6 @@ AdBlockRule::AdBlockRule(const QString &filter) { - originalFilter = filter; QString parsedLine = filter.trimmed(); // there is no rule, or it's a comment @@ -50,29 +49,15 @@ AdBlockRule::AdBlockRule(const QString &filter) for(const QString &option : options) { if(option.startsWith(QLatin1Literal("domain"))) { const auto domainList = option.mid(7).split(QLatin1Literal("|")); - for(const QString &domain : domainList) { - if(domain.startsWith(QLatin1Literal("~"))) - blockedDomains.append(domain.mid(1)); - else - allowedDomains.append(domain); + for (const QString &domain : domainList) { + if (domain.startsWith(QLatin1Literal("~"))) { + blockedDomains_hashes.append(qHash(domain.mid(1))); + } else { + allowedDomains_hashes.append(qHash(domain)); + } } - } else if(option.endsWith(QLatin1Literal("script"))) { - m_resourceTypeOptions.insert(QWebEngineUrlRequestInfo::ResourceTypeScript, !option.startsWith(QLatin1Literal("~"))); - - } else if(option.endsWith(QLatin1Literal("image"))) { - m_resourceTypeOptions.insert(QWebEngineUrlRequestInfo::ResourceTypeImage, !option.startsWith(QLatin1Literal("~"))); - - } else if(option.endsWith(QLatin1Literal("stylesheet"))) { - m_resourceTypeOptions.insert(QWebEngineUrlRequestInfo::ResourceTypeStylesheet, !option.startsWith(QLatin1Literal("~"))); - - } else if(option.endsWith(QLatin1Literal("object"))) { - m_resourceTypeOptions.insert(QWebEngineUrlRequestInfo::ResourceTypeObject, !option.startsWith(QLatin1Literal("~"))); - - } else if(option.endsWith(QLatin1Literal("xmlhttprequest"))) { - m_resourceTypeOptions.insert(QWebEngineUrlRequestInfo::ResourceTypeXhr, !option.startsWith(QLatin1Literal("~"))); - - } else if(option.endsWith(QLatin1Literal("other"))) { - m_resourceTypeOptions.insert(QWebEngineUrlRequestInfo::ResourceTypeUnknown, !option.startsWith(QLatin1Literal("~"))); + } else { + parseOption(option); } } } @@ -125,3 +110,55 @@ AdBlockRule::AdBlockRule(const QString &filter) match = parsedLine; } +void AdBlockRule::parseOption(const QString &option) +{ + const bool exception = !option.startsWith(QLatin1Literal("~")); + + if(option.endsWith(QLatin1Literal("script"))) { + // external scripts loaded via HTML script tag + m_resourceTypeOptions.insert(QWebEngineUrlRequestInfo::ResourceTypeScript, exception); + + } else if(option.endsWith(QLatin1Literal("image"))) { + // regular images, typically loaded via HTML img tag + m_resourceTypeOptions.insert(QWebEngineUrlRequestInfo::ResourceTypeImage, exception); + + } else if(option.endsWith(QLatin1Literal("stylesheet"))) { + // external CSS stylesheet files + m_resourceTypeOptions.insert(QWebEngineUrlRequestInfo::ResourceTypeStylesheet, exception); + + } else if(option.endsWith(QLatin1Literal("object"))) { + // content handled by browser plugins, e.g. Flash or Java + m_resourceTypeOptions.insert(QWebEngineUrlRequestInfo::ResourceTypeObject, exception); + + } else if(option.endsWith(QLatin1Literal("xmlhttprequest"))) { + // requests started using the XMLHttpRequest object or fetch() API + m_resourceTypeOptions.insert(QWebEngineUrlRequestInfo::ResourceTypeXhr, exception); + + } else if(option.endsWith(QLatin1Literal("object-subrequest"))) { + // requests started by plugins like Flash + m_resourceTypeOptions.insert(QWebEngineUrlRequestInfo::ResourceTypePluginResource, exception); + + } else if(option.endsWith(QLatin1Literal("subdocument"))) { + // embedded pages, usually included via HTML frames + m_resourceTypeOptions.insert(QWebEngineUrlRequestInfo::ResourceTypeSubFrame, exception); + + } else if(option.endsWith(QLatin1Literal("ping"))) { + // requests started by <a ping> or navigator.sendBeacon() + m_resourceTypeOptions.insert(QWebEngineUrlRequestInfo::ResourceTypePing, exception); + + } else if(option.endsWith(QLatin1Literal("websocket"))) { + // requests initiated via WebSocket object + qDebug("Resource type 'websocket' not available"); + + } else if(option.endsWith(QLatin1Literal("webrtc"))) { + // connections opened via RTCPeerConnection instances to ICE servers + qDebug("Resource type 'webrtc' not available"); + + } else if(option.endsWith(QLatin1Literal("document"))) { + // the page itself + m_resourceTypeOptions.insert(QWebEngineUrlRequestInfo::ResourceTypeMainFrame, exception); + + } else if(option.endsWith(QLatin1Literal("other"))) { + m_resourceTypeOptions.insert(QWebEngineUrlRequestInfo::ResourceTypeUnknown, exception); + } +} diff --git a/lib/web/urlfilter/adblockrule.h b/lib/urlfilter/formats/adblockrule.h index 7b6f683..8677c2c 100644 --- a/lib/web/urlfilter/adblockrule.h +++ b/lib/urlfilter/formats/adblockrule.h @@ -9,18 +9,15 @@ #ifndef SMOLBOTE_ADBLOCKRULE_H #define SMOLBOTE_ADBLOCKRULE_H -#include <QObject> -#include <QString> -#include <QRegularExpression> -#include <QUrl> -#include <QWebEngineUrlRequestInfo> -#include "filterrule.h" +#include "../filterrule.h" class AdBlockRule : public FilterRule { public: explicit AdBlockRule(const QString &filter); + void parseOption(const QString &option); + }; #endif // SMOLBOTE_ADBLOCKRULE_H diff --git a/lib/web/CMakeLists.txt b/lib/web/CMakeLists.txt index b2d6c82..86f7f1c 100644 --- a/lib/web/CMakeLists.txt +++ b/lib/web/CMakeLists.txt @@ -9,12 +9,6 @@ add_library(web profilemanager.h webprofile.cpp webprofile.h - - urlfilter/filterrule.cpp - urlfilter/filterrule.h - - urlfilter/adblockrule.cpp - urlfilter/adblockrule.h ) target_link_libraries(web Qt5::WebEngineWidgets) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 3263bbb..6162cd4 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -72,7 +72,7 @@ target_link_libraries(${poi_exe} about addressbar configuration - bookmarks downloads web + bookmarks downloads web urlfilter ) if(Breakpad) diff --git a/src/webengine/urlinterceptor.cpp b/src/webengine/urlinterceptor.cpp index 06464ae..db4aea9 100644 --- a/src/webengine/urlinterceptor.cpp +++ b/src/webengine/urlinterceptor.cpp @@ -7,7 +7,7 @@ */ #include "urlinterceptor.h" -#include "web/urlfilter/adblockrule.h" +#include "urlfilter/formats/adblockrule.h" #include <QDir> #include <QJsonArray> #include <QJsonDocument> @@ -27,7 +27,6 @@ inline std::vector<FilterRule> parseAdBlockList(const QString &filename) AdBlockRule rule(line); if(rule.isEnabled()) { rules.emplace_back(std::move(rule)); - //qDebug("added rule: %s", qUtf8Printable(line)); } } list.close(); @@ -63,7 +62,7 @@ UrlRequestInterceptor::UrlRequestInterceptor(const std::unique_ptr<Configuration auto filtersPath = config->value<QString>("filter.adblock"); if(filtersPath) - filters = std::move(parseAdBlockList(filtersPath.value())); + filters = parseAdBlockList(filtersPath.value()); } // test DNT on https://browserleaks.com/donottrack @@ -78,12 +77,14 @@ void UrlRequestInterceptor::interceptRequest(QWebEngineUrlRequestInfo &info) return; } + const uint domainHash = qHash(info.firstPartyUrl().host()); + const QWebEngineUrlRequestInfo::ResourceType type = info.resourceType(); + const QUrl requestUrl = info.requestUrl(); for(const FilterRule &rule : filters) { - if(rule.matchesDomain(info.firstPartyUrl().host()) && rule.matchesType(info.resourceType()) && rule.matchesUrl(info.requestUrl())) { + if(rule.matchesDomain(domainHash) && rule.matchesType(type) && rule.matchesUrl(requestUrl)) { info.block(rule.isBlocking()); #ifdef QT_DEBUG qDebug("--> blocked %s", qUtf8Printable(info.requestUrl().toString())); - qDebug("- %s", qUtf8Printable(rule.toString())); #endif break; } diff --git a/src/webengine/urlinterceptor.h b/src/webengine/urlinterceptor.h index 5c78b62..575e0c9 100644 --- a/src/webengine/urlinterceptor.h +++ b/src/webengine/urlinterceptor.h @@ -9,7 +9,7 @@ #ifndef SMOLBOTE_URLREQUESTINTERCEPTOR_H #define SMOLBOTE_URLREQUESTINTERCEPTOR_H -#include "web/urlfilter/filterrule.h" +#include "urlfilter/filterrule.h" #include <QByteArray> #include <QVector> #include <QWebEngineUrlRequestInterceptor> @@ -27,7 +27,7 @@ public: }; explicit UrlRequestInterceptor(const std::unique_ptr<Configuration> &config, QObject *parent = nullptr); - ~UrlRequestInterceptor() = default; + ~UrlRequestInterceptor() override = default; void interceptRequest(QWebEngineUrlRequestInfo &info) override; diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index a026ad6..4302ab2 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -7,6 +7,6 @@ add_executable(AdBlockTest urlfilter/adblocktest.h ) -target_include_directories(AdBlockTest PRIVATE ../lib/web) +target_include_directories(AdBlockTest PRIVATE ../lib/urlfilter) -target_link_libraries(AdBlockTest Qt5::Test web) +target_link_libraries(AdBlockTest Qt5::Test urlfilter) diff --git a/test/adblock.txt b/test/adblock.txt index 38d4688..cd284e8 100644 --- a/test/adblock.txt +++ b/test/adblock.txt @@ -1,4 +1,4 @@ /banner/*/img^ ||ads.example.com^ |http://example.com/| - +/banner\d+/ diff --git a/test/urlfilter/adblocktest.cpp b/test/urlfilter/adblocktest.cpp index 416bc20..b31d965 100644 --- a/test/urlfilter/adblocktest.cpp +++ b/test/urlfilter/adblocktest.cpp @@ -1,11 +1,12 @@ #include "adblocktest.h" #include <QtTest/QtTest> -#include "urlfilter/adblockrule.h" +#include "filterrule.h" +#include "formats/adblockrule.h" -inline bool check(const std::vector<FilterRule> rules, const QUrl &url) +inline bool check(const std::vector<AdBlockRule> rules, const QUrl &url) { - for(const FilterRule &rule : rules) { - if(rule.matchesDomain(url.host()) && rule.matchesUrl(url)) + for(const AdBlockRule &rule : rules) { + if(rule.matchesDomain(qHash(url.host())) && rule.matchesUrl(url)) return true; } return false; @@ -13,9 +14,10 @@ inline bool check(const std::vector<FilterRule> rules, const QUrl &url) void AdBlockTest::parseList() { - std::vector<FilterRule> rules; + std::vector<AdBlockRule> rules; QFile list("adblock.txt"); + int ruleCount = 0; QCOMPARE(list.open(QIODevice::ReadOnly | QIODevice::Text), true); { QTextStream l(&list); @@ -24,6 +26,7 @@ void AdBlockTest::parseList() AdBlockRule rule(line); if(rule.isEnabled()) { rules.emplace_back(std::move(rule)); + ruleCount++; qDebug("added rule: %s", qUtf8Printable(line)); } } @@ -31,7 +34,7 @@ void AdBlockTest::parseList() list.close(); // there should be 3 rules - QCOMPARE(rules.size(), 3); + QCOMPARE(rules.size(), ruleCount); // block by address part QCOMPARE(check(rules, QUrl("http://example.com/banner/foo/img")), true); @@ -48,11 +51,15 @@ void AdBlockTest::parseList() QCOMPARE(check(rules, QUrl("http://ads.example.com.ua/foo.gif")), false); QCOMPARE(check(rules, QUrl("http://example.com/redirect/http://ads.example.com/")), false); - // block exact address QCOMPARE(check(rules, QUrl("http://example.com/")), true); QCOMPARE(check(rules, QUrl("http://example.com/foo.gif")), false); QCOMPARE(check(rules, QUrl("http://example.info/redirect/http://example.com/")), false); + + // regular expression + QCOMPARE(check(rules, QUrl("http://another.com/banner123")), true); + QCOMPARE(check(rules, QUrl("http://another.com/banner321")), true); + QCOMPARE(check(rules, QUrl("http://another.com/banners")), false); } QTEST_GUILESS_MAIN(AdBlockTest) |