diff options
Diffstat (limited to 'staging')
-rw-r--r-- | staging/adblock/filterlist.cpp | 128 | ||||
-rw-r--r-- | staging/adblock/filterlist.h | 54 | ||||
-rw-r--r-- | staging/adblock/meson.build | 38 | ||||
-rw-r--r-- | staging/adblock/options.cpp | 94 | ||||
-rw-r--r-- | staging/adblock/options.h | 45 | ||||
-rw-r--r-- | staging/adblock/plugin/AdblockPlusPlugin.json | 4 | ||||
-rw-r--r-- | staging/adblock/plugin/plugin.cpp | 54 | ||||
-rw-r--r-- | staging/adblock/plugin/plugin.h | 25 | ||||
-rw-r--r-- | staging/adblock/rule.h | 153 | ||||
-rw-r--r-- | staging/adblock/test/filterlist.cpp | 118 | ||||
-rw-r--r-- | staging/adblock/test/options.cpp | 42 | ||||
-rw-r--r-- | staging/adblock/test/rule.cpp | 86 | ||||
-rw-r--r-- | staging/smolblok/README.md | 8 | ||||
-rw-r--r-- | staging/smolblok/filtermanager.hpp | 41 | ||||
-rw-r--r-- | staging/smolblok/meson.build | 17 | ||||
-rw-r--r-- | staging/smolblok/smolblok.cpp | 40 | ||||
-rw-r--r-- | staging/smolblok/smolblok.hpp | 80 | ||||
-rw-r--r-- | staging/smolblok/test/loader.cpp | 25 | ||||
-rw-r--r-- | staging/smolblok/test/main.cpp | 22 | ||||
-rw-r--r-- | staging/smolblok/test/sample-filters.txt | 10 |
20 files changed, 1084 insertions, 0 deletions
diff --git a/staging/adblock/filterlist.cpp b/staging/adblock/filterlist.cpp new file mode 100644 index 0000000..1846ff6 --- /dev/null +++ b/staging/adblock/filterlist.cpp @@ -0,0 +1,128 @@ +/* + * This file is part of smolbote. It's copyrighted by the contributors recorded + * in the version control history of the file, available from its original + * location: https://library.iserlohn-fortress.net/aqua/smolbote.git + * + * SPDX-License-Identifier: GPL-3.0 + */ + +#include "filterlist.h" +#include <QByteArray> +#include <QIODevice> + +/** + * Documentation: + * + * https://adblockplus.org/filter-cheatsheet + * https://help.eyeo.com/adblockplus/how-to-write-filters + * + * https://github.com/gorhill/uBlock/wiki/Introduction-to-basic-filtering-syntax + * https://github.com/gorhill/uBlock/wiki/Static-filter-syntax + * + */ + +const QLatin1String comment_lastModified("! Last modified: "); +const QLatin1String comment_expires("! Expires: "); + +using namespace AdblockPlus; + +FilterList::FilterList(QIODevice &from) +{ + if(from.isReadable() && from.isTextModeEnabled()) { + while(from.bytesAvailable() > 0) { + const auto line = from.readLine(512); + + if(line[0] == '!') { + parseComment(line); + + } else if(line.contains("##") || line.contains("#@#")) { + // ## is element hiding rule + // #@# is element hiding exception rule + + } else { + parseRule(line); + } + } + } +} + +void FilterList::parseComment(const QString &line) +{ + if(line.startsWith(comment_lastModified)) { + lastModified = QDateTime::fromString(line.mid(comment_lastModified.size()), "dd MMM yyyy HH:mm 'UTC'"); + expires = lastModified; + + } else if(line.startsWith(comment_expires)) { + const QRegularExpression time_re("(?:(\\d+) days)|(?:(\\d+) hours)"); + const auto match = time_re.match(line); + if(match.hasMatch()) { + expires = expires.addDays(match.captured(1).toInt()); + expires = expires.addSecs(match.captured(2).toInt() * 60 * 60); + } + } +} + +Rule *FilterList::parseRule(const QByteArray &line) +{ + QString pattern = line; + Options opt; + + if(pattern.startsWith(QLatin1String("@@"))) { + pattern.remove(0, 2); + opt.exception = true; + } + + // parse options + if(pattern.contains('$')) { + const auto list = pattern.split('$'); + pattern = list.at(0); + const auto options = list.at(1); + + if(!opt.parseAbp(&options)) { + return nullptr; + } + } + + if(pattern.startsWith("||") && pattern.endsWith("^")) { + // domain match + pattern = pattern.mid(2, pattern.length() - 3); + return new MatcherRule(pattern, opt, MatcherRule::DomainMatch); + + } else if(pattern.startsWith("|") && pattern.endsWith("|")) { + // string equals + pattern = pattern.mid(1, pattern.length() - 2); + return new MatcherRule(pattern, opt, MatcherRule::UrlEquals); + + } else if(pattern.startsWith("|")) { + // string starts with + pattern = pattern.mid(1, pattern.length() - 1); + return new MatcherRule(pattern, opt, MatcherRule::UrlStartsWith); + + } else if(pattern.endsWith("|")) { + // string ends with + pattern = pattern.mid(0, pattern.length() - 1); + return new MatcherRule(pattern, opt, MatcherRule::UrlEndsWith); + + } else if(pattern.startsWith("/") && pattern.endsWith("/")) { + // regular expression + pattern = pattern.mid(1, pattern.length() - 2); + return new RegexRule(pattern, opt); + + } else if(!pattern.isEmpty()) { + if(pattern.contains('*')) { + // wildcard pattern + pattern = QRegularExpression::wildcardToRegularExpression(pattern); + return new RegexRule(pattern, opt); + } else { + // contains pattern + return new MatcherRule(pattern, opt); + } + } + + return nullptr; +} + +bool FilterList::filter(QWebEngineUrlRequestInfo &info) const +{ + return false; +} diff --git a/staging/adblock/filterlist.h b/staging/adblock/filterlist.h new file mode 100644 index 0000000..24464c8 --- /dev/null +++ b/staging/adblock/filterlist.h @@ -0,0 +1,54 @@ +/* + * This file is part of smolbote. It's copyrighted by the contributors recorded + * in the version control history of the file, available from its original + * location: https://library.iserlohn-fortress.net/aqua/smolbote.git + * + * SPDX-License-Identifier: GPL-3.0 + */ + +#include "rule.h" +#include <QDateTime> +#include <QObject> +#include <QString> +#include <QStringList> +#include <QVector> +#include <smolbote/filterinterface.hpp> + +namespace AdblockPlus +{ +class FilterList : public Filter +{ +public: + explicit FilterList(QIODevice &from); + ~FilterList() + { + qDeleteAll(m_rules); + } + + bool filter(QWebEngineUrlRequestInfo &info) const override; + bool isUpToDate() const override + { + const auto current = QDateTime::currentDateTime(); + return expires > current; + } + + QDateTime modified() const + { + return lastModified; + } + QDateTime expiresOn() const + { + return expires; + } + + [[nodiscard]] static Rule *parseRule(const QByteArray &line); + +private: + void parseComment(const QString &line); + + QDateTime lastModified; + QDateTime expires; + QVector<AdblockPlus::Rule *> m_rules; +}; + +} // namespace AdblockPlus diff --git a/staging/adblock/meson.build b/staging/adblock/meson.build new file mode 100644 index 0000000..942f325 --- /dev/null +++ b/staging/adblock/meson.build @@ -0,0 +1,38 @@ +lib_adblockfilter = static_library('adblockfilter', + [ 'filterlist.cpp', 'options.cpp' ], + include_directories: smolbote_interfaces, + dependencies: [ dep_qt5 ] +) + +dep_adblockfilter = declare_dependency( + include_directories: ['.', smolbote_interfaces], + link_with: lib_adblockfilter +) + +#AdblockPlusFilterPlugin = shared_library('AdblockPlusPlugin', +# [ 'plugin/plugin.cpp', +# mod_qt5.preprocess(include_directories: smolbote_interfaces, +# moc_headers: 'plugin/plugin.h', dependencies: dep_qt5) +# ], +# include_directories: smolbote_interfaces, +# link_with: lib_adblockfilter, +# dependencies: dep_qt5, +# install: true, +# install_dir: get_option('libdir')/'smolbote/plugins' +#) + +test('adblock: rule', executable('libadblockfilter_rule', + sources: 'test/rule.cpp', + dependencies: [ dep_qt5, dep_catch, dep_adblockfilter ] +)) + +test('adblock: options', executable('libadblockfilter_options', + sources: 'test/options.cpp', + dependencies: [ dep_qt5, dep_catch, dep_adblockfilter ] +)) + +test('adblock: filterlist', executable('libadblockfilter_filterlist', + sources: 'test/filterlist.cpp', + dependencies: [ dep_qt5, dep_catch, dep_adblockfilter ] +)) + diff --git a/staging/adblock/options.cpp b/staging/adblock/options.cpp new file mode 100644 index 0000000..08f30ee --- /dev/null +++ b/staging/adblock/options.cpp @@ -0,0 +1,94 @@ +/* + * This file is part of smolbote. It's copyrighted by the contributors recorded + * in the version control history of the file, available from its original + * location: https://library.iserlohn-fortress.net/aqua/smolbote.git + * + * SPDX-License-Identifier: GPL-3.0 + */ + +#include "options.h" + +using namespace AdblockPlus; + +constexpr std::array abpTypeOptions = { + "document", // ResourceTypeMainFrame 0 Top level page. + "subdocument", // ResourceTypeSubFrame 1 Frame or iframe. + "stylesheet", // ResourceTypeStylesheet 2 A CSS stylesheet. + "script", // ResourceTypeScript 3 An external script. + "image", // ResourceTypeImage 4 An image (JPG, GIF, PNG, and so on). + "font", // ResourceTypeFontResource 5 A font. + "other", // ResourceTypeSubResource 6 An "other" subresource. + "object", // ResourceTypeObject 7 An object (or embed) tag for a plugin or a resource that a plugin requested. + "media", // ResourceTypeMedia 8 A media resource. + "__worker", // ResourceTypeWorker 9 The main resource of a dedicated worker. + "__sharedworker", // ResourceTypeSharedWorker 10 The main resource of a shared worker. + "__prefetch", // ResourceTypePrefetch 11 An explicitly requested prefetch. + "__favicon", // ResourceTypeFavicon 12 A favicon. + "xmlhttprequest", // ResourceTypeXhr 13 An XMLHttpRequest. + "ping", // ResourceTypePing 14 A ping request for <a ping>. + "__serviceworker", // ResourceTypeServiceWorker 15 The main resource of a service worker. + "__cspreport", // ResourceTypeCspReport 16 A report of Content Security Policy (CSP) violations. + "__pluginresource", // ResourceTypePluginResource 17 A resource requested by a plugin. + "__preloadmainframe", // ResourceTypeNavigationPreloadMainFrame 19 A main-frame service worker navigation preload request. + "__preloadsubframe", // ResourceTypeNavigationPreloadSubFrame 20 A sub-frame service worker navigation preload request. + "__unknown" // ResourceTypeUnknown 255 Unknown request type. +}; + +auto parseTypeOption(QStringRef &option) +{ + struct { + bool found = false; + int index = -1; + bool exception = false; + } ret; + + // Possible inverse type options include ~script, ~image, ~stylesheet, ~object, + // ~xmlhttprequest, ~subdocument, ~ping, ~websocket, ~webrtc, ~document, ~elemhide, ~other + if(option[0] == '~') { + ret.exception = true; + option = option.mid(1); + } + + // TODO: map all ResourceType's to their respective strings + // TODO: websocket, webrtc, elemhide, generichide, genericblock, popup + + for(std::size_t i = 0; i < std::size(abpTypeOptions); ++i) { + if(option == abpTypeOptions[i]) { + ret.index = i; + ret.found = true; + return ret; + } + } + return ret; +} + +bool Options::parseAbp(const QStringRef &options) +{ + std::bitset<32> checked_flags; + + for(auto &option : options.split(',')) { + if(option == "match-case") { + matchcase = true; + + } else if(option == "third-party") { + thirdparty = !exception; + } else if(const auto r = parseTypeOption(option); r.found) { + if(!r.exception) { + flags.set(r.index, true); + checked_flags.set(r.index, true); + } else { + flags.set(r.index, false); + checked_flags.set(r.index, true); + for(auto i = 0; i < 32; ++i) { + if(!checked_flags[i]) { + flags.set(i, true); + } + } + } + } else { + return false; + } + } + + return true; +} diff --git a/staging/adblock/options.h b/staging/adblock/options.h new file mode 100644 index 0000000..efc47a6 --- /dev/null +++ b/staging/adblock/options.h @@ -0,0 +1,45 @@ +/* + * This file is part of smolbote. It's copyrighted by the contributors recorded + * in the version control history of the file, available from its original + * location: https://library.iserlohn-fortress.net/aqua/smolbote.git + * + * SPDX-License-Identifier: GPL-3.0 + */ + +#ifndef SMOLBOTE_ADBLOCK_OPTIONS_H +#define SMOLBOTE_ADBLOCK_OPTIONS_H + +#include <QHash> +#include <QString> +#include <QWebEngineUrlRequestInfo> +#include <bitset> + +namespace AdblockPlus +{ + +struct Options { + // request handling options + bool exception = false; + bool redirect = false; + + // pattern options + bool matchcase = false; + + // request type options + bool firstparty = true; + bool thirdparty = true; + + // request types + bool matchesType(QWebEngineUrlRequestInfo::ResourceType type) + { + return flags.test(type); + } + bool parseAbp(const QStringRef &options); + + // TODO private: + std::bitset<32> flags; +}; + +} // namespace AdblockPlus + +#endif // SMOLBOTE_ADBLOCK_OPTIONS_H diff --git a/staging/adblock/plugin/AdblockPlusPlugin.json b/staging/adblock/plugin/AdblockPlusPlugin.json new file mode 100644 index 0000000..053826a --- /dev/null +++ b/staging/adblock/plugin/AdblockPlusPlugin.json @@ -0,0 +1,4 @@ +{ + "name": "AdblockPlus Filter Plugin", + "author": "Aqua <aqua@iserlohn-fortress.net>" +} diff --git a/staging/adblock/plugin/plugin.cpp b/staging/adblock/plugin/plugin.cpp new file mode 100644 index 0000000..028c83f --- /dev/null +++ b/staging/adblock/plugin/plugin.cpp @@ -0,0 +1,54 @@ +/* + * This file is part of smolbote. It's copyrighted by the contributors recorded + * in the version control history of the file, available from its original + * location: https://library.iserlohn-fortress.net/aqua/smolbote.git + * + * SPDX-License-Identifier: GPL-3.0 + */ + +#include "plugin.h" +#include "../filterlist.h" +#include <QTextStream> + +using namespace AdblockPlus; + +Filter* AdblockPlusFilterPlugin::load(QIODevice* from) const +{ + if(!from->isOpen()) + return nullptr; + + + QTextStream stream(from); + + if(stream.readLine().trimmed() != QLatin1String("[Adblock Plus 2.0]")) { + return nullptr; + } + + auto *list = new FilterList; + QString line; + int total, comments, rules, unsupported, failed; + + while(stream.readLineInto(&line)) { + if(!line.isEmpty()) { + ++total; + + switch(list->parse(line)) + { + case FilterList::Comment: + ++comments; + break; + case FilterList::Rule: + ++rules; + break; + case FilterList::Unsupported: + ++unsupported; + break; + case FilterList::Failed: + break; + } + } + } + + return list; +} + diff --git a/staging/adblock/plugin/plugin.h b/staging/adblock/plugin/plugin.h new file mode 100644 index 0000000..db419bd --- /dev/null +++ b/staging/adblock/plugin/plugin.h @@ -0,0 +1,25 @@ +/* + * This file is part of smolbote. It's copyrighted by the contributors recorded + * in the version control history of the file, available from its original + * location: https://library.iserlohn-fortress.net/aqua/smolbote.git + * + * SPDX-License-Identifier: GPL-3.0 + */ + +#ifndef ADBLOCKPLUSFILTER_PLUGIN_H +#define ADBLOCKPLUSFILTER_PLUGIN_H + +#include <smolbote/filterinterface.hpp> + +class AdblockPlusFilterPlugin : public QObject, public FilterPlugin +{ + Q_OBJECT + Q_PLUGIN_METADATA(IID FilterPluginIid FILE "AdblockPlusPlugin.json") + Q_INTERFACES(FilterPlugin) + +public: + Filter* load(QIODevice* from) const override; +}; + +#endif // ADBLOCKPLUSFILTER_PLUGIN_H + diff --git a/staging/adblock/rule.h b/staging/adblock/rule.h new file mode 100644 index 0000000..aaab49a --- /dev/null +++ b/staging/adblock/rule.h @@ -0,0 +1,153 @@ +/* + * This file is part of smolbote. It's copyrighted by the contributors recorded + * in the version control history of the file, available from its original + * location: https://library.iserlohn-fortress.net/aqua/smolbote.git + * + * SPDX-License-Identifier: GPL-3.0 + */ + +#ifndef SMOLBOTE_ADBLOCK_RULE_H +#define SMOLBOTE_ADBLOCK_RULE_H + +#include "options.h" +#include <QObject> +#include <QRegularExpression> +#include <QString> +#include <QStringMatcher> + +namespace AdblockPlus +{ +class Rule +{ +public: + virtual ~Rule() = default; + /** + * requestUrl: requested URL + * initiatorUrl: URL of document that initiated navigation + * firstPartyUrl: URL of the page that issued the request + */ + virtual bool hasMatch(const QStringRef &requestUrl, + const QStringRef &initiatorUrl, + const QStringRef &firstPartyUrl, + QWebEngineUrlRequestInfo::ResourceType resourceType = QWebEngineUrlRequestInfo::ResourceTypeMainFrame) const = 0; + + bool shouldRedirect() const + { + return options.redirect; + } + bool shouldBlock() const + { + return !options.exception; + } + +protected: + Rule(const Options &opt) + : options(opt) + { + } + const Options options; +}; + +// The separator character can be anything but +// a letter, a digit, or one of the following: _, -, ., %. +// The end of the address is also accepted as a separator. +inline bool isSeparator(const QChar &c) +{ + return !c.isLetter() && !c.isDigit() && c != '_' && c != '-' && c != '.' && c != '%'; +} + +class MatcherRule : public Rule +{ +public: + enum MatchPosition { + DomainMatch, + UrlStartsWith, + UrlEndsWith, + UrlContains, + UrlEquals + }; + + MatcherRule(const QString &pattern, const Options &opt, const MatchPosition pos = UrlContains) + : Rule(opt) + , position(pos) + , matcher(pattern, opt.matchcase ? Qt::CaseSensitive : Qt::CaseInsensitive) + , patternLength(pattern.length()) + { + } + explicit MatcherRule(const MatcherRule &) = delete; + MatcherRule &operator=(const MatcherRule &) = delete; + + explicit MatcherRule(MatcherRule &&) = delete; + MatcherRule &operator=(MatcherRule &&) = delete; + + ~MatcherRule() = default; + bool hasMatch(const QStringRef &url, + const QStringRef &initiatorUrl, + const QStringRef &firstPartyUrl, + QWebEngineUrlRequestInfo::ResourceType resourceType = QWebEngineUrlRequestInfo::ResourceTypeMainFrame) const override + { + const auto index = matcher.indexIn(url); + if(index == -1) { + return false; + } + + switch(position) { + case DomainMatch: + // match if + // there is only one : left of the index + // and + // character after pattern is separator or end of string + return (url.left(index).count(':') <= 1 && (index + patternLength == url.length() || isSeparator(url[index + patternLength]))); + case UrlStartsWith: + return (index == 0); + case UrlEndsWith: + return (index == url.length() - patternLength); + case UrlContains: + return (index != -1); + case UrlEquals: + return (index == 0 && patternLength == url.length()); + } + + return false; + } + +private: + const MatchPosition position; + const QStringMatcher matcher; + const int patternLength; +}; + +class RegexRule : public Rule +{ +public: + RegexRule(const QString &rule, const Options &opt) + : Rule(opt) + , regex(rule) + { + if(!opt.matchcase) { + regex.setPatternOptions(QRegularExpression::CaseInsensitiveOption); + } + } + explicit RegexRule(const RegexRule &) = delete; + RegexRule &operator=(const RegexRule &) = delete; + + explicit RegexRule(RegexRule &&) = delete; + RegexRule &operator=(RegexRule &&) = delete; + + ~RegexRule() = default; + bool hasMatch(const QStringRef &url, + const QStringRef &initiatorUrl, + const QStringRef &firstPartyUrl, + QWebEngineUrlRequestInfo::ResourceType resourceType = QWebEngineUrlRequestInfo::ResourceTypeMainFrame) const override + { + const auto match = regex.match(url); + return match.hasMatch(); + } + +private: + QRegularExpression regex; +}; + +} // namespace AdblockPlus + +#endif // SMOLBOTE_ADBLOCK_RULE_H diff --git a/staging/adblock/test/filterlist.cpp b/staging/adblock/test/filterlist.cpp new file mode 100644 index 0000000..ca122ac --- /dev/null +++ b/staging/adblock/test/filterlist.cpp @@ -0,0 +1,118 @@ +#define CATCH_CONFIG_MAIN +#include "filterlist.h" +#include <QBuffer> +#include <catch2/catch.hpp> + +using namespace AdblockPlus; + +QByteArray sampleList = + R"(! comment on line +! Last modified: 1 Jan 2000 00:00 UTC +! Expires: 4 days (update frequency) +)"; + +TEST_CASE("placeholder") +{ + QBuffer buffer(&sampleList); + buffer.open(QIODevice::ReadOnly | QIODevice::Text); + + AdblockPlus::FilterList list(buffer); + REQUIRE(!list.isUpToDate()); +} + +TEST_CASE("domain match") +{ + const QString defaultUrl = ""; + + const QString block1 = "http://ads.example.com/foo.gif"; + const QString block2 = "http://server1.ads.example.com/foo.gif"; + const QString block3 = "https://ads.example.com:8000/"; + const QString block4 = "https://ads.example.com"; + + const QString allow1 = "http://ads.example.com.ua/foo.gif"; + const QString allow2 = "http://example.com/redirect/http://ads.example.com/"; + + auto *rule = FilterList::parseRule("||ads.example.com^"); + REQUIRE(rule->shouldBlock()); + REQUIRE(!rule->shouldRedirect()); + + REQUIRE(rule->hasMatch(&block1, &defaultUrl, &defaultUrl)); + REQUIRE(rule->hasMatch(&block2, &defaultUrl, &defaultUrl)); + REQUIRE(rule->hasMatch(&block3, &defaultUrl, &defaultUrl)); + REQUIRE(rule->hasMatch(&block4, &defaultUrl, &defaultUrl)); + + REQUIRE(!rule->hasMatch(&allow1, &defaultUrl, &defaultUrl)); + REQUIRE(!rule->hasMatch(&allow2, &defaultUrl, &defaultUrl)); + + delete rule; +} + +TEST_CASE("string equals") +{ + const QString defaultUrl = ""; + + const QString block = "http://example.com/"; + + const QString allow1 = "http://example.com/foo.gif"; + const QString allow2 = "http://example.info/redirect/http://example.com/"; + + auto *rule = FilterList::parseRule("|http://example.com/|"); + REQUIRE(rule->shouldBlock()); + REQUIRE(!rule->shouldRedirect()); + + REQUIRE(rule->hasMatch(&block, &defaultUrl, &defaultUrl)); + + REQUIRE(!rule->hasMatch(&allow1, &defaultUrl, &defaultUrl)); + REQUIRE(!rule->hasMatch(&allow2, &defaultUrl, &defaultUrl)); + + delete rule; +} + +TEST_CASE("string starts with") +{ + const QString defaultUrl = ""; + + auto *rule = FilterList::parseRule("|http://baddomain.example/"); + REQUIRE(rule->shouldBlock()); + REQUIRE(!rule->shouldRedirect()); + + const QString blocks = "http://baddomain.example/banner.gif"; + const QString allows = "http://gooddomain.example/analyze?http://baddomain.example"; + + REQUIRE(rule->hasMatch(&blocks, &defaultUrl, &defaultUrl)); + REQUIRE(!rule->hasMatch(&allows, &defaultUrl, &defaultUrl)); + delete rule; +} + +TEST_CASE("string ends with") +{ + const QString defaultUrl = ""; + + auto *rule = FilterList::parseRule("swf|"); + REQUIRE(rule->shouldBlock()); + REQUIRE(!rule->shouldRedirect()); + + const QString blocks = "http://example.com/annoyingflash.swf"; + const QString allows = "http://example.com/swf/index.html"; + + REQUIRE(rule->hasMatch(&blocks, &defaultUrl, &defaultUrl)); + REQUIRE(!rule->hasMatch(&allows, &defaultUrl, &defaultUrl)); + delete rule; +} + +TEST_CASE("regular expressions") +{ + const QString defaultUrl = ""; + + auto *rule = FilterList::parseRule("/banner\\d+/"); + const QString matches1 = "banner123"; + const QString matches2 = "banner321"; + + const QString ignores = "banners"; + + REQUIRE(rule->hasMatch(&matches1, &defaultUrl, &defaultUrl)); + REQUIRE(rule->hasMatch(&matches2, &defaultUrl, &defaultUrl)); + REQUIRE(!rule->hasMatch(&ignores, &defaultUrl, &defaultUrl)); + delete rule; +} + diff --git a/staging/adblock/test/options.cpp b/staging/adblock/test/options.cpp new file mode 100644 index 0000000..67dc143 --- /dev/null +++ b/staging/adblock/test/options.cpp @@ -0,0 +1,42 @@ +#define CATCH_CONFIG_MAIN +#include "options.h" +#include <catch2/catch.hpp> + +using namespace AdblockPlus; + +SCENARIO("parsing adblock options") +{ + Options opt; + + GIVEN("an unknown option") + { + const QString unknown = "unknown"; + THEN("the option is not parsed") + { + QStringRef unknown_ref(&unknown); + REQUIRE(!opt.parseAbp(unknown_ref)); + } + } + + GIVEN("match-case,document,~subdocument") + { + const QString options = "match-case,document,~subdocument"; + REQUIRE(opt.parseAbp(&options)); + + WHEN("match-case") + { + REQUIRE(opt.matchcase); + } + + WHEN("testing set/unset options") + { + REQUIRE(opt.matchesType(QWebEngineUrlRequestInfo::ResourceTypeMainFrame)); + REQUIRE(!opt.matchesType(QWebEngineUrlRequestInfo::ResourceTypeSubFrame)); + } + + WHEN("testing other options") + { + REQUIRE(opt.matchesType(QWebEngineUrlRequestInfo::ResourceTypeStylesheet)); + } + } +} diff --git a/staging/adblock/test/rule.cpp b/staging/adblock/test/rule.cpp new file mode 100644 index 0000000..07186b9 --- /dev/null +++ b/staging/adblock/test/rule.cpp @@ -0,0 +1,86 @@ +#define CATCH_CONFIG_MAIN +#include "rule.h" +#include <catch2/catch.hpp> + +using namespace AdblockPlus; + +SCENARIO("MatcherRule") +{ + GIVEN("options with case sensitive pattern") + { + const QString defaultUrl = ""; + + const Options opt { .matchcase=true }; + const QString patternContains("this string contains the pattern in it"); + const QString patternBegins("pattern starts this string"); + const QString patternEnds("this string ends with pattern"); + const QString patternMissing("and this one does not"); + + WHEN("contains") + { + MatcherRule rule("pattern", opt); + REQUIRE(rule.shouldBlock()); + + THEN("pattern is matched anywhere in the URL") + { + REQUIRE(rule.hasMatch(&patternContains, &defaultUrl, &defaultUrl)); + REQUIRE(rule.hasMatch(&patternBegins, &defaultUrl, &defaultUrl)); + REQUIRE(rule.hasMatch(&patternEnds, &defaultUrl, &defaultUrl)); + REQUIRE(!rule.hasMatch(&patternMissing, &defaultUrl, &defaultUrl)); + } + } + + WHEN("startsWith") + { + MatcherRule rule("pattern", opt, MatcherRule::UrlStartsWith); + REQUIRE(rule.shouldBlock()); + + THEN("pattern is matched if at the start of the URL") + { + REQUIRE(!rule.hasMatch(&patternContains, &defaultUrl, &defaultUrl)); + REQUIRE(rule.hasMatch(&patternBegins, &defaultUrl, &defaultUrl)); + REQUIRE(!rule.hasMatch(&patternEnds, &defaultUrl, &defaultUrl)); + REQUIRE(!rule.hasMatch(&patternMissing, &defaultUrl, &defaultUrl)); + } + } + + WHEN("endsWith") + { + MatcherRule rule("pattern", opt, MatcherRule::UrlEndsWith); + REQUIRE(rule.shouldBlock()); + + THEN("pattern is matched if at the end of the URL") + { + REQUIRE(!rule.hasMatch(&patternContains, &defaultUrl, &defaultUrl)); + REQUIRE(!rule.hasMatch(&patternBegins, &defaultUrl, &defaultUrl)); + REQUIRE(rule.hasMatch(&patternEnds, &defaultUrl, &defaultUrl)); + REQUIRE(!rule.hasMatch(&patternMissing, &defaultUrl, &defaultUrl)); + } + } + } +} + +SCENARIO("RegexRule") +{ + GIVEN("options with case sensitive pattern") + { + const QString defaultUrl; + + const Options opt { .matchcase=true }; + const QString patternContains("this string contains the pattern in it"); + const QString patternMissing("and this one does not"); + + WHEN("contains") + { + RegexRule rule("pattern", opt); + REQUIRE(rule.shouldBlock()); + + THEN("pattern is matched anywhere in the URL") + { + REQUIRE(rule.hasMatch(&patternContains, &defaultUrl, &defaultUrl)); + REQUIRE(!rule.hasMatch(&patternMissing, &defaultUrl, &defaultUrl)); + } + } + } +} + diff --git a/staging/smolblok/README.md b/staging/smolblok/README.md new file mode 100644 index 0000000..1793009 --- /dev/null +++ b/staging/smolblok/README.md @@ -0,0 +1,8 @@ +## smolblok + +### What is this +This is a C++ library for URL filtering for Qt applications using QtWebEngine. + +### Supported formats +- AdblockPlus without element hiding rules + diff --git a/staging/smolblok/filtermanager.hpp b/staging/smolblok/filtermanager.hpp new file mode 100644 index 0000000..6ee4d3f --- /dev/null +++ b/staging/smolblok/filtermanager.hpp @@ -0,0 +1,41 @@ +/* + * This file is part of smolbote. It's copyrighted by the contributors recorded + * in the version control history of the file, available from its original + * location: https://library.iserlohn-fortress.net/aqua/smolbote.git + * + * SPDX-License-Identifier: GPL-3.0 + */ + +#pragma once + +#include <QWebEngineUrlRequestInterceptor> +#include <smolbote/filterinterface.hpp> + +class FilterManager : public QWebEngineUrlRequestInterceptor +{ +public: + FilterManager(QObject *parent = nullptr) + : QWebEngineUrlRequestInterceptor(parent) + { + } + ~FilterManager() + { + qDeleteAll(filters); + } + + void addFilterList(FilterList *list) { + filters.append(list); + } + + void interceptRequest(QWebEngineUrlRequestInfo &info) override + { + for(const auto *filter : qAsConst(filters)) { + if(filter->filter(info)) { + return; + } + } + } + +private: + QList<FilterList *> filters; +}; diff --git a/staging/smolblok/meson.build b/staging/smolblok/meson.build new file mode 100644 index 0000000..6105179 --- /dev/null +++ b/staging/smolblok/meson.build @@ -0,0 +1,17 @@ +dep_smolblok = declare_dependency( + include_directories: [ '.', smolbote_interfaces ], + link_with: library('smolblok', + [ 'smolblok.cpp' ], + include_directories: smolbote_interfaces, + dependencies: dep_qt5 + ) +) + +smolblok_load = executable('smolblok-load', + dependencies: [ dep_qt5, dep_spdlog, dep_smolblok ], + sources: [ 'test/loader.cpp' ] +) + +test('load', smolblok_load, suite: 'smolblok', should_fail: true) +test('load', smolblok_load, suite: 'smolblok', args: files('meson.build'), should_fail: true) + diff --git a/staging/smolblok/smolblok.cpp b/staging/smolblok/smolblok.cpp new file mode 100644 index 0000000..465c348 --- /dev/null +++ b/staging/smolblok/smolblok.cpp @@ -0,0 +1,40 @@ +/* + * This file is part of smolbote. It's copyrighted by the contributors recorded + * in the version control history of the file, available from its original + * location: https://library.iserlohn-fortress.net/aqua/smolbote.git + * + * SPDX-License-Identifier: GPL-3.0 + */ + +#include "smolblok.hpp" +#include <QFile> +#include <QSettings> + +bool smolblok::addSubscriptions(const QString &filename) +{ + if(filename.isEmpty()) { + return false; + } + + QSettings listconf(filename, QSettings::IniFormat); + + for(auto &group : listconf.childGroups()) { + listconf.beginGroup(group); + const auto *loader = m_formats.value(listconf.value("Format").toString()).instance; + if(loader != nullptr) { + QFile f(listconf.value("File").toString()); + if(!f.exists()) { + continue; + } + + auto *list = loader->load(f); + f.seek(0); + if(loader->parse(list, f)) { + m_subscriptions.addFilterList(list); + } + } + listconf.endGroup(); + } + return false; +} + diff --git a/staging/smolblok/smolblok.hpp b/staging/smolblok/smolblok.hpp new file mode 100644 index 0000000..e547d67 --- /dev/null +++ b/staging/smolblok/smolblok.hpp @@ -0,0 +1,80 @@ +/* + * This file is part of smolbote. It's copyrighted by the contributors recorded + * in the version control history of the file, available from its original + * location: https://library.iserlohn-fortress.net/aqua/smolbote.git + * + * SPDX-License-Identifier: GPL-3.0 + */ + +#ifndef SMOLBOTE_SMOLBLOK_HPP +#define SMOLBOTE_SMOLBLOK_HPP + +#include "filtermanager.hpp" +#include <QPluginLoader> +#include <QWebEngineUrlRequestInterceptor> +#include <smolbote/filterinterface.hpp> + +class smolblok +{ +public: + smolblok() = default; + ~smolblok() + { + for(auto &plugin : m_formats) { + delete plugin.loader; + } + } + + auto registerFormatPlugin(const QString &format, const QString &filename) + { + struct { + bool loaded = false; + QString error; + } ret; + + if(format.isEmpty() || filename.isEmpty()) { + ret.error = "Format or filename is empty"; + return ret; + } + + auto *plugin = new QPluginLoader(filename); + if(!plugin->load()) { + ret.error = plugin->errorString(); + delete plugin; + return ret; + } + + auto *instance = qobject_cast<FilterPlugin *>(plugin->instance()); + if(instance == nullptr) { + ret.error = "Unable to cast"; + delete plugin; + return ret; + } + + m_formats[format] = PluginInfo{ plugin, instance }; + ret.loaded = true; + return ret; + } + + const auto formats() const + { + return m_formats.keys(); + } + + bool addSubscriptions(const QString &filename); + QWebEngineUrlRequestInterceptor *interceptor() + { + return &m_subscriptions; + } + +private: + struct PluginInfo { + QPluginLoader *loader = nullptr; + FilterPlugin *instance = nullptr; + }; + + QHash<QString, PluginInfo> m_formats; + FilterManager m_subscriptions; +}; + +#endif // SMOLBOTE_SMOLBLOK_HPP diff --git a/staging/smolblok/test/loader.cpp b/staging/smolblok/test/loader.cpp new file mode 100644 index 0000000..9e27a26 --- /dev/null +++ b/staging/smolblok/test/loader.cpp @@ -0,0 +1,25 @@ +#include "smolblok.hpp" +#include <spdlog/spdlog.h> + +int main(int argc, char** argv) +{ + if(argc != 2) { + spdlog::error("usage: {} path/to/plugin.so", argv[0]); + return -1; + } + + smolblok filter; + { + const auto r = filter.registerFormatPlugin("unused", argv[1]); + if(r.loaded) { + spdlog::info("Loaded plugin {}", argv[1]); + } else { + spdlog::error("Failed loading plugin {}", argv[1]); + spdlog::error(qUtf8Printable(r.error)); + return -1; + } + } + + return 0; +} + diff --git a/staging/smolblok/test/main.cpp b/staging/smolblok/test/main.cpp new file mode 100644 index 0000000..5624ee9 --- /dev/null +++ b/staging/smolblok/test/main.cpp @@ -0,0 +1,22 @@ +#define CATCH_CONFIG_MAIN + +#include "smolblok.hpp" +#include <catch2/catch.hpp> + +SCENARIO("smolblok") +{ + smolblok s; + + GIVEN("invalid plugins") + { + REQUIRE(!s.registerFormatPlugin("", "")); + REQUIRE(!s.registerFormatPlugin("Format", "missing.dll")); + } + + GIVEN("invalid subscriptions") + { + REQUIRE(!s.addSubscriptions("")); + REQUIRE(!s.addSubscriptions("missing.txt")); + } +} + diff --git a/staging/smolblok/test/sample-filters.txt b/staging/smolblok/test/sample-filters.txt new file mode 100644 index 0000000..59e0e7b --- /dev/null +++ b/staging/smolblok/test/sample-filters.txt @@ -0,0 +1,10 @@ +[easylist-noelemhide] +Format = AdblockPlus +File = easylist_noelemhide.txt +Href = https://easylist-downloads.adblockplus.org/easylist_noelemhide.txt + +[StevenBlack] +Format = Hostlist +File = stevenblack.txt +Href = https://raw.githubusercontent.com/StevenBlack/hosts/master/hosts + |