diff options
author | Aqua-sama <aqua@iserlohn-fortress.net> | 2020-02-10 20:58:39 +0200 |
---|---|---|
committer | Aqua-sama <aqua@iserlohn-fortress.net> | 2020-05-30 11:01:35 +0300 |
commit | d1287f43964633035938f4f4d4133bb6d9da7b3e (patch) | |
tree | d09efa4074815c20be9bd6348203fe4336dfe716 /staging/adblock | |
parent | Fix segfault in release build (diff) | |
download | smolbote-d1287f43964633035938f4f4d4133bb6d9da7b3e.tar.xz |
staging: smolblok
smolblok is a replacement for the current lib/urlfilter AdBlockPlus
and hostlist format filter parser. It is a library that uses plugins to
provide support for different filter formats.
staging/adblock: AdBlockPlus parser plugin
plugins/smolblok_hostlist: hostlist format parser plugin
Headers will be installed to include/smolbote/
Remove lib/urlfilter
Diffstat (limited to 'staging/adblock')
-rw-r--r-- | staging/adblock/filterlist.cpp | 128 | ||||
-rw-r--r-- | staging/adblock/filterlist.h | 54 | ||||
-rw-r--r-- | staging/adblock/meson.build | 38 | ||||
-rw-r--r-- | staging/adblock/options.cpp | 94 | ||||
-rw-r--r-- | staging/adblock/options.h | 45 | ||||
-rw-r--r-- | staging/adblock/plugin/AdblockPlusPlugin.json | 4 | ||||
-rw-r--r-- | staging/adblock/plugin/plugin.cpp | 54 | ||||
-rw-r--r-- | staging/adblock/plugin/plugin.h | 25 | ||||
-rw-r--r-- | staging/adblock/rule.h | 153 | ||||
-rw-r--r-- | staging/adblock/test/filterlist.cpp | 118 | ||||
-rw-r--r-- | staging/adblock/test/options.cpp | 42 | ||||
-rw-r--r-- | staging/adblock/test/rule.cpp | 86 |
12 files changed, 841 insertions, 0 deletions
diff --git a/staging/adblock/filterlist.cpp b/staging/adblock/filterlist.cpp new file mode 100644 index 0000000..1846ff6 --- /dev/null +++ b/staging/adblock/filterlist.cpp @@ -0,0 +1,128 @@ +/* + * This file is part of smolbote. It's copyrighted by the contributors recorded + * in the version control history of the file, available from its original + * location: https://library.iserlohn-fortress.net/aqua/smolbote.git + * + * SPDX-License-Identifier: GPL-3.0 + */ + +#include "filterlist.h" +#include <QByteArray> +#include <QIODevice> + +/** + * Documentation: + * + * https://adblockplus.org/filter-cheatsheet + * https://help.eyeo.com/adblockplus/how-to-write-filters + * + * https://github.com/gorhill/uBlock/wiki/Introduction-to-basic-filtering-syntax + * https://github.com/gorhill/uBlock/wiki/Static-filter-syntax + * + */ + +const QLatin1String comment_lastModified("! Last modified: "); +const QLatin1String comment_expires("! Expires: "); + +using namespace AdblockPlus; + +FilterList::FilterList(QIODevice &from) +{ + if(from.isReadable() && from.isTextModeEnabled()) { + while(from.bytesAvailable() > 0) { + const auto line = from.readLine(512); + + if(line[0] == '!') { + parseComment(line); + + } else if(line.contains("##") || line.contains("#@#")) { + // ## is element hiding rule + // #@# is element hiding exception rule + + } else { + parseRule(line); + } + } + } +} + +void FilterList::parseComment(const QString &line) +{ + if(line.startsWith(comment_lastModified)) { + lastModified = QDateTime::fromString(line.mid(comment_lastModified.size()), "dd MMM yyyy HH:mm 'UTC'"); + expires = lastModified; + + } else if(line.startsWith(comment_expires)) { + const QRegularExpression time_re("(?:(\\d+) days)|(?:(\\d+) hours)"); + const auto match = time_re.match(line); + if(match.hasMatch()) { + expires = expires.addDays(match.captured(1).toInt()); + expires = expires.addSecs(match.captured(2).toInt() * 60 * 60); + } + } +} + +Rule *FilterList::parseRule(const QByteArray &line) +{ + QString pattern = line; + Options opt; + + if(pattern.startsWith(QLatin1String("@@"))) { + pattern.remove(0, 2); + opt.exception = true; + } + + // parse options + if(pattern.contains('$')) { + const auto list = pattern.split('$'); + pattern = list.at(0); + const auto options = list.at(1); + + if(!opt.parseAbp(&options)) { + return nullptr; + } + } + + if(pattern.startsWith("||") && pattern.endsWith("^")) { + // domain match + pattern = pattern.mid(2, pattern.length() - 3); + return new MatcherRule(pattern, opt, MatcherRule::DomainMatch); + + } else if(pattern.startsWith("|") && pattern.endsWith("|")) { + // string equals + pattern = pattern.mid(1, pattern.length() - 2); + return new MatcherRule(pattern, opt, MatcherRule::UrlEquals); + + } else if(pattern.startsWith("|")) { + // string starts with + pattern = pattern.mid(1, pattern.length() - 1); + return new MatcherRule(pattern, opt, MatcherRule::UrlStartsWith); + + } else if(pattern.endsWith("|")) { + // string ends with + pattern = pattern.mid(0, pattern.length() - 1); + return new MatcherRule(pattern, opt, MatcherRule::UrlEndsWith); + + } else if(pattern.startsWith("/") && pattern.endsWith("/")) { + // regular expression + pattern = pattern.mid(1, pattern.length() - 2); + return new RegexRule(pattern, opt); + + } else if(!pattern.isEmpty()) { + if(pattern.contains('*')) { + // wildcard pattern + pattern = QRegularExpression::wildcardToRegularExpression(pattern); + return new RegexRule(pattern, opt); + } else { + // contains pattern + return new MatcherRule(pattern, opt); + } + } + + return nullptr; +} + +bool FilterList::filter(QWebEngineUrlRequestInfo &info) const +{ + return false; +} diff --git a/staging/adblock/filterlist.h b/staging/adblock/filterlist.h new file mode 100644 index 0000000..24464c8 --- /dev/null +++ b/staging/adblock/filterlist.h @@ -0,0 +1,54 @@ +/* + * This file is part of smolbote. It's copyrighted by the contributors recorded + * in the version control history of the file, available from its original + * location: https://library.iserlohn-fortress.net/aqua/smolbote.git + * + * SPDX-License-Identifier: GPL-3.0 + */ + +#include "rule.h" +#include <QDateTime> +#include <QObject> +#include <QString> +#include <QStringList> +#include <QVector> +#include <smolbote/filterinterface.hpp> + +namespace AdblockPlus +{ +class FilterList : public Filter +{ +public: + explicit FilterList(QIODevice &from); + ~FilterList() + { + qDeleteAll(m_rules); + } + + bool filter(QWebEngineUrlRequestInfo &info) const override; + bool isUpToDate() const override + { + const auto current = QDateTime::currentDateTime(); + return expires > current; + } + + QDateTime modified() const + { + return lastModified; + } + QDateTime expiresOn() const + { + return expires; + } + + [[nodiscard]] static Rule *parseRule(const QByteArray &line); + +private: + void parseComment(const QString &line); + + QDateTime lastModified; + QDateTime expires; + QVector<AdblockPlus::Rule *> m_rules; +}; + +} // namespace AdblockPlus diff --git a/staging/adblock/meson.build b/staging/adblock/meson.build new file mode 100644 index 0000000..942f325 --- /dev/null +++ b/staging/adblock/meson.build @@ -0,0 +1,38 @@ +lib_adblockfilter = static_library('adblockfilter', + [ 'filterlist.cpp', 'options.cpp' ], + include_directories: smolbote_interfaces, + dependencies: [ dep_qt5 ] +) + +dep_adblockfilter = declare_dependency( + include_directories: ['.', smolbote_interfaces], + link_with: lib_adblockfilter +) + +#AdblockPlusFilterPlugin = shared_library('AdblockPlusPlugin', +# [ 'plugin/plugin.cpp', +# mod_qt5.preprocess(include_directories: smolbote_interfaces, +# moc_headers: 'plugin/plugin.h', dependencies: dep_qt5) +# ], +# include_directories: smolbote_interfaces, +# link_with: lib_adblockfilter, +# dependencies: dep_qt5, +# install: true, +# install_dir: get_option('libdir')/'smolbote/plugins' +#) + +test('adblock: rule', executable('libadblockfilter_rule', + sources: 'test/rule.cpp', + dependencies: [ dep_qt5, dep_catch, dep_adblockfilter ] +)) + +test('adblock: options', executable('libadblockfilter_options', + sources: 'test/options.cpp', + dependencies: [ dep_qt5, dep_catch, dep_adblockfilter ] +)) + +test('adblock: filterlist', executable('libadblockfilter_filterlist', + sources: 'test/filterlist.cpp', + dependencies: [ dep_qt5, dep_catch, dep_adblockfilter ] +)) + diff --git a/staging/adblock/options.cpp b/staging/adblock/options.cpp new file mode 100644 index 0000000..08f30ee --- /dev/null +++ b/staging/adblock/options.cpp @@ -0,0 +1,94 @@ +/* + * This file is part of smolbote. It's copyrighted by the contributors recorded + * in the version control history of the file, available from its original + * location: https://library.iserlohn-fortress.net/aqua/smolbote.git + * + * SPDX-License-Identifier: GPL-3.0 + */ + +#include "options.h" + +using namespace AdblockPlus; + +constexpr std::array abpTypeOptions = { + "document", // ResourceTypeMainFrame 0 Top level page. + "subdocument", // ResourceTypeSubFrame 1 Frame or iframe. + "stylesheet", // ResourceTypeStylesheet 2 A CSS stylesheet. + "script", // ResourceTypeScript 3 An external script. + "image", // ResourceTypeImage 4 An image (JPG, GIF, PNG, and so on). + "font", // ResourceTypeFontResource 5 A font. + "other", // ResourceTypeSubResource 6 An "other" subresource. + "object", // ResourceTypeObject 7 An object (or embed) tag for a plugin or a resource that a plugin requested. + "media", // ResourceTypeMedia 8 A media resource. + "__worker", // ResourceTypeWorker 9 The main resource of a dedicated worker. + "__sharedworker", // ResourceTypeSharedWorker 10 The main resource of a shared worker. + "__prefetch", // ResourceTypePrefetch 11 An explicitly requested prefetch. + "__favicon", // ResourceTypeFavicon 12 A favicon. + "xmlhttprequest", // ResourceTypeXhr 13 An XMLHttpRequest. + "ping", // ResourceTypePing 14 A ping request for <a ping>. + "__serviceworker", // ResourceTypeServiceWorker 15 The main resource of a service worker. + "__cspreport", // ResourceTypeCspReport 16 A report of Content Security Policy (CSP) violations. + "__pluginresource", // ResourceTypePluginResource 17 A resource requested by a plugin. + "__preloadmainframe", // ResourceTypeNavigationPreloadMainFrame 19 A main-frame service worker navigation preload request. + "__preloadsubframe", // ResourceTypeNavigationPreloadSubFrame 20 A sub-frame service worker navigation preload request. + "__unknown" // ResourceTypeUnknown 255 Unknown request type. +}; + +auto parseTypeOption(QStringRef &option) +{ + struct { + bool found = false; + int index = -1; + bool exception = false; + } ret; + + // Possible inverse type options include ~script, ~image, ~stylesheet, ~object, + // ~xmlhttprequest, ~subdocument, ~ping, ~websocket, ~webrtc, ~document, ~elemhide, ~other + if(option[0] == '~') { + ret.exception = true; + option = option.mid(1); + } + + // TODO: map all ResourceType's to their respective strings + // TODO: websocket, webrtc, elemhide, generichide, genericblock, popup + + for(std::size_t i = 0; i < std::size(abpTypeOptions); ++i) { + if(option == abpTypeOptions[i]) { + ret.index = i; + ret.found = true; + return ret; + } + } + return ret; +} + +bool Options::parseAbp(const QStringRef &options) +{ + std::bitset<32> checked_flags; + + for(auto &option : options.split(',')) { + if(option == "match-case") { + matchcase = true; + + } else if(option == "third-party") { + thirdparty = !exception; + } else if(const auto r = parseTypeOption(option); r.found) { + if(!r.exception) { + flags.set(r.index, true); + checked_flags.set(r.index, true); + } else { + flags.set(r.index, false); + checked_flags.set(r.index, true); + for(auto i = 0; i < 32; ++i) { + if(!checked_flags[i]) { + flags.set(i, true); + } + } + } + } else { + return false; + } + } + + return true; +} diff --git a/staging/adblock/options.h b/staging/adblock/options.h new file mode 100644 index 0000000..efc47a6 --- /dev/null +++ b/staging/adblock/options.h @@ -0,0 +1,45 @@ +/* + * This file is part of smolbote. It's copyrighted by the contributors recorded + * in the version control history of the file, available from its original + * location: https://library.iserlohn-fortress.net/aqua/smolbote.git + * + * SPDX-License-Identifier: GPL-3.0 + */ + +#ifndef SMOLBOTE_ADBLOCK_OPTIONS_H +#define SMOLBOTE_ADBLOCK_OPTIONS_H + +#include <QHash> +#include <QString> +#include <QWebEngineUrlRequestInfo> +#include <bitset> + +namespace AdblockPlus +{ + +struct Options { + // request handling options + bool exception = false; + bool redirect = false; + + // pattern options + bool matchcase = false; + + // request type options + bool firstparty = true; + bool thirdparty = true; + + // request types + bool matchesType(QWebEngineUrlRequestInfo::ResourceType type) + { + return flags.test(type); + } + bool parseAbp(const QStringRef &options); + + // TODO private: + std::bitset<32> flags; +}; + +} // namespace AdblockPlus + +#endif // SMOLBOTE_ADBLOCK_OPTIONS_H diff --git a/staging/adblock/plugin/AdblockPlusPlugin.json b/staging/adblock/plugin/AdblockPlusPlugin.json new file mode 100644 index 0000000..053826a --- /dev/null +++ b/staging/adblock/plugin/AdblockPlusPlugin.json @@ -0,0 +1,4 @@ +{ + "name": "AdblockPlus Filter Plugin", + "author": "Aqua <aqua@iserlohn-fortress.net>" +} diff --git a/staging/adblock/plugin/plugin.cpp b/staging/adblock/plugin/plugin.cpp new file mode 100644 index 0000000..028c83f --- /dev/null +++ b/staging/adblock/plugin/plugin.cpp @@ -0,0 +1,54 @@ +/* + * This file is part of smolbote. It's copyrighted by the contributors recorded + * in the version control history of the file, available from its original + * location: https://library.iserlohn-fortress.net/aqua/smolbote.git + * + * SPDX-License-Identifier: GPL-3.0 + */ + +#include "plugin.h" +#include "../filterlist.h" +#include <QTextStream> + +using namespace AdblockPlus; + +Filter* AdblockPlusFilterPlugin::load(QIODevice* from) const +{ + if(!from->isOpen()) + return nullptr; + + + QTextStream stream(from); + + if(stream.readLine().trimmed() != QLatin1String("[Adblock Plus 2.0]")) { + return nullptr; + } + + auto *list = new FilterList; + QString line; + int total, comments, rules, unsupported, failed; + + while(stream.readLineInto(&line)) { + if(!line.isEmpty()) { + ++total; + + switch(list->parse(line)) + { + case FilterList::Comment: + ++comments; + break; + case FilterList::Rule: + ++rules; + break; + case FilterList::Unsupported: + ++unsupported; + break; + case FilterList::Failed: + break; + } + } + } + + return list; +} + diff --git a/staging/adblock/plugin/plugin.h b/staging/adblock/plugin/plugin.h new file mode 100644 index 0000000..db419bd --- /dev/null +++ b/staging/adblock/plugin/plugin.h @@ -0,0 +1,25 @@ +/* + * This file is part of smolbote. It's copyrighted by the contributors recorded + * in the version control history of the file, available from its original + * location: https://library.iserlohn-fortress.net/aqua/smolbote.git + * + * SPDX-License-Identifier: GPL-3.0 + */ + +#ifndef ADBLOCKPLUSFILTER_PLUGIN_H +#define ADBLOCKPLUSFILTER_PLUGIN_H + +#include <smolbote/filterinterface.hpp> + +class AdblockPlusFilterPlugin : public QObject, public FilterPlugin +{ + Q_OBJECT + Q_PLUGIN_METADATA(IID FilterPluginIid FILE "AdblockPlusPlugin.json") + Q_INTERFACES(FilterPlugin) + +public: + Filter* load(QIODevice* from) const override; +}; + +#endif // ADBLOCKPLUSFILTER_PLUGIN_H + diff --git a/staging/adblock/rule.h b/staging/adblock/rule.h new file mode 100644 index 0000000..aaab49a --- /dev/null +++ b/staging/adblock/rule.h @@ -0,0 +1,153 @@ +/* + * This file is part of smolbote. It's copyrighted by the contributors recorded + * in the version control history of the file, available from its original + * location: https://library.iserlohn-fortress.net/aqua/smolbote.git + * + * SPDX-License-Identifier: GPL-3.0 + */ + +#ifndef SMOLBOTE_ADBLOCK_RULE_H +#define SMOLBOTE_ADBLOCK_RULE_H + +#include "options.h" +#include <QObject> +#include <QRegularExpression> +#include <QString> +#include <QStringMatcher> + +namespace AdblockPlus +{ +class Rule +{ +public: + virtual ~Rule() = default; + /** + * requestUrl: requested URL + * initiatorUrl: URL of document that initiated navigation + * firstPartyUrl: URL of the page that issued the request + */ + virtual bool hasMatch(const QStringRef &requestUrl, + const QStringRef &initiatorUrl, + const QStringRef &firstPartyUrl, + QWebEngineUrlRequestInfo::ResourceType resourceType = QWebEngineUrlRequestInfo::ResourceTypeMainFrame) const = 0; + + bool shouldRedirect() const + { + return options.redirect; + } + bool shouldBlock() const + { + return !options.exception; + } + +protected: + Rule(const Options &opt) + : options(opt) + { + } + const Options options; +}; + +// The separator character can be anything but +// a letter, a digit, or one of the following: _, -, ., %. +// The end of the address is also accepted as a separator. +inline bool isSeparator(const QChar &c) +{ + return !c.isLetter() && !c.isDigit() && c != '_' && c != '-' && c != '.' && c != '%'; +} + +class MatcherRule : public Rule +{ +public: + enum MatchPosition { + DomainMatch, + UrlStartsWith, + UrlEndsWith, + UrlContains, + UrlEquals + }; + + MatcherRule(const QString &pattern, const Options &opt, const MatchPosition pos = UrlContains) + : Rule(opt) + , position(pos) + , matcher(pattern, opt.matchcase ? Qt::CaseSensitive : Qt::CaseInsensitive) + , patternLength(pattern.length()) + { + } + explicit MatcherRule(const MatcherRule &) = delete; + MatcherRule &operator=(const MatcherRule &) = delete; + + explicit MatcherRule(MatcherRule &&) = delete; + MatcherRule &operator=(MatcherRule &&) = delete; + + ~MatcherRule() = default; + bool hasMatch(const QStringRef &url, + const QStringRef &initiatorUrl, + const QStringRef &firstPartyUrl, + QWebEngineUrlRequestInfo::ResourceType resourceType = QWebEngineUrlRequestInfo::ResourceTypeMainFrame) const override + { + const auto index = matcher.indexIn(url); + if(index == -1) { + return false; + } + + switch(position) { + case DomainMatch: + // match if + // there is only one : left of the index + // and + // character after pattern is separator or end of string + return (url.left(index).count(':') <= 1 && (index + patternLength == url.length() || isSeparator(url[index + patternLength]))); + case UrlStartsWith: + return (index == 0); + case UrlEndsWith: + return (index == url.length() - patternLength); + case UrlContains: + return (index != -1); + case UrlEquals: + return (index == 0 && patternLength == url.length()); + } + + return false; + } + +private: + const MatchPosition position; + const QStringMatcher matcher; + const int patternLength; +}; + +class RegexRule : public Rule +{ +public: + RegexRule(const QString &rule, const Options &opt) + : Rule(opt) + , regex(rule) + { + if(!opt.matchcase) { + regex.setPatternOptions(QRegularExpression::CaseInsensitiveOption); + } + } + explicit RegexRule(const RegexRule &) = delete; + RegexRule &operator=(const RegexRule &) = delete; + + explicit RegexRule(RegexRule &&) = delete; + RegexRule &operator=(RegexRule &&) = delete; + + ~RegexRule() = default; + bool hasMatch(const QStringRef &url, + const QStringRef &initiatorUrl, + const QStringRef &firstPartyUrl, + QWebEngineUrlRequestInfo::ResourceType resourceType = QWebEngineUrlRequestInfo::ResourceTypeMainFrame) const override + { + const auto match = regex.match(url); + return match.hasMatch(); + } + +private: + QRegularExpression regex; +}; + +} // namespace AdblockPlus + +#endif // SMOLBOTE_ADBLOCK_RULE_H diff --git a/staging/adblock/test/filterlist.cpp b/staging/adblock/test/filterlist.cpp new file mode 100644 index 0000000..ca122ac --- /dev/null +++ b/staging/adblock/test/filterlist.cpp @@ -0,0 +1,118 @@ +#define CATCH_CONFIG_MAIN +#include "filterlist.h" +#include <QBuffer> +#include <catch2/catch.hpp> + +using namespace AdblockPlus; + +QByteArray sampleList = + R"(! comment on line +! Last modified: 1 Jan 2000 00:00 UTC +! Expires: 4 days (update frequency) +)"; + +TEST_CASE("placeholder") +{ + QBuffer buffer(&sampleList); + buffer.open(QIODevice::ReadOnly | QIODevice::Text); + + AdblockPlus::FilterList list(buffer); + REQUIRE(!list.isUpToDate()); +} + +TEST_CASE("domain match") +{ + const QString defaultUrl = ""; + + const QString block1 = "http://ads.example.com/foo.gif"; + const QString block2 = "http://server1.ads.example.com/foo.gif"; + const QString block3 = "https://ads.example.com:8000/"; + const QString block4 = "https://ads.example.com"; + + const QString allow1 = "http://ads.example.com.ua/foo.gif"; + const QString allow2 = "http://example.com/redirect/http://ads.example.com/"; + + auto *rule = FilterList::parseRule("||ads.example.com^"); + REQUIRE(rule->shouldBlock()); + REQUIRE(!rule->shouldRedirect()); + + REQUIRE(rule->hasMatch(&block1, &defaultUrl, &defaultUrl)); + REQUIRE(rule->hasMatch(&block2, &defaultUrl, &defaultUrl)); + REQUIRE(rule->hasMatch(&block3, &defaultUrl, &defaultUrl)); + REQUIRE(rule->hasMatch(&block4, &defaultUrl, &defaultUrl)); + + REQUIRE(!rule->hasMatch(&allow1, &defaultUrl, &defaultUrl)); + REQUIRE(!rule->hasMatch(&allow2, &defaultUrl, &defaultUrl)); + + delete rule; +} + +TEST_CASE("string equals") +{ + const QString defaultUrl = ""; + + const QString block = "http://example.com/"; + + const QString allow1 = "http://example.com/foo.gif"; + const QString allow2 = "http://example.info/redirect/http://example.com/"; + + auto *rule = FilterList::parseRule("|http://example.com/|"); + REQUIRE(rule->shouldBlock()); + REQUIRE(!rule->shouldRedirect()); + + REQUIRE(rule->hasMatch(&block, &defaultUrl, &defaultUrl)); + + REQUIRE(!rule->hasMatch(&allow1, &defaultUrl, &defaultUrl)); + REQUIRE(!rule->hasMatch(&allow2, &defaultUrl, &defaultUrl)); + + delete rule; +} + +TEST_CASE("string starts with") +{ + const QString defaultUrl = ""; + + auto *rule = FilterList::parseRule("|http://baddomain.example/"); + REQUIRE(rule->shouldBlock()); + REQUIRE(!rule->shouldRedirect()); + + const QString blocks = "http://baddomain.example/banner.gif"; + const QString allows = "http://gooddomain.example/analyze?http://baddomain.example"; + + REQUIRE(rule->hasMatch(&blocks, &defaultUrl, &defaultUrl)); + REQUIRE(!rule->hasMatch(&allows, &defaultUrl, &defaultUrl)); + delete rule; +} + +TEST_CASE("string ends with") +{ + const QString defaultUrl = ""; + + auto *rule = FilterList::parseRule("swf|"); + REQUIRE(rule->shouldBlock()); + REQUIRE(!rule->shouldRedirect()); + + const QString blocks = "http://example.com/annoyingflash.swf"; + const QString allows = "http://example.com/swf/index.html"; + + REQUIRE(rule->hasMatch(&blocks, &defaultUrl, &defaultUrl)); + REQUIRE(!rule->hasMatch(&allows, &defaultUrl, &defaultUrl)); + delete rule; +} + +TEST_CASE("regular expressions") +{ + const QString defaultUrl = ""; + + auto *rule = FilterList::parseRule("/banner\\d+/"); + const QString matches1 = "banner123"; + const QString matches2 = "banner321"; + + const QString ignores = "banners"; + + REQUIRE(rule->hasMatch(&matches1, &defaultUrl, &defaultUrl)); + REQUIRE(rule->hasMatch(&matches2, &defaultUrl, &defaultUrl)); + REQUIRE(!rule->hasMatch(&ignores, &defaultUrl, &defaultUrl)); + delete rule; +} + diff --git a/staging/adblock/test/options.cpp b/staging/adblock/test/options.cpp new file mode 100644 index 0000000..67dc143 --- /dev/null +++ b/staging/adblock/test/options.cpp @@ -0,0 +1,42 @@ +#define CATCH_CONFIG_MAIN +#include "options.h" +#include <catch2/catch.hpp> + +using namespace AdblockPlus; + +SCENARIO("parsing adblock options") +{ + Options opt; + + GIVEN("an unknown option") + { + const QString unknown = "unknown"; + THEN("the option is not parsed") + { + QStringRef unknown_ref(&unknown); + REQUIRE(!opt.parseAbp(unknown_ref)); + } + } + + GIVEN("match-case,document,~subdocument") + { + const QString options = "match-case,document,~subdocument"; + REQUIRE(opt.parseAbp(&options)); + + WHEN("match-case") + { + REQUIRE(opt.matchcase); + } + + WHEN("testing set/unset options") + { + REQUIRE(opt.matchesType(QWebEngineUrlRequestInfo::ResourceTypeMainFrame)); + REQUIRE(!opt.matchesType(QWebEngineUrlRequestInfo::ResourceTypeSubFrame)); + } + + WHEN("testing other options") + { + REQUIRE(opt.matchesType(QWebEngineUrlRequestInfo::ResourceTypeStylesheet)); + } + } +} diff --git a/staging/adblock/test/rule.cpp b/staging/adblock/test/rule.cpp new file mode 100644 index 0000000..07186b9 --- /dev/null +++ b/staging/adblock/test/rule.cpp @@ -0,0 +1,86 @@ +#define CATCH_CONFIG_MAIN +#include "rule.h" +#include <catch2/catch.hpp> + +using namespace AdblockPlus; + +SCENARIO("MatcherRule") +{ + GIVEN("options with case sensitive pattern") + { + const QString defaultUrl = ""; + + const Options opt { .matchcase=true }; + const QString patternContains("this string contains the pattern in it"); + const QString patternBegins("pattern starts this string"); + const QString patternEnds("this string ends with pattern"); + const QString patternMissing("and this one does not"); + + WHEN("contains") + { + MatcherRule rule("pattern", opt); + REQUIRE(rule.shouldBlock()); + + THEN("pattern is matched anywhere in the URL") + { + REQUIRE(rule.hasMatch(&patternContains, &defaultUrl, &defaultUrl)); + REQUIRE(rule.hasMatch(&patternBegins, &defaultUrl, &defaultUrl)); + REQUIRE(rule.hasMatch(&patternEnds, &defaultUrl, &defaultUrl)); + REQUIRE(!rule.hasMatch(&patternMissing, &defaultUrl, &defaultUrl)); + } + } + + WHEN("startsWith") + { + MatcherRule rule("pattern", opt, MatcherRule::UrlStartsWith); + REQUIRE(rule.shouldBlock()); + + THEN("pattern is matched if at the start of the URL") + { + REQUIRE(!rule.hasMatch(&patternContains, &defaultUrl, &defaultUrl)); + REQUIRE(rule.hasMatch(&patternBegins, &defaultUrl, &defaultUrl)); + REQUIRE(!rule.hasMatch(&patternEnds, &defaultUrl, &defaultUrl)); + REQUIRE(!rule.hasMatch(&patternMissing, &defaultUrl, &defaultUrl)); + } + } + + WHEN("endsWith") + { + MatcherRule rule("pattern", opt, MatcherRule::UrlEndsWith); + REQUIRE(rule.shouldBlock()); + + THEN("pattern is matched if at the end of the URL") + { + REQUIRE(!rule.hasMatch(&patternContains, &defaultUrl, &defaultUrl)); + REQUIRE(!rule.hasMatch(&patternBegins, &defaultUrl, &defaultUrl)); + REQUIRE(rule.hasMatch(&patternEnds, &defaultUrl, &defaultUrl)); + REQUIRE(!rule.hasMatch(&patternMissing, &defaultUrl, &defaultUrl)); + } + } + } +} + +SCENARIO("RegexRule") +{ + GIVEN("options with case sensitive pattern") + { + const QString defaultUrl; + + const Options opt { .matchcase=true }; + const QString patternContains("this string contains the pattern in it"); + const QString patternMissing("and this one does not"); + + WHEN("contains") + { + RegexRule rule("pattern", opt); + REQUIRE(rule.shouldBlock()); + + THEN("pattern is matched anywhere in the URL") + { + REQUIRE(rule.hasMatch(&patternContains, &defaultUrl, &defaultUrl)); + REQUIRE(!rule.hasMatch(&patternMissing, &defaultUrl, &defaultUrl)); + } + } + } +} + |