From 68da62a64cd65a8b28dcfe61b2174e55a47b7517 Mon Sep 17 00:00:00 2001 From: Aqua-sama Date: Tue, 14 Apr 2020 22:09:01 +0300 Subject: Add some FilterList tests --- staging/adblock/filterlist.cpp | 68 +++++++++++++--------------- staging/adblock/filterlist.h | 30 ++++++------- staging/adblock/meson.build | 31 ++++++++----- staging/adblock/rule.h | 11 ++++- staging/adblock/test/filterlist.cpp | 89 +++++++++++++++++++++++++++++++++++++ staging/adblock/test/rule.cpp | 80 +++++++++++++++++++++++++++++++++ staging/adblock/test/rules.cpp | 80 --------------------------------- 7 files changed, 244 insertions(+), 145 deletions(-) create mode 100644 staging/adblock/test/filterlist.cpp create mode 100644 staging/adblock/test/rule.cpp delete mode 100644 staging/adblock/test/rules.cpp (limited to 'staging/adblock') diff --git a/staging/adblock/filterlist.cpp b/staging/adblock/filterlist.cpp index 9c80e88..2677e1c 100644 --- a/staging/adblock/filterlist.cpp +++ b/staging/adblock/filterlist.cpp @@ -7,9 +7,8 @@ */ #include "filterlist.h" -#include "rule.h" -#include -#include +#include +#include /** * Documentation: @@ -27,24 +26,23 @@ const QLatin1String comment_expires("! Expires: "); using namespace AdblockPlus; -FilterList::~FilterList() +FilterList::FilterList(QIODevice &from) { - qDeleteAll(m_rules); -} + if(from.isReadable() && from.isTextModeEnabled()) { + while(from.bytesAvailable() > 0) { + const auto line = from.readLine(512); -FilterList::ParseResult FilterList::parse(const QString &line) -{ - if(line.startsWith('!')) { - parseComment(line); - return FilterList::Comment; + if(line[0] == '!') { + parseComment(line); - } else if(line.contains("##") || line.contains("#@#")) { - // ## is element hiding rule - // #@# is element hiding exception rule - return FilterList::Unsupported; + } else if(line.contains("##") || line.contains("#@#")) { + // ## is element hiding rule + // #@# is element hiding exception rule - } else { - return parseRule(line) ? FilterList::Rule : FilterList::Failed; + } else { + parseRule(line); + } + } } } @@ -64,7 +62,7 @@ void FilterList::parseComment(const QString &line) } } -bool FilterList::parseRule(const QString &line) +Rule *FilterList::parseRule(const QByteArray &line) { QString pattern = line; Options opt; @@ -81,51 +79,47 @@ bool FilterList::parseRule(const QString &line) const auto options = list.at(1).split(','); for(const auto &option : options) { - if(!opt.set(option)) - return false; + if(!opt.set(option)) { + return nullptr; + } } } if(pattern.startsWith("||") && pattern.endsWith("^")) { // domain match pattern = pattern.mid(2, pattern.length() - 3); - m_rules.append(new MatcherRule(pattern, opt)); + return new MatcherRule(pattern, opt); } else if(pattern.startsWith("|") && pattern.endsWith("|")) { // string equals pattern = pattern.mid(1, pattern.length() - 2); - m_rules.append(new MatcherRule(pattern, opt)); + return new MatcherRule(pattern, opt, MatcherRule::UrlEquals); - } else if(pattern.startsWith("||")) { + } else if(pattern.startsWith("|")) { // string starts with - pattern = pattern.mid(2, pattern.length() - 2); - m_rules.append(new MatcherRule(pattern, opt)); + pattern = pattern.mid(1, pattern.length() - 1); + return new MatcherRule(pattern, opt, MatcherRule::UrlStartsWith); } else if(pattern.endsWith("|")) { // string ends with pattern = pattern.mid(0, pattern.length() - 1); - m_rules.append(new MatcherRule(pattern, opt)); + return new MatcherRule(pattern, opt, MatcherRule::UrlEndsWith); } else if(pattern.startsWith("/") && pattern.endsWith("/")) { // regular expression pattern = pattern.mid(1, pattern.length() - 2); - m_rules.append(new RegexRule(pattern, opt)); + return new RegexRule(pattern, opt); - } else { + } else if(!pattern.isEmpty()){ // wildcard pattern pattern = QRegularExpression::wildcardToRegularExpression(pattern); - m_rules.append(new RegexRule(pattern, opt)); + return new RegexRule(pattern, opt); } - return true; -} - -void FilterList::filter(QWebEngineUrlRequestInfo &info) const -{ + return nullptr; } -bool FilterList::isUpToDate() const +bool FilterList::filter(QWebEngineUrlRequestInfo &info) const { - const auto current = QDateTime::currentDateTime(); - return expires > current; + return false; } diff --git a/staging/adblock/filterlist.h b/staging/adblock/filterlist.h index 2e3fba6..1cdbb86 100644 --- a/staging/adblock/filterlist.h +++ b/staging/adblock/filterlist.h @@ -6,34 +6,34 @@ * SPDX-License-Identifier: GPL-3.0 */ -#include #include #include #include #include #include +#include +#include "rule.h" namespace AdblockPlus { class Rule; class FilterList : public Filter { - Q_DISABLE_COPY(FilterList) - public: - explicit FilterList() = default; - ~FilterList(); + FilterList() = default; + explicit FilterList(QIODevice &from); + ~FilterList() + { + qDeleteAll(m_rules); + } - enum ParseResult { - Comment, - Rule, - Unsupported, - Failed, - }; - ParseResult parse(const QString &line); + bool filter(QWebEngineUrlRequestInfo &info) const override; + bool isUpToDate() const override + { + const auto current = QDateTime::currentDateTime(); + return expires > current; + } - void filter(QWebEngineUrlRequestInfo &info) const override; - bool isUpToDate() const override; QDateTime modified() const { return lastModified; @@ -43,9 +43,9 @@ public: return expires; } + [[nodiscard]] static Rule *parseRule(const QByteArray &line); private: void parseComment(const QString &line); - bool parseRule(const QString &line); QDateTime lastModified; QDateTime expires; diff --git a/staging/adblock/meson.build b/staging/adblock/meson.build index 972b7cf..f0a0e07 100644 --- a/staging/adblock/meson.build +++ b/staging/adblock/meson.build @@ -4,20 +4,27 @@ lib_adblockfilter = static_library('adblockfilter', dependencies: [ dep_qt5 ] ) -AdblockPlusFilterPlugin = shared_library('AdblockPlusPlugin', - [ 'plugin/plugin.cpp', - mod_qt5.preprocess(include_directories: smolbote_interfaces, - moc_headers: 'plugin/plugin.h', dependencies: dep_qt5) - ], - include_directories: smolbote_interfaces, +#AdblockPlusFilterPlugin = shared_library('AdblockPlusPlugin', +# [ 'plugin/plugin.cpp', +# mod_qt5.preprocess(include_directories: smolbote_interfaces, +# moc_headers: 'plugin/plugin.h', dependencies: dep_qt5) +# ], +# include_directories: smolbote_interfaces, +# link_with: lib_adblockfilter, +# dependencies: dep_qt5, +# install: true, +# install_dir: get_option('libdir')/'smolbote/plugins' +#) + +test('adblock: rule', executable('libadblockfilter_rule', + sources: 'test/rule.cpp', link_with: lib_adblockfilter, - dependencies: dep_qt5, - install: true, - install_dir: get_option('libdir')/'smolbote/plugins' -) + dependencies: [ dep_qt5, dep_catch ] +)) -test('filter: adblock format rules', executable('libadblockfilter_rules', - sources: 'test/rules.cpp', +test('adblock: filterlist', executable('libadblockfilter_filterlist', + sources: 'test/filterlist.cpp', + include_directories: smolbote_interfaces, link_with: lib_adblockfilter, dependencies: [ dep_qt5, dep_catch ] )) diff --git a/staging/adblock/rule.h b/staging/adblock/rule.h index 03970dc..90062ba 100644 --- a/staging/adblock/rule.h +++ b/staging/adblock/rule.h @@ -45,7 +45,8 @@ public: enum MatchPosition { UrlStartsWith, UrlEndsWith, - UrlContains + UrlContains, + UrlEquals }; MatcherRule(const QString &pattern, const Options &opt, const MatchPosition pos = UrlContains) @@ -58,6 +59,9 @@ public: explicit MatcherRule(const MatcherRule &) = delete; MatcherRule &operator=(const MatcherRule &) = delete; + explicit MatcherRule(MatcherRule &&) = delete; + MatcherRule &operator=(MatcherRule &&) = delete; + ~MatcherRule() = default; bool hasMatch(const QStringRef &url) const override { @@ -70,6 +74,8 @@ public: return (index == url.length() - patternLength); case UrlContains: return (index != -1); + case UrlEquals: + return (index == 0 && patternLength == url.length()); } } @@ -93,6 +99,9 @@ public: explicit RegexRule(const RegexRule &) = delete; RegexRule &operator=(const RegexRule &) = delete; + explicit RegexRule(RegexRule &&) = delete; + RegexRule &operator=(RegexRule &&) = delete; + ~RegexRule() = default; bool hasMatch(const QStringRef &url) const override { diff --git a/staging/adblock/test/filterlist.cpp b/staging/adblock/test/filterlist.cpp new file mode 100644 index 0000000..6b27904 --- /dev/null +++ b/staging/adblock/test/filterlist.cpp @@ -0,0 +1,89 @@ +#define CATCH_CONFIG_MAIN +#include "filterlist.h" +#include +#include + +using namespace AdblockPlus; + +QByteArray sampleList = + R"(! comment on line +! Last modified: 1 Jan 2000 00:00 UTC +! Expires: 4 days (update frequency) +)"; + +TEST_CASE("placeholder") +{ + QBuffer buffer(&sampleList); + buffer.open(QIODevice::ReadOnly | QIODevice::Text); + + AdblockPlus::FilterList list(buffer); + REQUIRE(!list.isUpToDate()); +} + +TEST_CASE("domain match") +{ + const std::array, 5> testUrls = { + std::make_pair("http://ads.example.com/foo.gif", true), + std::make_pair("http://server1.ads.example.com/foo.gif", true), + std::make_pair("https://ads.example.com:8000/", true), + std::make_pair("http://ads.example.com.ua/foo.gif", false), + std::make_pair("http://example.com/redirect/http://ads.example.com/", false) + }; + + auto *rule = FilterList::parseRule("||ads.example.com^"); + REQUIRE(rule->shouldBlock()); + REQUIRE(!rule->shouldRedirect()); + + for(const auto &pair : testUrls) { + REQUIRE(rule->hasMatch(&pair.first) == pair.second); + } + + delete rule; +} + +TEST_CASE("string equals") +{ + const std::array, 3> testUrls = { + std::make_pair("http://example.com/", true), + std::make_pair("http://example.com/foo.gif", false), + std::make_pair("http://example.info/redirect/http://example.com/", false) + }; + + auto *rule = FilterList::parseRule("|http://example.com/|"); + REQUIRE(rule->shouldBlock()); + REQUIRE(!rule->shouldRedirect()); + + for(const auto &pair : testUrls) { + REQUIRE(rule->hasMatch(&pair.first) == pair.second); + } + + delete rule; +} + +TEST_CASE("string starts with") +{ + auto *rule = FilterList::parseRule("|http://baddomain.example/"); + REQUIRE(rule->shouldBlock()); + REQUIRE(!rule->shouldRedirect()); + + const QString blocks = "http://baddomain.example/banner.gif"; + const QString allows = "http://gooddomain.example/analyze?http://baddomain.example"; + + REQUIRE(rule->hasMatch(&blocks)); + REQUIRE(!rule->hasMatch(&allows)); + delete rule; +} + +TEST_CASE("string ends with") +{ + auto *rule = FilterList::parseRule("swf|"); + REQUIRE(rule->shouldBlock()); + REQUIRE(!rule->shouldRedirect()); + + const QString blocks = "http://example.com/annoyingflash.swf"; + const QString allows = "http://example.com/swf/index.html"; + + REQUIRE(rule->hasMatch(&blocks)); + REQUIRE(!rule->hasMatch(&allows)); + delete rule; +} diff --git a/staging/adblock/test/rule.cpp b/staging/adblock/test/rule.cpp new file mode 100644 index 0000000..d192601 --- /dev/null +++ b/staging/adblock/test/rule.cpp @@ -0,0 +1,80 @@ +#define CATCH_CONFIG_MAIN +#include "rule.h" +#include + +SCENARIO("MatcherRule") +{ + GIVEN("options with case sensitive pattern") + { + const AdblockPlus::Options opt { .matchcase=true }; + const QString patternContains("this string contains the pattern in it"); + const QString patternBegins("pattern starts this string"); + const QString patternEnds("this string ends with pattern"); + const QString patternMissing("and this one does not"); + + WHEN("contains") + { + AdblockPlus::MatcherRule rule("pattern", opt); + REQUIRE(rule.shouldBlock()); + + THEN("pattern is matched anywhere in the URL") + { + REQUIRE(rule.hasMatch(&patternContains)); + REQUIRE(rule.hasMatch(&patternBegins)); + REQUIRE(rule.hasMatch(&patternEnds)); + REQUIRE(!rule.hasMatch(&patternMissing)); + } + } + + WHEN("startsWith") + { + AdblockPlus::MatcherRule rule("pattern", opt, AdblockPlus::MatcherRule::UrlStartsWith); + REQUIRE(rule.shouldBlock()); + + THEN("pattern is matched if at the start of the URL") + { + REQUIRE(!rule.hasMatch(&patternContains)); + REQUIRE(rule.hasMatch(&patternBegins)); + REQUIRE(!rule.hasMatch(&patternEnds)); + REQUIRE(!rule.hasMatch(&patternMissing)); + } + } + + WHEN("endsWith") + { + AdblockPlus::MatcherRule rule("pattern", opt, AdblockPlus::MatcherRule::UrlEndsWith); + REQUIRE(rule.shouldBlock()); + + THEN("pattern is matched if at the end of the URL") + { + REQUIRE(!rule.hasMatch(&patternContains)); + REQUIRE(!rule.hasMatch(&patternBegins)); + REQUIRE(rule.hasMatch(&patternEnds)); + REQUIRE(!rule.hasMatch(&patternMissing)); + } + } + } +} + +SCENARIO("RegexRule") +{ + GIVEN("options with case sensitive pattern") + { + const AdblockPlus::Options opt { .matchcase=true }; + const QString patternContains("this string contains the pattern in it"); + const QString patternMissing("and this one does not"); + + WHEN("contains") + { + AdblockPlus::RegexRule rule("pattern", opt); + REQUIRE(rule.shouldBlock()); + + THEN("pattern is matched anywhere in the URL") + { + REQUIRE(rule.hasMatch(&patternContains)); + REQUIRE(!rule.hasMatch(&patternMissing)); + } + } + } +} + diff --git a/staging/adblock/test/rules.cpp b/staging/adblock/test/rules.cpp deleted file mode 100644 index d192601..0000000 --- a/staging/adblock/test/rules.cpp +++ /dev/null @@ -1,80 +0,0 @@ -#define CATCH_CONFIG_MAIN -#include "rule.h" -#include - -SCENARIO("MatcherRule") -{ - GIVEN("options with case sensitive pattern") - { - const AdblockPlus::Options opt { .matchcase=true }; - const QString patternContains("this string contains the pattern in it"); - const QString patternBegins("pattern starts this string"); - const QString patternEnds("this string ends with pattern"); - const QString patternMissing("and this one does not"); - - WHEN("contains") - { - AdblockPlus::MatcherRule rule("pattern", opt); - REQUIRE(rule.shouldBlock()); - - THEN("pattern is matched anywhere in the URL") - { - REQUIRE(rule.hasMatch(&patternContains)); - REQUIRE(rule.hasMatch(&patternBegins)); - REQUIRE(rule.hasMatch(&patternEnds)); - REQUIRE(!rule.hasMatch(&patternMissing)); - } - } - - WHEN("startsWith") - { - AdblockPlus::MatcherRule rule("pattern", opt, AdblockPlus::MatcherRule::UrlStartsWith); - REQUIRE(rule.shouldBlock()); - - THEN("pattern is matched if at the start of the URL") - { - REQUIRE(!rule.hasMatch(&patternContains)); - REQUIRE(rule.hasMatch(&patternBegins)); - REQUIRE(!rule.hasMatch(&patternEnds)); - REQUIRE(!rule.hasMatch(&patternMissing)); - } - } - - WHEN("endsWith") - { - AdblockPlus::MatcherRule rule("pattern", opt, AdblockPlus::MatcherRule::UrlEndsWith); - REQUIRE(rule.shouldBlock()); - - THEN("pattern is matched if at the end of the URL") - { - REQUIRE(!rule.hasMatch(&patternContains)); - REQUIRE(!rule.hasMatch(&patternBegins)); - REQUIRE(rule.hasMatch(&patternEnds)); - REQUIRE(!rule.hasMatch(&patternMissing)); - } - } - } -} - -SCENARIO("RegexRule") -{ - GIVEN("options with case sensitive pattern") - { - const AdblockPlus::Options opt { .matchcase=true }; - const QString patternContains("this string contains the pattern in it"); - const QString patternMissing("and this one does not"); - - WHEN("contains") - { - AdblockPlus::RegexRule rule("pattern", opt); - REQUIRE(rule.shouldBlock()); - - THEN("pattern is matched anywhere in the URL") - { - REQUIRE(rule.hasMatch(&patternContains)); - REQUIRE(!rule.hasMatch(&patternMissing)); - } - } - } -} - -- cgit v1.2.1