From 761446e34af7a81d0516e322228616ebd046c9ba Mon Sep 17 00:00:00 2001 From: Aqua-sama Date: Wed, 15 Apr 2020 11:56:04 +0300 Subject: Fix MatcherRule with DomainMatch --- staging/adblock/filterlist.cpp | 4 ++-- staging/adblock/rule.h | 20 +++++++++++++++++++ staging/adblock/test/filterlist.cpp | 40 ++++++++++++++++++++----------------- 3 files changed, 44 insertions(+), 20 deletions(-) diff --git a/staging/adblock/filterlist.cpp b/staging/adblock/filterlist.cpp index 2677e1c..d3f6134 100644 --- a/staging/adblock/filterlist.cpp +++ b/staging/adblock/filterlist.cpp @@ -88,7 +88,7 @@ Rule *FilterList::parseRule(const QByteArray &line) if(pattern.startsWith("||") && pattern.endsWith("^")) { // domain match pattern = pattern.mid(2, pattern.length() - 3); - return new MatcherRule(pattern, opt); + return new MatcherRule(pattern, opt, MatcherRule::DomainMatch); } else if(pattern.startsWith("|") && pattern.endsWith("|")) { // string equals @@ -110,7 +110,7 @@ Rule *FilterList::parseRule(const QByteArray &line) pattern = pattern.mid(1, pattern.length() - 2); return new RegexRule(pattern, opt); - } else if(!pattern.isEmpty()){ + } else if(!pattern.isEmpty()) { // wildcard pattern pattern = QRegularExpression::wildcardToRegularExpression(pattern); return new RegexRule(pattern, opt); diff --git a/staging/adblock/rule.h b/staging/adblock/rule.h index 90062ba..0dbff21 100644 --- a/staging/adblock/rule.h +++ b/staging/adblock/rule.h @@ -39,10 +39,19 @@ protected: const Options options; }; +// The separator character can be anything but +// a letter, a digit, or one of the following: _, -, ., %. +// The end of the address is also accepted as a separator. +inline bool isSeparator(const QChar &c) +{ + return !c.isLetter() && !c.isDigit() && c != '_' && c != '-' && c != '.' && c != '%'; +} + class MatcherRule : public Rule { public: enum MatchPosition { + DomainMatch, UrlStartsWith, UrlEndsWith, UrlContains, @@ -66,8 +75,17 @@ public: bool hasMatch(const QStringRef &url) const override { const auto index = matcher.indexIn(url); + if(index == -1) { + return false; + } switch(position) { + case DomainMatch: + // match if + // there is only one : left of the index + // and + // character after pattern is separator or end of string + return (url.left(index).count(':') <= 1 && (index + patternLength == url.length() || isSeparator(url[index + patternLength]))); case UrlStartsWith: return (index == 0); case UrlEndsWith: @@ -77,6 +95,8 @@ public: case UrlEquals: return (index == 0 && patternLength == url.length()); } + + return false; } private: diff --git a/staging/adblock/test/filterlist.cpp b/staging/adblock/test/filterlist.cpp index 6b27904..4366489 100644 --- a/staging/adblock/test/filterlist.cpp +++ b/staging/adblock/test/filterlist.cpp @@ -22,40 +22,44 @@ TEST_CASE("placeholder") TEST_CASE("domain match") { - const std::array, 5> testUrls = { - std::make_pair("http://ads.example.com/foo.gif", true), - std::make_pair("http://server1.ads.example.com/foo.gif", true), - std::make_pair("https://ads.example.com:8000/", true), - std::make_pair("http://ads.example.com.ua/foo.gif", false), - std::make_pair("http://example.com/redirect/http://ads.example.com/", false) - }; + const QString block1 = "http://ads.example.com/foo.gif"; + const QString block2 = "http://server1.ads.example.com/foo.gif"; + const QString block3 = "https://ads.example.com:8000/"; + const QString block4 = "https://ads.example.com"; + + const QString allow1 = "http://ads.example.com.ua/foo.gif"; + const QString allow2 = "http://example.com/redirect/http://ads.example.com/"; auto *rule = FilterList::parseRule("||ads.example.com^"); REQUIRE(rule->shouldBlock()); REQUIRE(!rule->shouldRedirect()); - for(const auto &pair : testUrls) { - REQUIRE(rule->hasMatch(&pair.first) == pair.second); - } + REQUIRE(rule->hasMatch(&block1)); + REQUIRE(rule->hasMatch(&block2)); + REQUIRE(rule->hasMatch(&block3)); + REQUIRE(rule->hasMatch(&block4)); + + REQUIRE(!rule->hasMatch(&allow1)); + REQUIRE(!rule->hasMatch(&allow2)); delete rule; } TEST_CASE("string equals") { - const std::array, 3> testUrls = { - std::make_pair("http://example.com/", true), - std::make_pair("http://example.com/foo.gif", false), - std::make_pair("http://example.info/redirect/http://example.com/", false) - }; + const QString block = "http://example.com/"; + + const QString allow1 = "http://example.com/foo.gif"; + const QString allow2 = "http://example.info/redirect/http://example.com/"; auto *rule = FilterList::parseRule("|http://example.com/|"); REQUIRE(rule->shouldBlock()); REQUIRE(!rule->shouldRedirect()); - for(const auto &pair : testUrls) { - REQUIRE(rule->hasMatch(&pair.first) == pair.second); - } + REQUIRE(rule->hasMatch(&block)); + + REQUIRE(!rule->hasMatch(&allow1)); + REQUIRE(!rule->hasMatch(&allow2)); delete rule; } -- cgit v1.2.1