From 908ab1e805d98cd79a36ba6bda3c3a9c8d0a99ca Mon Sep 17 00:00:00 2001
From: Aqua-sama
Date: Thu, 16 Apr 2020 22:09:58 +0300
Subject: Parse type options into std::bitset

---
 staging/adblock/filterlist.cpp   |   6 +-
 staging/adblock/options.cpp      | 148 ++++++++++++++++++---------------------
 staging/adblock/options.h        |  19 ++---
 staging/adblock/rule.h           |  23 +++++-
 staging/adblock/test/options.cpp |  17 +++--
 staging/adblock/test/rule.cpp    |  14 ++--
 6 files changed, 116 insertions(+), 111 deletions(-)

diff --git a/staging/adblock/filterlist.cpp b/staging/adblock/filterlist.cpp
index 03db642..1846ff6 100644
--- a/staging/adblock/filterlist.cpp
+++ b/staging/adblock/filterlist.cpp
@@ -78,10 +78,8 @@ Rule *FilterList::parseRule(const QByteArray &line)
         pattern = list.at(0);
         const auto options = list.at(1);
 
-        for(auto &option : splitOptions(&options)) {
-            if(!opt.parseAbp(option)) {
-                return nullptr;
-            }
+        if(!opt.parseAbp(&options)) {
+            return nullptr;
         }
     }
 
diff --git a/staging/adblock/options.cpp b/staging/adblock/options.cpp
index a748a68..08f30ee 100644
--- a/staging/adblock/options.cpp
+++ b/staging/adblock/options.cpp
@@ -10,99 +10,85 @@
 
 using namespace AdblockPlus;
 
-bool Options::parseAbp(QStringRef &option)
+constexpr std::array abpTypeOptions = {
+    "document", // ResourceTypeMainFrame 0 Top level page.
+    "subdocument", // ResourceTypeSubFrame 1 Frame or iframe.
+    "stylesheet", // ResourceTypeStylesheet 2 A CSS stylesheet.
+    "script", // ResourceTypeScript 3 An external script.
+    "image", // ResourceTypeImage 4 An image (JPG, GIF, PNG, and so on).
+    "font", // ResourceTypeFontResource 5 A font.
+    "other", // ResourceTypeSubResource 6 An "other" subresource.
+    "object", // ResourceTypeObject 7 An object (or embed) tag for a plugin or a resource that a plugin requested.
+    "media", // ResourceTypeMedia 8 A media resource.
+    "__worker", // ResourceTypeWorker 9 The main resource of a dedicated worker.
+    "__sharedworker", // ResourceTypeSharedWorker 10 The main resource of a shared worker.
+    "__prefetch", // ResourceTypePrefetch 11 An explicitly requested prefetch.
+    "__favicon", // ResourceTypeFavicon 12 A favicon.
+    "xmlhttprequest", // ResourceTypeXhr 13 An XMLHttpRequest.
+    "ping", // ResourceTypePing 14 A ping request for <a ping>.
+    "__serviceworker", // ResourceTypeServiceWorker 15 The main resource of a service worker.
+    "__cspreport", // ResourceTypeCspReport 16 A report of Content Security Policy (CSP) violations.
+    "__pluginresource", // ResourceTypePluginResource 17 A resource requested by a plugin.
+    "__preloadmainframe", // ResourceTypeNavigationPreloadMainFrame 19 A main-frame service worker navigation preload request.
+    "__preloadsubframe", // ResourceTypeNavigationPreloadSubFrame 20 A sub-frame service worker navigation preload request.
+    "__unknown" // ResourceTypeUnknown 255 Unknown request type.
+};
+
+auto parseTypeOption(QStringRef &option)
 {
-    // pattern options
-    if(option == "match-case") {
-        matchcase = true;
-        return true;
-    }
+    struct {
+        bool found = false;
+        int index = -1;
+        bool exception = false;
+    } ret;
 
     // Possible inverse type options include ~script, ~image, ~stylesheet, ~object,
     // ~xmlhttprequest, ~subdocument, ~ping, ~websocket, ~webrtc, ~document, ~elemhide, ~other
-    const bool exception = option.startsWith('~');
-    if(exception) {
+    if(option[0] == '~') {
+        ret.exception = true;
         option = option.mid(1);
     }
 
     // TODO: map all ResourceType's to their respective strings
     // TODO: websocket, webrtc, elemhide, generichide, genericblock, popup
 
-    if(option == "document") {
-        resource_options[QWebEngineUrlRequestInfo::ResourceTypeMainFrame] = !exception;
-        return true;
-    }
-
-    else if(option == "subdocument") {
-        resource_options[QWebEngineUrlRequestInfo::ResourceTypeSubFrame] = !exception;
-        return true;
-    }
-
-    else if(option == "stylesheet") {
-        resource_options[QWebEngineUrlRequestInfo::ResourceTypeStylesheet] = !exception;
-        return true;
-    }
-
-    else if(option == "script") {
-        resource_options[QWebEngineUrlRequestInfo::ResourceTypeScript] = !exception;
-        return true;
-    }
-
-    else if(option == "image") {
-        resource_options[QWebEngineUrlRequestInfo::ResourceTypeImage] = !exception;
-        return true;
-    }
-
-    else if(option == "font") {
-        resource_options[QWebEngineUrlRequestInfo::ResourceTypeFontResource] = !exception;
-        return true;
-    }
-
-    else if(option == "other") {
-        // An "other" subresource.
-        resource_options[QWebEngineUrlRequestInfo::ResourceTypeSubResource] = !exception;
-        return true;
-    }
-
-    else if(option == "object") {
-        resource_options[QWebEngineUrlRequestInfo::ResourceTypeObject] = !exception;
-        return true;
-    }
-
-    else if(option == "media") {
-        resource_options[QWebEngineUrlRequestInfo::ResourceTypeMedia] = !exception;
-        return true;
-    }
-    /*
-QWebEngineUrlRequestInfo::ResourceTypeWorker 9 The main resource of a dedicated worker.
-QWebEngineUrlRequestInfo::ResourceTypeSharedWorker 10 The main resource of a shared worker.
-QWebEngineUrlRequestInfo::ResourceTypePrefetch 11 An explicitly requested prefetch.
-QWebEngineUrlRequestInfo::ResourceTypeFavicon 12 A favicon.
-*/
-    else if(option == "xmlhttprequest") {
-        resource_options[QWebEngineUrlRequestInfo::ResourceTypeXhr] = !exception;
-        return true;
-    }
-
-    else if(option == "ping") {
-        resource_options[QWebEngineUrlRequestInfo::ResourceTypePing] = !exception;
-        return true;
+    for(std::size_t i = 0; i < std::size(abpTypeOptions); ++i) {
+        if(option == abpTypeOptions[i]) {
+            ret.index = i;
+            ret.found = true;
+            return ret;
+        }
     }
+    return ret;
+}
 
-    /*
-QWebEngineUrlRequestInfo::ResourceTypeServiceWorker 15 The main resource of a service worker.
-QWebEngineUrlRequestInfo::ResourceTypeCspReport 16 A report of Content Security Policy (CSP) violations. CSP reports are in JSON format and they are delivered by HTTP POST requests to specified servers. (Added in Qt 5.7)
-QWebEngineUrlRequestInfo::ResourceTypePluginResource 17 A resource requested by a plugin. (Added in Qt 5.7)
-QWebEngineUrlRequestInfo::ResourceTypeNavigationPreloadMainFrame 19 A main-frame service worker navigation preload request. (Added in Qt 5.14)
-QWebEngineUrlRequestInfo::ResourceTypeNavigationPreloadSubFrame 20 A sub-frame service worker navigation preload request. (Added in Qt 5.14)
-QWebEngineUrlRequestInfo::ResourceTypeUnknown 255 Unknown request type.
-    */
-
-    // Restriction to third-party/first-party requests
-    else if(option == "third-party") {
-        thirdparty = !exception;
-        return true;
+bool Options::parseAbp(const QStringRef &options)
+{
+    std::bitset<32> checked_flags;
+
+    for(auto &option : options.split(',')) {
+        if(option == "match-case") {
+            matchcase = true;
+
+        } else if(option == "third-party") {
+            thirdparty = !exception;
+        } else if(const auto r = parseTypeOption(option); r.found) {
+            if(!r.exception) {
+                flags.set(r.index, true);
+                checked_flags.set(r.index, true);
+            } else {
+                flags.set(r.index, false);
+                checked_flags.set(r.index, true);
+                for(auto i = 0; i < 32; ++i) {
+                    if(!checked_flags[i]) {
+                        flags.set(i, true);
+                    }
+                }
+            }
+        } else {
+            return false;
+        }
     }
 
-    return false;
+    return true;
 }
diff --git a/staging/adblock/options.h b/staging/adblock/options.h
index 642ea0c..d8f7d2b 100644
--- a/staging/adblock/options.h
+++ b/staging/adblock/options.h
@@ -12,6 +12,7 @@
 #include
 #include
 #include
+#include <bitset>
 
 namespace AdblockPlus
 {
@@ -33,16 +34,18 @@ struct Options {
     // request type options
     bool firstparty = true;
     bool thirdparty = true;
-    QHash resource_options;
 
-    bool parseAbp(QStringRef &opt);
-};
+    // request types
+    bool matchesType(QWebEngineUrlRequestInfo::ResourceType type)
+    {
+        return flags.test(type);
+    }
+    bool parseAbp(const QStringRef &options);
 
-inline auto splitOptions(const QStringRef &options)
-{
-    return options.split(',');
-}
+    // TODO private:
+    std::bitset<32> flags;
+};
 
-}
+} // namespace AdblockPlus
 
 #endif // SMOLBOTE_ADBLOCK_OPTIONS_H
diff --git a/staging/adblock/rule.h b/staging/adblock/rule.h
index 0dbff21..26a8249 100644
--- a/staging/adblock/rule.h
+++ b/staging/adblock/rule.h
@@ -17,11 +17,22 @@
 namespace AdblockPlus
 {
 
+static const QString defaultUrl;
+
 class Rule
 {
 public:
     virtual ~Rule() = default;
-    virtual bool hasMatch(const QStringRef &) const = 0;
+    /**
+     * requestUrl: requested URL
+     * initiatorUrl: URL of document that initiated navigation
+     * firstPartyUrl: URL of the page that issued the request
+     */
+    virtual bool hasMatch(const QStringRef &requestUrl,
+        const QStringRef &initiatorUrl = QStringRef(&defaultUrl),
+        const QStringRef &firstPartyUrl = QStringRef(&defaultUrl),
+        QWebEngineUrlRequestInfo::ResourceType resourceType = QWebEngineUrlRequestInfo::ResourceTypeMainFrame) const = 0;
+
     bool shouldRedirect() const
     {
         return options.redirect;
@@ -72,7 +83,10 @@ public:
     MatcherRule &operator=(MatcherRule &&) = delete;
     ~MatcherRule() = default;
 
-    bool hasMatch(const QStringRef &url) const override
+    bool hasMatch(const QStringRef &url,
+        const QStringRef &initiatorUrl = QStringRef(&defaultUrl),
+        const QStringRef &firstPartyUrl = QStringRef(&defaultUrl),
+        QWebEngineUrlRequestInfo::ResourceType resourceType = QWebEngineUrlRequestInfo::ResourceTypeMainFrame) const override
     {
         const auto index = matcher.indexIn(url);
         if(index == -1) {
@@ -123,7 +137,10 @@ public:
     RegexRule &operator=(RegexRule &&) = delete;
     ~RegexRule() = default;
 
-    bool hasMatch(const QStringRef &url) const override
+    bool hasMatch(const QStringRef &url,
+        const QStringRef &initiatorUrl = QStringRef(&defaultUrl),
+        const QStringRef &firstPartyUrl = QStringRef(&defaultUrl),
+        QWebEngineUrlRequestInfo::ResourceType resourceType = QWebEngineUrlRequestInfo::ResourceTypeMainFrame) const override
     {
         const auto match = regex.match(url);
         return match.hasMatch();
diff --git a/staging/adblock/test/options.cpp b/staging/adblock/test/options.cpp
index d0ad4a0..67dc143 100644
--- a/staging/adblock/test/options.cpp
+++ b/staging/adblock/test/options.cpp
@@ -18,26 +18,25 @@ SCENARIO("parsing adblock options")
         }
     }
 
-    GIVEN("various options in a QString")
+    GIVEN("match-case,document,~subdocument")
     {
         const QString options = "match-case,document,~subdocument";
-
-        for(auto &i : splitOptions(&options)) {
-            REQUIRE(opt.parseAbp(i));
-        }
+        REQUIRE(opt.parseAbp(&options));
 
         WHEN("match-case")
         {
             REQUIRE(opt.matchcase);
         }
 
-        WHEN("document")
+        WHEN("testing set/unset options")
         {
-            REQUIRE(opt.resource_options.value(QWebEngineUrlRequestInfo::ResourceTypeMainFrame));
+            REQUIRE(opt.matchesType(QWebEngineUrlRequestInfo::ResourceTypeMainFrame));
+            REQUIRE(!opt.matchesType(QWebEngineUrlRequestInfo::ResourceTypeSubFrame));
         }
-        WHEN("~subdocument")
+
+        WHEN("testing other options")
         {
-            REQUIRE(!opt.resource_options.value(QWebEngineUrlRequestInfo::ResourceTypeSubFrame));
+            REQUIRE(opt.matchesType(QWebEngineUrlRequestInfo::ResourceTypeStylesheet));
         }
     }
 }
diff --git a/staging/adblock/test/rule.cpp b/staging/adblock/test/rule.cpp
index d192601..31af7d5 100644
--- a/staging/adblock/test/rule.cpp
+++ b/staging/adblock/test/rule.cpp
@@ -2,11 +2,13 @@
 #include "rule.h"
 #include
 
+using namespace AdblockPlus;
+
 SCENARIO("MatcherRule")
 {
     GIVEN("options with case sensitive pattern")
     {
-        const AdblockPlus::Options opt { .matchcase=true };
+        const Options opt { .matchcase=true };
         const QString patternContains("this string contains the pattern in it");
         const QString patternBegins("pattern starts this string");
         const QString patternEnds("this string ends with pattern");
@@ -14,7 +16,7 @@ SCENARIO("MatcherRule")
 
         WHEN("contains")
         {
-            AdblockPlus::MatcherRule rule("pattern", opt);
+            MatcherRule rule("pattern", opt);
             REQUIRE(rule.shouldBlock());
 
             THEN("pattern is matched anywhere in the URL")
@@ -28,7 +30,7 @@ SCENARIO("MatcherRule")
 
         WHEN("startsWith")
         {
-            AdblockPlus::MatcherRule rule("pattern", opt, AdblockPlus::MatcherRule::UrlStartsWith);
+            MatcherRule rule("pattern", opt, MatcherRule::UrlStartsWith);
             REQUIRE(rule.shouldBlock());
 
             THEN("pattern is matched if at the start of the URL")
@@ -42,7 +44,7 @@ SCENARIO("MatcherRule")
 
         WHEN("endsWith")
         {
-            AdblockPlus::MatcherRule rule("pattern", opt, AdblockPlus::MatcherRule::UrlEndsWith);
+            MatcherRule rule("pattern", opt, MatcherRule::UrlEndsWith);
             REQUIRE(rule.shouldBlock());
 
             THEN("pattern is matched if at the end of the URL")
@@ -60,13 +62,13 @@ SCENARIO("RegexRule")
 {
     GIVEN("options with case sensitive pattern")
     {
-        const AdblockPlus::Options opt { .matchcase=true };
+        const Options opt { .matchcase=true };
         const QString patternContains("this string contains the pattern in it");
         const QString patternMissing("and this one does not");
 
         WHEN("contains")
         {
-            AdblockPlus::RegexRule rule("pattern", opt);
+            RegexRule rule("pattern", opt);
             REQUIRE(rule.shouldBlock());
 
             THEN("pattern is matched anywhere in the URL")
-- 
cgit v1.2.1