From fde1d3399d0d51250c31315dca4cbc9044d36b9d Mon Sep 17 00:00:00 2001
From: Aqua-sama
Date: Thu, 12 Jul 2018 19:57:01 +0200
Subject: AdBlockRule: parse some options

---
 lib/web/urlfilter/adblockrule.cpp | 91 +++++++++++++++++++++++++++++++++++++++
 lib/web/urlfilter/adblockrule.h   |  2 +
 2 files changed, 93 insertions(+)

(limited to 'lib')

diff --git a/lib/web/urlfilter/adblockrule.cpp b/lib/web/urlfilter/adblockrule.cpp
index d707783..505cda5 100644
--- a/lib/web/urlfilter/adblockrule.cpp
+++ b/lib/web/urlfilter/adblockrule.cpp
@@ -21,6 +21,46 @@ bool isMatchingDomain(const QString &domain, const QString &filter)
     return index > 0 && domain[index - 1] == QLatin1Char('.');
 }
 
+// Parses a single resource-type option of a filter rule, such as "script"
+// or "~image".
+//
+// Returns the resource type paired with a flag that is false when the option
+// carries the inverse prefix "~" and true otherwise. When ok is not null, *ok
+// is set to whether the option names a known resource type; for an unknown
+// option the result is (ResourceTypeUnknown, false) and should be discarded.
+inline std::pair<QWebEngineUrlRequestInfo::ResourceType, bool> parseOption(const QString &option, bool *ok = nullptr)
+{
+    static const struct {
+        const char *name;
+        QWebEngineUrlRequestInfo::ResourceType type;
+    } knownTypes[] = {
+        { "script", QWebEngineUrlRequestInfo::ResourceTypeScript },
+        { "image", QWebEngineUrlRequestInfo::ResourceTypeImage },
+        { "stylesheet", QWebEngineUrlRequestInfo::ResourceTypeStylesheet },
+        { "object", QWebEngineUrlRequestInfo::ResourceTypeObject },
+        { "xmlhttprequest", QWebEngineUrlRequestInfo::ResourceTypeXhr },
+        { "other", QWebEngineUrlRequestInfo::ResourceTypeUnknown },
+    };
+
+    // compare the whole option name (without "~") instead of its suffix only
+    const bool isInverse = option.startsWith(QLatin1Literal("~"));
+    const QString name = isInverse ? option.mid(1) : option;
+
+    for(const auto &known : knownTypes) {
+        if(name == QLatin1Literal(known.name)) {
+            if(ok != nullptr)
+                *ok = true;
+            return std::make_pair(known.type, !isInverse);
+        }
+    }
+
+    // Not a known resource type: report it through ok and still return a
+    // well-defined value, because a non-void function must not run off its end.
+    if(ok != nullptr)
+        *ok = false;
+    return std::make_pair(QWebEngineUrlRequestInfo::ResourceTypeUnknown, false);
+}
+
 // adblock format documentation
 // https://adblockplus.org/filters
 
@@ -50,6 +90,35 @@ AdBlockRule::AdBlockRule(const QString &filter)
         parsedLine.remove(0, 2);
     }
 
+    // parse options: everything after '$' is a comma-separated option list
+    {
+        const int sepPos = parsedLine.indexOf(QLatin1Literal("$"));
+        if(sepPos != -1) {
+            const auto options = parsedLine.mid(sepPos + 1).split(QLatin1Literal(","));
+            parsedLine = parsedLine.mid(0, sepPos);
+
+            for(const QString &option : options) {
+                if(option.startsWith(QLatin1Literal("domain="))) {
+                    // skip the "domain=" prefix; the domains are separated by '|'
+                    const auto domainList = option.mid(7).split(QLatin1Literal("|"));
+                    for(const QString &domain : domainList) {
+                        if(domain.startsWith(QLatin1Literal("~")))
+                            blockedDomains.append(domain.mid(1));
+                        else
+                            allowedDomains.append(domain);
+                    }
+                } else {
+                    // options that are not resource types are skipped
+                    bool isResourceType = false;
+                    const auto optPair = parseOption(option, &isResourceType);
+                    if(isResourceType)
+                        m_resourceTypeOptions.insert(optPair.first, optPair.second);
+                }
+            }
+
+        }
+    }
+
     // regular expression rules
     if(parsedLine.startsWith(QLatin1Literal("/")) && parsedLine.endsWith(QLatin1Literal("/"))) {
         parsedLine = parsedLine.mid(1, parsedLine.length() - 2);
@@ -101,6 +170,28 @@ bool AdBlockRule::isEnabled() const
     return m_isEnabled;
 }
 
+// Returns true if this rule applies to requests of the given resource type.
+bool AdBlockRule::matches(QWebEngineUrlRequestInfo::ResourceType type) const
+{
+    // no options have been specified -> match all resource types
+    if(m_resourceTypeOptions.isEmpty())
+        return true;
+
+    // resource type has been specified; true to match, false to exception
+    const auto it = m_resourceTypeOptions.constFind(type);
+    if(it != m_resourceTypeOptions.constEnd())
+        return it.value();
+
+    // this resource type has not been specified: a rule that lists at least
+    // one type to match applies to the listed types only, whereas a rule made
+    // up purely of inverse ("~type") options applies to every other type
+    for(const bool shouldMatch : m_resourceTypeOptions) {
+        if(shouldMatch)
+            return false;
+    }
+    return true;
+}
+
 bool AdBlockRule::shouldBlock(const QUrl &url) const
 {
     switch (matchType) {
diff --git a/lib/web/urlfilter/adblockrule.h b/lib/web/urlfilter/adblockrule.h
index e1cabae..a08fbb7 100644
--- a/lib/web/urlfilter/adblockrule.h
+++ b/lib/web/urlfilter/adblockrule.h
@@ -24,6 +24,7 @@ public:
     AdBlockRule(const QString &filter);
 
     bool isEnabled() const;
+    bool matches(QWebEngineUrlRequestInfo::ResourceType type) const;
     bool shouldBlock(const QUrl &url) const;
 
 private:
@@ -34,6 +35,7 @@ private:
 
     MatchType matchType = InvalidMatch;
     QHash<QWebEngineUrlRequestInfo::ResourceType, bool> m_resourceTypeOptions;
+    QStringList allowedDomains, blockedDomains;
     QString match;
     QRegularExpression regexp;
 
-- 
cgit v1.2.1