#include "adblockrule.h"

/**
 * Check whether a host name is covered by a domain filter.
 *
 * A domain is covered when it is identical to the filter, or when it is a
 * subdomain of it (the filter is a suffix of the domain and is preceded by a
 * '.' label separator).
 *
 * @param domain host name to test, e.g. "subdomain.example.com"
 * @param filter domain taken from the rule, e.g. "example.com"
 * @return true if domain equals filter or is a subdomain of filter
 */
bool isMatchingDomain(const QString &domain, const QString &filter)
{
    // domain and filter are the same
    if(domain == filter) {
        return true;
    }

    // an empty filter can only match an empty domain (handled above);
    // domain can't be matched by filter if it doesn't end with filter
    // ex. example2.com isn't matched by example.com
    if(filter.isEmpty() || !domain.endsWith(filter)) {
        return false;
    }

    // match with subdomains
    // ex. subdomain.example.com is matched by example.com
    // The suffix position is computed from the lengths rather than searched for:
    // a forward search returns the *first* occurrence, which is wrong when the
    // filter text also appears earlier in the domain (ex. example.com.example.com).
    const int index = domain.length() - filter.length();

    // match if (domain ends with filter) and (character before filter is '.')
    return index > 0 && domain[index - 1] == QLatin1Char('.');
}

/**
 * Translate a single resource-type option of a filter rule.
 *
 * The option is recognised by its suffix, so both "script" and "~script" map
 * to the same resource type; a leading '~' inverts the option.
 *
 * @param option one option from the '$' section of a rule (not a "domain" option)
 * @return pair of (resource type, flag); the flag is false when the option
 *         starts with '~' and true otherwise. For an unrecognised option the
 *         function asserts in debug builds and returns
 *         (ResourceTypeUnknown, false) when assertions are compiled out.
 */
inline std::pair<QWebEngineUrlRequestInfo::ResourceType, bool> parseOption(const QString &option)
{
    // "~option" inverts the option
    const bool enabled = !option.startsWith(QLatin1Literal("~"));

    if(option.endsWith(QLatin1Literal("script"))) {
        return std::make_pair(QWebEngineUrlRequestInfo::ResourceTypeScript, enabled);
    }
    if(option.endsWith(QLatin1Literal("image"))) {
        return std::make_pair(QWebEngineUrlRequestInfo::ResourceTypeImage, enabled);
    }
    if(option.endsWith(QLatin1Literal("stylesheet"))) {
        return std::make_pair(QWebEngineUrlRequestInfo::ResourceTypeStylesheet, enabled);
    }
    if(option.endsWith(QLatin1Literal("object"))) {
        return std::make_pair(QWebEngineUrlRequestInfo::ResourceTypeObject, enabled);
    }
    if(option.endsWith(QLatin1Literal("xmlhttprequest"))) {
        return std::make_pair(QWebEngineUrlRequestInfo::ResourceTypeXhr, enabled);
    }
    if(option.endsWith(QLatin1Literal("other"))) {
        return std::make_pair(QWebEngineUrlRequestInfo::ResourceTypeUnknown, enabled);
    }

    // unhandled pair
    Q_ASSERT(false);
    // Q_ASSERT expands to nothing in release builds; without an explicit return
    // control would flow off the end of a non-void function (undefined behaviour).
    return std::make_pair(QWebEngineUrlRequestInfo::ResourceTypeUnknown, false);
}

// adblock format documentation
// https://adblockplus.org/filters

// QString::mid(pos, len) - Returns a string starting at the specified position index.
// QString::chop(len) - Removes n characters from the end of the string.
// QString::remove(pos, len) - Removes n characters from the string, starting at the given position index. AdBlockRule::AdBlockRule(const QString &filter) { QString parsedLine = filter.trimmed(); // there is no rule, or it"s a comment if(parsedLine.isEmpty() || parsedLine.startsWith("!")) { return; } // css rule - ignore for now if(parsedLine.contains(QLatin1Literal("##")) || parsedLine.contains(QLatin1Literal("#@#"))) { return; } m_isEnabled = true; // exception rules if(parsedLine.startsWith(QLatin1Literal("@@"))) { m_isException = true; parsedLine.remove(0, 2); } // parse options { const int sepPos = parsedLine.indexOf(QLatin1Literal("$")); if(sepPos != -1) { const auto options = parsedLine.mid(sepPos + 1).split(QLatin1Literal(",")); parsedLine = parsedLine.mid(0, sepPos); for(const QString &option : options) { if(option.startsWith(QLatin1Literal("domain"))) { const auto domainList = option.mid(7).split(QLatin1Literal("|")); for(const QString &domain : domainList) { if(domain.startsWith(QLatin1Literal("~"))) blockedDomains.append(domain.mid(1)); else allowedDomains.append(domain); } } else { auto optPair = parseOption(option); m_resourceTypeOptions.insert(optPair.first, optPair.second); } } } } // regular expression rules if(parsedLine.startsWith(QLatin1Literal("/")) && parsedLine.endsWith(QLatin1Literal("/"))) { parsedLine = parsedLine.mid(1, parsedLine.length() - 2); matchType = RegularExpressionMatch; regexp.setPattern(parsedLine); return; } // basic filter rules if(parsedLine.startsWith(QLatin1Literal("|")) && parsedLine.endsWith(QLatin1Literal("|"))) { matchType = StringEquals; match = parsedLine.mid(1, parsedLine.length() - 2); return; } // Basic filter rules can use wildcards, which were supported by QRegExp, // but were deprecated in QRegularExpression. 
// remove beginning and ending wildcards if(parsedLine.startsWith(QLatin1Literal("*"))) parsedLine = parsedLine.mid(1); if(parsedLine.endsWith(QLatin1Literal("*"))) parsedLine.chop(1); if(parsedLine.startsWith(QLatin1Literal("||")) && parsedLine.endsWith(QLatin1Literal("^"))) { matchType = DomainMatch; match = parsedLine.mid(2, parsedLine.length() - 3); return; } // check for wildcards and translate to regexp // wildcard "*" - any number of characters // separator "^" - end, ? or / if(parsedLine.contains(QLatin1Literal("*")) || parsedLine.contains(QLatin1Literal("^"))) { matchType = RegularExpressionMatch; parsedLine.replace(QLatin1Literal("*"), QLatin1Literal(".*")); parsedLine.replace(QLatin1Literal("^"), QLatin1Literal("($|\\?|\\/)")); regexp.setPattern(parsedLine); return; } matcher.setPattern(parsedLine); } bool AdBlockRule::isEnabled() const { return m_isEnabled; } bool AdBlockRule::matches(QWebEngineUrlRequestInfo::ResourceType type) const { // no options have been specified -> match all resource types if(m_resourceTypeOptions.isEmpty()) return true; // this resource type has not been specified -> reject it if(!m_resourceTypeOptions.contains(type)) return false; // resource type has been specified; true to match, false to exception return m_resourceTypeOptions.value(type); } bool AdBlockRule::shouldBlock(const QUrl &url) const { switch (matchType) { case RegularExpressionMatch: if(regexp.match(url.toString()).hasMatch()) return !m_isException; case StringEquals: return url.toString() == match; case DomainMatch: return isMatchingDomain(url.host(), match); default: return false; } }