/* * This file is part of smolbote. It's copyrighted by the contributors recorded * in the version control history of the file, available from its original * location: https://neueland.iserlohn-fortress.net/gitea/aqua/smolbote * * SPDX-License-Identifier: GPL-3.0 */ // Based on Falkon's AdBlockRule class #include "adblockrule.h" // adblock format documentation // https://adblockplus.org/filters // QString::mid(pos, len) - Returns a string starting at the specified position index. // QString::chop(len) - Removes n characters from the end of the string. // QString::remove(pos, len) - Removes n characters from the string, starting at the given position index. AdBlockRule *loadRule(const QString &filter) { QString parsedLine = filter.trimmed(); // there is no rule, or it's a comment if(parsedLine.isEmpty() || parsedLine.startsWith("!")) { return nullptr; } // css rule -> filterleaves cannot do element blocking if(parsedLine.contains(QLatin1Literal("##")) || parsedLine.contains(QLatin1Literal("#@#"))) { return nullptr; } // exception rules FilterLeaf::Action action = FilterLeaf::Block; if(parsedLine.startsWith(QLatin1Literal("@@"))) { action = FilterLeaf::Allow; parsedLine.remove(0, 2); } // parse options QStringList enabledOn, disabledOn; QHash optionsHash; { const int sepPos = parsedLine.indexOf(QLatin1Literal("$")); if(sepPos != -1) { const auto options = parsedLine.mid(sepPos + 1).split(QLatin1Literal(",")); parsedLine = parsedLine.mid(0, sepPos); for(const QString &option : options) { if(option.startsWith(QLatin1Literal("domain"))) { const auto domainList = option.mid(7).split(QLatin1Literal("|")); for(const QString &domain : domainList) { if(domain.startsWith(QLatin1Literal("~"))) { disabledOn.append(domain.mid(1)); } else { enabledOn.append(domain); } } } else { const auto pair = parseOption(option); if(pair) optionsHash.insert(pair.value().first, pair.value().second); } } } } FilterLeaf::UrlMatchType matchType; QString pattern; if(parsedLine.startsWith(QLatin1Literal("/")) && parsedLine.endsWith(QLatin1Literal("/"))) { // regular expression rule matchType = FilterLeaf::RegularExpressionMatch; pattern = parsedLine.mid(1, parsedLine.length() - 2); } else if(parsedLine.startsWith(QLatin1Literal("|")) && parsedLine.endsWith(QLatin1Literal("|"))) { // string equals rule matchType = FilterLeaf::StringEquals; pattern = parsedLine.mid(1, parsedLine.length() - 2); } else { // Basic filter rules can use wildcards, which were supported by QRegExp, // but were deprecated in QRegularExpression. // remove beginning and ending wildcards if(parsedLine.startsWith(QLatin1Literal("*"))) parsedLine = parsedLine.mid(1); if(parsedLine.endsWith(QLatin1Literal("*"))) parsedLine.chop(1); if(parsedLine.startsWith(QLatin1Literal("||")) && parsedLine.endsWith(QLatin1Literal("^"))) { matchType = FilterLeaf::DomainMatch; pattern = parsedLine.mid(2, parsedLine.length() - 3); } else if(parsedLine.contains(QLatin1Literal("*")) || parsedLine.contains(QLatin1Literal("^"))) { // check for wildcards and translate to regexp // wildcard "*" - any number of characters // separator "^" - end, ? or / matchType = FilterLeaf::RegularExpressionMatch; parsedLine.replace(QLatin1Literal("||"), QLatin1Literal("^\\w+://")); parsedLine.replace(QLatin1Literal("|"), QLatin1Literal("\\|")); parsedLine.replace(QLatin1Literal("*"), QLatin1Literal(".*")); parsedLine.replace(QLatin1Literal("^"), QLatin1Literal("($|\\?|\\/)")); pattern = parsedLine; } } auto *rule = new AdBlockRule(matchType, pattern, action); rule->mergeOptions(optionsHash); return rule; } std::optional> parseOption(const QString &option) { const bool exception = !option.startsWith(QLatin1Literal("~")); if(option.endsWith(QLatin1Literal("script"))) { // external scripts loaded via HTML script tag return qMakePair(QWebEngineUrlRequestInfo::ResourceTypeScript, exception); } else if(option.endsWith(QLatin1Literal("image"))) { // regular images, typically loaded via HTML img tag return qMakePair(QWebEngineUrlRequestInfo::ResourceTypeImage, exception); } else if(option.endsWith(QLatin1Literal("stylesheet"))) { // external CSS stylesheet files return qMakePair(QWebEngineUrlRequestInfo::ResourceTypeStylesheet, exception); } else if(option.endsWith(QLatin1Literal("object"))) { // content handled by browser plugins, e.g. Flash or Java return qMakePair(QWebEngineUrlRequestInfo::ResourceTypeObject, exception); } else if(option.endsWith(QLatin1Literal("xmlhttprequest"))) { // requests started using the XMLHttpRequest object or fetch() API return qMakePair(QWebEngineUrlRequestInfo::ResourceTypeXhr, exception); } else if(option.endsWith(QLatin1Literal("object-subrequest"))) { // requests started by plugins like Flash return qMakePair(QWebEngineUrlRequestInfo::ResourceTypePluginResource, exception); } else if(option.endsWith(QLatin1Literal("subdocument"))) { // embedded pages, usually included via HTML frames return qMakePair(QWebEngineUrlRequestInfo::ResourceTypeSubFrame, exception); } else if(option.endsWith(QLatin1Literal("ping"))) { // requests started by or navigator.sendBeacon() return qMakePair(QWebEngineUrlRequestInfo::ResourceTypePing, exception); } else if(option.endsWith(QLatin1Literal("websocket"))) { // requests initiated via WebSocket object qDebug("Resource type 'websocket' not available"); } else if(option.endsWith(QLatin1Literal("webrtc"))) { // connections opened via RTCPeerConnection instances to ICE servers qDebug("Resource type 'webrtc' not available"); } else if(option.endsWith(QLatin1Literal("document"))) { // the page itself return qMakePair(QWebEngineUrlRequestInfo::ResourceTypeMainFrame, exception); } else if(option.endsWith(QLatin1Literal("other"))) { return qMakePair(QWebEngineUrlRequestInfo::ResourceTypeUnknown, exception); } return std::nullopt; } AdBlockRule::AdBlockRule(FilterLeaf::UrlMatchType matchType, const QString& filter, FilterLeaf::Action action) { this->matchType = matchType; this->m_request = filter; this->m_isBlocking = (action == FilterLeaf::Block) ? true : false; } void AdBlockRule::mergeOptions(const QHash &options) { this->resourceTypeOptions.unite(options); } bool AdBlockRule::match(const QUrl& requestUrl) const { switch(matchType) { case FilterLeaf::StringContains: return requestUrl.toString().contains(m_request); default: return false; } } FilterLeaf::Action AdBlockRule::action() const { return m_isBlocking ? FilterLeaf::Block : FilterLeaf::Allow; }