From 3d2ae07c455c0e423c64f19e445518427a5684fa Mon Sep 17 00:00:00 2001 From: Aqua-sama Date: Wed, 9 Jan 2019 19:38:58 +0100 Subject: Rewrite lib/urlfilter - Make HostList and AdBlockList implementations independent from each other - Move urlfilter tests to lib/urlfilter --- lib/urlfilter/formats/adblockrule_parse.cpp | 181 ---------------------------- 1 file changed, 181 deletions(-) delete mode 100644 lib/urlfilter/formats/adblockrule_parse.cpp (limited to 'lib/urlfilter/formats/adblockrule_parse.cpp') diff --git a/lib/urlfilter/formats/adblockrule_parse.cpp b/lib/urlfilter/formats/adblockrule_parse.cpp deleted file mode 100644 index c01ddfd..0000000 --- a/lib/urlfilter/formats/adblockrule_parse.cpp +++ /dev/null @@ -1,181 +0,0 @@ -/* - * This file is part of smolbote. It's copyrighted by the contributors recorded - * in the version control history of the file, available from its original - * location: https://neueland.iserlohn-fortress.net/gitea/aqua/smolbote - * - * SPDX-License-Identifier: GPL-3.0 - */ - -#include "adblockrule.h" -#include "adblockrule_parse.h" - -// adblock format documentation -// https://adblockplus.org/filters - -// QString::mid(pos, len) const - Returns a string starting at the specified position index. -// QString::chop(len) - Removes n characters from the end of the string. -// QString::remove(pos, len) - Removes n characters from the string, starting at the given position index. -// QString::trimmed() const - Remove whitespace from start and end - -AdBlockRule *parseRule_adblock(const QString &filter) -{ - QString parsedLine = filter.trimmed(); - - // there is no rule, or it's a comment - if(parsedLine.isEmpty() || parsedLine.startsWith("!")) { - return nullptr; - } - - // css rule -> filterleaves cannot do element blocking - if(parsedLine.contains(QLatin1Literal("##")) || parsedLine.contains(QLatin1Literal("#@#"))) { - return nullptr; - } - - // exception rules - FilterLeaf::Action action = FilterLeaf::Block; - if(parsedLine.startsWith(QLatin1Literal("@@"))) { - action = FilterLeaf::Allow; - parsedLine.remove(0, 2); - } - - // parse options - QStringList enabledOn, disabledOn; - QHash optionsHash; - { - const int sepPos = parsedLine.indexOf(QLatin1Literal("$")); - if(sepPos != -1) { - const auto options = parsedLine.mid(sepPos + 1).split(QLatin1Literal(",")); - parsedLine = parsedLine.mid(0, sepPos); - - for(const QString &option : options) { - if(option.startsWith(QLatin1Literal("domain"))) { - const auto domainList = option.mid(7).split(QLatin1Literal("|")); - - for(const QString &domain : domainList) { - if(domain.startsWith(QLatin1Literal("~"))) { - disabledOn.append(domain.mid(1)); - } else { - enabledOn.append(domain); - } - } - } else { - const auto pair = parseOption(option); - if(pair) - optionsHash.insert(pair.value().first, pair.value().second); - } - } - } - } - - FilterLeaf::UrlMatchType matchType = FilterLeaf::InvalidMatch; - - if(parsedLine.startsWith(QLatin1Literal("/")) && parsedLine.endsWith(QLatin1Literal("/"))) { - // regular expression rule - matchType = FilterLeaf::RegularExpressionMatch; - parsedLine = parsedLine.mid(1, parsedLine.length() - 2); - - } else if(parsedLine.startsWith(QLatin1Literal("||")) && parsedLine.endsWith(QLatin1Literal("^"))) { - matchType = FilterLeaf::DomainMatch; - parsedLine = parsedLine.mid(2, parsedLine.length() - 3); - - } else if(parsedLine.startsWith(QLatin1Literal("|")) && parsedLine.endsWith(QLatin1Literal("|"))) { - // string equals rule - matchType = FilterLeaf::StringEquals; - parsedLine = parsedLine.mid(1, parsedLine.length() - 2); - - } else if(parsedLine.startsWith(QLatin1Literal("||"))) { - // string starts with rule - matchType = FilterLeaf::StringStartsWith; - parsedLine = parsedLine.mid(2); - - } else if(parsedLine.endsWith(QLatin1Literal("|"))) { - // string ends with rule - matchType = FilterLeaf::StringEndsWith; - parsedLine.chop(1); - - } else { - // generic contains rule - matchType = FilterLeaf::StringContains; - - // Basic filter rules can use wildcards, which were supported by QRegExp, - // but were deprecated in QRegularExpression. - - // remove beginning and ending wildcards - if(parsedLine.startsWith(QLatin1Literal("*"))) - parsedLine = parsedLine.mid(1); - - if(parsedLine.endsWith(QLatin1Literal("*"))) - parsedLine.chop(1); - - if(parsedLine.contains(QLatin1Literal("*")) || parsedLine.contains(QLatin1Literal("^"))) { - // check for wildcards and translate to regexp - // wildcard "*" - any number of characters - // separator "^" - end, ? or / - parsedLine.replace(QLatin1Literal("||"), QLatin1Literal("^\\w+://")); - parsedLine.replace(QLatin1Literal("|"), QLatin1Literal("\\|")); - parsedLine.replace(QLatin1Literal("*"), QLatin1Literal(".*")); - parsedLine.replace(QLatin1Literal("^"), QLatin1Literal("($|\\?|\\/)")); - - matchType = FilterLeaf::RegularExpressionMatch; - } - } - - AdBlockRule *rule = new AdBlockRule(matchType, parsedLine, action); - rule->mergeOptions(optionsHash); - return rule; -} - -std::optional> parseOption(const QString &option) -{ - const bool exception = !option.startsWith(QLatin1Literal("~")); - - if(option.endsWith(QLatin1Literal("script"))) { - // external scripts loaded via HTML script tag - return qMakePair(QWebEngineUrlRequestInfo::ResourceTypeScript, exception); - - } else if(option.endsWith(QLatin1Literal("image"))) { - // regular images, typically loaded via HTML img tag - return qMakePair(QWebEngineUrlRequestInfo::ResourceTypeImage, exception); - - } else if(option.endsWith(QLatin1Literal("stylesheet"))) { - // external CSS stylesheet files - return qMakePair(QWebEngineUrlRequestInfo::ResourceTypeStylesheet, exception); - - } else if(option.endsWith(QLatin1Literal("object"))) { - // content handled by browser plugins, e.g. Flash or Java - return qMakePair(QWebEngineUrlRequestInfo::ResourceTypeObject, exception); - - } else if(option.endsWith(QLatin1Literal("xmlhttprequest"))) { - // requests started using the XMLHttpRequest object or fetch() API - return qMakePair(QWebEngineUrlRequestInfo::ResourceTypeXhr, exception); - - } else if(option.endsWith(QLatin1Literal("object-subrequest"))) { - // requests started by plugins like Flash - return qMakePair(QWebEngineUrlRequestInfo::ResourceTypePluginResource, exception); - - } else if(option.endsWith(QLatin1Literal("subdocument"))) { - // embedded pages, usually included via HTML frames - return qMakePair(QWebEngineUrlRequestInfo::ResourceTypeSubFrame, exception); - - } else if(option.endsWith(QLatin1Literal("ping"))) { - // requests started by or navigator.sendBeacon() - return qMakePair(QWebEngineUrlRequestInfo::ResourceTypePing, exception); - - } else if(option.endsWith(QLatin1Literal("websocket"))) { - // requests initiated via WebSocket object - qDebug("Resource type 'websocket' not available"); - - } else if(option.endsWith(QLatin1Literal("webrtc"))) { - // connections opened via RTCPeerConnection instances to ICE servers - qDebug("Resource type 'webrtc' not available"); - - } else if(option.endsWith(QLatin1Literal("document"))) { - // the page itself - return qMakePair(QWebEngineUrlRequestInfo::ResourceTypeMainFrame, exception); - - } else if(option.endsWith(QLatin1Literal("other"))) { - return qMakePair(QWebEngineUrlRequestInfo::ResourceTypeUnknown, exception); - } - - return std::nullopt; -} -- cgit v1.2.1