From 7d8cbdb9941532cd5bf560b21395f6ed371d1ab5 Mon Sep 17 00:00:00 2001 From: Aqua-sama Date: Mon, 1 Oct 2018 16:43:18 +0200 Subject: Split off UrlFilter into library - add more adblock filter options --- lib/web/urlfilter/adblockrule.cpp | 127 -------------------------------------- 1 file changed, 127 deletions(-) delete mode 100644 lib/web/urlfilter/adblockrule.cpp (limited to 'lib/web/urlfilter/adblockrule.cpp') diff --git a/lib/web/urlfilter/adblockrule.cpp b/lib/web/urlfilter/adblockrule.cpp deleted file mode 100644 index 58b1941..0000000 --- a/lib/web/urlfilter/adblockrule.cpp +++ /dev/null @@ -1,127 +0,0 @@ -/* - * This file is part of smolbote. It's copyrighted by the contributors recorded - * in the version control history of the file, available from its original - * location: https://neueland.iserlohn-fortress.net/smolbote.hg - * - * SPDX-License-Identifier: GPL-3.0 - */ -// Based on Falkon's AdBlockRule class - -#include "adblockrule.h" - -// adblock format documentation -// https://adblockplus.org/filters - -// QString::mid(pos, len) - Returns a string starting at the specified position index. -// QString::chop(len) - Removes n characters from the end of the string. -// QString::remove(pos, len) - Removes n characters from the string, starting at the given position index. - -AdBlockRule::AdBlockRule(const QString &filter) -{ - originalFilter = filter; - QString parsedLine = filter.trimmed(); - - // there is no rule, or it's a comment - if(parsedLine.isEmpty() || parsedLine.startsWith("!")) { - return; - } - - // css rule - ignore for now - if(parsedLine.contains(QLatin1Literal("##")) || parsedLine.contains(QLatin1Literal("#@#"))) { - return; - } - - m_isEnabled = true; - - // exception rules - if(parsedLine.startsWith(QLatin1Literal("@@"))) { - m_isBlocking = false; - parsedLine.remove(0, 2); - } else - m_isBlocking = true; - - // parse options - { - const int sepPos = parsedLine.indexOf(QLatin1Literal("$")); - if(sepPos != -1) { - const auto options = parsedLine.mid(sepPos + 1).split(QLatin1Literal(",")); - parsedLine = parsedLine.mid(0, sepPos); - - for(const QString &option : options) { - if(option.startsWith(QLatin1Literal("domain"))) { - const auto domainList = option.mid(7).split(QLatin1Literal("|")); - for(const QString &domain : domainList) { - if(domain.startsWith(QLatin1Literal("~"))) - blockedDomains.append(domain.mid(1)); - else - allowedDomains.append(domain); - } - } else if(option.endsWith(QLatin1Literal("script"))) { - m_resourceTypeOptions.insert(QWebEngineUrlRequestInfo::ResourceTypeScript, !option.startsWith(QLatin1Literal("~"))); - - } else if(option.endsWith(QLatin1Literal("image"))) { - m_resourceTypeOptions.insert(QWebEngineUrlRequestInfo::ResourceTypeImage, !option.startsWith(QLatin1Literal("~"))); - - } else if(option.endsWith(QLatin1Literal("stylesheet"))) { - m_resourceTypeOptions.insert(QWebEngineUrlRequestInfo::ResourceTypeStylesheet, !option.startsWith(QLatin1Literal("~"))); - - } else if(option.endsWith(QLatin1Literal("object"))) { - m_resourceTypeOptions.insert(QWebEngineUrlRequestInfo::ResourceTypeObject, !option.startsWith(QLatin1Literal("~"))); - - } else if(option.endsWith(QLatin1Literal("xmlhttprequest"))) { - m_resourceTypeOptions.insert(QWebEngineUrlRequestInfo::ResourceTypeXhr, !option.startsWith(QLatin1Literal("~"))); - - } else if(option.endsWith(QLatin1Literal("other"))) { - m_resourceTypeOptions.insert(QWebEngineUrlRequestInfo::ResourceTypeUnknown, !option.startsWith(QLatin1Literal("~"))); - } - } - } - } - - // regular expression rule - if(parsedLine.startsWith(QLatin1Literal("/")) && parsedLine.endsWith(QLatin1Literal("/"))) { - parsedLine = parsedLine.mid(1, parsedLine.length() - 2); - - urlMatchType = RegularExpressionMatch; - regexp.setPattern(parsedLine); - return; - } - - // string equals rule - if(parsedLine.startsWith(QLatin1Literal("|")) && parsedLine.endsWith(QLatin1Literal("|"))) { - urlMatchType = StringEquals; - match = parsedLine.mid(1, parsedLine.length() - 2); - return; - } - - // Basic filter rules can use wildcards, which were supported by QRegExp, - // but were deprecated in QRegularExpression. - - // remove beginning and ending wildcards - if(parsedLine.startsWith(QLatin1Literal("*"))) - parsedLine = parsedLine.mid(1); - - if(parsedLine.endsWith(QLatin1Literal("*"))) - parsedLine.chop(1); - - if(parsedLine.startsWith(QLatin1Literal("||")) && parsedLine.endsWith(QLatin1Literal("^"))) { - urlMatchType = DomainMatch; - match = parsedLine.mid(2, parsedLine.length() - 3); - return; - } - - // check for wildcards and translate to regexp - // wildcard "*" - any number of characters - // separator "^" - end, ? or / - if(parsedLine.contains(QLatin1Literal("*")) || parsedLine.contains(QLatin1Literal("^"))) { - urlMatchType = RegularExpressionMatch; - parsedLine.replace(QLatin1Literal("||"), QLatin1Literal("^\\w+://")); - parsedLine.replace(QLatin1Literal("|"), QLatin1Literal("\\|")); - parsedLine.replace(QLatin1Literal("*"), QLatin1Literal(".*")); - parsedLine.replace(QLatin1Literal("^"), QLatin1Literal("($|\\?|\\/)")); - regexp.setPattern(parsedLine); - return; - } - - match = parsedLine; -} -- cgit v1.2.1