diff options
Diffstat (limited to 'lib/urlfilter/adblock/adblocklist.cpp')
-rw-r--r-- | lib/urlfilter/adblock/adblocklist.cpp | 188 |
1 files changed, 0 insertions, 188 deletions
diff --git a/lib/urlfilter/adblock/adblocklist.cpp b/lib/urlfilter/adblock/adblocklist.cpp deleted file mode 100644 index 3be21bd..0000000 --- a/lib/urlfilter/adblock/adblocklist.cpp +++ /dev/null @@ -1,188 +0,0 @@ -/* - * This file is part of smolbote. It's copyrighted by the contributors recorded - * in the version control history of the file, available from its original - * location: https://neueland.iserlohn-fortress.net/gitea/aqua/smolbote - * - * SPDX-License-Identifier: GPL-3.0 - */ - -#include "adblocklist.h" -#include "parser.h" -#include <QIODevice> -#include <QTextStream> -#include <QDebug> - -AdBlockList::AdBlockList(QIODevice *device) -{ - Q_ASSERT(device->isOpen()); - - QTextStream list(device); - while (!list.atEnd()) { - parseLine(list.readLine()); - } - - qDebug() << m_metadata; -} - -AdBlockList::~AdBlockList() -{ - for(Rule &r : rules) { - delete r.matcher; - } -} - -QString AdBlockList::metadata(const QString& key) const -{ - return m_metadata.value(key); -} - -int AdBlockList::ruleCount() const -{ - return rules.size(); -} - -std::pair<UrlFilter::MatchResult, QString> AdBlockList::match(const QUrl& firstParty, const QUrl& requestUrl, QWebEngineUrlRequestInfo::ResourceType type) const -{ - const QString domain = firstParty.host(); - const QString request = requestUrl.toString(); - - for(const Rule &r : rules) { - // if there are options specified, but not the one we need - if(!r.options.isEmpty() && !r.options.contains(type)) - continue; - - if(r.disabledOn.contains(domain)) - continue; - - if(!r.enabledOn.isEmpty() && !r.enabledOn.contains(domain)) - continue; - - if(r.matcher->hasMatch(request)) - return std::make_pair(r.action, QString()); - } - - return std::make_pair(UrlFilter::NotMatched, QString()); -} - -void AdBlockList::parseLine(const QString& line) -{ - QString parsedLine = line.trimmed(); - - if(parsedLine.isEmpty()) - return; - - if(parsedLine.startsWith(QLatin1String("!"))) { - const auto comment = parseComment(parsedLine); - - if(comment) { - const auto key = comment.value().first; - if(keys.contains(key)) - m_metadata[key] = comment.value().second; - } - - return; - } - - // css rule -> filterleaves cannot do element blocking - if(parsedLine.contains(QLatin1String("##")) || parsedLine.contains(QLatin1String("#@#"))) { - qDebug("TODO: %s", qUtf8Printable(parsedLine)); - return; - } - - Rule r; - r.action = UrlFilter::Block; - - // exception rules - if(parsedLine.startsWith(QLatin1String("@@"))) { - r.action = UrlFilter::Allow; - parsedLine.remove(0, 2); - } - - bool matchCase = false; - - // parse options - { - const int sepPos = parsedLine.indexOf(QLatin1String("$")); - if(sepPos != -1) { - const auto options = parsedLine.mid(sepPos + 1).split(QLatin1String(",")); - parsedLine = parsedLine.mid(0, sepPos); - - for(const QString &option : options) { - if(option.startsWith(QLatin1String("domain"))) { - const auto domainList = option.mid(7).split(QLatin1String("|")); - - for(const QString &domain : domainList) { - if(domain.startsWith(QLatin1String("~"))) { - r.disabledOn.append(domain.mid(1)); - } else { - r.enabledOn.append(domain); - } - } - } else if(option.endsWith(QLatin1String("match-case"))) { - matchCase = !option.startsWith(QLatin1String("~")); - - } else { - const auto pair = parseResourceOption(option); - if(pair) - r.options.insert(pair.value().first, pair.value().second); - } - } - } - } - - if(parsedLine.startsWith(QLatin1String("/")) && parsedLine.endsWith(QLatin1String("/"))) { - // regular expression rule - parsedLine = parsedLine.mid(1, parsedLine.length() - 2); - r.matcher = new ContentsMatcher<QRegularExpression>(parsedLine, UrlFilter::RegularExpressionMatch); - - } else if(parsedLine.startsWith(QLatin1String("||")) && parsedLine.endsWith(QLatin1String("^"))) { - parsedLine = parsedLine.mid(2, parsedLine.length() - 3); - r.matcher = new ContentsMatcher<QString>(parsedLine, UrlFilter::DomainMatch); - - } else if(parsedLine.startsWith(QLatin1String("|")) && parsedLine.endsWith(QLatin1String("|"))) { - // string equals rule - parsedLine = parsedLine.mid(1, parsedLine.length() - 2); - r.matcher = new ContentsMatcher<QStringMatcher>(parsedLine, UrlFilter::StringEquals); - - } else if(parsedLine.startsWith(QLatin1String("||"))) { - // string starts with rule - parsedLine = parsedLine.mid(2); - r.matcher = new ContentsMatcher<QStringMatcher>(parsedLine, UrlFilter::StringStartsWith); - - } else if(parsedLine.endsWith(QLatin1String("|"))) { - // string ends with rule - parsedLine.chop(1); - r.matcher = new ContentsMatcher<QStringMatcher>(parsedLine, UrlFilter::StringEndsWith); - - } else { - // generic contains rule - - // remove beginning and ending wildcards - if(parsedLine.startsWith(QLatin1String("*"))) - parsedLine = parsedLine.mid(1); - - if(parsedLine.endsWith(QLatin1String("*"))) - parsedLine.chop(1); - - if(parsedLine.contains(QLatin1String("*")) || parsedLine.contains(QLatin1String("^"))) { - // check for wildcards and translate to regexp - // wildcard "*" - any number of characters - // separator "^" - end, ? or / - parsedLine.replace(QLatin1String("||"), QLatin1String("^\\w+://")); - parsedLine.replace(QLatin1String("|"), QLatin1String("\\|")); - parsedLine.replace(QLatin1String("*"), QLatin1String(".*")); - parsedLine.replace(QLatin1String("^"), QLatin1String("($|\\?|\\/)")); - - r.matcher = new ContentsMatcher<QRegularExpression>(parsedLine, UrlFilter::RegularExpressionMatch); - - } else { - r.matcher = new ContentsMatcher<QStringMatcher>(parsedLine, UrlFilter::StringContains); - } - } - - r.matcher->setCaseSensitive(matchCase); - - Q_CHECK_PTR(r.matcher); - rules.emplace_back(std::move(r)); -} - |