/*
 * This file is part of smolbote. It's copyrighted by the contributors recorded
 * in the version control history of the file, available from its original
 * location: https://neueland.iserlohn-fortress.net/gitea/aqua/smolbote
 *
 * SPDX-License-Identifier: GPL-3.0
 */

#include "adblocklist.h"
#include "parser.h"
// NOTE(review): the names of the angle-bracket includes were missing from the
// text under review; the Qt headers below are the ones this file uses
// directly - confirm against version control.
#include <QDebug>
#include <QIODevice>
#include <QTextStream>
#include <utility>

namespace
{

/**
 * Translates a wildcard filter pattern into a regular expression pattern.
 *
 *  - "||" becomes the scheme anchor "^\w+://"
 *  - "|"  becomes a literal pipe
 *  - "*"  becomes ".*" (any number of characters)
 *  - "^"  becomes "($|\?|\/)" (separator: end, ? or /)
 *
 * Every other character is meant literally, so regular expression
 * metacharacters are backslash-escaped. The translation is done in a single
 * pass so that text emitted for one token (for example the "^" anchor emitted
 * for "||") is never rewritten again by the rule for another token.
 */
QString wildcardToRegExp(const QString &pattern)
{
    static const QString metaCharacters = QStringLiteral("\\.+?()[]{}$");

    QString regexp;
    regexp.reserve(pattern.length() * 2);

    for(int i = 0; i < pattern.length(); ++i) {
        const QChar c = pattern.at(i);

        if(c == QLatin1Char('|')) {
            const bool doubled = (i + 1 < pattern.length()) && (pattern.at(i + 1) == QLatin1Char('|'));
            if(doubled) {
                regexp += QLatin1String("^\\w+://");
                ++i; // consume the second pipe as well
            } else {
                regexp += QLatin1String("\\|");
            }
        } else if(c == QLatin1Char('*')) {
            regexp += QLatin1String(".*");
        } else if(c == QLatin1Char('^')) {
            regexp += QLatin1String("($|\\?|\\/)");
        } else {
            if(metaCharacters.contains(c))
                regexp += QLatin1Char('\\');
            regexp += c;
        }
    }

    return regexp;
}

} // namespace

/**
 * Builds the list by reading @p device line by line and passing each line to
 * parseLine(). The collected metadata is printed to the debug output.
 *
 * @param device an already opened device containing the filter list text
 */
AdBlockList::AdBlockList(QIODevice *device)
{
    Q_ASSERT(device->isOpen());

    QTextStream list(device);
    while(!list.atEnd()) {
        parseLine(list.readLine());
    }

    qDebug() << m_metadata;
}

/// Releases the matcher owned by each rule.
AdBlockList::~AdBlockList()
{
    for(Rule &r : rules) {
        delete r.matcher;
    }
}

/// Returns the metadata value stored for @p key (QHash/QMap-style value() lookup).
QString AdBlockList::metadata(const QString &key) const
{
    return m_metadata.value(key);
}

/// Returns the number of rules that were parsed.
int AdBlockList::ruleCount() const
{
    return static_cast<int>(rules.size());
}

/**
 * Looks for the first rule that applies to the request.
 *
 * A rule is skipped when it lists resource types and @p type is not among
 * them, when the first-party host is in its disabledOn list, or when it has a
 * non-empty enabledOn list that does not contain the first-party host.
 *
 * @param firstParty url of the page issuing the request; only its host is used
 * @param requestUrl url being requested; matched in its string form
 * @param type       resource type of the request
 * @return the action of the first matching rule, or UrlFilter::NotMatched;
 *         the second element is always an empty string
 *
 * NOTE(review): the template arguments of the return type were missing from
 * the text under review and are restored from the values returned below -
 * confirm the enum name against the header.
 */
std::pair<UrlFilter::MatchResult, QString> AdBlockList::match(const QUrl &firstParty, const QUrl &requestUrl, QWebEngineUrlRequestInfo::ResourceType type) const
{
    const QString domain = firstParty.host();
    const QString request = requestUrl.toString();

    for(const Rule &r : rules) {
        // if there are options specified, but not the one we need
        if(!r.options.isEmpty() && !r.options.contains(type))
            continue;

        if(r.disabledOn.contains(domain))
            continue;

        if(!r.enabledOn.isEmpty() && !r.enabledOn.contains(domain))
            continue;

        if(r.matcher->hasMatch(request))
            return std::make_pair(r.action, QString());
    }

    return std::make_pair(UrlFilter::NotMatched, QString());
}

/**
 * Parses one line of a filter list.
 *
 * Empty lines are ignored. Lines starting with "!" are comments; a comment
 * that parseComment() understands and whose key is listed in `keys` is stored
 * as metadata. Element hiding rules ("##", "#@#") are only logged. Everything
 * else becomes a Rule appended to `rules`.
 *
 * @param line raw line as read from the list
 */
void AdBlockList::parseLine(const QString &line)
{
    QString parsedLine = line.trimmed();

    if(parsedLine.isEmpty())
        return;

    if(parsedLine.startsWith(QLatin1Char('!'))) {
        const auto comment = parseComment(parsedLine);
        if(comment) {
            const auto key = comment.value().first;
            if(keys.contains(key))
                m_metadata[key] = comment.value().second;
        }
        return;
    }

    // css rule -> filterleaves cannot do element blocking
    if(parsedLine.contains(QLatin1String("##")) || parsedLine.contains(QLatin1String("#@#"))) {
        qDebug("TODO: %s", qUtf8Printable(parsedLine));
        return;
    }

    Rule r;
    r.action = UrlFilter::Block;

    // exception rules
    if(parsedLine.startsWith(QLatin1String("@@"))) {
        r.action = UrlFilter::Allow;
        parsedLine.remove(0, 2);
    }

    bool matchCase = false;

    // parse options: everything after the first "$" is a comma separated list
    const int sepPos = parsedLine.indexOf(QLatin1Char('$'));
    if(sepPos != -1) {
        const auto options = parsedLine.mid(sepPos + 1).split(QLatin1Char(','));
        parsedLine = parsedLine.mid(0, sepPos);

        const QLatin1String domainPrefix("domain=");
        for(const QString &option : options) {
            if(option.startsWith(domainPrefix)) {
                const auto domainList = option.mid(domainPrefix.size()).split(QLatin1Char('|'));
                for(const QString &domain : domainList) {
                    const bool negated = domain.startsWith(QLatin1Char('~'));
                    const QString name = negated ? domain.mid(1) : domain;

                    // An empty entry (e.g. "domain=" or a trailing "|") would
                    // otherwise restrict the rule to an empty host name.
                    if(name.isEmpty())
                        continue;

                    if(negated) {
                        r.disabledOn.append(name);
                    } else {
                        r.enabledOn.append(name);
                    }
                }
            } else if(option.endsWith(QLatin1String("match-case"))) {
                matchCase = !option.startsWith(QLatin1Char('~'));
            } else {
                const auto pair = parseResourceOption(option);
                if(pair)
                    r.options.insert(pair.value().first, pair.value().second);
            }
        }
    }

    // Work out which kind of matcher the pattern needs; the order of the
    // checks matters because the anchors overlap ("||" also starts with "|").
    auto matchType = UrlFilter::StringContains;

    if(parsedLine.startsWith(QLatin1Char('/')) && parsedLine.endsWith(QLatin1Char('/'))) {
        // regular expression rule
        parsedLine = parsedLine.mid(1, parsedLine.length() - 2);
        matchType = UrlFilter::RegularExpressionMatch;

    } else if(parsedLine.startsWith(QLatin1String("||")) && parsedLine.endsWith(QLatin1Char('^'))) {
        // domain rule
        parsedLine = parsedLine.mid(2, parsedLine.length() - 3);
        matchType = UrlFilter::DomainMatch;

    } else if(parsedLine.startsWith(QLatin1Char('|')) && parsedLine.endsWith(QLatin1Char('|'))) {
        // string equals rule
        parsedLine = parsedLine.mid(1, parsedLine.length() - 2);
        matchType = UrlFilter::StringEquals;

    } else if(parsedLine.startsWith(QLatin1String("||"))) {
        // string starts with rule
        parsedLine = parsedLine.mid(2);
        matchType = UrlFilter::StringStartsWith;

    } else if(parsedLine.endsWith(QLatin1Char('|'))) {
        // string ends with rule
        parsedLine.chop(1);
        matchType = UrlFilter::StringEndsWith;

    } else {
        // generic contains rule

        // remove beginning and ending wildcards
        if(parsedLine.startsWith(QLatin1Char('*')))
            parsedLine = parsedLine.mid(1);
        if(parsedLine.endsWith(QLatin1Char('*')))
            parsedLine.chop(1);

        if(parsedLine.contains(QLatin1Char('*')) || parsedLine.contains(QLatin1Char('^'))) {
            // remaining wildcards or separators: translate to regexp
            parsedLine = wildcardToRegExp(parsedLine);
            matchType = UrlFilter::RegularExpressionMatch;
        }
    }

    r.matcher = new ContentsMatcher(parsedLine, matchType);
    // verify the allocation before the pointer is used
    Q_CHECK_PTR(r.matcher);
    r.matcher->setCaseSensitive(matchCase);

    rules.emplace_back(std::move(r));
}