diff options
author | Aqua-sama <aqua@iserlohn-fortress.net> | 2019-01-09 19:38:58 +0100 |
---|---|---|
committer | Aqua-sama <aqua@iserlohn-fortress.net> | 2019-01-09 19:38:58 +0100 |
commit | 3d2ae07c455c0e423c64f19e445518427a5684fa (patch) | |
tree | 58f6b47c3db33658a6f2e605fd021f08d1fa9964 /lib/urlfilter/adblock/adblocklist.cpp | |
parent | Add assorted unfinished doc files to repo (diff) | |
download | smolbote-3d2ae07c455c0e423c64f19e445518427a5684fa.tar.xz |
Rewrite lib/urlfilter
- Make HostList and AdBlockList implementations independent from each
other
- Move urlfilter tests to lib/urlfilter
Diffstat (limited to 'lib/urlfilter/adblock/adblocklist.cpp')
-rw-r--r-- | lib/urlfilter/adblock/adblocklist.cpp | 188 |
1 files changed, 188 insertions, 0 deletions
diff --git a/lib/urlfilter/adblock/adblocklist.cpp b/lib/urlfilter/adblock/adblocklist.cpp new file mode 100644 index 0000000..c749e9e --- /dev/null +++ b/lib/urlfilter/adblock/adblocklist.cpp @@ -0,0 +1,188 @@ +/* + * This file is part of smolbote. It's copyrighted by the contributors recorded + * in the version control history of the file, available from its original + * location: https://neueland.iserlohn-fortress.net/gitea/aqua/smolbote + * + * SPDX-License-Identifier: GPL-3.0 + */ + +#include "adblocklist.h" +#include "parser.h" +#include <QIODevice> +#include <QTextStream> +#include <QDebug> + +AdBlockList::AdBlockList(QIODevice *device) +{ + Q_ASSERT(device->isOpen()); + + QTextStream list(device); + while (!list.atEnd()) { + parseLine(list.readLine()); + } + + qDebug() << m_metadata; +} + +AdBlockList::~AdBlockList() +{ + for(Rule &r : rules) { + delete r.matcher; + } +} + +QString AdBlockList::metadata(const QString& key) const +{ + return m_metadata.value(key); +} + +int AdBlockList::ruleCount() const +{ + return rules.size(); +} + +std::pair<UrlFilter::MatchResult, QString> AdBlockList::match(const QUrl& firstParty, const QUrl& requestUrl, QWebEngineUrlRequestInfo::ResourceType type) const +{ + const QString domain = firstParty.host(); + const QString request = requestUrl.toString(); + + for(const Rule &r : rules) { + // if there are options specified, but not the one we need + if(!r.options.isEmpty() && !r.options.contains(type)) + continue; + + if(r.disabledOn.contains(domain)) + continue; + + if(!r.enabledOn.isEmpty() && !r.enabledOn.contains(domain)) + continue; + + if(r.matcher->hasMatch(request)) + return std::make_pair(r.action, QString()); + } + + return std::make_pair(UrlFilter::NotMatched, QString()); +} + +void AdBlockList::parseLine(const QString& line) +{ + QString parsedLine = line.trimmed(); + + if(parsedLine.isEmpty()) + return; + + if(parsedLine.startsWith(QLatin1Literal("!"))) { + const auto comment = parseComment(parsedLine); + + 
if(comment) { + const auto key = comment.value().first; + if(keys.contains(key)) + m_metadata[key] = comment.value().second; + } + + return; + } + + // css rule -> filterleaves cannot do element blocking + if(parsedLine.contains(QLatin1Literal("##")) || parsedLine.contains(QLatin1Literal("#@#"))) { + qDebug("TODO: %s", qUtf8Printable(parsedLine)); + return; + } + + Rule r; + r.action = UrlFilter::Block; + + // exception rules + if(parsedLine.startsWith(QLatin1Literal("@@"))) { + r.action = UrlFilter::Allow; + parsedLine.remove(0, 2); + } + + bool matchCase = false; + + // parse options + { + const int sepPos = parsedLine.indexOf(QLatin1Literal("$")); + if(sepPos != -1) { + const auto options = parsedLine.mid(sepPos + 1).split(QLatin1Literal(",")); + parsedLine = parsedLine.mid(0, sepPos); + + for(const QString &option : options) { + if(option.startsWith(QLatin1Literal("domain"))) { + const auto domainList = option.mid(7).split(QLatin1Literal("|")); + + for(const QString &domain : domainList) { + if(domain.startsWith(QLatin1Literal("~"))) { + r.disabledOn.append(domain.mid(1)); + } else { + r.enabledOn.append(domain); + } + } + } else if(option.endsWith(QLatin1Literal("match-case"))) { + matchCase = !option.startsWith(QLatin1Literal("~")); + + } else { + const auto pair = parseResourceOption(option); + if(pair) + r.options.insert(pair.value().first, pair.value().second); + } + } + } + } + + if(parsedLine.startsWith(QLatin1Literal("/")) && parsedLine.endsWith(QLatin1Literal("/"))) { + // regular expression rule + parsedLine = parsedLine.mid(1, parsedLine.length() - 2); + r.matcher = new ContentsMatcher<QRegularExpression>(parsedLine, UrlFilter::RegularExpressionMatch); + + } else if(parsedLine.startsWith(QLatin1Literal("||")) && parsedLine.endsWith(QLatin1Literal("^"))) { + parsedLine = parsedLine.mid(2, parsedLine.length() - 3); + r.matcher = new ContentsMatcher<QString>(parsedLine, UrlFilter::DomainMatch); + + } else if(parsedLine.startsWith(QLatin1Literal("|")) && 
parsedLine.endsWith(QLatin1Literal("|"))) { + // string equals rule + parsedLine = parsedLine.mid(1, parsedLine.length() - 2); + r.matcher = new ContentsMatcher<QStringMatcher>(parsedLine, UrlFilter::StringEquals); + + } else if(parsedLine.startsWith(QLatin1Literal("||"))) { + // string starts with rule + parsedLine = parsedLine.mid(2); + r.matcher = new ContentsMatcher<QStringMatcher>(parsedLine, UrlFilter::StringStartsWith); + + } else if(parsedLine.endsWith(QLatin1Literal("|"))) { + // string ends with rule + parsedLine.chop(1); + r.matcher = new ContentsMatcher<QStringMatcher>(parsedLine, UrlFilter::StringEndsWith); + + } else { + // generic contains rule + + // remove beginning and ending wildcards + if(parsedLine.startsWith(QLatin1Literal("*"))) + parsedLine = parsedLine.mid(1); + + if(parsedLine.endsWith(QLatin1Literal("*"))) + parsedLine.chop(1); + + if(parsedLine.contains(QLatin1Literal("*")) || parsedLine.contains(QLatin1Literal("^"))) { + // check for wildcards and translate to regexp + // wildcard "*" - any number of characters + // separator "^" - end, ? or / + parsedLine.replace(QLatin1Literal("||"), QLatin1Literal("^\\w+://")); + parsedLine.replace(QLatin1Literal("|"), QLatin1Literal("\\|")); + parsedLine.replace(QLatin1Literal("*"), QLatin1Literal(".*")); + parsedLine.replace(QLatin1Literal("^"), QLatin1Literal("($|\\?|\\/)")); + + r.matcher = new ContentsMatcher<QRegularExpression>(parsedLine, UrlFilter::RegularExpressionMatch); + + } else { + r.matcher = new ContentsMatcher<QStringMatcher>(parsedLine, UrlFilter::StringContains); + } + } + + r.matcher->setCaseSensitive(matchCase); + + Q_CHECK_PTR(r.matcher); + rules.emplace_back(std::move(r)); +} + |