diff options
author | Aqua-sama <aqua@iserlohn-fortress.net> | 2020-02-10 20:58:39 +0200 |
---|---|---|
committer | Aqua-sama <aqua@iserlohn-fortress.net> | 2020-05-30 11:01:35 +0300 |
commit | d1287f43964633035938f4f4d4133bb6d9da7b3e (patch) | |
tree | d09efa4074815c20be9bd6348203fe4336dfe716 /lib/urlfilter/adblock | |
parent | Fix segfault in release build (diff) | |
download | smolbote-d1287f43964633035938f4f4d4133bb6d9da7b3e.tar.xz |
staging: smolblok
smolblok is a replacement for the current lib/urlfilter AdBlockPlus
and hostlist format filter parser. It is a library that uses plugins to
provide support for different filter formats.
staging/adblock: AdBlockPlus parser plugin
plugins/smolblok_hostlist: hostlist format parser plugin
Headers will be installed to include/smolbote/
Remove lib/urlfilter
Diffstat (limited to 'lib/urlfilter/adblock')
-rw-r--r-- | lib/urlfilter/adblock/adblocklist.cpp | 188 | ||||
-rw-r--r-- | lib/urlfilter/adblock/adblocklist.h | 42 | ||||
-rw-r--r-- | lib/urlfilter/adblock/parser.cpp | 75 | ||||
-rw-r--r-- | lib/urlfilter/adblock/parser.h | 14 |
4 files changed, 0 insertions, 319 deletions
diff --git a/lib/urlfilter/adblock/adblocklist.cpp b/lib/urlfilter/adblock/adblocklist.cpp deleted file mode 100644 index 3be21bd..0000000 --- a/lib/urlfilter/adblock/adblocklist.cpp +++ /dev/null @@ -1,188 +0,0 @@ -/* - * This file is part of smolbote. It's copyrighted by the contributors recorded - * in the version control history of the file, available from its original - * location: https://neueland.iserlohn-fortress.net/gitea/aqua/smolbote - * - * SPDX-License-Identifier: GPL-3.0 - */ - -#include "adblocklist.h" -#include "parser.h" -#include <QIODevice> -#include <QTextStream> -#include <QDebug> - -AdBlockList::AdBlockList(QIODevice *device) -{ - Q_ASSERT(device->isOpen()); - - QTextStream list(device); - while (!list.atEnd()) { - parseLine(list.readLine()); - } - - qDebug() << m_metadata; -} - -AdBlockList::~AdBlockList() -{ - for(Rule &r : rules) { - delete r.matcher; - } -} - -QString AdBlockList::metadata(const QString& key) const -{ - return m_metadata.value(key); -} - -int AdBlockList::ruleCount() const -{ - return rules.size(); -} - -std::pair<UrlFilter::MatchResult, QString> AdBlockList::match(const QUrl& firstParty, const QUrl& requestUrl, QWebEngineUrlRequestInfo::ResourceType type) const -{ - const QString domain = firstParty.host(); - const QString request = requestUrl.toString(); - - for(const Rule &r : rules) { - // if there are options specified, but not the one we need - if(!r.options.isEmpty() && !r.options.contains(type)) - continue; - - if(r.disabledOn.contains(domain)) - continue; - - if(!r.enabledOn.isEmpty() && !r.enabledOn.contains(domain)) - continue; - - if(r.matcher->hasMatch(request)) - return std::make_pair(r.action, QString()); - } - - return std::make_pair(UrlFilter::NotMatched, QString()); -} - -void AdBlockList::parseLine(const QString& line) -{ - QString parsedLine = line.trimmed(); - - if(parsedLine.isEmpty()) - return; - - if(parsedLine.startsWith(QLatin1String("!"))) { - const auto comment = parseComment(parsedLine); - - if(comment) { - const auto key = comment.value().first; - if(keys.contains(key)) - m_metadata[key] = comment.value().second; - } - - return; - } - - // css rule -> filterleaves cannot do element blocking - if(parsedLine.contains(QLatin1String("##")) || parsedLine.contains(QLatin1String("#@#"))) { - qDebug("TODO: %s", qUtf8Printable(parsedLine)); - return; - } - - Rule r; - r.action = UrlFilter::Block; - - // exception rules - if(parsedLine.startsWith(QLatin1String("@@"))) { - r.action = UrlFilter::Allow; - parsedLine.remove(0, 2); - } - - bool matchCase = false; - - // parse options - { - const int sepPos = parsedLine.indexOf(QLatin1String("$")); - if(sepPos != -1) { - const auto options = parsedLine.mid(sepPos + 1).split(QLatin1String(",")); - parsedLine = parsedLine.mid(0, sepPos); - - for(const QString &option : options) { - if(option.startsWith(QLatin1String("domain"))) { - const auto domainList = option.mid(7).split(QLatin1String("|")); - - for(const QString &domain : domainList) { - if(domain.startsWith(QLatin1String("~"))) { - r.disabledOn.append(domain.mid(1)); - } else { - r.enabledOn.append(domain); - } - } - } else if(option.endsWith(QLatin1String("match-case"))) { - matchCase = !option.startsWith(QLatin1String("~")); - - } else { - const auto pair = parseResourceOption(option); - if(pair) - r.options.insert(pair.value().first, pair.value().second); - } - } - } - } - - if(parsedLine.startsWith(QLatin1String("/")) && parsedLine.endsWith(QLatin1String("/"))) { - // regular expression rule - parsedLine = parsedLine.mid(1, parsedLine.length() - 2); - r.matcher = new ContentsMatcher<QRegularExpression>(parsedLine, UrlFilter::RegularExpressionMatch); - - } else if(parsedLine.startsWith(QLatin1String("||")) && parsedLine.endsWith(QLatin1String("^"))) { - parsedLine = parsedLine.mid(2, parsedLine.length() - 3); - r.matcher = new ContentsMatcher<QString>(parsedLine, UrlFilter::DomainMatch); - - } else if(parsedLine.startsWith(QLatin1String("|")) && parsedLine.endsWith(QLatin1String("|"))) { - // string equals rule - parsedLine = parsedLine.mid(1, parsedLine.length() - 2); - r.matcher = new ContentsMatcher<QStringMatcher>(parsedLine, UrlFilter::StringEquals); - - } else if(parsedLine.startsWith(QLatin1String("||"))) { - // string starts with rule - parsedLine = parsedLine.mid(2); - r.matcher = new ContentsMatcher<QStringMatcher>(parsedLine, UrlFilter::StringStartsWith); - - } else if(parsedLine.endsWith(QLatin1String("|"))) { - // string ends with rule - parsedLine.chop(1); - r.matcher = new ContentsMatcher<QStringMatcher>(parsedLine, UrlFilter::StringEndsWith); - - } else { - // generic contains rule - - // remove beginning and ending wildcards - if(parsedLine.startsWith(QLatin1String("*"))) - parsedLine = parsedLine.mid(1); - - if(parsedLine.endsWith(QLatin1String("*"))) - parsedLine.chop(1); - - if(parsedLine.contains(QLatin1String("*")) || parsedLine.contains(QLatin1String("^"))) { - // check for wildcards and translate to regexp - // wildcard "*" - any number of characters - // separator "^" - end, ? or / - parsedLine.replace(QLatin1String("||"), QLatin1String("^\\w+://")); - parsedLine.replace(QLatin1String("|"), QLatin1String("\\|")); - parsedLine.replace(QLatin1String("*"), QLatin1String(".*")); - parsedLine.replace(QLatin1String("^"), QLatin1String("($|\\?|\\/)")); - - r.matcher = new ContentsMatcher<QRegularExpression>(parsedLine, UrlFilter::RegularExpressionMatch); - - } else { - r.matcher = new ContentsMatcher<QStringMatcher>(parsedLine, UrlFilter::StringContains); - } - } - - r.matcher->setCaseSensitive(matchCase); - - Q_CHECK_PTR(r.matcher); - rules.emplace_back(std::move(r)); -} - diff --git a/lib/urlfilter/adblock/adblocklist.h b/lib/urlfilter/adblock/adblocklist.h deleted file mode 100644 index ee41e11..0000000 --- a/lib/urlfilter/adblock/adblocklist.h +++ /dev/null @@ -1,42 +0,0 @@ -/* - * This file is part of smolbote. It's copyrighted by the contributors recorded - * in the version control history of the file, available from its original - * location: https://neueland.iserlohn-fortress.net/gitea/aqua/smolbote - * - * SPDX-License-Identifier: GPL-3.0 - */ - -#include "urlfilter.h" -#include "matcher.h" -#include <QHash> -#include <QWebEngineUrlRequestInfo> - -class QIODevice; -class AdBlockList : public UrlFilter -{ -public: - // TODO: check if all keys are listed - const QStringList keys = { "Version", "Title", "Last modified", "Expires", "Homepage", "Licence", "Redirect" }; - - AdBlockList(QIODevice *device); - ~AdBlockList(); - - QString metadata(const QString &key) const override; - int ruleCount() const; - std::pair<MatchResult, QString> match(const QUrl &firstParty, const QUrl &requestUrl, QWebEngineUrlRequestInfo::ResourceType type) const override; - -protected: - void parseLine(const QString &line); - -private: - QHash<QString, QString> m_metadata; - - struct Rule { - UrlFilter::MatchResult action = UrlFilter::NotMatched; - Matcher *matcher; - QStringList enabledOn, disabledOn; - QHash<QWebEngineUrlRequestInfo::ResourceType, bool> options; - }; - - std::vector<Rule> rules; -}; diff --git a/lib/urlfilter/adblock/parser.cpp b/lib/urlfilter/adblock/parser.cpp deleted file mode 100644 index 68f895d..0000000 --- a/lib/urlfilter/adblock/parser.cpp +++ /dev/null @@ -1,75 +0,0 @@ -/* - * This file is part of smolbote. It's copyrighted by the contributors recorded - * in the version control history of the file, available from its original - * location: https://neueland.iserlohn-fortress.net/gitea/aqua/smolbote - * - * SPDX-License-Identifier: GPL-3.0 - */ - -#include "parser.h" - -std::optional<std::pair<QString, QString>> parseComment(QString &line) -{ - const QLatin1String separator(": "); - if(line.contains(separator)) { - const QStringList comment = line.mid(1).split(QLatin1String(": ")); - return std::make_pair(comment.at(0).trimmed(), comment.at(1).trimmed()); - } else - return std::nullopt; -} - -std::optional<std::pair<QWebEngineUrlRequestInfo::ResourceType, bool>> parseResourceOption(const QString &option) -{ - const bool exception = !option.startsWith(QLatin1String("~")); - - if(option.endsWith(QLatin1String("script"))) { - // external scripts loaded via HTML script tag - return std::make_pair(QWebEngineUrlRequestInfo::ResourceTypeScript, exception); - - } else if(option.endsWith(QLatin1String("image"))) { - // regular images, typically loaded via HTML img tag - return std::make_pair(QWebEngineUrlRequestInfo::ResourceTypeImage, exception); - - } else if(option.endsWith(QLatin1String("stylesheet"))) { - // external CSS stylesheet files - return std::make_pair(QWebEngineUrlRequestInfo::ResourceTypeStylesheet, exception); - - } else if(option.endsWith(QLatin1String("object"))) { - // content handled by browser plugins, e.g. Flash or Java - return std::make_pair(QWebEngineUrlRequestInfo::ResourceTypeObject, exception); - - } else if(option.endsWith(QLatin1String("xmlhttprequest"))) { - // requests started using the XMLHttpRequest object or fetch() API - return std::make_pair(QWebEngineUrlRequestInfo::ResourceTypeXhr, exception); - - } else if(option.endsWith(QLatin1String("object-subrequest"))) { - // requests started by plugins like Flash - return std::make_pair(QWebEngineUrlRequestInfo::ResourceTypePluginResource, exception); - - } else if(option.endsWith(QLatin1String("subdocument"))) { - // embedded pages, usually included via HTML frames - return std::make_pair(QWebEngineUrlRequestInfo::ResourceTypeSubFrame, exception); - - } else if(option.endsWith(QLatin1String("ping"))) { - // requests started by <a ping> or navigator.sendBeacon() - return std::make_pair(QWebEngineUrlRequestInfo::ResourceTypePing, exception); - - } else if(option.endsWith(QLatin1String("websocket"))) { - // requests initiated via WebSocket object - qDebug("Resource type 'websocket' not available"); - - } else if(option.endsWith(QLatin1String("webrtc"))) { - // connections opened via RTCPeerConnection instances to ICE servers - qDebug("Resource type 'webrtc' not available"); - - } else if(option.endsWith(QLatin1String("document"))) { - // the page itself - return std::make_pair(QWebEngineUrlRequestInfo::ResourceTypeMainFrame, exception); - - } else if(option.endsWith(QLatin1String("other"))) { - return std::make_pair(QWebEngineUrlRequestInfo::ResourceTypeUnknown, exception); - } - - qDebug("TODO: %s", qUtf8Printable(option)); - return std::nullopt; -} diff --git a/lib/urlfilter/adblock/parser.h b/lib/urlfilter/adblock/parser.h deleted file mode 100644 index c73a9cf..0000000 --- a/lib/urlfilter/adblock/parser.h +++ /dev/null @@ -1,14 +0,0 @@ -/* - * This file is part of smolbote. It's copyrighted by the contributors recorded - * in the version control history of the file, available from its original - * location: https://neueland.iserlohn-fortress.net/gitea/aqua/smolbote - * - * SPDX-License-Identifier: GPL-3.0 - */ - -#include <QWebEngineUrlRequestInfo> -#include <optional> -#include <utility> - -std::optional<std::pair<QString, QString>> parseComment(QString &line); -std::optional<std::pair<QWebEngineUrlRequestInfo::ResourceType, bool>> parseResourceOption(const QString &option); |