diff options
author | Aqua-sama <aqua@iserlohn-fortress.net> | 2020-02-10 20:58:39 +0200 |
---|---|---|
committer | Aqua-sama <aqua@iserlohn-fortress.net> | 2020-05-30 11:01:35 +0300 |
commit | d1287f43964633035938f4f4d4133bb6d9da7b3e (patch) | |
tree | d09efa4074815c20be9bd6348203fe4336dfe716 | |
parent | Fix segfault in release build (diff) | |
download | smolbote-d1287f43964633035938f4f4d4133bb6d9da7b3e.tar.xz |
staging: smolblok
smolblok is a replacement for the current lib/urlfilter AdBlockPlus
and hostlist format filter parser. It is a library that uses plugins to
provide support for different filter formats.
staging/adblock: AdBlockPlus parser plugin
plugins/smolblok_hostlist: hostlist format parser plugin
Headers will be installed to include/smolbote/
Remove lib/urlfilter
64 files changed, 1576 insertions, 832 deletions
@@ -5,7 +5,9 @@ build* # qtcreator *.user -subprojects/ +subprojects/* +!subprojects/*.wrap + lang/*.qm tools/src/crashhandler/defaults.go .config @@ -13,5 +15,8 @@ tools/src/crashhandler/defaults.go compile_commands.json *.roff +# fuzzer files lib/configuration/test/corpus +plugins/*/corpus +!plugins/*/corpus/*.txt diff --git a/include/meson.build b/include/meson.build index 1cd9957..1372c86 100644 --- a/include/meson.build +++ b/include/meson.build @@ -1,6 +1,7 @@ plugininterfaces_include = include_directories('.') smolbote_include = include_directories('.') -install_headers('smolbote/filterinterface.hpp', 'smolbote/plugininterface.hpp', - subdir: 'smolbote') +install_headers('smolbote/plugininterface.hpp', 'smolbote/filterinterface.hpp', 'smolbote/session.hpp', + subdir: 'smolbote' +) diff --git a/include/session.hpp b/include/smolbote/session.hpp index 0146802..0146802 100644 --- a/include/session.hpp +++ b/include/smolbote/session.hpp diff --git a/lib/configuration/meson.build b/lib/configuration/meson.build index 0ef6f5f..5e3e4b2 100644 --- a/lib/configuration/meson.build +++ b/lib/configuration/meson.build @@ -16,6 +16,7 @@ if meson.get_compiler('cpp').has_multi_arguments('-g', '-fsanitize=fuzzer') executable('configuration-fuzzer', sources: 'configuration.cpp', cpp_args: [ '-g', '-fsanitize=fuzzer', '-DNO_QT_SPEC', '-DFUZZER' ], - link_args: [ '-fsanitize=fuzzer' ]) + link_args: [ '-fsanitize=fuzzer' ] # args: [ '-seed=1', '-max_total_time=24', meson.current_source_dir()/'test/corpus' ] +) endif diff --git a/lib/session_formats/session_json.hpp b/lib/session_formats/session_json.hpp index 3332229..142d9ef 100644 --- a/lib/session_formats/session_json.hpp +++ b/lib/session_formats/session_json.hpp @@ -10,7 +10,7 @@ #define SESSION_JSON_HPP #include <QJsonObject> -#include <session.hpp> +#include "smolbote/session.hpp" class JsonSession : public Session { diff --git a/lib/urlfilter/adblock/adblocklist.cpp b/lib/urlfilter/adblock/adblocklist.cpp deleted file 
mode 100644 index 3be21bd..0000000 --- a/lib/urlfilter/adblock/adblocklist.cpp +++ /dev/null @@ -1,188 +0,0 @@ -/* - * This file is part of smolbote. It's copyrighted by the contributors recorded - * in the version control history of the file, available from its original - * location: https://neueland.iserlohn-fortress.net/gitea/aqua/smolbote - * - * SPDX-License-Identifier: GPL-3.0 - */ - -#include "adblocklist.h" -#include "parser.h" -#include <QIODevice> -#include <QTextStream> -#include <QDebug> - -AdBlockList::AdBlockList(QIODevice *device) -{ - Q_ASSERT(device->isOpen()); - - QTextStream list(device); - while (!list.atEnd()) { - parseLine(list.readLine()); - } - - qDebug() << m_metadata; -} - -AdBlockList::~AdBlockList() -{ - for(Rule &r : rules) { - delete r.matcher; - } -} - -QString AdBlockList::metadata(const QString& key) const -{ - return m_metadata.value(key); -} - -int AdBlockList::ruleCount() const -{ - return rules.size(); -} - -std::pair<UrlFilter::MatchResult, QString> AdBlockList::match(const QUrl& firstParty, const QUrl& requestUrl, QWebEngineUrlRequestInfo::ResourceType type) const -{ - const QString domain = firstParty.host(); - const QString request = requestUrl.toString(); - - for(const Rule &r : rules) { - // if there are options specified, but not the one we need - if(!r.options.isEmpty() && !r.options.contains(type)) - continue; - - if(r.disabledOn.contains(domain)) - continue; - - if(!r.enabledOn.isEmpty() && !r.enabledOn.contains(domain)) - continue; - - if(r.matcher->hasMatch(request)) - return std::make_pair(r.action, QString()); - } - - return std::make_pair(UrlFilter::NotMatched, QString()); -} - -void AdBlockList::parseLine(const QString& line) -{ - QString parsedLine = line.trimmed(); - - if(parsedLine.isEmpty()) - return; - - if(parsedLine.startsWith(QLatin1String("!"))) { - const auto comment = parseComment(parsedLine); - - if(comment) { - const auto key = comment.value().first; - if(keys.contains(key)) - m_metadata[key] = 
comment.value().second; - } - - return; - } - - // css rule -> filterleaves cannot do element blocking - if(parsedLine.contains(QLatin1String("##")) || parsedLine.contains(QLatin1String("#@#"))) { - qDebug("TODO: %s", qUtf8Printable(parsedLine)); - return; - } - - Rule r; - r.action = UrlFilter::Block; - - // exception rules - if(parsedLine.startsWith(QLatin1String("@@"))) { - r.action = UrlFilter::Allow; - parsedLine.remove(0, 2); - } - - bool matchCase = false; - - // parse options - { - const int sepPos = parsedLine.indexOf(QLatin1String("$")); - if(sepPos != -1) { - const auto options = parsedLine.mid(sepPos + 1).split(QLatin1String(",")); - parsedLine = parsedLine.mid(0, sepPos); - - for(const QString &option : options) { - if(option.startsWith(QLatin1String("domain"))) { - const auto domainList = option.mid(7).split(QLatin1String("|")); - - for(const QString &domain : domainList) { - if(domain.startsWith(QLatin1String("~"))) { - r.disabledOn.append(domain.mid(1)); - } else { - r.enabledOn.append(domain); - } - } - } else if(option.endsWith(QLatin1String("match-case"))) { - matchCase = !option.startsWith(QLatin1String("~")); - - } else { - const auto pair = parseResourceOption(option); - if(pair) - r.options.insert(pair.value().first, pair.value().second); - } - } - } - } - - if(parsedLine.startsWith(QLatin1String("/")) && parsedLine.endsWith(QLatin1String("/"))) { - // regular expression rule - parsedLine = parsedLine.mid(1, parsedLine.length() - 2); - r.matcher = new ContentsMatcher<QRegularExpression>(parsedLine, UrlFilter::RegularExpressionMatch); - - } else if(parsedLine.startsWith(QLatin1String("||")) && parsedLine.endsWith(QLatin1String("^"))) { - parsedLine = parsedLine.mid(2, parsedLine.length() - 3); - r.matcher = new ContentsMatcher<QString>(parsedLine, UrlFilter::DomainMatch); - - } else if(parsedLine.startsWith(QLatin1String("|")) && parsedLine.endsWith(QLatin1String("|"))) { - // string equals rule - parsedLine = parsedLine.mid(1, 
parsedLine.length() - 2); - r.matcher = new ContentsMatcher<QStringMatcher>(parsedLine, UrlFilter::StringEquals); - - } else if(parsedLine.startsWith(QLatin1String("||"))) { - // string starts with rule - parsedLine = parsedLine.mid(2); - r.matcher = new ContentsMatcher<QStringMatcher>(parsedLine, UrlFilter::StringStartsWith); - - } else if(parsedLine.endsWith(QLatin1String("|"))) { - // string ends with rule - parsedLine.chop(1); - r.matcher = new ContentsMatcher<QStringMatcher>(parsedLine, UrlFilter::StringEndsWith); - - } else { - // generic contains rule - - // remove beginning and ending wildcards - if(parsedLine.startsWith(QLatin1String("*"))) - parsedLine = parsedLine.mid(1); - - if(parsedLine.endsWith(QLatin1String("*"))) - parsedLine.chop(1); - - if(parsedLine.contains(QLatin1String("*")) || parsedLine.contains(QLatin1String("^"))) { - // check for wildcards and translate to regexp - // wildcard "*" - any number of characters - // separator "^" - end, ? or / - parsedLine.replace(QLatin1String("||"), QLatin1String("^\\w+://")); - parsedLine.replace(QLatin1String("|"), QLatin1String("\\|")); - parsedLine.replace(QLatin1String("*"), QLatin1String(".*")); - parsedLine.replace(QLatin1String("^"), QLatin1String("($|\\?|\\/)")); - - r.matcher = new ContentsMatcher<QRegularExpression>(parsedLine, UrlFilter::RegularExpressionMatch); - - } else { - r.matcher = new ContentsMatcher<QStringMatcher>(parsedLine, UrlFilter::StringContains); - } - } - - r.matcher->setCaseSensitive(matchCase); - - Q_CHECK_PTR(r.matcher); - rules.emplace_back(std::move(r)); -} - diff --git a/lib/urlfilter/adblock/adblocklist.h b/lib/urlfilter/adblock/adblocklist.h deleted file mode 100644 index ee41e11..0000000 --- a/lib/urlfilter/adblock/adblocklist.h +++ /dev/null @@ -1,42 +0,0 @@ -/* - * This file is part of smolbote. 
It's copyrighted by the contributors recorded - * in the version control history of the file, available from its original - * location: https://neueland.iserlohn-fortress.net/gitea/aqua/smolbote - * - * SPDX-License-Identifier: GPL-3.0 - */ - -#include "urlfilter.h" -#include "matcher.h" -#include <QHash> -#include <QWebEngineUrlRequestInfo> - -class QIODevice; -class AdBlockList : public UrlFilter -{ -public: - // TODO: check if all keys are listed - const QStringList keys = { "Version", "Title", "Last modified", "Expires", "Homepage", "Licence", "Redirect" }; - - AdBlockList(QIODevice *device); - ~AdBlockList(); - - QString metadata(const QString &key) const override; - int ruleCount() const; - std::pair<MatchResult, QString> match(const QUrl &firstParty, const QUrl &requestUrl, QWebEngineUrlRequestInfo::ResourceType type) const override; - -protected: - void parseLine(const QString &line); - -private: - QHash<QString, QString> m_metadata; - - struct Rule { - UrlFilter::MatchResult action = UrlFilter::NotMatched; - Matcher *matcher; - QStringList enabledOn, disabledOn; - QHash<QWebEngineUrlRequestInfo::ResourceType, bool> options; - }; - - std::vector<Rule> rules; -}; diff --git a/lib/urlfilter/adblock/parser.cpp b/lib/urlfilter/adblock/parser.cpp deleted file mode 100644 index 68f895d..0000000 --- a/lib/urlfilter/adblock/parser.cpp +++ /dev/null @@ -1,75 +0,0 @@ -/* - * This file is part of smolbote. 
It's copyrighted by the contributors recorded - * in the version control history of the file, available from its original - * location: https://neueland.iserlohn-fortress.net/gitea/aqua/smolbote - * - * SPDX-License-Identifier: GPL-3.0 - */ - -#include "parser.h" - -std::optional<std::pair<QString, QString>> parseComment(QString &line) -{ - const QLatin1String separator(": "); - if(line.contains(separator)) { - const QStringList comment = line.mid(1).split(QLatin1String(": ")); - return std::make_pair(comment.at(0).trimmed(), comment.at(1).trimmed()); - } else - return std::nullopt; -} - -std::optional<std::pair<QWebEngineUrlRequestInfo::ResourceType, bool>> parseResourceOption(const QString &option) -{ - const bool exception = !option.startsWith(QLatin1String("~")); - - if(option.endsWith(QLatin1String("script"))) { - // external scripts loaded via HTML script tag - return std::make_pair(QWebEngineUrlRequestInfo::ResourceTypeScript, exception); - - } else if(option.endsWith(QLatin1String("image"))) { - // regular images, typically loaded via HTML img tag - return std::make_pair(QWebEngineUrlRequestInfo::ResourceTypeImage, exception); - - } else if(option.endsWith(QLatin1String("stylesheet"))) { - // external CSS stylesheet files - return std::make_pair(QWebEngineUrlRequestInfo::ResourceTypeStylesheet, exception); - - } else if(option.endsWith(QLatin1String("object"))) { - // content handled by browser plugins, e.g. 
Flash or Java - return std::make_pair(QWebEngineUrlRequestInfo::ResourceTypeObject, exception); - - } else if(option.endsWith(QLatin1String("xmlhttprequest"))) { - // requests started using the XMLHttpRequest object or fetch() API - return std::make_pair(QWebEngineUrlRequestInfo::ResourceTypeXhr, exception); - - } else if(option.endsWith(QLatin1String("object-subrequest"))) { - // requests started by plugins like Flash - return std::make_pair(QWebEngineUrlRequestInfo::ResourceTypePluginResource, exception); - - } else if(option.endsWith(QLatin1String("subdocument"))) { - // embedded pages, usually included via HTML frames - return std::make_pair(QWebEngineUrlRequestInfo::ResourceTypeSubFrame, exception); - - } else if(option.endsWith(QLatin1String("ping"))) { - // requests started by <a ping> or navigator.sendBeacon() - return std::make_pair(QWebEngineUrlRequestInfo::ResourceTypePing, exception); - - } else if(option.endsWith(QLatin1String("websocket"))) { - // requests initiated via WebSocket object - qDebug("Resource type 'websocket' not available"); - - } else if(option.endsWith(QLatin1String("webrtc"))) { - // connections opened via RTCPeerConnection instances to ICE servers - qDebug("Resource type 'webrtc' not available"); - - } else if(option.endsWith(QLatin1String("document"))) { - // the page itself - return std::make_pair(QWebEngineUrlRequestInfo::ResourceTypeMainFrame, exception); - - } else if(option.endsWith(QLatin1String("other"))) { - return std::make_pair(QWebEngineUrlRequestInfo::ResourceTypeUnknown, exception); - } - - qDebug("TODO: %s", qUtf8Printable(option)); - return std::nullopt; -} diff --git a/lib/urlfilter/adblock/parser.h b/lib/urlfilter/adblock/parser.h deleted file mode 100644 index c73a9cf..0000000 --- a/lib/urlfilter/adblock/parser.h +++ /dev/null @@ -1,14 +0,0 @@ -/* - * This file is part of smolbote. 
It's copyrighted by the contributors recorded - * in the version control history of the file, available from its original - * location: https://neueland.iserlohn-fortress.net/gitea/aqua/smolbote - * - * SPDX-License-Identifier: GPL-3.0 - */ - -#include <QWebEngineUrlRequestInfo> -#include <optional> -#include <utility> - -std::optional<std::pair<QString, QString>> parseComment(QString &line); -std::optional<std::pair<QWebEngineUrlRequestInfo::ResourceType, bool>> parseResourceOption(const QString &option); diff --git a/lib/urlfilter/hostlist/hostlist.cpp b/lib/urlfilter/hostlist/hostlist.cpp deleted file mode 100644 index bec79ea..0000000 --- a/lib/urlfilter/hostlist/hostlist.cpp +++ /dev/null @@ -1,79 +0,0 @@ -/* - * This file is part of smolbote. It's copyrighted by the contributors recorded - * in the version control history of the file, available from its original - * location: https://neueland.iserlohn-fortress.net/gitea/aqua/smolbote - * - * SPDX-License-Identifier: GPL-3.0 - */ - -#include "hostlist.h" -#include <QIODevice> -#include <QTextStream> -#include <QDebug> - -HostList::HostList(QIODevice *device) -{ - Q_ASSERT(device->isOpen()); - - QTextStream list(device); - while (!list.atEnd()) { - parseLine(list.readLine()); - } - - qDebug() << m_metadata; -} - -QString HostList::metadata(const QString& key) const -{ - return m_metadata.value(key); -} - -int HostList::ruleCount() const -{ - return rules.size(); -} - -std::pair<UrlFilter::MatchResult, QString> HostList::match(const QUrl& firstParty, const QUrl& requestUrl, QWebEngineUrlRequestInfo::ResourceType type) const -{ - Q_UNUSED(firstParty); - Q_UNUSED(type); - - const QString domain = requestUrl.host(); - const uint domainHash = qHash(domain); - - for(const Rule &r : rules) { - if(r.domainHash == domainHash) - return std::make_pair(r.action, r.redirect); - } - - return std::make_pair(UrlFilter::NotMatched, QString()); -} - -void HostList::parseLine(const QString& line) -{ - // check comment - 
if(line.startsWith(QLatin1String("#"))) - return; - - QString parsedLine = line.trimmed(); - - // malformed rule - if(!parsedLine.contains(QLatin1String(" "))) - return; - - const QStringList parts = parsedLine.split(QLatin1String(" ")); - const QString &redirect = parts.at(0); - const auto action = (redirect == QLatin1String("0.0.0.0")) ? UrlFilter::Block : UrlFilter::Redirect; - - for(int i = 1; i < parts.size(); i++) { - const QString &domain = parts.at(i); - Rule r; - r.action = action; - r.domainHash = qHash(domain); - if(action == UrlFilter::Redirect) - r.redirect = redirect; - - rules.emplace_back(std::move(r)); - } -} - diff --git a/lib/urlfilter/hostlist/hostlist.h b/lib/urlfilter/hostlist/hostlist.h deleted file mode 100644 index d4a8d87..0000000 --- a/lib/urlfilter/hostlist/hostlist.h +++ /dev/null @@ -1,44 +0,0 @@ -/* - * This file is part of smolbote. It's copyrighted by the contributors recorded - * in the version control history of the file, available from its original - * location: https://neueland.iserlohn-fortress.net/gitea/aqua/smolbote - * - * SPDX-License-Identifier: GPL-3.0 - */ - -#ifndef SMOLBOTE_URLFILTER_HOSTLIST -#define SMOLBOTE_URLFILTER_HOSTLIST - -#include "urlfilter.h" -#include <QHash> -#include <vector> -#include <QWebEngineUrlRequestInfo> - -class QIODevice; -class HostList : public UrlFilter -{ -public: - - HostList(QIODevice *device); - ~HostList() = default; - - QString metadata(const QString &key) const override; - int ruleCount() const; - std::pair<MatchResult, QString> match(const QUrl &firstParty, const QUrl &requestUrl, QWebEngineUrlRequestInfo::ResourceType type) const override; - -protected: - void parseLine(const QString &line); - -private: - QHash<QString, QString> m_metadata; - - struct Rule { - UrlFilter::MatchResult action = UrlFilter::NotMatched; - uint domainHash; - QString redirect; - }; - - std::vector<Rule> rules; -}; - -#endif // SMOLBOTE_URLFILTER_HOSTLIST diff --git a/lib/urlfilter/matcher.h 
b/lib/urlfilter/matcher.h deleted file mode 100644 index 6696958..0000000 --- a/lib/urlfilter/matcher.h +++ /dev/null @@ -1,109 +0,0 @@ -/* - * This file is part of smolbote. It's copyrighted by the contributors recorded - * in the version control history of the file, available from its original - * location: https://neueland.iserlohn-fortress.net/gitea/aqua/smolbote - * - * SPDX-License-Identifier: GPL-3.0 - */ - -#ifndef SMOLBOTE_URLFILTER_MATCHER -#define SMOLBOTE_URLFILTER_MATCHER - -#include <QUrl> -#include <QString> -#include <utility> -#include <QRegularExpression> -#include <QStringMatcher> -#include <QWebEngineUrlRequestInfo> - -/** An interface class so we can use templated ContentsMatcher interchangeably - */ -class Matcher -{ -public: - virtual ~Matcher() = default; - - virtual void setCaseSensitive(bool matchCase) = 0; - virtual bool hasMatch(const QString &where) const = 0; -}; - -template <typename T> -class ContentsMatcher : public Matcher -{ -public: - ContentsMatcher(const QString &pattern, UrlFilter::MatchType type) - : patternLength(pattern.length()) - , matchType(type) - { - if constexpr(std::is_same_v<T, QRegularExpression>) { - matcher.setPatternOptions(matcher.patternOptions() | QRegularExpression::CaseInsensitiveOption); - matcher.setPattern(pattern); - } else if constexpr(std::is_same_v<T, QStringMatcher>) { - matcher.setCaseSensitivity(Qt::CaseInsensitive); - matcher.setPattern(pattern); - } else if constexpr(std::is_same_v<T, QString>) { - matcher = QUrl::fromUserInput(pattern).host(); - } - } - ~ContentsMatcher() = default; - - void setCaseSensitive(bool matchCase) override - { - if constexpr(std::is_same_v<T, QRegularExpression>) { - auto options = matcher.patternOptions(); - options.setFlag(QRegularExpression::CaseInsensitiveOption, !matchCase); - matcher.setPatternOptions(options); - - } else if constexpr(std::is_same_v<T, QStringMatcher>) { - matcher.setCaseSensitivity(matchCase ? 
Qt::CaseSensitive : Qt::CaseInsensitive); - } - } - - bool hasMatch(const QString &where) const override - { - if constexpr(std::is_same_v<T, QStringMatcher>) { - switch (matchType) { - case UrlFilter::InvalidMatch: - case UrlFilter::RegularExpressionMatch: - case UrlFilter::DomainMatch: - qWarning("ContentsMatcher is a String Matcher, but not doing string matching!"); - return false; - - case UrlFilter::StringContains: - return (matcher.indexIn(where) != -1); - - case UrlFilter::StringStartsWith: - return (matcher.indexIn(where) == 0); - - case UrlFilter::StringEndsWith: - return (matcher.indexIn(where) == where.length() - patternLength); - - case UrlFilter::StringEquals: - return (matcher.indexIn(where) == 0) && (patternLength == where.length()); - } - - } else if constexpr(std::is_same_v<T, QRegularExpression>) { - if(matchType != UrlFilter::RegularExpressionMatch) - qWarning("ContentsMatcher is a regular expression, but not doing a regular expression match!"); - return matcher.match(where).hasMatch(); - } else if constexpr(std::is_same_v<T, QString>) { - // TODO: fix - if(matchType == UrlFilter::DomainMatch) { -// qDebug("matching %s", qUtf8Printable(QUrl(where).host())); - return QUrl(where).host().endsWith(matcher); - } else - return matcher == where; - } - - qWarning("Matcher has no backend, returning false"); - return false; - } - -private: - const int patternLength; - const UrlFilter::MatchType matchType; - T matcher; -}; - -#endif // SMOLBOTE_URLFILTER_MATCHER - diff --git a/lib/urlfilter/meson.build b/lib/urlfilter/meson.build deleted file mode 100644 index 2591028..0000000 --- a/lib/urlfilter/meson.build +++ /dev/null @@ -1,26 +0,0 @@ -urlfilter_lib = static_library('urlfilter', - ['urlfilter.h', 'matcher.h', - 'hostlist/hostlist.cpp', 'hostlist/hostlist.h', - 'adblock/adblocklist.cpp', 'adblock/adblocklist.h', 'adblock/parser.cpp', 'adblock/parser.h'], - dependencies: dep_qt5 -) - -dep_urlfilter = declare_dependency( - include_directories: 
include_directories('.'), - link_with: urlfilter_lib -) - -if dep_gtest.found() - test('urlfilter: matcher', - executable('urlfilter-matcher', dependencies: [dep_qt5, dep_gtest, dep_urlfilter], sources: ['test/matcher.cpp']), - workdir: meson.current_source_dir() / 'test' - ) - test('urlfilter: host list', - executable('urlfilter-hostlist', dependencies: [dep_qt5, dep_gtest, dep_urlfilter], sources: ['test/hostlist.cpp']), - workdir: meson.current_source_dir() / 'test' - ) - test('urlfilter: adblock list', - executable('urlfilter-adblocklist', dependencies: [dep_qt5, dep_gtest, dep_urlfilter], sources: ['test/adblock.cpp']), - workdir: meson.current_source_dir() / 'test' - ) -endif diff --git a/lib/urlfilter/test/adblock.cpp b/lib/urlfilter/test/adblock.cpp deleted file mode 100644 index ecb94ee..0000000 --- a/lib/urlfilter/test/adblock.cpp +++ /dev/null @@ -1,88 +0,0 @@ -#include "urlfilter.h" -#include "adblock/adblocklist.h" -#include <gtest/gtest.h> -#include <QFile> - -AdBlockList *list = nullptr; - -TEST(AdBlockList, MetaData) { - EXPECT_STREQ(qUtf8Printable(list->metadata("Homepage")), "http://example.com/"); - EXPECT_STREQ(qUtf8Printable(list->metadata("Title")), "FooList"); - EXPECT_STREQ(qUtf8Printable(list->metadata("Expires")), "5 days"); - EXPECT_STREQ(qUtf8Printable(list->metadata("Redirect")), "http://example.com/list.txt"); - EXPECT_STREQ(qUtf8Printable(list->metadata("Version")), "1234"); -} - -TEST(AdBlockList, BasicFilter) { - // Rule: /banner/*/img^ - EXPECT_EQ(list->match(QUrl(), QUrl("http://example.com/banner/foo/img"), QWebEngineUrlRequestInfo::ResourceTypeImage).first, UrlFilter::Block); - EXPECT_EQ(list->match(QUrl(), QUrl("http://example.com/banner/foo/bar/img?param"), QWebEngineUrlRequestInfo::ResourceTypeImage).first, UrlFilter::Block); - EXPECT_EQ(list->match(QUrl(), QUrl("http://example.com/banner//img/foo"), QWebEngineUrlRequestInfo::ResourceTypeImage).first, UrlFilter::Block); - - EXPECT_EQ(list->match(QUrl(), 
QUrl("http://example.com/banner/foo.png"), QWebEngineUrlRequestInfo::ResourceTypeImage).first, UrlFilter::NotMatched); - EXPECT_EQ(list->match(QUrl(), QUrl("http://example.com/banner/img"), QWebEngineUrlRequestInfo::ResourceTypeImage).first, UrlFilter::NotMatched); - EXPECT_EQ(list->match(QUrl(), QUrl("http://example.com/banner/foo/imgraph"), QWebEngineUrlRequestInfo::ResourceTypeImage).first, UrlFilter::NotMatched); - EXPECT_EQ(list->match(QUrl(), QUrl("http://example.com/banner/foo/img.gif"), QWebEngineUrlRequestInfo::ResourceTypeImage).first, UrlFilter::NotMatched); - - EXPECT_EQ(list->match(QUrl(), QUrl("http://example.com/banner/ads/img.png"), QWebEngineUrlRequestInfo::ResourceTypeMainFrame).first, UrlFilter::NotMatched); -} - -TEST(AdBlockList, MatchBeginningEnd) { - // Rule: |http://beginning-pattern.com - EXPECT_EQ(list->match(QUrl(), QUrl("http://beginning-pattern.com"), QWebEngineUrlRequestInfo::ResourceTypeMainFrame).first, UrlFilter::Block); - EXPECT_EQ(list->match(QUrl(), QUrl("https://beginning-pattern.com"), QWebEngineUrlRequestInfo::ResourceTypeMainFrame).first, UrlFilter::NotMatched); - // Rule: end-pattern| - EXPECT_EQ(list->match(QUrl(), QUrl("https://endpattern.com/end-pattern"), QWebEngineUrlRequestInfo::ResourceTypeMainFrame).first, UrlFilter::Block); - EXPECT_EQ(list->match(QUrl(), QUrl("https://endpattern.com/end-pattern/foo"), QWebEngineUrlRequestInfo::ResourceTypeMainFrame).first, UrlFilter::NotMatched); -} - -TEST(AdBlockList, Domain) { - // Rule: ||ads.example.com^ - EXPECT_EQ(list->match(QUrl(), QUrl("http://ads.example.com/foo.gif"), QWebEngineUrlRequestInfo::ResourceTypeMainFrame).first, UrlFilter::Block); - EXPECT_EQ(list->match(QUrl(), QUrl("http://server1.ads.example.com/foo.gif"), QWebEngineUrlRequestInfo::ResourceTypeMainFrame).first, UrlFilter::Block); - EXPECT_EQ(list->match(QUrl(), QUrl("https://ads.example.com:8000/"), QWebEngineUrlRequestInfo::ResourceTypeMainFrame).first, UrlFilter::Block); - - EXPECT_EQ(list->match(QUrl(), 
QUrl("http://ads.example.com.ua/foo.gif"), QWebEngineUrlRequestInfo::ResourceTypeMainFrame).first, UrlFilter::NotMatched); - EXPECT_EQ(list->match(QUrl(), QUrl("http://example.com/redirect/http://ads.example.com/"), QWebEngineUrlRequestInfo::ResourceTypeMainFrame).first, UrlFilter::NotMatched); -} - -TEST(AdBlockList, RegularExpression) { - // Rule: /banner\d+/ - EXPECT_EQ(list->match(QUrl(), QUrl("http://example.com/banner123"), QWebEngineUrlRequestInfo::ResourceTypeMainFrame).first, UrlFilter::Block); - EXPECT_EQ(list->match(QUrl(), QUrl("http://example.com/banner321"), QWebEngineUrlRequestInfo::ResourceTypeMainFrame).first, UrlFilter::Block); - EXPECT_EQ(list->match(QUrl(), QUrl("http://example.com/banners"), QWebEngineUrlRequestInfo::ResourceTypeMainFrame).first, UrlFilter::NotMatched); -} - -TEST(AdBlockList, MatchCase) { - // Rule: matchThisCase$match-case - EXPECT_EQ(list->match(QUrl(), QUrl("http://matchcase.com/matchThisCase"), QWebEngineUrlRequestInfo::ResourceTypeMainFrame).first, UrlFilter::Block); - EXPECT_EQ(list->match(QUrl(), QUrl("http://matchcase.com/MatchThisCase"), QWebEngineUrlRequestInfo::ResourceTypeMainFrame).first, UrlFilter::NotMatched); -} - -TEST(AdBlockList, DomainOption) { - // Rule: domain-limited-string$domain=example.com - EXPECT_EQ(list->match(QUrl("https://example.com"), QUrl("https://example.com/domain-limited-string/foo"), QWebEngineUrlRequestInfo::ResourceTypeMainFrame).first, UrlFilter::Block); - EXPECT_EQ(list->match(QUrl("https://example.com"), QUrl("https://example.com/another-domain-string/foo"), QWebEngineUrlRequestInfo::ResourceTypeMainFrame).first, UrlFilter::NotMatched); - EXPECT_EQ(list->match(QUrl("https://another.com"), QUrl("https://example.com/domain-limited-string/foo"), QWebEngineUrlRequestInfo::ResourceTypeMainFrame).first, UrlFilter::NotMatched); - - //Rule: exception-limited-string$domain=~example.com - EXPECT_EQ(list->match(QUrl("https://another.com"), 
QUrl("https://example.com/exception-limited-string/foo"), QWebEngineUrlRequestInfo::ResourceTypeMainFrame).first, UrlFilter::Block); - EXPECT_EQ(list->match(QUrl("https://example.com"), QUrl("https://example.com/exception-limited-string/foo"), QWebEngineUrlRequestInfo::ResourceTypeMainFrame).first, UrlFilter::NotMatched); -} - -int main(int argc, char **argv) { - QFile f("adblock.txt"); - if(!f.open(QIODevice::ReadOnly | QIODevice::Text)) { - qDebug("Could not open list"); - return -1; - } - - list = new AdBlockList(&f); - f.close(); - - qDebug("Parsed %i rules", list->ruleCount()); - - testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} - diff --git a/lib/urlfilter/test/adblock.txt b/lib/urlfilter/test/adblock.txt deleted file mode 100644 index 635ce09..0000000 --- a/lib/urlfilter/test/adblock.txt +++ /dev/null @@ -1,26 +0,0 @@ -! Homepage: http://example.com/ -! Title: FooList -! Expires: 5 days -! Redirect: http://example.com/list.txt -! Version: 1234 - -/banner/*/img^ -||ads.example.com^ -|http://example.com/| -/banner\d+/ - -! match beginning -||http://beginning-pattern.com -! match end -end-pattern| - -! options -! match-case -matchThisCase$match-case - -! domain limiting -! only apply this filter on this domain -domain-limited-string$domain=example.com -! 
apply this filter to all domains but the listed one -exception-limited-string$domain=~example.com - diff --git a/lib/urlfilter/test/hostlist.cpp b/lib/urlfilter/test/hostlist.cpp deleted file mode 100644 index 041cd5f..0000000 --- a/lib/urlfilter/test/hostlist.cpp +++ /dev/null @@ -1,34 +0,0 @@ -#include <gtest/gtest.h> -#include "hostlist/hostlist.h" -#include <QFile> - -HostList *list = nullptr; - -TEST(AdBlockList, Block) { - EXPECT_EQ(list->match(QUrl(), QUrl::fromUserInput("blockeddomain.com"), QWebEngineUrlRequestInfo::ResourceTypeMainFrame).first, UrlFilter::Block); - EXPECT_EQ(list->match(QUrl(), QUrl::fromUserInput("blockeddomain.first"), QWebEngineUrlRequestInfo::ResourceTypeMainFrame).first, UrlFilter::Block); - EXPECT_EQ(list->match(QUrl(), QUrl::fromUserInput("blockeddomain.second"), QWebEngineUrlRequestInfo::ResourceTypeMainFrame).first, UrlFilter::Block); - - const auto r = list->match(QUrl(), QUrl::fromUserInput("localhost.localdomain"), QWebEngineUrlRequestInfo::ResourceTypeMainFrame); - EXPECT_EQ(r.first, UrlFilter::Redirect); - EXPECT_EQ(r.second, QString("127.0.0.1")); - - EXPECT_EQ(list->match(QUrl(), QUrl::fromUserInput("other.domain"), QWebEngineUrlRequestInfo::ResourceTypeMainFrame).first, UrlFilter::NotMatched); -} - -int main(int argc, char **argv) { - QFile f("hostlist.txt"); - if(!f.open(QIODevice::ReadOnly | QIODevice::Text)) { - qDebug("Could not open list"); - return -1; - } - - list = new HostList(&f); - f.close(); - - qDebug("Parsed %i rules", list->ruleCount()); - - testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} - diff --git a/lib/urlfilter/test/matcher.cpp b/lib/urlfilter/test/matcher.cpp deleted file mode 100644 index 1c1efbf..0000000 --- a/lib/urlfilter/test/matcher.cpp +++ /dev/null @@ -1,42 +0,0 @@ -#include "urlfilter.h" -#include "matcher.h" -#include <gtest/gtest.h> - -TEST(Matcher, StringContains) { - ContentsMatcher<QStringMatcher> matcher("spam-pattern", UrlFilter::StringContains); - 
EXPECT_TRUE(matcher.hasMatch("this string contains a spam-pattern")); - EXPECT_FALSE(matcher.hasMatch("this string does not contain the pattern")); -} - -TEST(Matcher, StringStartsWith) { - ContentsMatcher<QStringMatcher> matcher("beginning", UrlFilter::StringStartsWith); - EXPECT_TRUE(matcher.hasMatch("beginning this string is the pattern")); - EXPECT_FALSE(matcher.hasMatch("ending this string is the pattern, the word beginning")); - EXPECT_FALSE(matcher.hasMatch("this would be a string where the pattern cannot be found")); -} - -TEST(Matcher, StringEndsWith) { - ContentsMatcher<QStringMatcher> matcher("ending", UrlFilter::StringEndsWith); - EXPECT_TRUE(matcher.hasMatch("this string has the proper ending")); - EXPECT_FALSE(matcher.hasMatch("and this string doesn't")); -} - -TEST(Matcher, StringEquals) { - ContentsMatcher<QStringMatcher> matcher("string-to-match", UrlFilter::StringEquals); - EXPECT_TRUE(matcher.hasMatch("string-to-match")); - EXPECT_FALSE(matcher.hasMatch("same-len-string")); - EXPECT_FALSE(matcher.hasMatch("not the string-to-match")); -} - -TEST(Matcher, RegularExpression) { - ContentsMatcher<QRegularExpression> matcher("banner\\d+", UrlFilter::RegularExpressionMatch); - EXPECT_TRUE(matcher.hasMatch("http://another.com/banner123")); - EXPECT_TRUE(matcher.hasMatch("http://another.com/banner321")); - EXPECT_FALSE(matcher.hasMatch("http://another.com/banners")); - -} - -int main(int argc, char **argv) { - testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/lib/urlfilter/urlfilter.h b/lib/urlfilter/urlfilter.h deleted file mode 100644 index e15122a..0000000 --- a/lib/urlfilter/urlfilter.h +++ /dev/null @@ -1,43 +0,0 @@ -/* - * This file is part of smolbote. 
It's copyrighted by the contributors recorded - * in the version control history of the file, available from its original - * location: https://neueland.iserlohn-fortress.net/gitea/aqua/smolbote - * - * SPDX-License-Identifier: GPL-3.0 - */ - -#include <QUrl> -#include <QString> -#include <utility> -#include <QWebEngineUrlRequestInfo> - -#ifndef SMOLBOTE_URLFILTER_FILTER -#define SMOLBOTE_URLFILTER_FILTER - -class UrlFilter -{ -public: - enum MatchResult { - NotMatched, - Allow, - Block, - Redirect - }; - - enum MatchType { - InvalidMatch, - RegularExpressionMatch, - StringContains, - StringStartsWith, - StringEndsWith, - StringEquals, - DomainMatch - }; - - virtual ~UrlFilter() = default; - - virtual QString metadata(const QString &key) const = 0; - virtual std::pair<MatchResult, QString> match(const QUrl &firstParty, const QUrl &requestUrl, QWebEngineUrlRequestInfo::ResourceType type) const = 0; -}; - -#endif // SMOLBOTE_URLFILTER_FILTER diff --git a/meson.build b/meson.build index 56e50e7..8290125 100644 --- a/meson.build +++ b/meson.build @@ -2,18 +2,24 @@ project('smolbote', ['cpp'], version: '0.1.0', default_options: ['cpp_std=c++2a', 'warning_level=3'], license: 'GPL3', - meson_version: '>=0.52.0' + meson_version: '>=0.53.0' ) +summary({ + 'prefix': get_option('prefix'), + 'bindir': get_option('bindir'), + 'libdir': get_option('libdir'), + 'datadir': get_option('datadir') +}, section: 'Install locations') + kconfig = import('unstable-kconfig') kconf = kconfig.load(host_machine.system() + '/.config') - cdata = configuration_data(kconf) version_h = vcs_tag( command: [find_program('git').path(), 'describe', '--long', '--abbrev=40'], #fallback: defaults to meson.project_version(), - input: 'include/version.h.in', + input: 'src/version.h.in', output: 'version.h' ) @@ -88,9 +94,6 @@ poi_cpp_args = [] dep_breakpad = dependency('breakpad-client', include_type: 'system', required: get_option('crashhandler')) dep_threads = dependency('threads', include_type: 'system', 
required: get_option('crashhandler')) -if dep_breakpad.found() - poi_cpp_args += '-DHAVE_BREAKPAD' -endif dep_gtest = dependency('gtest', required: false, disabler: true) dep_catch = dependency('catch2', required: true, fallback: ['catch2', 'catch2_dep'] ) @@ -98,16 +101,15 @@ dep_SingleApplication = dependency('singleapplication', fallback: [ 'singleappli dep_args = dependency('args.hxx', fallback: [ 'args', 'args_dep' ]) # Generate config header - poi_sourceset = sourceset.source_set() -subdir('include') # plugin interaces +subdir('include') +smolbote_interfaces = include_directories('include') subdir('lib/bookmarks') subdir('lib/configuration') subdir('lib/downloads') subdir('lib/pluginloader') -subdir('lib/urlfilter') subdir('lib/session_formats') subdir('src') @@ -118,6 +120,10 @@ subdir('tools') subdir('plugins/ProfileEditor') subdir('test/firefox-bookmarks-json-parser') +subdir('test/matcherbenchmark') + +subdir('staging/smolblok') +subdir('plugins/smolblok_hostlist') ssconfig = poi_sourceset.apply(cdata) @@ -125,7 +131,7 @@ poi_exe = executable(get_option('poi'), cpp_args: ['-DQAPPLICATION_CLASS=QApplication', poi_cpp_args], sources: [ssconfig.sources()], include_directories: [ plugininterfaces_include, include_directories('src') ], - dependencies: [ dep_qt5, dep_spdlog, dep_SingleApplication, dep_args, optional_deps, dep_bookmarks, dep_configuration, dep_downloads, dep_pluginloader, dep_urlfilter, ssconfig.dependencies(), lib_session_formats ], + dependencies: [ dep_qt5, dep_spdlog, dep_SingleApplication, dep_args, optional_deps, dep_bookmarks, dep_configuration, dep_downloads, dep_pluginloader, ssconfig.dependencies(), lib_session_formats ], install: true, ) diff --git a/plugins/smolblok_hostlist/corpus/apple.txt b/plugins/smolblok_hostlist/corpus/apple.txt new file mode 100644 index 0000000..3a8973b --- /dev/null +++ b/plugins/smolblok_hostlist/corpus/apple.txt @@ -0,0 +1 @@ +127.0.0.1 localhost.localdomain diff --git 
a/plugins/smolblok_hostlist/corpus/banana.txt b/plugins/smolblok_hostlist/corpus/banana.txt new file mode 100644 index 0000000..c30aa84 --- /dev/null +++ b/plugins/smolblok_hostlist/corpus/banana.txt @@ -0,0 +1 @@ +0.0.0.0 blockeddomain.com diff --git a/plugins/smolblok_hostlist/corpus/kiwi.txt b/plugins/smolblok_hostlist/corpus/kiwi.txt new file mode 100644 index 0000000..77c325c --- /dev/null +++ b/plugins/smolblok_hostlist/corpus/kiwi.txt @@ -0,0 +1 @@ +# This is a comment, and after it comes a blank line diff --git a/plugins/smolblok_hostlist/corpus/orange.txt b/plugins/smolblok_hostlist/corpus/orange.txt new file mode 100644 index 0000000..583273d --- /dev/null +++ b/plugins/smolblok_hostlist/corpus/orange.txt @@ -0,0 +1 @@ +0.0.0.0 blockeddomain.first blockeddomain.second diff --git a/plugins/smolblok_hostlist/filterlist.cpp b/plugins/smolblok_hostlist/filterlist.cpp new file mode 100644 index 0000000..a0fd414 --- /dev/null +++ b/plugins/smolblok_hostlist/filterlist.cpp @@ -0,0 +1,58 @@ +/* + * This file is part of smolbote. It's copyrighted by the contributors recorded + * in the version control history of the file, available from its original + * location: https://library.iserlohn-fortress.net/aqua/smolbote.git + * + * SPDX-License-Identifier: GPL-3.0 + */ + +#include "filterlist.h" +#include <QIODevice> +#include <QTextStream> + +using namespace Hostlist; + +#ifdef FUZZER +extern "C" int LLVMFuzzerTestOneInput(const char *Data, long long Size) +{ + Filterlist::parseRule(QString::fromLatin1(Data, Size)); + return 0; +} +#endif + +std::map<Filterlist::DomainHash, Filterlist::Rule> Filterlist::parseRule(const QString &line) +{ + if(line.isEmpty() || line.at(0) == '#') { + return {}; + } + + auto parts = line.trimmed().split(' '); + if(parts.size() < 2) { + return {}; + } + + const auto redirect = (parts[0] == "0.0.0.0") ? 
QString() : parts[0]; + + std::map<DomainHash, Rule> r; + for(int i = 1; i < parts.size(); ++i) { + r.emplace(qHash(parts[i], 0), Filterlist::Rule{ parts[i], redirect }); + } + return r; +} + +bool Filterlist::load(QIODevice &from) +{ + if(!from.isReadable() || !from.isTextModeEnabled()) { + return false; + } + + while(from.bytesAvailable() > 0) { + const auto line = from.readLine(512).trimmed(); + auto r = parseRule(line); + if(!r.empty()) { + qDebug("merging in %lu rules", r.size()); + rules.merge(r); + } + } + return true; +} diff --git a/plugins/smolblok_hostlist/filterlist.h b/plugins/smolblok_hostlist/filterlist.h new file mode 100644 index 0000000..7301f20 --- /dev/null +++ b/plugins/smolblok_hostlist/filterlist.h @@ -0,0 +1,58 @@ +/* + * This file is part of smolbote. It's copyrighted by the contributors recorded + * in the version control history of the file, available from its original + * location: https://library.iserlohn-fortress.net/aqua/smolbote.git + * + * SPDX-License-Identifier: GPL-3.0 + */ + +#pragma once + +#include <map> +#include <smolbote/filterinterface.hpp> + +namespace Hostlist +{ + +class Filterlist final : public FilterList +{ +public: + typedef uint DomainHash; + struct Rule { + QString domain; + QString redirect; + }; + + Filterlist() = default; + ~Filterlist() = default; + + [[nodiscard]] bool findMatch(const QString &domain) const + { + const auto hash = qHash(domain, 0); + const auto found = rules.find(hash); + if(found != rules.end()) { + return true; + } + return false; + } + int count() const + { + return rules.size(); + } + + [[nodiscard]] bool filter(QWebEngineUrlRequestInfo &info) const + { + return false; + } + [[nodiscard]] bool isUpToDate() const + { + return true; + } + + bool load(QIODevice &device); + [[nodiscard]] static std::map<DomainHash, Rule> parseRule(const QString &line); + +private: + std::map<DomainHash, Rule> rules; +}; +} // namespace Hostlist diff --git a/plugins/smolblok_hostlist/meson.build 
b/plugins/smolblok_hostlist/meson.build new file mode 100644 index 0000000..c9ff303 --- /dev/null +++ b/plugins/smolblok_hostlist/meson.build @@ -0,0 +1,50 @@ +lib_hostlistfilter = static_library('hostlistfilter', + [ 'filterlist.cpp' ], + include_directories: smolbote_interfaces, + dependencies: [dep_qt5] +) + +dep_hostlistfilter = declare_dependency( + include_directories: [ '.', smolbote_interfaces ], + link_with: lib_hostlistfilter +) + +# plugin +plugin = shared_library('smolblokHostlistPlugin', + [ 'plugin/plugin.cpp', + mod_qt5.preprocess(include_directories: smolbote_interfaces, moc_headers: 'plugin/plugin.h', dependencies: dep_qt5) ], + include_directories: smolbote_interfaces, + dependencies: [ dep_hostlistfilter, dep_qt5 ], + install: true, + install_dir: get_option('libdir')/'smolbote/plugins' +) + +# tests +test('rule', executable('rule', sources: 'test/rule.cpp', dependencies: [dep_qt5, dep_catch, dep_hostlistfilter]), suite: 'hostlist') + +test('filterlist', executable('filterlist', + sources: 'test/filterlist.cpp', + dependencies: [dep_qt5, dep_catch, dep_hostlistfilter]), + env: 'HOSTLIST_TXT='+meson.current_source_dir()/'test/hostlist.txt', + suite: 'hostlist' +) +test('plugin', executable('filterlist-plugin', + sources: [ 'test/plugin.cpp', 'plugin/plugin.cpp', + mod_qt5.preprocess(include_directories: smolbote_interfaces, moc_headers: 'plugin/plugin.h', dependencies: dep_qt5) ], + dependencies: [dep_qt5, dep_catch, dep_hostlistfilter]), + env: 'HOSTLIST_TXT='+meson.current_source_dir()/'test/hostlist.txt', + suite: 'hostlist' +) + +test('smolblok-load', smolblok_load, workdir: meson.build_root(), args: plugin.full_path(), suite: 'hostlist') + +# fuzzer +if meson.get_compiler('cpp').has_multi_arguments('-g', '-fsanitize=fuzzer') +executable('hostlist-fuzzer', + sources: 'filterlist.cpp', + include_directories: smolbote_interfaces, + dependencies: dep_qt5, + cpp_args: [ '-g', '-fsanitize=fuzzer', '-DFUZZER' ], + link_args: [ '-fsanitize=fuzzer' ] 
+) +endif diff --git a/plugins/smolblok_hostlist/plugin/plugin.cpp b/plugins/smolblok_hostlist/plugin/plugin.cpp new file mode 100644 index 0000000..28a7706 --- /dev/null +++ b/plugins/smolblok_hostlist/plugin/plugin.cpp @@ -0,0 +1,32 @@ +/* + * This file is part of smolbote. It's copyrighted by the contributors recorded + * in the version control history of the file, available from its original + * location: https://library.iserlohn-fortress.net/aqua/smolbote.git + * + * SPDX-License-Identifier: GPL-3.0 + */ + +#include "plugin.h" +#include "filterlist.h" + +FilterList* HostlistFilterPlugin::load(QIODevice &from) const +{ + if(!from.isOpen()) + return nullptr; + + auto *list = new Hostlist::Filterlist; + return list; +} + +bool HostlistFilterPlugin::parse(FilterList *list, QIODevice &from) const +{ + if(list == nullptr || !from.isOpen()) { + return false; + } + auto *l = dynamic_cast<Hostlist::Filterlist*>(list); + if(l == nullptr) { + return false; + } + return l->load(from); +} + diff --git a/plugins/smolblok_hostlist/plugin/plugin.h b/plugins/smolblok_hostlist/plugin/plugin.h new file mode 100644 index 0000000..53b5d36 --- /dev/null +++ b/plugins/smolblok_hostlist/plugin/plugin.h @@ -0,0 +1,28 @@ +/* + * This file is part of smolbote. 
It's copyrighted by the contributors recorded + * in the version control history of the file, available from its original + * location: https://library.iserlohn-fortress.net/aqua/smolbote.git + * + * SPDX-License-Identifier: GPL-3.0 + */ + +#ifndef SMOLBLOK_FILTER_PLUGIN_H +#define SMOLBLOK_FILTER_PLUGIN_H + +#include <smolbote/filterinterface.hpp> + +class HostlistFilterPlugin : public QObject, public FilterPlugin +{ + Q_OBJECT + Q_PLUGIN_METADATA(IID FilterPluginIid FILE "smolblokHostlistPlugin.json") + Q_INTERFACES(FilterPlugin) + +public: + ~HostlistFilterPlugin() = default; + + FilterList *load(QIODevice &from) const override; + bool parse(FilterList *list, QIODevice &from) const override; +}; + +#endif // SMOLBLOK_FILTER_PLUGIN_H + diff --git a/plugins/smolblok_hostlist/plugin/smolblokHostlistPlugin.json b/plugins/smolblok_hostlist/plugin/smolblokHostlistPlugin.json new file mode 100644 index 0000000..aa53cdd --- /dev/null +++ b/plugins/smolblok_hostlist/plugin/smolblokHostlistPlugin.json @@ -0,0 +1,4 @@ +{ + "name": "smolblok Hostlist filter plugin", + "author": "Aqua <aqua@iserlohn-fortress.net>" +} diff --git a/plugins/smolblok_hostlist/test/filterlist.cpp b/plugins/smolblok_hostlist/test/filterlist.cpp new file mode 100644 index 0000000..4aa532b --- /dev/null +++ b/plugins/smolblok_hostlist/test/filterlist.cpp @@ -0,0 +1,29 @@ +#define CATCH_CONFIG_MAIN +#include "filterlist.h" +#include <QFile> +#include <catch2/catch.hpp> + +using namespace Hostlist; + +TEST_CASE("Hostlist") +{ + Filterlist list; + + const QString filename(qgetenv("HOSTLIST_TXT")); + REQUIRE(!filename.isEmpty()); + + QFile f(filename); + REQUIRE(f.open(QIODevice::ReadOnly | QIODevice::Text)); + + REQUIRE(list.load(f)); + f.close(); + + REQUIRE(list.count() == 4); + + REQUIRE(list.findMatch("blockeddomain.first")); + REQUIRE(list.findMatch("blockeddomain.second")); + + REQUIRE(list.findMatch("localhost.localdomain")); + + REQUIRE(!list.findMatch("other.domain")); +} diff --git 
a/lib/urlfilter/test/hostlist.txt b/plugins/smolblok_hostlist/test/hostlist.txt index a0b4e5c..a0b4e5c 100644 --- a/lib/urlfilter/test/hostlist.txt +++ b/plugins/smolblok_hostlist/test/hostlist.txt diff --git a/plugins/smolblok_hostlist/test/plugin.cpp b/plugins/smolblok_hostlist/test/plugin.cpp new file mode 100644 index 0000000..fad34f2 --- /dev/null +++ b/plugins/smolblok_hostlist/test/plugin.cpp @@ -0,0 +1,27 @@ +#define CATCH_CONFIG_MAIN +#include "plugin/plugin.h" +#include <QFile> +#include <catch2/catch.hpp> + +TEST_CASE("Hostlist") +{ + HostlistFilterPlugin plugin; + + const QString filename(qgetenv("HOSTLIST_TXT")); + REQUIRE(!filename.isEmpty()); + QFile f(filename); + + // shouldn't be able to load an unopened QIODevice + REQUIRE(plugin.load(f) == nullptr); + + REQUIRE(f.open(QIODevice::ReadOnly | QIODevice::Text)); + + auto *list = plugin.load(f); + REQUIRE(list != nullptr); + f.seek(0); + REQUIRE_FALSE(plugin.parse(nullptr, f)); + REQUIRE(plugin.parse(list, f)); + f.close(); + REQUIRE_FALSE(plugin.parse(list, f)); +} + diff --git a/plugins/smolblok_hostlist/test/rule.cpp b/plugins/smolblok_hostlist/test/rule.cpp new file mode 100644 index 0000000..b5ba6e0 --- /dev/null +++ b/plugins/smolblok_hostlist/test/rule.cpp @@ -0,0 +1,57 @@ +#define CATCH_CONFIG_MAIN +#include "filterlist.h" +#include <catch2/catch.hpp> + +using namespace Hostlist; + +SCENARIO("Hostlist::Rule") +{ + GIVEN("an invalid rule") + { + const auto rule = Filterlist::parseRule("0.0.0.0 "); + REQUIRE(rule.empty()); + } + GIVEN("127.0.0.1 localhost.localdomain") + { + auto rule = Filterlist::parseRule("127.0.0.1 localhost.localdomain"); + + REQUIRE(!rule.empty()); + REQUIRE(rule.size() == 1); + + // note: you need to force it to hash a string, rather than the address itself + const auto index = qHash(QString("localhost.localdomain"), 0); + REQUIRE(rule[index].domain == "localhost.localdomain"); + REQUIRE(rule[index].redirect == "127.0.0.1"); + } + + GIVEN("0.0.0.0 blockeddomain.com") + { 
+ auto rule = Filterlist::parseRule("0.0.0.0 blockeddomain.com"); + + REQUIRE(!rule.empty()); + REQUIRE(rule.size() == 1); + + const auto index = qHash(QString("blockeddomain.com"), 0); + REQUIRE(rule[index].domain == "blockeddomain.com"); + REQUIRE(rule[index].redirect.isEmpty()); + ; + } + + GIVEN("0.0.0.0 blockeddomain.first blockeddomain.second") + { + auto rule = Filterlist::parseRule("0.0.0.0 blockeddomain.first blockeddomain.second"); + + REQUIRE(!rule.empty()); + REQUIRE(rule.size() == 2); + { + const auto index = qHash(QString("blockeddomain.first"), 0); + REQUIRE(rule[index].domain == "blockeddomain.first"); + REQUIRE(rule[index].redirect.isEmpty()); + } + { + const auto index = qHash(QString("blockeddomain.second"), 0); + REQUIRE(rule[index].domain == "blockeddomain.second"); + REQUIRE(rule[index].redirect.isEmpty()); + } + } +} diff --git a/src/browser.cpp b/src/browser.cpp index f748e2f..ff948e4 100644 --- a/src/browser.cpp +++ b/src/browser.cpp @@ -9,12 +9,10 @@ #include "browser.h" #include "aboutdialog.h" #include "aboutplugin.h" -#include "adblock/adblocklist.h" #include "applicationmenu.h" #include "bookmarks/bookmarkswidget.h" #include "configuration.h" #include "downloadswidget.h" -#include "hostlist/hostlist.h" #include "mainwindow/addressbar.h" #include "mainwindow/mainwindow.h" #include "mainwindow/menubar.h" diff --git a/src/browser.h b/src/browser.h index 5b4785b..0a0b201 100644 --- a/src/browser.h +++ b/src/browser.h @@ -9,8 +9,8 @@ #ifndef SMOLBOTE_BROWSER_H #define SMOLBOTE_BROWSER_H -#include "session.hpp" #include "webengine/webprofilemanager.h" +#include "smolbote/session.hpp" #include <QJsonObject> #include <QMenu> #include <QPluginLoader> diff --git a/src/builtins.h b/src/builtins.h index cdf6642..9fdca98 100644 --- a/src/builtins.h +++ b/src/builtins.h @@ -9,6 +9,7 @@ #ifndef SMOLBOTE_BUILTINS_H #define SMOLBOTE_BUILTINS_H +#include "smolbote/plugininterface.hpp" #include <args.hxx> typedef std::function<int(const std::string &, 
std::vector<std::string>::const_iterator, std::vector<std::string>::const_iterator)> subcommand_func; diff --git a/src/mainwindow/mainwindow.h b/src/mainwindow/mainwindow.h index c5d573e..5026ae7 100644 --- a/src/mainwindow/mainwindow.h +++ b/src/mainwindow/mainwindow.h @@ -9,7 +9,7 @@ #ifndef SMOLBOTE_MAINWINDOW_H #define SMOLBOTE_MAINWINDOW_H -#include "session.hpp" +#include "smolbote/session.hpp" #include "subwindow/subwindow.h" #include <QJsonArray> #include <QJsonObject> diff --git a/src/session/sessiondialog.h b/src/session/sessiondialog.h index 0a04940..ecbfe46 100644 --- a/src/session/sessiondialog.h +++ b/src/session/sessiondialog.h @@ -9,7 +9,7 @@ #ifndef SMOLBOTE_SESSIONDIALOG_H #define SMOLBOTE_SESSIONDIALOG_H -#include "session.hpp" +#include "smolbote/session.hpp" #include <QDialog> namespace Ui diff --git a/src/subwindow/subwindow.h b/src/subwindow/subwindow.h index 68f7cf9..80e8520 100644 --- a/src/subwindow/subwindow.h +++ b/src/subwindow/subwindow.h @@ -9,7 +9,7 @@ #ifndef SMOLBOTE_SUBWINDOW_H #define SMOLBOTE_SUBWINDOW_H -#include "session.hpp" +#include "smolbote/session.hpp" #include "tabwidget.h" #include "webengine/webview.h" #include <QMenu> diff --git a/include/version.h.in b/src/version.h.in index 44f10d9..44f10d9 100644 --- a/include/version.h.in +++ b/src/version.h.in diff --git a/src/webengine/webview.h b/src/webengine/webview.h index 34c77bf..538ffa9 100644 --- a/src/webengine/webview.h +++ b/src/webengine/webview.h @@ -9,10 +9,10 @@ #ifndef SMOLBOTE_WEBVIEW_H #define SMOLBOTE_WEBVIEW_H +#include "smolbote/session.hpp" #include "webpage.h" #include <QWebEngineView> #include <functional> -#include <session.hpp> class WebProfile; class WebViewContextMenu; diff --git a/staging/adblock/filterlist.cpp b/staging/adblock/filterlist.cpp new file mode 100644 index 0000000..1846ff6 --- /dev/null +++ b/staging/adblock/filterlist.cpp @@ -0,0 +1,128 @@ +/* + * This file is part of smolbote. 
It's copyrighted by the contributors recorded + * in the version control history of the file, available from its original + * location: https://library.iserlohn-fortress.net/aqua/smolbote.git + * + * SPDX-License-Identifier: GPL-3.0 + */ + +#include "filterlist.h" +#include <QByteArray> +#include <QIODevice> + +/** + * Documentation: + * + * https://adblockplus.org/filter-cheatsheet + * https://help.eyeo.com/adblockplus/how-to-write-filters + * + * https://github.com/gorhill/uBlock/wiki/Introduction-to-basic-filtering-syntax + * https://github.com/gorhill/uBlock/wiki/Static-filter-syntax + * + */ + +const QLatin1String comment_lastModified("! Last modified: "); +const QLatin1String comment_expires("! Expires: "); + +using namespace AdblockPlus; + +FilterList::FilterList(QIODevice &from) +{ + if(from.isReadable() && from.isTextModeEnabled()) { + while(from.bytesAvailable() > 0) { + const auto line = from.readLine(512); + + if(line[0] == '!') { + parseComment(line); + + } else if(line.contains("##") || line.contains("#@#")) { + // ## is element hiding rule + // #@# is element hiding exception rule + + } else { + parseRule(line); + } + } + } +} + +void FilterList::parseComment(const QString &line) +{ + if(line.startsWith(comment_lastModified)) { + lastModified = QDateTime::fromString(line.mid(comment_lastModified.size()), "dd MMM yyyy HH:mm 'UTC'"); + expires = lastModified; + + } else if(line.startsWith(comment_expires)) { + const QRegularExpression time_re("(?:(\\d+) days)|(?:(\\d+) hours)"); + const auto match = time_re.match(line); + if(match.hasMatch()) { + expires = expires.addDays(match.captured(1).toInt()); + expires = expires.addSecs(match.captured(2).toInt() * 60 * 60); + } + } +} + +Rule *FilterList::parseRule(const QByteArray &line) +{ + QString pattern = line; + Options opt; + + if(pattern.startsWith(QLatin1String("@@"))) { + pattern.remove(0, 2); + opt.exception = true; + } + + // parse options + if(pattern.contains('$')) { + const auto list = 
pattern.split('$'); + pattern = list.at(0); + const auto options = list.at(1); + + if(!opt.parseAbp(&options)) { + return nullptr; + } + } + + if(pattern.startsWith("||") && pattern.endsWith("^")) { + // domain match + pattern = pattern.mid(2, pattern.length() - 3); + return new MatcherRule(pattern, opt, MatcherRule::DomainMatch); + + } else if(pattern.startsWith("|") && pattern.endsWith("|")) { + // string equals + pattern = pattern.mid(1, pattern.length() - 2); + return new MatcherRule(pattern, opt, MatcherRule::UrlEquals); + + } else if(pattern.startsWith("|")) { + // string starts with + pattern = pattern.mid(1, pattern.length() - 1); + return new MatcherRule(pattern, opt, MatcherRule::UrlStartsWith); + + } else if(pattern.endsWith("|")) { + // string ends with + pattern = pattern.mid(0, pattern.length() - 1); + return new MatcherRule(pattern, opt, MatcherRule::UrlEndsWith); + + } else if(pattern.startsWith("/") && pattern.endsWith("/")) { + // regular expression + pattern = pattern.mid(1, pattern.length() - 2); + return new RegexRule(pattern, opt); + + } else if(!pattern.isEmpty()) { + if(pattern.contains('*')) { + // wildcard pattern + pattern = QRegularExpression::wildcardToRegularExpression(pattern); + return new RegexRule(pattern, opt); + } else { + // contains pattern + return new MatcherRule(pattern, opt); + } + } + + return nullptr; +} + +bool FilterList::filter(QWebEngineUrlRequestInfo &info) const +{ + return false; +} diff --git a/staging/adblock/filterlist.h b/staging/adblock/filterlist.h new file mode 100644 index 0000000..24464c8 --- /dev/null +++ b/staging/adblock/filterlist.h @@ -0,0 +1,54 @@ +/* + * This file is part of smolbote. 
It's copyrighted by the contributors recorded + * in the version control history of the file, available from its original + * location: https://library.iserlohn-fortress.net/aqua/smolbote.git + * + * SPDX-License-Identifier: GPL-3.0 + */ + +#include "rule.h" +#include <QDateTime> +#include <QObject> +#include <QString> +#include <QStringList> +#include <QVector> +#include <smolbote/filterinterface.hpp> + +namespace AdblockPlus +{ +class FilterList : public Filter +{ +public: + explicit FilterList(QIODevice &from); + ~FilterList() + { + qDeleteAll(m_rules); + } + + bool filter(QWebEngineUrlRequestInfo &info) const override; + bool isUpToDate() const override + { + const auto current = QDateTime::currentDateTime(); + return expires > current; + } + + QDateTime modified() const + { + return lastModified; + } + QDateTime expiresOn() const + { + return expires; + } + + [[nodiscard]] static Rule *parseRule(const QByteArray &line); + +private: + void parseComment(const QString &line); + + QDateTime lastModified; + QDateTime expires; + QVector<AdblockPlus::Rule *> m_rules; +}; + +} // namespace AdblockPlus diff --git a/staging/adblock/meson.build b/staging/adblock/meson.build new file mode 100644 index 0000000..942f325 --- /dev/null +++ b/staging/adblock/meson.build @@ -0,0 +1,38 @@ +lib_adblockfilter = static_library('adblockfilter', + [ 'filterlist.cpp', 'options.cpp' ], + include_directories: smolbote_interfaces, + dependencies: [ dep_qt5 ] +) + +dep_adblockfilter = declare_dependency( + include_directories: ['.', smolbote_interfaces], + link_with: lib_adblockfilter +) + +#AdblockPlusFilterPlugin = shared_library('AdblockPlusPlugin', +# [ 'plugin/plugin.cpp', +# mod_qt5.preprocess(include_directories: smolbote_interfaces, +# moc_headers: 'plugin/plugin.h', dependencies: dep_qt5) +# ], +# include_directories: smolbote_interfaces, +# link_with: lib_adblockfilter, +# dependencies: dep_qt5, +# install: true, +# install_dir: get_option('libdir')/'smolbote/plugins' +#) + 
+test('adblock: rule', executable('libadblockfilter_rule', + sources: 'test/rule.cpp', + dependencies: [ dep_qt5, dep_catch, dep_adblockfilter ] +)) + +test('adblock: options', executable('libadblockfilter_options', + sources: 'test/options.cpp', + dependencies: [ dep_qt5, dep_catch, dep_adblockfilter ] +)) + +test('adblock: filterlist', executable('libadblockfilter_filterlist', + sources: 'test/filterlist.cpp', + dependencies: [ dep_qt5, dep_catch, dep_adblockfilter ] +)) + diff --git a/staging/adblock/options.cpp b/staging/adblock/options.cpp new file mode 100644 index 0000000..08f30ee --- /dev/null +++ b/staging/adblock/options.cpp @@ -0,0 +1,94 @@ +/* + * This file is part of smolbote. It's copyrighted by the contributors recorded + * in the version control history of the file, available from its original + * location: https://library.iserlohn-fortress.net/aqua/smolbote.git + * + * SPDX-License-Identifier: GPL-3.0 + */ + +#include "options.h" + +using namespace AdblockPlus; + +constexpr std::array abpTypeOptions = { + "document", // ResourceTypeMainFrame 0 Top level page. + "subdocument", // ResourceTypeSubFrame 1 Frame or iframe. + "stylesheet", // ResourceTypeStylesheet 2 A CSS stylesheet. + "script", // ResourceTypeScript 3 An external script. + "image", // ResourceTypeImage 4 An image (JPG, GIF, PNG, and so on). + "font", // ResourceTypeFontResource 5 A font. + "other", // ResourceTypeSubResource 6 An "other" subresource. + "object", // ResourceTypeObject 7 An object (or embed) tag for a plugin or a resource that a plugin requested. + "media", // ResourceTypeMedia 8 A media resource. + "__worker", // ResourceTypeWorker 9 The main resource of a dedicated worker. + "__sharedworker", // ResourceTypeSharedWorker 10 The main resource of a shared worker. + "__prefetch", // ResourceTypePrefetch 11 An explicitly requested prefetch. + "__favicon", // ResourceTypeFavicon 12 A favicon. + "xmlhttprequest", // ResourceTypeXhr 13 An XMLHttpRequest. 
+ "ping", // ResourceTypePing 14 A ping request for <a ping>. + "__serviceworker", // ResourceTypeServiceWorker 15 The main resource of a service worker. + "__cspreport", // ResourceTypeCspReport 16 A report of Content Security Policy (CSP) violations. + "__pluginresource", // ResourceTypePluginResource 17 A resource requested by a plugin. + "__preloadmainframe", // ResourceTypeNavigationPreloadMainFrame 19 A main-frame service worker navigation preload request. + "__preloadsubframe", // ResourceTypeNavigationPreloadSubFrame 20 A sub-frame service worker navigation preload request. + "__unknown" // ResourceTypeUnknown 255 Unknown request type. +}; + +auto parseTypeOption(QStringRef &option) +{ + struct { + bool found = false; + int index = -1; + bool exception = false; + } ret; + + // Possible inverse type options include ~script, ~image, ~stylesheet, ~object, + // ~xmlhttprequest, ~subdocument, ~ping, ~websocket, ~webrtc, ~document, ~elemhide, ~other + if(option[0] == '~') { + ret.exception = true; + option = option.mid(1); + } + + // TODO: map all ResourceType's to their respective strings + // TODO: websocket, webrtc, elemhide, generichide, genericblock, popup + + for(std::size_t i = 0; i < std::size(abpTypeOptions); ++i) { + if(option == abpTypeOptions[i]) { + ret.index = i; + ret.found = true; + return ret; + } + } + return ret; +} + +bool Options::parseAbp(const QStringRef &options) +{ + std::bitset<32> checked_flags; + + for(auto &option : options.split(',')) { + if(option == "match-case") { + matchcase = true; + + } else if(option == "third-party") { + thirdparty = !exception; + } else if(const auto r = parseTypeOption(option); r.found) { + if(!r.exception) { + flags.set(r.index, true); + checked_flags.set(r.index, true); + } else { + flags.set(r.index, false); + checked_flags.set(r.index, true); + for(auto i = 0; i < 32; ++i) { + if(!checked_flags[i]) { + flags.set(i, true); + } + } + } + } else { + return false; + } + } + + return true; +} diff --git 
a/staging/adblock/options.h b/staging/adblock/options.h new file mode 100644 index 0000000..efc47a6 --- /dev/null +++ b/staging/adblock/options.h @@ -0,0 +1,45 @@ +/* + * This file is part of smolbote. It's copyrighted by the contributors recorded + * in the version control history of the file, available from its original + * location: https://library.iserlohn-fortress.net/aqua/smolbote.git + * + * SPDX-License-Identifier: GPL-3.0 + */ + +#ifndef SMOLBOTE_ADBLOCK_OPTIONS_H +#define SMOLBOTE_ADBLOCK_OPTIONS_H + +#include <QHash> +#include <QString> +#include <QWebEngineUrlRequestInfo> +#include <bitset> + +namespace AdblockPlus +{ + +struct Options { + // request handling options + bool exception = false; + bool redirect = false; + + // pattern options + bool matchcase = false; + + // request type options + bool firstparty = true; + bool thirdparty = true; + + // request types + bool matchesType(QWebEngineUrlRequestInfo::ResourceType type) + { + return flags.test(type); + } + bool parseAbp(const QStringRef &options); + + // TODO private: + std::bitset<32> flags; +}; + +} // namespace AdblockPlus + +#endif // SMOLBOTE_ADBLOCK_OPTIONS_H diff --git a/staging/adblock/plugin/AdblockPlusPlugin.json b/staging/adblock/plugin/AdblockPlusPlugin.json new file mode 100644 index 0000000..053826a --- /dev/null +++ b/staging/adblock/plugin/AdblockPlusPlugin.json @@ -0,0 +1,4 @@ +{ + "name": "AdblockPlus Filter Plugin", + "author": "Aqua <aqua@iserlohn-fortress.net>" +} diff --git a/staging/adblock/plugin/plugin.cpp b/staging/adblock/plugin/plugin.cpp new file mode 100644 index 0000000..028c83f --- /dev/null +++ b/staging/adblock/plugin/plugin.cpp @@ -0,0 +1,54 @@ +/* + * This file is part of smolbote. 
It's copyrighted by the contributors recorded + * in the version control history of the file, available from its original + * location: https://library.iserlohn-fortress.net/aqua/smolbote.git + * + * SPDX-License-Identifier: GPL-3.0 + */ + +#include "plugin.h" +#include "../filterlist.h" +#include <QTextStream> + +using namespace AdblockPlus; + +Filter* AdblockPlusFilterPlugin::load(QIODevice* from) const +{ + if(!from->isOpen()) + return nullptr; + + + QTextStream stream(from); + + if(stream.readLine().trimmed() != QLatin1String("[Adblock Plus 2.0]")) { + return nullptr; + } + + auto *list = new FilterList; + QString line; + int total, comments, rules, unsupported, failed; + + while(stream.readLineInto(&line)) { + if(!line.isEmpty()) { + ++total; + + switch(list->parse(line)) + { + case FilterList::Comment: + ++comments; + break; + case FilterList::Rule: + ++rules; + break; + case FilterList::Unsupported: + ++unsupported; + break; + case FilterList::Failed: + break; + } + } + } + + return list; +} + diff --git a/staging/adblock/plugin/plugin.h b/staging/adblock/plugin/plugin.h new file mode 100644 index 0000000..db419bd --- /dev/null +++ b/staging/adblock/plugin/plugin.h @@ -0,0 +1,25 @@ +/* + * This file is part of smolbote. 
It's copyrighted by the contributors recorded + * in the version control history of the file, available from its original + * location: https://library.iserlohn-fortress.net/aqua/smolbote.git + * + * SPDX-License-Identifier: GPL-3.0 + */ + +#ifndef ADBLOCKPLUSFILTER_PLUGIN_H +#define ADBLOCKPLUSFILTER_PLUGIN_H + +#include <smolbote/filterinterface.hpp> + +class AdblockPlusFilterPlugin : public QObject, public FilterPlugin +{ + Q_OBJECT + Q_PLUGIN_METADATA(IID FilterPluginIid FILE "AdblockPlusPlugin.json") + Q_INTERFACES(FilterPlugin) + +public: + Filter* load(QIODevice* from) const override; +}; + +#endif // ADBLOCKPLUSFILTER_PLUGIN_H + diff --git a/staging/adblock/rule.h b/staging/adblock/rule.h new file mode 100644 index 0000000..aaab49a --- /dev/null +++ b/staging/adblock/rule.h @@ -0,0 +1,153 @@ +/* + * This file is part of smolbote. It's copyrighted by the contributors recorded + * in the version control history of the file, available from its original + * location: https://library.iserlohn-fortress.net/aqua/smolbote.git + * + * SPDX-License-Identifier: GPL-3.0 + */ + +#ifndef SMOLBOTE_ADBLOCK_RULE_H +#define SMOLBOTE_ADBLOCK_RULE_H + +#include "options.h" +#include <QObject> +#include <QRegularExpression> +#include <QString> +#include <QStringMatcher> + +namespace AdblockPlus +{ +class Rule +{ +public: + virtual ~Rule() = default; + /** + * requestUrl: requested URL + * initiatorUrl: URL of document that initiated navigation + * firstPartyUrl: URL of the page that issued the request + */ + virtual bool hasMatch(const QStringRef &requestUrl, + const QStringRef &initiatorUrl, + const QStringRef &firstPartyUrl, + QWebEngineUrlRequestInfo::ResourceType resourceType = QWebEngineUrlRequestInfo::ResourceTypeMainFrame) const = 0; + + bool shouldRedirect() const + { + return options.redirect; + } + bool shouldBlock() const + { + return !options.exception; + } + +protected: + Rule(const Options &opt) + : options(opt) + { + } + const Options options; +}; + +// The 
separator character can be anything but +// a letter, a digit, or one of the following: _, -, ., %. +// The end of the address is also accepted as a separator. +inline bool isSeparator(const QChar &c) +{ + return !c.isLetter() && !c.isDigit() && c != '_' && c != '-' && c != '.' && c != '%'; +} + +class MatcherRule : public Rule +{ +public: + enum MatchPosition { + DomainMatch, + UrlStartsWith, + UrlEndsWith, + UrlContains, + UrlEquals + }; + + MatcherRule(const QString &pattern, const Options &opt, const MatchPosition pos = UrlContains) + : Rule(opt) + , position(pos) + , matcher(pattern, opt.matchcase ? Qt::CaseSensitive : Qt::CaseInsensitive) + , patternLength(pattern.length()) + { + } + explicit MatcherRule(const MatcherRule &) = delete; + MatcherRule &operator=(const MatcherRule &) = delete; + + explicit MatcherRule(MatcherRule &&) = delete; + MatcherRule &operator=(MatcherRule &&) = delete; + + ~MatcherRule() = default; + bool hasMatch(const QStringRef &url, + const QStringRef &initiatorUrl, + const QStringRef &firstPartyUrl, + QWebEngineUrlRequestInfo::ResourceType resourceType = QWebEngineUrlRequestInfo::ResourceTypeMainFrame) const override + { + const auto index = matcher.indexIn(url); + if(index == -1) { + return false; + } + + switch(position) { + case DomainMatch: + // match if + // there is only one : left of the index + // and + // character after pattern is separator or end of string + return (url.left(index).count(':') <= 1 && (index + patternLength == url.length() || isSeparator(url[index + patternLength]))); + case UrlStartsWith: + return (index == 0); + case UrlEndsWith: + return (index == url.length() - patternLength); + case UrlContains: + return (index != -1); + case UrlEquals: + return (index == 0 && patternLength == url.length()); + } + + return false; + } + +private: + const MatchPosition position; + const QStringMatcher matcher; + const int patternLength; +}; + +class RegexRule : public Rule +{ +public: + RegexRule(const QString &rule, 
const Options &opt) + : Rule(opt) + , regex(rule) + { + if(!opt.matchcase) { + regex.setPatternOptions(QRegularExpression::CaseInsensitiveOption); + } + } + explicit RegexRule(const RegexRule &) = delete; + RegexRule &operator=(const RegexRule &) = delete; + + explicit RegexRule(RegexRule &&) = delete; + RegexRule &operator=(RegexRule &&) = delete; + + ~RegexRule() = default; + bool hasMatch(const QStringRef &url, + const QStringRef &initiatorUrl, + const QStringRef &firstPartyUrl, + QWebEngineUrlRequestInfo::ResourceType resourceType = QWebEngineUrlRequestInfo::ResourceTypeMainFrame) const override + { + const auto match = regex.match(url); + return match.hasMatch(); + } + +private: + QRegularExpression regex; +}; + +} // namespace AdblockPlus + +#endif // SMOLBOTE_ADBLOCK_RULE_H diff --git a/staging/adblock/test/filterlist.cpp b/staging/adblock/test/filterlist.cpp new file mode 100644 index 0000000..ca122ac --- /dev/null +++ b/staging/adblock/test/filterlist.cpp @@ -0,0 +1,118 @@ +#define CATCH_CONFIG_MAIN +#include "filterlist.h" +#include <QBuffer> +#include <catch2/catch.hpp> + +using namespace AdblockPlus; + +QByteArray sampleList = + R"(! comment on line +! Last modified: 1 Jan 2000 00:00 UTC +! 
Expires: 4 days (update frequency) +)"; + +TEST_CASE("placeholder") +{ + QBuffer buffer(&sampleList); + buffer.open(QIODevice::ReadOnly | QIODevice::Text); + + AdblockPlus::FilterList list(buffer); + REQUIRE(!list.isUpToDate()); +} + +TEST_CASE("domain match") +{ + const QString defaultUrl = ""; + + const QString block1 = "http://ads.example.com/foo.gif"; + const QString block2 = "http://server1.ads.example.com/foo.gif"; + const QString block3 = "https://ads.example.com:8000/"; + const QString block4 = "https://ads.example.com"; + + const QString allow1 = "http://ads.example.com.ua/foo.gif"; + const QString allow2 = "http://example.com/redirect/http://ads.example.com/"; + + auto *rule = FilterList::parseRule("||ads.example.com^"); + REQUIRE(rule->shouldBlock()); + REQUIRE(!rule->shouldRedirect()); + + REQUIRE(rule->hasMatch(&block1, &defaultUrl, &defaultUrl)); + REQUIRE(rule->hasMatch(&block2, &defaultUrl, &defaultUrl)); + REQUIRE(rule->hasMatch(&block3, &defaultUrl, &defaultUrl)); + REQUIRE(rule->hasMatch(&block4, &defaultUrl, &defaultUrl)); + + REQUIRE(!rule->hasMatch(&allow1, &defaultUrl, &defaultUrl)); + REQUIRE(!rule->hasMatch(&allow2, &defaultUrl, &defaultUrl)); + + delete rule; +} + +TEST_CASE("string equals") +{ + const QString defaultUrl = ""; + + const QString block = "http://example.com/"; + + const QString allow1 = "http://example.com/foo.gif"; + const QString allow2 = "http://example.info/redirect/http://example.com/"; + + auto *rule = FilterList::parseRule("|http://example.com/|"); + REQUIRE(rule->shouldBlock()); + REQUIRE(!rule->shouldRedirect()); + + REQUIRE(rule->hasMatch(&block, &defaultUrl, &defaultUrl)); + + REQUIRE(!rule->hasMatch(&allow1, &defaultUrl, &defaultUrl)); + REQUIRE(!rule->hasMatch(&allow2, &defaultUrl, &defaultUrl)); + + delete rule; +} + +TEST_CASE("string starts with") +{ + const QString defaultUrl = ""; + + auto *rule = FilterList::parseRule("|http://baddomain.example/"); + REQUIRE(rule->shouldBlock()); + 
REQUIRE(!rule->shouldRedirect()); + + const QString blocks = "http://baddomain.example/banner.gif"; + const QString allows = "http://gooddomain.example/analyze?http://baddomain.example"; + + REQUIRE(rule->hasMatch(&blocks, &defaultUrl, &defaultUrl)); + REQUIRE(!rule->hasMatch(&allows, &defaultUrl, &defaultUrl)); + delete rule; +} + +TEST_CASE("string ends with") +{ + const QString defaultUrl = ""; + + auto *rule = FilterList::parseRule("swf|"); + REQUIRE(rule->shouldBlock()); + REQUIRE(!rule->shouldRedirect()); + + const QString blocks = "http://example.com/annoyingflash.swf"; + const QString allows = "http://example.com/swf/index.html"; + + REQUIRE(rule->hasMatch(&blocks, &defaultUrl, &defaultUrl)); + REQUIRE(!rule->hasMatch(&allows, &defaultUrl, &defaultUrl)); + delete rule; +} + +TEST_CASE("regular expressions") +{ + const QString defaultUrl = ""; + + auto *rule = FilterList::parseRule("/banner\\d+/"); + const QString matches1 = "banner123"; + const QString matches2 = "banner321"; + + const QString ignores = "banners"; + + REQUIRE(rule->hasMatch(&matches1, &defaultUrl, &defaultUrl)); + REQUIRE(rule->hasMatch(&matches2, &defaultUrl, &defaultUrl)); + REQUIRE(!rule->hasMatch(&ignores, &defaultUrl, &defaultUrl)); + delete rule; +} + diff --git a/staging/adblock/test/options.cpp b/staging/adblock/test/options.cpp new file mode 100644 index 0000000..67dc143 --- /dev/null +++ b/staging/adblock/test/options.cpp @@ -0,0 +1,42 @@ +#define CATCH_CONFIG_MAIN +#include "options.h" +#include <catch2/catch.hpp> + +using namespace AdblockPlus; + +SCENARIO("parsing adblock options") +{ + Options opt; + + GIVEN("an unknown option") + { + const QString unknown = "unknown"; + THEN("the option is not parsed") + { + QStringRef unknown_ref(&unknown); + REQUIRE(!opt.parseAbp(unknown_ref)); + } + } + + GIVEN("match-case,document,~subdocument") + { + const QString options = "match-case,document,~subdocument"; + REQUIRE(opt.parseAbp(&options)); + + WHEN("match-case") + { + 
REQUIRE(opt.matchcase); + } + + WHEN("testing set/unset options") + { + REQUIRE(opt.matchesType(QWebEngineUrlRequestInfo::ResourceTypeMainFrame)); + REQUIRE(!opt.matchesType(QWebEngineUrlRequestInfo::ResourceTypeSubFrame)); + } + + WHEN("testing other options") + { + REQUIRE(opt.matchesType(QWebEngineUrlRequestInfo::ResourceTypeStylesheet)); + } + } +} diff --git a/staging/adblock/test/rule.cpp b/staging/adblock/test/rule.cpp new file mode 100644 index 0000000..07186b9 --- /dev/null +++ b/staging/adblock/test/rule.cpp @@ -0,0 +1,86 @@ +#define CATCH_CONFIG_MAIN +#include "rule.h" +#include <catch2/catch.hpp> + +using namespace AdblockPlus; + +SCENARIO("MatcherRule") +{ + GIVEN("options with case sensitive pattern") + { + const QString defaultUrl = ""; + + const Options opt { .matchcase=true }; + const QString patternContains("this string contains the pattern in it"); + const QString patternBegins("pattern starts this string"); + const QString patternEnds("this string ends with pattern"); + const QString patternMissing("and this one does not"); + + WHEN("contains") + { + MatcherRule rule("pattern", opt); + REQUIRE(rule.shouldBlock()); + + THEN("pattern is matched anywhere in the URL") + { + REQUIRE(rule.hasMatch(&patternContains, &defaultUrl, &defaultUrl)); + REQUIRE(rule.hasMatch(&patternBegins, &defaultUrl, &defaultUrl)); + REQUIRE(rule.hasMatch(&patternEnds, &defaultUrl, &defaultUrl)); + REQUIRE(!rule.hasMatch(&patternMissing, &defaultUrl, &defaultUrl)); + } + } + + WHEN("startsWith") + { + MatcherRule rule("pattern", opt, MatcherRule::UrlStartsWith); + REQUIRE(rule.shouldBlock()); + + THEN("pattern is matched if at the start of the URL") + { + REQUIRE(!rule.hasMatch(&patternContains, &defaultUrl, &defaultUrl)); + REQUIRE(rule.hasMatch(&patternBegins, &defaultUrl, &defaultUrl)); + REQUIRE(!rule.hasMatch(&patternEnds, &defaultUrl, &defaultUrl)); + REQUIRE(!rule.hasMatch(&patternMissing, &defaultUrl, &defaultUrl)); + } + } + + WHEN("endsWith") + { + MatcherRule 
rule("pattern", opt, MatcherRule::UrlEndsWith); + REQUIRE(rule.shouldBlock()); + + THEN("pattern is matched if at the end of the URL") + { + REQUIRE(!rule.hasMatch(&patternContains, &defaultUrl, &defaultUrl)); + REQUIRE(!rule.hasMatch(&patternBegins, &defaultUrl, &defaultUrl)); + REQUIRE(rule.hasMatch(&patternEnds, &defaultUrl, &defaultUrl)); + REQUIRE(!rule.hasMatch(&patternMissing, &defaultUrl, &defaultUrl)); + } + } + } +} + +SCENARIO("RegexRule") +{ + GIVEN("options with case sensitive pattern") + { + const QString defaultUrl; + + const Options opt { .matchcase=true }; + const QString patternContains("this string contains the pattern in it"); + const QString patternMissing("and this one does not"); + + WHEN("contains") + { + RegexRule rule("pattern", opt); + REQUIRE(rule.shouldBlock()); + + THEN("pattern is matched anywhere in the URL") + { + REQUIRE(rule.hasMatch(&patternContains, &defaultUrl, &defaultUrl)); + REQUIRE(!rule.hasMatch(&patternMissing, &defaultUrl, &defaultUrl)); + } + } + } +} + diff --git a/staging/smolblok/README.md b/staging/smolblok/README.md new file mode 100644 index 0000000..1793009 --- /dev/null +++ b/staging/smolblok/README.md @@ -0,0 +1,8 @@ +## smolblok + +### What is this +This is a C++ library for URL filtering for Qt applications using QtWebEngine. + +### Supported formats +- AdblockPlus without element hiding rules + diff --git a/staging/smolblok/filtermanager.hpp b/staging/smolblok/filtermanager.hpp new file mode 100644 index 0000000..6ee4d3f --- /dev/null +++ b/staging/smolblok/filtermanager.hpp @@ -0,0 +1,41 @@ +/* + * This file is part of smolbote. 
It's copyrighted by the contributors recorded
+ * in the version control history of the file, available from its original
+ * location: https://library.iserlohn-fortress.net/aqua/smolbote.git
+ *
+ * SPDX-License-Identifier: GPL-3.0
+ */
+
+#pragma once
+
+#include <QWebEngineUrlRequestInterceptor>
+#include <smolbote/filterinterface.hpp>
+
+/**
+ * Request interceptor that hands every request to the registered filter
+ * lists in insertion order. The manager owns the lists it is given and
+ * deletes them in its destructor.
+ */
+class FilterManager : public QWebEngineUrlRequestInterceptor
+{
+public:
+    explicit FilterManager(QObject *parent = nullptr)
+        : QWebEngineUrlRequestInterceptor(parent)
+    {
+    }
+    ~FilterManager() override
+    {
+        qDeleteAll(filters);
+    }
+
+    /**
+     * Take ownership of list and consult it on every intercepted request.
+     * A null list is ignored, because interceptRequest dereferences every
+     * stored pointer.
+     */
+    void addFilterList(FilterList *list)
+    {
+        if(list == nullptr) {
+            return;
+        }
+        filters.append(list);
+    }
+
+    /**
+     * Offer the request to each list in turn and stop at the first one whose
+     * filter() returns true.
+     */
+    void interceptRequest(QWebEngineUrlRequestInfo &info) override
+    {
+        for(const auto *filter : qAsConst(filters)) {
+            if(filter->filter(info)) {
+                return;
+            }
+        }
+    }
+
+private:
+    QList<FilterList *> filters;
+};
diff --git a/staging/smolblok/meson.build b/staging/smolblok/meson.build
new file mode 100644
index 0000000..6105179
--- /dev/null
+++ b/staging/smolblok/meson.build
@@ -0,0 +1,17 @@
+dep_smolblok = declare_dependency(
+  include_directories: [ '.', smolbote_interfaces ],
+  link_with: library('smolblok',
+    [ 'smolblok.cpp' ],
+    include_directories: smolbote_interfaces,
+    dependencies: dep_qt5
+  )
+)
+
+smolblok_load = executable('smolblok-load',
+  dependencies: [ dep_qt5, dep_spdlog, dep_smolblok ],
+  sources: [ 'test/loader.cpp' ]
+)
+
+test('load', smolblok_load, suite: 'smolblok', should_fail: true)
+test('load', smolblok_load, suite: 'smolblok', args: files('meson.build'), should_fail: true)
+
diff --git a/staging/smolblok/smolblok.cpp b/staging/smolblok/smolblok.cpp
new file mode 100644
index 0000000..465c348
--- /dev/null
+++ b/staging/smolblok/smolblok.cpp
@@ -0,0 +1,40 @@
+/*
+ * This file is part of smolbote. 
It's copyrighted by the contributors recorded
+ * in the version control history of the file, available from its original
+ * location: https://library.iserlohn-fortress.net/aqua/smolbote.git
+ *
+ * SPDX-License-Identifier: GPL-3.0
+ */
+
+#include "smolblok.hpp"
+#include <QFile>
+#include <QSettings>
+
+/**
+ * Read an ini-format subscription file and load every listed filter list
+ * with the plugin registered for its format.
+ *
+ * Each group in the file describes one subscription: "Format" selects the
+ * plugin and "File" names the list on disk. Groups with an unregistered
+ * format or a missing file are skipped.
+ *
+ * filename: path of the subscription file
+ *
+ * Returns true if at least one filter list was added, false otherwise
+ * (including an empty filename or a file without usable groups).
+ */
+bool smolblok::addSubscriptions(const QString &filename)
+{
+    if(filename.isEmpty()) {
+        return false;
+    }
+
+    QSettings listconf(filename, QSettings::IniFormat);
+    bool added = false;
+
+    const auto groups = listconf.childGroups();
+    for(const auto &group : groups) {
+        // Read both keys and close the group right away, so that skipping a
+        // subscription below can never leave the group open and cause the
+        // following groups to be looked up nested inside this one.
+        listconf.beginGroup(group);
+        const auto format = listconf.value("Format").toString();
+        const auto path = listconf.value("File").toString();
+        listconf.endGroup();
+
+        const auto *loader = m_formats.value(format).instance;
+        if(loader == nullptr) {
+            continue;
+        }
+
+        QFile f(path);
+        if(!f.exists()) {
+            continue;
+        }
+
+        // NOTE(review): f is never opened here; presumably load() opens the
+        // device itself - confirm against the FilterPlugin implementations.
+        auto *list = loader->load(f);
+        f.seek(0);
+        if(loader->parse(list, f)) {
+            m_subscriptions.addFilterList(list);
+            added = true;
+        }
+        // NOTE(review): list is not released when parse() fails; confirm who
+        // owns it in that case.
+    }
+    return added;
+}
+
diff --git a/staging/smolblok/smolblok.hpp b/staging/smolblok/smolblok.hpp
new file mode 100644
index 0000000..e547d67
--- /dev/null
+++ b/staging/smolblok/smolblok.hpp
@@ -0,0 +1,80 @@
+/*
+ * This file is part of smolbote. 
It's copyrighted by the contributors recorded
+ * in the version control history of the file, available from its original
+ * location: https://library.iserlohn-fortress.net/aqua/smolbote.git
+ *
+ * SPDX-License-Identifier: GPL-3.0
+ */
+
+#ifndef SMOLBOTE_SMOLBLOK_HPP
+#define SMOLBOTE_SMOLBLOK_HPP
+
+#include "filtermanager.hpp"
+#include <QPluginLoader>
+#include <QWebEngineUrlRequestInterceptor>
+#include <smolbote/filterinterface.hpp>
+
+/**
+ * Entry point of the library: keeps the format plugins that were registered
+ * and the filter manager holding the subscriptions loaded through them.
+ */
+class smolblok
+{
+public:
+    smolblok() = default;
+    ~smolblok()
+    {
+        for(const auto &plugin : qAsConst(m_formats)) {
+            delete plugin.loader;
+        }
+    }
+
+    /**
+     * Load the plugin at filename and register it as the parser for format.
+     * Registering a format a second time replaces the earlier plugin.
+     *
+     * Returns an unnamed struct: `loaded` tells whether the plugin was
+     * registered, `error` carries the reason when it was not.
+     */
+    auto registerFormatPlugin(const QString &format, const QString &filename)
+    {
+        struct {
+            bool loaded = false;
+            QString error;
+        } ret;
+
+        if(format.isEmpty() || filename.isEmpty()) {
+            ret.error = "Format or filename is empty";
+            return ret;
+        }
+
+        auto *plugin = new QPluginLoader(filename);
+        if(!plugin->load()) {
+            ret.error = plugin->errorString();
+            delete plugin;
+            return ret;
+        }
+
+        auto *instance = qobject_cast<FilterPlugin *>(plugin->instance());
+        if(instance == nullptr) {
+            ret.error = "Unable to cast";
+            delete plugin;
+            return ret;
+        }
+
+        // Release the loader of a plugin previously registered under the
+        // same format; overwriting the entry would leak it. For a new key
+        // the default-constructed entry holds nullptr and delete is a no-op.
+        auto &entry = m_formats[format];
+        delete entry.loader;
+        entry = PluginInfo{ plugin, instance };
+
+        ret.loaded = true;
+        return ret;
+    }
+
+    /** Names of all formats that currently have a plugin registered. */
+    auto formats() const
+    {
+        return m_formats.keys();
+    }
+
+    bool addSubscriptions(const QString &filename);
+
+    /** Interceptor to install on the web engine profile; owned by this object. */
+    QWebEngineUrlRequestInterceptor *interceptor()
+    {
+        return &m_subscriptions;
+    }
+
+private:
+    struct PluginInfo {
+        QPluginLoader *loader = nullptr;
+        FilterPlugin *instance = nullptr;
+    };
+
+    QHash<QString, PluginInfo> m_formats;
+    FilterManager m_subscriptions;
+};
+
+#endif // SMOLBOTE_SMOLBLOK_HPP
diff --git a/staging/smolblok/test/loader.cpp b/staging/smolblok/test/loader.cpp
new file mode 100644
index 0000000..9e27a26
--- /dev/null
+++ b/staging/smolblok/test/loader.cpp
@@ -0,0 +1,25 @@
+#include "smolblok.hpp"
+#include <spdlog/spdlog.h>
+
+int main(int argc, char** argv)
+{
+    if(argc != 2) {
+        spdlog::error("usage: {} 
path/to/plugin.so", argv[0]);
+        return -1;
+    }
+
+    smolblok filter;
+    {
+        const auto r = filter.registerFormatPlugin("unused", argv[1]);
+        if(r.loaded) {
+            spdlog::info("Loaded plugin {}", argv[1]);
+        } else {
+            spdlog::error("Failed loading plugin {}", argv[1]);
+            // the plugin error text is data, so keep it out of the format-string position
+            spdlog::error("{}", qUtf8Printable(r.error));
+            return -1;
+        }
+    }
+
+    return 0;
+}
+
diff --git a/staging/smolblok/test/main.cpp b/staging/smolblok/test/main.cpp
new file mode 100644
index 0000000..5624ee9
--- /dev/null
+++ b/staging/smolblok/test/main.cpp
@@ -0,0 +1,22 @@
+#define CATCH_CONFIG_MAIN
+
+#include "smolblok.hpp"
+#include <catch2/catch.hpp>
+
+// Invalid input must be rejected both when registering plugins and when
+// loading subscriptions.
+SCENARIO("smolblok")
+{
+    smolblok s;
+
+    GIVEN("invalid plugins")
+    {
+        // registerFormatPlugin returns a struct without a bool conversion;
+        // the outcome is reported through its `loaded` member
+        REQUIRE(!s.registerFormatPlugin("", "").loaded);
+        REQUIRE(!s.registerFormatPlugin("Format", "missing.dll").loaded);
+    }
+
+    GIVEN("invalid subscriptions")
+    {
+        REQUIRE(!s.addSubscriptions(""));
+        REQUIRE(!s.addSubscriptions("missing.txt"));
+    }
+}
+
diff --git a/staging/smolblok/test/sample-filters.txt b/staging/smolblok/test/sample-filters.txt
new file mode 100644
index 0000000..59e0e7b
--- /dev/null
+++ b/staging/smolblok/test/sample-filters.txt
@@ -0,0 +1,10 @@
+[easylist-noelemhide]
+Format = AdblockPlus
+File = easylist_noelemhide.txt
+Href = https://easylist-downloads.adblockplus.org/easylist_noelemhide.txt
+
+[StevenBlack]
+Format = Hostlist
+File = stevenblack.txt
+Href = https://raw.githubusercontent.com/StevenBlack/hosts/master/hosts
+
diff --git a/test/matcherbenchmark/matcherbenchmark.cpp b/test/matcherbenchmark/matcherbenchmark.cpp
new file mode 100644
index 0000000..1fd87cf
--- /dev/null
+++ b/test/matcherbenchmark/matcherbenchmark.cpp
@@ -0,0 +1,85 @@
+#include "matcherbenchmark.h"
+#include <QRegExp>
+#include <QRegularExpression>
+#include <QStringMatcher>
+#include <QtTest/QTest>
+#include <regex.h>
+#include <regex>
+#include <string>
+
+void MatcherBenchmark::qstringcontains()
+{
+    const QString pattern("spamdomain");
+    const QString request("subdomain.spamdomain.com");
+
+    
QCOMPARE(request.contains(pattern), true);
+    QBENCHMARK
+    {
+        request.contains(pattern);
+    }
+}
+
+void MatcherBenchmark::qstringmatcher()
+{
+    const QStringMatcher pattern("spamdomain");
+    const QString request("subdomain.spamdomain.com");
+
+    QCOMPARE(pattern.indexIn(request) != -1, true);
+    QBENCHMARK
+    {
+        pattern.indexIn(request);
+    }
+}
+
+void MatcherBenchmark::qregexp()
+{
+    const QRegExp pattern("spamdomain");
+    const QString request("subdomain.spamdomain.com");
+
+    QCOMPARE(pattern.indexIn(request) != -1, true);
+    QBENCHMARK
+    {
+        pattern.indexIn(request);
+    }
+}
+
+void MatcherBenchmark::qregularexpressionmatch()
+{
+    const QRegularExpression pattern("spamdomain");
+    const QString request("subdomain.spamdomain.com");
+
+    QCOMPARE(pattern.match(request).hasMatch(), true);
+    QBENCHMARK
+    {
+        pattern.match(request).hasMatch();
+    }
+}
+
+void MatcherBenchmark::stdregex()
+{
+    const std::regex pattern("spamdomain");
+    const std::string request("subdomain.spamdomain.com");
+
+    QCOMPARE(std::regex_search(request, pattern), true);
+    QBENCHMARK
+    {
+        std::regex_search(request, pattern);
+    }
+}
+
+// Benchmark the POSIX regex API (regcomp/regexec) on the same input as the
+// other matchers.
+void MatcherBenchmark::cregex()
+{
+    regex_t pattern;
+    QCOMPARE(regcomp(&pattern, "spamdomain", 0), 0);
+
+    // QCOMPARE leaves the function when the comparison fails, so the compiled
+    // pattern is released from a destructor instead of at the end of the body
+    struct PatternGuard {
+        regex_t *compiled;
+        ~PatternGuard()
+        {
+            regfree(compiled);
+        }
+    };
+    const PatternGuard guard{ &pattern };
+
+    const std::string request("subdomain.spamdomain.com");
+
+    // regexec reports a match with 0
+    QCOMPARE(regexec(&pattern, request.c_str(), 0, nullptr, 0), 0);
+    QBENCHMARK
+    {
+        regexec(&pattern, request.c_str(), 0, nullptr, 0);
+    }
+}
+
+QTEST_GUILESS_MAIN(MatcherBenchmark)
diff --git a/test/matcherbenchmark/matcherbenchmark.h b/test/matcherbenchmark/matcherbenchmark.h
new file mode 100644
index 0000000..70fd859
--- /dev/null
+++ b/test/matcherbenchmark/matcherbenchmark.h
@@ -0,0 +1,19 @@
+#ifndef MATCHER_BENCHMARK
+#define MATCHER_BENCHMARK
+
+#include <QObject>
+
+class MatcherBenchmark : public QObject
+{
+    Q_OBJECT
+
+private slots:
+    void qstringcontains();
+    void qstringmatcher();
+    void qregexp();
+    void qregularexpressionmatch();
+    void stdregex();
+    void cregex();
+};
+
+#endif diff --git a/test/matcherbenchmark/meson.build b/test/matcherbenchmark/meson.build new file mode 100644 index 0000000..64efd0b --- /dev/null +++ b/test/matcherbenchmark/meson.build @@ -0,0 +1,7 @@ +subdir_done() + +executable('matcherbenchmarks', + sources: [ 'matcherbenchmark.cpp', + mod_qt5.preprocess(moc_headers: 'matcherbenchmark.h')], + dependencies: [ dep_qt5 ] +) |