From d1287f43964633035938f4f4d4133bb6d9da7b3e Mon Sep 17 00:00:00 2001 From: Aqua-sama Date: Mon, 10 Feb 2020 20:58:39 +0200 Subject: staging: smolblok smolblok is a replacement for the current lib/urlfilter AdBlockPlus and hostlist format filter parser. It is a library that uses plugins to provide support for different filter formats. staging/adblock: AdBlockPlus parser plugin plugins/smolblok_hostlist: hostlist format parser plugin Headers will be installed to include/smolbote/ Remove lib/urlfilter --- lib/configuration/meson.build | 3 +- lib/session_formats/session_json.hpp | 2 +- lib/urlfilter/adblock/adblocklist.cpp | 188 ---------------------------------- lib/urlfilter/adblock/adblocklist.h | 42 -------- lib/urlfilter/adblock/parser.cpp | 75 -------------- lib/urlfilter/adblock/parser.h | 14 --- lib/urlfilter/hostlist/hostlist.cpp | 79 -------------- lib/urlfilter/hostlist/hostlist.h | 44 -------- lib/urlfilter/matcher.h | 109 -------------------- lib/urlfilter/meson.build | 26 ----- lib/urlfilter/test/adblock.cpp | 88 ---------------- lib/urlfilter/test/adblock.txt | 26 ----- lib/urlfilter/test/hostlist.cpp | 34 ------ lib/urlfilter/test/hostlist.txt | 6 -- lib/urlfilter/test/matcher.cpp | 42 -------- lib/urlfilter/urlfilter.h | 43 -------- 16 files changed, 3 insertions(+), 818 deletions(-) delete mode 100644 lib/urlfilter/adblock/adblocklist.cpp delete mode 100644 lib/urlfilter/adblock/adblocklist.h delete mode 100644 lib/urlfilter/adblock/parser.cpp delete mode 100644 lib/urlfilter/adblock/parser.h delete mode 100644 lib/urlfilter/hostlist/hostlist.cpp delete mode 100644 lib/urlfilter/hostlist/hostlist.h delete mode 100644 lib/urlfilter/matcher.h delete mode 100644 lib/urlfilter/meson.build delete mode 100644 lib/urlfilter/test/adblock.cpp delete mode 100644 lib/urlfilter/test/adblock.txt delete mode 100644 lib/urlfilter/test/hostlist.cpp delete mode 100644 lib/urlfilter/test/hostlist.txt delete mode 100644 lib/urlfilter/test/matcher.cpp delete mode 100644 lib/urlfilter/urlfilter.h (limited to 'lib') diff --git a/lib/configuration/meson.build b/lib/configuration/meson.build index 0ef6f5f..5e3e4b2 100644 --- a/lib/configuration/meson.build +++ b/lib/configuration/meson.build @@ -16,6 +16,7 @@ if meson.get_compiler('cpp').has_multi_arguments('-g', '-fsanitize=fuzzer') executable('configuration-fuzzer', sources: 'configuration.cpp', cpp_args: [ '-g', '-fsanitize=fuzzer', '-DNO_QT_SPEC', '-DFUZZER' ], - link_args: [ '-fsanitize=fuzzer' ]) + link_args: [ '-fsanitize=fuzzer' ] # args: [ '-seed=1', '-max_total_time=24', meson.current_source_dir()/'test/corpus' ] +) endif diff --git a/lib/session_formats/session_json.hpp b/lib/session_formats/session_json.hpp index 3332229..142d9ef 100644 --- a/lib/session_formats/session_json.hpp +++ b/lib/session_formats/session_json.hpp @@ -10,7 +10,7 @@ #define SESSION_JSON_HPP #include -#include +#include "smolbote/session.hpp" class JsonSession : public Session { diff --git a/lib/urlfilter/adblock/adblocklist.cpp b/lib/urlfilter/adblock/adblocklist.cpp deleted file mode 100644 index 3be21bd..0000000 --- a/lib/urlfilter/adblock/adblocklist.cpp +++ /dev/null @@ -1,188 +0,0 @@ -/* - * This file is part of smolbote. It's copyrighted by the contributors recorded - * in the version control history of the file, available from its original - * location: https://neueland.iserlohn-fortress.net/gitea/aqua/smolbote - * - * SPDX-License-Identifier: GPL-3.0 - */ - -#include "adblocklist.h" -#include "parser.h" -#include -#include -#include - -AdBlockList::AdBlockList(QIODevice *device) -{ - Q_ASSERT(device->isOpen()); - - QTextStream list(device); - while (!list.atEnd()) { - parseLine(list.readLine()); - } - - qDebug() << m_metadata; -} - -AdBlockList::~AdBlockList() -{ - for(Rule &r : rules) { - delete r.matcher; - } -} - -QString AdBlockList::metadata(const QString& key) const -{ - return m_metadata.value(key); -} - -int AdBlockList::ruleCount() const -{ - return rules.size(); -} - -std::pair AdBlockList::match(const QUrl& firstParty, const QUrl& requestUrl, QWebEngineUrlRequestInfo::ResourceType type) const -{ - const QString domain = firstParty.host(); - const QString request = requestUrl.toString(); - - for(const Rule &r : rules) { - // if there are options specified, but not the one we need - if(!r.options.isEmpty() && !r.options.contains(type)) - continue; - - if(r.disabledOn.contains(domain)) - continue; - - if(!r.enabledOn.isEmpty() && !r.enabledOn.contains(domain)) - continue; - - if(r.matcher->hasMatch(request)) - return std::make_pair(r.action, QString()); - } - - return std::make_pair(UrlFilter::NotMatched, QString()); -} - -void AdBlockList::parseLine(const QString& line) -{ - QString parsedLine = line.trimmed(); - - if(parsedLine.isEmpty()) - return; - - if(parsedLine.startsWith(QLatin1String("!"))) { - const auto comment = parseComment(parsedLine); - - if(comment) { - const auto key = comment.value().first; - if(keys.contains(key)) - m_metadata[key] = comment.value().second; - } - - return; - } - - // css rule -> filterleaves cannot do element blocking - if(parsedLine.contains(QLatin1String("##")) || parsedLine.contains(QLatin1String("#@#"))) { - qDebug("TODO: %s", qUtf8Printable(parsedLine)); - return; - } - - Rule r; - r.action = UrlFilter::Block; - - // exception rules - if(parsedLine.startsWith(QLatin1String("@@"))) { - r.action = UrlFilter::Allow; - parsedLine.remove(0, 2); - } - - bool matchCase = false; - - // parse options - { - const int sepPos = parsedLine.indexOf(QLatin1String("$")); - if(sepPos != -1) { - const auto options = parsedLine.mid(sepPos + 1).split(QLatin1String(",")); - parsedLine = parsedLine.mid(0, sepPos); - - for(const QString &option : options) { - if(option.startsWith(QLatin1String("domain"))) { - const auto domainList = option.mid(7).split(QLatin1String("|")); - - for(const QString &domain : domainList) { - if(domain.startsWith(QLatin1String("~"))) { - r.disabledOn.append(domain.mid(1)); - } else { - r.enabledOn.append(domain); - } - } - } else if(option.endsWith(QLatin1String("match-case"))) { - matchCase = !option.startsWith(QLatin1String("~")); - - } else { - const auto pair = parseResourceOption(option); - if(pair) - r.options.insert(pair.value().first, pair.value().second); - } - } - } - } - - if(parsedLine.startsWith(QLatin1String("/")) && parsedLine.endsWith(QLatin1String("/"))) { - // regular expression rule - parsedLine = parsedLine.mid(1, parsedLine.length() - 2); - r.matcher = new ContentsMatcher(parsedLine, UrlFilter::RegularExpressionMatch); - - } else if(parsedLine.startsWith(QLatin1String("||")) && parsedLine.endsWith(QLatin1String("^"))) { - parsedLine = parsedLine.mid(2, parsedLine.length() - 3); - r.matcher = new ContentsMatcher(parsedLine, UrlFilter::DomainMatch); - - } else if(parsedLine.startsWith(QLatin1String("|")) && parsedLine.endsWith(QLatin1String("|"))) { - // string equals rule - parsedLine = parsedLine.mid(1, parsedLine.length() - 2); - r.matcher = new ContentsMatcher(parsedLine, UrlFilter::StringEquals); - - } else if(parsedLine.startsWith(QLatin1String("||"))) { - // string starts with rule - parsedLine = parsedLine.mid(2); - r.matcher = new ContentsMatcher(parsedLine, UrlFilter::StringStartsWith); - - } else if(parsedLine.endsWith(QLatin1String("|"))) { - // string ends with rule - parsedLine.chop(1); - r.matcher = new ContentsMatcher(parsedLine, UrlFilter::StringEndsWith); - - } else { - // generic contains rule - - // remove beginning and ending wildcards - if(parsedLine.startsWith(QLatin1String("*"))) - parsedLine = parsedLine.mid(1); - - if(parsedLine.endsWith(QLatin1String("*"))) - parsedLine.chop(1); - - if(parsedLine.contains(QLatin1String("*")) || parsedLine.contains(QLatin1String("^"))) { - // check for wildcards and translate to regexp - // wildcard "*" - any number of characters - // separator "^" - end, ? or / - parsedLine.replace(QLatin1String("||"), QLatin1String("^\\w+://")); - parsedLine.replace(QLatin1String("|"), QLatin1String("\\|")); - parsedLine.replace(QLatin1String("*"), QLatin1String(".*")); - parsedLine.replace(QLatin1String("^"), QLatin1String("($|\\?|\\/)")); - - r.matcher = new ContentsMatcher(parsedLine, UrlFilter::RegularExpressionMatch); - - } else { - r.matcher = new ContentsMatcher(parsedLine, UrlFilter::StringContains); - } - } - - r.matcher->setCaseSensitive(matchCase); - - Q_CHECK_PTR(r.matcher); - rules.emplace_back(std::move(r)); -} - diff --git a/lib/urlfilter/adblock/adblocklist.h b/lib/urlfilter/adblock/adblocklist.h deleted file mode 100644 index ee41e11..0000000 --- a/lib/urlfilter/adblock/adblocklist.h +++ /dev/null @@ -1,42 +0,0 @@ -/* - * This file is part of smolbote. It's copyrighted by the contributors recorded - * in the version control history of the file, available from its original - * location: https://neueland.iserlohn-fortress.net/gitea/aqua/smolbote - * - * SPDX-License-Identifier: GPL-3.0 - */ - -#include "urlfilter.h" -#include "matcher.h" -#include -#include - -class QIODevice; -class AdBlockList : public UrlFilter -{ -public: - // TODO: check if all keys are listed - const QStringList keys = { "Version", "Title", "Last modified", "Expires", "Homepage", "Licence", "Redirect" }; - - AdBlockList(QIODevice *device); - ~AdBlockList(); - - QString metadata(const QString &key) const override; - int ruleCount() const; - std::pair match(const QUrl &firstParty, const QUrl &requestUrl, QWebEngineUrlRequestInfo::ResourceType type) const override; - -protected: - void parseLine(const QString &line); - -private: - QHash m_metadata; - - struct Rule { - UrlFilter::MatchResult action = UrlFilter::NotMatched; - Matcher *matcher; - QStringList enabledOn, disabledOn; - QHash options; - }; - - std::vector rules; -}; diff --git a/lib/urlfilter/adblock/parser.cpp b/lib/urlfilter/adblock/parser.cpp deleted file mode 100644 index 68f895d..0000000 --- a/lib/urlfilter/adblock/parser.cpp +++ /dev/null @@ -1,75 +0,0 @@ -/* - * This file is part of smolbote. It's copyrighted by the contributors recorded - * in the version control history of the file, available from its original - * location: https://neueland.iserlohn-fortress.net/gitea/aqua/smolbote - * - * SPDX-License-Identifier: GPL-3.0 - */ - -#include "parser.h" - -std::optional> parseComment(QString &line) -{ - const QLatin1String separator(": "); - if(line.contains(separator)) { - const QStringList comment = line.mid(1).split(QLatin1String(": ")); - return std::make_pair(comment.at(0).trimmed(), comment.at(1).trimmed()); - } else - return std::nullopt; -} - -std::optional> parseResourceOption(const QString &option) -{ - const bool exception = !option.startsWith(QLatin1String("~")); - - if(option.endsWith(QLatin1String("script"))) { - // external scripts loaded via HTML script tag - return std::make_pair(QWebEngineUrlRequestInfo::ResourceTypeScript, exception); - - } else if(option.endsWith(QLatin1String("image"))) { - // regular images, typically loaded via HTML img tag - return std::make_pair(QWebEngineUrlRequestInfo::ResourceTypeImage, exception); - - } else if(option.endsWith(QLatin1String("stylesheet"))) { - // external CSS stylesheet files - return std::make_pair(QWebEngineUrlRequestInfo::ResourceTypeStylesheet, exception); - - } else if(option.endsWith(QLatin1String("object"))) { - // content handled by browser plugins, e.g. Flash or Java - return std::make_pair(QWebEngineUrlRequestInfo::ResourceTypeObject, exception); - - } else if(option.endsWith(QLatin1String("xmlhttprequest"))) { - // requests started using the XMLHttpRequest object or fetch() API - return std::make_pair(QWebEngineUrlRequestInfo::ResourceTypeXhr, exception); - - } else if(option.endsWith(QLatin1String("object-subrequest"))) { - // requests started by plugins like Flash - return std::make_pair(QWebEngineUrlRequestInfo::ResourceTypePluginResource, exception); - - } else if(option.endsWith(QLatin1String("subdocument"))) { - // embedded pages, usually included via HTML frames - return std::make_pair(QWebEngineUrlRequestInfo::ResourceTypeSubFrame, exception); - - } else if(option.endsWith(QLatin1String("ping"))) { - // requests started by or navigator.sendBeacon() - return std::make_pair(QWebEngineUrlRequestInfo::ResourceTypePing, exception); - - } else if(option.endsWith(QLatin1String("websocket"))) { - // requests initiated via WebSocket object - qDebug("Resource type 'websocket' not available"); - - } else if(option.endsWith(QLatin1String("webrtc"))) { - // connections opened via RTCPeerConnection instances to ICE servers - qDebug("Resource type 'webrtc' not available"); - - } else if(option.endsWith(QLatin1String("document"))) { - // the page itself - return std::make_pair(QWebEngineUrlRequestInfo::ResourceTypeMainFrame, exception); - - } else if(option.endsWith(QLatin1String("other"))) { - return std::make_pair(QWebEngineUrlRequestInfo::ResourceTypeUnknown, exception); - } - - qDebug("TODO: %s", qUtf8Printable(option)); - return std::nullopt; -} diff --git a/lib/urlfilter/adblock/parser.h b/lib/urlfilter/adblock/parser.h deleted file mode 100644 index c73a9cf..0000000 --- a/lib/urlfilter/adblock/parser.h +++ /dev/null @@ -1,14 +0,0 @@ -/* - * This file is part of smolbote. It's copyrighted by the contributors recorded - * in the version control history of the file, available from its original - * location: https://neueland.iserlohn-fortress.net/gitea/aqua/smolbote - * - * SPDX-License-Identifier: GPL-3.0 - */ - -#include -#include -#include - -std::optional> parseComment(QString &line); -std::optional> parseResourceOption(const QString &option); diff --git a/lib/urlfilter/hostlist/hostlist.cpp b/lib/urlfilter/hostlist/hostlist.cpp deleted file mode 100644 index bec79ea..0000000 --- a/lib/urlfilter/hostlist/hostlist.cpp +++ /dev/null @@ -1,79 +0,0 @@ -/* - * This file is part of smolbote. It's copyrighted by the contributors recorded - * in the version control history of the file, available from its original - * location: https://neueland.iserlohn-fortress.net/gitea/aqua/smolbote - * - * SPDX-License-Identifier: GPL-3.0 - */ - -#include "hostlist.h" -#include -#include -#include - -HostList::HostList(QIODevice *device) -{ - Q_ASSERT(device->isOpen()); - - QTextStream list(device); - while (!list.atEnd()) { - parseLine(list.readLine()); - } - - qDebug() << m_metadata; -} - -QString HostList::metadata(const QString& key) const -{ - return m_metadata.value(key); -} - -int HostList::ruleCount() const -{ - return rules.size(); -} - -std::pair HostList::match(const QUrl& firstParty, const QUrl& requestUrl, QWebEngineUrlRequestInfo::ResourceType type) const -{ - Q_UNUSED(firstParty); - Q_UNUSED(type); - - const QString domain = requestUrl.host(); - const uint domainHash = qHash(domain); - - for(const Rule &r : rules) { - if(r.domainHash == domainHash) - return std::make_pair(r.action, r.redirect); - } - - return std::make_pair(UrlFilter::NotMatched, QString()); -} - -void HostList::parseLine(const QString& line) -{ - // check comment - if(line.startsWith(QLatin1String("#"))) - return; - - QString parsedLine = line.trimmed(); - - // malformed rule - if(!parsedLine.contains(QLatin1String(" "))) - return; - - const QStringList parts = parsedLine.split(QLatin1String(" ")); - const QString &redirect = parts.at(0); - const auto action = (redirect == QLatin1String("0.0.0.0")) ? UrlFilter::Block : UrlFilter::Redirect; - - for(int i = 1; i < parts.size(); i++) { - const QString &domain = parts.at(i); - Rule r; - r.action = action; - r.domainHash = qHash(domain); - if(action == UrlFilter::Redirect) - r.redirect = redirect; - - rules.emplace_back(std::move(r)); - } -} - diff --git a/lib/urlfilter/hostlist/hostlist.h b/lib/urlfilter/hostlist/hostlist.h deleted file mode 100644 index d4a8d87..0000000 --- a/lib/urlfilter/hostlist/hostlist.h +++ /dev/null @@ -1,44 +0,0 @@ -/* - * This file is part of smolbote. It's copyrighted by the contributors recorded - * in the version control history of the file, available from its original - * location: https://neueland.iserlohn-fortress.net/gitea/aqua/smolbote - * - * SPDX-License-Identifier: GPL-3.0 - */ - -#ifndef SMOLBOTE_URLFILTER_HOSTLIST -#define SMOLBOTE_URLFILTER_HOSTLIST - -#include "urlfilter.h" -#include -#include -#include - -class QIODevice; -class HostList : public UrlFilter -{ -public: - - HostList(QIODevice *device); - ~HostList() = default; - - QString metadata(const QString &key) const override; - int ruleCount() const; - std::pair match(const QUrl &firstParty, const QUrl &requestUrl, QWebEngineUrlRequestInfo::ResourceType type) const override; - -protected: - void parseLine(const QString &line); - -private: - QHash m_metadata; - - struct Rule { - UrlFilter::MatchResult action = UrlFilter::NotMatched; - uint domainHash; - QString redirect; - }; - - std::vector rules; -}; - -#endif // SMOLBOTE_URLFILTER_HOSTLIST diff --git a/lib/urlfilter/matcher.h b/lib/urlfilter/matcher.h deleted file mode 100644 index 6696958..0000000 --- a/lib/urlfilter/matcher.h +++ /dev/null @@ -1,109 +0,0 @@ -/* - * This file is part of smolbote. It's copyrighted by the contributors recorded - * in the version control history of the file, available from its original - * location: https://neueland.iserlohn-fortress.net/gitea/aqua/smolbote - * - * SPDX-License-Identifier: GPL-3.0 - */ - -#ifndef SMOLBOTE_URLFILTER_MATCHER -#define SMOLBOTE_URLFILTER_MATCHER - -#include -#include -#include -#include -#include -#include - -/** An interface class so we can use templated ContentsMatcher interchangeably - */ -class Matcher -{ -public: - virtual ~Matcher() = default; - - virtual void setCaseSensitive(bool matchCase) = 0; - virtual bool hasMatch(const QString &where) const = 0; -}; - -template -class ContentsMatcher : public Matcher -{ -public: - ContentsMatcher(const QString &pattern, UrlFilter::MatchType type) - : patternLength(pattern.length()) - , matchType(type) - { - if constexpr(std::is_same_v) { - matcher.setPatternOptions(matcher.patternOptions() | QRegularExpression::CaseInsensitiveOption); - matcher.setPattern(pattern); - } else if constexpr(std::is_same_v) { - matcher.setCaseSensitivity(Qt::CaseInsensitive); - matcher.setPattern(pattern); - } else if constexpr(std::is_same_v) { - matcher = QUrl::fromUserInput(pattern).host(); - } - } - ~ContentsMatcher() = default; - - void setCaseSensitive(bool matchCase) override - { - if constexpr(std::is_same_v) { - auto options = matcher.patternOptions(); - options.setFlag(QRegularExpression::CaseInsensitiveOption, !matchCase); - matcher.setPatternOptions(options); - - } else if constexpr(std::is_same_v) { - matcher.setCaseSensitivity(matchCase ? Qt::CaseSensitive : Qt::CaseInsensitive); - } - } - - bool hasMatch(const QString &where) const override - { - if constexpr(std::is_same_v) { - switch (matchType) { - case UrlFilter::InvalidMatch: - case UrlFilter::RegularExpressionMatch: - case UrlFilter::DomainMatch: - qWarning("ContentsMatcher is a String Matcher, but not doing string matching!"); - return false; - - case UrlFilter::StringContains: - return (matcher.indexIn(where) != -1); - - case UrlFilter::StringStartsWith: - return (matcher.indexIn(where) == 0); - - case UrlFilter::StringEndsWith: - return (matcher.indexIn(where) == where.length() - patternLength); - - case UrlFilter::StringEquals: - return (matcher.indexIn(where) == 0) && (patternLength == where.length()); - } - - } else if constexpr(std::is_same_v) { - if(matchType != UrlFilter::RegularExpressionMatch) - qWarning("ContentsMatcher is a regular expression, but not doing a regular expression match!"); - return matcher.match(where).hasMatch(); - } else if constexpr(std::is_same_v) { - // TODO: fix - if(matchType == UrlFilter::DomainMatch) { -// qDebug("matching %s", qUtf8Printable(QUrl(where).host())); - return QUrl(where).host().endsWith(matcher); - } else - return matcher == where; - } - - qWarning("Matcher has no backend, returning false"); - return false; - } - -private: - const int patternLength; - const UrlFilter::MatchType matchType; - T matcher; -}; - -#endif // SMOLBOTE_URLFILTER_MATCHER - diff --git a/lib/urlfilter/meson.build b/lib/urlfilter/meson.build deleted file mode 100644 index 2591028..0000000 --- a/lib/urlfilter/meson.build +++ /dev/null @@ -1,26 +0,0 @@ -urlfilter_lib = static_library('urlfilter', - ['urlfilter.h', 'matcher.h', - 'hostlist/hostlist.cpp', 'hostlist/hostlist.h', - 'adblock/adblocklist.cpp', 'adblock/adblocklist.h', 'adblock/parser.cpp', 'adblock/parser.h'], - dependencies: dep_qt5 -) - -dep_urlfilter = declare_dependency( - include_directories: include_directories('.'), - link_with: urlfilter_lib -) - -if dep_gtest.found() - test('urlfilter: matcher', - executable('urlfilter-matcher', dependencies: [dep_qt5, dep_gtest, dep_urlfilter], sources: ['test/matcher.cpp']), - workdir: meson.current_source_dir() / 'test' - ) - test('urlfilter: host list', - executable('urlfilter-hostlist', dependencies: [dep_qt5, dep_gtest, dep_urlfilter], sources: ['test/hostlist.cpp']), - workdir: meson.current_source_dir() / 'test' - ) - test('urlfilter: adblock list', - executable('urlfilter-adblocklist', dependencies: [dep_qt5, dep_gtest, dep_urlfilter], sources: ['test/adblock.cpp']), - workdir: meson.current_source_dir() / 'test' - ) -endif diff --git a/lib/urlfilter/test/adblock.cpp b/lib/urlfilter/test/adblock.cpp deleted file mode 100644 index ecb94ee..0000000 --- a/lib/urlfilter/test/adblock.cpp +++ /dev/null @@ -1,88 +0,0 @@ -#include "urlfilter.h" -#include "adblock/adblocklist.h" -#include -#include - -AdBlockList *list = nullptr; - -TEST(AdBlockList, MetaData) { - EXPECT_STREQ(qUtf8Printable(list->metadata("Homepage")), "http://example.com/"); - EXPECT_STREQ(qUtf8Printable(list->metadata("Title")), "FooList"); - EXPECT_STREQ(qUtf8Printable(list->metadata("Expires")), "5 days"); - EXPECT_STREQ(qUtf8Printable(list->metadata("Redirect")), "http://example.com/list.txt"); - EXPECT_STREQ(qUtf8Printable(list->metadata("Version")), "1234"); -} - -TEST(AdBlockList, BasicFilter) { - // Rule: /banner/*/img^ - EXPECT_EQ(list->match(QUrl(), QUrl("http://example.com/banner/foo/img"), QWebEngineUrlRequestInfo::ResourceTypeImage).first, UrlFilter::Block); - EXPECT_EQ(list->match(QUrl(), QUrl("http://example.com/banner/foo/bar/img?param"), QWebEngineUrlRequestInfo::ResourceTypeImage).first, UrlFilter::Block); - EXPECT_EQ(list->match(QUrl(), QUrl("http://example.com/banner//img/foo"), QWebEngineUrlRequestInfo::ResourceTypeImage).first, UrlFilter::Block); - - EXPECT_EQ(list->match(QUrl(), QUrl("http://example.com/banner/foo.png"), QWebEngineUrlRequestInfo::ResourceTypeImage).first, UrlFilter::NotMatched); - EXPECT_EQ(list->match(QUrl(), QUrl("http://example.com/banner/img"), QWebEngineUrlRequestInfo::ResourceTypeImage).first, UrlFilter::NotMatched); - EXPECT_EQ(list->match(QUrl(), QUrl("http://example.com/banner/foo/imgraph"), QWebEngineUrlRequestInfo::ResourceTypeImage).first, UrlFilter::NotMatched); - EXPECT_EQ(list->match(QUrl(), QUrl("http://example.com/banner/foo/img.gif"), QWebEngineUrlRequestInfo::ResourceTypeImage).first, UrlFilter::NotMatched); - - EXPECT_EQ(list->match(QUrl(), QUrl("http://example.com/banner/ads/img.png"), QWebEngineUrlRequestInfo::ResourceTypeMainFrame).first, UrlFilter::NotMatched); -} - -TEST(AdBlockList, MatchBeginningEnd) { - // Rule: |http://beginning-pattern.com - EXPECT_EQ(list->match(QUrl(), QUrl("http://beginning-pattern.com"), QWebEngineUrlRequestInfo::ResourceTypeMainFrame).first, UrlFilter::Block); - EXPECT_EQ(list->match(QUrl(), QUrl("https://beginning-pattern.com"), QWebEngineUrlRequestInfo::ResourceTypeMainFrame).first, UrlFilter::NotMatched); - // Rule: end-pattern| - EXPECT_EQ(list->match(QUrl(), QUrl("https://endpattern.com/end-pattern"), QWebEngineUrlRequestInfo::ResourceTypeMainFrame).first, UrlFilter::Block); - EXPECT_EQ(list->match(QUrl(), QUrl("https://endpattern.com/end-pattern/foo"), QWebEngineUrlRequestInfo::ResourceTypeMainFrame).first, UrlFilter::NotMatched); -} - -TEST(AdBlockList, Domain) { - // Rule: ||ads.example.com^ - EXPECT_EQ(list->match(QUrl(), QUrl("http://ads.example.com/foo.gif"), QWebEngineUrlRequestInfo::ResourceTypeMainFrame).first, UrlFilter::Block); - EXPECT_EQ(list->match(QUrl(), QUrl("http://server1.ads.example.com/foo.gif"), QWebEngineUrlRequestInfo::ResourceTypeMainFrame).first, UrlFilter::Block); - EXPECT_EQ(list->match(QUrl(), QUrl("https://ads.example.com:8000/"), QWebEngineUrlRequestInfo::ResourceTypeMainFrame).first, UrlFilter::Block); - - EXPECT_EQ(list->match(QUrl(), QUrl("http://ads.example.com.ua/foo.gif"), QWebEngineUrlRequestInfo::ResourceTypeMainFrame).first, UrlFilter::NotMatched); - EXPECT_EQ(list->match(QUrl(), QUrl("http://example.com/redirect/http://ads.example.com/"), QWebEngineUrlRequestInfo::ResourceTypeMainFrame).first, UrlFilter::NotMatched); -} - -TEST(AdBlockList, RegularExpression) { - // Rule: /banner\d+/ - EXPECT_EQ(list->match(QUrl(), QUrl("http://example.com/banner123"), QWebEngineUrlRequestInfo::ResourceTypeMainFrame).first, UrlFilter::Block); - EXPECT_EQ(list->match(QUrl(), QUrl("http://example.com/banner321"), QWebEngineUrlRequestInfo::ResourceTypeMainFrame).first, UrlFilter::Block); - EXPECT_EQ(list->match(QUrl(), QUrl("http://example.com/banners"), QWebEngineUrlRequestInfo::ResourceTypeMainFrame).first, UrlFilter::NotMatched); -} - -TEST(AdBlockList, MatchCase) { - // Rule: matchThisCase$match-case - EXPECT_EQ(list->match(QUrl(), QUrl("http://matchcase.com/matchThisCase"), QWebEngineUrlRequestInfo::ResourceTypeMainFrame).first, UrlFilter::Block); - EXPECT_EQ(list->match(QUrl(), QUrl("http://matchcase.com/MatchThisCase"), QWebEngineUrlRequestInfo::ResourceTypeMainFrame).first, UrlFilter::NotMatched); -} - -TEST(AdBlockList, DomainOption) { - // Rule: domain-limited-string$domain=example.com - EXPECT_EQ(list->match(QUrl("https://example.com"), QUrl("https://example.com/domain-limited-string/foo"), QWebEngineUrlRequestInfo::ResourceTypeMainFrame).first, UrlFilter::Block); - EXPECT_EQ(list->match(QUrl("https://example.com"), QUrl("https://example.com/another-domain-string/foo"), QWebEngineUrlRequestInfo::ResourceTypeMainFrame).first, UrlFilter::NotMatched); - EXPECT_EQ(list->match(QUrl("https://another.com"), QUrl("https://example.com/domain-limited-string/foo"), QWebEngineUrlRequestInfo::ResourceTypeMainFrame).first, UrlFilter::NotMatched); - - //Rule: exception-limited-string$domain=~example.com - EXPECT_EQ(list->match(QUrl("https://another.com"), QUrl("https://example.com/exception-limited-string/foo"), QWebEngineUrlRequestInfo::ResourceTypeMainFrame).first, UrlFilter::Block); - EXPECT_EQ(list->match(QUrl("https://example.com"), QUrl("https://example.com/exception-limited-string/foo"), QWebEngineUrlRequestInfo::ResourceTypeMainFrame).first, UrlFilter::NotMatched); -} - -int main(int argc, char **argv) { - QFile f("adblock.txt"); - if(!f.open(QIODevice::ReadOnly | QIODevice::Text)) { - qDebug("Could not open list"); - return -1; - } - - list = new AdBlockList(&f); - f.close(); - - qDebug("Parsed %i rules", list->ruleCount()); - - testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} - diff --git a/lib/urlfilter/test/adblock.txt b/lib/urlfilter/test/adblock.txt deleted file mode 100644 index 635ce09..0000000 --- a/lib/urlfilter/test/adblock.txt +++ /dev/null @@ -1,26 +0,0 @@ -! Homepage: http://example.com/ -! Title: FooList -! Expires: 5 days -! Redirect: http://example.com/list.txt -! Version: 1234 - -/banner/*/img^ -||ads.example.com^ -|http://example.com/| -/banner\d+/ - -! match beginning -||http://beginning-pattern.com -! match end -end-pattern| - -! options -! match-case -matchThisCase$match-case - -! domain limiting -! only apply this filter on this domain -domain-limited-string$domain=example.com -! apply this filter to all domains but the listed one -exception-limited-string$domain=~example.com - diff --git a/lib/urlfilter/test/hostlist.cpp b/lib/urlfilter/test/hostlist.cpp deleted file mode 100644 index 041cd5f..0000000 --- a/lib/urlfilter/test/hostlist.cpp +++ /dev/null @@ -1,34 +0,0 @@ -#include -#include "hostlist/hostlist.h" -#include - -HostList *list = nullptr; - -TEST(AdBlockList, Block) { - EXPECT_EQ(list->match(QUrl(), QUrl::fromUserInput("blockeddomain.com"), QWebEngineUrlRequestInfo::ResourceTypeMainFrame).first, UrlFilter::Block); - EXPECT_EQ(list->match(QUrl(), QUrl::fromUserInput("blockeddomain.first"), QWebEngineUrlRequestInfo::ResourceTypeMainFrame).first, UrlFilter::Block); - EXPECT_EQ(list->match(QUrl(), QUrl::fromUserInput("blockeddomain.second"), QWebEngineUrlRequestInfo::ResourceTypeMainFrame).first, UrlFilter::Block); - - const auto r = list->match(QUrl(), QUrl::fromUserInput("localhost.localdomain"), QWebEngineUrlRequestInfo::ResourceTypeMainFrame); - EXPECT_EQ(r.first, UrlFilter::Redirect); - EXPECT_EQ(r.second, QString("127.0.0.1")); - - EXPECT_EQ(list->match(QUrl(), QUrl::fromUserInput("other.domain"), QWebEngineUrlRequestInfo::ResourceTypeMainFrame).first, UrlFilter::NotMatched); -} - -int main(int argc, char **argv) { - QFile f("hostlist.txt"); - if(!f.open(QIODevice::ReadOnly | QIODevice::Text)) { - qDebug("Could not open list"); - return -1; - } - - list = new HostList(&f); - f.close(); - - qDebug("Parsed %i rules", list->ruleCount()); - - testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} - diff --git a/lib/urlfilter/test/hostlist.txt b/lib/urlfilter/test/hostlist.txt deleted file mode 100644 index a0b4e5c..0000000 --- a/lib/urlfilter/test/hostlist.txt +++ /dev/null @@ -1,6 +0,0 @@ -# This is a comment, and after it comes a blank line - -127.0.0.1 localhost.localdomain - -0.0.0.0 blockeddomain.com -0.0.0.0 blockeddomain.first blockeddomain.second diff --git a/lib/urlfilter/test/matcher.cpp b/lib/urlfilter/test/matcher.cpp deleted file mode 100644 index 1c1efbf..0000000 --- a/lib/urlfilter/test/matcher.cpp +++ /dev/null @@ -1,42 +0,0 @@ -#include "urlfilter.h" -#include "matcher.h" -#include - -TEST(Matcher, StringContains) { - ContentsMatcher matcher("spam-pattern", UrlFilter::StringContains); - EXPECT_TRUE(matcher.hasMatch("this string contains a spam-pattern")); - EXPECT_FALSE(matcher.hasMatch("this string does not contain the pattern")); -} - -TEST(Matcher, StringStartsWith) { - ContentsMatcher matcher("beginning", UrlFilter::StringStartsWith); - EXPECT_TRUE(matcher.hasMatch("beginning this string is the pattern")); - EXPECT_FALSE(matcher.hasMatch("ending this string is the pattern, the word beginning")); - EXPECT_FALSE(matcher.hasMatch("this would be a string where the pattern cannot be found")); -} - -TEST(Matcher, StringEndsWith) { - ContentsMatcher matcher("ending", UrlFilter::StringEndsWith); - EXPECT_TRUE(matcher.hasMatch("this string has the proper ending")); - EXPECT_FALSE(matcher.hasMatch("and this string doesn't")); -} - -TEST(Matcher, StringEquals) { - ContentsMatcher matcher("string-to-match", UrlFilter::StringEquals); - EXPECT_TRUE(matcher.hasMatch("string-to-match")); - EXPECT_FALSE(matcher.hasMatch("same-len-string")); - EXPECT_FALSE(matcher.hasMatch("not the string-to-match")); -} - -TEST(Matcher, RegularExpression) { - ContentsMatcher matcher("banner\\d+", UrlFilter::RegularExpressionMatch); - EXPECT_TRUE(matcher.hasMatch("http://another.com/banner123")); - EXPECT_TRUE(matcher.hasMatch("http://another.com/banner321")); - EXPECT_FALSE(matcher.hasMatch("http://another.com/banners")); - -} - -int main(int argc, char **argv) { - testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/lib/urlfilter/urlfilter.h b/lib/urlfilter/urlfilter.h deleted file mode 100644 index e15122a..0000000 --- a/lib/urlfilter/urlfilter.h +++ /dev/null @@ -1,43 +0,0 @@ -/* - * This file is part of smolbote. It's copyrighted by the contributors recorded - * in the version control history of the file, available from its original - * location: https://neueland.iserlohn-fortress.net/gitea/aqua/smolbote - * - * SPDX-License-Identifier: GPL-3.0 - */ - -#include -#include -#include -#include - -#ifndef SMOLBOTE_URLFILTER_FILTER -#define SMOLBOTE_URLFILTER_FILTER - -class UrlFilter -{ -public: - enum MatchResult { - NotMatched, - Allow, - Block, - Redirect - }; - - enum MatchType { - InvalidMatch, - RegularExpressionMatch, - StringContains, - StringStartsWith, - StringEndsWith, - StringEquals, - DomainMatch - }; - - virtual ~UrlFilter() = default; - - virtual QString metadata(const QString &key) const = 0; - virtual std::pair match(const QUrl &firstParty, const QUrl &requestUrl, QWebEngineUrlRequestInfo::ResourceType type) const = 0; -}; - -#endif // SMOLBOTE_URLFILTER_FILTER -- cgit v1.2.1