aboutsummaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
author Aqua-sama <aqua@iserlohn-fortress.net> 2020-02-10 20:58:39 +0200
committer Aqua-sama <aqua@iserlohn-fortress.net> 2020-05-30 11:01:35 +0300
commit d1287f43964633035938f4f4d4133bb6d9da7b3e (patch)
tree d09efa4074815c20be9bd6348203fe4336dfe716 /lib
parent Fix segfault in release build (diff)
download smolbote-d1287f43964633035938f4f4d4133bb6d9da7b3e.tar.xz
staging: smolblok
smolblok is a replacement for the current lib/urlfilter AdBlockPlus and hostlist format filter parsers. It is a library that uses plugins to provide support for different filter formats.
staging/adblock: AdBlockPlus parser plugin
plugins/smolblok_hostlist: hostlist format parser plugin
Headers will be installed to include/smolbote/.
Remove lib/urlfilter.
Diffstat (limited to 'lib')
-rw-r--r-- lib/configuration/meson.build 3
-rw-r--r-- lib/session_formats/session_json.hpp 2
-rw-r--r-- lib/urlfilter/adblock/adblocklist.cpp 188
-rw-r--r-- lib/urlfilter/adblock/adblocklist.h 42
-rw-r--r-- lib/urlfilter/adblock/parser.cpp 75
-rw-r--r-- lib/urlfilter/adblock/parser.h 14
-rw-r--r-- lib/urlfilter/hostlist/hostlist.cpp 79
-rw-r--r-- lib/urlfilter/hostlist/hostlist.h 44
-rw-r--r-- lib/urlfilter/matcher.h 109
-rw-r--r-- lib/urlfilter/meson.build 26
-rw-r--r-- lib/urlfilter/test/adblock.cpp 88
-rw-r--r-- lib/urlfilter/test/adblock.txt 26
-rw-r--r-- lib/urlfilter/test/hostlist.cpp 34
-rw-r--r-- lib/urlfilter/test/hostlist.txt 6
-rw-r--r-- lib/urlfilter/test/matcher.cpp 42
-rw-r--r-- lib/urlfilter/urlfilter.h 43
16 files changed, 3 insertions, 818 deletions
diff --git a/lib/configuration/meson.build b/lib/configuration/meson.build
index 0ef6f5f..5e3e4b2 100644
--- a/lib/configuration/meson.build
+++ b/lib/configuration/meson.build
@@ -16,6 +16,7 @@ if meson.get_compiler('cpp').has_multi_arguments('-g', '-fsanitize=fuzzer')
executable('configuration-fuzzer',
sources: 'configuration.cpp',
cpp_args: [ '-g', '-fsanitize=fuzzer', '-DNO_QT_SPEC', '-DFUZZER' ],
- link_args: [ '-fsanitize=fuzzer' ])
+ link_args: [ '-fsanitize=fuzzer' ]
# args: [ '-seed=1', '-max_total_time=24', meson.current_source_dir()/'test/corpus' ]
+)
endif
diff --git a/lib/session_formats/session_json.hpp b/lib/session_formats/session_json.hpp
index 3332229..142d9ef 100644
--- a/lib/session_formats/session_json.hpp
+++ b/lib/session_formats/session_json.hpp
@@ -10,7 +10,7 @@
#define SESSION_JSON_HPP
#include <QJsonObject>
-#include <session.hpp>
+#include "smolbote/session.hpp"
class JsonSession : public Session
{
diff --git a/lib/urlfilter/adblock/adblocklist.cpp b/lib/urlfilter/adblock/adblocklist.cpp
deleted file mode 100644
index 3be21bd..0000000
--- a/lib/urlfilter/adblock/adblocklist.cpp
+++ /dev/null
@@ -1,188 +0,0 @@
-/*
- * This file is part of smolbote. It's copyrighted by the contributors recorded
- * in the version control history of the file, available from its original
- * location: https://neueland.iserlohn-fortress.net/gitea/aqua/smolbote
- *
- * SPDX-License-Identifier: GPL-3.0
- */
-
-#include "adblocklist.h"
-#include "parser.h"
-#include <QIODevice>
-#include <QTextStream>
-#include <QDebug>
-
-AdBlockList::AdBlockList(QIODevice *device)
-{
- Q_ASSERT(device->isOpen());
-
- QTextStream list(device);
- while (!list.atEnd()) {
- parseLine(list.readLine());
- }
-
- qDebug() << m_metadata;
-}
-
-AdBlockList::~AdBlockList()
-{
- for(Rule &r : rules) {
- delete r.matcher;
- }
-}
-
-QString AdBlockList::metadata(const QString& key) const
-{
- return m_metadata.value(key);
-}
-
-int AdBlockList::ruleCount() const
-{
- return rules.size();
-}
-
-std::pair<UrlFilter::MatchResult, QString> AdBlockList::match(const QUrl& firstParty, const QUrl& requestUrl, QWebEngineUrlRequestInfo::ResourceType type) const
-{
- const QString domain = firstParty.host();
- const QString request = requestUrl.toString();
-
- for(const Rule &r : rules) {
- // if there are options specified, but not the one we need
- if(!r.options.isEmpty() && !r.options.contains(type))
- continue;
-
- if(r.disabledOn.contains(domain))
- continue;
-
- if(!r.enabledOn.isEmpty() && !r.enabledOn.contains(domain))
- continue;
-
- if(r.matcher->hasMatch(request))
- return std::make_pair(r.action, QString());
- }
-
- return std::make_pair(UrlFilter::NotMatched, QString());
-}
-
-void AdBlockList::parseLine(const QString& line)
-{
- QString parsedLine = line.trimmed();
-
- if(parsedLine.isEmpty())
- return;
-
- if(parsedLine.startsWith(QLatin1String("!"))) {
- const auto comment = parseComment(parsedLine);
-
- if(comment) {
- const auto key = comment.value().first;
- if(keys.contains(key))
- m_metadata[key] = comment.value().second;
- }
-
- return;
- }
-
- // css rule -> filterleaves cannot do element blocking
- if(parsedLine.contains(QLatin1String("##")) || parsedLine.contains(QLatin1String("#@#"))) {
- qDebug("TODO: %s", qUtf8Printable(parsedLine));
- return;
- }
-
- Rule r;
- r.action = UrlFilter::Block;
-
- // exception rules
- if(parsedLine.startsWith(QLatin1String("@@"))) {
- r.action = UrlFilter::Allow;
- parsedLine.remove(0, 2);
- }
-
- bool matchCase = false;
-
- // parse options
- {
- const int sepPos = parsedLine.indexOf(QLatin1String("$"));
- if(sepPos != -1) {
- const auto options = parsedLine.mid(sepPos + 1).split(QLatin1String(","));
- parsedLine = parsedLine.mid(0, sepPos);
-
- for(const QString &option : options) {
- if(option.startsWith(QLatin1String("domain"))) {
- const auto domainList = option.mid(7).split(QLatin1String("|"));
-
- for(const QString &domain : domainList) {
- if(domain.startsWith(QLatin1String("~"))) {
- r.disabledOn.append(domain.mid(1));
- } else {
- r.enabledOn.append(domain);
- }
- }
- } else if(option.endsWith(QLatin1String("match-case"))) {
- matchCase = !option.startsWith(QLatin1String("~"));
-
- } else {
- const auto pair = parseResourceOption(option);
- if(pair)
- r.options.insert(pair.value().first, pair.value().second);
- }
- }
- }
- }
-
- if(parsedLine.startsWith(QLatin1String("/")) && parsedLine.endsWith(QLatin1String("/"))) {
- // regular expression rule
- parsedLine = parsedLine.mid(1, parsedLine.length() - 2);
- r.matcher = new ContentsMatcher<QRegularExpression>(parsedLine, UrlFilter::RegularExpressionMatch);
-
- } else if(parsedLine.startsWith(QLatin1String("||")) && parsedLine.endsWith(QLatin1String("^"))) {
- parsedLine = parsedLine.mid(2, parsedLine.length() - 3);
- r.matcher = new ContentsMatcher<QString>(parsedLine, UrlFilter::DomainMatch);
-
- } else if(parsedLine.startsWith(QLatin1String("|")) && parsedLine.endsWith(QLatin1String("|"))) {
- // string equals rule
- parsedLine = parsedLine.mid(1, parsedLine.length() - 2);
- r.matcher = new ContentsMatcher<QStringMatcher>(parsedLine, UrlFilter::StringEquals);
-
- } else if(parsedLine.startsWith(QLatin1String("||"))) {
- // string starts with rule
- parsedLine = parsedLine.mid(2);
- r.matcher = new ContentsMatcher<QStringMatcher>(parsedLine, UrlFilter::StringStartsWith);
-
- } else if(parsedLine.endsWith(QLatin1String("|"))) {
- // string ends with rule
- parsedLine.chop(1);
- r.matcher = new ContentsMatcher<QStringMatcher>(parsedLine, UrlFilter::StringEndsWith);
-
- } else {
- // generic contains rule
-
- // remove beginning and ending wildcards
- if(parsedLine.startsWith(QLatin1String("*")))
- parsedLine = parsedLine.mid(1);
-
- if(parsedLine.endsWith(QLatin1String("*")))
- parsedLine.chop(1);
-
- if(parsedLine.contains(QLatin1String("*")) || parsedLine.contains(QLatin1String("^"))) {
- // check for wildcards and translate to regexp
- // wildcard "*" - any number of characters
- // separator "^" - end, ? or /
- parsedLine.replace(QLatin1String("||"), QLatin1String("^\\w+://"));
- parsedLine.replace(QLatin1String("|"), QLatin1String("\\|"));
- parsedLine.replace(QLatin1String("*"), QLatin1String(".*"));
- parsedLine.replace(QLatin1String("^"), QLatin1String("($|\\?|\\/)"));
-
- r.matcher = new ContentsMatcher<QRegularExpression>(parsedLine, UrlFilter::RegularExpressionMatch);
-
- } else {
- r.matcher = new ContentsMatcher<QStringMatcher>(parsedLine, UrlFilter::StringContains);
- }
- }
-
- r.matcher->setCaseSensitive(matchCase);
-
- Q_CHECK_PTR(r.matcher);
- rules.emplace_back(std::move(r));
-}
-
diff --git a/lib/urlfilter/adblock/adblocklist.h b/lib/urlfilter/adblock/adblocklist.h
deleted file mode 100644
index ee41e11..0000000
--- a/lib/urlfilter/adblock/adblocklist.h
+++ /dev/null
@@ -1,42 +0,0 @@
-/*
- * This file is part of smolbote. It's copyrighted by the contributors recorded
- * in the version control history of the file, available from its original
- * location: https://neueland.iserlohn-fortress.net/gitea/aqua/smolbote
- *
- * SPDX-License-Identifier: GPL-3.0
- */
-
-#include "urlfilter.h"
-#include "matcher.h"
-#include <QHash>
-#include <QWebEngineUrlRequestInfo>
-
-class QIODevice;
-class AdBlockList : public UrlFilter
-{
-public:
- // TODO: check if all keys are listed
- const QStringList keys = { "Version", "Title", "Last modified", "Expires", "Homepage", "Licence", "Redirect" };
-
- AdBlockList(QIODevice *device);
- ~AdBlockList();
-
- QString metadata(const QString &key) const override;
- int ruleCount() const;
- std::pair<MatchResult, QString> match(const QUrl &firstParty, const QUrl &requestUrl, QWebEngineUrlRequestInfo::ResourceType type) const override;
-
-protected:
- void parseLine(const QString &line);
-
-private:
- QHash<QString, QString> m_metadata;
-
- struct Rule {
- UrlFilter::MatchResult action = UrlFilter::NotMatched;
- Matcher *matcher;
- QStringList enabledOn, disabledOn;
- QHash<QWebEngineUrlRequestInfo::ResourceType, bool> options;
- };
-
- std::vector<Rule> rules;
-};
diff --git a/lib/urlfilter/adblock/parser.cpp b/lib/urlfilter/adblock/parser.cpp
deleted file mode 100644
index 68f895d..0000000
--- a/lib/urlfilter/adblock/parser.cpp
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- * This file is part of smolbote. It's copyrighted by the contributors recorded
- * in the version control history of the file, available from its original
- * location: https://neueland.iserlohn-fortress.net/gitea/aqua/smolbote
- *
- * SPDX-License-Identifier: GPL-3.0
- */
-
-#include "parser.h"
-
-std::optional<std::pair<QString, QString>> parseComment(QString &line)
-{
- const QLatin1String separator(": ");
- if(line.contains(separator)) {
- const QStringList comment = line.mid(1).split(QLatin1String(": "));
- return std::make_pair(comment.at(0).trimmed(), comment.at(1).trimmed());
- } else
- return std::nullopt;
-}
-
-std::optional<std::pair<QWebEngineUrlRequestInfo::ResourceType, bool>> parseResourceOption(const QString &option)
-{
- const bool exception = !option.startsWith(QLatin1String("~"));
-
- if(option.endsWith(QLatin1String("script"))) {
- // external scripts loaded via HTML script tag
- return std::make_pair(QWebEngineUrlRequestInfo::ResourceTypeScript, exception);
-
- } else if(option.endsWith(QLatin1String("image"))) {
- // regular images, typically loaded via HTML img tag
- return std::make_pair(QWebEngineUrlRequestInfo::ResourceTypeImage, exception);
-
- } else if(option.endsWith(QLatin1String("stylesheet"))) {
- // external CSS stylesheet files
- return std::make_pair(QWebEngineUrlRequestInfo::ResourceTypeStylesheet, exception);
-
- } else if(option.endsWith(QLatin1String("object"))) {
- // content handled by browser plugins, e.g. Flash or Java
- return std::make_pair(QWebEngineUrlRequestInfo::ResourceTypeObject, exception);
-
- } else if(option.endsWith(QLatin1String("xmlhttprequest"))) {
- // requests started using the XMLHttpRequest object or fetch() API
- return std::make_pair(QWebEngineUrlRequestInfo::ResourceTypeXhr, exception);
-
- } else if(option.endsWith(QLatin1String("object-subrequest"))) {
- // requests started by plugins like Flash
- return std::make_pair(QWebEngineUrlRequestInfo::ResourceTypePluginResource, exception);
-
- } else if(option.endsWith(QLatin1String("subdocument"))) {
- // embedded pages, usually included via HTML frames
- return std::make_pair(QWebEngineUrlRequestInfo::ResourceTypeSubFrame, exception);
-
- } else if(option.endsWith(QLatin1String("ping"))) {
- // requests started by <a ping> or navigator.sendBeacon()
- return std::make_pair(QWebEngineUrlRequestInfo::ResourceTypePing, exception);
-
- } else if(option.endsWith(QLatin1String("websocket"))) {
- // requests initiated via WebSocket object
- qDebug("Resource type 'websocket' not available");
-
- } else if(option.endsWith(QLatin1String("webrtc"))) {
- // connections opened via RTCPeerConnection instances to ICE servers
- qDebug("Resource type 'webrtc' not available");
-
- } else if(option.endsWith(QLatin1String("document"))) {
- // the page itself
- return std::make_pair(QWebEngineUrlRequestInfo::ResourceTypeMainFrame, exception);
-
- } else if(option.endsWith(QLatin1String("other"))) {
- return std::make_pair(QWebEngineUrlRequestInfo::ResourceTypeUnknown, exception);
- }
-
- qDebug("TODO: %s", qUtf8Printable(option));
- return std::nullopt;
-}
diff --git a/lib/urlfilter/adblock/parser.h b/lib/urlfilter/adblock/parser.h
deleted file mode 100644
index c73a9cf..0000000
--- a/lib/urlfilter/adblock/parser.h
+++ /dev/null
@@ -1,14 +0,0 @@
-/*
- * This file is part of smolbote. It's copyrighted by the contributors recorded
- * in the version control history of the file, available from its original
- * location: https://neueland.iserlohn-fortress.net/gitea/aqua/smolbote
- *
- * SPDX-License-Identifier: GPL-3.0
- */
-
-#include <QWebEngineUrlRequestInfo>
-#include <optional>
-#include <utility>
-
-std::optional<std::pair<QString, QString>> parseComment(QString &line);
-std::optional<std::pair<QWebEngineUrlRequestInfo::ResourceType, bool>> parseResourceOption(const QString &option);
diff --git a/lib/urlfilter/hostlist/hostlist.cpp b/lib/urlfilter/hostlist/hostlist.cpp
deleted file mode 100644
index bec79ea..0000000
--- a/lib/urlfilter/hostlist/hostlist.cpp
+++ /dev/null
@@ -1,79 +0,0 @@
-/*
- * This file is part of smolbote. It's copyrighted by the contributors recorded
- * in the version control history of the file, available from its original
- * location: https://neueland.iserlohn-fortress.net/gitea/aqua/smolbote
- *
- * SPDX-License-Identifier: GPL-3.0
- */
-
-#include "hostlist.h"
-#include <QIODevice>
-#include <QTextStream>
-#include <QDebug>
-
-HostList::HostList(QIODevice *device)
-{
- Q_ASSERT(device->isOpen());
-
- QTextStream list(device);
- while (!list.atEnd()) {
- parseLine(list.readLine());
- }
-
- qDebug() << m_metadata;
-}
-
-QString HostList::metadata(const QString& key) const
-{
- return m_metadata.value(key);
-}
-
-int HostList::ruleCount() const
-{
- return rules.size();
-}
-
-std::pair<UrlFilter::MatchResult, QString> HostList::match(const QUrl& firstParty, const QUrl& requestUrl, QWebEngineUrlRequestInfo::ResourceType type) const
-{
- Q_UNUSED(firstParty);
- Q_UNUSED(type);
-
- const QString domain = requestUrl.host();
- const uint domainHash = qHash(domain);
-
- for(const Rule &r : rules) {
- if(r.domainHash == domainHash)
- return std::make_pair(r.action, r.redirect);
- }
-
- return std::make_pair(UrlFilter::NotMatched, QString());
-}
-
-void HostList::parseLine(const QString& line)
-{
- // check comment
- if(line.startsWith(QLatin1String("#")))
- return;
-
- QString parsedLine = line.trimmed();
-
- // malformed rule
- if(!parsedLine.contains(QLatin1String(" ")))
- return;
-
- const QStringList parts = parsedLine.split(QLatin1String(" "));
- const QString &redirect = parts.at(0);
- const auto action = (redirect == QLatin1String("0.0.0.0")) ? UrlFilter::Block : UrlFilter::Redirect;
-
- for(int i = 1; i < parts.size(); i++) {
- const QString &domain = parts.at(i);
- Rule r;
- r.action = action;
- r.domainHash = qHash(domain);
- if(action == UrlFilter::Redirect)
- r.redirect = redirect;
-
- rules.emplace_back(std::move(r));
- }
-}
-
diff --git a/lib/urlfilter/hostlist/hostlist.h b/lib/urlfilter/hostlist/hostlist.h
deleted file mode 100644
index d4a8d87..0000000
--- a/lib/urlfilter/hostlist/hostlist.h
+++ /dev/null
@@ -1,44 +0,0 @@
-/*
- * This file is part of smolbote. It's copyrighted by the contributors recorded
- * in the version control history of the file, available from its original
- * location: https://neueland.iserlohn-fortress.net/gitea/aqua/smolbote
- *
- * SPDX-License-Identifier: GPL-3.0
- */
-
-#ifndef SMOLBOTE_URLFILTER_HOSTLIST
-#define SMOLBOTE_URLFILTER_HOSTLIST
-
-#include "urlfilter.h"
-#include <QHash>
-#include <vector>
-#include <QWebEngineUrlRequestInfo>
-
-class QIODevice;
-class HostList : public UrlFilter
-{
-public:
-
- HostList(QIODevice *device);
- ~HostList() = default;
-
- QString metadata(const QString &key) const override;
- int ruleCount() const;
- std::pair<MatchResult, QString> match(const QUrl &firstParty, const QUrl &requestUrl, QWebEngineUrlRequestInfo::ResourceType type) const override;
-
-protected:
- void parseLine(const QString &line);
-
-private:
- QHash<QString, QString> m_metadata;
-
- struct Rule {
- UrlFilter::MatchResult action = UrlFilter::NotMatched;
- uint domainHash;
- QString redirect;
- };
-
- std::vector<Rule> rules;
-};
-
-#endif // SMOLBOTE_URLFILTER_HOSTLIST
diff --git a/lib/urlfilter/matcher.h b/lib/urlfilter/matcher.h
deleted file mode 100644
index 6696958..0000000
--- a/lib/urlfilter/matcher.h
+++ /dev/null
@@ -1,109 +0,0 @@
-/*
- * This file is part of smolbote. It's copyrighted by the contributors recorded
- * in the version control history of the file, available from its original
- * location: https://neueland.iserlohn-fortress.net/gitea/aqua/smolbote
- *
- * SPDX-License-Identifier: GPL-3.0
- */
-
-#ifndef SMOLBOTE_URLFILTER_MATCHER
-#define SMOLBOTE_URLFILTER_MATCHER
-
-#include <QUrl>
-#include <QString>
-#include <utility>
-#include <QRegularExpression>
-#include <QStringMatcher>
-#include <QWebEngineUrlRequestInfo>
-
-/** An interface class so we can use templated ContentsMatcher interchangeably
- */
-class Matcher
-{
-public:
- virtual ~Matcher() = default;
-
- virtual void setCaseSensitive(bool matchCase) = 0;
- virtual bool hasMatch(const QString &where) const = 0;
-};
-
-template <typename T>
-class ContentsMatcher : public Matcher
-{
-public:
- ContentsMatcher(const QString &pattern, UrlFilter::MatchType type)
- : patternLength(pattern.length())
- , matchType(type)
- {
- if constexpr(std::is_same_v<T, QRegularExpression>) {
- matcher.setPatternOptions(matcher.patternOptions() | QRegularExpression::CaseInsensitiveOption);
- matcher.setPattern(pattern);
- } else if constexpr(std::is_same_v<T, QStringMatcher>) {
- matcher.setCaseSensitivity(Qt::CaseInsensitive);
- matcher.setPattern(pattern);
- } else if constexpr(std::is_same_v<T, QString>) {
- matcher = QUrl::fromUserInput(pattern).host();
- }
- }
- ~ContentsMatcher() = default;
-
- void setCaseSensitive(bool matchCase) override
- {
- if constexpr(std::is_same_v<T, QRegularExpression>) {
- auto options = matcher.patternOptions();
- options.setFlag(QRegularExpression::CaseInsensitiveOption, !matchCase);
- matcher.setPatternOptions(options);
-
- } else if constexpr(std::is_same_v<T, QStringMatcher>) {
- matcher.setCaseSensitivity(matchCase ? Qt::CaseSensitive : Qt::CaseInsensitive);
- }
- }
-
- bool hasMatch(const QString &where) const override
- {
- if constexpr(std::is_same_v<T, QStringMatcher>) {
- switch (matchType) {
- case UrlFilter::InvalidMatch:
- case UrlFilter::RegularExpressionMatch:
- case UrlFilter::DomainMatch:
- qWarning("ContentsMatcher is a String Matcher, but not doing string matching!");
- return false;
-
- case UrlFilter::StringContains:
- return (matcher.indexIn(where) != -1);
-
- case UrlFilter::StringStartsWith:
- return (matcher.indexIn(where) == 0);
-
- case UrlFilter::StringEndsWith:
- return (matcher.indexIn(where) == where.length() - patternLength);
-
- case UrlFilter::StringEquals:
- return (matcher.indexIn(where) == 0) && (patternLength == where.length());
- }
-
- } else if constexpr(std::is_same_v<T, QRegularExpression>) {
- if(matchType != UrlFilter::RegularExpressionMatch)
- qWarning("ContentsMatcher is a regular expression, but not doing a regular expression match!");
- return matcher.match(where).hasMatch();
- } else if constexpr(std::is_same_v<T, QString>) {
- // TODO: fix
- if(matchType == UrlFilter::DomainMatch) {
-// qDebug("matching %s", qUtf8Printable(QUrl(where).host()));
- return QUrl(where).host().endsWith(matcher);
- } else
- return matcher == where;
- }
-
- qWarning("Matcher has no backend, returning false");
- return false;
- }
-
-private:
- const int patternLength;
- const UrlFilter::MatchType matchType;
- T matcher;
-};
-
-#endif // SMOLBOTE_URLFILTER_MATCHER
-
diff --git a/lib/urlfilter/meson.build b/lib/urlfilter/meson.build
deleted file mode 100644
index 2591028..0000000
--- a/lib/urlfilter/meson.build
+++ /dev/null
@@ -1,26 +0,0 @@
-urlfilter_lib = static_library('urlfilter',
- ['urlfilter.h', 'matcher.h',
- 'hostlist/hostlist.cpp', 'hostlist/hostlist.h',
- 'adblock/adblocklist.cpp', 'adblock/adblocklist.h', 'adblock/parser.cpp', 'adblock/parser.h'],
- dependencies: dep_qt5
-)
-
-dep_urlfilter = declare_dependency(
- include_directories: include_directories('.'),
- link_with: urlfilter_lib
-)
-
-if dep_gtest.found()
- test('urlfilter: matcher',
- executable('urlfilter-matcher', dependencies: [dep_qt5, dep_gtest, dep_urlfilter], sources: ['test/matcher.cpp']),
- workdir: meson.current_source_dir() / 'test'
- )
- test('urlfilter: host list',
- executable('urlfilter-hostlist', dependencies: [dep_qt5, dep_gtest, dep_urlfilter], sources: ['test/hostlist.cpp']),
- workdir: meson.current_source_dir() / 'test'
- )
- test('urlfilter: adblock list',
- executable('urlfilter-adblocklist', dependencies: [dep_qt5, dep_gtest, dep_urlfilter], sources: ['test/adblock.cpp']),
- workdir: meson.current_source_dir() / 'test'
- )
-endif
diff --git a/lib/urlfilter/test/adblock.cpp b/lib/urlfilter/test/adblock.cpp
deleted file mode 100644
index ecb94ee..0000000
--- a/lib/urlfilter/test/adblock.cpp
+++ /dev/null
@@ -1,88 +0,0 @@
-#include "urlfilter.h"
-#include "adblock/adblocklist.h"
-#include <gtest/gtest.h>
-#include <QFile>
-
-AdBlockList *list = nullptr;
-
-TEST(AdBlockList, MetaData) {
- EXPECT_STREQ(qUtf8Printable(list->metadata("Homepage")), "http://example.com/");
- EXPECT_STREQ(qUtf8Printable(list->metadata("Title")), "FooList");
- EXPECT_STREQ(qUtf8Printable(list->metadata("Expires")), "5 days");
- EXPECT_STREQ(qUtf8Printable(list->metadata("Redirect")), "http://example.com/list.txt");
- EXPECT_STREQ(qUtf8Printable(list->metadata("Version")), "1234");
-}
-
-TEST(AdBlockList, BasicFilter) {
- // Rule: /banner/*/img^
- EXPECT_EQ(list->match(QUrl(), QUrl("http://example.com/banner/foo/img"), QWebEngineUrlRequestInfo::ResourceTypeImage).first, UrlFilter::Block);
- EXPECT_EQ(list->match(QUrl(), QUrl("http://example.com/banner/foo/bar/img?param"), QWebEngineUrlRequestInfo::ResourceTypeImage).first, UrlFilter::Block);
- EXPECT_EQ(list->match(QUrl(), QUrl("http://example.com/banner//img/foo"), QWebEngineUrlRequestInfo::ResourceTypeImage).first, UrlFilter::Block);
-
- EXPECT_EQ(list->match(QUrl(), QUrl("http://example.com/banner/foo.png"), QWebEngineUrlRequestInfo::ResourceTypeImage).first, UrlFilter::NotMatched);
- EXPECT_EQ(list->match(QUrl(), QUrl("http://example.com/banner/img"), QWebEngineUrlRequestInfo::ResourceTypeImage).first, UrlFilter::NotMatched);
- EXPECT_EQ(list->match(QUrl(), QUrl("http://example.com/banner/foo/imgraph"), QWebEngineUrlRequestInfo::ResourceTypeImage).first, UrlFilter::NotMatched);
- EXPECT_EQ(list->match(QUrl(), QUrl("http://example.com/banner/foo/img.gif"), QWebEngineUrlRequestInfo::ResourceTypeImage).first, UrlFilter::NotMatched);
-
- EXPECT_EQ(list->match(QUrl(), QUrl("http://example.com/banner/ads/img.png"), QWebEngineUrlRequestInfo::ResourceTypeMainFrame).first, UrlFilter::NotMatched);
-}
-
-TEST(AdBlockList, MatchBeginningEnd) {
- // Rule: |http://beginning-pattern.com
- EXPECT_EQ(list->match(QUrl(), QUrl("http://beginning-pattern.com"), QWebEngineUrlRequestInfo::ResourceTypeMainFrame).first, UrlFilter::Block);
- EXPECT_EQ(list->match(QUrl(), QUrl("https://beginning-pattern.com"), QWebEngineUrlRequestInfo::ResourceTypeMainFrame).first, UrlFilter::NotMatched);
- // Rule: end-pattern|
- EXPECT_EQ(list->match(QUrl(), QUrl("https://endpattern.com/end-pattern"), QWebEngineUrlRequestInfo::ResourceTypeMainFrame).first, UrlFilter::Block);
- EXPECT_EQ(list->match(QUrl(), QUrl("https://endpattern.com/end-pattern/foo"), QWebEngineUrlRequestInfo::ResourceTypeMainFrame).first, UrlFilter::NotMatched);
-}
-
-TEST(AdBlockList, Domain) {
- // Rule: ||ads.example.com^
- EXPECT_EQ(list->match(QUrl(), QUrl("http://ads.example.com/foo.gif"), QWebEngineUrlRequestInfo::ResourceTypeMainFrame).first, UrlFilter::Block);
- EXPECT_EQ(list->match(QUrl(), QUrl("http://server1.ads.example.com/foo.gif"), QWebEngineUrlRequestInfo::ResourceTypeMainFrame).first, UrlFilter::Block);
- EXPECT_EQ(list->match(QUrl(), QUrl("https://ads.example.com:8000/"), QWebEngineUrlRequestInfo::ResourceTypeMainFrame).first, UrlFilter::Block);
-
- EXPECT_EQ(list->match(QUrl(), QUrl("http://ads.example.com.ua/foo.gif"), QWebEngineUrlRequestInfo::ResourceTypeMainFrame).first, UrlFilter::NotMatched);
- EXPECT_EQ(list->match(QUrl(), QUrl("http://example.com/redirect/http://ads.example.com/"), QWebEngineUrlRequestInfo::ResourceTypeMainFrame).first, UrlFilter::NotMatched);
-}
-
-TEST(AdBlockList, RegularExpression) {
- // Rule: /banner\d+/
- EXPECT_EQ(list->match(QUrl(), QUrl("http://example.com/banner123"), QWebEngineUrlRequestInfo::ResourceTypeMainFrame).first, UrlFilter::Block);
- EXPECT_EQ(list->match(QUrl(), QUrl("http://example.com/banner321"), QWebEngineUrlRequestInfo::ResourceTypeMainFrame).first, UrlFilter::Block);
- EXPECT_EQ(list->match(QUrl(), QUrl("http://example.com/banners"), QWebEngineUrlRequestInfo::ResourceTypeMainFrame).first, UrlFilter::NotMatched);
-}
-
-TEST(AdBlockList, MatchCase) {
- // Rule: matchThisCase$match-case
- EXPECT_EQ(list->match(QUrl(), QUrl("http://matchcase.com/matchThisCase"), QWebEngineUrlRequestInfo::ResourceTypeMainFrame).first, UrlFilter::Block);
- EXPECT_EQ(list->match(QUrl(), QUrl("http://matchcase.com/MatchThisCase"), QWebEngineUrlRequestInfo::ResourceTypeMainFrame).first, UrlFilter::NotMatched);
-}
-
-TEST(AdBlockList, DomainOption) {
- // Rule: domain-limited-string$domain=example.com
- EXPECT_EQ(list->match(QUrl("https://example.com"), QUrl("https://example.com/domain-limited-string/foo"), QWebEngineUrlRequestInfo::ResourceTypeMainFrame).first, UrlFilter::Block);
- EXPECT_EQ(list->match(QUrl("https://example.com"), QUrl("https://example.com/another-domain-string/foo"), QWebEngineUrlRequestInfo::ResourceTypeMainFrame).first, UrlFilter::NotMatched);
- EXPECT_EQ(list->match(QUrl("https://another.com"), QUrl("https://example.com/domain-limited-string/foo"), QWebEngineUrlRequestInfo::ResourceTypeMainFrame).first, UrlFilter::NotMatched);
-
- //Rule: exception-limited-string$domain=~example.com
- EXPECT_EQ(list->match(QUrl("https://another.com"), QUrl("https://example.com/exception-limited-string/foo"), QWebEngineUrlRequestInfo::ResourceTypeMainFrame).first, UrlFilter::Block);
- EXPECT_EQ(list->match(QUrl("https://example.com"), QUrl("https://example.com/exception-limited-string/foo"), QWebEngineUrlRequestInfo::ResourceTypeMainFrame).first, UrlFilter::NotMatched);
-}
-
-int main(int argc, char **argv) {
- QFile f("adblock.txt");
- if(!f.open(QIODevice::ReadOnly | QIODevice::Text)) {
- qDebug("Could not open list");
- return -1;
- }
-
- list = new AdBlockList(&f);
- f.close();
-
- qDebug("Parsed %i rules", list->ruleCount());
-
- testing::InitGoogleTest(&argc, argv);
- return RUN_ALL_TESTS();
-}
-
diff --git a/lib/urlfilter/test/adblock.txt b/lib/urlfilter/test/adblock.txt
deleted file mode 100644
index 635ce09..0000000
--- a/lib/urlfilter/test/adblock.txt
+++ /dev/null
@@ -1,26 +0,0 @@
-! Homepage: http://example.com/
-! Title: FooList
-! Expires: 5 days
-! Redirect: http://example.com/list.txt
-! Version: 1234
-
-/banner/*/img^
-||ads.example.com^
-|http://example.com/|
-/banner\d+/
-
-! match beginning
-||http://beginning-pattern.com
-! match end
-end-pattern|
-
-! options
-! match-case
-matchThisCase$match-case
-
-! domain limiting
-! only apply this filter on this domain
-domain-limited-string$domain=example.com
-! apply this filter to all domains but the listed one
-exception-limited-string$domain=~example.com
-
diff --git a/lib/urlfilter/test/hostlist.cpp b/lib/urlfilter/test/hostlist.cpp
deleted file mode 100644
index 041cd5f..0000000
--- a/lib/urlfilter/test/hostlist.cpp
+++ /dev/null
@@ -1,34 +0,0 @@
-#include <gtest/gtest.h>
-#include "hostlist/hostlist.h"
-#include <QFile>
-
-HostList *list = nullptr;
-
-TEST(AdBlockList, Block) {
- EXPECT_EQ(list->match(QUrl(), QUrl::fromUserInput("blockeddomain.com"), QWebEngineUrlRequestInfo::ResourceTypeMainFrame).first, UrlFilter::Block);
- EXPECT_EQ(list->match(QUrl(), QUrl::fromUserInput("blockeddomain.first"), QWebEngineUrlRequestInfo::ResourceTypeMainFrame).first, UrlFilter::Block);
- EXPECT_EQ(list->match(QUrl(), QUrl::fromUserInput("blockeddomain.second"), QWebEngineUrlRequestInfo::ResourceTypeMainFrame).first, UrlFilter::Block);
-
- const auto r = list->match(QUrl(), QUrl::fromUserInput("localhost.localdomain"), QWebEngineUrlRequestInfo::ResourceTypeMainFrame);
- EXPECT_EQ(r.first, UrlFilter::Redirect);
- EXPECT_EQ(r.second, QString("127.0.0.1"));
-
- EXPECT_EQ(list->match(QUrl(), QUrl::fromUserInput("other.domain"), QWebEngineUrlRequestInfo::ResourceTypeMainFrame).first, UrlFilter::NotMatched);
-}
-
-int main(int argc, char **argv) {
- QFile f("hostlist.txt");
- if(!f.open(QIODevice::ReadOnly | QIODevice::Text)) {
- qDebug("Could not open list");
- return -1;
- }
-
- list = new HostList(&f);
- f.close();
-
- qDebug("Parsed %i rules", list->ruleCount());
-
- testing::InitGoogleTest(&argc, argv);
- return RUN_ALL_TESTS();
-}
-
diff --git a/lib/urlfilter/test/hostlist.txt b/lib/urlfilter/test/hostlist.txt
deleted file mode 100644
index a0b4e5c..0000000
--- a/lib/urlfilter/test/hostlist.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-# This is a comment, and after it comes a blank line
-
-127.0.0.1 localhost.localdomain
-
-0.0.0.0 blockeddomain.com
-0.0.0.0 blockeddomain.first blockeddomain.second
diff --git a/lib/urlfilter/test/matcher.cpp b/lib/urlfilter/test/matcher.cpp
deleted file mode 100644
index 1c1efbf..0000000
--- a/lib/urlfilter/test/matcher.cpp
+++ /dev/null
@@ -1,42 +0,0 @@
-#include "urlfilter.h"
-#include "matcher.h"
-#include <gtest/gtest.h>
-
-TEST(Matcher, StringContains) {
- ContentsMatcher<QStringMatcher> matcher("spam-pattern", UrlFilter::StringContains);
- EXPECT_TRUE(matcher.hasMatch("this string contains a spam-pattern"));
- EXPECT_FALSE(matcher.hasMatch("this string does not contain the pattern"));
-}
-
-TEST(Matcher, StringStartsWith) {
- ContentsMatcher<QStringMatcher> matcher("beginning", UrlFilter::StringStartsWith);
- EXPECT_TRUE(matcher.hasMatch("beginning this string is the pattern"));
- EXPECT_FALSE(matcher.hasMatch("ending this string is the pattern, the word beginning"));
- EXPECT_FALSE(matcher.hasMatch("this would be a string where the pattern cannot be found"));
-}
-
-TEST(Matcher, StringEndsWith) {
- ContentsMatcher<QStringMatcher> matcher("ending", UrlFilter::StringEndsWith);
- EXPECT_TRUE(matcher.hasMatch("this string has the proper ending"));
- EXPECT_FALSE(matcher.hasMatch("and this string doesn't"));
-}
-
-TEST(Matcher, StringEquals) {
- ContentsMatcher<QStringMatcher> matcher("string-to-match", UrlFilter::StringEquals);
- EXPECT_TRUE(matcher.hasMatch("string-to-match"));
- EXPECT_FALSE(matcher.hasMatch("same-len-string"));
- EXPECT_FALSE(matcher.hasMatch("not the string-to-match"));
-}
-
-TEST(Matcher, RegularExpression) {
- ContentsMatcher<QRegularExpression> matcher("banner\\d+", UrlFilter::RegularExpressionMatch);
- EXPECT_TRUE(matcher.hasMatch("http://another.com/banner123"));
- EXPECT_TRUE(matcher.hasMatch("http://another.com/banner321"));
- EXPECT_FALSE(matcher.hasMatch("http://another.com/banners"));
-
-}
-
-int main(int argc, char **argv) {
- testing::InitGoogleTest(&argc, argv);
- return RUN_ALL_TESTS();
-}
diff --git a/lib/urlfilter/urlfilter.h b/lib/urlfilter/urlfilter.h
deleted file mode 100644
index e15122a..0000000
--- a/lib/urlfilter/urlfilter.h
+++ /dev/null
@@ -1,43 +0,0 @@
-/*
- * This file is part of smolbote. It's copyrighted by the contributors recorded
- * in the version control history of the file, available from its original
- * location: https://neueland.iserlohn-fortress.net/gitea/aqua/smolbote
- *
- * SPDX-License-Identifier: GPL-3.0
- */
-
-#include <QUrl>
-#include <QString>
-#include <utility>
-#include <QWebEngineUrlRequestInfo>
-
-#ifndef SMOLBOTE_URLFILTER_FILTER
-#define SMOLBOTE_URLFILTER_FILTER
-
-class UrlFilter
-{
-public:
- enum MatchResult {
- NotMatched,
- Allow,
- Block,
- Redirect
- };
-
- enum MatchType {
- InvalidMatch,
- RegularExpressionMatch,
- StringContains,
- StringStartsWith,
- StringEndsWith,
- StringEquals,
- DomainMatch
- };
-
- virtual ~UrlFilter() = default;
-
- virtual QString metadata(const QString &key) const = 0;
- virtual std::pair<MatchResult, QString> match(const QUrl &firstParty, const QUrl &requestUrl, QWebEngineUrlRequestInfo::ResourceType type) const = 0;
-};
-
-#endif // SMOLBOTE_URLFILTER_FILTER