aboutsummaryrefslogtreecommitdiff
path: root/lib/urlfilter/formats
diff options
context:
space:
mode:
Diffstat (limited to 'lib/urlfilter/formats')
-rw-r--r--lib/urlfilter/formats/adblocklist.cpp95
-rw-r--r--lib/urlfilter/formats/adblocklist.h32
-rw-r--r--lib/urlfilter/formats/adblockrule.cpp63
-rw-r--r--lib/urlfilter/formats/adblockrule.h113
-rw-r--r--lib/urlfilter/formats/adblockrule_parse.cpp181
-rw-r--r--lib/urlfilter/formats/adblockrule_parse.h17
-rw-r--r--lib/urlfilter/formats/hostlistrule.cpp29
-rw-r--r--lib/urlfilter/formats/hostlistrule.h27
8 files changed, 0 insertions, 557 deletions
diff --git a/lib/urlfilter/formats/adblocklist.cpp b/lib/urlfilter/formats/adblocklist.cpp
deleted file mode 100644
index 772c252..0000000
--- a/lib/urlfilter/formats/adblocklist.cpp
+++ /dev/null
@@ -1,95 +0,0 @@
-#include "adblocklist.h"
-
-AdBlockList::AdBlockList()
-{
-}
-
-QString AdBlockList::metadata(const QString &key) const
-{
- return m_metadata.value(key, QString());
-}
-
-FilterLeaf::Action AdBlockList::match(const QUrl &firstParty, const QUrl &requestUrl, QWebEngineUrlRequestInfo::ResourceType type) const
-{
- const QString request = requestUrl.toString();
-
- for(auto &filter : m_rules) {
- if(filter.matcher->hasMatch(request))
- return filter.action;
- }
- return FilterLeaf::NotMatched;
-}
-
-bool AdBlockList::parseLine(const QString &line)
-{
- // remove whitespace from start/end of the line
- QString parsedLine = line.trimmed();
-
- // check if the line is empty
- if(parsedLine.isEmpty())
- return false;
-
- // parse comment
- if(parsedLine.startsWith(QLatin1Literal("!")))
- return parseComment(parsedLine);
-
- Filter filter;
-
- // exception rules
- if(parsedLine.startsWith(QLatin1Literal("@@"))) {
- filter.action = FilterLeaf::Allow;
- parsedLine.remove(0, 2);
- }
-
- // remove '*' at the beginning and the end
- if(parsedLine.startsWith(QLatin1Literal("*")))
- parsedLine = parsedLine.mid(1);
- if(parsedLine.endsWith(QLatin1Literal("*")))
- parsedLine.chop(1);
-
- if(parsedLine.startsWith(QLatin1Literal("/")) && parsedLine.endsWith(QLatin1Literal("/"))) {
- // regular expression rule
- parsedLine = parsedLine.mid(1, parsedLine.length() - 2);
- filter.matcher = new ContentsMatcher<QRegularExpression>(parsedLine, FilterLeaf::RegularExpressionMatch);
-
- } else if(parsedLine.contains(QLatin1Literal("*"))) {
- parsedLine = QRegularExpression::wildcardToRegularExpression(parsedLine);
- filter.matcher = new ContentsMatcher<QRegularExpression>(parsedLine, FilterLeaf::RegularExpressionMatch);
-
- } else if(parsedLine.startsWith(QLatin1Literal("||")) && parsedLine.endsWith(QLatin1Literal("^"))) {
-// matchType = FilterLeaf::DomainMatch;
- parsedLine = parsedLine.mid(2, parsedLine.length() - 3);
- filter.matcher = new ContentsMatcher<QString>(parsedLine, FilterLeaf::DomainMatch);
-
- } else if(parsedLine.startsWith(QLatin1Literal("|")) && parsedLine.endsWith(QLatin1Literal("|"))) {
- // string equals rule
- parsedLine = parsedLine.mid(1, parsedLine.length() - 2);
- filter.matcher = new ContentsMatcher<QStringMatcher>(parsedLine, FilterLeaf::StringEquals);
-
- } else if(parsedLine.startsWith(QLatin1Literal("||"))) {
- // string starts with rule
- parsedLine = parsedLine.mid(2);
- filter.matcher = new ContentsMatcher<QStringMatcher>(parsedLine, FilterLeaf::StringStartsWith);
-
- } else if(parsedLine.endsWith(QLatin1Literal("|"))) {
- // string ends with rule
- parsedLine.chop(1);
- filter.matcher = new ContentsMatcher<QStringMatcher>(parsedLine, FilterLeaf::StringEndsWith);
-
- } else {
- // generic contains rule
- filter.matcher = new ContentsMatcher<QStringMatcher>(parsedLine, FilterLeaf::StringContains);
- }
-
-
- Q_CHECK_PTR(filter.matcher);
- m_rules.emplace_back(std::move(filter));
- return true;
-}
-
-bool AdBlockList::parseComment(const QString &commentLine)
-{
- const QStringList comment = commentLine.mid(1).split(QLatin1Literal(": "));
- m_metadata[comment.at(0).trimmed()] = comment.at(1).trimmed();
- return true;
-}
diff --git a/lib/urlfilter/formats/adblocklist.h b/lib/urlfilter/formats/adblocklist.h
deleted file mode 100644
index 34a2120..0000000
--- a/lib/urlfilter/formats/adblocklist.h
+++ /dev/null
@@ -1,32 +0,0 @@
-#ifndef ADBLOCKLIST_H
-#define ADBLOCKLIST_H
-
-#include <QHash>
-#include "adblockrule.h"
-
-class AdBlockList
-{
-public:
- AdBlockList();
-
- QString metadata(const QString &key) const;
- FilterLeaf::Action match(const QUrl &firstParty, const QUrl &requestUrl, QWebEngineUrlRequestInfo::ResourceType type = QWebEngineUrlRequestInfo::ResourceTypeUnknown) const;
-
- bool parseLine(const QString &line);
-
-protected:
- bool parseComment(const QString &commentLine);
-
-private:
- struct Filter
- {
- FilterLeaf::Action action = FilterLeaf::Block;
- Matcher *matcher;
- };
-
- QHash<QString, QString> m_metadata;
- //QMap<QString, Filter> m_rules;
- std::vector<Filter> m_rules;
-};
-
-#endif // ADBLOCKLIST_H
diff --git a/lib/urlfilter/formats/adblockrule.cpp b/lib/urlfilter/formats/adblockrule.cpp
deleted file mode 100644
index 60e817f..0000000
--- a/lib/urlfilter/formats/adblockrule.cpp
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * This file is part of smolbote. It's copyrighted by the contributors recorded
- * in the version control history of the file, available from its original
- * location: https://neueland.iserlohn-fortress.net/gitea/aqua/smolbote
- *
- * SPDX-License-Identifier: GPL-3.0
- */
-
-#include "adblockrule.h"
-#include <QRegExp>
-#include <QStringMatcher>
-
-AdBlockRule::AdBlockRule(FilterLeaf::UrlMatchType matchType, const QString &filter, FilterLeaf::Action action)
-{
- this->matchType = matchType;
- this->m_request = filter;
- this->m_isBlocking = (action == FilterLeaf::Block);
- //matcher.setPattern(filter);
- if(matchType == FilterLeaf::RegularExpressionMatch)
- regExp = new QRegExp(filter);
- else
- stringMatcher = new QStringMatcher(filter);
-}
-
-void AdBlockRule::mergeOptions(const QHash<QWebEngineUrlRequestInfo::ResourceType, bool> &options)
-{
- this->resourceTypeOptions.unite(options);
-}
-
-bool AdBlockRule::match(const QUrl &requestUrl) const
-{
- switch(matchType) {
- case FilterLeaf::RegularExpressionMatch:
- return (regExp->indexIn(requestUrl.toString()) != -1);
- default:
- return false;
- }
-}
-
-bool AdBlockRule::match(const QUrl &requestUrl, QWebEngineUrlRequestInfo::ResourceType type) const
-{
- // if request is of the required type, or there are no types set (== apply to all requests)
- if(this->resourceTypeOptions.contains(type) || this->resourceTypeOptions.isEmpty()) {
- switch(matchType) {
- case FilterLeaf::RegularExpressionMatch:
- return (regExp->indexIn(requestUrl.toString()) != -1);
- default:
- qWarning("Match type not implemented, returning false!");
- return false;
- }
- }
-
- // request type is not matched
- return false;
-}
-
-std::pair<FilterLeaf::Action, QVariant> AdBlockRule::action() const
-{
- if(m_isBlocking)
- return std::make_pair(FilterLeaf::Block, QVariant());
- else
- return std::make_pair(FilterLeaf::Allow, QVariant());
-}
diff --git a/lib/urlfilter/formats/adblockrule.h b/lib/urlfilter/formats/adblockrule.h
deleted file mode 100644
index 6be3cdf..0000000
--- a/lib/urlfilter/formats/adblockrule.h
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- * This file is part of smolbote. It's copyrighted by the contributors recorded
- * in the version control history of the file, available from its original
- * location: https://neueland.iserlohn-fortress.net/gitea/aqua/smolbote
- *
- * SPDX-License-Identifier: GPL-3.0
- */
-
-#ifndef SMOLBOTE_ADBLOCKRULE_H
-#define SMOLBOTE_ADBLOCKRULE_H
-
-#include "../filterleaf.h"
-#include <optional>
-#include <QRegularExpression>
-#include <QStringMatcher>
-
-class Matcher
-{
-public:
- virtual bool hasMatch(const QString &where) const = 0;
-};
-
-template <typename T>
-class ContentsMatcher : public Matcher
-{
-public:
- ContentsMatcher(const QString &pattern, FilterLeaf::UrlMatchType matchType)
- {
- this->matchType = matchType;
- patternLength = pattern.length();
-
-
- if constexpr(std::is_same_v<T, QRegularExpression>) {
- matcher.setPatternOptions(matcher.patternOptions() | QRegularExpression::CaseInsensitiveOption);
- matcher.setPattern(pattern);
- } else if constexpr(std::is_same_v<T, QStringMatcher>) {
- matcher.setCaseSensitivity(Qt::CaseInsensitive);
- matcher.setPattern(pattern);
- } else if constexpr(std::is_same_v<T, QString>) {
- matcher = QUrl::fromUserInput(pattern).host();
-// qDebug("matcher: %s", qUtf8Printable(matcher));
- }
- }
-
- bool hasMatch(const QString &where) const override
- {
- if constexpr(std::is_same_v<T, QStringMatcher>) {
- switch (matchType) {
- case FilterLeaf::InvalidMatch:
- case FilterLeaf::RegularExpressionMatch:
- case FilterLeaf::DomainMatch:
- qWarning("ContentsMatcher is a String Matcher, but not doing string matching!");
- return false;
-
- case FilterLeaf::StringContains:
- return (matcher.indexIn(where) != -1);
-
- case FilterLeaf::StringStartsWith:
- return (matcher.indexIn(where) == 0);
-
- case FilterLeaf::StringEndsWith:
- return (matcher.indexIn(where) == where.length() - patternLength);
-
- case FilterLeaf::StringEquals:
- return (matcher.indexIn(where) == 0) && (patternLength == where.length());
- }
-
- } else if constexpr(std::is_same_v<T, QRegularExpression>) {
- if(matchType != FilterLeaf::RegularExpressionMatch)
- qWarning("ContentsMatcher is a regular expression, but not doing a regular expression match!");
- return matcher.match(where).hasMatch();
- } else if constexpr(std::is_same_v<T, QString>) {
- // TODO: fix
- if(matchType == FilterLeaf::DomainMatch) {
-// qDebug("matching %s", qUtf8Printable(QUrl(where).host()));
- return QUrl(where).host().endsWith(matcher);
- } else
- return matcher == where;
- } else {
- qWarning("Matcher has no backend, returning false");
- return false;
- }
- }
-
-private:
- int patternLength;
- T matcher;
- FilterLeaf::UrlMatchType matchType;
-};
-
-class AdBlockRule : public FilterLeaf
-{
-public:
- explicit AdBlockRule(FilterLeaf::UrlMatchType matchType, const QString &filter, FilterLeaf::Action action);
- ~AdBlockRule()
- {
- delete stringMatcher;
- delete regExp;
- };
-
- void mergeOptions(const QHash<QWebEngineUrlRequestInfo::ResourceType, bool> &options);
-
- bool match(const QUrl &requestUrl) const override;
- bool match(const QUrl &requestUrl, QWebEngineUrlRequestInfo::ResourceType type) const;
- std::pair<FilterLeaf::Action, QVariant> action() const override;
-
-private:
- /* Once C++20 comes out, perhaps this can be replaced with a concept template */
- QStringMatcher *stringMatcher = nullptr;
- QRegExp *regExp = nullptr;
-};
-
-#endif // SMOLBOTE_ADBLOCKRULE_H
diff --git a/lib/urlfilter/formats/adblockrule_parse.cpp b/lib/urlfilter/formats/adblockrule_parse.cpp
deleted file mode 100644
index c01ddfd..0000000
--- a/lib/urlfilter/formats/adblockrule_parse.cpp
+++ /dev/null
@@ -1,181 +0,0 @@
-/*
- * This file is part of smolbote. It's copyrighted by the contributors recorded
- * in the version control history of the file, available from its original
- * location: https://neueland.iserlohn-fortress.net/gitea/aqua/smolbote
- *
- * SPDX-License-Identifier: GPL-3.0
- */
-
-#include "adblockrule.h"
-#include "adblockrule_parse.h"
-
-// adblock format documentation
-// https://adblockplus.org/filters
-
-// QString::mid(pos, len) const - Returns a string starting at the specified position index.
-// QString::chop(len) - Removes n characters from the end of the string.
-// QString::remove(pos, len) - Removes n characters from the string, starting at the given position index.
-// QString::trimmed() const - Remove whitespace from start and end
-
-AdBlockRule *parseRule_adblock(const QString &filter)
-{
- QString parsedLine = filter.trimmed();
-
- // there is no rule, or it's a comment
- if(parsedLine.isEmpty() || parsedLine.startsWith("!")) {
- return nullptr;
- }
-
- // css rule -> filterleaves cannot do element blocking
- if(parsedLine.contains(QLatin1Literal("##")) || parsedLine.contains(QLatin1Literal("#@#"))) {
- return nullptr;
- }
-
- // exception rules
- FilterLeaf::Action action = FilterLeaf::Block;
- if(parsedLine.startsWith(QLatin1Literal("@@"))) {
- action = FilterLeaf::Allow;
- parsedLine.remove(0, 2);
- }
-
- // parse options
- QStringList enabledOn, disabledOn;
- QHash<QWebEngineUrlRequestInfo::ResourceType, bool> optionsHash;
- {
- const int sepPos = parsedLine.indexOf(QLatin1Literal("$"));
- if(sepPos != -1) {
- const auto options = parsedLine.mid(sepPos + 1).split(QLatin1Literal(","));
- parsedLine = parsedLine.mid(0, sepPos);
-
- for(const QString &option : options) {
- if(option.startsWith(QLatin1Literal("domain"))) {
- const auto domainList = option.mid(7).split(QLatin1Literal("|"));
-
- for(const QString &domain : domainList) {
- if(domain.startsWith(QLatin1Literal("~"))) {
- disabledOn.append(domain.mid(1));
- } else {
- enabledOn.append(domain);
- }
- }
- } else {
- const auto pair = parseOption(option);
- if(pair)
- optionsHash.insert(pair.value().first, pair.value().second);
- }
- }
- }
- }
-
- FilterLeaf::UrlMatchType matchType = FilterLeaf::InvalidMatch;
-
- if(parsedLine.startsWith(QLatin1Literal("/")) && parsedLine.endsWith(QLatin1Literal("/"))) {
- // regular expression rule
- matchType = FilterLeaf::RegularExpressionMatch;
- parsedLine = parsedLine.mid(1, parsedLine.length() - 2);
-
- } else if(parsedLine.startsWith(QLatin1Literal("||")) && parsedLine.endsWith(QLatin1Literal("^"))) {
- matchType = FilterLeaf::DomainMatch;
- parsedLine = parsedLine.mid(2, parsedLine.length() - 3);
-
- } else if(parsedLine.startsWith(QLatin1Literal("|")) && parsedLine.endsWith(QLatin1Literal("|"))) {
- // string equals rule
- matchType = FilterLeaf::StringEquals;
- parsedLine = parsedLine.mid(1, parsedLine.length() - 2);
-
- } else if(parsedLine.startsWith(QLatin1Literal("||"))) {
- // string starts with rule
- matchType = FilterLeaf::StringStartsWith;
- parsedLine = parsedLine.mid(2);
-
- } else if(parsedLine.endsWith(QLatin1Literal("|"))) {
- // string ends with rule
- matchType = FilterLeaf::StringEndsWith;
- parsedLine.chop(1);
-
- } else {
- // generic contains rule
- matchType = FilterLeaf::StringContains;
-
- // Basic filter rules can use wildcards, which were supported by QRegExp,
- // but were deprecated in QRegularExpression.
-
- // remove beginning and ending wildcards
- if(parsedLine.startsWith(QLatin1Literal("*")))
- parsedLine = parsedLine.mid(1);
-
- if(parsedLine.endsWith(QLatin1Literal("*")))
- parsedLine.chop(1);
-
- if(parsedLine.contains(QLatin1Literal("*")) || parsedLine.contains(QLatin1Literal("^"))) {
- // check for wildcards and translate to regexp
- // wildcard "*" - any number of characters
- // separator "^" - end, ? or /
- parsedLine.replace(QLatin1Literal("||"), QLatin1Literal("^\\w+://"));
- parsedLine.replace(QLatin1Literal("|"), QLatin1Literal("\\|"));
- parsedLine.replace(QLatin1Literal("*"), QLatin1Literal(".*"));
- parsedLine.replace(QLatin1Literal("^"), QLatin1Literal("($|\\?|\\/)"));
-
- matchType = FilterLeaf::RegularExpressionMatch;
- }
- }
-
- AdBlockRule *rule = new AdBlockRule(matchType, parsedLine, action);
- rule->mergeOptions(optionsHash);
- return rule;
-}
-
-std::optional<QPair<QWebEngineUrlRequestInfo::ResourceType, bool>> parseOption(const QString &option)
-{
- const bool exception = !option.startsWith(QLatin1Literal("~"));
-
- if(option.endsWith(QLatin1Literal("script"))) {
- // external scripts loaded via HTML script tag
- return qMakePair(QWebEngineUrlRequestInfo::ResourceTypeScript, exception);
-
- } else if(option.endsWith(QLatin1Literal("image"))) {
- // regular images, typically loaded via HTML img tag
- return qMakePair(QWebEngineUrlRequestInfo::ResourceTypeImage, exception);
-
- } else if(option.endsWith(QLatin1Literal("stylesheet"))) {
- // external CSS stylesheet files
- return qMakePair(QWebEngineUrlRequestInfo::ResourceTypeStylesheet, exception);
-
- } else if(option.endsWith(QLatin1Literal("object"))) {
- // content handled by browser plugins, e.g. Flash or Java
- return qMakePair(QWebEngineUrlRequestInfo::ResourceTypeObject, exception);
-
- } else if(option.endsWith(QLatin1Literal("xmlhttprequest"))) {
- // requests started using the XMLHttpRequest object or fetch() API
- return qMakePair(QWebEngineUrlRequestInfo::ResourceTypeXhr, exception);
-
- } else if(option.endsWith(QLatin1Literal("object-subrequest"))) {
- // requests started by plugins like Flash
- return qMakePair(QWebEngineUrlRequestInfo::ResourceTypePluginResource, exception);
-
- } else if(option.endsWith(QLatin1Literal("subdocument"))) {
- // embedded pages, usually included via HTML frames
- return qMakePair(QWebEngineUrlRequestInfo::ResourceTypeSubFrame, exception);
-
- } else if(option.endsWith(QLatin1Literal("ping"))) {
- // requests started by <a ping> or navigator.sendBeacon()
- return qMakePair(QWebEngineUrlRequestInfo::ResourceTypePing, exception);
-
- } else if(option.endsWith(QLatin1Literal("websocket"))) {
- // requests initiated via WebSocket object
- qDebug("Resource type 'websocket' not available");
-
- } else if(option.endsWith(QLatin1Literal("webrtc"))) {
- // connections opened via RTCPeerConnection instances to ICE servers
- qDebug("Resource type 'webrtc' not available");
-
- } else if(option.endsWith(QLatin1Literal("document"))) {
- // the page itself
- return qMakePair(QWebEngineUrlRequestInfo::ResourceTypeMainFrame, exception);
-
- } else if(option.endsWith(QLatin1Literal("other"))) {
- return qMakePair(QWebEngineUrlRequestInfo::ResourceTypeUnknown, exception);
- }
-
- return std::nullopt;
-}
diff --git a/lib/urlfilter/formats/adblockrule_parse.h b/lib/urlfilter/formats/adblockrule_parse.h
deleted file mode 100644
index 01255ca..0000000
--- a/lib/urlfilter/formats/adblockrule_parse.h
+++ /dev/null
@@ -1,17 +0,0 @@
-/*
- * This file is part of smolbote. It's copyrighted by the contributors recorded
- * in the version control history of the file, available from its original
- * location: https://neueland.iserlohn-fortress.net/gitea/aqua/smolbote
- *
- * SPDX-License-Identifier: GPL-3.0
- */
-
-#ifndef ADBLOCKRULE_PARSE_H
-#define ADBLOCKRULE_PARSE_H
-
-class AdBlockRule;
-
-AdBlockRule *parseRule_adblock(const QString &filter);
-std::optional<QPair<QWebEngineUrlRequestInfo::ResourceType, bool>> parseOption(const QString &option);
-
-#endif // ADBLOCKRULE_PARSE_H
diff --git a/lib/urlfilter/formats/hostlistrule.cpp b/lib/urlfilter/formats/hostlistrule.cpp
deleted file mode 100644
index ad2c2a6..0000000
--- a/lib/urlfilter/formats/hostlistrule.cpp
+++ /dev/null
@@ -1,29 +0,0 @@
-/*
- * This file is part of smolbote. It's copyrighted by the contributors recorded
- * in the version control history of the file, available from its original
- * location: https://neueland.iserlohn-fortress.net/gitea/aqua/smolbote
- *
- * SPDX-License-Identifier: GPL-3.0
- */
-
-#include "hostlistrule.h"
-
-HostlistRule::HostlistRule(const QString &domain, const QString &redirect)
-{
- this->m_isBlocking = (redirect == QLatin1Literal("0.0.0.0"));
- this->m_request = domain;
- this->m_redirect = redirect;
-}
-
-bool HostlistRule::match(const QUrl &requestUrl) const
-{
- //qDebug("checking [%s] against [%s]", qUtf8Printable(requestUrl.host()), qUtf8Printable(m_request));
- return (m_request == requestUrl.host());
-}
-
-std::pair<FilterLeaf::Action, QVariant> HostlistRule::action() const
-{
- if(m_isBlocking)
- return std::make_pair(FilterLeaf::Block, QVariant());
- return std::make_pair(FilterLeaf::Redirect, QVariant(m_redirect));
-}
diff --git a/lib/urlfilter/formats/hostlistrule.h b/lib/urlfilter/formats/hostlistrule.h
deleted file mode 100644
index 58ec690..0000000
--- a/lib/urlfilter/formats/hostlistrule.h
+++ /dev/null
@@ -1,27 +0,0 @@
-/*
- * This file is part of smolbote. It's copyrighted by the contributors recorded
- * in the version control history of the file, available from its original
- * location: https://neueland.iserlohn-fortress.net/gitea/aqua/smolbote
- *
- * SPDX-License-Identifier: GPL-3.0
- */
-
-#ifndef SMOLBOTE_HOSTLIST_RULE_H
-#define SMOLBOTE_HOSTLIST_RULE_H
-
-#include "../filterleaf.h"
-#include <QString>
-
-class HostlistRule : public FilterLeaf
-{
-public:
- explicit HostlistRule(const QString &domain, const QString &redirect);
-
- bool match(const QUrl &requestUrl) const override;
- std::pair<FilterLeaf::Action, QVariant> action() const override;
-
-private:
- QString m_redirect;
-};
-
-#endif // SMOLBOTE_HOSTLIST_RULE_H