about | summary | refs | log | tree | commit | diff
path: root/lib/urlfilter
diff options
context:
space:
mode:
author: Aqua-sama <aqua@iserlohn-fortress.net> 2018-10-21 13:48:23 +0200
committer: Aqua-sama <aqua@iserlohn-fortress.net> 2018-10-21 13:48:23 +0200
commit: 19137b3233c31da75973a3217558ea6bd6570bad (patch)
tree: e1e2aa014354b7a50c5db2895c3b3fffd9d718d5 /lib/urlfilter
parent: Fix install paths for icon, desktop and profile (diff)
download: smolbote-19137b3233c31da75973a3217558ea6bd6570bad.tar.xz
urlfilter: base AdBlockRule off FilterLeaf
Diffstat (limited to 'lib/urlfilter')
-rw-r--r-- lib/urlfilter/CMakeLists.txt 14
-rw-r--r-- lib/urlfilter/filterleaf.h 20
-rw-r--r-- lib/urlfilter/filterrule.cpp 96
-rw-r--r-- lib/urlfilter/filterrule.h 54
-rw-r--r-- lib/urlfilter/formats/adblockrule.cpp 136
-rw-r--r-- lib/urlfilter/formats/adblockrule.h 13
6 files changed, 92 insertions, 241 deletions
diff --git a/lib/urlfilter/CMakeLists.txt b/lib/urlfilter/CMakeLists.txt
index 9940222..20466d3 100644
--- a/lib/urlfilter/CMakeLists.txt
+++ b/lib/urlfilter/CMakeLists.txt
@@ -5,15 +5,6 @@ set(CMAKE_INCLUDE_CURRENT_DIR ON)
set(CMAKE_AUTOMOC ON)
add_library(urlfilter
- filterrule.cpp
- filterrule.h
-
- formats/adblockrule.cpp
- formats/adblockrule.h
-
- formats/hostlistrule.cpp
- formats/hostlistrule.h
-
# filter tree
filtertree.cpp
filtertree.h
@@ -21,6 +12,11 @@ add_library(urlfilter
filterleaf.h
domain.cpp
domain.h
+
+ formats/adblockrule.cpp
+ formats/adblockrule.h
+ formats/hostlistrule.cpp
+ formats/hostlistrule.h
)
target_link_libraries(urlfilter Qt5::WebEngineWidgets)
diff --git a/lib/urlfilter/filterleaf.h b/lib/urlfilter/filterleaf.h
index 2c552a6..a565f92 100644
--- a/lib/urlfilter/filterleaf.h
+++ b/lib/urlfilter/filterleaf.h
@@ -24,6 +24,16 @@ public:
Redirect
};
+ enum UrlMatchType {
+ InvalidMatch,
+ RegularExpressionMatch,
+ StringContains,
+ StringStartsWith,
+ StringEndsWith,
+ StringEquals,
+ DomainMatch
+ };
+
FilterLeaf(FilterLeaf &&other);
FilterLeaf &operator=(FilterLeaf &&other);
~FilterLeaf() = default;
@@ -35,16 +45,6 @@ public:
const QString redirect() const;
protected:
- enum UrlMatchType {
- InvalidMatch,
- RegularExpressionMatch,
- StringContains,
- StringStartsWith,
- StringEndsWith,
- StringEquals,
- DomainMatch
- };
-
explicit FilterLeaf() = default;
// rule matching
diff --git a/lib/urlfilter/filterrule.cpp b/lib/urlfilter/filterrule.cpp
deleted file mode 100644
index 66a46f1..0000000
--- a/lib/urlfilter/filterrule.cpp
+++ /dev/null
@@ -1,96 +0,0 @@
-/*
- * This file is part of smolbote. It's copyrighted by the contributors recorded
- * in the version control history of the file, available from its original
- * location: https://neueland.iserlohn-fortress.net/gitea/aqua/smolbote
- *
- * SPDX-License-Identifier: GPL-3.0
- */
-
-#include "filterrule.h"
-
-inline bool isMatchingDomain(const QString &domain, const QString &filter)
-{
- // domain and filter are the same
- if(domain == filter) {
- return true;
- }
-
- // domain can't be matched by filter if it doesn't end with filter
- // ex. example2.com isn't matched by example.com
- if(!domain.endsWith(filter)) {
- return false;
- }
-
- // match with subdomains
- // ex. subdomain.example.com is matched by example.com
- int index = domain.indexOf(filter);
-
- // match if (domain ends with filter) && (filter has been found) and (character before filter is '.')
- return index > 0 && domain[index - 1] == QLatin1Char('.');
-}
-
-bool FilterRule::isEnabled() const
-{
- return m_isEnabled;
-}
-
-bool FilterRule::isBlocking() const
-{
- return m_isBlocking;
-}
-
-bool FilterRule::matchesDomain(uint domainHash) const
-{
- // no domains have been allowed or blocked -> allow on all domains
- if(allowedDomains_hashes.isEmpty() && blockedDomains_hashes.isEmpty()) {
- return true;
- }
-
- // blockedDomains prevents the rules from being matched on those domains
- if(blockedDomains_hashes.contains(domainHash)) {
- return false;
- }
-
- // allowedDomains means the rule should only be matched on those domains
- return allowedDomains_hashes.contains(domainHash);
-}
-
-bool FilterRule::matchesType(QWebEngineUrlRequestInfo::ResourceType type) const
-{
- // no options have been specified -> match all resource types
- if(m_resourceTypeOptions.isEmpty())
- return true;
-
- // this resource type has not been specified -> reject it
- if(!m_resourceTypeOptions.contains(type))
- return false;
-
- // resource type has been specified; true to match, false to exception
- return m_resourceTypeOptions.value(type);
-}
-
-bool FilterRule::matchesUrl(const QUrl &url) const
-{
- switch(urlMatchType) {
- case InvalidMatch:
- return false;
-
- case RegularExpressionMatch:
- return regexp.match(url.toString()).hasMatch();
-
- case StringContains:
- return url.toString().contains(match);
-
- case StringStartsWith:
- return url.toString().startsWith(match);
-
- case StringEndsWith:
- return url.toString().endsWith(match);
-
- case StringEquals:
- return url.toString() == match;
-
- case DomainMatch:
- return isMatchingDomain(url.host(), match);
- }
-}
diff --git a/lib/urlfilter/filterrule.h b/lib/urlfilter/filterrule.h
deleted file mode 100644
index 6afe3c6..0000000
--- a/lib/urlfilter/filterrule.h
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- * This file is part of smolbote. It's copyrighted by the contributors recorded
- * in the version control history of the file, available from its original
- * location: https://neueland.iserlohn-fortress.net/gitea/aqua/smolbote
- *
- * SPDX-License-Identifier: GPL-3.0
- */
-
-#ifndef SMOLBOTE_FILTERRULE_H
-#define SMOLBOTE_FILTERRULE_H
-
-#include <QObject>
-#include <QRegularExpression>
-#include <QStringList>
-#include <QStringMatcher>
-#include <QUrl>
-#include <QVector>
-#include <QWebEngineUrlRequestInfo>
-#include <memory>
-
-class FilterRule
-{
-public:
- enum UrlMatchType {
- InvalidMatch,
- RegularExpressionMatch,
- StringContains,
- StringStartsWith,
- StringEndsWith,
- StringEquals,
- DomainMatch
- };
-
- bool isEnabled() const;
- bool isBlocking() const;
-
- bool matchesDomain(uint domainHash) const;
- bool matchesType(QWebEngineUrlRequestInfo::ResourceType type) const;
- bool matchesUrl(const QUrl &url) const;
-
-protected:
- bool m_isEnabled = false;
- bool m_isBlocking = true;
-
- UrlMatchType urlMatchType = InvalidMatch;
- QHash<QWebEngineUrlRequestInfo::ResourceType, bool> m_resourceTypeOptions;
-
- QVector<uint> allowedDomains_hashes, blockedDomains_hashes;
-
- QString match;
- QRegularExpression regexp;
-};
-
-#endif // SMOLBOTE_FILTERRULE_H
diff --git a/lib/urlfilter/formats/adblockrule.cpp b/lib/urlfilter/formats/adblockrule.cpp
index c5d6b58..79a6dc8 100644
--- a/lib/urlfilter/formats/adblockrule.cpp
+++ b/lib/urlfilter/formats/adblockrule.cpp
@@ -16,30 +16,28 @@
// QString::chop(len) - Removes n characters from the end of the string.
// QString::remove(pos, len) - Removes n characters from the string, starting at the given position index.
-AdBlockRule::AdBlockRule(const QString &filter)
+AdBlockRule *loadRule(const QString &filter)
{
QString parsedLine = filter.trimmed();
// there is no rule, or it's a comment
if(parsedLine.isEmpty() || parsedLine.startsWith("!")) {
- return;
+ return nullptr;
}
- // css rule - ignore for now
+ // css rule -> filterleaves cannot do element blocking
if(parsedLine.contains(QLatin1Literal("##")) || parsedLine.contains(QLatin1Literal("#@#"))) {
- return;
+ return nullptr;
}
- m_isEnabled = true;
-
// exception rules
- if(parsedLine.startsWith(QLatin1Literal("@@"))) {
- m_isBlocking = false;
+ const bool isBlocking = !parsedLine.startsWith(QLatin1Literal("@@")); // a leading "@@" marks an exception rule, i.e. a non-blocking one
+ if(!isBlocking) // strip the "@@" marker so only the pattern remains
parsedLine.remove(0, 2);
- } else
- m_isBlocking = true;
// parse options
+ QStringList enabledOn, disabledOn;
+ QHash<QWebEngineUrlRequestInfo::ResourceType, bool> optionsHash;
{
const int sepPos = parsedLine.indexOf(QLatin1Literal("$"));
if(sepPos != -1) {
@@ -49,102 +47,102 @@ AdBlockRule::AdBlockRule(const QString &filter)
for(const QString &option : options) {
if(option.startsWith(QLatin1Literal("domain"))) {
const auto domainList = option.mid(7).split(QLatin1Literal("|"));
+
for(const QString &domain : domainList) {
if(domain.startsWith(QLatin1Literal("~"))) {
- blockedDomains_hashes.append(qHash(domain.mid(1)));
+ disabledOn.append(domain.mid(1));
} else {
- allowedDomains_hashes.append(qHash(domain));
+ enabledOn.append(domain);
}
}
} else {
- parseOption(option);
+ const auto pair = parseOption(option);
+ if(pair)
+ optionsHash.insert(pair.value().first, pair.value().second);
}
}
}
}
- // regular expression rule
- if(parsedLine.startsWith(QLatin1Literal("/")) && parsedLine.endsWith(QLatin1Literal("/"))) {
- parsedLine = parsedLine.mid(1, parsedLine.length() - 2);
-
- urlMatchType = RegularExpressionMatch;
- regexp.setPattern(parsedLine);
- return;
- }
-
- // string equals rule
- if(parsedLine.startsWith(QLatin1Literal("|")) && parsedLine.endsWith(QLatin1Literal("|"))) {
- urlMatchType = StringEquals;
- match = parsedLine.mid(1, parsedLine.length() - 2);
- return;
- }
-
- // Basic filter rules can use wildcards, which were supported by QRegExp,
- // but were deprecated in QRegularExpression.
-
- // remove beginning and ending wildcards
- if(parsedLine.startsWith(QLatin1Literal("*")))
- parsedLine = parsedLine.mid(1);
+ FilterLeaf::UrlMatchType matchType = FilterLeaf::InvalidMatch; // stays InvalidMatch when none of the branches below classifies the rule
+ QString pattern;
- if(parsedLine.endsWith(QLatin1Literal("*")))
- parsedLine.chop(1);
-
- if(parsedLine.startsWith(QLatin1Literal("||")) && parsedLine.endsWith(QLatin1Literal("^"))) {
- urlMatchType = DomainMatch;
- match = parsedLine.mid(2, parsedLine.length() - 3);
- return;
- }
-
- // check for wildcards and translate to regexp
- // wildcard "*" - any number of characters
- // separator "^" - end, ? or /
- if(parsedLine.contains(QLatin1Literal("*")) || parsedLine.contains(QLatin1Literal("^"))) {
- urlMatchType = RegularExpressionMatch;
- parsedLine.replace(QLatin1Literal("||"), QLatin1Literal("^\\w+://"));
- parsedLine.replace(QLatin1Literal("|"), QLatin1Literal("\\|"));
- parsedLine.replace(QLatin1Literal("*"), QLatin1Literal(".*"));
- parsedLine.replace(QLatin1Literal("^"), QLatin1Literal("($|\\?|\\/)"));
- regexp.setPattern(parsedLine);
- return;
+ if(parsedLine.startsWith(QLatin1Literal("/")) && parsedLine.endsWith(QLatin1Literal("/"))) {
+ // regular expression rule
+ matchType = FilterLeaf::RegularExpressionMatch;
+ pattern = parsedLine.mid(1, parsedLine.length() - 2);
+
+ } else if(parsedLine.startsWith(QLatin1Literal("|")) && parsedLine.endsWith(QLatin1Literal("|"))) {
+ // string equals rule
+ matchType = FilterLeaf::StringEquals;
+ pattern = parsedLine.mid(1, parsedLine.length() - 2);
+
+ } else {
+
+ // Basic filter rules can use wildcards, which were supported by QRegExp,
+ // but were deprecated in QRegularExpression.
+
+ // remove beginning and ending wildcards
+ if(parsedLine.startsWith(QLatin1Literal("*")))
+ parsedLine = parsedLine.mid(1);
+
+ if(parsedLine.endsWith(QLatin1Literal("*")))
+ parsedLine.chop(1);
+
+ if(parsedLine.startsWith(QLatin1Literal("||")) && parsedLine.endsWith(QLatin1Literal("^"))) {
+ matchType = FilterLeaf::DomainMatch;
+ pattern = parsedLine.mid(2, parsedLine.length() - 3);
+
+ } else if(parsedLine.contains(QLatin1Literal("*")) || parsedLine.contains(QLatin1Literal("^"))) {
+ // check for wildcards and translate to regexp
+ // wildcard "*" - any number of characters
+ // separator "^" - end, ? or /
+ matchType = FilterLeaf::RegularExpressionMatch;
+ parsedLine.replace(QLatin1Literal("||"), QLatin1Literal("^\\w+://"));
+ parsedLine.replace(QLatin1Literal("|"), QLatin1Literal("\\|"));
+ parsedLine.replace(QLatin1Literal("*"), QLatin1Literal(".*"));
+ parsedLine.replace(QLatin1Literal("^"), QLatin1Literal("($|\\?|\\/)"));
+ pattern = parsedLine;
+ }
}
-
- match = parsedLine;
+ return nullptr;
}
-void AdBlockRule::parseOption(const QString &option)
+
+std::optional<QPair<QWebEngineUrlRequestInfo::ResourceType, bool>> parseOption(const QString &option)
{
const bool exception = !option.startsWith(QLatin1Literal("~"));
if(option.endsWith(QLatin1Literal("script"))) {
// external scripts loaded via HTML script tag
- m_resourceTypeOptions.insert(QWebEngineUrlRequestInfo::ResourceTypeScript, exception);
+ return qMakePair(QWebEngineUrlRequestInfo::ResourceTypeScript, exception);
} else if(option.endsWith(QLatin1Literal("image"))) {
// regular images, typically loaded via HTML img tag
- m_resourceTypeOptions.insert(QWebEngineUrlRequestInfo::ResourceTypeImage, exception);
+ return qMakePair(QWebEngineUrlRequestInfo::ResourceTypeImage, exception);
} else if(option.endsWith(QLatin1Literal("stylesheet"))) {
// external CSS stylesheet files
- m_resourceTypeOptions.insert(QWebEngineUrlRequestInfo::ResourceTypeStylesheet, exception);
+ return qMakePair(QWebEngineUrlRequestInfo::ResourceTypeStylesheet, exception);
} else if(option.endsWith(QLatin1Literal("object"))) {
// content handled by browser plugins, e.g. Flash or Java
- m_resourceTypeOptions.insert(QWebEngineUrlRequestInfo::ResourceTypeObject, exception);
+ return qMakePair(QWebEngineUrlRequestInfo::ResourceTypeObject, exception);
} else if(option.endsWith(QLatin1Literal("xmlhttprequest"))) {
// requests started using the XMLHttpRequest object or fetch() API
- m_resourceTypeOptions.insert(QWebEngineUrlRequestInfo::ResourceTypeXhr, exception);
+ return qMakePair(QWebEngineUrlRequestInfo::ResourceTypeXhr, exception);
} else if(option.endsWith(QLatin1Literal("object-subrequest"))) {
// requests started by plugins like Flash
- m_resourceTypeOptions.insert(QWebEngineUrlRequestInfo::ResourceTypePluginResource, exception);
+ return qMakePair(QWebEngineUrlRequestInfo::ResourceTypePluginResource, exception);
} else if(option.endsWith(QLatin1Literal("subdocument"))) {
// embedded pages, usually included via HTML frames
- m_resourceTypeOptions.insert(QWebEngineUrlRequestInfo::ResourceTypeSubFrame, exception);
+ return qMakePair(QWebEngineUrlRequestInfo::ResourceTypeSubFrame, exception);
} else if(option.endsWith(QLatin1Literal("ping"))) {
// requests started by <a ping> or navigator.sendBeacon()
- m_resourceTypeOptions.insert(QWebEngineUrlRequestInfo::ResourceTypePing, exception);
+ return qMakePair(QWebEngineUrlRequestInfo::ResourceTypePing, exception);
} else if(option.endsWith(QLatin1Literal("websocket"))) {
// requests initiated via WebSocket object
@@ -156,9 +154,11 @@ void AdBlockRule::parseOption(const QString &option)
} else if(option.endsWith(QLatin1Literal("document"))) {
// the page itself
- m_resourceTypeOptions.insert(QWebEngineUrlRequestInfo::ResourceTypeMainFrame, exception);
+ return qMakePair(QWebEngineUrlRequestInfo::ResourceTypeMainFrame, exception);
} else if(option.endsWith(QLatin1Literal("other"))) {
- m_resourceTypeOptions.insert(QWebEngineUrlRequestInfo::ResourceTypeUnknown, exception);
+ return qMakePair(QWebEngineUrlRequestInfo::ResourceTypeUnknown, exception);
}
+
+ return std::nullopt;
}
diff --git a/lib/urlfilter/formats/adblockrule.h b/lib/urlfilter/formats/adblockrule.h
index 3c8edb1..0f0873c 100644
--- a/lib/urlfilter/formats/adblockrule.h
+++ b/lib/urlfilter/formats/adblockrule.h
@@ -9,14 +9,19 @@
#ifndef SMOLBOTE_ADBLOCKRULE_H
#define SMOLBOTE_ADBLOCKRULE_H
-#include "../filterrule.h"
+#include "../filterleaf.h"
+#include <optional>
-class AdBlockRule : public FilterRule
+class AdBlockRule : public FilterLeaf
{
public:
- explicit AdBlockRule(const QString &filter);
+// explicit AdBlockRule(const QString &filter);
- void parseOption(const QString &option);
+// bool match(const QUrl &requestUrl) const override;
+// FilterLeaf::Action action() const override;
};
+std::optional<QPair<QWebEngineUrlRequestInfo::ResourceType, bool>> parseOption(const QString &option);
+AdBlockRule *loadRule(const QString &filter);
+
#endif // SMOLBOTE_ADBLOCKRULE_H