aboutsummaryrefslogtreecommitdiff
path: root/lib/web/urlfilter
diff options
context:
space:
mode:
authorAqua-sama <aqua@iserlohn-fortress.net>2018-10-01 16:43:18 +0200
committerAqua-sama <aqua@iserlohn-fortress.net>2018-10-02 11:47:49 +0200
commit7d8cbdb9941532cd5bf560b21395f6ed371d1ab5 (patch)
tree9c5a2d72a3882050f2c3c95ec2d15ad21ff98a93 /lib/web/urlfilter
parentupdater: windows fixes (diff)
downloadsmolbote-7d8cbdb9941532cd5bf560b21395f6ed371d1ab5.tar.xz
Split off UrlFilter into library
- add more adblock filter options
Diffstat (limited to 'lib/web/urlfilter')
-rw-r--r--lib/web/urlfilter/adblockrule.cpp127
-rw-r--r--lib/web/urlfilter/adblockrule.h26
-rw-r--r--lib/web/urlfilter/filterrule.cpp106
-rw-r--r--lib/web/urlfilter/filterrule.h75
4 files changed, 0 insertions, 334 deletions
diff --git a/lib/web/urlfilter/adblockrule.cpp b/lib/web/urlfilter/adblockrule.cpp
deleted file mode 100644
index 58b1941..0000000
--- a/lib/web/urlfilter/adblockrule.cpp
+++ /dev/null
@@ -1,127 +0,0 @@
-/*
- * This file is part of smolbote. It's copyrighted by the contributors recorded
- * in the version control history of the file, available from its original
- * location: https://neueland.iserlohn-fortress.net/smolbote.hg
- *
- * SPDX-License-Identifier: GPL-3.0
- */
-// Based on Falkon's AdBlockRule class
-
-#include "adblockrule.h"
-
-// adblock format documentation
-// https://adblockplus.org/filters
-
-// QString::mid(pos, len) - Returns a string starting at the specified position index.
-// QString::chop(len) - Removes n characters from the end of the string.
-// QString::remove(pos, len) - Removes n characters from the string, starting at the given position index.
-
-/**
- * @brief Parses one line of an Adblock Plus filter list into a FilterRule.
- *
- * Empty lines, comments ("!") and element hiding rules ("##", "#@#") leave the
- * rule disabled. Otherwise the line is classified as one of:
- *  - "/.../"            regular expression, used verbatim
- *  - "|...|"            exact URL match (when it contains no wildcards)
- *  - "||host^"          domain match (host and its subdomains)
- *  - contains "*", "^" or starts with "||": translated to a regular expression
- *  - anything else      substring match, or prefix/suffix match when anchored
- *                       with a single "|" at the start/end
- *
- * Options after "$" that are not recognised are ignored.
- *
- * @param filter the filter line as found in the list; kept for toString()
- */
-AdBlockRule::AdBlockRule(const QString &filter)
-{
-    originalFilter = filter;
-    QString parsedLine = filter.trimmed();
-
-    // there is no rule, or it's a comment
-    if(parsedLine.isEmpty() || parsedLine.startsWith(QLatin1Literal("!"))) {
-        return;
-    }
-
-    // css rule - ignore for now
-    if(parsedLine.contains(QLatin1Literal("##")) || parsedLine.contains(QLatin1Literal("#@#"))) {
-        return;
-    }
-
-    m_isEnabled = true;
-
-    // exception rules
-    if(parsedLine.startsWith(QLatin1Literal("@@"))) {
-        m_isBlocking = false;
-        parsedLine.remove(0, 2);
-    } else {
-        m_isBlocking = true;
-    }
-
-    // parse options
-    {
-        const int sepPos = parsedLine.indexOf(QLatin1Literal("$"));
-        if(sepPos != -1) {
-            const auto options = parsedLine.mid(sepPos + 1).split(QLatin1Literal(","));
-            parsedLine = parsedLine.mid(0, sepPos);
-
-            for(const QString &option : options) {
-                if(option.startsWith(QLatin1Literal("domain="))) {
-                    // 7 == length of "domain="
-                    const auto domainList = option.mid(7).split(QLatin1Literal("|"));
-                    for(const QString &domain : domainList) {
-                        // "domain=" or "a.com||b.com" would otherwise add an empty entry
-                        if(domain.isEmpty())
-                            continue;
-
-                        if(domain.startsWith(QLatin1Literal("~")))
-                            blockedDomains.append(domain.mid(1));
-                        else
-                            allowedDomains.append(domain);
-                    }
-                } else if(option.endsWith(QLatin1Literal("script"))) {
-                    m_resourceTypeOptions.insert(QWebEngineUrlRequestInfo::ResourceTypeScript, !option.startsWith(QLatin1Literal("~")));
-
-                } else if(option.endsWith(QLatin1Literal("image"))) {
-                    m_resourceTypeOptions.insert(QWebEngineUrlRequestInfo::ResourceTypeImage, !option.startsWith(QLatin1Literal("~")));
-
-                } else if(option.endsWith(QLatin1Literal("stylesheet"))) {
-                    m_resourceTypeOptions.insert(QWebEngineUrlRequestInfo::ResourceTypeStylesheet, !option.startsWith(QLatin1Literal("~")));
-
-                } else if(option.endsWith(QLatin1Literal("object"))) {
-                    m_resourceTypeOptions.insert(QWebEngineUrlRequestInfo::ResourceTypeObject, !option.startsWith(QLatin1Literal("~")));
-
-                } else if(option.endsWith(QLatin1Literal("xmlhttprequest"))) {
-                    m_resourceTypeOptions.insert(QWebEngineUrlRequestInfo::ResourceTypeXhr, !option.startsWith(QLatin1Literal("~")));
-
-                } else if(option.endsWith(QLatin1Literal("other"))) {
-                    m_resourceTypeOptions.insert(QWebEngineUrlRequestInfo::ResourceTypeUnknown, !option.startsWith(QLatin1Literal("~")));
-                }
-            }
-        }
-    }
-
-    // regular expression rule
-    if(parsedLine.startsWith(QLatin1Literal("/")) && parsedLine.endsWith(QLatin1Literal("/"))) {
-        parsedLine = parsedLine.mid(1, parsedLine.length() - 2);
-
-        urlMatchType = RegularExpressionMatch;
-        regexp.setPattern(parsedLine);
-        return;
-    }
-
-    // string equals rule
-    // Anchored rules that contain wildcards, or start with the "||" host
-    // anchor, are left to the regular expression translation below.
-    if(parsedLine.length() > 2
-        && parsedLine.startsWith(QLatin1Char('|')) && parsedLine.endsWith(QLatin1Char('|'))
-        && !parsedLine.startsWith(QLatin1Literal("||"))
-        && !parsedLine.contains(QLatin1Char('*')) && !parsedLine.contains(QLatin1Char('^'))) {
-        urlMatchType = StringEquals;
-        match = parsedLine.mid(1, parsedLine.length() - 2);
-        return;
-    }
-
-    // Basic filter rules can use wildcards, which were supported by QRegExp,
-    // but were deprecated in QRegularExpression.
-
-    // remove beginning and ending wildcards
-    if(parsedLine.startsWith(QLatin1Literal("*")))
-        parsedLine = parsedLine.mid(1);
-
-    if(parsedLine.endsWith(QLatin1Literal("*")))
-        parsedLine.chop(1);
-
-    // domain rule: "||host^" where host is a bare host name
-    if(parsedLine.startsWith(QLatin1Literal("||")) && parsedLine.endsWith(QLatin1Literal("^"))) {
-        const QString host = parsedLine.mid(2, parsedLine.length() - 3);
-        // a path, port, query or wildcard can never equal QUrl::host(), so
-        // such rules must go through the regular expression translation
-        const QRegularExpression notHostChars(QLatin1Literal("[/:?=&*^|]"));
-        if(!host.isEmpty() && !host.contains(notHostChars)) {
-            urlMatchType = DomainMatch;
-            match = host;
-            return;
-        }
-    }
-
-    // check for wildcards and translate to regexp
-    // host anchor "||" - scheme followed by any (sub)domain prefix
-    // anchor "|" - beginning or end of the address
-    // wildcard "*" - any number of characters
-    // separator "^" - end, ? or /
-    const bool isHostAnchored = parsedLine.length() > 2 && parsedLine.startsWith(QLatin1Literal("||"));
-    if(isHostAnchored || parsedLine.contains(QLatin1Char('*')) || parsedLine.contains(QLatin1Char('^'))) {
-        QString pattern;
-        // run of ordinary characters waiting to be escaped; escaping whole runs
-        // (rather than single characters) keeps surrogate pairs intact
-        QString literal;
-        const auto flushLiteral = [&pattern, &literal]() {
-            pattern.append(QRegularExpression::escape(literal));
-            literal.clear();
-        };
-
-        // The translation is done in a single pass so that regexp syntax
-        // emitted for one token is never rewritten as if it were filter syntax.
-        int pos = 0;
-        if(isHostAnchored) {
-            pattern.append(QLatin1Literal("^\\w+://([^/]*\\.)?"));
-            pos = 2;
-        } else if(parsedLine.startsWith(QLatin1Char('|'))) {
-            pattern.append(QLatin1Char('^'));
-            pos = 1;
-        }
-
-        const int last = parsedLine.length() - 1;
-        for(; pos <= last; ++pos) {
-            const QChar ch = parsedLine.at(pos);
-            if(ch == QLatin1Char('*')) {
-                flushLiteral();
-                pattern.append(QLatin1Literal(".*"));
-            } else if(ch == QLatin1Char('^')) {
-                flushLiteral();
-                pattern.append(QLatin1Literal("($|\\?|\\/)"));
-            } else if(ch == QLatin1Char('|') && pos == last) {
-                flushLiteral();
-                pattern.append(QLatin1Char('$'));
-            } else {
-                // includes '|' in the middle of the filter, which is a literal pipe
-                literal.append(ch);
-            }
-        }
-        flushLiteral();
-
-        urlMatchType = RegularExpressionMatch;
-        regexp.setPattern(pattern);
-        return;
-    }
-
-    // plain string rule, optionally anchored at one end with "|"
-    UrlMatchType plainType = StringContains;
-    if(parsedLine.startsWith(QLatin1Char('|'))) {
-        plainType = StringStartsWith;
-        parsedLine.remove(0, 1);
-    } else if(parsedLine.endsWith(QLatin1Char('|'))) {
-        plainType = StringEndsWith;
-        parsedLine.chop(1);
-    }
-
-    // nothing left to compare against: keep the rule as InvalidMatch rather
-    // than letting an empty string match every URL
-    if(parsedLine.isEmpty()) {
-        return;
-    }
-
-    urlMatchType = plainType;
-    match = parsedLine;
-}
diff --git a/lib/web/urlfilter/adblockrule.h b/lib/web/urlfilter/adblockrule.h
deleted file mode 100644
index 7b6f683..0000000
--- a/lib/web/urlfilter/adblockrule.h
+++ /dev/null
@@ -1,26 +0,0 @@
-/*
- * This file is part of smolbote. It's copyrighted by the contributors recorded
- * in the version control history of the file, available from its original
- * location: https://neueland.iserlohn-fortress.net/smolbote.hg
- *
- * SPDX-License-Identifier: GPL-3.0
- */
-
-#ifndef SMOLBOTE_ADBLOCKRULE_H
-#define SMOLBOTE_ADBLOCKRULE_H
-
-#include <QObject>
-#include <QString>
-#include <QRegularExpression>
-#include <QUrl>
-#include <QWebEngineUrlRequestInfo>
-#include "filterrule.h"
-
-/**
- * @brief FilterRule that is filled in by parsing a single filter line in the
- * Adblock Plus filter format (https://adblockplus.org/filters).
- *
- * The class adds no members of its own; the constructor only sets the
- * protected fields inherited from FilterRule.
- */
-class AdBlockRule : public FilterRule
-{
-public:
- // filter: one line of a filter list; parsed by the constructor
- explicit AdBlockRule(const QString &filter);
-
-};
-
-#endif // SMOLBOTE_ADBLOCKRULE_H
diff --git a/lib/web/urlfilter/filterrule.cpp b/lib/web/urlfilter/filterrule.cpp
deleted file mode 100644
index 67ff4d7..0000000
--- a/lib/web/urlfilter/filterrule.cpp
+++ /dev/null
@@ -1,106 +0,0 @@
-/*
- * This file is part of smolbote. It's copyrighted by the contributors recorded
- * in the version control history of the file, available from its original
- * location: https://neueland.iserlohn-fortress.net/smolbote.hg
- *
- * SPDX-License-Identifier: GPL-3.0
- */
-
-#include "filterrule.h"
-
-/**
- * @brief Checks whether a host name is the filter domain or one of its subdomains.
- * @param domain host name to test, ex. "subdomain.example.com"
- * @param filter domain the rule was written for, ex. "example.com"
- * @return true if domain equals filter, or ends with "." followed by filter
- */
-inline bool isMatchingDomain(const QString &domain, const QString &filter)
-{
-    // domain and filter are the same
-    if(domain == filter) {
-        return true;
-    }
-
-    // an empty filter is a suffix of every string, but names no domain
-    if(filter.isEmpty()) {
-        return false;
-    }
-
-    // domain can't be matched by filter if it doesn't end with filter
-    // ex. example2.com isn't matched by example.com
-    if(!domain.endsWith(filter)) {
-        return false;
-    }
-
-    // match with subdomains
-    // ex. subdomain.example.com is matched by example.com
-    // The position of the suffix is computed from the lengths: searching with
-    // indexOf() would find the first occurrence instead, which is wrong for
-    // hosts such as example.com.example.com.
-    const int index = domain.length() - filter.length();
-
-    // match if (domain ends with filter) and (character before filter is '.')
-    return index > 0 && domain[index - 1] == QLatin1Char('.');
-}
-
-// Returns the m_isEnabled flag. It defaults to false and is set by
-// AdBlockRule once a filter line turns out not to be empty, a comment or a
-// css rule.
-bool FilterRule::isEnabled() const
-{
- return m_isEnabled;
-}
-
-// Returns the m_isBlocking flag: true for a blocking rule, false for an
-// exception rule (AdBlockRule clears it for filters starting with "@@").
-bool FilterRule::isBlocking() const
-{
- return m_isBlocking;
-}
-
-/**
- * @brief Checks whether the rule applies on the given domain.
- *
- * blockedDomains lists the domains the rule must not be applied on,
- * allowedDomains the domains it is restricted to. Both lists also cover the
- * subdomains of their entries, and blockedDomains takes precedence.
- *
- * @param domain host name to check
- * @return true if the rule applies on domain, false otherwise
- */
-bool FilterRule::matchesDomain(const QString &domain) const
-{
-    // do not match rule if the domain (or a parent domain) has been blocked
-    for(const QString &blocked : blockedDomains) {
-        if(isMatchingDomain(domain, blocked))
-            return false;
-    }
-
-    // rule is not restricted to specific domains -> applies everywhere that
-    // has not been blocked above; this also covers the case where no domains
-    // have been listed at all
-    if(allowedDomains.isEmpty())
-        return true;
-
-    for(const QString &allowed : allowedDomains) {
-        if(isMatchingDomain(domain, allowed))
-            return true;
-    }
-
-    return false;
-}
-
-/**
- * @brief Checks whether the rule applies to the given resource type.
- *
- * m_resourceTypeOptions maps a resource type to true (rule is restricted to
- * this type) or false (rule must not be applied to this type).
- *
- * @param type resource type of the request
- * @return true if the rule applies to type, false otherwise
- */
-bool FilterRule::matchesType(QWebEngineUrlRequestInfo::ResourceType type) const
-{
-    // no options have been specified -> match all resource types
-    if(m_resourceTypeOptions.isEmpty())
-        return true;
-
-    // resource type has been specified; true to match, false to exception
-    const auto option = m_resourceTypeOptions.constFind(type);
-    if(option != m_resourceTypeOptions.constEnd())
-        return option.value();
-
-    // This resource type has not been specified. It is rejected only if the
-    // rule is restricted to some other type; a rule that lists nothing but
-    // exceptions (ex. "~script") still applies to every remaining type.
-    for(const bool isRestrictedTo : m_resourceTypeOptions) {
-        if(isRestrictedTo)
-            return false;
-    }
-
-    return true;
-}
-
-/**
- * @brief Checks the URL against the rule's pattern.
- *
- * How the comparison is done depends on urlMatchType: the string types compare
- * the full URL string with match, RegularExpressionMatch uses regexp, and
- * DomainMatch compares only the host of the URL.
- *
- * @param url URL of the request
- * @return true if the URL is matched by the rule; always false for InvalidMatch
- */
-bool FilterRule::matchesUrl(const QUrl &url) const
-{
-    // no default label on purpose, so the compiler can warn about enum values
-    // that are not handled here
-    switch (urlMatchType) {
-    case InvalidMatch:
-        return false;
-
-    case RegularExpressionMatch:
-        return regexp.match(url.toString()).hasMatch();
-
-    case StringContains:
-        return url.toString().contains(match);
-
-    case StringStartsWith:
-        return url.toString().startsWith(match);
-
-    case StringEndsWith:
-        return url.toString().endsWith(match);
-
-    case StringEquals:
-        return url.toString() == match;
-
-    case DomainMatch:
-        return isMatchingDomain(url.host(), match);
-
-    }
-
-    // urlMatchType holds a value outside of the enumeration; without this
-    // return the function would run off its end, which is undefined behaviour
-    return false;
-}
-
-// Returns originalFilter, which AdBlockRule sets to the unmodified filter
-// line it was constructed from.
-QString FilterRule::toString() const
-{
- return originalFilter;
-}
diff --git a/lib/web/urlfilter/filterrule.h b/lib/web/urlfilter/filterrule.h
deleted file mode 100644
index 5b9a6cf..0000000
--- a/lib/web/urlfilter/filterrule.h
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- * This file is part of smolbote. It's copyrighted by the contributors recorded
- * in the version control history of the file, available from its original
- * location: https://neueland.iserlohn-fortress.net/smolbote.hg
- *
- * SPDX-License-Identifier: GPL-3.0
- */
-
-#ifndef SMOLBOTE_FILTERRULE_H
-#define SMOLBOTE_FILTERRULE_H
-
-#include <QRegularExpression>
-#include <QStringList>
-#include <QStringMatcher>
-#include <QUrl>
-#include <QWebEngineUrlRequestInfo>
-#include <memory>
-
-/**
- * @brief A single URL filter rule.
- *
- * The class only stores the parsed rule and answers match queries; the
- * protected fields are filled in by a subclass (see AdBlockRule).
- */
-class FilterRule
-{
-public:
- // Selects how matchesUrl() compares a URL with the rule.
- enum UrlMatchType {
- InvalidMatch, // default; matchesUrl() returns false
- RegularExpressionMatch, // URL string is matched against regexp
- StringContains, // URL string contains match
- StringStartsWith, // URL string starts with match
- StringEndsWith, // URL string ends with match
- StringEquals, // URL string is equal to match
- DomainMatch // host of the URL is compared with match
- };
-
- FilterRule() = default;
-
- // value of m_isEnabled
- bool isEnabled() const;
- // value of m_isBlocking
- bool isBlocking() const;
-
- /**
- * @brief Checks the domain against the rule's allowedDomains and blockedDomains lists
- * @param domain host name to check
- * @return true if the rule applies on domain, false otherwise;
- * always true when both lists are empty
- */
- bool matchesDomain(const QString &domain) const;
-
- /**
- * @brief Checks the resource type against m_resourceTypeOptions
- * @param type resource type of the request
- * @return true if type matches, false otherwise;
- * always true when no resource type options are set
- */
- bool matchesType(QWebEngineUrlRequestInfo::ResourceType type) const;
-
- /**
- * @brief Checks the URL against the rule's pattern, as selected by urlMatchType
- * @param url URL of the request
- * @return true if the URL is matched, false otherwise
- */
- bool matchesUrl(const QUrl &url) const;
-
- // Returns originalFilter.
- QString toString() const;
-
-protected:
- bool m_isEnabled = false;
- bool m_isBlocking = true;
-
- // filter text as returned by toString()
- QString originalFilter;
-
- UrlMatchType urlMatchType = InvalidMatch;
- // resource type -> true to match that type, false to make an exception for it
- QHash<QWebEngineUrlRequestInfo::ResourceType, bool> m_resourceTypeOptions;
- QStringList allowedDomains, blockedDomains;
-
- // pattern used by the string and domain match types
- QString match;
- // pattern used by RegularExpressionMatch
- QRegularExpression regexp;
-
-};
-
-#endif // SMOLBOTE_FILTERRULE_H