diff options
author | Aqua-sama <aqua@iserlohn-fortress.net> | 2018-10-01 16:43:18 +0200 |
---|---|---|
committer | Aqua-sama <aqua@iserlohn-fortress.net> | 2018-10-02 11:47:49 +0200 |
commit | 7d8cbdb9941532cd5bf560b21395f6ed371d1ab5 (patch) | |
tree | 9c5a2d72a3882050f2c3c95ec2d15ad21ff98a93 /lib/web/urlfilter | |
parent | updater: windows fixes (diff) | |
download | smolbote-7d8cbdb9941532cd5bf560b21395f6ed371d1ab5.tar.xz |
Split off UrlFilter into library
- add more adblock filter options
Diffstat (limited to 'lib/web/urlfilter')
-rw-r--r-- | lib/web/urlfilter/adblockrule.cpp | 127 | ||||
-rw-r--r-- | lib/web/urlfilter/adblockrule.h | 26 | ||||
-rw-r--r-- | lib/web/urlfilter/filterrule.cpp | 106 | ||||
-rw-r--r-- | lib/web/urlfilter/filterrule.h | 75 |
4 files changed, 0 insertions, 334 deletions
diff --git a/lib/web/urlfilter/adblockrule.cpp b/lib/web/urlfilter/adblockrule.cpp deleted file mode 100644 index 58b1941..0000000 --- a/lib/web/urlfilter/adblockrule.cpp +++ /dev/null @@ -1,127 +0,0 @@ -/* - * This file is part of smolbote. It's copyrighted by the contributors recorded - * in the version control history of the file, available from its original - * location: https://neueland.iserlohn-fortress.net/smolbote.hg - * - * SPDX-License-Identifier: GPL-3.0 - */ -// Based on Falkon's AdBlockRule class - -#include "adblockrule.h" - -// adblock format documentation -// https://adblockplus.org/filters - -// QString::mid(pos, len) - Returns a string starting at the specified position index. -// QString::chop(len) - Removes n characters from the end of the string. -// QString::remove(pos, len) - Removes n characters from the string, starting at the given position index. - -AdBlockRule::AdBlockRule(const QString &filter) -{ - originalFilter = filter; - QString parsedLine = filter.trimmed(); - - // there is no rule, or it's a comment - if(parsedLine.isEmpty() || parsedLine.startsWith("!")) { - return; - } - - // css rule - ignore for now - if(parsedLine.contains(QLatin1Literal("##")) || parsedLine.contains(QLatin1Literal("#@#"))) { - return; - } - - m_isEnabled = true; - - // exception rules - if(parsedLine.startsWith(QLatin1Literal("@@"))) { - m_isBlocking = false; - parsedLine.remove(0, 2); - } else - m_isBlocking = true; - - // parse options - { - const int sepPos = parsedLine.indexOf(QLatin1Literal("$")); - if(sepPos != -1) { - const auto options = parsedLine.mid(sepPos + 1).split(QLatin1Literal(",")); - parsedLine = parsedLine.mid(0, sepPos); - - for(const QString &option : options) { - if(option.startsWith(QLatin1Literal("domain"))) { - const auto domainList = option.mid(7).split(QLatin1Literal("|")); - for(const QString &domain : domainList) { - if(domain.startsWith(QLatin1Literal("~"))) - blockedDomains.append(domain.mid(1)); - else - allowedDomains.append(domain); - } - } else if(option.endsWith(QLatin1Literal("script"))) { - m_resourceTypeOptions.insert(QWebEngineUrlRequestInfo::ResourceTypeScript, !option.startsWith(QLatin1Literal("~"))); - - } else if(option.endsWith(QLatin1Literal("image"))) { - m_resourceTypeOptions.insert(QWebEngineUrlRequestInfo::ResourceTypeImage, !option.startsWith(QLatin1Literal("~"))); - - } else if(option.endsWith(QLatin1Literal("stylesheet"))) { - m_resourceTypeOptions.insert(QWebEngineUrlRequestInfo::ResourceTypeStylesheet, !option.startsWith(QLatin1Literal("~"))); - - } else if(option.endsWith(QLatin1Literal("object"))) { - m_resourceTypeOptions.insert(QWebEngineUrlRequestInfo::ResourceTypeObject, !option.startsWith(QLatin1Literal("~"))); - - } else if(option.endsWith(QLatin1Literal("xmlhttprequest"))) { - m_resourceTypeOptions.insert(QWebEngineUrlRequestInfo::ResourceTypeXhr, !option.startsWith(QLatin1Literal("~"))); - - } else if(option.endsWith(QLatin1Literal("other"))) { - m_resourceTypeOptions.insert(QWebEngineUrlRequestInfo::ResourceTypeUnknown, !option.startsWith(QLatin1Literal("~"))); - } - } - } - } - - // regular expression rule - if(parsedLine.startsWith(QLatin1Literal("/")) && parsedLine.endsWith(QLatin1Literal("/"))) { - parsedLine = parsedLine.mid(1, parsedLine.length() - 2); - - urlMatchType = RegularExpressionMatch; - regexp.setPattern(parsedLine); - return; - } - - // string equals rule - if(parsedLine.startsWith(QLatin1Literal("|")) && parsedLine.endsWith(QLatin1Literal("|"))) { - urlMatchType = StringEquals; - match = parsedLine.mid(1, parsedLine.length() - 2); - return; - } - - // Basic filter rules can use wildcards, which were supported by QRegExp, - // but were deprecated in QRegularExpression. - - // remove beginning and ending wildcards - if(parsedLine.startsWith(QLatin1Literal("*"))) - parsedLine = parsedLine.mid(1); - - if(parsedLine.endsWith(QLatin1Literal("*"))) - parsedLine.chop(1); - - if(parsedLine.startsWith(QLatin1Literal("||")) && parsedLine.endsWith(QLatin1Literal("^"))) { - urlMatchType = DomainMatch; - match = parsedLine.mid(2, parsedLine.length() - 3); - return; - } - - // check for wildcards and translate to regexp - // wildcard "*" - any number of characters - // separator "^" - end, ? or / - if(parsedLine.contains(QLatin1Literal("*")) || parsedLine.contains(QLatin1Literal("^"))) { - urlMatchType = RegularExpressionMatch; - parsedLine.replace(QLatin1Literal("||"), QLatin1Literal("^\\w+://")); - parsedLine.replace(QLatin1Literal("|"), QLatin1Literal("\\|")); - parsedLine.replace(QLatin1Literal("*"), QLatin1Literal(".*")); - parsedLine.replace(QLatin1Literal("^"), QLatin1Literal("($|\\?|\\/)")); - regexp.setPattern(parsedLine); - return; - } - - match = parsedLine; -} diff --git a/lib/web/urlfilter/adblockrule.h b/lib/web/urlfilter/adblockrule.h deleted file mode 100644 index 7b6f683..0000000 --- a/lib/web/urlfilter/adblockrule.h +++ /dev/null @@ -1,26 +0,0 @@ -/* - * This file is part of smolbote. It's copyrighted by the contributors recorded - * in the version control history of the file, available from its original - * location: https://neueland.iserlohn-fortress.net/smolbote.hg - * - * SPDX-License-Identifier: GPL-3.0 - */ - -#ifndef SMOLBOTE_ADBLOCKRULE_H -#define SMOLBOTE_ADBLOCKRULE_H - -#include <QObject> -#include <QString> -#include <QRegularExpression> -#include <QUrl> -#include <QWebEngineUrlRequestInfo> -#include "filterrule.h" - -class AdBlockRule : public FilterRule -{ -public: - explicit AdBlockRule(const QString &filter); - -}; - -#endif // SMOLBOTE_ADBLOCKRULE_H diff --git a/lib/web/urlfilter/filterrule.cpp b/lib/web/urlfilter/filterrule.cpp deleted file mode 100644 index 67ff4d7..0000000 --- a/lib/web/urlfilter/filterrule.cpp +++ /dev/null @@ -1,106 +0,0 @@ -/* - * This file is part of smolbote. It's copyrighted by the contributors recorded - * in the version control history of the file, available from its original - * location: https://neueland.iserlohn-fortress.net/smolbote.hg - * - * SPDX-License-Identifier: GPL-3.0 - */ - -#include "filterrule.h" - -inline bool isMatchingDomain(const QString &domain, const QString &filter) -{ - // domain and filter are the same - if(domain == filter) { - return true; - } - - // domain can't be matched by filter if it doesn't end with filter - // ex. example2.com isn't matched by example.com - if(!domain.endsWith(filter)) { - return false; - } - - // match with subdomains - // ex. subdomain.example.com is matched by example.com - int index = domain.indexOf(filter); - - // match if (domain ends with filter) && (filter has been found) and (character before filter is '.') - return index > 0 && domain[index - 1] == QLatin1Char('.'); -} - -bool FilterRule::isEnabled() const -{ - return m_isEnabled; -} - -bool FilterRule::isBlocking() const -{ - return m_isBlocking; -} - -bool FilterRule::matchesDomain(const QString &domain) const -{ - // no domains have been allowed or blocked -> allow on all domains - if(allowedDomains.isEmpty() && blockedDomains.isEmpty()) - return true; - - if(!blockedDomains.isEmpty()) { - // do not match rule if the domain has been blocked - if(blockedDomains.contains(domain)) - return false; - } - - if(!allowedDomains.isEmpty()) { - if(allowedDomains.contains(domain)) - return true; - } - - return false; -} - -bool FilterRule::matchesType(QWebEngineUrlRequestInfo::ResourceType type) const -{ - // no options have been specified -> match all resource types - if(m_resourceTypeOptions.isEmpty()) - return true; - - // this resource type has not been specified -> reject it - if(!m_resourceTypeOptions.contains(type)) - return false; - - // resource type has been specified; true to match, false to exception - return m_resourceTypeOptions.value(type); -} - -bool FilterRule::matchesUrl(const QUrl &url) const -{ - switch (urlMatchType) { - case InvalidMatch: - return false; - - case RegularExpressionMatch: - return regexp.match(url.toString()).hasMatch(); - - case StringContains: - return url.toString().contains(match); - - case StringStartsWith: - return url.toString().startsWith(match); - - case StringEndsWith: - return url.toString().endsWith(match); - - case StringEquals: - return url.toString() == match; - - case DomainMatch: - return isMatchingDomain(url.host(), match); - - } -} - -QString FilterRule::toString() const -{ - return originalFilter; -} diff --git a/lib/web/urlfilter/filterrule.h b/lib/web/urlfilter/filterrule.h deleted file mode 100644 index 5b9a6cf..0000000 --- a/lib/web/urlfilter/filterrule.h +++ /dev/null @@ -1,75 +0,0 @@ -/* - * This file is part of smolbote. It's copyrighted by the contributors recorded - * in the version control history of the file, available from its original - * location: https://neueland.iserlohn-fortress.net/smolbote.hg - * - * SPDX-License-Identifier: GPL-3.0 - */ - -#ifndef SMOLBOTE_FILTERRULE_H -#define SMOLBOTE_FILTERRULE_H - -#include <QRegularExpression> -#include <QStringList> -#include <QStringMatcher> -#include <QUrl> -#include <QWebEngineUrlRequestInfo> -#include <memory> - -class FilterRule -{ -public: - enum UrlMatchType { - InvalidMatch, - RegularExpressionMatch, - StringContains, - StringStartsWith, - StringEndsWith, - StringEquals, - DomainMatch - }; - - FilterRule() = default; - - bool isEnabled() const; - bool isBlocking() const; - - /** - * @brief matchesDomain - * @param domain - * @return - */ - bool matchesDomain(const QString &domain) const; - - /** - * @brief matchesType - * @param type - * @return true if type matches, false otherwise - */ - bool matchesType(QWebEngineUrlRequestInfo::ResourceType type) const; - - /** - * @brief matchesUrl - * @param url - * @return - */ - bool matchesUrl(const QUrl &url) const; - - QString toString() const; - -protected: - bool m_isEnabled = false; - bool m_isBlocking = true; - - QString originalFilter; - - UrlMatchType urlMatchType = InvalidMatch; - QHash<QWebEngineUrlRequestInfo::ResourceType, bool> m_resourceTypeOptions; - QStringList allowedDomains, blockedDomains; - - QString match; - QRegularExpression regexp; - -}; - -#endif // SMOLBOTE_FILTERRULE_H |