diff options
author | Aqua-sama <aqua@iserlohn-fortress.net> | 2020-02-10 20:58:39 +0200 |
---|---|---|
committer | Aqua-sama <aqua@iserlohn-fortress.net> | 2020-05-30 11:01:35 +0300 |
commit | d1287f43964633035938f4f4d4133bb6d9da7b3e (patch) | |
tree | d09efa4074815c20be9bd6348203fe4336dfe716 /staging/adblock/rule.h | |
parent | Fix segfault in release build (diff) | |
download | smolbote-d1287f43964633035938f4f4d4133bb6d9da7b3e.tar.xz |
staging: smolblok
smolblok is a replacement for the current lib/urlfilter AdBlockPlus
and hostlist format filter parser. It is a library that uses plugins to
provide support for different filter formats.
staging/adblock: AdBlockPlus parser plugin
plugins/smolblok_hostlist: hostlist format parser plugin
Headers will be installed to include/smolbote/
Remove lib/urlfilter
Diffstat (limited to 'staging/adblock/rule.h')
-rw-r--r-- | staging/adblock/rule.h | 153 |
1 files changed, 153 insertions, 0 deletions
diff --git a/staging/adblock/rule.h b/staging/adblock/rule.h new file mode 100644 index 0000000..aaab49a --- /dev/null +++ b/staging/adblock/rule.h @@ -0,0 +1,153 @@ +/* + * This file is part of smolbote. It's copyrighted by the contributors recorded + * in the version control history of the file, available from its original + * location: https://library.iserlohn-fortress.net/aqua/smolbote.git + * + * SPDX-License-Identifier: GPL-3.0 + */ + +#ifndef SMOLBOTE_ADBLOCK_RULE_H +#define SMOLBOTE_ADBLOCK_RULE_H + +#include "options.h" +#include <QObject> +#include <QRegularExpression> +#include <QString> +#include <QStringMatcher> + +namespace AdblockPlus +{ +class Rule +{ +public: + virtual ~Rule() = default; + /** + * requestUrl: requested URL + * initiatorUrl: URL of document that initiated navigation + * firstPartyUrl: URL of the page that issued the request + */ + virtual bool hasMatch(const QStringRef &requestUrl, + const QStringRef &initiatorUrl, + const QStringRef &firstPartyUrl, + QWebEngineUrlRequestInfo::ResourceType resourceType = QWebEngineUrlRequestInfo::ResourceTypeMainFrame) const = 0; + + bool shouldRedirect() const + { + return options.redirect; + } + bool shouldBlock() const + { + return !options.exception; + } + +protected: + Rule(const Options &opt) + : options(opt) + { + } + const Options options; +}; + +// The separator character can be anything but +// a letter, a digit, or one of the following: _, -, ., %. +// The end of the address is also accepted as a separator. +inline bool isSeparator(const QChar &c) +{ + return !c.isLetter() && !c.isDigit() && c != '_' && c != '-' && c != '.' && c != '%'; +} + +class MatcherRule : public Rule +{ +public: + enum MatchPosition { + DomainMatch, + UrlStartsWith, + UrlEndsWith, + UrlContains, + UrlEquals + }; + + MatcherRule(const QString &pattern, const Options &opt, const MatchPosition pos = UrlContains) + : Rule(opt) + , position(pos) + , matcher(pattern, opt.matchcase ? Qt::CaseSensitive : Qt::CaseInsensitive) + , patternLength(pattern.length()) + { + } + explicit MatcherRule(const MatcherRule &) = delete; + MatcherRule &operator=(const MatcherRule &) = delete; + + explicit MatcherRule(MatcherRule &&) = delete; + MatcherRule &operator=(MatcherRule &&) = delete; + + ~MatcherRule() = default; + bool hasMatch(const QStringRef &url, + const QStringRef &initiatorUrl, + const QStringRef &firstPartyUrl, + QWebEngineUrlRequestInfo::ResourceType resourceType = QWebEngineUrlRequestInfo::ResourceTypeMainFrame) const override + { + const auto index = matcher.indexIn(url); + if(index == -1) { + return false; + } + + switch(position) { + case DomainMatch: + // match if + // there is only one : left of the index + // and + // character after pattern is separator or end of string + return (url.left(index).count(':') <= 1 && (index + patternLength == url.length() || isSeparator(url[index + patternLength]))); + case UrlStartsWith: + return (index == 0); + case UrlEndsWith: + return (index == url.length() - patternLength); + case UrlContains: + return (index != -1); + case UrlEquals: + return (index == 0 && patternLength == url.length()); + } + + return false; + } + +private: + const MatchPosition position; + const QStringMatcher matcher; + const int patternLength; +}; + +class RegexRule : public Rule +{ +public: + RegexRule(const QString &rule, const Options &opt) + : Rule(opt) + , regex(rule) + { + if(!opt.matchcase) { + regex.setPatternOptions(QRegularExpression::CaseInsensitiveOption); + } + } + explicit RegexRule(const RegexRule &) = delete; + RegexRule &operator=(const RegexRule &) = delete; + + explicit RegexRule(RegexRule &&) = delete; + RegexRule &operator=(RegexRule &&) = delete; + + ~RegexRule() = default; + bool hasMatch(const QStringRef &url, + const QStringRef &initiatorUrl, + const QStringRef &firstPartyUrl, + QWebEngineUrlRequestInfo::ResourceType resourceType = QWebEngineUrlRequestInfo::ResourceTypeMainFrame) const override + { + const auto match = regex.match(url); + return match.hasMatch(); + } + +private: + QRegularExpression regex; +}; + +} // namespace AdblockPlus + +#endif // SMOLBOTE_ADBLOCK_RULE_H |