about summary refs log tree commit diff
path: root/lib/urlfilter/formats/adblockrule.cpp
diff options
context:
space:
mode:
author: Aqua-sama <aqua@iserlohn-fortress.net> 2018-11-16 16:26:22 +0100
committer: Aqua-sama <aqua@iserlohn-fortress.net> 2018-11-16 16:26:22 +0100
commit: 566abfa99120652fb1e9190d791fdbbba64d2e0d (patch)
tree: 86a6f71b926794298d922a9319b55909cf5a07b4 /lib/urlfilter/formats/adblockrule.cpp
parent: Add more regex benchmarks (diff)
download: smolbote-566abfa99120652fb1e9190d791fdbbba64d2e0d.tar.xz
Add adblockrule_parse
Diffstat (limited to 'lib/urlfilter/formats/adblockrule.cpp')
-rw-r--r-- lib/urlfilter/formats/adblockrule.cpp 200
1 files changed, 33 insertions, 167 deletions
diff --git a/lib/urlfilter/formats/adblockrule.cpp b/lib/urlfilter/formats/adblockrule.cpp
index 6b97d5d..db1c3c5 100644
--- a/lib/urlfilter/formats/adblockrule.cpp
+++ b/lib/urlfilter/formats/adblockrule.cpp
@@ -5,174 +5,21 @@
*
* SPDX-License-Identifier: GPL-3.0
*/
-// Based on Falkon's AdBlockRule class
#include "adblockrule.h"
+#include <QRegExp>
+#include <QStringMatcher>
-// adblock format documentation
-// https://adblockplus.org/filters
-
-// QString::mid(pos, len) - Returns a string starting at the specified position index.
-// QString::chop(len) - Removes n characters from the end of the string.
-// QString::remove(pos, len) - Removes n characters from the string, starting at the given position index.
-
-AdBlockRule *loadRule(const QString &filter)
-{
- QString parsedLine = filter.trimmed();
-
- // there is no rule, or it's a comment
- if(parsedLine.isEmpty() || parsedLine.startsWith("!")) {
- return nullptr;
- }
-
- // css rule -> filterleaves cannot do element blocking
- if(parsedLine.contains(QLatin1Literal("##")) || parsedLine.contains(QLatin1Literal("#@#"))) {
- return nullptr;
- }
-
- // exception rules
- FilterLeaf::Action action = FilterLeaf::Block;
- if(parsedLine.startsWith(QLatin1Literal("@@"))) {
- action = FilterLeaf::Allow;
- parsedLine.remove(0, 2);
- }
-
- // parse options
- QStringList enabledOn, disabledOn;
- QHash<QWebEngineUrlRequestInfo::ResourceType, bool> optionsHash;
- {
- const int sepPos = parsedLine.indexOf(QLatin1Literal("$"));
- if(sepPos != -1) {
- const auto options = parsedLine.mid(sepPos + 1).split(QLatin1Literal(","));
- parsedLine = parsedLine.mid(0, sepPos);
-
- for(const QString &option : options) {
- if(option.startsWith(QLatin1Literal("domain"))) {
- const auto domainList = option.mid(7).split(QLatin1Literal("|"));
-
- for(const QString &domain : domainList) {
- if(domain.startsWith(QLatin1Literal("~"))) {
- disabledOn.append(domain.mid(1));
- } else {
- enabledOn.append(domain);
- }
- }
- } else {
- const auto pair = parseOption(option);
- if(pair)
- optionsHash.insert(pair.value().first, pair.value().second);
- }
- }
- }
- }
-
- FilterLeaf::UrlMatchType matchType;
- QString pattern;
-
- if(parsedLine.startsWith(QLatin1Literal("/")) && parsedLine.endsWith(QLatin1Literal("/"))) {
- // regular expression rule
- matchType = FilterLeaf::RegularExpressionMatch;
- pattern = parsedLine.mid(1, parsedLine.length() - 2);
-
- } else if(parsedLine.startsWith(QLatin1Literal("|")) && parsedLine.endsWith(QLatin1Literal("|"))) {
- // string equals rule
- matchType = FilterLeaf::StringEquals;
- pattern = parsedLine.mid(1, parsedLine.length() - 2);
-
- } else {
-
- // Basic filter rules can use wildcards, which were supported by QRegExp,
- // but were deprecated in QRegularExpression.
-
- // remove beginning and ending wildcards
- if(parsedLine.startsWith(QLatin1Literal("*")))
- parsedLine = parsedLine.mid(1);
-
- if(parsedLine.endsWith(QLatin1Literal("*")))
- parsedLine.chop(1);
-
- if(parsedLine.startsWith(QLatin1Literal("||")) && parsedLine.endsWith(QLatin1Literal("^"))) {
- matchType = FilterLeaf::DomainMatch;
- pattern = parsedLine.mid(2, parsedLine.length() - 3);
-
- } else if(parsedLine.contains(QLatin1Literal("*")) || parsedLine.contains(QLatin1Literal("^"))) {
- // check for wildcards and translate to regexp
- // wildcard "*" - any number of characters
- // separator "^" - end, ? or /
- matchType = FilterLeaf::RegularExpressionMatch;
- parsedLine.replace(QLatin1Literal("||"), QLatin1Literal("^\\w+://"));
- parsedLine.replace(QLatin1Literal("|"), QLatin1Literal("\\|"));
- parsedLine.replace(QLatin1Literal("*"), QLatin1Literal(".*"));
- parsedLine.replace(QLatin1Literal("^"), QLatin1Literal("($|\\?|\\/)"));
- pattern = parsedLine;
- }
- }
-
- auto *rule = new AdBlockRule(matchType, pattern, action);
- rule->mergeOptions(optionsHash);
- return rule;
-}
-
-std::optional<QPair<QWebEngineUrlRequestInfo::ResourceType, bool>> parseOption(const QString &option)
-{
- const bool exception = !option.startsWith(QLatin1Literal("~"));
-
- if(option.endsWith(QLatin1Literal("script"))) {
- // external scripts loaded via HTML script tag
- return qMakePair(QWebEngineUrlRequestInfo::ResourceTypeScript, exception);
-
- } else if(option.endsWith(QLatin1Literal("image"))) {
- // regular images, typically loaded via HTML img tag
- return qMakePair(QWebEngineUrlRequestInfo::ResourceTypeImage, exception);
-
- } else if(option.endsWith(QLatin1Literal("stylesheet"))) {
- // external CSS stylesheet files
- return qMakePair(QWebEngineUrlRequestInfo::ResourceTypeStylesheet, exception);
-
- } else if(option.endsWith(QLatin1Literal("object"))) {
- // content handled by browser plugins, e.g. Flash or Java
- return qMakePair(QWebEngineUrlRequestInfo::ResourceTypeObject, exception);
-
- } else if(option.endsWith(QLatin1Literal("xmlhttprequest"))) {
- // requests started using the XMLHttpRequest object or fetch() API
- return qMakePair(QWebEngineUrlRequestInfo::ResourceTypeXhr, exception);
-
- } else if(option.endsWith(QLatin1Literal("object-subrequest"))) {
- // requests started by plugins like Flash
- return qMakePair(QWebEngineUrlRequestInfo::ResourceTypePluginResource, exception);
-
- } else if(option.endsWith(QLatin1Literal("subdocument"))) {
- // embedded pages, usually included via HTML frames
- return qMakePair(QWebEngineUrlRequestInfo::ResourceTypeSubFrame, exception);
-
- } else if(option.endsWith(QLatin1Literal("ping"))) {
- // requests started by <a ping> or navigator.sendBeacon()
- return qMakePair(QWebEngineUrlRequestInfo::ResourceTypePing, exception);
-
- } else if(option.endsWith(QLatin1Literal("websocket"))) {
- // requests initiated via WebSocket object
- qDebug("Resource type 'websocket' not available");
-
- } else if(option.endsWith(QLatin1Literal("webrtc"))) {
- // connections opened via RTCPeerConnection instances to ICE servers
- qDebug("Resource type 'webrtc' not available");
-
- } else if(option.endsWith(QLatin1Literal("document"))) {
- // the page itself
- return qMakePair(QWebEngineUrlRequestInfo::ResourceTypeMainFrame, exception);
-
- } else if(option.endsWith(QLatin1Literal("other"))) {
- return qMakePair(QWebEngineUrlRequestInfo::ResourceTypeUnknown, exception);
- }
-
- return std::nullopt;
-}
-
-AdBlockRule::AdBlockRule(FilterLeaf::UrlMatchType matchType, const QString& filter, FilterLeaf::Action action)
+AdBlockRule::AdBlockRule(FilterLeaf::UrlMatchType matchType, const QString &filter, FilterLeaf::Action action)
{
this->matchType = matchType;
this->m_request = filter;
- this->m_isBlocking = (action == FilterLeaf::Block) ? true : false;
+ this->m_isBlocking = (action == FilterLeaf::Block);
+ //matcher.setPattern(filter);
+ if(matchType == FilterLeaf::RegularExpressionMatch)
+ regExp = new QRegExp(filter);
+ else
+ stringMatcher = new QStringMatcher(filter);
}
void AdBlockRule::mergeOptions(const QHash<QWebEngineUrlRequestInfo::ResourceType, bool> &options)
@@ -180,17 +27,36 @@ void AdBlockRule::mergeOptions(const QHash<QWebEngineUrlRequestInfo::ResourceTyp
this->resourceTypeOptions.unite(options);
}
-bool AdBlockRule::match(const QUrl& requestUrl) const
+bool AdBlockRule::match(const QUrl &requestUrl) const
{
switch(matchType) {
- case FilterLeaf::StringContains:
- return requestUrl.toString().contains(m_request);
+ case FilterLeaf::RegularExpressionMatch:
+ return (regExp->indexIn(requestUrl.toString()) != -1);
+ default:
+ return false;
+ }
+}
+
+bool AdBlockRule::match(const QUrl &requestUrl, QWebEngineUrlRequestInfo::ResourceType type) const
+{
+ // if request is of the required type, or there are no types set (== apply to all requests)
+ if(this->resourceTypeOptions.contains(type) || this->resourceTypeOptions.isEmpty()) {
+ switch(matchType) {
+ case FilterLeaf::RegularExpressionMatch:
+ return (regExp->indexIn(requestUrl.toString()) != -1);
default:
return false;
+ }
}
+
+ // request type is not matched
+ return false;
}
-FilterLeaf::Action AdBlockRule::action() const
+std::pair<FilterLeaf::Action, QVariant> AdBlockRule::action() const
{
- return m_isBlocking ? FilterLeaf::Block : FilterLeaf::Allow;
+ if(m_isBlocking)
+ return std::make_pair(FilterLeaf::Block, QVariant());
+ else
+ return std::make_pair(FilterLeaf::Allow, QVariant());
}