diff options
author | Aqua-sama <aqua@iserlohn-fortress.net> | 2017-05-21 20:24:57 +0200 |
---|---|---|
committer | Aqua-sama <aqua@iserlohn-fortress.net> | 2017-05-21 20:24:57 +0200 |
commit | 1a9e09332261d18ee892fc3613f16a0e80d115e0 (patch) | |
tree | 1bb1a158799dcc231b48aafc61d605e243f49314 /src/formats | |
parent | Updated CONTRIBUTING (diff) | |
download | smolbote-1a9e09332261d18ee892fc3613f16a0e80d115e0.tar.xz |
Reworking URL filter
Diffstat (limited to 'src/formats')
-rw-r--r-- | src/formats/adblockrule.cpp | 210 | ||||
-rw-r--r-- | src/formats/adblockrule.h | 83 |
2 files changed, 293 insertions, 0 deletions
diff --git a/src/formats/adblockrule.cpp b/src/formats/adblockrule.cpp new file mode 100644 index 0000000..b9c3730 --- /dev/null +++ b/src/formats/adblockrule.cpp @@ -0,0 +1,210 @@ +/******************************************************************************* + ** + ** smolbote: yet another qute browser + ** Copyright (C) 2017 Xian Nox + ** + ** This program is free software: you can redistribute it and/or modify + ** it under the terms of the GNU General Public License as published by + ** the Free Software Foundation, either version 3 of the License, or + ** (at your option) any later version. + ** + ** This program is distributed in the hope that it will be useful, + ** but WITHOUT ANY WARRANTY; without even the implied warranty of + ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + ** GNU General Public License for more details. + ** + ** You should have received a copy of the GNU General Public License + ** along with this program. If not, see <http://www.gnu.org/licenses/>. + ** + ******************************************************************************/ + +#include "blockerrule.h" + +/* AdBlock filter reference + * https://adblockplus.org/en/filters + * https://adblockplus.org/en/filter-cheatsheet + */ + +BlockerRule::BlockerRule(QString rule, QObject *parent) : + QObject(parent) +{ + m_filter = rule; + QString pattern = rule; + + // Empty rule or comment + if(pattern.trimmed().isEmpty() || pattern.startsWith("!")) { + m_valid = false; + return; + } + + // Exception + if(pattern.startsWith("@@")) { + m_exception = true; + pattern.remove(0, 2); + } + + // Ignore element hiding rules for now + if(pattern.contains("##") || pattern.contains("#@#")) { + m_valid = false; + return; + } + + // Options + if(pattern.contains("$")) { + QString opts = pattern.mid(pattern.indexOf("$")+1); + pattern.remove(pattern.indexOf("$"), pattern.length()); + + const QStringList optList = opts.split(','); + QStringList::const_iterator i; + for(i = optList.constBegin(); i != optList.constEnd(); ++i) { + parseOption(*i); + } + } + + // Regular expression + if(rule.startsWith("/") && rule.endsWith("/")) { + m_valid = true; + ruleExpression.setPattern(pattern); + return; + } + + // Domain rules + if(pattern.startsWith("||")) { + pattern.remove(0, 2); + // find the end point for the domain + int end = pattern.indexOf(QRegularExpression("(?:[^\\w\\d\\_\\-\\.\\%]|$)"), 0); + domainExpression.setPattern(pattern.mid(0, end)); + pattern.remove(0, end+1); + } else if(pattern.startsWith("|") && pattern.endsWith("|")) { + pattern.remove(0, 1); + pattern.chop(1); + domainExpression.setPattern(pattern); + } else { + domainExpression.setPattern(".*"); + } + + // Regular rule + ruleExpression.setWildcardPattern(pattern); + m_valid = true; +} + +bool BlockerRule::match(const QWebEngineUrlRequestInfo &info) +{ + if(!m_valid) { + return false; + } + + // if both domain and rule match + if(domainExpression.match(info.requestUrl().host()) && ruleExpression.match(info.requestUrl().toString())) { + + // option explicitly allows + if(matchOptions(info, m_whitelistOptions)) { + return false; + } + + // option explicitly bans + if(matchOptions(info, m_blacklistOptions)) { + return true; + } + + // no options, but both domain and rule match --> rule matches + return true; + } + + // domain and/or rule do not match + return false; +} + +bool BlockerRule::isValid() +{ + return m_valid; +} +bool BlockerRule::isException() +{ + return m_exception; +} + +QString BlockerRule::filter() const +{ + return m_filter; +} + +void BlockerRule::parseOption(const QString &opt) +{ + if(opt.startsWith("script")) { + m_blacklistOptions.setFlag(RuleOption::script, true); + } else if(opt.startsWith("~script")) { + m_whitelistOptions.setFlag(RuleOption::script, true); + + } else if(opt.startsWith("image")) { + m_blacklistOptions.setFlag(RuleOption::image, true); + } else if(opt.startsWith("~image")) { + m_whitelistOptions.setFlag(RuleOption::image, true); + + } else if(opt.startsWith("stylesheet")) { + m_blacklistOptions.setFlag(RuleOption::stylesheet, true); + } else if(opt.startsWith("~stylesheet")) { + m_whitelistOptions.setFlag(RuleOption::stylesheet, true); + + } else if(opt.startsWith("object")) { + m_blacklistOptions.setFlag(RuleOption::object, true); + } else if(opt.startsWith("~object")) { + m_whitelistOptions.setFlag(RuleOption::object, true); + + } else if(opt.startsWith("object-subrequest")) { + m_blacklistOptions.setFlag(RuleOption::objectsubrequest, true); + } else if(opt.startsWith("~object-subrequest")) { + m_whitelistOptions.setFlag(RuleOption::objectsubrequest, true); + + } else if(opt.startsWith("subdocument")) { + m_blacklistOptions.setFlag(RuleOption::subdocument, true); + } else if(opt.startsWith("~subdocument")) { + m_whitelistOptions.setFlag(RuleOption::subdocument, true); + } +} + +bool BlockerRule::matchOptions(const QWebEngineUrlRequestInfo &info, const RuleOptions &options) +{ + // no options are defined + if(options == 0) { + return false; + } + + bool hasOption = false; + switch (info.resourceType()) { + case QWebEngineUrlRequestInfo::ResourceTypeScript: + if(options.testFlag(RuleOption::script)) { + hasOption = true; + } + break; + case QWebEngineUrlRequestInfo::ResourceTypeImage: + if(options.testFlag(RuleOption::image)) { + hasOption = true; + } + break; + case QWebEngineUrlRequestInfo::ResourceTypeStylesheet: + if(options.testFlag(RuleOption::stylesheet)) { + hasOption = true; + } + break; + case QWebEngineUrlRequestInfo::ResourceTypeObject: + if(options.testFlag(RuleOption::object)) { + hasOption = true; + } + break; + case QWebEngineUrlRequestInfo::ResourceTypePluginResource: + if(options.testFlag(RuleOption::objectsubrequest)) { + hasOption = true; + } + break; + case QWebEngineUrlRequestInfo::ResourceTypeSubFrame: + if(options.testFlag(RuleOption::subdocument)) { + hasOption = true; + } + break; + default: + break; + } + + return hasOption; +} diff --git a/src/formats/adblockrule.h b/src/formats/adblockrule.h new file mode 100644 index 0000000..a98f699 --- /dev/null +++ b/src/formats/adblockrule.h @@ -0,0 +1,83 @@ +/******************************************************************************* + ** + ** smolbote: yet another qute browser + ** Copyright (C) 2017 Xian Nox + ** + ** This program is free software: you can redistribute it and/or modify + ** it under the terms of the GNU General Public License as published by + ** the Free Software Foundation, either version 3 of the License, or + ** (at your option) any later version. + ** + ** This program is distributed in the hope that it will be useful, + ** but WITHOUT ANY WARRANTY; without even the implied warranty of + ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + ** GNU General Public License for more details. + ** + ** You should have received a copy of the GNU General Public License + ** along with this program. If not, see <http://www.gnu.org/licenses/>. + ** + ******************************************************************************/ + +#ifndef ADBLOCKRULE_H +#define ADBLOCKRULE_H + +#include <QObject> +#include <QUrl> +#include "regexp.h" +#include <QWebEngineUrlRequestInfo> + +class BlockerRule : public QObject +{ + Q_OBJECT +public: + + // https://adblockplus.org/en/filters#options + enum RuleOption { + script = 1, + image = 2, + stylesheet = 4, + object = 8, + xmlhttprequest = 16, + objectsubrequest = 32, + subdocument = 64, + ping = 128, + websocket = 256, + document = 512, + elemhide = 1024, + generichide = 2048, + genericblock = 4096, + other = 8192 + }; + Q_DECLARE_FLAGS(RuleOptions, RuleOption) + + explicit BlockerRule(QString rule, QObject *parent = 0); + + bool match(const QWebEngineUrlRequestInfo &info); + bool isValid(); + bool isException(); + QString filter() const; + +signals: + +public slots: + +private: + void parseOption(const QString &opt); + bool matchOptions(const QWebEngineUrlRequestInfo &info, const RuleOptions &options); + + QString m_filter; + + bool m_valid; + bool m_exception = false; + bool m_elementRule; + + QStringList hostsBlacklist; + QStringList hostsWhitelist; + RegExp domainExpression; + RegExp ruleExpression; + + RuleOptions m_blacklistOptions; + RuleOptions m_whitelistOptions; +}; + +#endif // ADBLOCKRULE_H |