diff options
author | Aqua-sama <aqua@iserlohn-fortress.net> | 2017-02-05 15:12:45 +0100 |
---|---|---|
committer | Aqua-sama <aqua@iserlohn-fortress.net> | 2017-02-05 15:12:45 +0100 |
commit | 6d39c105cb453fdf071b9e0471b4ab62d75adb17 (patch) | |
tree | c31e1cf1b3b34eabdcaac8ef61b491a319dee298 /src | |
parent | More blocklist options (diff) | |
download | smolbote-6d39c105cb453fdf071b9e0471b4ab62d75adb17.tar.xz |
Added RegExp class for the Blocker
Diffstat (limited to 'src')
-rw-r--r-- | src/smolbote.qbs | 2 | ||||
-rw-r--r-- | src/webengine/blockerrule.cpp | 90 | ||||
-rw-r--r-- | src/webengine/blockerrule.h | 16 | ||||
-rw-r--r-- | src/webengine/regexp.cpp | 68 | ||||
-rw-r--r-- | src/webengine/regexp.h | 35 |
5 files changed, 144 insertions, 67 deletions
diff --git a/src/smolbote.qbs b/src/smolbote.qbs index e3b9d44..1f23534 100644 --- a/src/smolbote.qbs +++ b/src/smolbote.qbs @@ -53,6 +53,8 @@ Project { "webengine/downloaditemform.cpp", "webengine/downloaditemform.h", "webengine/downloaditemform.ui", + "webengine/regexp.cpp", + "webengine/regexp.h", "webengine/urlinterceptor.cpp", "webengine/urlinterceptor.h", "webengine/webengineprofile.cpp", diff --git a/src/webengine/blockerrule.cpp b/src/webengine/blockerrule.cpp index 4146e85..79669b9 100644 --- a/src/webengine/blockerrule.cpp +++ b/src/webengine/blockerrule.cpp @@ -28,7 +28,6 @@ BlockerRule::BlockerRule(QString rule, QObject *parent) : QObject(parent) { - m_rule = rule; QString pattern = rule; // Empty rule or comment @@ -37,6 +36,12 @@ BlockerRule::BlockerRule(QString rule, QObject *parent) : return; } + // Ignore element hiding rules for now + if(pattern.contains("##") || pattern.contains("#@#")) { + m_type = RuleType::Invalid; + return; + } + // Exception if(pattern.startsWith("@@")) { m_exception = true; @@ -89,13 +94,6 @@ BlockerRule::BlockerRule(QString rule, QObject *parent) : pattern.remove(pattern.indexOf("$"), pattern.length()); } - // Domain - if(pattern.startsWith("||")) { - m_type = RuleType::DomainMatch; - m_domain = pattern.mid(2, pattern.indexOf("^")-2); - return; - } - // Regular expression if(rule.startsWith("/") && rule.endsWith("/")) { m_type = RuleType::RegularExpressionMatch; @@ -103,39 +101,48 @@ BlockerRule::BlockerRule(QString rule, QObject *parent) : return; } + // Domain rules + if(pattern.startsWith("||")) { + pattern.remove(0, 2); + // find the end point for the domain + int end = pattern.indexOf(QRegularExpression("(?:[^\\w\\d\\_\\-\\.\\%]|$)"), 0); + domainExpression.setPattern(pattern.mid(0, end)); + pattern.remove(0, end+1); + } else if(pattern.startsWith("|") && pattern.endsWith("|")) { + pattern.remove(0, 1); + pattern.chop(1); + domainExpression.setPattern(pattern); + } else { + domainExpression.setPattern(".*"); + } + // Regular rule - ruleExpression.setPattern(fromWildcardMatch(pattern)); - m_type = RuleType::WildcardMatch; + ruleExpression.setWildcardPattern(pattern); + m_type = RuleType::RegularExpressionMatch; } bool BlockerRule::match(const QWebEngineUrlRequestInfo &info) { - bool shouldBlock = false; - switch (m_type) { case RuleType::Invalid: - shouldBlock = false; break; - case DomainMatch: - if(info.requestUrl().host() == m_domain) { - if(matchOptions(info, m_whitelistOptions)) { - shouldBlock = false; - } else { + case RuleType::RegularExpressionMatch: + if(domainExpression.match(info.requestUrl().host())) { + if(ruleExpression.match(info.requestUrl().toString())) { + if(matchOptions(info, m_whitelistOptions)) { + return false; + } if(matchOptions(info, m_blacklistOptions)) { - shouldBlock = true; + return true; } + return true; } } break; - - case RegularExpressionMatch: - case WildcardMatch: - shouldBlock = ruleExpression.match(info.requestUrl().toString()).hasMatch(); - break; } - return shouldBlock; + return false; } bool BlockerRule::isValid() @@ -153,38 +160,7 @@ bool BlockerRule::isException() QString BlockerRule::toString() const { - return m_rule; -} - -QString BlockerRule::fromWildcardMatch(const QString &pattern) -{ - QString parsed; - - for(int i=0; i<pattern.length(); i++) { - const QChar c = pattern.at(i); - switch (c.toLatin1()) { - case '*': - parsed.append(".*"); - break; - case '^': - parsed.append("(?:[^\\w\\d\\_\\-\\.\\%]|$)"); - break; - case '|': - if(i == 0) { - // beginning of string - parsed.append('^'); - } else { - // end of string - parsed.append('$'); - } - break; - default: - parsed.append(c); - break; - } - } - - return parsed; + return QString("On [%1]: %2 %3").arg(domainExpression.pattern()).arg(ruleExpression.pattern()).arg(QString::number(m_blacklistOptions, 2)); } bool BlockerRule::matchOptions(const QWebEngineUrlRequestInfo &info, const RuleOptions &options) diff --git a/src/webengine/blockerrule.h b/src/webengine/blockerrule.h index 810de6d..bdb2eb9 100644 --- a/src/webengine/blockerrule.h +++ b/src/webengine/blockerrule.h @@ -23,7 +23,7 @@ #include <QObject> #include <QUrl> -#include <QRegularExpression> +#include "regexp.h" #include <QWebEngineUrlRequestInfo> class BlockerRule : public QObject @@ -32,9 +32,7 @@ class BlockerRule : public QObject public: enum RuleType { Invalid, - DomainMatch, - RegularExpressionMatch, - WildcardMatch + RegularExpressionMatch }; enum RuleOption { @@ -70,16 +68,14 @@ signals: public slots: private: - // TODO: subclass QRegularExpression and move this there - QString fromWildcardMatch(const QString &pattern); bool matchOptions(const QWebEngineUrlRequestInfo &info, const RuleOptions &options); - QString m_rule; - RuleType m_type; bool m_exception = false; - QString m_domain; - QRegularExpression ruleExpression; + QStringList hostsBlacklist; + QStringList hostsWhitelist; + RegExp domainExpression; + RegExp ruleExpression; RuleOptions m_blacklistOptions; RuleOptions m_whitelistOptions; diff --git a/src/webengine/regexp.cpp b/src/webengine/regexp.cpp new file mode 100644 index 0000000..4a5878a --- /dev/null +++ b/src/webengine/regexp.cpp @@ -0,0 +1,68 @@ +/** LICENSE ******************************************************************** + ** + ** smolbote: yet another qute browser + ** Copyright (C) 2017 Xian Nox + ** + ** This program is free software: you can redistribute it and/or modify + ** it under the terms of the GNU General Public License as published by + ** the Free Software Foundation, either version 3 of the License, or + ** (at your option) any later version. + ** + ** This program is distributed in the hope that it will be useful, + ** but WITHOUT ANY WARRANTY; without even the implied warranty of + ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + ** GNU General Public License for more details. + ** + ** You should have received a copy of the GNU General Public License + ** along with this program. If not, see <http://www.gnu.org/licenses/>. + ** + ******************************************************************************/ + +#include "regexp.h" + +RegExp::RegExp() : + QRegularExpression() +{ +} + +bool RegExp::match(const QString &subject, int offset, MatchType matchType, MatchOptions matchOptions) const +{ + if(pattern().isEmpty()) { + return true; + } + return QRegularExpression::match(subject, offset, matchType, matchOptions).hasMatch(); +} + +void RegExp::setWildcardPattern(const QString &pattern) +{ + QString parsed; + + for(int i=0; i<pattern.length(); i++) { + const QChar c = pattern.at(i); + switch (c.toLatin1()) { + case '*': + // remove * at the start and end + if(i != 0 && i != pattern.length()-1) { + parsed.append(".*"); + } + break; + case '^': + parsed.append("(?:[^\\w\\d\\_\\-\\.\\%]|$)"); + break; + case '|': + if(i == 0) { + // beginning of string + parsed.append('^'); + } else { + // end of string + parsed.append('$'); + } + break; + default: + parsed.append(c); + break; + } + } + + setPattern(parsed); +} diff --git a/src/webengine/regexp.h b/src/webengine/regexp.h new file mode 100644 index 0000000..d66a98d --- /dev/null +++ b/src/webengine/regexp.h @@ -0,0 +1,35 @@ +/** LICENSE ******************************************************************** + ** + ** smolbote: yet another qute browser + ** Copyright (C) 2017 Xian Nox + ** + ** This program is free software: you can redistribute it and/or modify + ** it under the terms of the GNU General Public License as published by + ** the Free Software Foundation, either version 3 of the License, or + ** (at your option) any later version. + ** + ** This program is distributed in the hope that it will be useful, + ** but WITHOUT ANY WARRANTY; without even the implied warranty of + ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + ** GNU General Public License for more details. + ** + ** You should have received a copy of the GNU General Public License + ** along with this program. If not, see <http://www.gnu.org/licenses/>. + ** + ******************************************************************************/ + +#ifndef REGEXP_H +#define REGEXP_H + +#include <QRegularExpression> + +class RegExp : public QRegularExpression +{ +public: + explicit RegExp(); + + bool match(const QString &subject, int offset=0, MatchType matchType=NormalMatch, MatchOptions matchOptions=NoMatchOption) const; + void setWildcardPattern(const QString &pattern); +}; + +#endif // REGEXP_H |