diff options
author | Aqua-sama <aqua@iserlohn-fortress.net> | 2017-05-21 20:24:57 +0200 |
---|---|---|
committer | Aqua-sama <aqua@iserlohn-fortress.net> | 2017-05-21 20:24:57 +0200 |
commit | 1a9e09332261d18ee892fc3613f16a0e80d115e0 (patch) | |
tree | 1bb1a158799dcc231b48aafc61d605e243f49314 /src/blocker | |
parent | Updated CONTRIBUTING (diff) | |
download | smolbote-1a9e09332261d18ee892fc3613f16a0e80d115e0.tar.xz |
Reworking URL filter
Diffstat (limited to 'src/blocker')
-rw-r--r-- | src/blocker/blockermanager.cpp | 4 | ||||
-rw-r--r-- | src/blocker/blockermanager.h | 6 | ||||
-rw-r--r-- | src/blocker/blockerrule.cpp | 170 | ||||
-rw-r--r-- | src/blocker/blockerrule.h | 75 | ||||
-rw-r--r-- | src/blocker/blockersubscription.cpp | 163 | ||||
-rw-r--r-- | src/blocker/blockersubscription.h | 17 | ||||
-rw-r--r-- | src/blocker/regexp.cpp | 6 | ||||
-rw-r--r-- | src/blocker/regexp.h | 4 |
8 files changed, 111 insertions, 334 deletions
diff --git a/src/blocker/blockermanager.cpp b/src/blocker/blockermanager.cpp index 6e3860d..8763502 100644 --- a/src/blocker/blockermanager.cpp +++ b/src/blocker/blockermanager.cpp @@ -36,7 +36,7 @@ BlockerManager::BlockerManager(QWidget *parent) : const QStringList subscriptions = sSettings->value("blocker.subscriptions").toStringList(); QStringList::const_iterator i; for(i = subscriptions.constBegin(); i != subscriptions.constEnd(); ++i) { - BlockerSubscription *sub = new BlockerSubscription(QUrl(*i), this); + FilterCollection *sub = new FilterCollection(QString(*i), this); m_subscriptions.append(sub); ui->tabWidget->addTab(sub, sub->name()); } @@ -47,7 +47,7 @@ BlockerManager::~BlockerManager() delete ui; } -QVector<BlockerSubscription *> BlockerManager::subscriptions() const +QVector<FilterCollection *> BlockerManager::subscriptions() const { return m_subscriptions; } diff --git a/src/blocker/blockermanager.h b/src/blocker/blockermanager.h index cffcf5f..cfa2110 100644 --- a/src/blocker/blockermanager.h +++ b/src/blocker/blockermanager.h @@ -27,7 +27,7 @@ namespace Ui { class UrlInterceptorDialog; } -class BlockerSubscription; +class FilterCollection; class BlockerManager : public QDialog { Q_OBJECT @@ -36,11 +36,11 @@ public: explicit BlockerManager(QWidget *parent = 0); ~BlockerManager(); - QVector<BlockerSubscription* > subscriptions() const; + QVector<FilterCollection* > subscriptions() const; private: Ui::UrlInterceptorDialog *ui; - QVector<BlockerSubscription *> m_subscriptions; + QVector<FilterCollection *> m_subscriptions; }; #endif // URLINTERCEPTORDIALOG_H diff --git a/src/blocker/blockerrule.cpp b/src/blocker/blockerrule.cpp index b9c3730..ca4f339 100644 --- a/src/blocker/blockerrule.cpp +++ b/src/blocker/blockerrule.cpp @@ -20,72 +20,17 @@ #include "blockerrule.h" -/* AdBlock filter reference - * https://adblockplus.org/en/filters - * https://adblockplus.org/en/filter-cheatsheet - */ - -BlockerRule::BlockerRule(QString rule, QObject *parent) : +BlockerRule::BlockerRule(RegExp firstPartyUrl, RegExp requestUrl, NavigationType nav, ResourceType res, bool shouldBlock, QObject *parent) : QObject(parent) { - m_filter = rule; - QString pattern = rule; - - // Empty rule or comment - if(pattern.trimmed().isEmpty() || pattern.startsWith("!")) { - m_valid = false; - return; - } - - // Exception - if(pattern.startsWith("@@")) { - m_exception = true; - pattern.remove(0, 2); - } - - // Ignore element hiding rules for now - if(pattern.contains("##") || pattern.contains("#@#")) { - m_valid = false; - return; - } - - // Options - if(pattern.contains("$")) { - QString opts = pattern.mid(pattern.indexOf("$")+1); - pattern.remove(pattern.indexOf("$"), pattern.length()); - - const QStringList optList = opts.split(','); - QStringList::const_iterator i; - for(i = optList.constBegin(); i != optList.constEnd(); ++i) { - parseOption(*i); - } - } - - // Regular expression - if(rule.startsWith("/") && rule.endsWith("/")) { - m_valid = true; - ruleExpression.setPattern(pattern); - return; - } - - // Domain rules - if(pattern.startsWith("||")) { - pattern.remove(0, 2); - // find the end point for the domain - int end = pattern.indexOf(QRegularExpression("(?:[^\\w\\d\\_\\-\\.\\%]|$)"), 0); - domainExpression.setPattern(pattern.mid(0, end)); - pattern.remove(0, end+1); - } else if(pattern.startsWith("|") && pattern.endsWith("|")) { - pattern.remove(0, 1); - pattern.chop(1); - domainExpression.setPattern(pattern); - } else { - domainExpression.setPattern(".*"); - } + m_firstPartyUrl = firstPartyUrl; + m_requestUrl = requestUrl; + m_navRules = nav; + m_resRules = res; + m_shouldBlock = shouldBlock; - // Regular rule - ruleExpression.setWildcardPattern(pattern); m_valid = true; + } bool BlockerRule::match(const QWebEngineUrlRequestInfo &info) @@ -95,20 +40,11 @@ bool BlockerRule::match(const QWebEngineUrlRequestInfo &info) } // if both domain and rule match - if(domainExpression.match(info.requestUrl().host()) && ruleExpression.match(info.requestUrl().toString())) { - - // option explicitly allows - if(matchOptions(info, m_whitelistOptions)) { - return false; - } + if(m_firstPartyUrl.hasMatch(info.firstPartyUrl().toString()) && m_requestUrl.hasMatch(info.requestUrl().toString())) { - // option explicitly bans - if(matchOptions(info, m_blacklistOptions)) { - return true; - } + // TODO: check options - // no options, but both domain and rule match --> rule matches - return true; + return m_shouldBlock; } // domain and/or rule do not match @@ -119,92 +55,8 @@ bool BlockerRule::isValid() { return m_valid; } -bool BlockerRule::isException() -{ - return m_exception; -} QString BlockerRule::filter() const { - return m_filter; -} - -void BlockerRule::parseOption(const QString &opt) -{ - if(opt.startsWith("script")) { - m_blacklistOptions.setFlag(RuleOption::script, true); - } else if(opt.startsWith("~script")) { - m_whitelistOptions.setFlag(RuleOption::script, true); - - } else if(opt.startsWith("image")) { - m_blacklistOptions.setFlag(RuleOption::image, true); - } else if(opt.startsWith("~image")) { - m_whitelistOptions.setFlag(RuleOption::image, true); - - } else if(opt.startsWith("stylesheet")) { - m_blacklistOptions.setFlag(RuleOption::stylesheet, true); - } else if(opt.startsWith("~stylesheet")) { - m_whitelistOptions.setFlag(RuleOption::stylesheet, true); - - } else if(opt.startsWith("object")) { - m_blacklistOptions.setFlag(RuleOption::object, true); - } else if(opt.startsWith("~object")) { - m_whitelistOptions.setFlag(RuleOption::object, true); - - } else if(opt.startsWith("object-subrequest")) { - m_blacklistOptions.setFlag(RuleOption::objectsubrequest, true); - } else if(opt.startsWith("~object-subrequest")) { - m_whitelistOptions.setFlag(RuleOption::objectsubrequest, true); - - } else if(opt.startsWith("subdocument")) { - m_blacklistOptions.setFlag(RuleOption::subdocument, true); - } else if(opt.startsWith("~subdocument")) { - m_whitelistOptions.setFlag(RuleOption::subdocument, true); - } -} - -bool BlockerRule::matchOptions(const QWebEngineUrlRequestInfo &info, const RuleOptions &options) -{ - // no options are defined - if(options == 0) { - return false; - } - - bool hasOption = false; - switch (info.resourceType()) { - case QWebEngineUrlRequestInfo::ResourceTypeScript: - if(options.testFlag(RuleOption::script)) { - hasOption = true; - } - break; - case QWebEngineUrlRequestInfo::ResourceTypeImage: - if(options.testFlag(RuleOption::image)) { - hasOption = true; - } - break; - case QWebEngineUrlRequestInfo::ResourceTypeStylesheet: - if(options.testFlag(RuleOption::stylesheet)) { - hasOption = true; - } - break; - case QWebEngineUrlRequestInfo::ResourceTypeObject: - if(options.testFlag(RuleOption::object)) { - hasOption = true; - } - break; - case QWebEngineUrlRequestInfo::ResourceTypePluginResource: - if(options.testFlag(RuleOption::objectsubrequest)) { - hasOption = true; - } - break; - case QWebEngineUrlRequestInfo::ResourceTypeSubFrame: - if(options.testFlag(RuleOption::subdocument)) { - hasOption = true; - } - break; - default: - break; - } - - return hasOption; + return m_firstPartyUrl.pattern() + m_requestUrl.pattern(); } diff --git a/src/blocker/blockerrule.h b/src/blocker/blockerrule.h index a98f699..758366d 100644 --- a/src/blocker/blockerrule.h +++ b/src/blocker/blockerrule.h @@ -31,30 +31,48 @@ class BlockerRule : public QObject Q_OBJECT public: - // https://adblockplus.org/en/filters#options - enum RuleOption { - script = 1, - image = 2, - stylesheet = 4, - object = 8, - xmlhttprequest = 16, - objectsubrequest = 32, - subdocument = 64, - ping = 128, - websocket = 256, - document = 512, - elemhide = 1024, - generichide = 2048, - genericblock = 4096, - other = 8192 + enum TypeState { + Allow = 1, + Deny = 2, + None = 0 }; - Q_DECLARE_FLAGS(RuleOptions, RuleOption) - explicit BlockerRule(QString rule, QObject *parent = 0); + struct NavigationType { + TypeState link; + TypeState typed; + TypeState form; + TypeState history; + TypeState reload; + TypeState other; + }; + + struct ResourceType { + TypeState MainFrame; + TypeState SubFrame; + TypeState Stylesheet; + TypeState Script; + TypeState FontResource; + TypeState SubResource; + TypeState Object; + TypeState Media; + TypeState Worker; + TypeState SharedWorker; + TypeState Prefetch; + TypeState Favicon; + TypeState Xhr; + TypeState Ping; + TypeState ServiceWorker; + TypeState CspWorker; + TypeState PluginResource; + TypeState Unknown; + }; + + explicit BlockerRule(RegExp firstPartyUrl, RegExp requestUrl, NavigationType nav, ResourceType res, bool shouldBlock, QObject *parent = 0); bool match(const QWebEngineUrlRequestInfo &info); + bool isValid(); - bool isException(); + QString filter() const; signals: @@ -62,22 +80,15 @@ signals: public slots: private: - void parseOption(const QString &opt); - bool matchOptions(const QWebEngineUrlRequestInfo &info, const RuleOptions &options); - - QString m_filter; + RegExp m_firstPartyUrl; + RegExp m_requestUrl; - bool m_valid; - bool m_exception = false; - bool m_elementRule; + NavigationType m_navRules; + ResourceType m_resRules; - QStringList hostsBlacklist; - QStringList hostsWhitelist; - RegExp domainExpression; - RegExp ruleExpression; + bool m_valid = false; + bool m_shouldBlock; - RuleOptions m_blacklistOptions; - RuleOptions m_whitelistOptions; }; #endif // ADBLOCKRULE_H diff --git a/src/blocker/blockersubscription.cpp b/src/blocker/blockersubscription.cpp index ea95d57..c452511 100644 --- a/src/blocker/blockersubscription.cpp +++ b/src/blocker/blockersubscription.cpp @@ -25,38 +25,40 @@ #include <QNetworkRequest> #include <QNetworkReply> -BlockerSubscription::BlockerSubscription(const QUrl url, QWidget *parent) : +#include <QJsonDocument> +#include <QJsonObject> +#include <QJsonArray> + +FilterCollection::FilterCollection(const QString path, QWidget *parent) : QWidget(parent), ui(new Ui::SubscriptionForm) { ui->setupUi(this); - m_name = url.fileName(); - m_url = url; - - QString subPath = sSettings->value("blocker.path").toString() + m_name; - qDebug("Adding subscription [%s]", qUtf8Printable(subPath)); - QFile *sub = new QFile(subPath); - if(sub->exists()) { - sub->open(QIODevice::ReadOnly); - load(sub); - } else { - if(!url.scheme().startsWith("http")) { - qWarning("Invalid url, subscription cannot be updated"); - sub->deleteLater(); - return; - } - qDebug("Subscription doesn't exist, updating..."); - sub->open(QIODevice::ReadWrite); - update(sub); + + m_name = "TODO"; + m_path = path; + + qDebug("Adding subscription [%s]", qUtf8Printable(m_path)); + + + QFile filterFile(m_path); + if(!filterFile.open(QIODevice::ReadOnly)) { + qWarning("Could not open filter!"); + return; } + + QJsonDocument filters(QJsonDocument::fromJson(filterFile.readAll())); + load(filters.object()); + + qDebug("Added %i rules", m_rules.size()); } -BlockerSubscription::~BlockerSubscription() +FilterCollection::~FilterCollection() { delete ui; } -QString BlockerSubscription::name() const +QString FilterCollection::name() const { return m_name; } @@ -66,29 +68,15 @@ QString BlockerSubscription::name() const * @param info * @return true if it should be blocked; false otherwise */ -BlockerSubscription::MatchResult BlockerSubscription::match(QWebEngineUrlRequestInfo &info) +FilterCollection::MatchResult FilterCollection::match(QWebEngineUrlRequestInfo &info) { MatchResult result; - for(auto rule : qAsConst(m_urlWhitelist)) { - if(rule->match(info)) { - // this request is whitelisted - result.match = true; - result.block = false; - result.pattern = rule->filter(); - return result; - } - } - - // request is not in the whitelist - for(auto rule : qAsConst(m_urlBlacklist)) { - if(rule->match(info)) { - // this request is blacklisted - result.match = true; - result.block = true; - result.pattern = rule->filter(); - return result; - } + for(auto rule : qAsConst(m_rules)) { + result.match = true; + result.block = rule->match(info); + result.pattern = rule->filter(); + return result; } // request matches neither whitelist nor blacklist @@ -97,93 +85,20 @@ BlockerSubscription::MatchResult BlockerSubscription::match(QWebEngineUrlRequest return result; } -void BlockerSubscription::update(QFile *cache) +void FilterCollection::load(const QJsonObject &json) { - // no cache path specified - pull the subscription - QNetworkRequest request; - request.setUrl(m_url); - - QNetworkReply *reply = sNetwork->get(request); - connect(reply, &QNetworkReply::readyRead, [this, reply, cache]() { - cache->write(reply->readAll()); - }); - connect(reply, &QNetworkReply::finished, [this, reply, cache]() { - cache->write(reply->readAll()); - cache->flush(); - cache->seek(0); - reply->deleteLater(); - qDebug("Subscription updated: [%s]", qUtf8Printable(this->m_name)); - this->load(cache); - }); + for(QJsonValue v : json["rules"].toArray()) { + m_rules.append(createRule(v.toObject())); + } } -void BlockerSubscription::load(QIODevice *dev) +BlockerRule* FilterCollection::createRule(const QJsonObject &obj) { - QTextStream subscription(dev); - - QString header = subscription.readLine(); - if(!header.startsWith("[Adblock Plus")) { - qWarning("Invalid format of subscription: %s", qUtf8Printable(m_name)); - } - - // clear all lists - m_urlBlacklist.clear(); - m_urlWhitelist.clear(); - int rules = 0; - - while(!subscription.atEnd()) { - QString line = subscription.readLine(); - if(!line.isEmpty()) { - if(line.startsWith('!')) { - parseComment(line); - } else { - // The line is not empty or a comment - rules++; - BlockerRule *rule = new BlockerRule(line, this); - - if(rule->isValid()) { - if(rule->isException()) { - m_urlWhitelist.append(rule); - ui->whitelist_listWidget->addItem(rule->filter()); - } else { - ui->blacklist_listWidget->addItem(rule->filter()); - m_urlBlacklist.append(rule); - } - } - - } - } // line.isEmpty - } // subscription.atEnd() - - qDebug("Loaded %i/%i rules from subscription %s", m_urlBlacklist.count() + m_urlWhitelist.count(), rules, qUtf8Printable(m_name)); - dev->deleteLater(); -} + BlockerRule *rule; + BlockerRule::NavigationType n; + BlockerRule::ResourceType r; + rule = new BlockerRule(RegExp(obj["firstPartyUrl"].toString()), RegExp(obj["requestUrl"].toString()), n, r, obj["shouldBlock"].toBool(), this); -void BlockerSubscription::parseComment(const QString &line) -{ - if(line.startsWith("! Title: ")) { - ui->title->setText(line.right(line.length() - 9)); - return; - } - if(line.startsWith("! Homepage: ")) { - ui->homepage->setText(line.right(line.length() - 12)); - return; - } - if(line.startsWith("! Licence: ")) { - ui->license->setText(line.right(line.length() - 11)); - return; - } - if(line.startsWith("! Version: ")) { - ui->version->setText(line.right(line.length() - 11)); - return; - } - if(line.startsWith("! Last modified: ")) { - ui->lastModified->setText(line.right(line.length() - 17)); - return; - } - if(line.startsWith("! Expires: ")) { - ui->expires->setText(line.right(line.length() - 11).left(2)); - return; - } + return rule; } diff --git a/src/blocker/blockersubscription.h b/src/blocker/blockersubscription.h index a0e051a..26ef7e2 100644 --- a/src/blocker/blockersubscription.h +++ b/src/blocker/blockersubscription.h @@ -29,7 +29,7 @@ namespace Ui { class SubscriptionForm; } -class BlockerSubscription : public QWidget +class FilterCollection : public QWidget { Q_OBJECT @@ -40,25 +40,24 @@ public: QString pattern; }; - explicit BlockerSubscription(const QUrl url, QWidget *parent = 0); - ~BlockerSubscription(); + explicit FilterCollection(const QString path, QWidget *parent = 0); + ~FilterCollection(); QString name() const; MatchResult match(QWebEngineUrlRequestInfo &info); private slots: - void update(QFile *cache); - void load(QIODevice *dev); + void load(const QJsonObject &json); private: - void parseComment(const QString &line); + + BlockerRule* createRule(const QJsonObject &obj); Ui::SubscriptionForm *ui; QString m_name; - QUrl m_url; + QString m_path; - QVector<BlockerRule*> m_urlWhitelist; // exception rules - QVector<BlockerRule*> m_urlBlacklist; // block rules + QVector<BlockerRule*> m_rules; }; diff --git a/src/blocker/regexp.cpp b/src/blocker/regexp.cpp index 30fe00e..8560455 100644 --- a/src/blocker/regexp.cpp +++ b/src/blocker/regexp.cpp @@ -20,12 +20,12 @@ #include "regexp.h" -RegExp::RegExp() : - QRegularExpression() +RegExp::RegExp(const QString &pattern, PatternOptions options) : + QRegularExpression(pattern, options) { } -bool RegExp::match(const QString &subject, int offset, MatchType matchType, MatchOptions matchOptions) const +bool RegExp::hasMatch(const QString &subject, int offset, MatchType matchType, MatchOptions matchOptions) const { // Empty matches all if(pattern().isEmpty()) { diff --git a/src/blocker/regexp.h b/src/blocker/regexp.h index 183fa35..86cedfd 100644 --- a/src/blocker/regexp.h +++ b/src/blocker/regexp.h @@ -29,9 +29,9 @@ class RegExp : public QRegularExpression { public: - explicit RegExp(); + explicit RegExp(const QString &pattern = "", PatternOptions options = NoPatternOption); - bool match(const QString &subject, int offset=0, MatchType matchType=NormalMatch, MatchOptions matchOptions=NoMatchOption) const; + bool hasMatch(const QString &subject, int offset=0, MatchType matchType=NormalMatch, MatchOptions matchOptions=NoMatchOption) const; void setWildcardPattern(const QString &pattern); }; |