From 2a5ea0269a1f9511c51d661a6c7d7bdc7d0176fa Mon Sep 17 00:00:00 2001 From: Aqua-sama Date: Sun, 7 Oct 2018 13:20:54 +0200 Subject: Expand HTTP header settings #4 - add doc/Usage/Filter.asciidoc to explain the usage of the filter headers - add HTTP headers to Profile (section "headers") - Use request interceptor to apply filter headers, then profile headers - add insert/delete actions to ProfileEditor --- src/webengine/filter.cpp | 116 +++++++++++++++++++++++++++++++++++ src/webengine/filter.h | 45 ++++++++++++++ src/webengine/urlinterceptor.cpp | 127 +++++---------------------------------- src/webengine/urlinterceptor.h | 18 ++---- 4 files changed, 182 insertions(+), 124 deletions(-) create mode 100644 src/webengine/filter.cpp create mode 100644 src/webengine/filter.h (limited to 'src/webengine') diff --git a/src/webengine/filter.cpp b/src/webengine/filter.cpp new file mode 100644 index 0000000..b250843 --- /dev/null +++ b/src/webengine/filter.cpp @@ -0,0 +1,116 @@ +/* + * This file is part of smolbote. It's copyrighted by the contributors recorded + * in the version control history of the file, available from its original + * location: https://neueland.iserlohn-fortress.net/gitea/aqua/smolbote + * + * SPDX-License-Identifier: GPL-3.0 + */ + +#include "filter.h" +#include "urlinterceptor.h" +#include +#include +#include +#include +#include +#include "util.h" + + +QHash parseHostlist(const QString &filename) +{ + QHash rules; + + if(QFile hostfile(filename); hostfile.open(QIODevice::ReadOnly | QIODevice::Text)) { + + // with a QTextStream we can read lines without getting linebreaks at the end + QTextStream hostfile_stream(&hostfile); + + while(!hostfile_stream.atEnd()) { + + // read line and remove any whitespace at the end + const QString &line = hostfile_stream.readLine().trimmed(); + + // skip comments and empty lines + if(line.isEmpty() || line.startsWith('#')) + continue; + + // everything else should be a rule + // format is + // 0.0.0.0 hostname + const QStringList &parts = line.split(' '); + const QString &redirect = parts.at(0); + + for(auto i = parts.constBegin() + 1; i != parts.constEnd(); ++i) { + if(!rules.contains(*i)) { + Filter::HostRule rule{}; + rule.isBlocking = (redirect == "0.0.0.0"); + rules.insert(*i, rule); + } + } + + // for(const QString &host : parts.mid(1)) { + // if(!rules.contains(host)) { + // UrlRequestInterceptor::HostRule rule{}; + // rule.isBlocking = redirect == "0.0.0.0"; + // rules.insert(host, rule); + // } + // } + } + + hostfile.close(); + } + + return rules; +} +/* +inline std::vector parseAdBlockList(const QString &filename) +{ + std::vector rules; + QFile list(filename); + + if(list.open(QIODevice::ReadOnly | QIODevice::Text), true) { + QTextStream l(&list); + QString line; + while(l.readLineInto(&line)) { + AdBlockRule rule(line); + if(rule.isEnabled()) { + rules.emplace_back(std::move(rule)); + } + } + list.close(); + } + + return rules; +}*/ + +Filter::Filter::Filter(const std::unique_ptr &config, QObject* parent) + : QObject(parent) +{ + // parse headers + if(const auto headers = config->value("filter.header"); headers) { + for(const QString &header : headers.value()) { + const auto list = header.split(QLatin1Literal(":")); + if(list.length() == 2) + m_headers.insert(list.at(0).toLatin1(), list.at(1).toLatin1()); + } + } + + const QStringList hostfiles = Util::files(config->value("filter.path").value()); + for(const QString &hostfile : hostfiles) { + m_hostlist.unite(parseHostlist(hostfile)); + } + + /* + auto filtersPath = config->value("filter.adblock"); + if(filtersPath) + filters = parseAdBlockList(filtersPath.value()); + */ +} + +std::optional Filter::hostlistRule(const QString& url) const +{ + if(!m_hostlist.contains(url)) + return std::nullopt; + + return std::optional(m_hostlist.value(url)); +} diff --git a/src/webengine/filter.h b/src/webengine/filter.h new file mode 100644 index 0000000..3eac5ee --- /dev/null +++ b/src/webengine/filter.h @@ -0,0 +1,45 @@ +/* + * This file is part of smolbote. It's copyrighted by the contributors recorded + * in the version control history of the file, available from its original + * location: https://neueland.iserlohn-fortress.net/gitea/aqua/smolbote + * + * SPDX-License-Identifier: GPL-3.0 + */ + +#ifndef SMOLBOTE_FILTER_H +#define SMOLBOTE_FILTER_H + +#include "urlfilter/filterrule.h" +#include +#include +#include + +class Configuration; +class Filter : public QObject +{ + Q_OBJECT +public: + struct HostRule { + bool isBlocking; + }; + + explicit Filter(const std::unique_ptr &config, QObject *parent = nullptr); + ~Filter() override = default; + + const QHash hostlist() const + { + return qAsConst(m_hostlist); + } + std::optional hostlistRule(const QString &url) const; + + const QMap headers() const + { + return qAsConst(m_headers); + } + +private: + QHash m_hostlist; + QMap m_headers; +}; + +#endif // SMOLBOTE_FILTER_H diff --git a/src/webengine/urlinterceptor.cpp b/src/webengine/urlinterceptor.cpp index db4aea9..cf9b85f 100644 --- a/src/webengine/urlinterceptor.cpp +++ b/src/webengine/urlinterceptor.cpp @@ -14,127 +14,32 @@ #include #include #include +#include "filter.h" +#include -inline std::vector parseAdBlockList(const QString &filename) -{ - std::vector rules; - QFile list(filename); - - if(list.open(QIODevice::ReadOnly | QIODevice::Text), true) { - QTextStream l(&list); - QString line; - while(l.readLineInto(&line)) { - AdBlockRule rule(line); - if(rule.isEnabled()) { - rules.emplace_back(std::move(rule)); - } - } - list.close(); - } - - return rules; -} +// test DNT on https://browserleaks.com/donottrack -UrlRequestInterceptor::UrlRequestInterceptor(const std::unique_ptr &config, QObject *parent) +UrlRequestInterceptor::UrlRequestInterceptor(Filter* filter, WebProfile* profile, QObject* parent) : QWebEngineUrlRequestInterceptor(parent) { - QDir hostsD(config->value("filter.path").value()); - const QStringList hostFiles = hostsD.entryList(QDir::Files); - for(const QString &file : hostFiles) { - const QString absPath = hostsD.absoluteFilePath(file); - auto r = parse(absPath); -#ifdef QT_DEBUG - qDebug("Parsed %i rules from %s", r.count(), qUtf8Printable(absPath)); -#endif - - rules.unite(r); - } - - const auto header = config->value>("filter.header"); - if(header) { - for(const std::string &h : header.value()) { - std::vector s; - boost::split(s, h, boost::is_any_of(":=")); - auto pair = std::make_pair(s.at(0), s.at(1)); - m_headers.emplace_back(pair); - } - } - - auto filtersPath = config->value("filter.adblock"); - if(filtersPath) - filters = parseAdBlockList(filtersPath.value()); + Q_CHECK_PTR(filter); + m_filter = filter; + Q_CHECK_PTR(profile); + m_profile = profile; } -// test DNT on https://browserleaks.com/donottrack void UrlRequestInterceptor::interceptRequest(QWebEngineUrlRequestInfo &info) { - for(const Header &header : m_headers) { - info.setHttpHeader(QByteArray::fromStdString(header.first), QByteArray::fromStdString(header.second)); + auto hostlistCheck = m_filter->hostlistRule(info.requestUrl().host()); + if(hostlistCheck) { + info.block(hostlistCheck.value().isBlocking); } - if(rules.contains(info.requestUrl().host())) { - info.block(rules.value(info.requestUrl().host()).isBlocking); - return; + // set headers + for(auto i = m_filter->headers().constBegin(); i != m_filter->headers().constEnd(); ++i) { + info.setHttpHeader(i.key(), i.value()); } - - const uint domainHash = qHash(info.firstPartyUrl().host()); - const QWebEngineUrlRequestInfo::ResourceType type = info.resourceType(); - const QUrl requestUrl = info.requestUrl(); - for(const FilterRule &rule : filters) { - if(rule.matchesDomain(domainHash) && rule.matchesType(type) && rule.matchesUrl(requestUrl)) { - info.block(rule.isBlocking()); -#ifdef QT_DEBUG - qDebug("--> blocked %s", qUtf8Printable(info.requestUrl().toString())); -#endif - break; - } + for(auto i = m_profile->headers().constBegin(); i != m_profile->headers().constEnd(); ++i) { + info.setHttpHeader(i.key(), i.value()); } } - -QHash parse(const QString &filename) -{ - QHash rules; - - QFile hostfile(filename); - if(hostfile.open(QIODevice::ReadOnly | QIODevice::Text)) { - - // with a QTextStream we can read lines without getting linebreaks at the end - QTextStream hostfile_stream(&hostfile); - - while(!hostfile_stream.atEnd()) { - - // read line and remove any whitespace at the end - const QString &line = hostfile_stream.readLine().trimmed(); - - // skip comments and empty lines - if(line.isEmpty() || line.startsWith('#')) - continue; - - // everything else should be a rule - // format is - // 0.0.0.0 hostname - const QStringList &parts = line.split(' '); - const QString &redirect = parts.at(0); - - for(auto i = parts.constBegin() + 1; i != parts.constEnd(); ++i) { - if(!rules.contains(*i)) { - UrlRequestInterceptor::HostRule rule{}; - rule.isBlocking = (redirect == "0.0.0.0"); - rules.insert(*i, rule); - } - } - - // for(const QString &host : parts.mid(1)) { - // if(!rules.contains(host)) { - // UrlRequestInterceptor::HostRule rule{}; - // rule.isBlocking = redirect == "0.0.0.0"; - // rules.insert(host, rule); - // } - // } - } - - hostfile.close(); - } - - return rules; -} diff --git a/src/webengine/urlinterceptor.h b/src/webengine/urlinterceptor.h index 575e0c9..420a161 100644 --- a/src/webengine/urlinterceptor.h +++ b/src/webengine/urlinterceptor.h @@ -15,29 +15,21 @@ #include #include -typedef std::pair Header; - +class Filter; +class WebProfile; class Configuration; class UrlRequestInterceptor : public QWebEngineUrlRequestInterceptor { Q_OBJECT public: - struct HostRule { - bool isBlocking; - }; - - explicit UrlRequestInterceptor(const std::unique_ptr &config, QObject *parent = nullptr); + explicit UrlRequestInterceptor(Filter *filter, WebProfile *profile, QObject *parent = nullptr); ~UrlRequestInterceptor() override = default; void interceptRequest(QWebEngineUrlRequestInfo &info) override; private: - QHash rules; - std::vector filters; - std::vector
m_headers; + Filter *m_filter; + WebProfile *m_profile; }; -QHash parse(const QString &filename); -inline std::vector parseAdBlockList(const QString &filename); - #endif // SMOLBOTE_URLREQUESTINTERCEPTOR_H -- cgit v1.2.1