aboutsummaryrefslogtreecommitdiff
path: root/src/webengine/urlinterceptor.cpp
blob: 06464ae11090e78cc2ba127cacf89e7f3e71e37c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
/*
 * This file is part of smolbote. It's copyrighted by the contributors recorded
 * in the version control history of the file, available from its original
 * location: https://neueland.iserlohn-fortress.net/gitea/aqua/smolbote
 *
 * SPDX-License-Identifier: GPL-3.0
 */

#include "urlinterceptor.h"
#include "web/urlfilter/adblockrule.h"
#include <QDir>
#include <QJsonArray>
#include <QJsonDocument>
#include <QTextStream>
#include <boost/algorithm/string.hpp>
#include <configuration/configuration.h>

// Reads an AdBlock-style filter list from `filename`, one rule per line.
//
// Every line is handed to the AdBlockRule constructor; only rules that report
// isEnabled() are kept. When the file cannot be opened, an empty vector is
// returned.
inline std::vector<FilterRule> parseAdBlockList(const QString &filename)
{
    std::vector<FilterRule> rules;
    QFile list(filename);

    // Test the actual result of open(). The previous `open(...), true`
    // expression used the comma operator, which discarded that result and
    // always entered the read loop.
    if(!list.open(QIODevice::ReadOnly | QIODevice::Text)) {
        return rules;
    }

    QTextStream l(&list);
    QString line;
    while(l.readLineInto(&line)) {
        AdBlockRule rule(line);
        if(rule.isEnabled()) {
            rules.emplace_back(std::move(rule));
            //qDebug("added rule: %s", qUtf8Printable(line));
        }
    }
    list.close();

    return rules;
}

// Builds the interceptor from the "filter.*" configuration entries:
//  - filter.path:    directory whose files are each read with parse() and
//                    merged into `rules`
//  - filter.header:  list of "name:value" or "name=value" strings stored in
//                    `m_headers`
//  - filter.adblock: a filter list file read with parseAdBlockList() into
//                    `filters`
// Every entry is optional; a missing entry leaves its container empty.
UrlRequestInterceptor::UrlRequestInterceptor(const std::unique_ptr<Configuration> &config, QObject *parent)
    : QWebEngineUrlRequestInterceptor(parent)
{
    // Guard the lookup like the other two entries below instead of calling
    // value() on a result that may be empty.
    const auto hostsPath = config->value<QString>("filter.path");
    if(hostsPath) {
        QDir hostsD(hostsPath.value());
        const QStringList hostFiles = hostsD.entryList(QDir::Files);
        for(const QString &file : hostFiles) {
            const QString absPath = hostsD.absoluteFilePath(file);
            auto r = parse(absPath);
#ifdef QT_DEBUG
            qDebug("Parsed %i rules from %s", r.count(), qUtf8Printable(absPath));
#endif

            rules.unite(r);
        }
    }

    const auto header = config->value<std::vector<std::string>>("filter.header");
    if(header) {
        for(const std::string &h : header.value()) {
            // Split on the FIRST ':' or '=' only, so values that contain
            // either character (e.g. "q=0.5", URLs) are kept intact.
            const auto separator = h.find_first_of(":=");

            // Skip entries with no separator or an empty name instead of
            // throwing std::out_of_range out of the constructor.
            if(separator == std::string::npos || separator == 0) {
                continue;
            }

            m_headers.emplace_back(std::make_pair(h.substr(0, separator), h.substr(separator + 1)));
        }
    }

    auto filtersPath = config->value<QString>("filter.adblock");
    if(filtersPath) {
        // The returned temporary is moved from automatically; no std::move needed.
        filters = parseAdBlockList(filtersPath.value());
    }
}

// Applies the configured headers to the request, then decides whether it is
// blocked: a host rule for the request host takes precedence; otherwise the
// first matching filter rule decides.
// test DNT on https://browserleaks.com/donottrack
void UrlRequestInterceptor::interceptRequest(QWebEngineUrlRequestInfo &info)
{
    // custom headers are set on every request, before any blocking decision
    for(const Header &extra : m_headers) {
        const auto name = QByteArray::fromStdString(extra.first);
        const auto value = QByteArray::fromStdString(extra.second);
        info.setHttpHeader(name, value);
    }

    const auto requestUrl = info.requestUrl();

    // host rules: an entry for the request host settles the request
    const auto hostRule = rules.constFind(requestUrl.host());
    if(hostRule != rules.constEnd()) {
        info.block(hostRule->isBlocking);
        return;
    }

    // filter rules: checked in order, the first full match wins
    const auto firstPartyHost = info.firstPartyUrl().host();
    const auto resourceType = info.resourceType();
    for(const FilterRule &candidate : filters) {
        if(!candidate.matchesDomain(firstPartyHost)) {
            continue;
        }
        if(!candidate.matchesType(resourceType)) {
            continue;
        }
        if(!candidate.matchesUrl(requestUrl)) {
            continue;
        }

        info.block(candidate.isBlocking());
#ifdef QT_DEBUG
        qDebug("--> blocked %s", qUtf8Printable(requestUrl.toString()));
        qDebug("- %s", qUtf8Printable(candidate.toString()));
#endif
        return;
    }
}

// Reads a hosts-format file and returns one HostRule per host name.
//
// Expected line format: <redirect> <host> [<host> ...]
//     0.0.0.0 hostname
// Fields may be separated by any run of spaces or tabs. Empty lines and lines
// starting with '#' are skipped, and a token starting with '#' ends the line
// (inline comment). A rule is blocking only when the redirect field is exactly
// "0.0.0.0". The first occurrence of a host wins; later duplicates are
// ignored. When the file cannot be opened, an empty hash is returned.
QHash<QString, UrlRequestInterceptor::HostRule> parse(const QString &filename)
{
    QHash<QString, UrlRequestInterceptor::HostRule> rules;

    QFile hostfile(filename);
    if(!hostfile.open(QIODevice::ReadOnly | QIODevice::Text))
        return rules;

    // with a QTextStream we can read lines without getting linebreaks at the end
    QTextStream hostfile_stream(&hostfile);

    while(!hostfile_stream.atEnd()) {

        // simplified() trims both ends and collapses every whitespace run
        // (tabs included) into a single space, so splitting on ' ' below never
        // yields empty fields
        const QString line = hostfile_stream.readLine().simplified();

        // skip comments and empty lines
        if(line.isEmpty() || line.startsWith('#'))
            continue;

        // everything else should be a rule; line is non-empty here, so
        // parts always has at least one element
        const QStringList parts = line.split(' ');
        const QString &redirect = parts.at(0);
        const bool isBlocking = (redirect == "0.0.0.0");

        for(auto i = parts.constBegin() + 1; i != parts.constEnd(); ++i) {
            // the rest of the line is an inline comment, not host names
            if(i->startsWith('#'))
                break;

            if(!rules.contains(*i)) {
                UrlRequestInterceptor::HostRule rule{};
                rule.isBlocking = isBlocking;
                rules.insert(*i, rule);
            }
        }
    }

    hostfile.close();

    return rules;
}