aboutsummaryrefslogtreecommitdiff
path: root/lib/urlfilter/formats/adblockrule.cpp
blob: c5d6b58358ccfe28c0435e4b90a750dcc4960664 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
/*
 * This file is part of smolbote. It's copyrighted by the contributors recorded
 * in the version control history of the file, available from its original
 * location: https://neueland.iserlohn-fortress.net/gitea/aqua/smolbote
 *
 * SPDX-License-Identifier: GPL-3.0
 */
// Based on Falkon's AdBlockRule class

#include "adblockrule.h"

// adblock format documentation
// https://adblockplus.org/filters

// QString::mid(pos, len) - Returns the substring of len characters starting at position pos.
// QString::chop(len) - Removes len characters from the end of the string.
// QString::remove(pos, len) - Removes len characters from the string, starting at position pos.

AdBlockRule::AdBlockRule(const QString &filter)
{
    QString parsedLine = filter.trimmed();

    // there is no rule, or it's a comment
    if(parsedLine.isEmpty() || parsedLine.startsWith("!")) {
        return;
    }

    // css rule - ignore for now
    if(parsedLine.contains(QLatin1Literal("##")) || parsedLine.contains(QLatin1Literal("#@#"))) {
        return;
    }

    m_isEnabled = true;

    // exception rules
    if(parsedLine.startsWith(QLatin1Literal("@@"))) {
        m_isBlocking = false;
        parsedLine.remove(0, 2);
    } else
        m_isBlocking = true;

    // parse options
    {
        const int sepPos = parsedLine.indexOf(QLatin1Literal("$"));
        if(sepPos != -1) {
            const auto options = parsedLine.mid(sepPos + 1).split(QLatin1Literal(","));
            parsedLine = parsedLine.mid(0, sepPos);

            for(const QString &option : options) {
                if(option.startsWith(QLatin1Literal("domain"))) {
                    const auto domainList = option.mid(7).split(QLatin1Literal("|"));
                    for(const QString &domain : domainList) {
                        if(domain.startsWith(QLatin1Literal("~"))) {
                            blockedDomains_hashes.append(qHash(domain.mid(1)));
                        } else {
                            allowedDomains_hashes.append(qHash(domain));
                        }
                    }
                } else {
                    parseOption(option);
                }
            }
        }
    }

    // regular expression rule
    if(parsedLine.startsWith(QLatin1Literal("/")) && parsedLine.endsWith(QLatin1Literal("/"))) {
        parsedLine = parsedLine.mid(1, parsedLine.length() - 2);

        urlMatchType = RegularExpressionMatch;
        regexp.setPattern(parsedLine);
        return;
    }

    // string equals rule
    if(parsedLine.startsWith(QLatin1Literal("|")) && parsedLine.endsWith(QLatin1Literal("|"))) {
        urlMatchType = StringEquals;
        match = parsedLine.mid(1, parsedLine.length() - 2);
        return;
    }

    // Basic filter rules can use wildcards, which were supported by QRegExp,
    // but were deprecated in QRegularExpression.

    // remove beginning and ending wildcards
    if(parsedLine.startsWith(QLatin1Literal("*")))
        parsedLine = parsedLine.mid(1);

    if(parsedLine.endsWith(QLatin1Literal("*")))
        parsedLine.chop(1);

    if(parsedLine.startsWith(QLatin1Literal("||")) && parsedLine.endsWith(QLatin1Literal("^"))) {
        urlMatchType = DomainMatch;
        match = parsedLine.mid(2, parsedLine.length() - 3);
        return;
    }

    // check for wildcards and translate to regexp
    // wildcard "*" - any number of characters
    // separator "^" - end, ? or /
    if(parsedLine.contains(QLatin1Literal("*")) || parsedLine.contains(QLatin1Literal("^"))) {
        urlMatchType = RegularExpressionMatch;
        parsedLine.replace(QLatin1Literal("||"), QLatin1Literal("^\\w+://"));
        parsedLine.replace(QLatin1Literal("|"), QLatin1Literal("\\|"));
        parsedLine.replace(QLatin1Literal("*"), QLatin1Literal(".*"));
        parsedLine.replace(QLatin1Literal("^"), QLatin1Literal("($|\\?|\\/)"));
        regexp.setPattern(parsedLine);
        return;
    }

    match = parsedLine;
}
void AdBlockRule::parseOption(const QString &option)
{
    const bool exception = !option.startsWith(QLatin1Literal("~"));

    if(option.endsWith(QLatin1Literal("script"))) {
        //  external scripts loaded via HTML script tag
        m_resourceTypeOptions.insert(QWebEngineUrlRequestInfo::ResourceTypeScript, exception);

    } else if(option.endsWith(QLatin1Literal("image"))) {
        // regular images, typically loaded via HTML img tag
        m_resourceTypeOptions.insert(QWebEngineUrlRequestInfo::ResourceTypeImage, exception);

    } else if(option.endsWith(QLatin1Literal("stylesheet"))) {
        // external CSS stylesheet files
        m_resourceTypeOptions.insert(QWebEngineUrlRequestInfo::ResourceTypeStylesheet, exception);

    } else if(option.endsWith(QLatin1Literal("object"))) {
        // content handled by browser plugins, e.g. Flash or Java
        m_resourceTypeOptions.insert(QWebEngineUrlRequestInfo::ResourceTypeObject, exception);

    } else if(option.endsWith(QLatin1Literal("xmlhttprequest"))) {
        //  requests started using the XMLHttpRequest object or fetch() API
        m_resourceTypeOptions.insert(QWebEngineUrlRequestInfo::ResourceTypeXhr, exception);

    } else if(option.endsWith(QLatin1Literal("object-subrequest"))) {
        // requests started by plugins like Flash
        m_resourceTypeOptions.insert(QWebEngineUrlRequestInfo::ResourceTypePluginResource, exception);

    } else if(option.endsWith(QLatin1Literal("subdocument"))) {
        // embedded pages, usually included via HTML frames
        m_resourceTypeOptions.insert(QWebEngineUrlRequestInfo::ResourceTypeSubFrame, exception);

    } else if(option.endsWith(QLatin1Literal("ping"))) {
        // requests started by <a ping> or navigator.sendBeacon()
        m_resourceTypeOptions.insert(QWebEngineUrlRequestInfo::ResourceTypePing, exception);

    } else if(option.endsWith(QLatin1Literal("websocket"))) {
        // requests initiated via WebSocket object
        qDebug("Resource type 'websocket' not available");

    } else if(option.endsWith(QLatin1Literal("webrtc"))) {
        // connections opened via RTCPeerConnection instances to ICE servers
        qDebug("Resource type 'webrtc' not available");

    } else if(option.endsWith(QLatin1Literal("document"))) {
        // the page itself
        m_resourceTypeOptions.insert(QWebEngineUrlRequestInfo::ResourceTypeMainFrame, exception);

    } else if(option.endsWith(QLatin1Literal("other"))) {
        m_resourceTypeOptions.insert(QWebEngineUrlRequestInfo::ResourceTypeUnknown, exception);
    }
}