aboutsummaryrefslogtreecommitdiff
path: root/lib/web/urlfilter/adblockrule.cpp
blob: 58b194114f3b70bcefcd063a2354855889f058d6 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
/*
 * This file is part of smolbote. It's copyrighted by the contributors recorded
 * in the version control history of the file, available from its original
 * location: https://neueland.iserlohn-fortress.net/smolbote.hg
 *
 * SPDX-License-Identifier: GPL-3.0
 */
// Based on Falkon's AdBlockRule class

#include "adblockrule.h"

// adblock format documentation
// https://adblockplus.org/filters

// QString::mid(pos, len) - Returns a string of len characters, starting at the specified position index.
// QString::chop(n) - Removes n characters from the end of the string.
// QString::remove(pos, len) - Removes len characters from the string, starting at the given position index.

AdBlockRule::AdBlockRule(const QString &filter)
{
    originalFilter = filter;
    QString parsedLine = filter.trimmed();

    // there is no rule, or it's a comment
    if(parsedLine.isEmpty() || parsedLine.startsWith("!")) {
        return;
    }

    // css rule - ignore for now
    if(parsedLine.contains(QLatin1Literal("##")) || parsedLine.contains(QLatin1Literal("#@#"))) {
        return;
    }

    m_isEnabled = true;

    // exception rules
    if(parsedLine.startsWith(QLatin1Literal("@@"))) {
        m_isBlocking = false;
        parsedLine.remove(0, 2);
    } else
        m_isBlocking = true;

    // parse options
    {
        const int sepPos = parsedLine.indexOf(QLatin1Literal("$"));
        if(sepPos != -1) {
            const auto options = parsedLine.mid(sepPos + 1).split(QLatin1Literal(","));
            parsedLine = parsedLine.mid(0, sepPos);

            for(const QString &option : options) {
                if(option.startsWith(QLatin1Literal("domain"))) {
                    const auto domainList = option.mid(7).split(QLatin1Literal("|"));
                    for(const QString &domain : domainList) {
                        if(domain.startsWith(QLatin1Literal("~")))
                            blockedDomains.append(domain.mid(1));
                        else
                            allowedDomains.append(domain);
                    }
                } else if(option.endsWith(QLatin1Literal("script"))) {
                    m_resourceTypeOptions.insert(QWebEngineUrlRequestInfo::ResourceTypeScript, !option.startsWith(QLatin1Literal("~")));

                } else if(option.endsWith(QLatin1Literal("image"))) {
                    m_resourceTypeOptions.insert(QWebEngineUrlRequestInfo::ResourceTypeImage, !option.startsWith(QLatin1Literal("~")));

                } else if(option.endsWith(QLatin1Literal("stylesheet"))) {
                    m_resourceTypeOptions.insert(QWebEngineUrlRequestInfo::ResourceTypeStylesheet, !option.startsWith(QLatin1Literal("~")));

                } else if(option.endsWith(QLatin1Literal("object"))) {
                    m_resourceTypeOptions.insert(QWebEngineUrlRequestInfo::ResourceTypeObject, !option.startsWith(QLatin1Literal("~")));

                } else if(option.endsWith(QLatin1Literal("xmlhttprequest"))) {
                    m_resourceTypeOptions.insert(QWebEngineUrlRequestInfo::ResourceTypeXhr, !option.startsWith(QLatin1Literal("~")));

                } else if(option.endsWith(QLatin1Literal("other"))) {
                    m_resourceTypeOptions.insert(QWebEngineUrlRequestInfo::ResourceTypeUnknown, !option.startsWith(QLatin1Literal("~")));
                }
            }
        }
    }

    // regular expression rule
    if(parsedLine.startsWith(QLatin1Literal("/")) && parsedLine.endsWith(QLatin1Literal("/"))) {
        parsedLine = parsedLine.mid(1, parsedLine.length() - 2);

        urlMatchType = RegularExpressionMatch;
        regexp.setPattern(parsedLine);
        return;
    }

    // string equals rule
    if(parsedLine.startsWith(QLatin1Literal("|")) && parsedLine.endsWith(QLatin1Literal("|"))) {
        urlMatchType = StringEquals;
        match = parsedLine.mid(1, parsedLine.length() - 2);
        return;
    }

    // Basic filter rules can use wildcards, which were supported by QRegExp,
    // but were deprecated in QRegularExpression.

    // remove beginning and ending wildcards
    if(parsedLine.startsWith(QLatin1Literal("*")))
        parsedLine = parsedLine.mid(1);

    if(parsedLine.endsWith(QLatin1Literal("*")))
        parsedLine.chop(1);

    if(parsedLine.startsWith(QLatin1Literal("||")) && parsedLine.endsWith(QLatin1Literal("^"))) {
        urlMatchType = DomainMatch;
        match = parsedLine.mid(2, parsedLine.length() - 3);
        return;
    }

    // check for wildcards and translate to regexp
    // wildcard "*" - any number of characters
    // separator "^" - end, ? or /
    if(parsedLine.contains(QLatin1Literal("*")) || parsedLine.contains(QLatin1Literal("^"))) {
        urlMatchType = RegularExpressionMatch;
        parsedLine.replace(QLatin1Literal("||"), QLatin1Literal("^\\w+://"));
        parsedLine.replace(QLatin1Literal("|"), QLatin1Literal("\\|"));
        parsedLine.replace(QLatin1Literal("*"), QLatin1Literal(".*"));
        parsedLine.replace(QLatin1Literal("^"), QLatin1Literal("($|\\?|\\/)"));
        regexp.setPattern(parsedLine);
        return;
    }

    match = parsedLine;
}