aboutsummaryrefslogtreecommitdiff
path: root/lib/web/urlfilter/adblockrule.cpp
blob: 60262b7c148f38189dd7ba7197b1814b57930b1c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
#include "adblockrule.h"

#include <QRegularExpression>

// Maps one resource-type filter option (e.g. "script" or "~image") to the
// matching QWebEngineUrlRequestInfo resource type.
//
// Returns a pair of (resource type, flag). The flag is false when the option
// is prefixed with "~" and true otherwise.
//
// Recognised options: script, image, stylesheet, object, xmlhttprequest and
// other (mapped to ResourceTypeUnknown). Matching is done on the end of the
// string so that the optional "~" prefix does not get in the way.
//
// Any other option trips the assertion in debug builds. In builds where
// Q_ASSERT is compiled out, the function returns (ResourceTypeUnknown, false)
// instead of running off the end of a value-returning function, which would
// be undefined behaviour.
inline std::pair<QWebEngineUrlRequestInfo::ResourceType, bool> parseOption(const QString &option)
{
    // the "~" prefix is the same for every option, so evaluate it once
    const bool enabled = !option.startsWith(QLatin1Literal("~"));

    if(option.endsWith(QLatin1Literal("script"))) {
        return std::make_pair(QWebEngineUrlRequestInfo::ResourceTypeScript, enabled);
    }

    if(option.endsWith(QLatin1Literal("image"))) {
        return std::make_pair(QWebEngineUrlRequestInfo::ResourceTypeImage, enabled);
    }

    if(option.endsWith(QLatin1Literal("stylesheet"))) {
        return std::make_pair(QWebEngineUrlRequestInfo::ResourceTypeStylesheet, enabled);
    }

    if(option.endsWith(QLatin1Literal("object"))) {
        return std::make_pair(QWebEngineUrlRequestInfo::ResourceTypeObject, enabled);
    }

    if(option.endsWith(QLatin1Literal("xmlhttprequest"))) {
        return std::make_pair(QWebEngineUrlRequestInfo::ResourceTypeXhr, enabled);
    }

    if(option.endsWith(QLatin1Literal("other"))) {
        return std::make_pair(QWebEngineUrlRequestInfo::ResourceTypeUnknown, enabled);
    }

    // unhandled option
    Q_ASSERT(false);

    // defined fallback for builds without assertions
    return std::make_pair(QWebEngineUrlRequestInfo::ResourceTypeUnknown, false);
}

// adblock format documentation
// https://adblockplus.org/filters

// QString::mid(pos, len) - Returns a string of len characters starting at position pos.
// QString::chop(len) - Removes len characters from the end of the string.
// QString::remove(pos, len) - Removes len characters from the string, starting at position pos.

AdBlockRule::AdBlockRule(const QString &filter)
{
    QString parsedLine = filter.trimmed();

    // there is no rule, or it"s a comment
    if(parsedLine.isEmpty() || parsedLine.startsWith("!")) {
        return;
    }

    // css rule - ignore for now
    if(parsedLine.contains(QLatin1Literal("##")) || parsedLine.contains(QLatin1Literal("#@#"))) {
        return;
    }

    m_isEnabled = true;

    // exception rules
    if(parsedLine.startsWith(QLatin1Literal("@@"))) {
        m_isException = true;
        parsedLine.remove(0, 2);
    }

    // parse options
    {
        const int sepPos = parsedLine.indexOf(QLatin1Literal("$"));
        if(sepPos != -1) {
            const auto options = parsedLine.mid(sepPos + 1).split(QLatin1Literal(","));
            parsedLine = parsedLine.mid(0, sepPos);

            for(const QString &option : options) {
                if(option.startsWith(QLatin1Literal("domain"))) {
                    const auto domainList = option.mid(7).split(QLatin1Literal("|"));
                    for(const QString &domain : domainList) {
                        if(domain.startsWith(QLatin1Literal("~")))
                            blockedDomains.append(domain.mid(1));
                        else
                            allowedDomains.append(domain);
                    }
                } else {
                    auto optPair = parseOption(option);
                    m_resourceTypeOptions.insert(optPair.first, optPair.second);
                }
            }

        }
    }

    // regular expression rules
    if(parsedLine.startsWith(QLatin1Literal("/")) && parsedLine.endsWith(QLatin1Literal("/"))) {
        parsedLine = parsedLine.mid(1, parsedLine.length() - 2);

        urlMatchType = RegularExpressionMatch;
        regexp.setPattern(parsedLine);
        return;
    }

    // basic filter rules
    if(parsedLine.startsWith(QLatin1Literal("|")) && parsedLine.endsWith(QLatin1Literal("|"))) {
        urlMatchType = StringEquals;
        match = parsedLine.mid(1, parsedLine.length() - 2);
        return;
    }

    // Basic filter rules can use wildcards, which were supported by QRegExp,
    // but were deprecated in QRegularExpression.

    // remove beginning and ending wildcards
    if(parsedLine.startsWith(QLatin1Literal("*")))
        parsedLine = parsedLine.mid(1);

    if(parsedLine.endsWith(QLatin1Literal("*")))
        parsedLine.chop(1);

    if(parsedLine.startsWith(QLatin1Literal("||")) && parsedLine.endsWith(QLatin1Literal("^"))) {
        urlMatchType = DomainMatch;
        match = parsedLine.mid(2, parsedLine.length() - 3);
        return;
    }

    // check for wildcards and translate to regexp
    // wildcard "*" - any number of characters
    // separator "^" - end, ? or /
    if(parsedLine.contains(QLatin1Literal("*")) || parsedLine.contains(QLatin1Literal("^"))) {
        urlMatchType = RegularExpressionMatch;
        parsedLine.replace(QLatin1Literal("*"), QLatin1Literal(".*"));
        parsedLine.replace(QLatin1Literal("^"), QLatin1Literal("($|\\?|\\/)"));
        regexp.setPattern(parsedLine);
        return;
    }

    match = parsedLine;
}