aboutsummaryrefslogtreecommitdiff
path: root/lib/web/urlfilter/adblockrule.cpp
blob: d70778336ff8441bf7e5b4abdd3da356f37cc5f8 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
#include "adblockrule.h"

bool isMatchingDomain(const QString &domain, const QString &filter)
{
    // domain and filter are the same
    if(domain == filter) {
        return true;
    }

    // domain can't be matched by filter if it doesn't end with filter
    // ex. example2.com isn't matched by example.com
    if(!domain.endsWith(filter)) {
        return false;
    }

    // match with subdomains
    // ex. subdomain.example.com is matched by example.com
    int index = domain.indexOf(filter);

    // match if (domain ends with filter) && (filter has been found) and (character before filter is '.')
    return index > 0 && domain[index - 1] == QLatin1Char('.');
}

// adblock format documentation
// https://adblockplus.org/filters

// QString::mid(pos, len) - Returns a string starting at the specified position index.
// QString::chop(len) - Removes n characters from the end of the string.
// QString::remove(pos, len) - Removes n characters from the string, starting at the given position index.

AdBlockRule::AdBlockRule(const QString &filter)
{
    QString parsedLine = filter.trimmed();

    // there is no rule, or it"s a comment
    if(parsedLine.isEmpty() || parsedLine.startsWith("!")) {
        return;
    }

    // css rule - ignore for now
    if(parsedLine.contains(QLatin1Literal("##")) || parsedLine.contains(QLatin1Literal("#@#"))) {
        return;
    }

    m_isEnabled = true;

    // exception rules
    if(parsedLine.startsWith(QLatin1Literal("@@"))) {
        m_isException = true;
        parsedLine.remove(0, 2);
    }

    // regular expression rules
    if(parsedLine.startsWith(QLatin1Literal("/")) && parsedLine.endsWith(QLatin1Literal("/"))) {
        parsedLine = parsedLine.mid(1, parsedLine.length() - 2);

        matchType = RegularExpressionMatch;
        regexp.setPattern(parsedLine);
        return;
    }

    // basic filter rules
    if(parsedLine.startsWith(QLatin1Literal("|")) && parsedLine.endsWith(QLatin1Literal("|"))) {
        matchType = StringEquals;
        match = parsedLine.mid(1, parsedLine.length() - 2);
        return;
    }

    // Basic filter rules can use wildcards, which were supported by QRegExp,
    // but were deprecated in QRegularExpression.

    // remove beginning and ending wildcards
    if(parsedLine.startsWith(QLatin1Literal("*")))
        parsedLine = parsedLine.mid(1);

    if(parsedLine.endsWith(QLatin1Literal("*")))
        parsedLine.chop(1);

    if(parsedLine.startsWith(QLatin1Literal("||")) && parsedLine.endsWith(QLatin1Literal("^"))) {
        matchType = DomainMatch;
        match = parsedLine.mid(2, parsedLine.length() - 3);
        return;
    }

    // check for wildcards and translate to regexp
    // wildcard "*" - any number of characters
    // separator "^" - end, ? or /
    if(parsedLine.contains(QLatin1Literal("*")) || parsedLine.contains(QLatin1Literal("^"))) {
        matchType = RegularExpressionMatch;
        parsedLine.replace(QLatin1Literal("*"), QLatin1Literal(".*"));
        parsedLine.replace(QLatin1Literal("^"), QLatin1Literal("($|\\?|\\/)"));
        regexp.setPattern(parsedLine);
        return;
    }

    matcher.setPattern(parsedLine);
}

bool AdBlockRule::isEnabled() const
{
    return m_isEnabled;
}

// Decides whether a request to `url` should be blocked by this rule.
//
// The url is tested according to the match type chosen by the constructor:
//   RegularExpressionMatch - the whole url is searched with `regexp`
//   StringEquals           - the whole url has to be equal to `match`
//   DomainMatch            - the host has to be `match` or a subdomain of it
// Returns true only when the url is matched and the rule is not an exception
// rule; exception rules never ask for blocking. Rules of any other match type
// return false.
bool AdBlockRule::shouldBlock(const QUrl &url) const
{
    bool isMatched = false;

    switch (matchType) {
    case RegularExpressionMatch:
        isMatched = regexp.match(url.toString()).hasMatch();
        break;

    case StringEquals:
        isMatched = (url.toString() == match);
        break;

    case DomainMatch:
        isMatched = isMatchingDomain(url.host(), match);
        break;

    default:
        return false;
    }

    // every match type treats exception rules the same way
    return isMatched && !m_isException;
}