aboutsummaryrefslogtreecommitdiff
path: root/lib/urlfilter/formats/adblocklist.cpp
blob: 772c25208c94e1185977aef9ab6a453c62211906 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
#include "adblocklist.h"

// Constructs an empty list: no explicit initialisation is performed here,
// members are default-initialised.
AdBlockList::AdBlockList() = default;

// Looks up the entry stored under key in m_metadata.
// Returns a default-constructed QString when there is no such entry.
QString AdBlockList::metadata(const QString &key) const
{
    // the single-argument lookup falls back to a default-constructed value
    return m_metadata.value(key);
}

// Tests requestUrl (converted to a string) against the rules in m_rules, in
// container order, and returns the action of the first rule whose matcher
// reports a hit. Returns FilterLeaf::NotMatched when no rule hits.
// firstParty and type are accepted but not consulted by this implementation.
FilterLeaf::Action AdBlockList::match(const QUrl &firstParty, const QUrl &requestUrl, QWebEngineUrlRequestInfo::ResourceType type) const
{
    const QString url = requestUrl.toString();
    FilterLeaf::Action verdict = FilterLeaf::NotMatched;

    for(auto rule = m_rules.begin(); rule != m_rules.end(); ++rule) {
        if(rule->matcher->hasMatch(url)) {
            // first hit wins, later rules are not evaluated
            verdict = rule->action;
            break;
        }
    }

    return verdict;
}

bool AdBlockList::parseLine(const QString &line)
{
    // remove whitespace from start/end of the line
    QString parsedLine = line.trimmed();

    // check if the line is empty
    if(parsedLine.isEmpty())
        return false;

    // parse comment
    if(parsedLine.startsWith(QLatin1Literal("!")))
        return parseComment(parsedLine);

    Filter filter;

    // exception rules
    if(parsedLine.startsWith(QLatin1Literal("@@"))) {
        filter.action = FilterLeaf::Allow;
        parsedLine.remove(0, 2);
    }

    // remove '*' at the beginning and the end
    if(parsedLine.startsWith(QLatin1Literal("*")))
        parsedLine = parsedLine.mid(1);
    if(parsedLine.endsWith(QLatin1Literal("*")))
        parsedLine.chop(1);

    if(parsedLine.startsWith(QLatin1Literal("/")) && parsedLine.endsWith(QLatin1Literal("/"))) {
        // regular expression rule
        parsedLine = parsedLine.mid(1, parsedLine.length() - 2);
        filter.matcher = new ContentsMatcher<QRegularExpression>(parsedLine, FilterLeaf::RegularExpressionMatch);

    } else if(parsedLine.contains(QLatin1Literal("*"))) {
        parsedLine = QRegularExpression::wildcardToRegularExpression(parsedLine);
        filter.matcher = new ContentsMatcher<QRegularExpression>(parsedLine, FilterLeaf::RegularExpressionMatch);

    } else if(parsedLine.startsWith(QLatin1Literal("||")) && parsedLine.endsWith(QLatin1Literal("^"))) {
//        matchType = FilterLeaf::DomainMatch;
        parsedLine = parsedLine.mid(2, parsedLine.length() - 3);
        filter.matcher = new ContentsMatcher<QString>(parsedLine, FilterLeaf::DomainMatch);

    } else if(parsedLine.startsWith(QLatin1Literal("|")) && parsedLine.endsWith(QLatin1Literal("|"))) {
        // string equals rule
        parsedLine = parsedLine.mid(1, parsedLine.length() - 2);
        filter.matcher = new ContentsMatcher<QStringMatcher>(parsedLine, FilterLeaf::StringEquals);

    } else if(parsedLine.startsWith(QLatin1Literal("||"))) {
        // string starts with rule
        parsedLine = parsedLine.mid(2);
        filter.matcher = new ContentsMatcher<QStringMatcher>(parsedLine, FilterLeaf::StringStartsWith);

    } else if(parsedLine.endsWith(QLatin1Literal("|"))) {
        // string ends with rule
        parsedLine.chop(1);
        filter.matcher = new ContentsMatcher<QStringMatcher>(parsedLine, FilterLeaf::StringEndsWith);

    } else {
        // generic contains rule
        filter.matcher = new ContentsMatcher<QStringMatcher>(parsedLine, FilterLeaf::StringContains);
    }


    Q_CHECK_PTR(filter.matcher);
    m_rules.emplace_back(std::move(filter));
    return true;
}

bool AdBlockList::parseComment(const QString &commentLine)
{
    const QStringList comment = commentLine.mid(1).split(QLatin1Literal(": "));
    m_metadata[comment.at(0).trimmed()] = comment.at(1).trimmed();
    return true;
}