aboutsummaryrefslogtreecommitdiff
path: root/plugins/AdblockFilter/filterlist.cpp
blob: 76953fcabc37fbbd3c8c0cefb5cc42bb4d00be56 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
/*
 * This file is part of smolbote. It's copyrighted by the contributors recorded
 * in the version control history of the file, available from its original
 * location: https://library.iserlohn-fortress.net/aqua/smolbote.git
 *
 * SPDX-License-Identifier: GPL-3.0
 */

#include "filterlist.h"
#include <QByteArray>
#include <QIODevice>

/**
 * Documentation:
 *
 * https://adblockplus.org/filter-cheatsheet
 * https://help.eyeo.com/adblockplus/how-to-write-filters
 *
 * https://github.com/gorhill/uBlock/wiki/Introduction-to-basic-filtering-syntax
 * https://github.com/gorhill/uBlock/wiki/Static-filter-syntax
 *
 */

// Prefix of the list comment that carries the last-modification timestamp;
// parseComment() parses the text that follows it as a date.
const QLatin1String comment_lastModified("! Last modified: ");
// Prefix of the list comment that carries the expiry interval ("N days" /
// "N hours"); parseComment() adds that interval to the expiry time.
const QLatin1String comment_expires("! Expires: ");

using namespace AdblockPlus;

/**
 * Read a filter list from a device, one rule per line.
 *
 * Nothing is read unless the device is readable and has text mode enabled.
 * Every line is stripped of surrounding whitespace (including the newline
 * that QIODevice::readLine() keeps at the end) and then dispatched:
 *  - lines starting with '!' go to parseComment()
 *  - lines containing "##" or "#@#" are element hiding rules and are skipped
 *  - everything else goes to parseRule()
 *
 * @param from device to read the filter list from
 */
Filterlist::Filterlist(QIODevice &from)
{
    if(!from.isReadable() || !from.isTextModeEnabled()) {
        return;
    }

    while(from.bytesAvailable() > 0) {
        // No length cap: a capped read would hand the remainder of a long line
        // to the next iteration, where it would be parsed as a separate rule.
        const QByteArray raw = from.readLine();
        if(raw.isEmpty()) {
            // even a blank line yields "\n", so an empty result means the read
            // failed; stop instead of spinning on bytesAvailable()
            break;
        }

        // drop the trailing newline so suffix checks ("^", "|", "/") and the
        // date parsing in parseComment() see the actual end of the text
        const QByteArray line = raw.trimmed();
        if(line.isEmpty()) {
            continue;
        }

        if(line.startsWith('!')) {
            parseComment(line);

        } else if(line.contains("##") || line.contains("#@#")) {
            // ## is element hiding rule
            // #@# is element hiding exception rule
            // neither is handled yet, so the line is ignored

        } else {
            // NOTE(review): parseRule() returns a rule allocated with new (or
            // nullptr) and the result is dropped here, so nothing stores or
            // frees it. It needs an owner once rules are kept.
            parseRule(line);
        }
    }
}

void Filterlist::parseComment(const QString &line)
{
    if(line.startsWith(comment_lastModified)) {
        lastModified = QDateTime::fromString(line.mid(comment_lastModified.size()), "dd MMM yyyy HH:mm 'UTC'");
        expires = lastModified;

    } else if(line.startsWith(comment_expires)) {
        const QRegularExpression time_re("(?:(\\d+) days)|(?:(\\d+) hours)");
        const auto match = time_re.match(line);
        if(match.hasMatch()) {
            expires = expires.addDays(match.captured(1).toInt());
            expires = expires.addSecs(match.captured(2).toInt() * 60 * 60);
        }
    }
}

/**
 * Turn one network filter line into a rule object.
 *
 * Recognised syntax, checked in this order:
 *  - leading "@@"      exception rule
 *  - trailing "$opts"  options, handed to Options::parseAbp()
 *  - "||pattern^"      domain match
 *  - "|pattern|"       URL equals
 *  - "|pattern"        URL starts with
 *  - "pattern|"        URL ends with
 *  - "/pattern/"       regular expression
 *  - pattern with '*'  wildcard, converted to a regular expression
 *  - anything else     plain "contains" match
 *
 * @param line raw rule text; surrounding whitespace is ignored
 * @return a newly allocated rule owned by the caller, or nullptr when the
 *         options cannot be parsed or no pattern is left
 */
Rule *Filterlist::parseRule(const QByteArray &line)
{
    // lines read from a device still carry their newline; strip it so the
    // suffix checks below look at the real last character
    QString pattern = QString::fromUtf8(line).trimmed();
    Options opt;

    if(pattern.startsWith(QLatin1String("@@"))) {
        pattern.remove(0, 2);
        opt.exception = true;
    }

    // parse options
    // Options follow the LAST '$': the pattern itself may contain '$', and
    // splitting on the first one would pass a pattern fragment as options.
    const auto separator = pattern.lastIndexOf('$');
    if(separator != -1) {
        const QString options = pattern.mid(separator + 1);
        pattern.truncate(separator);

        if(!opt.parseAbp(&options)) {
            return nullptr;
        }
    }

    // In every branch below a pattern that is empty once its markers are
    // removed is rejected, matching the final fall-through for empty input.
    if(pattern.startsWith("||") && pattern.endsWith("^")) {
        // domain match
        pattern = pattern.mid(2, pattern.length() - 3);
        if(pattern.isEmpty()) {
            return nullptr;
        }
        return new MatcherRule(pattern, opt, MatcherRule::DomainMatch);
    }

    if(pattern.startsWith("|") && pattern.endsWith("|")) {
        // string equals
        pattern = pattern.mid(1, pattern.length() - 2);
        if(pattern.isEmpty()) {
            return nullptr;
        }
        return new MatcherRule(pattern, opt, MatcherRule::UrlEquals);
    }

    if(pattern.startsWith("|")) {
        // string starts with
        pattern = pattern.mid(1);
        if(pattern.isEmpty()) {
            return nullptr;
        }
        return new MatcherRule(pattern, opt, MatcherRule::UrlStartsWith);
    }

    if(pattern.endsWith("|")) {
        // string ends with
        pattern.chop(1);
        if(pattern.isEmpty()) {
            return nullptr;
        }
        return new MatcherRule(pattern, opt, MatcherRule::UrlEndsWith);
    }

    if(pattern.startsWith("/") && pattern.endsWith("/")) {
        // regular expression
        pattern = pattern.mid(1, pattern.length() - 2);
        if(pattern.isEmpty()) {
            return nullptr;
        }
        return new RegexRule(pattern, opt);
    }

    if(pattern.isEmpty()) {
        return nullptr;
    }

    if(pattern.contains('*')) {
        // wildcard pattern
        // QRegularExpression::wildcardToRegularExpression() is a file glob:
        // it anchors the whole string and '*' does not cross '/'. Filter
        // wildcards match any run of characters anywhere in the URL, so
        // escape everything and turn each escaped '*' back into ".*".
        QString regex = QRegularExpression::escape(pattern);
        regex.replace(QLatin1String("\\*"), QLatin1String(".*"));
        return new RegexRule(regex, opt);
    }

    // contains pattern
    return new MatcherRule(pattern, opt);
}

/**
 * Filter a web engine request.
 *
 * The request is not examined yet; the result is false for every input.
 *
 * @param info request to check (currently unused)
 * @return always false
 */
bool Filterlist::filter(QWebEngineUrlRequestInfo &info) const
{
    // explicitly discard the parameter until matching is implemented
    static_cast<void>(info);

    const bool handled = false;
    return handled;
}