aboutsummaryrefslogtreecommitdiff
path: root/lib/adblock/filterrule.cpp
blob: 147e5fddad32b818118cfb5e3c42c457cd2c4221 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
/*******************************************************************************
 **
 ** smolbote: yet another qute browser
 ** Copyright (C) 2017  Xian Nox
 **
 ** This program is free software: you can redistribute it and/or modify
 ** it under the terms of the GNU General Public License as published by
 ** the Free Software Foundation, either version 3 of the License, or
 ** (at your option) any later version.
 **
 ** This program is distributed in the hope that it will be useful,
 ** but WITHOUT ANY WARRANTY; without even the implied warranty of
 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 ** GNU General Public License for more details.
 **
 ** You should have received a copy of the GNU General Public License
 ** along with this program.  If not, see <http://www.gnu.org/licenses/>.
 **
 ******************************************************************************/

#include "filterrule.h"
#include <QUrl>

/* TODO
 * - split this class into two: FilterRule that only deals with matching, and AdBlockRule, which only deals with parsing
 */

/* The AdBlock standard is an incredible mess, vaguely explained on:
 * - https://adblockplus.org/filters
 * - https://adblockplus.org/filter-cheatsheet
 */

/* Build a rule from one line of a filter list.
 * The line is handed to parse(); its result becomes the validity flag
 * reported by isValid().
 */
FilterRule::FilterRule(const QString &line)
{
    const bool parsedOk = parse(line);
    valid = parsedOk;
}

/* No explicit cleanup is performed when a rule is destroyed. */
FilterRule::~FilterRule() = default;

bool FilterRule::isValid() const
{
    return valid;
}

bool FilterRule::isException() const
{
    return exception;
}

bool FilterRule::shouldBlock(const QUrl &requestUrl) const
{
    QRegularExpressionMatch match = rule.match(requestUrl.toString());
    return match.hasMatch();
}

/* Parse one line of an AdBlock filter list into this rule.
 *
 * Whitespace around the line is ignored. Blank lines and comments (lines
 * starting with '!') are rejected. A leading "@@" marks the rule as an
 * exception; whatever follows is translated by createRegExpPattern() and
 * installed as the rule's regular expression.
 *
 * Returns true if the line yielded a regular expression that compiles,
 * false for blank lines, comments and patterns that are not well-formed.
 */
bool FilterRule::parse(const QString &line)
{
    // start from a known state, so the exception flag is well-defined for
    // comments and for ordinary blocking rules as well
    exception = false;

    // filter lists may carry stray indentation or a trailing '\r'; left in
    // place it would hide a leading '!' or end up inside the pattern
    QString pattern = line.trimmed();

    // skip comments and empty rules
    if(pattern.isEmpty() || pattern.startsWith('!')) {
        return false;
    }

    if(pattern.startsWith("@@")) {
        exception = true;
        pattern = pattern.mid(2);   // remove @@
    }

    rule.setPattern(createRegExpPattern(pattern));

    // a translation that does not compile can never match anything, so it
    // must not be reported as a usable rule
    return rule.isValid();
}

/* Translate the pattern part of an AdBlock filter into a regular expression.
 *
 * Recognised AdBlock syntax:
 * - "||" at the start: the match begins at a scheme followed by an optional
 *                      run of host characters
 * - "|"  at the start: the match begins at the start of the address
 * - "|"  at the end:   the match ends at the end of the address
 * - "*":               any run of characters (possibly empty)
 * - "^":               separator: end of address, '?', '/' or a ":port"
 *
 * Every other character that has a meaning in a regular expression is
 * escaped so that it matches itself. A '|' that is not at the start or end
 * of the filter is treated as a literal character.
 *
 * The anchors are handled independently of each other, and before the body
 * is translated, so that they never get confused with the '|' characters
 * that the separator expression itself contains.
 */
QString createRegExpPattern(const QString &line)
{
    QString filter = line;
    QString head;
    QString tail;

    // anchors only carry meaning at the very start / very end of the filter
    if(filter.startsWith("||")) {
        head = "^\\w+://([\\w,\\d,\\.]+)?";
        filter.remove(0, 2);
    } else if(filter.startsWith('|')) {
        head = "^";
        filter.remove(0, 1);
    }

    if(filter.endsWith('|')) {
        tail = "$";
        filter.chop(1);
    }

    QString pattern = head;
    for(int i = 0; i < filter.length(); ++i) {
        const QChar c = filter.at(i);

        switch(c.unicode()) {
        case '*':
            // wildcard: zero or more of any character
            pattern += ".*";
            break;

        case '^':
            // separator: end of string, or ?, or /, or :number
            pattern += "($|\\?|\\/|:\\d+)";
            break;

        // regex metacharacters that must match themselves; left unescaped,
        // a filter such as "/ads?id=" would silently change meaning and an
        // unbalanced bracket would make the whole expression invalid
        case '.':
        case '\\':
        case '+':
        case '?':
        case '(':
        case ')':
        case '[':
        case ']':
        case '{':
        case '}':
        case '$':
        case '|':
            pattern += QLatin1Char('\\');
            pattern += c;
            break;

        default:
            pattern += c;
            break;
        }
    }
    pattern += tail;

    return pattern;
}