1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
|
/*
* This file is part of smolbote. It's copyrighted by the contributors recorded
* in the version control history of the file, available from its original
* location: git://neueland.iserlohn-fortress.net/smolbote.git
*
* SPDX-License-Identifier: GPL-3.0
*/
#include "filterrule.h"
#include <QUrl>
/* TODO
* - split this class into two: FilterRule that only deals with matching, and AdBlockRule, which only deals with parsing
*/
/* The AdBlock standard is an incredible mess, vaguely explained on:
* - https://adblockplus.org/filters
* - https://adblockplus.org/filter-cheatsheet
*/
/* Build a rule from a single filter line.
 * The line is handed to parse(), and its result is stored as the validity flag
 * reported by isValid().
 */
FilterRule::FilterRule(const QString &line)
{
    // parse() fills in the rule state; its return value records whether the line was usable
    this->valid = this->parse(line);
}
/* Nothing to release by hand here. */
FilterRule::~FilterRule() = default;
bool FilterRule::isValid() const
{
return valid;
}
bool FilterRule::isException() const
{
return exception;
}
bool FilterRule::shouldBlock(const QUrl &requestUrl) const
{
QRegularExpressionMatch match = rule.match(requestUrl.toString());
return match.hasMatch();
}
/* Parse one line of a filter list into this rule.
 *
 * Surrounding whitespace is ignored. Comment lines (starting with '!') and
 * blank lines are rejected. A leading "@@" marks the rule as an exception and
 * is stripped before the remainder is translated by createRegExpPattern().
 *
 * Returns true if the line produced a usable rule, false if it was skipped or
 * the resulting regular expression is not valid.
 */
bool FilterRule::parse(const QString &line)
{
    // work on a trimmed copy so stray indentation or a trailing '\r' neither
    // hides a comment/exception marker nor ends up inside the regex
    QString pattern = line.trimmed();

    // skip comments and empty rules
    if(pattern.isEmpty() || pattern.startsWith('!')) {
        return false;
    }

    // always assign the flag so it never depends on how the member was initialized
    exception = pattern.startsWith("@@");
    if(exception) {
        pattern = pattern.mid(2); // remove @@
    }

    // a bare "@@" leaves nothing to match; an empty regex would match every URL
    if(pattern.isEmpty()) {
        return false;
    }

    rule.setPattern(createRegExpPattern(pattern));

    // a filter that translates into a broken regex must not be reported as valid
    return rule.isValid();
}
/* Translate an AdBlock filter pattern into a regular expression pattern.
 *
 * line is the filter text with any leading "@@" already removed.
 *
 * Translation rules:
 * - leading "||"  -> ^\w+://([\w,\d,\.]+)?   (scheme plus optional subdomains)
 * - leading "|"   -> ^                       (start of string)
 * - trailing "|"  -> $                       (end of string)
 * - "*"           -> .*                      (zero or more of any character)
 * - "^"           -> ($|\?|\/|:\d+)          (end of string, or ?, or /, or :number)
 * - every other regex metacharacter is escaped so it matches itself
 *
 * NOTE: filter options (the "$option,..." suffix) are not interpreted here;
 * a '$' is treated as a literal character.
 */
QString createRegExpPattern(const QString &line)
{
    QString filter = line;

    // Anchors only have a special meaning at the very start/end of the filter.
    // Peel them off first, each one independently, so that a filter with only
    // one anchor does not leave a bare '|' behind (an empty regex alternative
    // that would match every URL).
    QString pattern;
    if(filter.startsWith("||")) {
        pattern = "^\\w+://([\\w,\\d,\\.]+)?";
        filter = filter.mid(2);
    } else if(filter.startsWith('|')) {
        pattern = "^";
        filter = filter.mid(1);
    }

    bool anchoredAtEnd = false;
    if(filter.endsWith('|')) {
        anchoredAtEnd = true;
        filter.chop(1);
    }

    // Translate the remaining text one character at a time; this keeps the
    // regex fragments we emit from being rewritten by a later replacement.
    for(const QChar c : filter) {
        switch(c.unicode()) {
        case '*':
            // wildcard
            pattern += ".*";
            break;
        case '^':
            // separator
            pattern += "($|\\?|\\/|:\\d+)";
            break;
        case '.':
        case '\\':
        case '+':
        case '?':
        case '(':
        case ')':
        case '[':
        case ']':
        case '{':
        case '}':
        case '$':
        case '|':
            // regex metacharacter that is a plain character in a filter
            pattern += '\\';
            pattern += c;
            break;
        default:
            pattern += c;
            break;
        }
    }

    if(anchoredAtEnd) {
        pattern += '$';
    }

    return pattern;
}
|