/*******************************************************************************
**
** smolbote: yet another qute browser
** Copyright (C) 2017 Xian Nox
**
** This program is free software: you can redistribute it and/or modify
** it under the terms of the GNU General Public License as published by
** the Free Software Foundation, either version 3 of the License, or
** (at your option) any later version.
**
** This program is distributed in the hope that it will be useful,
** but WITHOUT ANY WARRANTY; without even the implied warranty of
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
** GNU General Public License for more details.
**
** You should have received a copy of the GNU General Public License
** along with this program. If not, see <http://www.gnu.org/licenses/>.
**
******************************************************************************/
#include "filterrule.h"
#include
/* TODO
* - split this class into two: FilterRule that only deals with matching, and AdBlockRule, which only deals with parsing
*/
/* The AdBlock standard is an incredible mess, vaguely explained on:
* - https://adblockplus.org/filters
* - https://adblockplus.org/filter-cheatsheet
*/
/*
 * Builds a rule from a single filter-list line.
 * The line is handed to parse(), and its result is stored as the validity
 * flag reported by isValid().
 */
FilterRule::FilterRule(const QString &line)
{
    // parse() is called from the body (not the init list) because it works
    // on other members of this object.
    const bool parsedOk = this->parse(line);
    this->valid = parsedOk;
}
/* Nothing to release by hand; the members clean up after themselves. */
FilterRule::~FilterRule() = default;
bool FilterRule::isValid() const
{
return valid;
}
bool FilterRule::isException() const
{
return exception;
}
bool FilterRule::shouldBlock(const QUrl &requestUrl) const
{
QRegularExpressionMatch match = rule.match(requestUrl.toString());
return match.hasMatch();
}
/*
 * Parses one filter-list line and configures this rule from it.
 *
 * Blank lines and comment lines (first non-blank character '!') are
 * rejected. A leading "@@" marks the rule as an exception; the remainder is
 * translated by createRegExpPattern() and installed as the rule's pattern.
 *
 * line: raw line taken from the filter list.
 * Returns true if the line produced a usable rule, false otherwise.
 */
bool FilterRule::parse(const QString &line)
{
    // Start from a known state so isException() never reports a stale or
    // indeterminate value, including on the early-return paths below.
    exception = false;

    // Work on a trimmed copy: surrounding whitespace is not part of the
    // filter, and trimming first also catches indented comment lines.
    QString pattern = line.trimmed();

    // skip comments and empty rules
    if(pattern.isEmpty() || pattern.startsWith('!')) {
        return false;
    }

    const bool exceptionRule = pattern.startsWith("@@");
    if(exceptionRule) {
        pattern.remove(0, 2); // remove @@
    }

    // A bare "@@" leaves nothing to match on. An empty expression would
    // match every URL, so refuse it instead of creating a catch-all rule.
    if(pattern.isEmpty()) {
        return false;
    }

    exception = exceptionRule;
    rule.setPattern(createRegExpPattern(pattern));

    // The translation passes most regex metacharacters through untouched, so
    // a filter can produce a malformed expression. Such a rule can never
    // match; report it as invalid rather than keeping it around.
    // NOTE(review): assumes `rule` is a QRegularExpression, as its
    // setPattern()/match() usage in this file suggests.
    return rule.isValid();
}
/*
 * Translates an AdBlock-style URL filter into a regular expression pattern.
 *
 * Handled filter syntax:
 *  - "||" at the start : any scheme followed by an optional host prefix
 *  - "|"  at the start : start of the address
 *  - "|"  at the end   : end of the address
 *  - "*"               : any run of characters
 *  - "^"               : separator (end of string, '?', '/' or ":port")
 *  - "." and every other "|" are matched literally
 *
 * Any other regex metacharacter in the filter ('?', '+', '(', '[', ...) is
 * passed through unescaped.
 *
 * line: the filter text; parse() passes it with the "@@" prefix removed.
 * Returns the pattern string for the rule's regular expression.
 */
QString createRegExpPattern(const QString &line)
{
    QString pattern = line;

    // Take the anchors off the raw filter first, each one independently.
    // The separator group inserted further down contains '|' itself, so
    // looking for anchors after the translation would mistake that
    // alternation for filter syntax and corrupt it.
    const bool domainAnchor = pattern.startsWith("||");
    const bool startAnchor = !domainAnchor && pattern.startsWith('|');
    if(domainAnchor) {
        pattern.remove(0, 2);
    } else if(startAnchor) {
        pattern.remove(0, 1);
    }

    const bool endAnchor = pattern.endsWith('|');
    if(endAnchor) {
        pattern.chop(1);
    }

    // replace . (any character) with \. (a dot)
    pattern.replace('.', "\\.");

    // A '|' that is not an anchor is a literal character. Left unescaped it
    // would act as a regex alternation, and an empty alternative (as in
    // "a||b") would make the rule match every URL.
    pattern.replace('|', "\\|");

    // translate adblock special characters into regex
    // replace wildcard (*) with '.*' (zero or more of any element)
    pattern.replace('*', ".*");

    // replace separator (^) with '($|\?|\/|:\d+)'
    // (end of string, or ?, or /, or :number)
    pattern.replace('^', "($|\\?|\\/|:\\d+)");

    // Put the anchors back in their regex form.
    if(domainAnchor) {
        // || becomes ^\w+://([\w,\d,\.]+)?
        pattern.prepend("^\\w+://([\\w,\\d,\\.]+)?");
    } else if(startAnchor) {
        // | at start becomes ^ (start of string)
        pattern.prepend("^");
    }
    if(endAnchor) {
        // | at end becomes $ (end of string)
        pattern.append("$");
    }

    return pattern;
}