/*
 * This file is part of smolbote. It's copyrighted by the contributors recorded
 * in the version control history of the file, available from its original
 * location: git://neueland.iserlohn-fortress.net/smolbote.git
 *
 * SPDX-License-Identifier: GPL-3.0
 */

#include "filterrule.h"
#include <QRegularExpression>
#include <QUrl>

/* TODO
 *  - split this class into two: FilterRule that only deals with matching,
 *    and AdBlockRule, which only deals with parsing
 *  - filter options (the part after '$') and element hiding rules ('##')
 *    are not parsed; such lines are translated as plain URL patterns
 */

/* The AdBlock standard is an incredible mess, vaguely explained on:
 *  - https://adblockplus.org/filters
 *  - https://adblockplus.org/filter-cheatsheet
 */

// Builds a rule from a single line of a filter list.
// Use isValid() afterwards to find out whether the line produced a usable rule.
FilterRule::FilterRule(const QString &line)
{
    // Assigned in the body rather than in the initializer list: parse() writes
    // to other members, so they must all be constructed before it runs.
    valid = parse(line);
}

FilterRule::~FilterRule() = default;

// Returns the result of parsing the line passed to the constructor.
bool FilterRule::isValid() const
{
    return valid;
}

// Returns true if the parsed line was an exception rule (prefixed with "@@").
bool FilterRule::isException() const
{
    return exception;
}

// Returns true if the rule's pattern matches the string form of requestUrl.
// Note that this only reports a pattern match; it does not look at
// isException(), so callers have to combine the two themselves.
bool FilterRule::shouldBlock(const QUrl &requestUrl) const
{
    return rule.match(requestUrl.toString()).hasMatch();
}

// Parses one filter line into the exception flag and the matching regex.
// Returns false for comments, blank lines, rules with an empty pattern and
// patterns that do not compile; returns true otherwise.
bool FilterRule::parse(const QString &line)
{
    // start from a known state so isException() never reads a stale value
    exception = false;

    // surrounding whitespace is not part of the rule; dropping it also lets
    // indented comments be recognised
    QString pattern = line.trimmed();

    // skip comments and empty rules
    if(pattern.isEmpty() || pattern.startsWith('!')) {
        return false;
    }

    if(pattern.startsWith("@@")) {
        exception = true;
        pattern = pattern.mid(2); // remove @@
    }

    // a bare "@@" would become an empty regex, i.e. an exception for every URL
    if(pattern.isEmpty()) {
        return false;
    }

    rule.setPattern(createRegExpPattern(pattern));

    // only report success if the translated pattern actually compiles
    return rule.isValid();
}

// Translates an AdBlock URL pattern (without the leading "@@") into a regular
// expression pattern string.
//
//   ||  at the start   -> scheme, then optional subdomains ending in a dot
//   |   at the start   -> start of the address
//   |   at the end     -> end of the address
//   *                  -> any run of characters
//   ^                  -> separator: end of address, '?', '/' or a port number
//
// Everything else is matched literally, so regex metacharacters in the filter
// (including any '|' that is not an anchor) are escaped.
QString createRegExpPattern(const QString &line)
{
    QString body = line;
    QString prefix;
    QString suffix;

    // Anchors are stripped before the body is translated, so the '|'
    // characters in the separator group below are never mistaken for them.
    if(body.startsWith(QLatin1String("||"))) {
        // "[^/]+" keeps the match inside the host part, and the trailing dot
        // means ||example.com matches sub.example.com but not badexample.com
        prefix = QLatin1String("^\\w+://([^/]+\\.)?");
        body = body.mid(2);
    } else if(body.startsWith(QLatin1Char('|'))) {
        prefix = QLatin1String("^");
        body = body.mid(1);
    }

    if(body.endsWith(QLatin1Char('|'))) {
        suffix = QLatin1String("$");
        body.chop(1);
    }

    QString pattern;
    pattern.reserve(prefix.size() + body.size() * 2 + suffix.size());
    pattern += prefix;

    for(const QChar c : body) {
        switch(c.unicode()) {
        case '*':
            // wildcard: zero or more of any character
            pattern += QLatin1String(".*");
            break;

        case '^':
            // separator: end of string, or ?, or /, or :number
            pattern += QLatin1String("($|\\?|\\/|:\\d+)");
            break;

        case '\\':
        case '.':
        case '+':
        case '?':
        case '(':
        case ')':
        case '[':
        case ']':
        case '{':
        case '}':
        case '$':
        case '|':
            // regex metacharacter with no AdBlock meaning here: match literally
            pattern += QLatin1Char('\\');
            pattern += c;
            break;

        default:
            pattern += c;
            break;
        }
    }

    pattern += suffix;
    return pattern;
}