/*******************************************************************************
 **
 ** smolbote: yet another qute browser
 ** Copyright (C) 2017  Xian Nox
 **
 ** This program is free software: you can redistribute it and/or modify
 ** it under the terms of the GNU General Public License as published by
 ** the Free Software Foundation, either version 3 of the License, or
 ** (at your option) any later version.
 **
 ** This program is distributed in the hope that it will be useful,
 ** but WITHOUT ANY WARRANTY; without even the implied warranty of
 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 ** GNU General Public License for more details.
 **
 ** You should have received a copy of the GNU General Public License
 ** along with this program.  If not, see <http://www.gnu.org/licenses/>.
 **
 ******************************************************************************/

#include "filterrule.h"

#include <QRegularExpression>
#include <QString>
#include <QUrl>

/* TODO
 * - split this class into two: FilterRule that only deals with matching, and
 *   AdBlockRule, which only deals with parsing
 */

/* The AdBlock standard is an incredible mess, vaguely explained on:
 * - https://adblockplus.org/filters
 * - https://adblockplus.org/filter-cheatsheet
 */

/**
 * @brief Build a rule from a single filter line.
 * @param line one line of filter text
 *
 * The outcome of parsing is stored and can be queried through isValid().
 */
FilterRule::FilterRule(const QString &line)
{
    valid = parse(line);
}

FilterRule::~FilterRule()
{
}

/**
 * @brief Whether the line given to the constructor was parsed into a usable rule.
 * @return the result of parse()
 */
bool FilterRule::isValid() const
{
    return valid;
}

/**
 * @brief Whether the rule was written with a leading "@@".
 * @return true if the parsed line started with "@@"
 */
bool FilterRule::isException() const
{
    return exception;
}

/**
 * @brief Test a request URL against the rule's regular expression.
 * @param requestUrl URL to test
 * @return true if the rule is valid and its expression matches the URL's
 *         string form; false otherwise
 */
bool FilterRule::shouldBlock(const QUrl &requestUrl) const
{
    // An invalid rule holds an empty (or broken) expression; an empty
    // expression matches every string, so never report a match for it.
    if(!valid) {
        return false;
    }

    return rule.match(requestUrl.toString()).hasMatch();
}

/**
 * @brief Parse a filter line into the exception flag and the matching expression.
 * @param line one line of filter text
 * @return true if the line produced a valid regular expression; false for
 *         empty lines, comment lines (starting with '!') and lines whose
 *         generated expression is not valid
 */
bool FilterRule::parse(const QString &line)
{
    // start from a defined state, whatever the outcome of parsing is
    exception = false;

    // surrounding whitespace is not part of the rule
    QString pattern = line.trimmed();

    // skip comments and empty rules
    if(pattern.isEmpty() || pattern.startsWith('!')) {
        return false;
    }

    if(pattern.startsWith("@@")) {
        exception = true;
        pattern = pattern.mid(2); // remove @@
    }

    rule.setPattern(createRegExpPattern(pattern));

    // reject rules whose generated expression cannot be compiled
    return rule.isValid();
}

/**
 * @brief Translate an AdBlock-style URL pattern into a regular expression.
 * @param line the pattern, without any leading "@@"
 * @return the regular expression pattern as a string
 *
 * Translation rules:
 * - a leading "||" anchors at the scheme and an optional host prefix
 * - a leading "|" anchors at the start of the string
 * - a trailing "|" anchors at the end of the string
 * - "*" matches any run of characters
 * - "^" is a separator: end of string, '?', '/' or ":<port>"
 * - every other regular expression metacharacter is matched literally
 *
 * Filter options (the "$..." suffix) are not interpreted here.
 */
QString createRegExpPattern(const QString &line)
{
    int begin = 0;
    int end = line.length();

    QString prefix;
    QString suffix;

    // Anchors are only meaningful at the very start/end of the pattern;
    // consume them up front so the main loop never sees them.
    if(line.startsWith(QLatin1String("||"))) {
        prefix = QLatin1String("^\\w+://([\\w,\\d,\\.]+)?");
        begin = 2;
    } else if(line.startsWith('|')) {
        prefix = QLatin1String("^");
        begin = 1;
    }

    // 'end > begin' keeps a lone "|" or "||" from being counted twice
    if(end > begin && line.endsWith('|')) {
        suffix = QLatin1String("$");
        --end;
    }

    QString pattern = prefix;

    for(int i = begin; i < end; ++i) {
        const QChar c = line.at(i);

        switch(c.unicode()) {
        case '*':
            // wildcard: zero or more of any character
            pattern += QLatin1String(".*");
            break;

        case '^':
            // separator: end of string, or ?, or /, or :number
            pattern += QLatin1String("($|\\?|\\/|:\\d+)");
            break;

        case '\\':
        case '.':
        case '+':
        case '?':
        case '(':
        case ')':
        case '[':
        case ']':
        case '{':
        case '}':
        case '$':
        case '|':
            // regex metacharacters that must match themselves
            pattern += QLatin1Char('\\');
            pattern += c;
            break;

        default:
            pattern += c;
            break;
        }
    }

    pattern += suffix;
    return pattern;
}