/* * Copyright (c) 2009, Zsombor Gegesy <gzsombor@gmail.com> * Copyright (c) 2009, Benjamin C. Meyer <ben@meyerhome.net> * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the Benjamin Meyer nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * ============================================================ * * This file is a part of the rekonq project * * Copyright (C) 2009 by Andrea Diamantini <adjam7 at gmail dot com> * * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License as * published by the Free Software Foundation; either version 2 of * the License or (at your option) version 3 or any later version * accepted by the membership of KDE e.V. (or its successor approved * by the membership of KDE e.V.), which shall act as a proxy * defined in Section 14 of version 3 of the license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. * * ============================================================ */ // Self Includes #include "adblockrule.h" // Qt Includes #include <QStringList> #include <QDebug> #include <QRegExp> #include <QUrl> // Defines #define QL1S(x) QLatin1String(x) #define QL1C(x) QLatin1Char(x) AdBlockRule::AdBlockRule(const QString &filter) : m_optionMatchRule(false) { bool isRegExpRule = false; QString parsedLine = filter; if ( parsedLine.startsWith( QL1C('/') ) && parsedLine.endsWith( QL1C('/') ) ) { parsedLine = parsedLine.mid(1); parsedLine = parsedLine.left(parsedLine.size() - 1); isRegExpRule = true; } int optionsNumber = parsedLine.indexOf( QL1C('$'), 0); QStringList options; if (optionsNumber >= 0) { options = parsedLine.mid(optionsNumber + 1).split(QL1C(',')); parsedLine = parsedLine.left(optionsNumber); } if(!isRegExpRule) parsedLine = convertPatternToRegExp(parsedLine); m_regExp = QRegExp(parsedLine, Qt::CaseInsensitive, QRegExp::RegExp2); if ( options.contains( QL1S("match-case") )) { m_regExp.setCaseSensitivity(Qt::CaseSensitive); m_optionMatchRule = true; } } // here return false means that rule doesn't match, // so that url is allowed // return true means "matched rule", so stop url! bool AdBlockRule::match(const QString &encodedUrl) const { bool matched = m_regExp.indexIn(encodedUrl) != -1; // TODO: Reimplement this in rekonq 0.5 :) // // if (matched && !m_options.isEmpty()) // { // // we only support domain right now // if (m_options.count() == 1) // { // foreach (const QString &option, m_options) // { // if (option.startsWith( QL1S("domain=") )) // { // QUrl url = QUrl::fromEncoded(encodedUrl.toUtf8()); // QString host = url.host(); // QStringList domainOptions = option.mid(7).split( QL1C('|') ); // foreach (QString domainOption, domainOptions) // { // bool negate = domainOption.at(0) == QL1C('~'); // if (negate) // domainOption = domainOption.mid(1); // bool hostMatched = domainOption == host; // if (hostMatched && !negate) // return true; // if (!hostMatched && negate) // return true; // } // } // } // } // return false; // } return matched; } QString AdBlockRule::convertPatternToRegExp(const QString &wildcardPattern) { QString pattern = wildcardPattern; // remove multiple wildcards pattern.replace(QRegExp( QL1S("\\*+") ), QL1S("*") ); // remove anchors following separator placeholder pattern.replace(QRegExp( QL1S("\\^\\|$") ), QL1S("^") ); // remove leading wildcards pattern.replace(QRegExp( QL1S("^(\\*)") ), QL1S("") ); // remove trailing wildcards pattern.replace(QRegExp( QL1S("(\\*)$") ), QL1S("") ); // escape special symbols pattern.replace(QRegExp( QL1S("(\\W)") ), QL1S("\\\\1") ); // process extended anchor at expression start pattern.replace(QRegExp( QL1S("^\\\\\\|\\\\\\|") ), QL1S("^[\\w\\-]+:\\/+(?!\\/)(?:[^\\/]+\\.)?") ); // process separator placeholders pattern.replace(QRegExp( QL1S("\\\\\\^") ), QL1S("(?:[^\\w\\d\\-.%]|$)") ); // process anchor at expression start pattern.replace(QRegExp( QL1S("^\\\\\\|") ), QL1S("^") ); // process anchor at expression end pattern.replace(QRegExp( QL1S("\\\\\\|$") ), QL1S("$") ); // replace wildcards by .* pattern.replace(QRegExp( QL1S("\\\\\\*") ), QL1S(".*") ); // Finally, return... return pattern; }