diff options
Diffstat (limited to 'src/adblock')
| -rw-r--r-- | src/adblock/adblockmanager.cpp | 2 | ||||
| -rw-r--r-- | src/adblock/adblockrule.cpp | 93 | ||||
| -rw-r--r-- | src/adblock/adblockrule.h | 20 | ||||
| -rw-r--r-- | src/adblock/adblockrulefallbackimpl.cpp | 105 | ||||
| -rw-r--r-- | src/adblock/adblockrulefallbackimpl.h | 47 | ||||
| -rw-r--r-- | src/adblock/adblockruleimpl.h | 39 | ||||
| -rw-r--r-- | src/adblock/adblockruletextmatchimpl.cpp | 68 | ||||
| -rw-r--r-- | src/adblock/adblockruletextmatchimpl.h | 47 | 
8 files changed, 321 insertions, 100 deletions
| diff --git a/src/adblock/adblockmanager.cpp b/src/adblock/adblockmanager.cpp index 39432f88..644ecff5 100644 --- a/src/adblock/adblockmanager.cpp +++ b/src/adblock/adblockmanager.cpp @@ -168,7 +168,6 @@ QNetworkReply *AdBlockManager::block(const QNetworkRequest &request, WebPage *pa          if (filter.match(urlString))          {              kDebug() << "****ADBLOCK: WHITE RULE (@@) Matched: ***********"; -            kDebug() << "Filter exp: " << filter.pattern();              kDebug() << "UrlString:  " << urlString;              return 0;          } @@ -180,7 +179,6 @@ QNetworkReply *AdBlockManager::block(const QNetworkRequest &request, WebPage *pa          if (filter.match(urlString))          {              kDebug() << "****ADBLOCK: BLACK RULE Matched: ***********"; -            kDebug() << "Filter exp: " << filter.pattern();              kDebug() << "UrlString:  " << urlString;              QWebElement document = page->mainFrame()->documentElement(); diff --git a/src/adblock/adblockrule.cpp b/src/adblock/adblockrule.cpp index 6ff98f03..1cb6773a 100644 --- a/src/adblock/adblockrule.cpp +++ b/src/adblock/adblockrule.cpp @@ -55,94 +55,13 @@  // Self Includes  #include "adblockrule.h" -// Qt Includes -#include <QStringList> -#include <QUrl> - +#include "adblockrulefallbackimpl.h" +#include "adblockruletextmatchimpl.h"  AdBlockRule::AdBlockRule(const QString &filter)  { -    bool isRegExpRule = false; - -    QString parsedLine = filter; - -    if (parsedLine.startsWith(QL1C('/')) && parsedLine.endsWith(QL1C('/'))) -    { -        parsedLine = parsedLine.mid(1); -        parsedLine = parsedLine.left(parsedLine.size() - 1); -        isRegExpRule = true; -    } - -    int optionsNumber = parsedLine.indexOf(QL1C('$'), 0); -    QStringList options; - -    if (optionsNumber >= 0) -    { -        options = parsedLine.mid(optionsNumber + 1).split(QL1C(',')); -        parsedLine = parsedLine.left(optionsNumber); -    } - -    if (!isRegExpRule) -        parsedLine = convertPatternToRegExp(parsedLine); - -    m_regExp = QRegExp(parsedLine, Qt::CaseInsensitive, QRegExp::RegExp2); - -    if (options.contains(QL1S("match-case"))) -    { -        m_regExp.setCaseSensitivity(Qt::CaseSensitive); -    } -} - - -// here return false means that rule doesn't match, -// so that url is allowed -// return true means "matched rule", so stop url! -bool AdBlockRule::match(const QString &encodedUrl) const -{ -    return m_regExp.indexIn(encodedUrl) != -1; -} - - -QString AdBlockRule::convertPatternToRegExp(const QString &wildcardPattern) -{ -    QString pattern = wildcardPattern; - -    // remove multiple wildcards -    pattern.replace(QRegExp(QL1S("\\*+")), QL1S("*")); - -    // remove anchors following separator placeholder -    pattern.replace(QRegExp(QL1S("\\^\\|$")), QL1S("^")); - -    // remove leading wildcards -    pattern.replace(QRegExp(QL1S("^(\\*)")), QL1S("")); - -    // remove trailing wildcards -    pattern.replace(QRegExp(QL1S("(\\*)$")), QL1S("")); - -    // escape special symbols -    pattern.replace(QRegExp(QL1S("(\\W)")), QL1S("\\\\1")); - -    // process extended anchor at expression start -    pattern.replace(QRegExp(QL1S("^\\\\\\|\\\\\\|")), QL1S("^[\\w\\-]+:\\/+(?!\\/)(?:[^\\/]+\\.)?")); - -    // process separator placeholders -    pattern.replace(QRegExp(QL1S("\\\\\\^")), QL1S("(?:[^\\w\\d\\-.%]|$)")); - -    // process anchor at expression start -    pattern.replace(QRegExp(QL1S("^\\\\\\|")), QL1S("^")); - -    // process anchor at expression end -    pattern.replace(QRegExp(QL1S("\\\\\\|$")), QL1S("$")); - -    // replace wildcards by .* -    pattern.replace(QRegExp(QL1S("\\\\\\*")), QL1S(".*")); - -    // Finally, return... -    return pattern; -} - - -QString AdBlockRule::pattern() const -{ -    return m_regExp.pattern(); +    if (AdBlockRuleTextMatchImpl::isTextMatchFilter(filter)) +        m_implementation = QSharedPointer<AdBlockRuleImpl>(new AdBlockRuleTextMatchImpl(filter)); +    else +        m_implementation = QSharedPointer<AdBlockRuleImpl>(new AdBlockRuleFallbackImpl(filter));  } diff --git a/src/adblock/adblockrule.h b/src/adblock/adblockrule.h index 28084004..04409688 100644 --- a/src/adblock/adblockrule.h +++ b/src/adblock/adblockrule.h @@ -58,27 +58,25 @@  // Rekonq Includes  #include "rekonq_defines.h" -// Qt Includes -#include <QtCore/QRegExp> -#include <QtCore/QString> +#include "adblockruleimpl.h" -// Forward Includes -class QUrl; +#include <QSharedPointer> +// Forward Includes +class QString;  class AdBlockRule  {  public:      AdBlockRule(const QString &filter); -    bool match(const QString &encodedUrl) const; - -    QString pattern() const; +    bool match(const QString &encodedUrl) const +    { +        return m_implementation->match(encodedUrl); +    }  private: -    QString convertPatternToRegExp(const QString &wildcardPattern); - -    QRegExp m_regExp; +    QSharedPointer<AdBlockRuleImpl> m_implementation;  }; diff --git a/src/adblock/adblockrulefallbackimpl.cpp b/src/adblock/adblockrulefallbackimpl.cpp new file mode 100644 index 00000000..decb895d --- /dev/null +++ b/src/adblock/adblockrulefallbackimpl.cpp @@ -0,0 +1,105 @@ +/* ============================================================ +* +* This file is a part of the rekonq project +* +* Copyright (C) 2010 by Benjamin Poulain <ikipou at gmail dot com> +* +* +* This program is free software; you can redistribute it and/or +* modify it under the terms of the GNU General Public License as +* published by the Free Software Foundation; either version 2 of +* the License or (at your option) version 3 or any later version +* accepted by the membership of KDE e.V. (or its successor approved +* by the membership of KDE e.V.), which shall act as a proxy +* defined in Section 14 of version 3 of the license. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program.  If not, see <http://www.gnu.org/licenses/>. +* +* ============================================================ */ + +// Self Includes +#include "adblockrulefallbackimpl.h" + +// Rekonq Includes +#include "rekonq_defines.h" + +// Qt Includes +#include <QStringList> + +static inline bool isRegExpFilter(const QString &filter) +{ +    return filter.startsWith(QL1C('/')) && filter.endsWith(QL1C('/')); +} + +AdBlockRuleFallbackImpl::AdBlockRuleFallbackImpl(const QString &filter) +    : AdBlockRuleImpl(filter) +{ +    m_regExp.setCaseSensitivity(Qt::CaseInsensitive); +    m_regExp.setPatternSyntax(QRegExp::RegExp2); + +    QString parsedLine = filter; + +    const int optionsNumber = parsedLine.lastIndexOf(QL1C('$')); +    if (optionsNumber >= 0 && !isRegExpFilter(parsedLine)) { +        const QStringList options(parsedLine.mid(optionsNumber + 1).split(QL1C(','))); +        if (options.contains(QL1S("match-case"))) +            m_regExp.setCaseSensitivity(Qt::CaseSensitive); +        parsedLine = parsedLine.left(optionsNumber); +    } + +    if (isRegExpFilter(parsedLine)) +        parsedLine = parsedLine.mid(1, parsedLine.length() - 2); +    else +        parsedLine = convertPatternToRegExp(parsedLine); + +    m_regExp.setPattern(parsedLine); +} + +bool AdBlockRuleFallbackImpl::match(const QString &encodedUrl) const +{ +    return m_regExp.indexIn(encodedUrl) != -1; +} + +QString AdBlockRuleFallbackImpl::convertPatternToRegExp(const QString &wildcardPattern) +{ +    QString pattern = wildcardPattern; + +    // remove multiple wildcards +    pattern.replace(QRegExp(QL1S("\\*+")), QL1S("*")); + +    // remove anchors following separator placeholder +    pattern.replace(QRegExp(QL1S("\\^\\|$")), QL1S("^")); + +    // remove leading wildcards +    pattern.replace(QRegExp(QL1S("^(\\*)")), QL1S("")); + +    // remove trailing wildcards +    pattern.replace(QRegExp(QL1S("(\\*)$")), QL1S("")); + +    // escape special symbols +    pattern.replace(QRegExp(QL1S("(\\W)")), QL1S("\\\\1")); + +    // process extended anchor at expression start +    pattern.replace(QRegExp(QL1S("^\\\\\\|\\\\\\|")), QL1S("^[\\w\\-]+:\\/+(?!\\/)(?:[^\\/]+\\.)?")); + +    // process separator placeholders +    pattern.replace(QRegExp(QL1S("\\\\\\^")), QL1S("(?:[^\\w\\d\\-.%]|$)")); + +    // process anchor at expression start +    pattern.replace(QRegExp(QL1S("^\\\\\\|")), QL1S("^")); + +    // process anchor at expression end +    pattern.replace(QRegExp(QL1S("\\\\\\|$")), QL1S("$")); + +    // replace wildcards by .* +    pattern.replace(QRegExp(QL1S("\\\\\\*")), QL1S(".*")); + +    // Finally, return... +    return pattern; +} diff --git a/src/adblock/adblockrulefallbackimpl.h b/src/adblock/adblockrulefallbackimpl.h new file mode 100644 index 00000000..4e7ca555 --- /dev/null +++ b/src/adblock/adblockrulefallbackimpl.h @@ -0,0 +1,47 @@ +/* ============================================================ +* +* This file is a part of the rekonq project +* +* Copyright (C) 2010 by Benjamin Poulain <ikipou at gmail dot com> +* +* +* This program is free software; you can redistribute it and/or +* modify it under the terms of the GNU General Public License as +* published by the Free Software Foundation; either version 2 of +* the License or (at your option) version 3 or any later version +* accepted by the membership of KDE e.V. (or its successor approved +* by the membership of KDE e.V.), which shall act as a proxy +* defined in Section 14 of version 3 of the license. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program.  If not, see <http://www.gnu.org/licenses/>. +* +* ============================================================ */ + +#ifndef ADBLOCKRULEFALLBACKIMPL_H +#define ADBLOCKRULEFALLBACKIMPL_H + +#include "adblockruleimpl.h" + +// Qt Includes +#include <QRegExp> +#include <QString> + +class AdBlockRuleFallbackImpl : public AdBlockRuleImpl +{ +public: +    AdBlockRuleFallbackImpl(const QString &filter); +    bool match(const QString &encodedUrl) const; + +private: +    QString convertPatternToRegExp(const QString &wildcardPattern); + +    QRegExp m_regExp; +}; + +#endif // ADBLOCKRULEFALLBACKIMPL_H diff --git a/src/adblock/adblockruleimpl.h b/src/adblock/adblockruleimpl.h new file mode 100644 index 00000000..da367aeb --- /dev/null +++ b/src/adblock/adblockruleimpl.h @@ -0,0 +1,39 @@ +/* ============================================================ +* +* This file is a part of the rekonq project +* +* Copyright (C) 2010 by Benjamin Poulain <ikipou at gmail dot com> +* +* +* This program is free software; you can redistribute it and/or +* modify it under the terms of the GNU General Public License as +* published by the Free Software Foundation; either version 2 of +* the License or (at your option) version 3 or any later version +* accepted by the membership of KDE e.V. (or its successor approved +* by the membership of KDE e.V.), which shall act as a proxy +* defined in Section 14 of version 3 of the license. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program.  If not, see <http://www.gnu.org/licenses/>. +* +* ============================================================ */ + +#ifndef ADBLOCKRULEIMPL_H +#define ADBLOCKRULEIMPL_H + +class QString; + +class AdBlockRuleImpl +{ +public: +    AdBlockRuleImpl(const QString &) {} +    virtual ~AdBlockRuleImpl() {} +    virtual bool match(const QString &encodedUrl) const = 0; +}; + +#endif // ADBLOCKRULEIMPL_H diff --git a/src/adblock/adblockruletextmatchimpl.cpp b/src/adblock/adblockruletextmatchimpl.cpp new file mode 100644 index 00000000..7c02ea37 --- /dev/null +++ b/src/adblock/adblockruletextmatchimpl.cpp @@ -0,0 +1,68 @@ +/* ============================================================ +* +* This file is a part of the rekonq project +* +* Copyright (C) 2010 by Benjamin Poulain <ikipou at gmail dot com> +* +* +* This program is free software; you can redistribute it and/or +* modify it under the terms of the GNU General Public License as +* published by the Free Software Foundation; either version 2 of +* the License or (at your option) version 3 or any later version +* accepted by the membership of KDE e.V. (or its successor approved +* by the membership of KDE e.V.), which shall act as a proxy +* defined in Section 14 of version 3 of the license. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program.  If not, see <http://www.gnu.org/licenses/>. +* +* ============================================================ */ + +// Self Includes +#include "adblockruletextmatchimpl.h" + +// Rekonq Includes +#include "rekonq_defines.h" + +AdBlockRuleTextMatchImpl::AdBlockRuleTextMatchImpl(const QString &filter) +    : AdBlockRuleImpl(filter) +{ +    Q_ASSERT(AdBlockRuleTextMatchImpl::isTextMatchFilter(filter)); + +    m_textToMatch = filter; +    m_textToMatch.remove(QL1C('*')); +} + +bool AdBlockRuleTextMatchImpl::match(const QString &encodedUrl) const +{ +    return encodedUrl.contains(m_textToMatch, Qt::CaseInsensitive); +} + +bool AdBlockRuleTextMatchImpl::isTextMatchFilter(const QString &filter) +{ +    // We don't deal with options just yet +    if (filter.contains(QL1C('$'))) +        return false; + +    // We don't deal with element matching +    if (filter.contains(QL1S("##"))) +        return false; + +    // We don't deal with the begin-end matching +    if (filter.startsWith(QL1C('|')) || filter.endsWith(QL1C('|'))) +        return false; + +    // We only handle * at the beginning or the end +    int starPosition = filter.indexOf(QL1C('*')); +    while (starPosition >= 0) { +        if (starPosition != 0 && starPosition != (filter.length() - 1)) +            return false; +        starPosition = filter.indexOf(QL1C('*'), starPosition + 1); +    } +    return true; +} diff --git a/src/adblock/adblockruletextmatchimpl.h b/src/adblock/adblockruletextmatchimpl.h new file mode 100644 index 00000000..f0e78be0 --- /dev/null +++ b/src/adblock/adblockruletextmatchimpl.h @@ -0,0 +1,47 @@ +/* ============================================================ +* +* This file is a part of the rekonq project +* +* Copyright (C) 2010 by Benjamin Poulain <ikipou at gmail dot com> +* +* +* This program is free software; you can redistribute it and/or +* modify it under the terms of the GNU General Public License as +* published by the Free Software Foundation; either version 2 of +* the License or (at your option) version 3 or any later version +* accepted by the membership of KDE e.V. (or its successor approved +* by the membership of KDE e.V.), which shall act as a proxy +* defined in Section 14 of version 3 of the license. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program.  If not, see <http://www.gnu.org/licenses/>. +* +* ============================================================ */ + +#ifndef ADBLOCKRULETEXTMATCHIMPL_H +#define ADBLOCKRULETEXTMATCHIMPL_H + +#include "adblockruleimpl.h" + +// Qt Includes +#include <QString> + +// Simple rule to find a string in the URL +class AdBlockRuleTextMatchImpl : public AdBlockRuleImpl +{ +public: +    AdBlockRuleTextMatchImpl(const QString &filter); +    bool match(const QString &encodedUrl) const; + +    static bool isTextMatchFilter(const QString &filter); + +private: +    QString m_textToMatch; +}; + +#endif // ADBLOCKRULETEXTMATCHIMPL_H | 
