diff options
Diffstat (limited to 'src/adblock')
-rw-r--r-- | src/adblock/adblockmanager.cpp | 2 | ||||
-rw-r--r-- | src/adblock/adblockrule.cpp | 93 | ||||
-rw-r--r-- | src/adblock/adblockrule.h | 20 | ||||
-rw-r--r-- | src/adblock/adblockrulefallbackimpl.cpp | 105 | ||||
-rw-r--r-- | src/adblock/adblockrulefallbackimpl.h | 47 | ||||
-rw-r--r-- | src/adblock/adblockruleimpl.h | 39 | ||||
-rw-r--r-- | src/adblock/adblockruletextmatchimpl.cpp | 68 | ||||
-rw-r--r-- | src/adblock/adblockruletextmatchimpl.h | 47 |
8 files changed, 321 insertions, 100 deletions
diff --git a/src/adblock/adblockmanager.cpp b/src/adblock/adblockmanager.cpp index 39432f88..644ecff5 100644 --- a/src/adblock/adblockmanager.cpp +++ b/src/adblock/adblockmanager.cpp @@ -168,7 +168,6 @@ QNetworkReply *AdBlockManager::block(const QNetworkRequest &request, WebPage *pa if (filter.match(urlString)) { kDebug() << "****ADBLOCK: WHITE RULE (@@) Matched: ***********"; - kDebug() << "Filter exp: " << filter.pattern(); kDebug() << "UrlString: " << urlString; return 0; } @@ -180,7 +179,6 @@ QNetworkReply *AdBlockManager::block(const QNetworkRequest &request, WebPage *pa if (filter.match(urlString)) { kDebug() << "****ADBLOCK: BLACK RULE Matched: ***********"; - kDebug() << "Filter exp: " << filter.pattern(); kDebug() << "UrlString: " << urlString; QWebElement document = page->mainFrame()->documentElement(); diff --git a/src/adblock/adblockrule.cpp b/src/adblock/adblockrule.cpp index 6ff98f03..1cb6773a 100644 --- a/src/adblock/adblockrule.cpp +++ b/src/adblock/adblockrule.cpp @@ -55,94 +55,13 @@ // Self Includes #include "adblockrule.h" -// Qt Includes -#include <QStringList> -#include <QUrl> - +#include "adblockrulefallbackimpl.h" +#include "adblockruletextmatchimpl.h" AdBlockRule::AdBlockRule(const QString &filter) { - bool isRegExpRule = false; - - QString parsedLine = filter; - - if (parsedLine.startsWith(QL1C('/')) && parsedLine.endsWith(QL1C('/'))) - { - parsedLine = parsedLine.mid(1); - parsedLine = parsedLine.left(parsedLine.size() - 1); - isRegExpRule = true; - } - - int optionsNumber = parsedLine.indexOf(QL1C('$'), 0); - QStringList options; - - if (optionsNumber >= 0) - { - options = parsedLine.mid(optionsNumber + 1).split(QL1C(',')); - parsedLine = parsedLine.left(optionsNumber); - } - - if (!isRegExpRule) - parsedLine = convertPatternToRegExp(parsedLine); - - m_regExp = QRegExp(parsedLine, Qt::CaseInsensitive, QRegExp::RegExp2); - - if (options.contains(QL1S("match-case"))) - { - m_regExp.setCaseSensitivity(Qt::CaseSensitive); - } -} - - -// here return false means that rule doesn't match, -// so that url is allowed -// return true means "matched rule", so stop url! -bool AdBlockRule::match(const QString &encodedUrl) const -{ - return m_regExp.indexIn(encodedUrl) != -1; -} - - -QString AdBlockRule::convertPatternToRegExp(const QString &wildcardPattern) -{ - QString pattern = wildcardPattern; - - // remove multiple wildcards - pattern.replace(QRegExp(QL1S("\\*+")), QL1S("*")); - - // remove anchors following separator placeholder - pattern.replace(QRegExp(QL1S("\\^\\|$")), QL1S("^")); - - // remove leading wildcards - pattern.replace(QRegExp(QL1S("^(\\*)")), QL1S("")); - - // remove trailing wildcards - pattern.replace(QRegExp(QL1S("(\\*)$")), QL1S("")); - - // escape special symbols - pattern.replace(QRegExp(QL1S("(\\W)")), QL1S("\\\\1")); - - // process extended anchor at expression start - pattern.replace(QRegExp(QL1S("^\\\\\\|\\\\\\|")), QL1S("^[\\w\\-]+:\\/+(?!\\/)(?:[^\\/]+\\.)?")); - - // process separator placeholders - pattern.replace(QRegExp(QL1S("\\\\\\^")), QL1S("(?:[^\\w\\d\\-.%]|$)")); - - // process anchor at expression start - pattern.replace(QRegExp(QL1S("^\\\\\\|")), QL1S("^")); - - // process anchor at expression end - pattern.replace(QRegExp(QL1S("\\\\\\|$")), QL1S("$")); - - // replace wildcards by .* - pattern.replace(QRegExp(QL1S("\\\\\\*")), QL1S(".*")); - - // Finally, return... - return pattern; -} - - -QString AdBlockRule::pattern() const -{ - return m_regExp.pattern(); + if (AdBlockRuleTextMatchImpl::isTextMatchFilter(filter)) + m_implementation = QSharedPointer<AdBlockRuleImpl>(new AdBlockRuleTextMatchImpl(filter)); + else + m_implementation = QSharedPointer<AdBlockRuleImpl>(new AdBlockRuleFallbackImpl(filter)); } diff --git a/src/adblock/adblockrule.h b/src/adblock/adblockrule.h index 28084004..04409688 100644 --- a/src/adblock/adblockrule.h +++ b/src/adblock/adblockrule.h @@ -58,27 +58,25 @@ // Rekonq Includes #include "rekonq_defines.h" -// Qt Includes -#include <QtCore/QRegExp> -#include <QtCore/QString> +#include "adblockruleimpl.h" -// Forward Includes -class QUrl; +#include <QSharedPointer> +// Forward Includes +class QString; class AdBlockRule { public: AdBlockRule(const QString &filter); - bool match(const QString &encodedUrl) const; - - QString pattern() const; + bool match(const QString &encodedUrl) const + { + return m_implementation->match(encodedUrl); + } private: - QString convertPatternToRegExp(const QString &wildcardPattern); - - QRegExp m_regExp; + QSharedPointer<AdBlockRuleImpl> m_implementation; }; diff --git a/src/adblock/adblockrulefallbackimpl.cpp b/src/adblock/adblockrulefallbackimpl.cpp new file mode 100644 index 00000000..decb895d --- /dev/null +++ b/src/adblock/adblockrulefallbackimpl.cpp @@ -0,0 +1,105 @@ +/* ============================================================ +* +* This file is a part of the rekonq project +* +* Copyright (C) 2010 by Benjamin Poulain <ikipou at gmail dot com> +* +* +* This program is free software; you can redistribute it and/or +* modify it under the terms of the GNU General Public License as +* published by the Free Software Foundation; either version 2 of +* the License or (at your option) version 3 or any later version +* accepted by the membership of KDE e.V. (or its successor approved +* by the membership of KDE e.V.), which shall act as a proxy +* defined in Section 14 of version 3 of the license. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program. If not, see <http://www.gnu.org/licenses/>. +* +* ============================================================ */ + +// Self Includes +#include "adblockrulefallbackimpl.h" + +// Rekonq Includes +#include "rekonq_defines.h" + +// Qt Includes +#include <QStringList> + +static inline bool isRegExpFilter(const QString &filter) +{ + return filter.startsWith(QL1C('/')) && filter.endsWith(QL1C('/')); +} + +AdBlockRuleFallbackImpl::AdBlockRuleFallbackImpl(const QString &filter) + : AdBlockRuleImpl(filter) +{ + m_regExp.setCaseSensitivity(Qt::CaseInsensitive); + m_regExp.setPatternSyntax(QRegExp::RegExp2); + + QString parsedLine = filter; + + const int optionsNumber = parsedLine.lastIndexOf(QL1C('$')); + if (optionsNumber >= 0 && !isRegExpFilter(parsedLine)) { + const QStringList options(parsedLine.mid(optionsNumber + 1).split(QL1C(','))); + if (options.contains(QL1S("match-case"))) + m_regExp.setCaseSensitivity(Qt::CaseSensitive); + parsedLine = parsedLine.left(optionsNumber); + } + + if (isRegExpFilter(parsedLine)) + parsedLine = parsedLine.mid(1, parsedLine.length() - 2); + else + parsedLine = convertPatternToRegExp(parsedLine); + + m_regExp.setPattern(parsedLine); +} + +bool AdBlockRuleFallbackImpl::match(const QString &encodedUrl) const +{ + return m_regExp.indexIn(encodedUrl) != -1; +} + +QString AdBlockRuleFallbackImpl::convertPatternToRegExp(const QString &wildcardPattern) +{ + QString pattern = wildcardPattern; + + // remove multiple wildcards + pattern.replace(QRegExp(QL1S("\\*+")), QL1S("*")); + + // remove anchors following separator placeholder + pattern.replace(QRegExp(QL1S("\\^\\|$")), QL1S("^")); + + // remove leading wildcards + pattern.replace(QRegExp(QL1S("^(\\*)")), QL1S("")); + + // remove trailing wildcards + pattern.replace(QRegExp(QL1S("(\\*)$")), QL1S("")); + + // escape special symbols + pattern.replace(QRegExp(QL1S("(\\W)")), QL1S("\\\\1")); + + // process extended anchor at expression start + pattern.replace(QRegExp(QL1S("^\\\\\\|\\\\\\|")), QL1S("^[\\w\\-]+:\\/+(?!\\/)(?:[^\\/]+\\.)?")); + + // process separator placeholders + pattern.replace(QRegExp(QL1S("\\\\\\^")), QL1S("(?:[^\\w\\d\\-.%]|$)")); + + // process anchor at expression start + pattern.replace(QRegExp(QL1S("^\\\\\\|")), QL1S("^")); + + // process anchor at expression end + pattern.replace(QRegExp(QL1S("\\\\\\|$")), QL1S("$")); + + // replace wildcards by .* + pattern.replace(QRegExp(QL1S("\\\\\\*")), QL1S(".*")); + + // Finally, return... + return pattern; +} diff --git a/src/adblock/adblockrulefallbackimpl.h b/src/adblock/adblockrulefallbackimpl.h new file mode 100644 index 00000000..4e7ca555 --- /dev/null +++ b/src/adblock/adblockrulefallbackimpl.h @@ -0,0 +1,47 @@ +/* ============================================================ +* +* This file is a part of the rekonq project +* +* Copyright (C) 2010 by Benjamin Poulain <ikipou at gmail dot com> +* +* +* This program is free software; you can redistribute it and/or +* modify it under the terms of the GNU General Public License as +* published by the Free Software Foundation; either version 2 of +* the License or (at your option) version 3 or any later version +* accepted by the membership of KDE e.V. (or its successor approved +* by the membership of KDE e.V.), which shall act as a proxy +* defined in Section 14 of version 3 of the license. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program. If not, see <http://www.gnu.org/licenses/>. +* +* ============================================================ */ + +#ifndef ADBLOCKRULEFALLBACKIMPL_H +#define ADBLOCKRULEFALLBACKIMPL_H + +#include "adblockruleimpl.h" + +// Qt Includes +#include <QRegExp> +#include <QString> + +class AdBlockRuleFallbackImpl : public AdBlockRuleImpl +{ +public: + AdBlockRuleFallbackImpl(const QString &filter); + bool match(const QString &encodedUrl) const; + +private: + QString convertPatternToRegExp(const QString &wildcardPattern); + + QRegExp m_regExp; +}; + +#endif // ADBLOCKRULEFALLBACKIMPL_H diff --git a/src/adblock/adblockruleimpl.h b/src/adblock/adblockruleimpl.h new file mode 100644 index 00000000..da367aeb --- /dev/null +++ b/src/adblock/adblockruleimpl.h @@ -0,0 +1,39 @@ +/* ============================================================ +* +* This file is a part of the rekonq project +* +* Copyright (C) 2010 by Benjamin Poulain <ikipou at gmail dot com> +* +* +* This program is free software; you can redistribute it and/or +* modify it under the terms of the GNU General Public License as +* published by the Free Software Foundation; either version 2 of +* the License or (at your option) version 3 or any later version +* accepted by the membership of KDE e.V. (or its successor approved +* by the membership of KDE e.V.), which shall act as a proxy +* defined in Section 14 of version 3 of the license. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program. If not, see <http://www.gnu.org/licenses/>. +* +* ============================================================ */ + +#ifndef ADBLOCKRULEIMPL_H +#define ADBLOCKRULEIMPL_H + +class QString; + +class AdBlockRuleImpl +{ +public: + AdBlockRuleImpl(const QString &) {} + virtual ~AdBlockRuleImpl() {} + virtual bool match(const QString &encodedUrl) const = 0; +}; + +#endif // ADBLOCKRULEIMPL_H diff --git a/src/adblock/adblockruletextmatchimpl.cpp b/src/adblock/adblockruletextmatchimpl.cpp new file mode 100644 index 00000000..7c02ea37 --- /dev/null +++ b/src/adblock/adblockruletextmatchimpl.cpp @@ -0,0 +1,68 @@ +/* ============================================================ +* +* This file is a part of the rekonq project +* +* Copyright (C) 2010 by Benjamin Poulain <ikipou at gmail dot com> +* +* +* This program is free software; you can redistribute it and/or +* modify it under the terms of the GNU General Public License as +* published by the Free Software Foundation; either version 2 of +* the License or (at your option) version 3 or any later version +* accepted by the membership of KDE e.V. (or its successor approved +* by the membership of KDE e.V.), which shall act as a proxy +* defined in Section 14 of version 3 of the license. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program. If not, see <http://www.gnu.org/licenses/>. +* +* ============================================================ */ + +// Self Includes +#include "adblockruletextmatchimpl.h" + +// Rekonq Includes +#include "rekonq_defines.h" + +AdBlockRuleTextMatchImpl::AdBlockRuleTextMatchImpl(const QString &filter) + : AdBlockRuleImpl(filter) +{ + Q_ASSERT(AdBlockRuleTextMatchImpl::isTextMatchFilter(filter)); + + m_textToMatch = filter; + m_textToMatch.remove(QL1C('*')); +} + +bool AdBlockRuleTextMatchImpl::match(const QString &encodedUrl) const +{ + return encodedUrl.contains(m_textToMatch, Qt::CaseInsensitive); +} + +bool AdBlockRuleTextMatchImpl::isTextMatchFilter(const QString &filter) +{ + // We don't deal with options just yet + if (filter.contains(QL1C('$'))) + return false; + + // We don't deal with element matching + if (filter.contains(QL1S("##"))) + return false; + + // We don't deal with the begin-end matching + if (filter.startsWith(QL1C('|')) || filter.endsWith(QL1C('|'))) + return false; + + // We only handle * at the beginning or the end + int starPosition = filter.indexOf(QL1C('*')); + while (starPosition >= 0) { + if (starPosition != 0 && starPosition != (filter.length() - 1)) + return false; + starPosition = filter.indexOf(QL1C('*'), starPosition + 1); + } + return true; +} diff --git a/src/adblock/adblockruletextmatchimpl.h b/src/adblock/adblockruletextmatchimpl.h new file mode 100644 index 00000000..f0e78be0 --- /dev/null +++ b/src/adblock/adblockruletextmatchimpl.h @@ -0,0 +1,47 @@ +/* ============================================================ +* +* This file is a part of the rekonq project +* +* Copyright (C) 2010 by Benjamin Poulain <ikipou at gmail dot com> +* +* +* This program is free software; you can redistribute it and/or +* modify it under the terms of the GNU General Public License as +* published by the Free Software Foundation; either version 2 of +* the License or (at your option) version 3 or any later version +* accepted by the membership of KDE e.V. (or its successor approved +* by the membership of KDE e.V.), which shall act as a proxy +* defined in Section 14 of version 3 of the license. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program. If not, see <http://www.gnu.org/licenses/>. +* +* ============================================================ */ + +#ifndef ADBLOCKRULETEXTMATCHIMPL_H +#define ADBLOCKRULETEXTMATCHIMPL_H + +#include "adblockruleimpl.h" + +// Qt Includes +#include <QString> + +// Simple rule to find a string in the URL +class AdBlockRuleTextMatchImpl : public AdBlockRuleImpl +{ +public: + AdBlockRuleTextMatchImpl(const QString &filter); + bool match(const QString &encodedUrl) const; + + static bool isTextMatchFilter(const QString &filter); + +private: + QString m_textToMatch; +}; + +#endif // ADBLOCKRULETEXTMATCHIMPL_H |