summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/CMakeLists.txt1
-rw-r--r--src/adblock/adblockhostmatcher.cpp55
-rw-r--r--src/adblock/adblockhostmatcher.h51
-rw-r--r--src/adblock/adblockmanager.cpp38
-rw-r--r--src/adblock/adblockmanager.h3
-rw-r--r--src/adblock/adblockrule.h5
-rw-r--r--src/adblock/adblockrulefallbackimpl.cpp2
-rw-r--r--src/adblock/adblockrulefallbackimpl.h2
-rw-r--r--src/adblock/adblockruleimpl.h2
-rw-r--r--src/adblock/adblockruletextmatchimpl.cpp11
-rw-r--r--src/adblock/adblockruletextmatchimpl.h2
11 files changed, 159 insertions, 13 deletions
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 0130c2d4..151906b9 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -52,6 +52,7 @@ SET( rekonq_KDEINIT_SRCS
bookmarks/bookmarkstoolbar.cpp
bookmarks/bookmarkowner.cpp
#----------------------------------------
+ adblock/adblockhostmatcher.cpp
adblock/adblockmanager.cpp
adblock/adblocknetworkreply.cpp
adblock/adblockrule.cpp
diff --git a/src/adblock/adblockhostmatcher.cpp b/src/adblock/adblockhostmatcher.cpp
new file mode 100644
index 00000000..b11dab2c
--- /dev/null
+++ b/src/adblock/adblockhostmatcher.cpp
@@ -0,0 +1,55 @@
+/* ============================================================
+*
+* This file is a part of the rekonq project
+*
+* Copyright (C) 2010 by Benjamin Poulain <ikipou at gmail dot com>
+*
+*
+* This program is free software; you can redistribute it and/or
+* modify it under the terms of the GNU General Public License as
+* published by the Free Software Foundation; either version 2 of
+* the License or (at your option) version 3 or any later version
+* accepted by the membership of KDE e.V. (or its successor approved
+* by the membership of KDE e.V.), which shall act as a proxy
+* defined in Section 14 of version 3 of the license.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*
+* ============================================================ */
+
+// Self Includes
+#include "adblockhostmatcher.h"
+
+// Rekonq Includes
+#include "rekonq_defines.h"
+
+// Try to register an adblock filter as a plain host name.
+//
+// Accepted forms are "||host^" and "||host^$options": the filter must start
+// with "||", and the first '^' must either end the filter or be followed
+// immediately by the single '$' that introduces the options. The text after
+// the '^' is not interpreted by this method.
+//
+// On success the lowercased host and its "www."-prefixed variant are inserted
+// into the host set and true is returned. Otherwise the set is left untouched
+// and false is returned.
+bool AdBlockHostMatcher::tryAddFilter(const QString &filter)
+{
+    if (!filter.startsWith(QL1S("||")))
+        return false;
+
+    QString domain = filter.mid(2);
+
+    const int indexOfFirstSeparator = domain.indexOf(QL1C('^'));
+    if (indexOfFirstSeparator < 0)
+        return false;
+
+    const int indexOfLastDollar = domain.lastIndexOf(QL1C('$'));
+    if (indexOfLastDollar >= 0) {
+        // Options are only tolerated directly after the separator.
+        if (indexOfLastDollar != indexOfFirstSeparator + 1)
+            return false;
+    } else if (indexOfFirstSeparator != domain.size() - 1) {
+        // Without options the separator has to end the filter. Otherwise
+        // something like "||host^path" would lose its "path" part below and
+        // be registered as a rule for the whole host.
+        return false;
+    }
+
+    domain = domain.left(indexOfFirstSeparator);
+
+    // An empty host must not be stored: match() would then succeed for any
+    // request whose host is empty.
+    if (domain.isEmpty())
+        return false;
+
+    // Paths and wildcards cannot be expressed as an exact host lookup.
+    if (domain.contains(QL1C('/')) || domain.contains(QL1C('*')))
+        return false;
+
+    domain = domain.toLower();
+    m_hostList.insert(domain);
+    m_hostList.insert(QL1S("www.") + domain);
+    return true;
+}
diff --git a/src/adblock/adblockhostmatcher.h b/src/adblock/adblockhostmatcher.h
new file mode 100644
index 00000000..0a15bd4e
--- /dev/null
+++ b/src/adblock/adblockhostmatcher.h
@@ -0,0 +1,51 @@
+/* ============================================================
+*
+* This file is a part of the rekonq project
+*
+* Copyright (C) 2010 by Benjamin Poulain <ikipou at gmail dot com>
+*
+*
+* This program is free software; you can redistribute it and/or
+* modify it under the terms of the GNU General Public License as
+* published by the Free Software Foundation; either version 2 of
+* the License or (at your option) version 3 or any later version
+* accepted by the membership of KDE e.V. (or its successor approved
+* by the membership of KDE e.V.), which shall act as a proxy
+* defined in Section 14 of version 3 of the license.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*
+* ============================================================ */
+
+#ifndef ADBLOCKHOSTMATCHER_H
+#define ADBLOCKHOSTMATCHER_H
+
+#include <QSet>
+#include <QString>
+
+// Set-based matcher for adblock filters that name a single host.
+class AdBlockHostMatcher
+{
+public:
+    // Attempts to register an adblock filter with this host matcher.
+    // When the filter is not a hostname filter, nothing is added and
+    // the method returns false.
+    bool tryAddFilter(const QString &filter);
+
+    // Reports whether the given host is registered. The lookup is done
+    // on the lowercased form of the host.
+    bool match(const QString &host) const
+    {
+        const QString lowerCaseHost = host.toLower();
+        return m_hostList.contains(lowerCaseHost);
+    }
+
+    // Forgets every registered host.
+    void clear()
+    {
+        m_hostList.clear();
+    }
+
+private:
+    // Hosts registered through tryAddFilter().
+    QSet<QString> m_hostList;
+};
+
+#endif // ADBLOCKHOSTMATCHER_H
diff --git a/src/adblock/adblockmanager.cpp b/src/adblock/adblockmanager.cpp
index 644ecff5..8deb0bcd 100644
--- a/src/adblock/adblockmanager.cpp
+++ b/src/adblock/adblockmanager.cpp
@@ -67,6 +67,8 @@ void AdBlockManager::loadSettings(bool checkUpdateDate)
_index = 0;
_buffer.clear();
+ _hostWhiteList.clear();
+ _hostBlackList.clear();
_whiteList.clear();
_blackList.clear();
_hideList.clear();
@@ -133,7 +135,10 @@ void AdBlockManager::loadRules(const QStringList &rules)
// white rules
if (stringRule.startsWith(QL1S("@@")))
{
- AdBlockRule rule(stringRule.mid(2));
+ const QString filter = stringRule.mid(2);
+ if (_hostWhiteList.tryAddFilter(filter))
+ continue;
+ AdBlockRule rule(filter);
_whiteList << rule;
continue;
}
@@ -145,6 +150,12 @@ void AdBlockManager::loadRules(const QStringList &rules)
continue;
}
+ // TODO implement domain-specific hiding
+ if (stringRule.contains(QL1S("##")))
+ continue;
+
+ if (_hostBlackList.tryAddFilter(stringRule))
+ continue;
AdBlockRule rule(stringRule);
_blackList << rule;
}
@@ -161,11 +172,22 @@ QNetworkReply *AdBlockManager::block(const QNetworkRequest &request, WebPage *pa
return 0;
QString urlString = request.url().toString();
+ // We compute a lowercase version of the URL so each rule does not
+ // have to do it.
+ const QString urlStringLowerCase = urlString.toLower();
+ const QString host = request.url().host();
// check white rules before :)
+
+ if (_hostWhiteList.match(host)) {
+ kDebug() << "****ADBLOCK: WHITE RULE (@@) Matched by host matcher: ***********";
+ kDebug() << "UrlString: " << urlString;
+ return 0;
+ }
+
foreach(const AdBlockRule &filter, _whiteList)
{
- if (filter.match(urlString))
+ if (filter.match(urlString, urlStringLowerCase))
{
kDebug() << "****ADBLOCK: WHITE RULE (@@) Matched: ***********";
kDebug() << "UrlString: " << urlString;
@@ -174,9 +196,16 @@ QNetworkReply *AdBlockManager::block(const QNetworkRequest &request, WebPage *pa
}
// then check the black ones :(
+ if (_hostBlackList.match(host)) {
+ kDebug() << "****ADBLOCK: BLACK RULE Matched by host matcher: ***********";
+ kDebug() << "UrlString: " << urlString;
+ AdBlockNetworkReply *reply = new AdBlockNetworkReply(request, urlString, this);
+ return reply;
+ }
+
foreach(const AdBlockRule &filter, _blackList)
{
- if (filter.match(urlString))
+ if (filter.match(urlString, urlStringLowerCase))
{
kDebug() << "****ADBLOCK: BLACK RULE Matched: ***********";
kDebug() << "UrlString: " << urlString;
@@ -185,7 +214,8 @@ QNetworkReply *AdBlockManager::block(const QNetworkRequest &request, WebPage *pa
QWebElementCollection elements = document.findAll("*");
foreach(QWebElement el, elements)
{
- if (filter.match(el.attribute("src")))
+ const QString srcAttribute = el.attribute("src");
+ if (filter.match(srcAttribute, srcAttribute.toLower()))
{
kDebug() << "MATCHES ATTRIBUTE!!!!!";
el.setStyleProperty(QL1S("visibility"), QL1S("hidden"));
diff --git a/src/adblock/adblockmanager.h b/src/adblock/adblockmanager.h
index eae761e0..69548994 100644
--- a/src/adblock/adblockmanager.h
+++ b/src/adblock/adblockmanager.h
@@ -108,6 +108,7 @@
#include "rekonq_defines.h"
// Local Includes
+#include "adblockhostmatcher.h"
#include "adblockrule.h"
// KDE Includes
@@ -155,6 +156,8 @@ private:
bool _isAdblockEnabled;
bool _isHideAdsEnabled;
+ AdBlockHostMatcher _hostBlackList;
+ AdBlockHostMatcher _hostWhiteList;
AdBlockRuleList _blackList;
AdBlockRuleList _whiteList;
QStringList _hideList;
diff --git a/src/adblock/adblockrule.h b/src/adblock/adblockrule.h
index 04409688..ef7b2f5f 100644
--- a/src/adblock/adblockrule.h
+++ b/src/adblock/adblockrule.h
@@ -70,9 +70,10 @@ class AdBlockRule
public:
AdBlockRule(const QString &filter);
- bool match(const QString &encodedUrl) const
+ bool match(const QString &encodedUrl, const QString &encodedUrlLowerCase) const
{
- return m_implementation->match(encodedUrl);
+ Q_ASSERT(encodedUrl.toLower() == encodedUrlLowerCase);
+ return m_implementation->match(encodedUrl, encodedUrlLowerCase);
}
private:
diff --git a/src/adblock/adblockrulefallbackimpl.cpp b/src/adblock/adblockrulefallbackimpl.cpp
index decb895d..988f2895 100644
--- a/src/adblock/adblockrulefallbackimpl.cpp
+++ b/src/adblock/adblockrulefallbackimpl.cpp
@@ -61,7 +61,7 @@ AdBlockRuleFallbackImpl::AdBlockRuleFallbackImpl(const QString &filter)
m_regExp.setPattern(parsedLine);
}
-bool AdBlockRuleFallbackImpl::match(const QString &encodedUrl) const
+bool AdBlockRuleFallbackImpl::match(const QString &encodedUrl, const QString &) const
{
return m_regExp.indexIn(encodedUrl) != -1;
}
diff --git a/src/adblock/adblockrulefallbackimpl.h b/src/adblock/adblockrulefallbackimpl.h
index 4e7ca555..ed0f6dc6 100644
--- a/src/adblock/adblockrulefallbackimpl.h
+++ b/src/adblock/adblockrulefallbackimpl.h
@@ -36,7 +36,7 @@ class AdBlockRuleFallbackImpl : public AdBlockRuleImpl
{
public:
AdBlockRuleFallbackImpl(const QString &filter);
- bool match(const QString &encodedUrl) const;
+ bool match(const QString &encodedUrl, const QString &encodedUrlLowerCase) const;
private:
QString convertPatternToRegExp(const QString &wildcardPattern);
diff --git a/src/adblock/adblockruleimpl.h b/src/adblock/adblockruleimpl.h
index da367aeb..db5cec30 100644
--- a/src/adblock/adblockruleimpl.h
+++ b/src/adblock/adblockruleimpl.h
@@ -33,7 +33,7 @@ class AdBlockRuleImpl
public:
AdBlockRuleImpl(const QString &) {}
virtual ~AdBlockRuleImpl() {}
- virtual bool match(const QString &encodedUrl) const = 0;
+ virtual bool match(const QString &encodedUrl, const QString &encodedUrlLowerCase) const = 0;
};
#endif // ADBLOCKRULEIMPL_H
diff --git a/src/adblock/adblockruletextmatchimpl.cpp b/src/adblock/adblockruletextmatchimpl.cpp
index 7c02ea37..892d78e0 100644
--- a/src/adblock/adblockruletextmatchimpl.cpp
+++ b/src/adblock/adblockruletextmatchimpl.cpp
@@ -34,13 +34,18 @@ AdBlockRuleTextMatchImpl::AdBlockRuleTextMatchImpl(const QString &filter)
{
Q_ASSERT(AdBlockRuleTextMatchImpl::isTextMatchFilter(filter));
- m_textToMatch = filter;
+ m_textToMatch = filter.toLower();
m_textToMatch.remove(QL1C('*'));
}
-bool AdBlockRuleTextMatchImpl::match(const QString &encodedUrl) const
+bool AdBlockRuleTextMatchImpl::match(const QString &encodedUrl, const QString &encodedUrlLowerCase) const
{
- return encodedUrl.contains(m_textToMatch, Qt::CaseInsensitive);
+ Q_UNUSED(encodedUrl);
+ // Case sensitive compare is faster, but would be incorrect with encodedUrl since
+ // we do want case insensitive.
+ // What we do is work on a lowercase version of m_textToMatch, and compare to the lowercase
+ // version of encodedUrl.
+ return encodedUrlLowerCase.contains(m_textToMatch, Qt::CaseSensitive);
}
bool AdBlockRuleTextMatchImpl::isTextMatchFilter(const QString &filter)
diff --git a/src/adblock/adblockruletextmatchimpl.h b/src/adblock/adblockruletextmatchimpl.h
index f0e78be0..28b0656c 100644
--- a/src/adblock/adblockruletextmatchimpl.h
+++ b/src/adblock/adblockruletextmatchimpl.h
@@ -36,7 +36,7 @@ class AdBlockRuleTextMatchImpl : public AdBlockRuleImpl
{
public:
AdBlockRuleTextMatchImpl(const QString &filter);
- bool match(const QString &encodedUrl) const;
+ bool match(const QString &encodedUrl, const QString &encodedUrlLowerCase) const;
static bool isTextMatchFilter(const QString &filter);