summaryrefslogtreecommitdiff
path: root/src/adblock
diff options
context:
space:
mode:
authorBenjamin Poulain <benjamin.poulain@nokia.com>2010-08-19 02:26:40 +0200
committerBenjamin Poulain <benjamin.poulain@nokia.com>2010-08-19 03:18:29 +0200
commita4631fb9ec2541f99e1aba05cf1e7d2b5ebecb98 (patch)
tree2db557fe27ed8589eb0937755cf9bbe69dc1b1e5 /src/adblock
parentImprove the performance of AdBlockRuleTextMatchImpl (diff)
downloadrekonq-a4631fb9ec2541f99e1aba05cf1e7d2b5ebecb98.tar.xz
Add a special matcher for ad block filters for host name
Quite a few ad block rules just match domains. Those are of the form: ||trolltech.com^$options. This patch adds a new class, AdBlockHostMatcher, to deal with this kind of filter. Matching a host address is much faster (O(1)) than going through the entire list of rules.
Diffstat (limited to 'src/adblock')
-rw-r--r--src/adblock/adblockhostmatcher.cpp55
-rw-r--r--src/adblock/adblockhostmatcher.h51
-rw-r--r--src/adblock/adblockmanager.cpp24
-rw-r--r--src/adblock/adblockmanager.h3
4 files changed, 132 insertions, 1 deletions
diff --git a/src/adblock/adblockhostmatcher.cpp b/src/adblock/adblockhostmatcher.cpp
new file mode 100644
index 00000000..b11dab2c
--- /dev/null
+++ b/src/adblock/adblockhostmatcher.cpp
@@ -0,0 +1,55 @@
+/* ============================================================
+*
+* This file is a part of the rekonq project
+*
+* Copyright (C) 2010 by Benjamin Poulain <ikipou at gmail dot com>
+*
+*
+* This program is free software; you can redistribute it and/or
+* modify it under the terms of the GNU General Public License as
+* published by the Free Software Foundation; either version 2 of
+* the License or (at your option) version 3 or any later version
+* accepted by the membership of KDE e.V. (or its successor approved
+* by the membership of KDE e.V.), which shall act as a proxy
+* defined in Section 14 of version 3 of the license.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*
+* ============================================================ */
+
+// Self Includes
+#include "adblockhostmatcher.h"
+
+// Rekonq Includes
+#include "rekonq_defines.h"
+
+/**
+ * Try to register @p filter as a pure host-name rule.
+ *
+ * Only filters of the form "||host^" or "||host^$options" are accepted.
+ * On success the lower-cased host and its "www."-prefixed variant are
+ * stored in the host set; the text following '$' is not interpreted here.
+ *
+ * @param filter the adblock filter text to inspect
+ * @return true if the filter was stored in this matcher; false if it is
+ *         not a plain host filter, in which case nothing is stored
+ */
+bool AdBlockHostMatcher::tryAddFilter(const QString &filter)
+{
+    if (!filter.startsWith(QL1S("||")))
+        return false;
+
+    QString domain = filter.mid(2);
+
+    const int indexOfFirstSeparator = domain.indexOf(QL1C('^'));
+    if (indexOfFirstSeparator < 0)
+        return false;
+
+    const int indexOfLastDollar = domain.lastIndexOf(QL1C('$'));
+    if (indexOfLastDollar >= 0) {
+        // The only '$' allowed is the one opening the options, and it
+        // has to sit immediately after the separator.
+        if (indexOfLastDollar != indexOfFirstSeparator + 1)
+            return false;
+    } else if (indexOfFirstSeparator != domain.length() - 1) {
+        // "||host^something" also constrains what follows the host, so
+        // it is not a host-only rule: storing just the host would match
+        // far more than the filter asks for.
+        return false;
+    }
+
+    domain = domain.left(indexOfFirstSeparator);
+
+    // "||^" carries no host at all; an empty entry in the set would make
+    // match() succeed for every host string that is empty.
+    if (domain.isEmpty())
+        return false;
+
+    if (domain.contains(QL1C('/')) || domain.contains(QL1C('*')))
+        return false;
+
+    domain = domain.toLower();
+    m_hostList.insert(domain);
+    m_hostList.insert(QL1S("www.") + domain);
+    return true;
+}
diff --git a/src/adblock/adblockhostmatcher.h b/src/adblock/adblockhostmatcher.h
new file mode 100644
index 00000000..0a15bd4e
--- /dev/null
+++ b/src/adblock/adblockhostmatcher.h
@@ -0,0 +1,51 @@
+/* ============================================================
+*
+* This file is a part of the rekonq project
+*
+* Copyright (C) 2010 by Benjamin Poulain <ikipou at gmail dot com>
+*
+*
+* This program is free software; you can redistribute it and/or
+* modify it under the terms of the GNU General Public License as
+* published by the Free Software Foundation; either version 2 of
+* the License or (at your option) version 3 or any later version
+* accepted by the membership of KDE e.V. (or its successor approved
+* by the membership of KDE e.V.), which shall act as a proxy
+* defined in Section 14 of version 3 of the license.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*
+* ============================================================ */
+
+#ifndef ADBLOCKHOSTMATCHER_H
+#define ADBLOCKHOSTMATCHER_H
+
+#include <QSet>
+#include <QString>
+
+/**
+ * Set-based matcher for adblock filters that designate a whole host.
+ *
+ * Hosts are kept in a QSet, so a lookup is a single hash query rather
+ * than a walk over a list of rules.
+ */
+class AdBlockHostMatcher
+{
+public:
+    /**
+     * Try to add an adblock filter to this host matcher.
+     *
+     * @param filter the adblock filter text
+     * @return true if the filter was added; false if the filter is not a
+     *         host name, in which case it is not added
+     */
+    bool tryAddFilter(const QString &filter);
+
+    /**
+     * Tell whether @p host is one of the stored hosts.
+     *
+     * The comparison is exact on the lower-cased host string; no
+     * sub-domain or suffix matching is performed here.
+     *
+     * @param host the host name to look up
+     * @return true if the lower-cased host is present in the set
+     */
+    bool match(const QString &host) const
+    {
+        const QString normalizedHost = host.toLower();
+        return m_hostList.contains(normalizedHost);
+    }
+
+    /** Remove every stored host. */
+    void clear()
+    {
+        m_hostList.clear();
+    }
+
+private:
+    // Hosts added through tryAddFilter().
+    QSet<QString> m_hostList;
+};
+
+#endif // ADBLOCKHOSTMATCHER_H
diff --git a/src/adblock/adblockmanager.cpp b/src/adblock/adblockmanager.cpp
index e0a109f0..600dc5ce 100644
--- a/src/adblock/adblockmanager.cpp
+++ b/src/adblock/adblockmanager.cpp
@@ -67,6 +67,8 @@ void AdBlockManager::loadSettings(bool checkUpdateDate)
_index = 0;
_buffer.clear();
+ _hostWhiteList.clear();
+ _hostBlackList.clear();
_whiteList.clear();
_blackList.clear();
_hideList.clear();
@@ -133,7 +135,10 @@ void AdBlockManager::loadRules(const QStringList &rules)
// white rules
if (stringRule.startsWith(QL1S("@@")))
{
- AdBlockRule rule(stringRule.mid(2));
+ const QString filter = stringRule.mid(2);
+ if (_hostWhiteList.tryAddFilter(filter))
+ continue;
+ AdBlockRule rule(filter);
_whiteList << rule;
continue;
}
@@ -145,6 +150,8 @@ void AdBlockManager::loadRules(const QStringList &rules)
continue;
}
+ if (_hostBlackList.tryAddFilter(stringRule))
+ continue;
AdBlockRule rule(stringRule);
_blackList << rule;
}
@@ -164,8 +171,16 @@ QNetworkReply *AdBlockManager::block(const QNetworkRequest &request, WebPage *pa
// We compute a lowercase version of the URL so each rule does not
// have to do it.
const QString urlStringLowerCase = urlString.toLower();
+ const QString host = request.url().host();
// check white rules before :)
+
+ if (_hostWhiteList.match(host)) {
+ kDebug() << "****ADBLOCK: WHITE RULE (@@) Matched by host matcher: ***********";
+ kDebug() << "UrlString: " << urlString;
+ return 0;
+ }
+
foreach(const AdBlockRule &filter, _whiteList)
{
if (filter.match(urlString, urlStringLowerCase))
@@ -177,6 +192,13 @@ QNetworkReply *AdBlockManager::block(const QNetworkRequest &request, WebPage *pa
}
// then check the black ones :(
+ if (_hostBlackList.match(host)) {
+ kDebug() << "****ADBLOCK: BLACK RULE Matched by host matcher: ***********";
+ kDebug() << "UrlString: " << urlString;
+ AdBlockNetworkReply *reply = new AdBlockNetworkReply(request, urlString, this);
+ return reply;
+ }
+
foreach(const AdBlockRule &filter, _blackList)
{
if (filter.match(urlString, urlStringLowerCase))
diff --git a/src/adblock/adblockmanager.h b/src/adblock/adblockmanager.h
index eae761e0..69548994 100644
--- a/src/adblock/adblockmanager.h
+++ b/src/adblock/adblockmanager.h
@@ -108,6 +108,7 @@
#include "rekonq_defines.h"
// Local Includes
+#include "adblockhostmatcher.h"
#include "adblockrule.h"
// KDE Includes
@@ -155,6 +156,8 @@ private:
bool _isAdblockEnabled;
bool _isHideAdsEnabled;
+ AdBlockHostMatcher _hostBlackList;
+ AdBlockHostMatcher _hostWhiteList;
AdBlockRuleList _blackList;
AdBlockRuleList _whiteList;
QStringList _hideList;