summaryrefslogtreecommitdiff
path: root/src/adblock
diff options
context:
space:
mode:
authorAndrea Diamantini <adjam7@gmail.com>2009-11-30 10:26:43 +0100
committerAndrea Diamantini <adjam7@gmail.com>2009-11-30 10:26:43 +0100
commit88bde6fd1e73f30a9bdb1da5fd4bd24aeb84266b (patch)
treed002a585d5a889c5e5df8ea8ab83ec8a04e2e59a /src/adblock
parentRestored QWebElementCollection (diff)
downloadrekonq-88bde6fd1e73f30a9bdb1da5fd4bd24aeb84266b.tar.xz
Porting rekonq to last Qt/KDE API
adblock (KDE one) fix #1
Diffstat (limited to 'src/adblock')
-rw-r--r--src/adblock/adblockmanager.cpp45
-rw-r--r--src/adblock/adblockmanager.h11
-rw-r--r--src/adblock/adblocknetworkreply.cpp88
-rw-r--r--src/adblock/adblocknetworkreply.h81
-rw-r--r--src/adblock/khtml_filter.cpp266
-rw-r--r--src/adblock/khtml_filter_p.h79
6 files changed, 566 insertions, 4 deletions
diff --git a/src/adblock/adblockmanager.cpp b/src/adblock/adblockmanager.cpp
index b25edcb1..987c793f 100644
--- a/src/adblock/adblockmanager.cpp
+++ b/src/adblock/adblockmanager.cpp
@@ -24,9 +24,12 @@
* ============================================================ */
+// Self Includes
#include "adblockmanager.h"
#include "adblockmanager.moc"
+// Local Includes
+#include "adblocknetworkreply.h"
// KDE Includes
#include <KSharedConfig>
@@ -53,10 +56,48 @@ AdBlockManager::~AdBlockManager()
void AdBlockManager::loadSettings()
{
+ KSharedConfig::Ptr config = KSharedConfig::openConfig("khtmlrc", KConfig::NoGlobals);
+ KConfigGroup cg( config, "Filter Settings" );
+
+ if ( cg.exists() )
+ {
+ _isAdblockEnabled = cg.readEntry("Enabled", false);
+ _isHideAdsEnabled = cg.readEntry("Shrink", false);
+
+ _adBlackList.clear();
+ _adWhiteList.clear();
+
+ QMap<QString,QString> entryMap = cg.entryMap();
+ QMap<QString,QString>::ConstIterator it;
+ for( it = entryMap.constBegin(); it != entryMap.constEnd(); ++it )
+ {
+ QString name = it.key();
+ QString url = it.value();
+
+ if (name.startsWith(QLatin1String("Filter")))
+ {
+ if (url.startsWith(QLatin1String("@@")))
+ _adWhiteList.addFilter(url);
+ else
+ _adBlackList.addFilter(url);
+ }
+ }
+ }
}
-bool AdBlockManager::isUrlAllowed(const QUrl &url)
+// Decides whether a network request must be blocked.
+// Returns a newly allocated AdBlockNetworkReply (parented to this manager)
+// when the request URL matches the black list and not the white list;
+// returns 0 when ad blocking is disabled or the URL is allowed.
+QNetworkReply *AdBlockManager::block(const QNetworkRequest &request)
{
- return true;
+ // Ad blocking switched off: never block anything.
+ if (!_isAdblockEnabled)
+ return 0;
+
+ // The filter sets match against the textual form of the URL.
+ QString urlString = request.url().toString();
+
+ // Check the blacklist, and only if that matches, the whitelist
+ if(_adBlackList.isUrlMatched(urlString) && !_adWhiteList.isUrlMatched(urlString))
+ {
+ AdBlockNetworkReply *reply = new AdBlockNetworkReply(request, urlString, this);
+ return reply;
+ }
+ // Not blacklisted, or explicitly whitelisted.
+ return 0;
}
diff --git a/src/adblock/adblockmanager.h b/src/adblock/adblockmanager.h
index 10f72366..5b47c1da 100644
--- a/src/adblock/adblockmanager.h
+++ b/src/adblock/adblockmanager.h
@@ -29,12 +29,16 @@
#define ADBLOCK_MANAGER_H
+// Local Includes
+#include "khtml_filter_p.h"
+
// Qt Includes
#include <QObject>
#include <QStringList>
+#include <QNetworkReply>
// Forward Includes
-class QUrl;
+class QNetworkRequest;
class AdBlockManager : public QObject
@@ -46,11 +50,14 @@ public:
~AdBlockManager();
void loadSettings();
- bool isUrlAllowed(const QUrl &url);
+ QNetworkReply *block(const QNetworkRequest &request);
private:
bool _isAdblockEnabled;
bool _isHideAdsEnabled;
+
+ khtml::FilterSet _adBlackList;
+ khtml::FilterSet _adWhiteList;
};
#endif
diff --git a/src/adblock/adblocknetworkreply.cpp b/src/adblock/adblocknetworkreply.cpp
new file mode 100644
index 00000000..1ccca96d
--- /dev/null
+++ b/src/adblock/adblocknetworkreply.cpp
@@ -0,0 +1,88 @@
+/*
+ * Copyright (c) 2009, Benjamin C. Meyer <ben@meyerhome.net>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the Benjamin Meyer nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ============================================================
+ *
+ * This file is a part of the rekonq project
+ *
+ * Copyright (C) 2009 by Andrea Diamantini <adjam7 at gmail dot com>
+ *
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of
+ * the License or (at your option) version 3 or any later version
+ * accepted by the membership of KDE e.V. (or its successor approved
+ * by the membership of KDE e.V.), which shall act as a proxy
+ * defined in Section 14 of version 3 of the license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * ============================================================ */
+
+
+// Self Includes
+#include "adblocknetworkreply.h"
+#include "adblocknetworkreply.moc"
+
+// KDE Includes
+#include <klocalizedstring.h>
+
+// Qt Includes
+#include <QNetworkRequest>
+#include <QTimer>
+
+
+// Builds the reply returned for a request refused by the ad blocker.
+//   request   - the request that was refused; stored on the reply together with its URL
+//   urlString - textual form of the blocked URL, embedded in the error message
+//   parent    - QObject parent that owns the reply
+// The reply is flagged with ContentAccessDenied immediately; the error() and
+// finished() signals are emitted later from delayedFinished().
+AdBlockNetworkReply::AdBlockNetworkReply(const QNetworkRequest &request, const QString &urlString, QObject *parent)
+    : QNetworkReply(parent)
+{
+    setOperation(QNetworkAccessManager::GetOperation);
+    setRequest(request);
+    setUrl(request.url());
+
+    // Pass the placeholder value to i18n() itself instead of chaining
+    // QString::arg() on its result, so the substitution is done by the
+    // i18n machinery and the call is not seen as missing an argument.
+    setError(QNetworkReply::ContentAccessDenied, i18n("Blocked by AdBlockRule: %1", urlString));
+
+    // A zero-timeout single shot postpones the signals until control is back
+    // in the event loop, i.e. after this constructor has returned.
+    QTimer::singleShot(0, this, SLOT(delayedFinished()));
+}
+
+
+// A blocked reply never carries a payload: both arguments are ignored and
+// every read attempt reports failure by returning -1.
+qint64 AdBlockNetworkReply::readData(char *data, qint64 maxSize)
+{
+    Q_UNUSED(maxSize);
+    Q_UNUSED(data);
+
+    const qint64 readFailed = -1;
+    return readFailed;
+}
+
+
+// Slot fired by the zero-timeout timer armed in the constructor: announces
+// the ContentAccessDenied error first, then signals that the reply is done.
+void AdBlockNetworkReply::delayedFinished()
+{
+    const QNetworkReply::NetworkError reason = QNetworkReply::ContentAccessDenied;
+
+    Q_EMIT error(reason);
+    Q_EMIT finished();
+}
diff --git a/src/adblock/adblocknetworkreply.h b/src/adblock/adblocknetworkreply.h
new file mode 100644
index 00000000..b5bb8300
--- /dev/null
+++ b/src/adblock/adblocknetworkreply.h
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2009, Benjamin C. Meyer <ben@meyerhome.net>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the Benjamin Meyer nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ============================================================
+ *
+ * This file is a part of the rekonq project
+ *
+ * Copyright (C) 2009 by Andrea Diamantini <adjam7 at gmail dot com>
+ *
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of
+ * the License or (at your option) version 3 or any later version
+ * accepted by the membership of KDE e.V. (or its successor approved
+ * by the membership of KDE e.V.), which shall act as a proxy
+ * defined in Section 14 of version 3 of the license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * ============================================================ */
+
+
+#ifndef ADBLOCK_NETWORK_REPLY_H
+#define ADBLOCK_NETWORK_REPLY_H
+
+
+// Qt Includes
+#include <QNetworkReply>
+#include <QString>
+
+// Forward Declarations
+class AdBlockRule;
+
+
+// Network reply handed back for a request that the ad blocker refused.
+// It carries no data: it is created already flagged with an error and emits
+// error() and finished() once the event loop runs delayedFinished().
+class AdBlockNetworkReply : public QNetworkReply
+{
+    Q_OBJECT
+
+public:
+    // request: the refused request; urlString: its URL as text, used in the
+    // error message; parent: owning QObject (may be 0).
+    AdBlockNetworkReply(const QNetworkRequest &request, const QString &urlString, QObject *parent = 0);
+
+    // Intentionally empty.
+    virtual void abort() {}
+
+protected:
+    // Always fails (-1): there is never any content to read.
+    virtual qint64 readData(char *data, qint64 maxSize);
+
+private Q_SLOTS:
+    // Emits error() followed by finished().
+    void delayedFinished();
+
+};
+
+#endif // ADBLOCK_NETWORK_REPLY_H
diff --git a/src/adblock/khtml_filter.cpp b/src/adblock/khtml_filter.cpp
new file mode 100644
index 00000000..f258d1e7
--- /dev/null
+++ b/src/adblock/khtml_filter.cpp
@@ -0,0 +1,266 @@
+/* This file is part of the KDE project
+
+ Copyright (C) 2005 Ivor Hewitt <ivor@kde.org>
+ Copyright (C) 2008 Maksim Orlovich <maksim@kde.org>
+ Copyright (C) 2008 Vyacheslav Tokarev <tsjoker@gmail.com>
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Library General Public
+ License as published by the Free Software Foundation; either
+ version 2 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Library General Public License for more details.
+
+ You should have received a copy of the GNU Library General Public License
+ along with this library; see the file COPYING.LIB. If not, write to
+ the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ Boston, MA 02110-1301, USA.
+*/
+
+#include "khtml_filter_p.h"
+#include <QDebug>
+
+// rolling hash parameters
+#define HASH_P (1997)
+#define HASH_Q (17509)
+// HASH_MOD = (HASH_P^7) % HASH_Q
+#define HASH_MOD (523)
+
+namespace khtml {
+
+// Parses one filter line and registers it with the cheapest matcher able to
+// handle it:
+//   - lines starting with '!' are comments and are ignored;
+//   - a leading "@@" and everything from the last '$' onwards (the options)
+//     are stripped;
+//   - "/.../" becomes a regular expression;
+//   - a rule containing '*' or '?' becomes a wildcard expression, sped up by
+//     the hashed matcher when its literal prefix has at least 8 characters;
+//   - anything else is a plain substring handled by the string matcher.
+void FilterSet::addFilter(const QString& filterStr)
+{
+    // Comment line
+    if (filterStr.startsWith(QLatin1Char('!')))
+        return;
+
+    // Cut off the "@@" marker and the option list; the options are ignored for now.
+    const int begin = filterStr.startsWith(QLatin1String("@@")) ? 2 : 0;
+    const int optionsAt = filterStr.lastIndexOf(QLatin1Char('$'));
+    const int end = (optionsAt == -1) ? filterStr.length() : optionsAt; // one past the last kept char
+
+    // Perhaps nothing left?
+    if (begin >= end)
+        return;
+
+    QString rule = filterStr.mid(begin, end - begin);
+
+    // Regexp filter: keep what is between the slashes.
+    if (rule.length() > 2 && rule.startsWith(QLatin1Char('/')) && rule.endsWith(QLatin1Char('/')))
+    {
+        reFilters.append(QRegExp(rule.mid(1, rule.length() - 2)));
+        return;
+    }
+
+    // Otherwise a wildcard rule.
+    // Note: For these, we also need to handle |.
+
+    // Wildcards at either end are redundant for a substring match.
+    int lead = 0;
+    while (lead < rule.length() && rule.at(lead) == QLatin1Char('*'))
+        ++lead;
+
+    int trail = rule.length() - 1;
+    while (trail >= 0 && rule.at(trail) == QLatin1Char('*'))
+        --trail;
+
+    if (lead > trail)
+        rule = QLatin1String("*"); // only stars: match everything, as requested
+    else
+        rule = rule.mid(lead, trail - lead + 1);
+
+    const int starAt = rule.indexOf('*');
+    const int questionAt = rule.indexOf('?');
+
+    // Fast path: no wildcard left, plain substring.
+    if (starAt < 0 && questionAt < 0)
+    {
+        stringFiltersMatcher.addString(rule);
+        return;
+    }
+
+    // Position of the first wildcard of either kind.
+    const int firstStar = (starAt < 0) ? rule.length() : starAt;
+    const int firstQuestion = (questionAt < 0) ? rule.length() : questionAt;
+    const int wildAt = qMin(firstStar, firstQuestion);
+
+    QRegExp rx;
+    rx.setPatternSyntax(QRegExp::Wildcard);
+
+    if (wildAt > 7)
+    {
+        // Long enough literal prefix: let the hashed matcher find the prefix
+        // and run the wildcard expression only on what follows it.
+        rx.setPattern(rule.mid(wildAt));
+        stringFiltersMatcher.addWildedString(rule.left(wildAt), rx);
+    }
+    else
+    {
+        // Prefix too short to hash: match the whole rule as a wildcard expression.
+        rx.setPattern(rule);
+        reFilters.append(rx);
+    }
+}
+
+// Returns true when the URL is matched by any registered filter. The string
+// matcher is consulted first; the regular expressions are tried one by one
+// only if it found nothing.
+bool FilterSet::isUrlMatched(const QString& url)
+{
+    bool matched = stringFiltersMatcher.isMatched(url);
+
+    for (QVector<QRegExp>::iterator rx = reFilters.begin(); !matched && rx != reFilters.end(); ++rx)
+        matched = url.contains(*rx);
+
+    return matched;
+}
+
+// Drops every registered filter: resets the string matcher and empties the
+// list of regular expressions.
+void FilterSet::clear()
+{
+    stringFiltersMatcher.clear();
+    reFilters.clear();
+}
+
+
+// Registers a plain (wildcard-free) substring pattern.
+// Patterns shorter than 8 characters go to a list that is scanned linearly.
+// Longer ones are indexed for the Rabin-Karp search by the rolling hash of
+// their LAST 8 characters, which is the window isMatched() lines them up on.
+void StringsMatcher::addString(const QString& pattern)
+{
+    if (pattern.length() < 8) {
+        // handle short string differently
+        shortStringFilters.append(pattern);
+        return;
+    }
+
+    stringFilters.append(pattern);
+    const int ind = stringFilters.size() - 1;
+
+    // compute hash using rolling hash
+    // hash for string: x0,x1,x2...xn-1 will be:
+    // (p^(n-1)*x0 + p^(n-2)*x1 + ... + p * xn-2 + xn-1) % q
+    // where p and q some wisely-chosen integers
+    int current = 0;
+    const int len = pattern.length();
+    for (int k = len - 8; k < len; ++k)
+        current = (current * HASH_P + pattern[k].unicode()) % HASH_Q;
+
+    // The bit array only gets its size in clear(); make sure it can hold
+    // every hash value even if clear() has not been called yet.
+    if (fastLookUp.size() < HASH_Q)
+        fastLookUp.resize(HASH_Q);
+
+    // Keys are stored as hash + 1. A non-negative value is an index into
+    // stringFilters.
+    stringFiltersHash[current + 1].append(ind);
+    fastLookUp.setBit(current);
+}
+
+// Registers a wildcard filter split into a literal prefix and a wildcard
+// expression for whatever follows the prefix.
+//   prefix - literal start of the filter; must hold at least 8 characters,
+//            because its first 8 characters are hashed
+//   rx     - expression that has to match right after the prefix
+void StringsMatcher::addWildedString(const QString& prefix, const QRegExp& rx)
+{
+    rePrefixes.append(prefix);
+    reFilters.append(rx);
+
+    // Wildcard entries are stored as negative numbers, -(position + 1), so
+    // isMatched() can tell them apart from plain string indices.
+    const int index = -rePrefixes.size();
+
+    // Rolling hash of the first 8 characters of the prefix.
+    int current = 0;
+    for (int k = 0; k < 8; ++k)
+        current = (current * HASH_P + prefix[k].unicode()) % HASH_Q;
+
+    // The bit array only gets its size in clear(); make sure it can hold
+    // every hash value even if clear() has not been called yet.
+    if (fastLookUp.size() < HASH_Q)
+        fastLookUp.resize(HASH_Q);
+
+    // Keys are stored as hash + 1.
+    stringFiltersHash[current + 1].append(index);
+    fastLookUp.setBit(current);
+}
+
+// Returns true when str contains one of the registered patterns.
+// Short patterns are tested with a plain substring search. The others are
+// found by sliding an 8-character window over str: the rolling hash of the
+// window selects candidate patterns, which are then compared for real.
+bool StringsMatcher::isMatched(const QString& str) const
+{
+    // check short strings first
+    for (int i = 0; i < shortStringFilters.size(); ++i) {
+        if (str.contains(shortStringFilters[i]))
+            return true;
+    }
+
+    // The bit array is only sized by clear(). While it is empty no hashed
+    // pattern can have been registered, and testBit() below must not run.
+    if (fastLookUp.isEmpty())
+        return false;
+
+    int len = str.length();
+    int k;
+
+    int current = 0;
+    int next = 0;
+    // compute hash for first 8 characters
+    for (k = 0; k < 8 && k < len; ++k)
+        current = (current * HASH_P + str[k].unicode()) % HASH_Q;
+
+    QHash<int, QVector<int> >::const_iterator hashEnd = stringFiltersHash.end();
+    // main Rabin-Karp's algorithm loop; k is the index of the LAST character
+    // of the current window, i.e. the window is str[k-7 .. k]
+    for (k = 7; k < len; ++k, current = next) {
+        // roll the hash if not at the end
+        // (calculate hash for the next iteration): remove the contribution of
+        // the character leaving the window (weight HASH_MOD), shift by
+        // HASH_P and add the character entering it
+        if (k + 1 < len)
+            next = (HASH_P * ((current + HASH_Q - ((HASH_MOD * str[k - 7].unicode()) % HASH_Q)) % HASH_Q) + str[k + 1].unicode()) % HASH_Q;
+
+        // cheap rejection before touching the hash map
+        if (!fastLookUp.testBit(current))
+            continue;
+
+        // look-up the hash in the HashMap and check all strings
+        QHash<int, QVector<int> >::const_iterator it = stringFiltersHash.find(current + 1);
+
+        // check possible strings
+        if (it != hashEnd) {
+            for (int j = 0; j < it.value().size(); ++j) {
+                int index = it.value()[j];
+                // check if we got simple string or REs prefix
+                if (index >= 0) {
+                    // plain pattern: hashed on its last 8 characters, so it
+                    // has to END at position k
+                    int flen = stringFilters[index].length();
+                    if (k - flen + 1 >= 0 && stringFilters[index] == str.midRef(k - flen + 1 , flen))
+                        return true;
+                } else {
+                    // wildcard filter: its prefix was hashed on its first 8
+                    // characters, so it has to START at k - 7, and the
+                    // expression must match right behind the prefix
+                    index = -index - 1;
+                    int flen = rePrefixes[index].length();
+                    if (k - 8 + flen < len && rePrefixes[index] == str.midRef(k - 7, flen) &&
+                        str.indexOf(reFilters[index], k - 7 + flen) == k - 7 + flen)
+                        return true;
+                }
+            }
+        }
+    }
+
+    return false;
+}
+
+// Forgets every registered pattern and leaves the look-up bit array with
+// HASH_Q bits, all of them unset.
+void StringsMatcher::clear()
+{
+    stringFiltersHash.clear();
+    rePrefixes.clear();
+    reFilters.clear();
+    shortStringFilters.clear();
+    stringFilters.clear();
+
+    // Resize to HASH_Q bits and reset all of them in one call.
+    fastLookUp.fill(false, HASH_Q);
+}
+
+}
+
+// kate: indent-width 4; replace-tabs on; tab-width 4; space-indent on;
diff --git a/src/adblock/khtml_filter_p.h b/src/adblock/khtml_filter_p.h
new file mode 100644
index 00000000..4490bbd8
--- /dev/null
+++ b/src/adblock/khtml_filter_p.h
@@ -0,0 +1,79 @@
+/* This file is part of the KDE project
+
+ Copyright (C) 2005 Ivor Hewitt <ivor@kde.org>
+ Copyright (C) 2008 Maksim Orlovich <maksim@kde.org>
+ Copyright (C) 2008 Vyacheslav Tokarev <tsjoker@gmail.com>
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Library General Public
+ License as published by the Free Software Foundation; either
+ version 2 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Library General Public License for more details.
+
+ You should have received a copy of the GNU Library General Public License
+ along with this library; see the file COPYING.LIB. If not, write to
+ the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ Boston, MA 02110-1301, USA.
+*/
+
+#ifndef KHTML_FILTER_P_H
+#define KHTML_FILTER_P_H
+
+#include <QString>
+#include <QRegExp>
+#include <QVector>
+#include <QHash>
+#include <QBitArray>
+
+namespace khtml {
+
+// Updateable multi-string matcher based on the Rabin-Karp algorithm.
+// Patterns of 8 or more characters are indexed by the rolling hash of an
+// 8-character window; shorter patterns are kept in a separate list.
+class StringsMatcher {
+public:
+ // Adds a plain (wildcard-free) pattern to the matching set.
+ void addString(const QString& pattern);
+
+ // Checks whether str contains at least one pattern of the matching set.
+ bool isMatched(const QString& str) const;
+
+ // Adds a filter containing wildcards (*,?): prefix is its literal start
+ // (its first 8 characters are hashed), rx matches what follows the prefix.
+ void addWildedString(const QString& prefix, const QRegExp& rx);
+
+ // Removes all patterns and sizes the look-up bit array.
+ void clear();
+
+private:
+ // Plain patterns with 8 or more characters.
+ QVector<QString> stringFilters;
+ // Plain patterns with fewer than 8 characters.
+ QVector<QString> shortStringFilters;
+ // Wildcard expressions; reFilters[i] belongs to rePrefixes[i].
+ QVector<QRegExp> reFilters;
+ QVector<QString> rePrefixes;
+ // Bit h is set when some pattern has hash value h.
+ QBitArray fastLookUp;
+
+ // Maps (hash + 1) to pattern references: a value >= 0 is an index into
+ // stringFilters, a negative value n refers to entry (-n - 1) of
+ // rePrefixes / reFilters.
+ QHash<int, QVector<int> > stringFiltersHash;
+};
+
+// This represents a set of filters that may match URLs.
+// Currently it supports a subset of Adblock Plus functionality.
+class FilterSet {
+public:
+ // Parses and registers a filter. This will also strip @@ for exclusion rules, skip comments, etc.
+ // The user does have to split black and white lists into separate sets, however
+ void addFilter(const QString& filter);
+
+ // Returns true when url is matched by at least one registered filter.
+ bool isUrlMatched(const QString& url);
+
+ // Removes every registered filter.
+ void clear();
+
+private:
+ // Regular-expression and wildcard filters, tried one by one.
+ QVector<QRegExp> reFilters;
+ // Matcher for plain strings and for wildcard filters with a long literal prefix.
+ StringsMatcher stringFiltersMatcher;
+};
+
+}
+
+#endif // KHTML_FILTER_P_H
+
+// kate: indent-width 4; replace-tabs on; tab-width 4; space-indent on;