From 88bde6fd1e73f30a9bdb1da5fd4bd24aeb84266b Mon Sep 17 00:00:00 2001 From: Andrea Diamantini Date: Mon, 30 Nov 2009 10:26:43 +0100 Subject: Porting rekonq to last Qt/KDE API adblock (KDE one) fix #1 --- src/adblock/adblockmanager.cpp | 45 +++++- src/adblock/adblockmanager.h | 11 +- src/adblock/adblocknetworkreply.cpp | 88 ++++++++++++ src/adblock/adblocknetworkreply.h | 81 +++++++++++ src/adblock/khtml_filter.cpp | 266 ++++++++++++++++++++++++++++++++++++ src/adblock/khtml_filter_p.h | 79 +++++++++++ 6 files changed, 566 insertions(+), 4 deletions(-) create mode 100644 src/adblock/adblocknetworkreply.cpp create mode 100644 src/adblock/adblocknetworkreply.h create mode 100644 src/adblock/khtml_filter.cpp create mode 100644 src/adblock/khtml_filter_p.h (limited to 'src/adblock') diff --git a/src/adblock/adblockmanager.cpp b/src/adblock/adblockmanager.cpp index b25edcb1..987c793f 100644 --- a/src/adblock/adblockmanager.cpp +++ b/src/adblock/adblockmanager.cpp @@ -24,9 +24,12 @@ * ============================================================ */ +// Self Includes #include "adblockmanager.h" #include "adblockmanager.moc" +// Local Includes +#include "adblocknetworkreply.h" // KDE Includes #include @@ -53,10 +56,48 @@ AdBlockManager::~AdBlockManager() void AdBlockManager::loadSettings() { + KSharedConfig::Ptr config = KSharedConfig::openConfig("khtmlrc", KConfig::NoGlobals); + KConfigGroup cg( config, "Filter Settings" ); + + if ( cg.exists() ) + { + _isAdblockEnabled = cg.readEntry("Enabled", false); + _isHideAdsEnabled = cg.readEntry("Shrink", false); + + _adBlackList.clear(); + _adWhiteList.clear(); + + QMap entryMap = cg.entryMap(); + QMap::ConstIterator it; + for( it = entryMap.constBegin(); it != entryMap.constEnd(); ++it ) + { + QString name = it.key(); + QString url = it.value(); + + if (name.startsWith(QLatin1String("Filter"))) + { + if (url.startsWith(QLatin1String("@@"))) + _adWhiteList.addFilter(url); + else + _adBlackList.addFilter(url); + } + } + } } -bool AdBlockManager::isUrlAllowed(const QUrl &url) +QNetworkReply *AdBlockManager::block(const QNetworkRequest &request) { - return true; + if (!_isAdblockEnabled) + return 0; + + QString urlString = request.url().toString(); + + // Check the blacklist, and only if that matches, the whitelist + if(_adBlackList.isUrlMatched(urlString) && !_adWhiteList.isUrlMatched(urlString)) + { + AdBlockNetworkReply *reply = new AdBlockNetworkReply(request, urlString, this); + return reply; + } + return 0; } diff --git a/src/adblock/adblockmanager.h b/src/adblock/adblockmanager.h index 10f72366..5b47c1da 100644 --- a/src/adblock/adblockmanager.h +++ b/src/adblock/adblockmanager.h @@ -29,12 +29,16 @@ #define ADBLOCK_MANAGER_H +// Local Includes +#include "khtml_filter_p.h" + // Qt Includes #include #include +#include // Forward Includes -class QUrl; +class QNetworkRequest; class AdBlockManager : public QObject @@ -46,11 +50,14 @@ public: ~AdBlockManager(); void loadSettings(); - bool isUrlAllowed(const QUrl &url); + QNetworkReply *block(const QNetworkRequest &request); private: bool _isAdblockEnabled; bool _isHideAdsEnabled; + + khtml::FilterSet _adBlackList; + khtml::FilterSet _adWhiteList; }; #endif diff --git a/src/adblock/adblocknetworkreply.cpp b/src/adblock/adblocknetworkreply.cpp new file mode 100644 index 00000000..1ccca96d --- /dev/null +++ b/src/adblock/adblocknetworkreply.cpp @@ -0,0 +1,88 @@ +/* + * Copyright (c) 2009, Benjamin C. Meyer + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the Benjamin Meyer nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * ============================================================ + * + * This file is a part of the rekonq project + * + * Copyright (C) 2009 by Andrea Diamantini + * + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of + * the License or (at your option) version 3 or any later version + * accepted by the membership of KDE e.V. (or its successor approved + * by the membership of KDE e.V.), which shall act as a proxy + * defined in Section 14 of version 3 of the license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * + * ============================================================ */ + + +// Self Includes +#include "adblocknetworkreply.h" +#include "adblocknetworkreply.moc" + +// KDE Includes +#include + +// Qt Includes +#include +#include + + +AdBlockNetworkReply::AdBlockNetworkReply(const QNetworkRequest &request, const QString &urlString, QObject *parent) + : QNetworkReply(parent) +{ + setOperation(QNetworkAccessManager::GetOperation); + setRequest(request); + setUrl(request.url()); + setError(QNetworkReply::ContentAccessDenied, i18n("Blocked by AdBlockRule: %1").arg(urlString)); + QTimer::singleShot(0, this, SLOT(delayedFinished())); +} + + +qint64 AdBlockNetworkReply::readData(char *data, qint64 maxSize) +{ + Q_UNUSED(data); + Q_UNUSED(maxSize); + return -1; +} + + +void AdBlockNetworkReply::delayedFinished() +{ + emit error(QNetworkReply::ContentAccessDenied); + emit finished(); +} diff --git a/src/adblock/adblocknetworkreply.h b/src/adblock/adblocknetworkreply.h new file mode 100644 index 00000000..b5bb8300 --- /dev/null +++ b/src/adblock/adblocknetworkreply.h @@ -0,0 +1,81 @@ +/* + * Copyright (c) 2009, Benjamin C. Meyer + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the Benjamin Meyer nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * ============================================================ + * + * This file is a part of the rekonq project + * + * Copyright (C) 2009 by Andrea Diamantini + * + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of + * the License or (at your option) version 3 or any later version + * accepted by the membership of KDE e.V. (or its successor approved + * by the membership of KDE e.V.), which shall act as a proxy + * defined in Section 14 of version 3 of the license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * + * ============================================================ */ + + +#ifndef ADBLOCK_NETWORK_REPLY_H +#define ADBLOCK_NETWORK_REPLY_H + + +// Qt Includes +#include +#include + +// Forward Declarations +class AdBlockRule; + + +class AdBlockNetworkReply : public QNetworkReply +{ + Q_OBJECT + +public: + AdBlockNetworkReply(const QNetworkRequest &request, const QString &urlString, QObject *parent = 0); + void abort() {}; + +protected: + qint64 readData(char *data, qint64 maxSize); + +private slots: + void delayedFinished(); + +}; + +#endif // ADBLOCKBLOCKEDNETWORKREPLY_H diff --git a/src/adblock/khtml_filter.cpp b/src/adblock/khtml_filter.cpp new file mode 100644 index 00000000..f258d1e7 --- /dev/null +++ b/src/adblock/khtml_filter.cpp @@ -0,0 +1,266 @@ +/* This file is part of the KDE project + + Copyright (C) 2005 Ivor Hewitt + Copyright (C) 2008 Maksim Orlovich + Copyright (C) 2008 Vyacheslav Tokarev + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public + License as published by the Free Software Foundation; either + version 2 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public License + along with this library; see the file COPYING.LIB. If not, write to + the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + Boston, MA 02110-1301, USA. +*/ + +#include "khtml_filter_p.h" +#include + +// rolling hash parameters +#define HASH_P (1997) +#define HASH_Q (17509) +// HASH_MOD = (HASH_P^7) % HASH_Q +#define HASH_MOD (523) + +namespace khtml { + +void FilterSet::addFilter(const QString& filterStr) +{ + QString filter = filterStr; + + if (filter.startsWith(QLatin1Char('!'))) + return; + + // Strip leading @@ + int first = 0; + int last = filter.length() - 1; + if (filter.startsWith(QLatin1String("@@"))) + first = 2; + + // Strip options, we ignore them for now. + int dollar = filter.lastIndexOf(QLatin1Char('$')); + if (dollar != -1) + last = dollar - 1; + + // Perhaps nothing left? + if (first > last) + return; + + filter = filter.mid(first, last - first + 1); + + // Is it a regexp filter? + if (filter.length()>2 && filter.startsWith(QLatin1Char('/')) && filter.endsWith(QLatin1Char('/'))) + { + QString inside = filter.mid(1, filter.length()-2); + QRegExp rx(inside); + reFilters.append(rx); +// qDebug() << "R:" << inside; + } + else + { + // Nope, a wildcard one. + // Note: For these, we also need to handle |. + + // Strip wildcards at the ends + first = 0; + last = filter.length() - 1; + + while (first < filter.length() && filter[first] == QLatin1Char('*')) + ++first; + + while (last >= 0 && filter[last] == QLatin1Char('*')) + --last; + + if (first > last) + filter = QLatin1String("*"); // erm... Well, they asked for it. + else + filter = filter.mid(first, last - first + 1); + + // Now, do we still have any wildcard stuff left? + if (filter.contains("*") || filter.contains("?")) + { +// qDebug() << "W:" << filter; + // check if we can use RK first (and then check full RE for the rest) for better performance + int aPos = filter.indexOf('*'); + if (aPos < 0) + aPos = filter.length(); + int qPos = filter.indexOf('?'); + if (qPos < 0) + qPos = filter.length(); + int pos = qMin(aPos, qPos); + if (pos > 7) { + QRegExp rx; + + rx.setPatternSyntax(QRegExp::Wildcard); + rx.setPattern(filter.mid(pos)); + + stringFiltersMatcher.addWildedString(filter.mid(0, pos), rx); + + } else { + QRegExp rx; + + rx.setPatternSyntax(QRegExp::Wildcard); + rx.setPattern(filter); + reFilters.append(rx); + } + } + else + { + // Fast path + stringFiltersMatcher.addString(filter); + } + } +} + +bool FilterSet::isUrlMatched(const QString& url) +{ + if (stringFiltersMatcher.isMatched(url)) + return true; + + for (int c = 0; c < reFilters.size(); ++c) + { + if (url.contains(reFilters[c])) + return true; + } + + return false; +} + +void FilterSet::clear() +{ + reFilters.clear(); + stringFiltersMatcher.clear(); +} + + +void StringsMatcher::addString(const QString& pattern) +{ + if (pattern.length() < 8) { + // handle short string differently + shortStringFilters.append(pattern); + } else { + // use modified Rabin-Karp's algorithm with 8-length string hash + // i.e. store hash of first 8 chars in the HashMap for fast look-up + stringFilters.append(pattern); + int ind = stringFilters.size() - 1; + int current = 0; + + // compute hash using rolling hash + // hash for string: x0,x1,x2...xn-1 will be: + // (p^(n-1)*x0 + p^(n-2)*x1 + ... + p * xn-2 + xn-1) % q + // where p and q some wisely-chosen integers + /*for (int k = 0; k < 8; ++k)*/ + int len = pattern.length(); + for (int k = len - 8; k < len; ++k) + current = (current * HASH_P + pattern[k].unicode()) % HASH_Q; + + // insert computed hash value into HashMap + QHash >::iterator it = stringFiltersHash.find(current + 1); + if (it == stringFiltersHash.end()) { + QVector list; + list.append(ind); + stringFiltersHash.insert(current + 1, list); + fastLookUp.setBit(current); + } else { + it.value().append(ind); + } + } +} + +void StringsMatcher::addWildedString(const QString& prefix, const QRegExp& rx) +{ + rePrefixes.append(prefix); + reFilters.append(rx); + int index = -rePrefixes.size(); + + int current = 0; + for (int k = 0; k < 8; ++k) + current = (current * HASH_P + prefix[k].unicode()) % HASH_Q; + + // insert computed hash value into HashMap + QHash >::iterator it = stringFiltersHash.find(current + 1); + if (it == stringFiltersHash.end()) { + QVector list; + list.append(index); + stringFiltersHash.insert(current + 1, list); + fastLookUp.setBit(current); + } else { + it.value().append(index); + } +} + +bool StringsMatcher::isMatched(const QString& str) const +{ + // check short strings first + for (int i = 0; i < shortStringFilters.size(); ++i) { + if (str.contains(shortStringFilters[i])) + return true; + } + + int len = str.length(); + int k; + + int current = 0; + int next = 0; + // compute hash for first 8 characters + for (k = 0; k < 8 && k < len; ++k) + current = (current * HASH_P + str[k].unicode()) % HASH_Q; + + QHash >::const_iterator hashEnd = stringFiltersHash.end(); + // main Rabin-Karp's algorithm loop + for (k = 7; k < len; ++k, current = next) { + // roll the hash if not at the end + // (calculate hash for the next iteration) + if (k + 1 < len) + next = (HASH_P * ((current + HASH_Q - ((HASH_MOD * str[k - 7].unicode()) % HASH_Q)) % HASH_Q) + str[k + 1].unicode()) % HASH_Q; + + if (!fastLookUp.testBit(current)) + continue; + + // look-up the hash in the HashMap and check all strings + QHash >::const_iterator it = stringFiltersHash.find(current + 1); + + // check possible strings + if (it != hashEnd) { + for (int j = 0; j < it.value().size(); ++j) { + int index = it.value()[j]; + // check if we got simple string or REs prefix + if (index >= 0) { + int flen = stringFilters[index].length(); + if (k - flen + 1 >= 0 && stringFilters[index] == str.midRef(k - flen + 1 , flen)) + return true; + } else { + index = -index - 1; + int flen = rePrefixes[index].length(); + if (k - 8 + flen < len && rePrefixes[index] == str.midRef(k - 7, flen) && + str.indexOf(reFilters[index], k - 7 + flen) == k - 7 + flen) + return true; + } + } + } + } + + return false; +} + +void StringsMatcher::clear() +{ + stringFilters.clear(); + shortStringFilters.clear(); + reFilters.clear(); + rePrefixes.clear(); + stringFiltersHash.clear(); + fastLookUp.resize(HASH_Q); + fastLookUp.fill(0, 0, HASH_Q); +} + +} + +// kate: indent-width 4; replace-tabs on; tab-width 4; space-indent on; diff --git a/src/adblock/khtml_filter_p.h b/src/adblock/khtml_filter_p.h new file mode 100644 index 00000000..4490bbd8 --- /dev/null +++ b/src/adblock/khtml_filter_p.h @@ -0,0 +1,79 @@ +/* This file is part of the KDE project + + Copyright (C) 2005 Ivor Hewitt + Copyright (C) 2008 Maksim Orlovich + Copyright (C) 2008 Vyacheslav Tokarev + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public + License as published by the Free Software Foundation; either + version 2 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public License + along with this library; see the file COPYING.LIB. If not, write to + the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + Boston, MA 02110-1301, USA. +*/ + +#ifndef KHTML_FILTER_P_H +#define KHTML_FILTER_P_H + +#include +#include +#include +#include +#include + +namespace khtml { + +// Updateable Multi-String Matcher based on Rabin-Karp's algorithm +class StringsMatcher { +public: + // add filter to matching set + void addString(const QString& pattern); + + // check if string match at least one string from matching set + bool isMatched(const QString& str) const; + + // add filter to matching set with wildcards (*,?) in it + void addWildedString(const QString& prefix, const QRegExp& rx); + + void clear(); + +private: + QVector stringFilters; + QVector shortStringFilters; + QVector reFilters; + QVector rePrefixes; + QBitArray fastLookUp; + + QHash > stringFiltersHash; +}; + +// This represents a set of filters that may match URLs. +// Currently it supports a subset of AddBlock Plus functionality. +class FilterSet { +public: + // Parses and registers a filter. This will also strip @@ for exclusion rules, skip comments, etc. + // The user does have to split black and white lists into separate sets, however + void addFilter(const QString& filter); + + bool isUrlMatched(const QString& url); + + void clear(); + +private: + QVector reFilters; + StringsMatcher stringFiltersMatcher; +}; + +} + +#endif // KHTML_FILTER_P_H + +// kate: indent-width 4; replace-tabs on; tab-width 4; space-indent on; -- cgit v1.2.1