diff options
author | Andrea Diamantini <adjam7@gmail.com> | 2009-11-30 14:57:09 +0100 |
---|---|---|
committer | Andrea Diamantini <adjam7@gmail.com> | 2009-11-30 14:57:09 +0100 |
commit | 78da6efbf8a6694cd1f1c11af404b21941099f95 (patch) | |
tree | 1a6fa792012fff7e9b6869e1581de822cf501c1c /src/adblock | |
parent | Porting rekonq to last Qt/KDE API (diff) | |
download | rekonq-78da6efbf8a6694cd1f1c11af404b21941099f95.tar.xz |
We have adblock! (and it works)
I had a lot of problems implementing it because I started out
assuming 2 things:
1) konqueror's implementation works (it's not true, I found a bug! To see what it is,
try loading kde-apps.org in current rekonq vs. current konqueror)
2) Arora's implementation can be easily ported to kcm technology. Another wrong assumption,
based on MVP implementation.
Sorry for spamming the master branch, guys.
Diffstat (limited to 'src/adblock')
-rw-r--r-- | src/adblock/adblockmanager.cpp | 45 | ||||
-rw-r--r-- | src/adblock/adblockmanager.h | 6 | ||||
-rw-r--r-- | src/adblock/adblockrule.cpp | 173 | ||||
-rw-r--r-- | src/adblock/adblockrule.h | 82 | ||||
-rw-r--r-- | src/adblock/khtml_filter.cpp | 266 | ||||
-rw-r--r-- | src/adblock/khtml_filter_p.h | 79 |
6 files changed, 289 insertions, 362 deletions
diff --git a/src/adblock/adblockmanager.cpp b/src/adblock/adblockmanager.cpp index 987c793f..4f4cff51 100644 --- a/src/adblock/adblockmanager.cpp +++ b/src/adblock/adblockmanager.cpp @@ -30,6 +30,7 @@ // Local Includes #include "adblocknetworkreply.h" +#include "adblockrule.h" // KDE Includes #include <KSharedConfig> @@ -64,9 +65,8 @@ void AdBlockManager::loadSettings() _isAdblockEnabled = cg.readEntry("Enabled", false); _isHideAdsEnabled = cg.readEntry("Shrink", false); - _adBlackList.clear(); - _adWhiteList.clear(); - + filterList.clear(); + QMap<QString,QString> entryMap = cg.entryMap(); QMap<QString,QString>::ConstIterator it; for( it = entryMap.constBegin(); it != entryMap.constEnd(); ++it ) @@ -76,10 +76,7 @@ void AdBlockManager::loadSettings() if (name.startsWith(QLatin1String("Filter"))) { - if (url.startsWith(QLatin1String("@@"))) - _adWhiteList.addFilter(url); - else - _adBlackList.addFilter(url); + filterList << url; } } } @@ -91,13 +88,37 @@ QNetworkReply *AdBlockManager::block(const QNetworkRequest &request) if (!_isAdblockEnabled) return 0; - QString urlString = request.url().toString(); + // we (ad)block just http traffic + if(request.url().scheme() != QLatin1String("http")) + return 0; - // Check the blacklist, and only if that matches, the whitelist - if(_adBlackList.isUrlMatched(urlString) && !_adWhiteList.isUrlMatched(urlString)) + QString urlString = request.url().toString(); + kDebug() << "****************************** ADBLOCK: Matching url: "<< urlString; + + foreach(const QString &filter, filterList) { - AdBlockNetworkReply *reply = new AdBlockNetworkReply(request, urlString, this); - return reply; + AdBlockRule rule(filter); + if(rule.match(urlString)) + { + kDebug() << "****ADBLOCK: Matched: **************************"; + AdBlockNetworkReply *reply = new AdBlockNetworkReply(request, urlString, this); + return reply; + } } + + + + // Check the blacklist, and only if that matches, the whitelist + + + + + +// 
if(_adBlackList.isUrlMatched(urlString) && !_adWhiteList.isUrlMatched(urlString)) +// { +// kDebug() << "****ADBLOCK: Matched: **************************"; +// AdBlockNetworkReply *reply = new AdBlockNetworkReply(request, urlString, this); +// return reply; +// } return 0; } diff --git a/src/adblock/adblockmanager.h b/src/adblock/adblockmanager.h index 5b47c1da..32f123fd 100644 --- a/src/adblock/adblockmanager.h +++ b/src/adblock/adblockmanager.h @@ -29,9 +29,6 @@ #define ADBLOCK_MANAGER_H -// Local Includes -#include "khtml_filter_p.h" - // Qt Includes #include <QObject> #include <QStringList> @@ -56,8 +53,7 @@ private: bool _isAdblockEnabled; bool _isHideAdsEnabled; - khtml::FilterSet _adBlackList; - khtml::FilterSet _adWhiteList; + QStringList filterList; }; #endif diff --git a/src/adblock/adblockrule.cpp b/src/adblock/adblockrule.cpp new file mode 100644 index 00000000..870ad825 --- /dev/null +++ b/src/adblock/adblockrule.cpp @@ -0,0 +1,173 @@ +/* + * Copyright (c) 2009, Zsombor Gegesy <gzsombor@gmail.com> + * Copyright (c) 2009, Benjamin C. Meyer <ben@meyerhome.net> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the Benjamin Meyer nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * ============================================================ + * + * This file is a part of the rekonq project + * + * Copyright (C) 2009 by Andrea Diamantini <adjam7 at gmail dot com> + * + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of + * the License or (at your option) version 3 or any later version + * accepted by the membership of KDE e.V. (or its successor approved + * by the membership of KDE e.V.), which shall act as a proxy + * defined in Section 14 of version 3 of the license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ * + * ============================================================ */ + + +#include "adblockrule.h" + +#include <QDebug> +#include <QRegExp> +#include <QUrl> + + +AdBlockRule::AdBlockRule(const QString &filter) + : m_cssRule(false) + , m_exceptionRule(false) + , m_enabledRule(true) +{ + bool isRegExpRule = false; + + if (filter.startsWith(QLatin1String("!")) || filter.trimmed().isEmpty()) + m_enabledRule = false; + + if (filter.contains(QLatin1String("##"))) + m_cssRule = true; + + QString parsedLine = filter; + if (parsedLine.startsWith(QLatin1String("@@"))) + { + m_exceptionRule = true; + parsedLine = parsedLine.mid(2); + } + + if (parsedLine.startsWith(QLatin1Char('/'))) + { + if (parsedLine.endsWith(QLatin1Char('/'))) + { + parsedLine = parsedLine.mid(1); + parsedLine = parsedLine.left(parsedLine.size() - 1); + isRegExpRule = true; + } + } + + int options = parsedLine.indexOf(QLatin1String("$"), 0); + if (options >= 0) + { + m_options = parsedLine.mid(options + 1).split(QLatin1Char(',')); + parsedLine = parsedLine.left(options); + } + + if(!isRegExpRule) + parsedLine = convertPatternToRegExp(parsedLine); + m_regExp = QRegExp(parsedLine, Qt::CaseInsensitive, QRegExp::RegExp2); + + if (m_options.contains(QLatin1String("match-case"))) + { + m_regExp.setCaseSensitivity(Qt::CaseSensitive); + m_options.removeOne(QLatin1String("match-case")); + } +} + + +// here return false means that rule doesn't match, +// so that url is allowed +// return true means "matched rule", so stop url! 
+bool AdBlockRule::match(const QString &encodedUrl) const +{ + if (m_cssRule) + return false; + + if (!m_enabledRule) + return false; + + bool matched = m_regExp.indexIn(encodedUrl) != -1; + + if (matched && !m_options.isEmpty()) + { + // we only support domain right now + if (m_options.count() == 1) + { + foreach (const QString &option, m_options) + { + if (option.startsWith(QLatin1String("domain="))) + { + QUrl url = QUrl::fromEncoded(encodedUrl.toUtf8()); + QString host = url.host(); + QStringList domainOptions = option.mid(7).split(QLatin1Char('|')); + foreach (QString domainOption, domainOptions) + { + bool negate = domainOption.at(0) == QLatin1Char('~'); + if (negate) + domainOption = domainOption.mid(1); + bool hostMatched = domainOption == host; + if (hostMatched && !negate) + return true; + if (!hostMatched && negate) + return true; + } + } + } + } + return false; + } + + return matched; +} + + +QString AdBlockRule::convertPatternToRegExp(const QString &wildcardPattern) +{ + QString pattern = wildcardPattern; + return pattern.replace(QRegExp(QLatin1String("\\*+")), QLatin1String("*")) // remove multiple wildcards + .replace(QRegExp(QLatin1String("\\^\\|$")), QLatin1String("^")) // remove anchors following separator placeholder + .replace(QRegExp(QLatin1String("^(\\*)")), QLatin1String("")) // remove leading wildcards + .replace(QRegExp(QLatin1String("(\\*)$")), QLatin1String("")) // remove trailing wildcards + .replace(QRegExp(QLatin1String("(\\W)")), QLatin1String("\\\\1")) // escape special symbols + .replace(QRegExp(QLatin1String("^\\\\\\|\\\\\\|")), + QLatin1String("^[\\w\\-]+:\\/+(?!\\/)(?:[^\\/]+\\.)?")) // process extended anchor at expression start + .replace(QRegExp(QLatin1String("\\\\\\^")), + QLatin1String("(?:[^\\w\\d\\-.%]|$)")) // process separator placeholders + .replace(QRegExp(QLatin1String("^\\\\\\|")), QLatin1String("^")) // process anchor at expression start + .replace(QRegExp(QLatin1String("\\\\\\|$")), QLatin1String("$")) // process 
anchor at expression end + .replace(QRegExp(QLatin1String("\\\\\\*")), QLatin1String(".*")) // replace wildcards by .* + ; +} diff --git a/src/adblock/adblockrule.h b/src/adblock/adblockrule.h new file mode 100644 index 00000000..3f1bd8bf --- /dev/null +++ b/src/adblock/adblockrule.h @@ -0,0 +1,82 @@ +/* + * Copyright (c) 2009, Benjamin C. Meyer <ben@meyerhome.net> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the Benjamin Meyer nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * ============================================================ + * + * This file is a part of the rekonq project + * + * Copyright (C) 2009 by Andrea Diamantini <adjam7 at gmail dot com> + * + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of + * the License or (at your option) version 3 or any later version + * accepted by the membership of KDE e.V. (or its successor approved + * by the membership of KDE e.V.), which shall act as a proxy + * defined in Section 14 of version 3 of the license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ * + * ============================================================ */ + + +#ifndef ADBLOCKRULE_H +#define ADBLOCKRULE_H + +// Qt Includes +#include <QStringList> + +// Forward Includes +class QUrl; +class QRegExp; + + +class AdBlockRule +{ +public: + AdBlockRule(const QString &filter); + + bool match(const QString &encodedUrl) const; + +private: + QString convertPatternToRegExp(const QString &wildcardPattern); + + bool m_cssRule; + bool m_exceptionRule; + bool m_enabledRule; + QRegExp m_regExp; + QStringList m_options; +}; + +#endif // ADBLOCKRULE_H diff --git a/src/adblock/khtml_filter.cpp b/src/adblock/khtml_filter.cpp deleted file mode 100644 index f258d1e7..00000000 --- a/src/adblock/khtml_filter.cpp +++ /dev/null @@ -1,266 +0,0 @@ -/* This file is part of the KDE project - - Copyright (C) 2005 Ivor Hewitt <ivor@kde.org> - Copyright (C) 2008 Maksim Orlovich <maksim@kde.org> - Copyright (C) 2008 Vyacheslav Tokarev <tsjoker@gmail.com> - - This library is free software; you can redistribute it and/or - modify it under the terms of the GNU Library General Public - License as published by the Free Software Foundation; either - version 2 of the License, or (at your option) any later version. - - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Library General Public License for more details. - - You should have received a copy of the GNU Library General Public License - along with this library; see the file COPYING.LIB. If not, write to - the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, - Boston, MA 02110-1301, USA. 
-*/ - -#include "khtml_filter_p.h" -#include <QDebug> - -// rolling hash parameters -#define HASH_P (1997) -#define HASH_Q (17509) -// HASH_MOD = (HASH_P^7) % HASH_Q -#define HASH_MOD (523) - -namespace khtml { - -void FilterSet::addFilter(const QString& filterStr) -{ - QString filter = filterStr; - - if (filter.startsWith(QLatin1Char('!'))) - return; - - // Strip leading @@ - int first = 0; - int last = filter.length() - 1; - if (filter.startsWith(QLatin1String("@@"))) - first = 2; - - // Strip options, we ignore them for now. - int dollar = filter.lastIndexOf(QLatin1Char('$')); - if (dollar != -1) - last = dollar - 1; - - // Perhaps nothing left? - if (first > last) - return; - - filter = filter.mid(first, last - first + 1); - - // Is it a regexp filter? - if (filter.length()>2 && filter.startsWith(QLatin1Char('/')) && filter.endsWith(QLatin1Char('/'))) - { - QString inside = filter.mid(1, filter.length()-2); - QRegExp rx(inside); - reFilters.append(rx); -// qDebug() << "R:" << inside; - } - else - { - // Nope, a wildcard one. - // Note: For these, we also need to handle |. - - // Strip wildcards at the ends - first = 0; - last = filter.length() - 1; - - while (first < filter.length() && filter[first] == QLatin1Char('*')) - ++first; - - while (last >= 0 && filter[last] == QLatin1Char('*')) - --last; - - if (first > last) - filter = QLatin1String("*"); // erm... Well, they asked for it. - else - filter = filter.mid(first, last - first + 1); - - // Now, do we still have any wildcard stuff left? 
- if (filter.contains("*") || filter.contains("?")) - { -// qDebug() << "W:" << filter; - // check if we can use RK first (and then check full RE for the rest) for better performance - int aPos = filter.indexOf('*'); - if (aPos < 0) - aPos = filter.length(); - int qPos = filter.indexOf('?'); - if (qPos < 0) - qPos = filter.length(); - int pos = qMin(aPos, qPos); - if (pos > 7) { - QRegExp rx; - - rx.setPatternSyntax(QRegExp::Wildcard); - rx.setPattern(filter.mid(pos)); - - stringFiltersMatcher.addWildedString(filter.mid(0, pos), rx); - - } else { - QRegExp rx; - - rx.setPatternSyntax(QRegExp::Wildcard); - rx.setPattern(filter); - reFilters.append(rx); - } - } - else - { - // Fast path - stringFiltersMatcher.addString(filter); - } - } -} - -bool FilterSet::isUrlMatched(const QString& url) -{ - if (stringFiltersMatcher.isMatched(url)) - return true; - - for (int c = 0; c < reFilters.size(); ++c) - { - if (url.contains(reFilters[c])) - return true; - } - - return false; -} - -void FilterSet::clear() -{ - reFilters.clear(); - stringFiltersMatcher.clear(); -} - - -void StringsMatcher::addString(const QString& pattern) -{ - if (pattern.length() < 8) { - // handle short string differently - shortStringFilters.append(pattern); - } else { - // use modified Rabin-Karp's algorithm with 8-length string hash - // i.e. store hash of first 8 chars in the HashMap for fast look-up - stringFilters.append(pattern); - int ind = stringFilters.size() - 1; - int current = 0; - - // compute hash using rolling hash - // hash for string: x0,x1,x2...xn-1 will be: - // (p^(n-1)*x0 + p^(n-2)*x1 + ... 
+ p * xn-2 + xn-1) % q - // where p and q some wisely-chosen integers - /*for (int k = 0; k < 8; ++k)*/ - int len = pattern.length(); - for (int k = len - 8; k < len; ++k) - current = (current * HASH_P + pattern[k].unicode()) % HASH_Q; - - // insert computed hash value into HashMap - QHash<int, QVector<int> >::iterator it = stringFiltersHash.find(current + 1); - if (it == stringFiltersHash.end()) { - QVector<int> list; - list.append(ind); - stringFiltersHash.insert(current + 1, list); - fastLookUp.setBit(current); - } else { - it.value().append(ind); - } - } -} - -void StringsMatcher::addWildedString(const QString& prefix, const QRegExp& rx) -{ - rePrefixes.append(prefix); - reFilters.append(rx); - int index = -rePrefixes.size(); - - int current = 0; - for (int k = 0; k < 8; ++k) - current = (current * HASH_P + prefix[k].unicode()) % HASH_Q; - - // insert computed hash value into HashMap - QHash<int, QVector<int> >::iterator it = stringFiltersHash.find(current + 1); - if (it == stringFiltersHash.end()) { - QVector<int> list; - list.append(index); - stringFiltersHash.insert(current + 1, list); - fastLookUp.setBit(current); - } else { - it.value().append(index); - } -} - -bool StringsMatcher::isMatched(const QString& str) const -{ - // check short strings first - for (int i = 0; i < shortStringFilters.size(); ++i) { - if (str.contains(shortStringFilters[i])) - return true; - } - - int len = str.length(); - int k; - - int current = 0; - int next = 0; - // compute hash for first 8 characters - for (k = 0; k < 8 && k < len; ++k) - current = (current * HASH_P + str[k].unicode()) % HASH_Q; - - QHash<int, QVector<int> >::const_iterator hashEnd = stringFiltersHash.end(); - // main Rabin-Karp's algorithm loop - for (k = 7; k < len; ++k, current = next) { - // roll the hash if not at the end - // (calculate hash for the next iteration) - if (k + 1 < len) - next = (HASH_P * ((current + HASH_Q - ((HASH_MOD * str[k - 7].unicode()) % HASH_Q)) % HASH_Q) + str[k + 1].unicode()) % 
HASH_Q; - - if (!fastLookUp.testBit(current)) - continue; - - // look-up the hash in the HashMap and check all strings - QHash<int, QVector<int> >::const_iterator it = stringFiltersHash.find(current + 1); - - // check possible strings - if (it != hashEnd) { - for (int j = 0; j < it.value().size(); ++j) { - int index = it.value()[j]; - // check if we got simple string or REs prefix - if (index >= 0) { - int flen = stringFilters[index].length(); - if (k - flen + 1 >= 0 && stringFilters[index] == str.midRef(k - flen + 1 , flen)) - return true; - } else { - index = -index - 1; - int flen = rePrefixes[index].length(); - if (k - 8 + flen < len && rePrefixes[index] == str.midRef(k - 7, flen) && - str.indexOf(reFilters[index], k - 7 + flen) == k - 7 + flen) - return true; - } - } - } - } - - return false; -} - -void StringsMatcher::clear() -{ - stringFilters.clear(); - shortStringFilters.clear(); - reFilters.clear(); - rePrefixes.clear(); - stringFiltersHash.clear(); - fastLookUp.resize(HASH_Q); - fastLookUp.fill(0, 0, HASH_Q); -} - -} - -// kate: indent-width 4; replace-tabs on; tab-width 4; space-indent on; diff --git a/src/adblock/khtml_filter_p.h b/src/adblock/khtml_filter_p.h deleted file mode 100644 index 4490bbd8..00000000 --- a/src/adblock/khtml_filter_p.h +++ /dev/null @@ -1,79 +0,0 @@ -/* This file is part of the KDE project - - Copyright (C) 2005 Ivor Hewitt <ivor@kde.org> - Copyright (C) 2008 Maksim Orlovich <maksim@kde.org> - Copyright (C) 2008 Vyacheslav Tokarev <tsjoker@gmail.com> - - This library is free software; you can redistribute it and/or - modify it under the terms of the GNU Library General Public - License as published by the Free Software Foundation; either - version 2 of the License, or (at your option) any later version. - - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU - Library General Public License for more details. - - You should have received a copy of the GNU Library General Public License - along with this library; see the file COPYING.LIB. If not, write to - the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, - Boston, MA 02110-1301, USA. -*/ - -#ifndef KHTML_FILTER_P_H -#define KHTML_FILTER_P_H - -#include <QString> -#include <QRegExp> -#include <QVector> -#include <QHash> -#include <QBitArray> - -namespace khtml { - -// Updateable Multi-String Matcher based on Rabin-Karp's algorithm -class StringsMatcher { -public: - // add filter to matching set - void addString(const QString& pattern); - - // check if string match at least one string from matching set - bool isMatched(const QString& str) const; - - // add filter to matching set with wildcards (*,?) in it - void addWildedString(const QString& prefix, const QRegExp& rx); - - void clear(); - -private: - QVector<QString> stringFilters; - QVector<QString> shortStringFilters; - QVector<QRegExp> reFilters; - QVector<QString> rePrefixes; - QBitArray fastLookUp; - - QHash<int, QVector<int> > stringFiltersHash; -}; - -// This represents a set of filters that may match URLs. -// Currently it supports a subset of AddBlock Plus functionality. -class FilterSet { -public: - // Parses and registers a filter. This will also strip @@ for exclusion rules, skip comments, etc. - // The user does have to split black and white lists into separate sets, however - void addFilter(const QString& filter); - - bool isUrlMatched(const QString& url); - - void clear(); - -private: - QVector<QRegExp> reFilters; - StringsMatcher stringFiltersMatcher; -}; - -} - -#endif // KHTML_FILTER_P_H - -// kate: indent-width 4; replace-tabs on; tab-width 4; space-indent on; |