aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.hgignore1
-rw-r--r--CMakeLists.txt4
-rw-r--r--doc/UrlFilter.md91
-rw-r--r--lib/web/CMakeLists.txt10
-rw-r--r--lib/web/urlfilter/filterdomain.cpp62
-rw-r--r--lib/web/urlfilter/filterdomain.h33
-rw-r--r--lib/web/urlfilter/filterrule.cpp88
-rw-r--r--lib/web/urlfilter/filterrule.h68
-rw-r--r--src/webengine/urlinterceptor.cpp9
-rw-r--r--test/CMakeLists.txt19
-rw-r--r--test/HostlistTest.cpp25
-rw-r--r--test/HostlistTest.h16
-rw-r--r--test/autotests.qrc5
-rw-r--r--test/data/hostlist.txt2
-rw-r--r--test/urlfilter/urlfiltertest.cpp88
-rw-r--r--test/urlfilter/urlfiltertest.h41
16 files changed, 496 insertions, 66 deletions
diff --git a/.hgignore b/.hgignore
index 39716ac..dab1be2 100644
--- a/.hgignore
+++ b/.hgignore
@@ -11,6 +11,7 @@ build*
# qtcreator
*.user
+lang/*.qm
test/plugins.d
3rd-party/*/.git
diff --git a/CMakeLists.txt b/CMakeLists.txt
index f0f48c9..a123603 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -8,7 +8,7 @@ option(MercurialRepo "Get version information from .hg" ON)
option(CompilerWarnings "Compiler warnings" ON)
option(QtDeprecatedWarnings "Qt deprecated warnings" ON)
option(UseLibCpp "Use libc++ over stdlibc++ (requires clang)" OFF)
-option(Tests "Enable/disable some basic autotests" ON)
+option(Tests "Enable/disable some basic autotests" OFF)
option(Plasma "Enable some fancy effects on Plasma" OFF)
# Libraries
@@ -86,7 +86,7 @@ add_subdirectory(src)
if (Tests)
enable_testing()
find_package(Qt5 COMPONENTS Test REQUIRED)
- #add_subdirectory(test)
+ add_subdirectory(test)
endif()
message("Version='${VerInfo}' bookmark='${VerBookmark}' commit='${VerCommit}'")
diff --git a/doc/UrlFilter.md b/doc/UrlFilter.md
new file mode 100644
index 0000000..cea2397
--- /dev/null
+++ b/doc/UrlFilter.md
@@ -0,0 +1,91 @@
+## FilterDomain
+Filter Domains are groups of domains that can point to one or more filter
+rules.
+
+### Types
+There are 4 Filter Domain types:
+- AllowOnDomains - only match specified domains
+- BlockOnDomains - match all but specified domains
+- AllowOnAllDomains - match all domains
+- BlockOnAllDomains - match no domains
+
+### JSON
+ {
+ "type" : "AllowOnDomains",
+ "domains" : [ "example.com", "test.example.com" ],
+ "rules" : []
+ }
+
+## FilterRule
+Filter rules contain information on how a request should be modified.
+
+### "action"
+- Whitelist - allow this request
+- Blacklist - block this request
+- Redirect - redirect this request
+- SetHeader - apply a list of headers
+
+### "regexp", "contains", "endswith"
+
+### JSON
+Allow all URLs that contain "waifu.png"
+ {
+ "action" : "Whitelist",
+ "contains" : "waifu.png"
+ }
+
+Block specific URL
+ {
+ "action" : "Blacklist",
+ "equals" : "example.com/annoying-ad/masquerade/waifu.png"
+ }
+
+Block all URLs that contain "banner.gif"
+ {
+ "action" : "Blacklist",
+ "contains" : "banner.gif"
+ }
+
+Redirect URLs containing "ads/annoying-spam.gif" to "waifu.tld/waifu.gif"
+ {
+ "action" : "Redirect",
+ "contains" : "ads/annoying-spam.gif",
+ "url" : "waifu.tld/waifu.gif"
+ }
+
+Set some headers
+ {
+ "action" : "SetHeader",
+ "header" : { "DNT" : "1" }
+ }
+
+## QWebEngineUrlRequestInterceptor
+
+All network requests pass through the request interceptor. It gives the
+following information:
+
+- firstPartyUrl - the page on which the request is made
+- requestUrl - the url of the request
+- requestMethod
+- resourceType
+- navigationType
+
+And provides the following methods:
+
+- block (bool shouldBlock) - can block the request
+- redirect (const QUrl) - can redirect the requestUrl
+- setHttpHeader - can set HTTP headers (such as user agent and do not track)
+
+### Example
+This is a sample request made when loading DuckDuckGo:
+
+ firstPartyUrl=https://duckduckgo.com/
+ requestUrl=https://duckduckgo.com/o1635.css
+
+## How the filter works
+When a request comes in, the interceptor extracts the host of the request and
+matches it against the list of FilterDomains.
+
+ firstPartyHost=duckduckgo.com
+ requestHost=duckduckgo.com
+
diff --git a/lib/web/CMakeLists.txt b/lib/web/CMakeLists.txt
index c5d5eba..601c1d5 100644
--- a/lib/web/CMakeLists.txt
+++ b/lib/web/CMakeLists.txt
@@ -7,11 +7,11 @@ set(CMAKE_AUTOMOC ON)
add_library(web
webprofile.cpp
webprofile.h
-)
-#target_include_directories(web
-# PRIVATE ${Boost_INCLUDE_DIRS}
-# PRIVATE ..
-#)
+ urlfilter/filterdomain.cpp
+ urlfilter/filterdomain.h
+ urlfilter/filterrule.cpp
+ urlfilter/filterrule.h
+)
target_link_libraries(web Qt5::WebEngineWidgets)
diff --git a/lib/web/urlfilter/filterdomain.cpp b/lib/web/urlfilter/filterdomain.cpp
new file mode 100644
index 0000000..53bc7db
--- /dev/null
+++ b/lib/web/urlfilter/filterdomain.cpp
@@ -0,0 +1,62 @@
+#include "filterdomain.h"
+#include <QVector>
+
+/**
+ * Checks whether a host is covered by a domain filter.
+ *
+ * A host is covered when it is identical to the filter, or when it is a
+ * subdomain of it, i.e. it ends with the filter and the character directly
+ * in front of that suffix is a '.'.
+ *
+ * @param domain host name to test, e.g. "www.example.com"
+ * @param filter domain to test against, e.g. "example.com"
+ * @return true if domain equals filter or is a subdomain of filter
+ */
+bool isMatchingDomain(const QString &domain, const QString &filter)
+{
+    // domain and filter are the same
+    if(domain == filter) {
+        return true;
+    }
+
+    // every string ends with "", so an empty filter must be rejected here;
+    // the only thing it may match is an empty domain, handled above
+    if(filter.isEmpty()) {
+        return false;
+    }
+
+    // domain can't be matched by filter if it doesn't end with filter
+    // ex. example2.com isn't matched by example.com
+    if(!domain.endsWith(filter)) {
+        return false;
+    }
+
+    // Start of the trailing occurrence of filter. This must be derived from
+    // the lengths: indexOf() reports the FIRST occurrence, which is the wrong
+    // position when filter appears more than once
+    // (ex. example.com.example.com against example.com).
+    const auto suffixStart = domain.length() - filter.length();
+
+    // match with subdomains: the suffix must be preceded by a '.'
+    // ex. subdomain.example.com is matched by example.com,
+    //     anotherexample.com is not
+    return suffixStart > 0 && domain[suffixStart - 1] == QLatin1Char('.');
+}
+
+// Creates an empty domain group using the given match type; parent is
+// handed to the QObject base.
+FilterDomain::FilterDomain(MatchType type, QObject *parent)
+    : QObject(parent)
+    , m_type(type)
+{
+}
+
+// Replaces the match type that hasMatch() evaluates hosts against.
+void FilterDomain::setType(MatchType type)
+{
+ m_type = type;
+}
+
+// Appends a domain to this group. Empty strings are silently dropped.
+void FilterDomain::addDomain(const QString &domain)
+{
+    if(domain.isEmpty()) {
+        return;
+    }
+
+    m_domains.append(domain);
+}
+
+// Decides whether this group applies to the given host.
+//   WhitelistAll -> always true
+//   BlacklistAll -> always false
+//   Whitelist    -> true only if host is covered by one of the added domains
+//   Blacklist    -> true only if host is covered by none of the added domains
+bool FilterDomain::hasMatch(const QString &host) const
+{
+    // the catch-all types don't need to look at the domain list
+    switch(m_type) {
+    case WhitelistAll:
+        return true;
+    case BlacklistAll:
+        return false;
+    default:
+        break;
+    }
+
+    // is the host covered by any of the added domains?
+    bool listed = false;
+    for(const QString &entry : m_domains) {
+        if(isMatchingDomain(host, entry)) {
+            listed = true;
+            break;
+        }
+    }
+
+    // a whitelist applies to listed hosts, a blacklist to all the others
+    return (m_type == Whitelist) ? listed : !listed;
+}
diff --git a/lib/web/urlfilter/filterdomain.h b/lib/web/urlfilter/filterdomain.h
new file mode 100644
index 0000000..2173bfc
--- /dev/null
+++ b/lib/web/urlfilter/filterdomain.h
@@ -0,0 +1,33 @@
+#ifndef FILTERDOMAIN_H
+#define FILTERDOMAIN_H
+
+#include <QObject>
+#include <QVector>
+
+// A group of domains plus a match type that tells hasMatch() how membership
+// in that group is to be interpreted.
+class FilterDomain : public QObject
+{
+ Q_OBJECT
+public:
+ enum MatchType {
+ Whitelist, // only match added domains
+ Blacklist, // only match domains not added
+ WhitelistAll, // match all domains
+ BlacklistAll // match no domains
+ };
+
+ // Creates an empty group with the given match type.
+ explicit FilterDomain(MatchType type = Whitelist, QObject *parent = nullptr);
+
+ // Changes the match type.
+ void setType(MatchType type);
+ // Adds a domain to the group; empty strings are ignored.
+ void addDomain(const QString &domain);
+
+ // Returns whether host is matched under the current match type.
+ // Added domains are compared with isMatchingDomain(), so they also
+ // cover their subdomains.
+ bool hasMatch(const QString &host) const;
+
+private:
+ MatchType m_type;
+ QVector<QString> m_domains;
+};
+
+// function taken from KDE/Falkon
+bool isMatchingDomain(const QString &domain, const QString &filter);
+
+#endif // FILTERDOMAIN_H
diff --git a/lib/web/urlfilter/filterrule.cpp b/lib/web/urlfilter/filterrule.cpp
new file mode 100644
index 0000000..5a9310e
--- /dev/null
+++ b/lib/web/urlfilter/filterrule.cpp
@@ -0,0 +1,88 @@
+#include "filterrule.h"
+#include <QWebEngineUrlRequestInfo>
+#include <QJsonArray>
+
+/**
+ * Builds a rule from its JSON description.
+ *
+ * "action" selects what happens on a match ("Whitelist", "Blacklist",
+ * "Redirect" or "SetHeader"); "Redirect" additionally reads "url". The
+ * matcher is taken from the first key present among "regexp", "endswith"
+ * and "contains", in that order.
+ *
+ * The rule is left invalid (see isValid()) when the action is missing or
+ * unknown, when none of the matcher keys is present, or when the "regexp"
+ * pattern does not compile.
+ *
+ * @param filter JSON object describing the rule
+ */
+FilterRule::FilterRule(const QJsonObject &filter)
+{
+    const QString action = filter.value("action").toString();
+
+    // there is no action specified => this rule is invalid
+    if(action.isEmpty())
+        return;
+
+    if(action == "Whitelist")
+        m_action = ActionType::Whitelist;
+    else if(action == "Blacklist")
+        m_action = ActionType::Blacklist;
+    else if(action == "Redirect") {
+        m_action = ActionType::Redirect;
+        m_redirectUrl = QUrl::fromUserInput(filter.value("url").toString());
+    } else if(action == "SetHeader")
+        m_action = ActionType::SetHeader;
+    else // invalid action
+        return;
+
+    // named *Value so they don't shadow the regexp/pattern members
+    const QJsonValue regexpValue = filter.value("regexp");
+    const QJsonValue endsWithValue = filter.value("endswith");
+    const QJsonValue containsValue = filter.value("contains");
+
+    if(!regexpValue.isUndefined()) {
+        regexp.setPattern(regexpValue.toString());
+        // a pattern that does not compile can never match anything, so the
+        // rule must not report itself as valid
+        if(!regexp.isValid())
+            return;
+        m_type = RuleType::RegExpMatchRule;
+    } else if(!endsWithValue.isUndefined()) {
+        m_type = RuleType::StringEndsMatchRule;
+        pattern = endsWithValue.toString();
+    } else if(!containsValue.isUndefined()) {
+        m_type = RuleType::StringContainsMatchRule;
+        matcher.setPattern(containsValue.toString());
+        matcher.setCaseSensitivity(Qt::CaseInsensitive);
+    } else // invalid rule
+        return;
+
+    // image requests are the only resource type a rule is enabled for
+    m_options.insert(QWebEngineUrlRequestInfo::ResourceTypeImage, true);
+}
+
+// Returns true once the constructor has assigned a rule type other than
+// RuleType::Invalid.
+bool FilterRule::isValid() const
+{
+ return m_type != RuleType::Invalid;
+}
+
+// Applies this rule to a request.
+// Returns true when the request URL matched and the request was allowed,
+// blocked or redirected; returns false when it did not match, and also for
+// SetHeader rules, for which nothing is done here.
+// Must only be called on a valid rule (asserted).
+bool FilterRule::process(QWebEngineUrlRequestInfo &info) const
+{
+    Q_ASSERT(m_type != RuleType::Invalid);
+
+    const bool matched = matchRequestUrl(info.requestUrl().toString(), info.resourceType());
+    if(!matched) {
+        return false;
+    }
+
+    switch(m_action) {
+    case ActionType::Whitelist:
+        info.block(false);
+        return true;
+    case ActionType::Blacklist:
+        info.block(true);
+        return true;
+    case ActionType::Redirect:
+        info.redirect(m_redirectUrl);
+        return true;
+    case ActionType::SetHeader:
+        // no header handling yet
+        break;
+    }
+
+    return false;
+}
+
+// Tests a request URL against this rule's matcher.
+// Resource types that are not registered in m_options never match; neither
+// does a rule whose type is not one of the regexp / ends-with / contains
+// types.
+bool FilterRule::matchRequestUrl(const QString &requestUrl, const QWebEngineUrlRequestInfo::ResourceType type) const
+{
+    if(!m_options.contains(type)) {
+        return false;
+    }
+
+    if(m_type == RuleType::RegExpMatchRule) {
+        return regexp.match(requestUrl).hasMatch();
+    }
+
+    if(m_type == RuleType::StringEndsMatchRule) {
+        return requestUrl.endsWith(pattern);
+    }
+
+    if(m_type == RuleType::StringContainsMatchRule) {
+        return matcher.indexIn(requestUrl) != -1;
+    }
+
+    return false;
+}
diff --git a/lib/web/urlfilter/filterrule.h b/lib/web/urlfilter/filterrule.h
new file mode 100644
index 0000000..46690b1
--- /dev/null
+++ b/lib/web/urlfilter/filterrule.h
@@ -0,0 +1,68 @@
+#ifndef SMOLBOTE_FILTERRULE_H
+#define SMOLBOTE_FILTERRULE_H
+
+#include <QObject>
+#include <QRegularExpression>
+#include <QStringList>
+#include <QStringMatcher>
+#include <QJsonObject>
+#include <QUrl>
+#include <QWebEngineUrlRequestInfo>
+
+class QUrl;
+// A single URL filter rule parsed from a JSON object: an action (whitelist,
+// blacklist, redirect, set header) combined with one URL matcher.
+class FilterRule
+{
+public:
+ // Parses the rule from its JSON description; check isValid() afterwards.
+ FilterRule(const QJsonObject &filter);
+ ~FilterRule() = default;
+
+ // False when the JSON did not yield a usable rule type.
+ bool isValid() const;
+ // Applies the rule's action to info if its request URL matches; returns
+ // true when the request was allowed, blocked or redirected.
+ bool process(QWebEngineUrlRequestInfo &info) const;
+ // Tests a URL and resource type against this rule without modifying
+ // any request.
+ bool matchRequestUrl(const QString &requestUrl, const QWebEngineUrlRequestInfo::ResourceType type) const;
+
+private:
+ Q_DISABLE_COPY(FilterRule)
+
+ // What process() does with a matching request.
+ enum ActionType {
+ Whitelist,
+ Blacklist,
+ Redirect,
+ SetHeader
+ };
+
+ // How the request URL is matched. Only the regexp, ends-with and contains
+ // types are assigned by the constructor in filterrule.cpp.
+ enum RuleType {
+ CssRule = 0, //
+ DomainMatchRule = 1, //
+ RegExpMatchRule = 2, // match request url with regexp
+ StringEndsMatchRule = 3, // request url ends with string
+ StringContainsMatchRule = 4, // request url contains string
+ MatchAllUrlsRule = 5, //
+ Invalid = 6
+ };
+
+ ActionType m_action;
+ RuleType m_type = RuleType::Invalid;
+
+ // Resource types this rule applies to; matchRequestUrl() only checks
+ // whether the key is present.
+ QHash<QWebEngineUrlRequestInfo::ResourceType, bool> m_options;
+
+ // NOTE(review): m_matchString, m_caseSensitivity, m_isException and the
+ // two domain lists below are not referenced by filterrule.cpp.
+ // Parsed rule for string matching (CSS Selector for CSS rules)
+ QString m_matchString;
+ // Case sensitivity for string matching
+ Qt::CaseSensitivity m_caseSensitivity = Qt::CaseInsensitive;
+
+ bool m_isException = false;
+
+ // domains this rule is allowed or blocked on
+ QStringList m_allowedForDomains;
+ QStringList m_blockedForDomains;
+
+ // Target of a Redirect action.
+ QUrl m_redirectUrl;
+
+ // Matchers; which one is used depends on m_type.
+ QRegularExpression regexp; // RegExpMatchRule
+ QStringMatcher matcher; // StringContainsMatchRule
+ QString pattern; // StringEndsMatchRule
+};
+
+//bool isMatchingDomain(const QString &domain, const QString &filter);
+
+#endif // SMOLBOTE_FILTERRULE_H
diff --git a/src/webengine/urlinterceptor.cpp b/src/webengine/urlinterceptor.cpp
index 70d7701..4e1b2f1 100644
--- a/src/webengine/urlinterceptor.cpp
+++ b/src/webengine/urlinterceptor.cpp
@@ -47,6 +47,15 @@ void UrlRequestInterceptor::interceptRequest(QWebEngineUrlRequestInfo &info)
if(rules.contains(info.requestUrl().host())) {
info.block(rules.value(info.requestUrl().host()).isBlocking);
}
+
+#ifdef QT_DEBUG
+ qDebug("request>>>");
+ qDebug("firstParty url=%s", qUtf8Printable(info.firstPartyUrl().toString()));
+ qDebug("firstParty host=%s", qUtf8Printable(info.firstPartyUrl().host()));
+ qDebug("request url=%s", qUtf8Printable(info.requestUrl().toString()));
+ qDebug("request host=%s", qUtf8Printable(info.requestUrl().host()));
+ qDebug("<<<");
+#endif
}
QHash<QString, UrlRequestInterceptor::HostRule> parse(const QString &filename)
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index ad2ca1f..a44da07 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -2,16 +2,13 @@ set(CMAKE_INCLUDE_CURRENT_DIR ON)
set(CMAKE_AUTOMOC ON)
set(CMAKE_AUTORCC ON)
-macro(create_test testname)
- add_executable(${testname}
- ${testname}.cpp ${testname}.h
- autotests.qrc
- ${ARGN})
+add_executable(UrlFilterTest
+ urlfilter/urlfiltertest.cpp
+ urlfilter/urlfiltertest.h
+)
- target_include_directories(${testname} PRIVATE ../lib PRIVATE ../src)
- target_link_libraries(${testname} Qt5::Test Qt5::Concurrent Qt5::WebEngineWidgets)
+target_include_directories(UrlFilterTest
+ PRIVATE ../lib/web
+)
- add_test(NAME smolbote-${testname} COMMAND ${testname})
-endmacro()
-
-create_test(HostlistTest ../src/webengine/urlinterceptor.cpp ../src/webengine/urlinterceptor.h)
+target_link_libraries(UrlFilterTest Qt5::Test web)
+
+# register the executable with CTest, as the removed create_test() macro did;
+# without this `ctest` / `make test` finds no tests to run
+add_test(NAME smolbote-UrlFilterTest COMMAND UrlFilterTest)
diff --git a/test/HostlistTest.cpp b/test/HostlistTest.cpp
deleted file mode 100644
index 31ae11c..0000000
--- a/test/HostlistTest.cpp
+++ /dev/null
@@ -1,25 +0,0 @@
-#include "HostlistTest.h"
-
-void HostlistTest::initTestCase()
-{
- rules = parse(":/autotests/data/hostlist.txt");
-}
-
-void HostlistTest::parse_ruleCount()
-{
- QVERIFY(rules.count() == 3);
-}
-
-void HostlistTest::parse_blockSomehost()
-{
- QVERIFY(rules.contains("somehost.org"));
- QVERIFY(rules.value("somehost.org").isBlocking);
-}
-
-void HostlistTest::parse_blockHost2()
-{
- QVERIFY(rules.contains("host2.org"));
- QVERIFY(rules.value("host2.org").isBlocking);
-}
-
-QTEST_MAIN(HostlistTest)
diff --git a/test/HostlistTest.h b/test/HostlistTest.h
deleted file mode 100644
index dcfd5a3..0000000
--- a/test/HostlistTest.h
+++ /dev/null
@@ -1,16 +0,0 @@
-#include <QtTest/QtTest>
-#include "webengine/urlinterceptor.h"
-
-class HostlistTest : public QObject
-{
- Q_OBJECT
-
-private slots:
- void initTestCase();
- void parse_ruleCount();
- void parse_blockSomehost();
- void parse_blockHost2();
-
-private:
- QHash<QString, UrlRequestInterceptor::HostRule> rules;
-};
diff --git a/test/autotests.qrc b/test/autotests.qrc
deleted file mode 100644
index 5817c00..0000000
--- a/test/autotests.qrc
+++ /dev/null
@@ -1,5 +0,0 @@
-<RCC>
- <qresource prefix="/autotests">
- <file>data/hostlist.txt</file>
- </qresource>
-</RCC> \ No newline at end of file
diff --git a/test/data/hostlist.txt b/test/data/hostlist.txt
deleted file mode 100644
index d228e1d..0000000
--- a/test/data/hostlist.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-0.0.0.0 somehost.org
-0.0.0.0 host1.org host2.org
diff --git a/test/urlfilter/urlfiltertest.cpp b/test/urlfilter/urlfiltertest.cpp
new file mode 100644
index 0000000..f7ae0fb
--- /dev/null
+++ b/test/urlfilter/urlfiltertest.cpp
@@ -0,0 +1,88 @@
+/* ============================================================
+* Falkon - Qt web browser
+* Copyright (C) 2013-2018 David Rosca <nowrep@gmail.com>
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+* ============================================================ */
+
+#include "urlfiltertest.h"
+#include <QtTest/QtTest>
+#include <QUrl>
+
+#include <QJsonObject>
+
+// Data for matchingDomain(): registers the two filtered domains and lists
+// hosts together with the result expected from a Whitelist match.
+void UrlFilterTest::matchingDomain_data()
+{
+    filterDomain.addDomain("example.com");
+    filterDomain.addDomain("second-domain.org");
+
+    // Test adapted from Falkon
+    QTest::addColumn<QString>("domain");
+    QTest::addColumn<bool>("result");
+
+    // every row needs its own tag, otherwise a failure report can't be
+    // traced back to a single row
+    //            description                  domain                    result
+    QTest::newRow("missing tld")            << "example"              << false;
+    QTest::newRow("different tld")          << "example.org"          << false;
+    QTest::newRow("exact match")            << "example.com"          << true;
+    QTest::newRow("exact match 2")          << "second-domain.org"    << true;
+    QTest::newRow("subdomain match")        << "www.example.com"      << true;
+    QTest::newRow("nested subdomain match") << "www.test.example.com" << true;
+    QTest::newRow("similar domain")         << "anotherexample.com"   << false;
+    QTest::newRow("empty domain")           << ""                     << false;
+}
+
+// Checks one host from matchingDomain_data() against all four match types.
+void UrlFilterTest::matchingDomain()
+{
+ QFETCH(QString, domain);
+ QFETCH(bool, result);
+
+ // Whitelist yields the expected result, Blacklist its inverse
+ filterDomain.setType(FilterDomain::Whitelist);
+ QCOMPARE(filterDomain.hasMatch(domain), result);
+ filterDomain.setType(FilterDomain::Blacklist);
+ QCOMPARE(filterDomain.hasMatch(domain), !result);
+ // the catch-all types ignore the host entirely
+ filterDomain.setType(FilterDomain::WhitelistAll);
+ QCOMPARE(filterDomain.hasMatch(domain), true);
+ filterDomain.setType(FilterDomain::BlacklistAll);
+ QCOMPARE(filterDomain.hasMatch(domain), false);
+}
+
+// Data for matchingType(): builds a "contains" blacklist rule and lists
+// request URLs and resource types with the expected match result.
+void UrlFilterTest::matchingType_data()
+{
+ QJsonObject j;
+ j.insert("action", "Blacklist");
+ j.insert("contains", "annoying-ad.banner");
+
+ // NOTE(review): allocated with new and not deleted anywhere in this file
+ filterRule = new FilterRule(j);
+ Q_ASSERT(filterRule->isValid());
+
+ // resourceType is stored as int and cast back to the enum in matchingType()
+ QTest::addColumn<QString>("requestUrl");
+ QTest::addColumn<int>("resourceType");
+ QTest::addColumn<bool>("result");
+
+ // the second row uses a matching URL with the Media resource type and expects no match
+ QTest::newRow("contains 1") << "http://example.com/ads/annoying-ad.banner/something" << static_cast<int>(QWebEngineUrlRequestInfo::ResourceTypeImage) << true;
+ QTest::newRow("contains 2") << "http://example.com/ads/annoying-ad.banner/something" << static_cast<int>(QWebEngineUrlRequestInfo::ResourceTypeMedia) << false;
+ QTest::newRow("contains 3") << "http://example.com/ads/banner" << static_cast<int>(QWebEngineUrlRequestInfo::ResourceTypeImage) << false;
+ QTest::newRow("blank") << "" << static_cast<int>(QWebEngineUrlRequestInfo::ResourceTypeUnknown) << false;
+}
+
+// Runs one row of matchingType_data() through FilterRule::matchRequestUrl().
+void UrlFilterTest::matchingType()
+{
+    QFETCH(QString, requestUrl);
+    QFETCH(int, resourceType);
+    QFETCH(bool, result);
+
+    // the data column carries the enum as a plain int
+    const auto type = static_cast<QWebEngineUrlRequestInfo::ResourceType>(resourceType);
+    const bool matched = filterRule->matchRequestUrl(requestUrl, type);
+
+    QCOMPARE(matched, result);
+}
+
+QTEST_GUILESS_MAIN(UrlFilterTest)
diff --git a/test/urlfilter/urlfiltertest.h b/test/urlfilter/urlfiltertest.h
new file mode 100644
index 0000000..1b158e8
--- /dev/null
+++ b/test/urlfilter/urlfiltertest.h
@@ -0,0 +1,41 @@
+/* ============================================================
+* Falkon - Qt web browser
+* Copyright (C) 2013-2018 David Rosca <nowrep@gmail.com>
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+* ============================================================ */
+#ifndef URLFILTER_TEST_H
+#define URLFILTER_TEST_H
+
+#include "urlfilter/filterrule.h"
+#include "urlfilter/filterdomain.h"
+#include <QObject>
+
+// Qt Test fixture for FilterDomain and FilterRule.
+class UrlFilterTest : public QObject
+{
+ Q_OBJECT
+
+private slots:
+ // host matching of FilterDomain for every match type
+ void matchingDomain_data();
+ void matchingDomain();
+
+ // URL and resource type matching of FilterRule
+ void matchingType_data();
+ void matchingType();
+
+private:
+ // populated in matchingDomain_data()
+ FilterDomain filterDomain;
+ // created in matchingType_data(); not initialized before that
+ FilterRule *filterRule;
+};
+
+#endif // URLFILTER_TEST_H