From 9ab6f6d363f873f0799982c3c0872d38a656ee84 Mon Sep 17 00:00:00 2001 From: Aqua-sama Date: Thu, 18 Oct 2018 14:40:32 +0200 Subject: urlfilter: add Domain class (#6): Add domain matcher class --- lib/urlfilter/CMakeLists.txt | 2 ++ lib/urlfilter/domain.cpp | 61 ++++++++++++++++++++++++++++++++++ lib/urlfilter/domain.h | 33 ++++++++++++++++++ lib/urlfilter/filtertree.cpp | 10 +++--- lib/urlfilter/filtertree.h | 9 ++++- lib/urlfilter/formats/hostlistrule.cpp | 8 +++++ lib/urlfilter/formats/hostlistrule.h | 8 +++++ 7 files changed, 125 insertions(+), 6 deletions(-) create mode 100644 lib/urlfilter/domain.cpp create mode 100644 lib/urlfilter/domain.h (limited to 'lib/urlfilter') diff --git a/lib/urlfilter/CMakeLists.txt b/lib/urlfilter/CMakeLists.txt index 375ffa7..9940222 100644 --- a/lib/urlfilter/CMakeLists.txt +++ b/lib/urlfilter/CMakeLists.txt @@ -19,6 +19,8 @@ add_library(urlfilter filtertree.h filterleaf.cpp filterleaf.h + domain.cpp + domain.h ) target_link_libraries(urlfilter Qt5::WebEngineWidgets) diff --git a/lib/urlfilter/domain.cpp b/lib/urlfilter/domain.cpp new file mode 100644 index 0000000..3686210 --- /dev/null +++ b/lib/urlfilter/domain.cpp @@ -0,0 +1,61 @@ +/* + * This file is part of smolbote. 
It's copyrighted by the contributors recorded
+ * in the version control history of the file, available from its original
+ * location: https://neueland.iserlohn-fortress.net/gitea/aqua/smolbote
+ *
+ * SPDX-License-Identifier: GPL-3.0
+ */
+
+#include "domain.h"
+
+// Stores the domain and precomputes its hash (seed 0) for matchesExactly().
+Domain::Domain(const QString& domain)
+    : m_domain(domain)
+    , m_hash(qHash(domain, 0))
+{}
+
+// Move constructor: takes over other's domain string and copies its hash.
+Domain::Domain(Domain && other)
+    : m_domain(std::move(other.m_domain))
+    , m_hash(other.m_hash)
+{}
+
+Domain & Domain::operator=(Domain && other)
+{
+    m_domain = std::move(other.m_domain);
+    m_hash = other.m_hash;
+    return *this;
+}
+
+// Returns true if the host of url is this domain or one of its subdomains.
+bool Domain::matches(const QUrl& url) const
+{
+    const QString domain = url.host();
+
+    // domain and filter are the same
+    if(domain == m_domain) {
+        return true;
+    }
+
+    // domain cannot be matched if it doesn't end with filter
+    // ex. example2.com isn't matched by example.com
+    if(!domain.endsWith(m_domain)) {
+        return false;
+    }
+
+    // subdomain match, ex. subdomain.example.com is matched by example.com; the filter
+    // is a suffix here, so locate it by length (indexOf() returns the first occurrence)
+    const int index = domain.length() - m_domain.length();
+    return !m_domain.isEmpty() && index > 0 && domain[index - 1] == QLatin1Char('.');
+}
+
+// Returns true if hash equals the hash stored for this domain (see constructor).
+bool Domain::matchesExactly(uint hash) const
+{
+    return (m_hash == hash);
+}
+
+QString Domain::host() const
+{
+    return m_domain;
+}
diff --git a/lib/urlfilter/domain.h b/lib/urlfilter/domain.h
new file mode 100644
index 0000000..356762d
--- /dev/null
+++ b/lib/urlfilter/domain.h
@@ -0,0 +1,33 @@
+/*
+ * This file is part of smolbote. 
It's copyrighted by the contributors recorded
+ * in the version control history of the file, available from its original
+ * location: https://neueland.iserlohn-fortress.net/gitea/aqua/smolbote
+ *
+ * SPDX-License-Identifier: GPL-3.0
+ */
+
+#ifndef SMOLBOTE_DOMAIN_H
+#define SMOLBOTE_DOMAIN_H
+
+#include <QString>
+#include <QUrl>
+
+class Domain // host name filter: matches a domain and its subdomains
+{
+public:
+    explicit Domain(const QString &domain);
+    Domain(Domain &&other); // not explicit, so that Domain values can be moved implicitly
+    Domain& operator=(Domain &&other);
+
+    // true if the host of url is this domain or one of its subdomains
+    bool matches(const QUrl &url) const;
+    // true if hash equals the hash stored for this domain
+    bool matchesExactly(uint hash) const;
+    QString host() const; // the domain string held by this object
+
+private:
+    QString m_domain;
+    uint m_hash; // compared by matchesExactly()
+};
+
+#endif // SMOLBOTE_DOMAIN_H
diff --git a/lib/urlfilter/filtertree.cpp b/lib/urlfilter/filtertree.cpp index 8844a76..8d88140 100644 --- a/lib/urlfilter/filtertree.cpp +++ b/lib/urlfilter/filtertree.cpp @@ -45,7 +45,7 @@ const QStringList FilterTree::branches() const { QStringList branches; for(auto &branch : m_branches) { - branches.append(QString::fromStdString(branch.domain)); + branches.append(branch.domain.host()); } return branches; } @@ -54,7 +54,7 @@ QVector FilterTree::match(const QString& domain, const QStri { QVector leaves; for(const auto &branch : m_branches) { - if(branch.domain == domain.toStdString()) { + if(branch.domain.matches(QUrl(domain))) { for(const auto leaf : branch.leaves) { if(leaf->match(requestUrl)) { @@ -70,15 +70,15 @@ QVector FilterTree::match(const QString& domain, const QStri bool FilterTree::addRule(FilterLeaf *rule, const QString& domain) { for(auto &branch : m_branches) { - if(branch.domain == domain.toStdString()) { + if(branch.domain.matches(QUrl(domain))) { branch.leaves.emplace_back(rule); return true; } } // no branch was found - Branch branch; - branch.domain = domain.toStdString(); + Branch branch(domain); + //branch.domain = domain.toStdString(); // TODO: for some reason, can't add rule here //branch.leaves.emplace_back(rule); 
m_branches.emplace_back(std::move(branch)); diff --git a/lib/urlfilter/filtertree.h b/lib/urlfilter/filtertree.h index 8cecf50..004cf5a 100644 --- a/lib/urlfilter/filtertree.h +++ b/lib/urlfilter/filtertree.h @@ -14,6 +14,7 @@ #include #include #include "filterleaf.h" +#include "domain.h" /** FilterTree: B+ tree of filter rules * The root of the tree contains branches that represent domains, on which their rules are to be applied. @@ -31,10 +32,16 @@ public: private: struct Branch { + explicit Branch(const QString &host) : domain(host) {} + explicit Branch(Branch &&other) + : domain(std::move(other.domain)) + , leaves(std::move(other.leaves)) + {} ~Branch() { qDeleteAll(leaves); } // TODO: replace domain type with domain-matching class - std::string domain; + Domain domain; + //std::string domain; std::vector leaves; }; diff --git a/lib/urlfilter/formats/hostlistrule.cpp b/lib/urlfilter/formats/hostlistrule.cpp index 1df6b3e..8336243 100644 --- a/lib/urlfilter/formats/hostlistrule.cpp +++ b/lib/urlfilter/formats/hostlistrule.cpp @@ -1,3 +1,11 @@ +/* + * This file is part of smolbote. It's copyrighted by the contributors recorded + * in the version control history of the file, available from its original + * location: https://neueland.iserlohn-fortress.net/gitea/aqua/smolbote + * + * SPDX-License-Identifier: GPL-3.0 + */ + #include "hostlistrule.h" HostlistRule::HostlistRule(const QString &domain, const QString& redirect) diff --git a/lib/urlfilter/formats/hostlistrule.h b/lib/urlfilter/formats/hostlistrule.h index 764a2e2..19fd63f 100644 --- a/lib/urlfilter/formats/hostlistrule.h +++ b/lib/urlfilter/formats/hostlistrule.h @@ -1,3 +1,11 @@ +/* + * This file is part of smolbote. 
It's copyrighted by the contributors recorded + * in the version control history of the file, available from its original + * location: https://neueland.iserlohn-fortress.net/gitea/aqua/smolbote + * + * SPDX-License-Identifier: GPL-3.0 + */ + #ifndef SMOLBOTE_HOSTLIST_RULE_H #define SMOLBOTE_HOSTLIST_RULE_H -- cgit v1.2.1