Skip to content

Commit

Permalink
Merge pull request #833 from openzim/clone_entry
Browse files Browse the repository at this point in the history
  • Loading branch information
mgautierfr authored Nov 28, 2023
2 parents ca1bcee + 9a59af5 commit abf7c11
Show file tree
Hide file tree
Showing 11 changed files with 145 additions and 22 deletions.
1 change: 1 addition & 0 deletions include/zim/item.h
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ namespace zim

#ifdef ZIM_PRIVATE
cluster_index_type getClusterIndex() const;
blob_index_type getBlobIndex() const;
#endif

private: // functions
Expand Down
30 changes: 30 additions & 0 deletions include/zim/writer/creator.h
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,36 @@ namespace zim
const std::string& targetpath,
const Hints& hints = Hints());


/**
* Add a alias of a existing entry.
*
* The existing entry pointed by `targetPath` is cloned and updated with
* `path` and `title`.
*
* The alias entry will shared the same type (redirection or item)
* and namespace than `targetPath`.
*
* If the `targetPath` is a item, the new entry will be item pointing
* to the same data than `targetPath` item. (Not a redirection to `targetPath`).
* However, the alias entry is not counted in the media type counter
* and it is not fulltext indexed (only title indexed).
*
* Hints can be given to influence creator handling (front article, ...)
* as it is done for redirection.
*
* @param path the path of the alias
* @param title the title of the alias
* @param targetPath the path of the aliased entry.
* @param hints hints associated to the alias.
*/
void addAlias(
const std::string& path,
const std::string& title,
const std::string& targetPath,
const Hints& hints = Hints()
);

/**
* Finalize the zim creation.
*/
Expand Down
5 changes: 5 additions & 0 deletions src/item.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -88,3 +88,8 @@ cluster_index_type Item::getClusterIndex() const
{
return m_dirent->getClusterNumber().v;
}

blob_index_type Item::getBlobIndex() const
{
return m_dirent->getBlobNumber().v;
}
19 changes: 19 additions & 0 deletions src/writer/_dirent.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ namespace zim
ns(ns)
{};
Redirect(Redirect&& r) = default;
Redirect(const Redirect& r) = default;
~Redirect() {};
TinyString targetPath;
NS ns;
Expand Down Expand Up @@ -96,6 +97,21 @@ namespace zim
resolved(std::move(r)),
tag(DirentInfo::RESOLVED)
{}
DirentInfo(const DirentInfo& other):
tag(other.tag)
{
switch (tag) {
case DIRECT:
new(&direct) Direct(other.direct);
break;
case REDIRECT:
new(&redirect) Redirect(other.redirect);
break;
case RESOLVED:
new(&resolved) Resolved(other.resolved);
break;
}
}
DirentInfo::Direct& getDirect() {
ASSERT(tag, ==, DIRECT);
return direct;
Expand Down Expand Up @@ -153,6 +169,9 @@ namespace zim
// Creator for a "redirection" dirent
Dirent(NS ns, const std::string& path, const std::string& title, NS targetNs, const std::string& targetPath);

// Creator for a "alias" dirent. Reuse the namespace of the targeted Dirent.
Dirent(const std::string& path, const std::string& title, const Dirent& target);

// Creator for "temporary" dirent, used to search for dirent in container.
// We use them in url ordered container so we only need to set the namespace and the path.
// Other value are irrelevant.
Expand Down
24 changes: 24 additions & 0 deletions src/writer/creator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,23 @@ namespace zim
data->handle(dirent, hints);
}

void Creator::addAlias(const std::string& path, const std::string& title, const std::string& targetPath, const Hints& hints)
{
checkError();
Dirent tmpDirent(NS::C, targetPath);
auto existing_dirent_it = data->dirents.find(&tmpDirent);

if (existing_dirent_it == data->dirents.end()) {
std::ostringstream ss;
ss << "Impossible to alias C/" << targetPath << " as C/" << path << std::endl;
ss << "C/" << targetPath << " doesn't exist." << std::endl;
throw InvalidEntry(ss.str());
}

auto dirent = data->createAliasDirent(path, title, **existing_dirent_it);
data->handle(dirent, hints);
}

void Creator::finishZimCreation()
{
checkError();
Expand Down Expand Up @@ -598,6 +615,13 @@ namespace zim
return dirent;
}

Dirent* CreatorData::createAliasDirent(const std::string& path, const std::string& title, const Dirent& target)
{
auto dirent = pool.getAliasDirent(path, title, target);
addDirent(dirent);
return dirent;
}

Cluster* CreatorData::closeCluster(bool compressed)
{
Cluster *cluster;
Expand Down
1 change: 1 addition & 0 deletions src/writer/creatordata.h
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ namespace zim
Dirent* createDirent(NS ns, const std::string& path, const std::string& mimetype, const std::string& title);
Dirent* createItemDirent(const Item* item);
Dirent* createRedirectDirent(NS ns, const std::string& path, const std::string& title, NS targetNs, const std::string& targetPath);
Dirent* createAliasDirent(const std::string& path, const std::string& title, const Dirent& target);
Cluster* closeCluster(bool compressed);

void setEntryIndexes();
Expand Down
11 changes: 11 additions & 0 deletions src/writer/dirent.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,17 @@ Dirent::Dirent(NS ns, const std::string& path, const std::string& title, NS targ
frontArticle(false)
{}

Dirent::Dirent(const std::string& path, const std::string& title, const Dirent& target)
: pathTitle(path, title),
mimeType(target.mimeType),
idx(0),
info(target.info),
offset(0),
_ns(target._ns),
removed(false),
frontArticle(false)
{}

NS Dirent::getRedirectNs() const {
return info.getRedirect().ns;
}
Expand Down
24 changes: 16 additions & 8 deletions src/writer/direntPool.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,14 @@ namespace zim
delete [] (reinterpret_cast<char*>(pool));
}

/* Return a *NOT constructed* pointer to a dirent */
Dirent* getDirentSlot() {
if (direntIndex == 0xFFFF) {
allocate_new_pool();
}
auto dirent = pools.back() + direntIndex++;
return dirent;
}

public:
DirentPool() :
Expand All @@ -65,22 +73,22 @@ namespace zim
}

Dirent* getClassicDirent(NS ns, const std::string& path, const std::string& title, uint16_t mimetype) {
if (direntIndex == 0xFFFF) {
allocate_new_pool();
}
auto dirent = pools.back() + direntIndex++;
auto dirent = getDirentSlot();
new (dirent) Dirent(ns, path, title, mimetype);
return dirent;
}

Dirent* getRedirectDirent(NS ns, const std::string& path, const std::string& title, NS targetNs, const std::string& targetPath) {
if (direntIndex == 0xFFFF) {
allocate_new_pool();
}
auto dirent = pools.back() + direntIndex++;
auto dirent = getDirentSlot();
new (dirent) Dirent(ns, path, title, targetNs, targetPath);
return dirent;
}

Dirent* getAliasDirent(const std::string& path, const std::string& title, const Dirent& target) {
auto dirent = getDirentSlot();
new (dirent) Dirent(path, title, target);
return dirent;
}
};
}
}
Expand Down
8 changes: 7 additions & 1 deletion src/writer/tinyString.h
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,13 @@ namespace zim
t.m_data = nullptr;
t.m_size = 0;
};
TinyString(const TinyString& t) = delete;
TinyString(const TinyString& t) :
m_data(new char[(uint16_t)t.m_size]),
m_size(t.m_size)
{
std::memcpy(m_data, t.m_data, m_size);
}

~TinyString() {
if (m_data) {
delete[] m_data;
Expand Down
29 changes: 20 additions & 9 deletions test/creator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -185,11 +185,13 @@ TEST(ZimCreator, createZim)
// Be sure that title order is not the same that url order
item = std::make_shared<TestItem>("foo2", "AFoo", "Foo2Content");
creator.addItem(item);
creator.addAlias("foo_bis", "The same Foo", "foo2");
creator.addMetadata("Title", "This is a title");
creator.addIllustration(48, "PNGBinaryContent48");
creator.addIllustration(96, "PNGBinaryContent96");
creator.setMainPath("foo");
creator.addRedirection("foo3", "FooRedirection", "foo"); // No a front article.
creator.addRedirection("foo3", "FooRedirection", "foo"); // Not a front article.
creator.addAlias("foo_ter", "The same redirection", "foo3", {{ zim::writer::FRONT_ARTICLE, true}}); // a clone of the previous redirect, but as a front article.
creator.addRedirection("foo4", "FooRedirection", "NoExistant", {{zim::writer::FRONT_ARTICLE, true}}); // Invalid redirection, must be removed by creator
creator.finishZimCreation();

Expand All @@ -200,15 +202,15 @@ TEST(ZimCreator, createZim)
header.read(*reader);
ASSERT_TRUE(header.hasMainPage());
#if defined(ENABLE_XAPIAN)
entry_index_type nb_entry = 12; // counter + 2*illustration + xapiantitleIndex + xapianfulltextIndex + foo + foo2 + foo3 + Title + mainPage + titleListIndexes*2
entry_index_type nb_entry = 14; // counter + 2*illustration + xapiantitleIndex + xapianfulltextIndex + foo + foo2 + foo_bis + foo3 + foo_ter + Title + mainPage + titleListIndexes*2
int xapian_mimetype = 0;
int listing_mimetype = 1;
int png_mimetype = 2;
int html_mimetype = 3;
int plain_mimetype = 4;
int plainutf8_mimetype = 5;
#else
entry_index_type nb_entry = 10; // counter + 2*illustration + foo + foo2 + foo3 + Title + mainPage + titleListIndexes*2
entry_index_type nb_entry = 12; // counter + 2*illustration + foo + foo_bis + foo2 + foo3 + foo_ter + Title + mainPage + titleListIndexes*2
int listing_mimetype = 0;
int png_mimetype = 1;
int html_mimetype = 2;
Expand All @@ -235,6 +237,12 @@ TEST(ZimCreator, createZim)
dirent = direntAccessor.getDirent(entry_index_t(direntIdx++));
test_redirect_dirent(dirent, 'C', "foo3", "FooRedirection", entry_index_t(0));

dirent = direntAccessor.getDirent(entry_index_t(direntIdx++));
test_article_dirent(dirent, 'C', "foo_bis", "The same Foo", html_mimetype, cluster_index_t(0), foo2BlobIndex);

dirent = direntAccessor.getDirent(entry_index_t(direntIdx++));
test_redirect_dirent(dirent, 'C', "foo_ter", "The same redirection", entry_index_t(0));

dirent = direntAccessor.getDirent(entry_index_t(direntIdx++));
test_article_dirent(dirent, 'M', "Counter", None, plain_mimetype, cluster_index_t(0), None);
auto counterBlobIndex = dirent->getBlobNumber();
Expand Down Expand Up @@ -297,7 +305,7 @@ TEST(ZimCreator, createZim)
clusterOffset = offset_t(reader->read_uint<offset_type>(offset_t(clusterPtrPos + 8)));
cluster = Cluster::read(*reader, clusterOffset);
ASSERT_EQ(cluster->getCompression(), Cluster::Compression::None);
ASSERT_EQ(cluster->count(), blob_index_t(nb_entry-6)); // 6 entries are either compressed or redirections
ASSERT_EQ(cluster->count(), blob_index_t(nb_entry-8)); // 7 entries are either compressed or redirections + 1 entry is a clone of content

ASSERT_EQ(header.getTitleIdxPos(), (clusterOffset+cluster->getBlobOffset(v0BlobIndex)).v);

Expand All @@ -314,20 +322,23 @@ TEST(ZimCreator, createZim)
6, 0, 0, 0,
7, 0, 0, 0,
8, 0, 0, 0,
9, 0, 0, 0
9, 0, 0, 0,
10, 0, 0, 0,
11, 0, 0, 0
#if defined(ENABLE_XAPIAN)
,10, 0, 0, 0
,11, 0, 0, 0
,12, 0, 0, 0
,13, 0, 0, 0
#endif
};
ASSERT_EQ(blob0Data, expectedBlob0Data);

blob = cluster->getBlob(v1BlobIndex);
ASSERT_EQ(blob.size(), 2*sizeof(title_index_t));
ASSERT_EQ(blob.size(), 3*sizeof(title_index_t));
std::vector<char> blob1Data(blob.data(), blob.end());
std::vector<char> expectedBlob1Data = {
1, 0, 0, 0,
0, 0, 0, 0
0, 0, 0, 0,
4, 0, 0, 0 // "The same redirection" is the 5th entry "by title order"
};
ASSERT_EQ(blob1Data, expectedBlob1Data);

Expand Down
15 changes: 11 additions & 4 deletions test/search.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ TEST(Search, indexFullPath)

auto item = std::make_shared<TestItem>("testPath", "text/html", "Test Article", "This is a test article");
creator.addItem(item);
creator.addAlias("anotherPath", "Another Test Article", "testPath");

creator.setMainPath("testPath");
creator.addMetadata("Title", "Test zim");
Expand All @@ -76,9 +77,14 @@ TEST(Search, indexFullPath)
auto search = searcher.search(query);

ASSERT_NE(0, search.getEstimatedMatches());
auto result = search.getResults(0, archive.getEntryCount());
ASSERT_EQ(result.begin().getPath(), "testPath");
ASSERT_EQ(result.begin().getDbData().substr(0, 2), "C/");
auto results = search.getResults(0, archive.getEntryCount());

auto result = results.begin();
ASSERT_EQ(result.getPath(), "testPath");
ASSERT_EQ(result.getDbData().substr(0, 2), "C/");

result++;
ASSERT_EQ(result, results.end());
}

TEST(Search, fulltextSnippet)
Expand Down Expand Up @@ -118,6 +124,7 @@ TEST(Search, multiSearch)
creator.addItem(std::make_shared<TestItem>("path2", "text/html", "Test Article001", "This is a test article. Super. temp0"));
creator.addItem(std::make_shared<TestItem>("path3", "text/html", "Test Article2", "This is a test article. Super."));
creator.addItem(std::make_shared<TestItem>("path4", "text/html", "Test Article23", "This is a test article. bis."));
creator.addAlias("path5", "Test Article5", "path0"); // Should not be fulltext indexed

creator.setMainPath("path0");
creator.finishZimCreation();
Expand All @@ -133,7 +140,7 @@ TEST(Search, multiSearch)
zim::Query query("test article");
auto search0 = searcher.search(query);

ASSERT_EQ(archive.getEntryCount(), (unsigned int)search0.getEstimatedMatches());
ASSERT_EQ(5U, (unsigned int)search0.getEstimatedMatches());
auto result0 = search0.getResults(0, 2);
ASSERT_EQ(result0.size(), 2);
auto it0 = result0.begin();
Expand Down

0 comments on commit abf7c11

Please sign in to comment.