Skip to content

Commit

Permalink
Set up mtime preservation
Browse files Browse the repository at this point in the history
  • Loading branch information
LunarWatcher committed Sep 21, 2024
1 parent f091d97 commit 92b6662
Show file tree
Hide file tree
Showing 7 changed files with 32 additions and 13 deletions.
2 changes: 1 addition & 1 deletion transformer/src/Main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ int main(int argc, char* argv[]) {

bool checkNesting = true;
app.add_flag(
"--check-nesting,!--no-check-nesting",
"--check-nesting,!--no-nesting",
checkNesting,
"Whether or not to check for nested .7zs. DO NOT SET TO FALSE UNLESS YOU KNOW THE ARCHIVE IS GOOD! "
"Setting this to false when there is nesting will have unintended consequences. All this option does is "
Expand Down
1 change: 1 addition & 0 deletions transformer/src/data/ArchiveParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,7 @@ void ArchiveParser::read(const GlobalContext& conf) {
continue;
}
spdlog::info("Extracting {}/{}", ctx.baseDomain, entryName);
ctx.lastModified = archive_entry_mtime(entry);

ctx.currType = DataDumpFileType::UNKNOWN;
ctx.currTypeStr = "";
Expand Down
6 changes: 6 additions & 0 deletions transformer/src/data/ArchiveParser.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
#include "data/GlobalContext.hpp"
#include <archive.h>

#include <ctime>
#include <filesystem>
#include <string>
#include <vector>
Expand Down Expand Up @@ -60,6 +61,11 @@ struct ParserContext {
* Same as currType, but as a string
*/
std::string currTypeStr;

/**
* The last-modified time (mtime) of the archive entry currently being read.
* Used to preserve timestamps on the files written to the output archive.
*/
time_t lastModified;

const GlobalContext& conf;
};
Expand Down
16 changes: 9 additions & 7 deletions transformer/src/data/ArchiveWriter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ ArchiveWriter::~ArchiveWriter() {
std::filesystem::remove_all(this->tmpOutputDir);
} else {
// TODO: This feels sketch
for (auto& file : files) {
for (auto& [file, _] : files) {
spdlog::debug("Binary file::Removing {}", (this->tmpOutputDir / file).string());
std::filesystem::remove(this->tmpOutputDir / file);
}
Expand All @@ -66,12 +66,13 @@ ArchiveWriter::~ArchiveWriter() {

void ArchiveWriter::commit() {

for (auto& file : this->files) {
for (auto& [file, attr] : this->files) {
spdlog::info("Now committing {} to archive {}", file, this->archiveName.filename().string());
archive_entry* currEntry = archive_entry_new();
archive_entry_set_pathname(currEntry, file.c_str());
archive_entry_set_filetype(currEntry, AE_IFREG);
archive_entry_set_perm(currEntry, 0644);
archive_entry_set_mtime(currEntry, attr.lastModified, 0L);

std::ifstream f(this->tmpOutputDir / file, std::ios_base::binary);

Expand Down Expand Up @@ -113,10 +114,11 @@ void ArchiveWriter::commit() {

spdlog::info("Committing LICENSE to {}", archiveName.filename().string());
archive_entry* currEntry = archive_entry_new();
archive_entry_set_pathname(currEntry, "LICENSE");
archive_entry_set_pathname(currEntry, "LICENSE.txt");
archive_entry_set_filetype(currEntry, AE_IFREG);
archive_entry_set_perm(currEntry, 0644);
archive_entry_set_size(currEntry, (int64_t) MetaFiles::dataDumpLicense.size());
archive_entry_set_mtime(currEntry, std::time(nullptr), 0);

SEDDARCHIVE_CHECK_ERROR(a, archive_write_header(a, currEntry));

Expand All @@ -127,12 +129,12 @@ void ArchiveWriter::commit() {
archive_entry_free(currEntry);
}

void ArchiveWriter::open(const std::string& filename) {
void ArchiveWriter::open(const std::string& filename, const FileAttr& attr) {
if (!createTempDir) {
throw std::runtime_error("Can't open() without a tempDir");
}
spdlog::debug("Opening file {}", filename);
this->files.push_back(filename);
this->files[filename] = attr;

writer.clear();
writer.open(tmpOutputDir / filename);
Expand All @@ -158,8 +160,8 @@ void ArchiveWriter::close() {
writer.close();
}

void ArchiveWriter::addBinaryFile(const std::string& filename) {
this->files.push_back(filename);
void ArchiveWriter::addBinaryFile(const std::string& filename, const FileAttr& attr) {
this->files[filename] = attr;
}

}
11 changes: 8 additions & 3 deletions transformer/src/data/ArchiveWriter.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,20 +3,25 @@
#include "archive.h"
#include "archive_entry.h"
#include <filesystem>
#include <map>
#include <string>
#include <vector>
#include <fstream>

namespace sedd {

/**
 * Attributes stored alongside each file registered with an ArchiveWriter.
 *
 * Kept as a plain aggregate so it can be brace-initialized at call sites,
 * e.g. `FileAttr { ctx.lastModified }`.
 */
struct FileAttr {
    /**
     * Modification time to stamp on the file's archive entry when it is
     * committed. Value-initialized to 0 so a default-constructed FileAttr
     * never carries an indeterminate timestamp.
     */
    time_t lastModified{};
};

class ArchiveWriter {
private:
// Approx. 16MB
constexpr static auto BLOCK_SIZE = size_t(4194304) * size_t(4);
std::filesystem::path archiveName;
std::filesystem::path tmpOutputDir;

std::vector<std::string> files;
std::map<std::string, FileAttr> files;
archive* a;
std::ofstream writer;

Expand All @@ -38,7 +43,7 @@ class ArchiveWriter {
* Opens a managed text file for writing. Use write() to write to it, and call
* close() when writing is completed.
*/
void open(const std::string& filename);
void open(const std::string& filename, const FileAttr& attr);

/**
* Writes to the current open()ed file
Expand All @@ -56,7 +61,7 @@ class ArchiveWriter {
* It's entirely the responsibility of the invoking transformer to create the file.
* The file will be opened and written to the archive when commit() is called.
*/
void addBinaryFile(const std::string& filename);
void addBinaryFile(const std::string& filename, const FileAttr& attr);

/**
* Commits the saved files (either added with `addBinaryFile`, or managed text
Expand Down
4 changes: 3 additions & 1 deletion transformer/src/data/transformers/JSONTransformer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,9 @@ namespace sedd {
void JSONTransformer::beginFile(const ParserContext& ctx) {
auto filename = DataDumpFileType::toFilename(ctx.currType) + ".json";
spdlog::debug("Starting new file: {}", filename);
this->writer->open(filename);
this->writer->open(filename, FileAttr {
ctx.lastModified
});
this->writer->write("[\n");
started = false;
}
Expand Down
5 changes: 4 additions & 1 deletion transformer/src/data/util/ArchiveCache.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#include "ArchiveCache.hpp"
#include "data/ArchiveParser.hpp"
#include "data/ArchiveWriter.hpp"
#include "data/util/ArchiveCache.hpp"
#include "data/Schema.hpp"
#include <spdlog/spdlog.h>
Expand All @@ -20,7 +21,9 @@ void ArchiveCache::initArchive(const ParserContext& ctx, const std::string& binF
.exhaustedFiletypes = {}
};

cache.writer->addBinaryFile(binFile);
cache.writer->addBinaryFile(binFile, FileAttr {
ctx.lastModified
});

siteColumnMaps[ctx.baseSiteName] = std::move(cache);
}
Expand Down

0 comments on commit 92b6662

Please sign in to comment.