diff --git a/transformer/src/Main.cpp b/transformer/src/Main.cpp index f7b315c..d1e0535 100644 --- a/transformer/src/Main.cpp +++ b/transformer/src/Main.cpp @@ -101,7 +101,7 @@ int main(int argc, char* argv[]) { bool checkNesting = true; app.add_flag( - "--check-nesting,!--no-check-nesting", + "--check-nesting,!--no-nesting", checkNesting, "Whether or not to check for nested .7zs. DO NOT SET TO FALSE UNLESS YOU KNOW THE ARCHIVE IS GOOD! " "Setting this to false when there is nesting will have unintended consequences. All this option does is " diff --git a/transformer/src/data/ArchiveParser.cpp b/transformer/src/data/ArchiveParser.cpp index 3e1005d..cc050a3 100644 --- a/transformer/src/data/ArchiveParser.cpp +++ b/transformer/src/data/ArchiveParser.cpp @@ -119,6 +119,7 @@ void ArchiveParser::read(const GlobalContext& conf) { continue; } spdlog::info("Extracting {}/{}", ctx.baseDomain, entryName); + ctx.lastModified = archive_entry_mtime(entry); ctx.currType = DataDumpFileType::UNKNOWN; ctx.currTypeStr = ""; diff --git a/transformer/src/data/ArchiveParser.hpp b/transformer/src/data/ArchiveParser.hpp index 2ebbae9..74e6305 100644 --- a/transformer/src/data/ArchiveParser.hpp +++ b/transformer/src/data/ArchiveParser.hpp @@ -3,6 +3,7 @@ #include "data/GlobalContext.hpp" #include +#include #include #include #include @@ -60,6 +61,11 @@ struct ParserContext { * Same as currType, but as a string */ std::string currTypeStr; + + /** + * The lastModified time of the archive. Used to preserve timestamps + */ + time_t lastModified; const GlobalContext& conf; }; diff --git a/transformer/src/data/ArchiveWriter.cpp b/transformer/src/data/ArchiveWriter.cpp index 8a164ed..431b268 100644 --- a/transformer/src/data/ArchiveWriter.cpp +++ b/transformer/src/data/ArchiveWriter.cpp @@ -57,7 +57,7 @@ ArchiveWriter::~ArchiveWriter() { std::filesystem::remove_all(this->tmpOutputDir); } else { // TODO: This feels sketch - for (auto& file : files) { + for (auto& [file, _] : files) { spdlog::debug("Binary file::Removing {}", (this->tmpOutputDir / file).string()); std::filesystem::remove(this->tmpOutputDir / file); } @@ -66,12 +66,13 @@ ArchiveWriter::~ArchiveWriter() { void ArchiveWriter::commit() { - for (auto& file : this->files) { + for (auto& [file, attr] : this->files) { spdlog::info("Now committing {} to archive {}", file, this->archiveName.filename().string()); archive_entry* currEntry = archive_entry_new(); archive_entry_set_pathname(currEntry, file.c_str()); archive_entry_set_filetype(currEntry, AE_IFREG); archive_entry_set_perm(currEntry, 0644); + archive_entry_set_mtime(currEntry, attr.lastModified, 0L); std::ifstream f(this->tmpOutputDir / file, std::ios_base::binary); @@ -113,10 +114,11 @@ void ArchiveWriter::commit() { spdlog::info("Committing LICENSE to {}", archiveName.filename().string()); archive_entry* currEntry = archive_entry_new(); - archive_entry_set_pathname(currEntry, "LICENSE"); + archive_entry_set_pathname(currEntry, "LICENSE.txt"); archive_entry_set_filetype(currEntry, AE_IFREG); archive_entry_set_perm(currEntry, 0644); archive_entry_set_size(currEntry, (int64_t) MetaFiles::dataDumpLicense.size()); + archive_entry_set_mtime(currEntry, std::time(nullptr), 0); SEDDARCHIVE_CHECK_ERROR(a, archive_write_header(a, currEntry)); @@ -127,12 +129,12 @@ void ArchiveWriter::commit() { archive_entry_free(currEntry); } -void ArchiveWriter::open(const std::string& filename) { +void ArchiveWriter::open(const std::string& filename, const FileAttr& attr) { if (!createTempDir) { throw std::runtime_error("Can't open() without a tempDir"); } spdlog::debug("Opening file {}", filename); - this->files.push_back(filename); + this->files[filename] = attr; writer.clear(); writer.open(tmpOutputDir / filename); @@ -158,8 +160,8 @@ void ArchiveWriter::close() { writer.close(); } -void ArchiveWriter::addBinaryFile(const std::string& filename) { - this->files.push_back(filename); +void ArchiveWriter::addBinaryFile(const std::string& filename, const FileAttr& attr) { + this->files[filename] = attr; } } diff --git a/transformer/src/data/ArchiveWriter.hpp b/transformer/src/data/ArchiveWriter.hpp index 04c122c..1a8b191 100644 --- a/transformer/src/data/ArchiveWriter.hpp +++ b/transformer/src/data/ArchiveWriter.hpp @@ -3,12 +3,17 @@ #include "archive.h" #include "archive_entry.h" #include +#include #include #include #include namespace sedd { +struct FileAttr { + time_t lastModified; +}; + class ArchiveWriter { private: // Approx. 16MB @@ -16,7 +21,7 @@ class ArchiveWriter { std::filesystem::path archiveName; std::filesystem::path tmpOutputDir; - std::vector files; + std::map files; archive* a; std::ofstream writer; @@ -38,7 +43,7 @@ class ArchiveWriter { * Opens a managed text file for writing. Use write() to write to it, and call * close() when writing is completed. */ - void open(const std::string& filename); + void open(const std::string& filename, const FileAttr& attr); /** * Writes to the current open()ed file @@ -56,7 +61,7 @@ class ArchiveWriter { * It's entirely the responsibility of the invoking transformer to create the file. * The file will be opened and written to the archive when commit() is called. */ - void addBinaryFile(const std::string& filename); + void addBinaryFile(const std::string& filename, const FileAttr& attr); /** * Commits the saved files (either added with `addBinaryFile`, or managed text diff --git a/transformer/src/data/transformers/JSONTransformer.cpp b/transformer/src/data/transformers/JSONTransformer.cpp index 5506a79..41964db 100644 --- a/transformer/src/data/transformers/JSONTransformer.cpp +++ b/transformer/src/data/transformers/JSONTransformer.cpp @@ -13,7 +13,9 @@ namespace sedd { void JSONTransformer::beginFile(const ParserContext& ctx) { auto filename = DataDumpFileType::toFilename(ctx.currType) + ".json"; spdlog::debug("Starting new file: {}", filename); - this->writer->open(filename); + this->writer->open(filename, FileAttr { + ctx.lastModified + }); this->writer->write("[\n"); started = false; } diff --git a/transformer/src/data/util/ArchiveCache.cpp b/transformer/src/data/util/ArchiveCache.cpp index f4c4a6b..004f162 100644 --- a/transformer/src/data/util/ArchiveCache.cpp +++ b/transformer/src/data/util/ArchiveCache.cpp @@ -1,5 +1,6 @@ #include "ArchiveCache.hpp" #include "data/ArchiveParser.hpp" +#include "data/ArchiveWriter.hpp" #include "data/util/ArchiveCache.hpp" #include "data/Schema.hpp" #include @@ -20,7 +21,9 @@ void ArchiveCache::initArchive(const ParserContext& ctx, const std::string& binF .exhaustedFiletypes = {} }; - cache.writer->addBinaryFile(binFile); + cache.writer->addBinaryFile(binFile, FileAttr { + ctx.lastModified + }); siteColumnMaps[ctx.baseSiteName] = std::move(cache); }