SourcePP
Several modern C++20 libraries for sanely parsing Valve's formats.
Loading...
Searching...
No Matches
TAB.cpp
Go to the documentation of this file.
1// ReSharper disable CppParameterMayBeConst
2// ReSharper disable CppRedundantQualifier
3
4#include <vpkpp/format/TAB.h>
5
6#include <filesystem>
7#include <format>
8#include <numeric>
9
10#include <BufferStream.h>
11#include <FileStream.h>
12#include <tomcrypt.h>
13
14using namespace sourcepp;
15using namespace vpkpp;
16
namespace {

/// Marker looked for in each line of the optional filepath list. When present, everything up to
/// and including its last occurrence is removed from the line before the line is hashed.
constexpr std::string_view TAB_FILEPATH_LIST_STRIP_PATH_INDEX = "projects/justcause/data/";

/// Builds the path of a numbered archive located in the same directory as the given TAB file:
/// <directory of the TAB>/<TAB filestem><archiveIndex><ARC_EXTENSION>
[[nodiscard]] std::filesystem::path getArchivePath(const TAB& tab, uint32_t archiveIndex) {
	const std::filesystem::path tabPath{tab.getFilepath()};
	const std::string archiveName = std::format("{}{}{}", tab.getFilestem(), archiveIndex, ARC_EXTENSION);
	return tabPath.parent_path() / archiveName;
}

} // namespace
26
27std::unique_ptr<PackFile> TAB::create(const std::string& path, Version version, uint32_t sectorSize) {
28 {
29 FileStream stream{path, FileStream::OPT_TRUNCATE | FileStream::OPT_CREATE_IF_NONEXISTENT};
30 stream
31 .set_big_endian(version == Version::JC1_BE)
32 .write<uint32_t>(3)
33 .write<uint32_t>(sectorSize)
34 .write<uint32_t>(0);
35 }
36 return TAB::open(path);
37}
38
// Opens the TAB directory file at `path` and registers one entry per record found in it.
// Returns nullptr when the file does not exist, or when its first uint32 is not 3 in either
// byte order. When `callback` is set it is invoked with each entry's path and metadata.
// NOTE(review): the listing's own numbering skips 52, 56 and 81 inside this function, so some
// statements are presumably missing from this view - confirm against the real file.
39std::unique_ptr<PackFile> TAB::open(const std::string& path, const EntryCallback& callback) {
40 if (!std::filesystem::exists(path)) {
41 // File does not exist
42 return nullptr;
43 }
44
45 auto* tab = new TAB{path};
46 auto packFile = std::unique_ptr<PackFile>(tab);
47
48 FileStream reader{tab->fullFilePath};
49 reader.seek_in(0);
50
 // The first uint32 must equal 3; if it does not as read, it is byte-swapped and checked again
51 if (auto version = reader.read<uint32_t>(); version == 3) {
53 } else {
54 BufferStream::swap_endian(&version);
55 if (version == 3) {
57 } else {
58 return nullptr;
59 }
60 }
61
 // Remaining header fields, in order: sector size, number of archives
62 reader >> tab->sectorSize >> tab->numArchives;
63
 // archiveAlignments[i] is the running total, through archive i, of each archive's file size
 // rounded up to whole sectors
64 std::vector<uint32_t> archiveAlignments(tab->numArchives);
65 uint32_t alignment = 0;
66 for (uint32_t i = 0; i < tab->numArchives; i++) {
67 alignment += (static_cast<uint32_t>(std::filesystem::file_size(::getArchivePath(*tab, i))) + tab->sectorSize - 1) / tab->sectorSize;
68 archiveAlignments[i] = alignment;
69 }
70
71 // Here we load in the filepath list if it exists
72 std::unordered_map<uint32_t, std::string> crackedHashes;
73 if (const std::filesystem::path mapPath{std::filesystem::path{tab->fullFilePath}.parent_path() / std::format("{}list.txt", tab->getFilestem())}; std::filesystem::exists(mapPath)) {
74 std::ifstream mapFile{mapPath};
75 std::string filepath;
76 while (std::getline(mapFile, filepath)) {
 // Empty lines and lines starting with ':' are ignored
77 if (filepath.empty() || filepath.starts_with(':')) {
78 continue;
79 }
80 string::trim(filepath);
82 string::toLower(filepath);
 // Keep only what follows the last occurrence of the strip marker, if it occurs at all
83 if (const auto index = filepath.rfind(TAB_FILEPATH_LIST_STRIP_PATH_INDEX); index != std::string::npos) {
84 filepath = filepath.substr(index + TAB_FILEPATH_LIST_STRIP_PATH_INDEX.size());
85 }
86 crackedHashes[TAB::hashFilePath(filepath)] = filepath;
87 }
88 }
89
 // Everything after the three-uint32 header is read as a sequence of three-uint32 records
90 const auto fileCount = (std::filesystem::file_size(tab->fullFilePath) - sizeof(uint32_t) * 3) / (sizeof(uint32_t) * 3);
91 for (uint32_t i = 0; i < fileCount; i++) {
92 Entry entry = createNewEntry();
93
94 std::string entryPath;
95
96 // note: NOT a CRC32! check TAB::hashFilePath
97 entry.crc32 = reader.read<uint32_t>();
98 if (crackedHashes.contains(entry.crc32)) {
99 entryPath = tab->cleanEntryPath(crackedHashes[entry.crc32]);
100 } else {
 // No known path for this hash: use the hashed-path prefix followed by the hex-encoded hash bytes
101 entryPath = tab->cleanEntryPath(TAB_HASHED_FILEPATH_PREFIX.data() + string::encodeHex({reinterpret_cast<const std::byte*>(&entry.crc32), sizeof(entry.crc32)}));
102 }
103
104 entry.offset = reader.read<uint32_t>();
105 entry.length = reader.read<uint32_t>();
106 entry.archiveIndex = 0;
107
 // Select the first archive whose running sector total is greater than the stored offset
108 for (int j = 0; j < tab->numArchives; j++) {
109 if (entry.offset < archiveAlignments[j]) {
110 entry.archiveIndex = j;
111 break;
112 }
113 }
 // Turn the stored sector offset into a byte offset relative to the selected archive
114 if (entry.archiveIndex == 0) {
115 entry.offset = (entry.offset * tab->sectorSize) % ARC_CHUNK_SIZE;
116 } else {
117 entry.offset = (entry.offset - archiveAlignments[entry.archiveIndex - 1]) * tab->sectorSize % ARC_CHUNK_SIZE;
118 }
119
120 tab->entries.emplace(entryPath, entry);
121
122 if (callback) {
123 callback(entryPath, entry);
124 }
125 }
126
127 return packFile;
128}
129
130std::optional<std::vector<std::byte>> TAB::readEntry(const std::string& path_) const {
131 const auto path = this->cleanEntryPath(path_);
132 const auto entry = this->findEntry(path);
133 if (!entry) {
134 return std::nullopt;
135 }
136 if (entry->unbaked) {
137 return readUnbakedEntry(*entry);
138 }
139
140 // It's baked into the file on disk
141 FileStream stream{::getArchivePath(*this, entry->archiveIndex)};
142 if (!stream) {
143 return std::nullopt;
144 }
145 stream.seek_in_u(entry->offset);
146 return stream.read_bytes(entry->length);
147}
148
149void TAB::addEntryInternal(Entry& entry, const std::string& path, std::vector<std::byte>& buffer, EntryOptions options) {
150 // note: NOT a CRC32! check TAB::hashFilePath
151 entry.crc32 = TAB::hashFilePath(path);
152 entry.length = buffer.size();
153
154 // These will be reset when it's baked
155 entry.archiveIndex = this->numArchives + 1;
156 entry.offset = 0;
157}
158
// Writes the directory file, the numbered archives and the filepath list into the bake output
// directory derived from `outputDir_`.
// Entries are written in ascending hash order. Archives are first written with a ".new" suffix
// and renamed over the final names once the directory header has been rewritten.
// `options` is not referenced in this body. When `callback` is set it is invoked once per entry
// after that entry has been processed. The only return statement returns true.
159bool TAB::bake(const std::string& outputDir_, BakeOptions options, const EntryCallback& callback) {
160 // Get the proper file output folder
161 const std::string outputDir = this->getBakeOutputDir(outputDir_);
162 const std::string outputPath = outputDir + '/' + this->getFilename();
163
164 // Reconstruct data for ease of access
165 std::vector<std::pair<std::string, Entry*>> entriesToBake;
166 this->runForAllEntriesInternal([&entriesToBake](const std::string& path, Entry& entry) {
167 entriesToBake.emplace_back(path, &entry);
168 });
 // Records are emitted in ascending order of the stored hash
169 std::ranges::sort(entriesToBake, [](const std::pair<std::string, Entry*>& lhs, const std::pair<std::string, Entry*>& rhs) {
170 return lhs.second->crc32 < rhs.second->crc32;
171 });
172
173 // Cracked hash list
174 const std::filesystem::path mapPath{std::filesystem::path{outputDir} / std::format("{}list.txt", this->getFilestem())};
175 if (!std::filesystem::exists(mapPath)) {
176 fs::writeFileText(mapPath, "");
177 }
178 std::ofstream mapStream{mapPath};
179
180 // Open directory file
181 FileStream outDir{outputPath, FileStream::OPT_READ | FileStream::OPT_TRUNCATE | FileStream::OPT_CREATE_IF_NONEXISTENT};
182 outDir.seek_in(0);
183 outDir.seek_out(0);
184 outDir.set_big_endian(this->version == Version::JC1_BE);
185
186 // Dummy header
187 outDir.pad<uint32_t>(3);
188
189 // Archive alignment setup, because this format is weird
190 int32_t currentArchiveIndex = -1;
191 uint64_t currentArchiveLength = 0;
192 std::vector<uint32_t> archiveAlignments;
193 uint32_t totalArchiveSectors = 0;
 // Path of archive number archiveIndex in the output directory, with a ".new" suffix unless srcPath is false
194 const auto getNewArchivePath = [this, &outputDir](uint32_t archiveIndex, bool srcPath = true) {
195 return std::filesystem::path{outputDir} / std::format("{}{}{}{}", this->getFilestem(), archiveIndex, ARC_EXTENSION, srcPath ? ".new" : "");
196 };
 // Closes out the current archive's sector accounting (if there is one), then opens the next ".new" archive
197 const auto getNewArchiveStream = [this, &currentArchiveIndex, &currentArchiveLength, &archiveAlignments, &totalArchiveSectors, &getNewArchivePath] {
198 if (currentArchiveIndex >= 0) {
199 totalArchiveSectors += (currentArchiveLength + this->sectorSize - 1) / this->sectorSize;
200 archiveAlignments.push_back(totalArchiveSectors);
201 }
202 currentArchiveLength = 0;
203
204 auto out = FileStream{getNewArchivePath(++currentArchiveIndex), FileStream::OPT_READ | FileStream::OPT_TRUNCATE | FileStream::OPT_CREATE_IF_NONEXISTENT};
205 out.seek_in(0);
206 out.seek_out(0);
207 return out;
208 };
209 FileStream currentArchive = getNewArchiveStream();
210
211 // File tree and data
212 for (auto& [path, entry] : entriesToBake) {
 // Only paths without the hashed-path prefix are written to the filepath list
213 if (!path.starts_with(TAB_HASHED_FILEPATH_PREFIX)) {
214 mapStream << path << '\n';
215 }
216
217 if (const auto data = this->readEntry(path)) {
 // NOTE(review): the index at the end of this listing shows paddingForAlignment taking a
 // uint16_t alignment while sectorSize is a uint32_t - sector sizes above 65535 would
 // presumably be narrowed here; confirm
218 const uint16_t padLength = math::paddingForAlignment(this->sectorSize, data->size());
219 uint64_t entryTotalSize = data->size() + padLength;
 // Start a new archive when this entry plus its padding would push the current one past ARC_CHUNK_SIZE
220 if (currentArchiveLength + entryTotalSize > ARC_CHUNK_SIZE) {
221 currentArchive = getNewArchiveStream();
222 }
223
224 entry->archiveIndex = currentArchiveIndex;
225 entry->offset = currentArchive.tell_out();
226
227 currentArchive
228 .write(*data)
229 .pad<char>(padLength, 'P');
230
 // Directory record: hash, offset in sectors (plus the sector total of all earlier archives), length in bytes
231 outDir
232 .write<uint32_t>(entry->crc32)
233 .write<uint32_t>(currentArchiveIndex == 0
234 ? currentArchiveLength / this->sectorSize
235 : archiveAlignments[currentArchiveIndex - 1] + currentArchiveLength / this->sectorSize)
236 .write<uint32_t>(entry->length);
237
238 currentArchiveLength += entryTotalSize;
239 } else {
 // The entry could not be read: no record is written and its location fields are zeroed
240 entry->archiveIndex = 0;
241 entry->offset = 0;
242 entry->length = 0;
243 }
244
245 if (callback) {
246 callback(path, *entry);
247 }
248 }
249
250 // Write header
251 this->numArchives = currentArchiveIndex + 1;
252 outDir
253 .seek_out(0)
254 .write<uint32_t>(3)
255 .write<uint32_t>(this->sectorSize)
256 .write<uint32_t>(this->numArchives);
257
258 // Rename new archives
 // NOTE(review): currentArchive is still in scope at this point; whether renaming a file that
 // has a live stream works on every platform should be confirmed
259 for (uint32_t i = 0; i < this->numArchives; i++) {
260 const auto srcPath = getNewArchivePath(i);
261 const auto destPath = getNewArchivePath(i, false);
262 if (std::filesystem::exists(destPath)) {
263 std::filesystem::remove(destPath);
264 }
265 std::filesystem::rename(srcPath, destPath);
266 }
267
268 // Merge unbaked into baked entries
269 this->mergeUnbakedEntries();
270
271 PackFile::setFullFilePath(outputDir);
272 return true;
273}
274
276 using enum Attribute;
277 return ARCHIVE_INDEX | LENGTH;
278}
279
/// Describes this pack file as the base PackFile description followed by " | " and a label for
/// the version ("JC1 LE" for Version::JC1_LE, "JC1 BE" for anything else).
TAB::operator std::string() const {
	const char* versionLabel = "JC1 BE";
	if (this->version == Version::JC1_LE) {
		versionLabel = "JC1 LE";
	}
	return PackFile::operator std::string() + std::format(" | {}", versionLabel);
}
283
285 return this->version;
286}
287
288void TAB::setVersion(Version version_) {
289 this->version = version_;
290}
291
292uint32_t TAB::getSectorSize() const {
293 return this->sectorSize;
294}
295
296void TAB::setSectorSize(uint32_t sectorSize_) {
297 this->sectorSize = sectorSize_;
298}
299
300uint32_t TAB::hashFilePath(const std::string& filepath) {
301 auto cleanPath = filepath;
302 string::normalizeSlashes(cleanPath);
303 string::toLower(cleanPath);
304 cleanPath = string::trim(std::filesystem::path{cleanPath}.filename().string());
305
306 std::vector<std::byte> buffer;
307 buffer.resize(TAB_FILENAME_MAX_SIZE);
308 std::memset(buffer.data(), 0, TAB_FILENAME_MAX_SIZE);
309 std::memcpy(buffer.data(), cleanPath.c_str(), cleanPath.size());
310
311 buffer.push_back(std::byte{0x80});
312 if (buffer.size() % 64 < 56) {
313 buffer.resize(buffer.size() + (56 - buffer.size() % 64));
314 } else if (buffer.size() % 64 > 56) {
315 buffer.resize(buffer.size() + (64 - buffer.size() % 64) + 56);
316 }
317 for (int i = 7; i >= 0; i--) {
318 buffer.push_back(static_cast<std::byte>(static_cast<uint64_t>(TAB_FILENAME_MAX_SIZE) * 8 >> i * 8 & 0xff));
319 }
320
321 for (std::span bufferU32{reinterpret_cast<uint32_t*>(buffer.data()), buffer.size() / sizeof(uint32_t)}; auto& uint : bufferU32) {
322 BufferStream::swap_endian(&uint);
323 }
324 hash_state sha1;
325 sha1_init(&sha1);
326 sha1_process(&sha1, reinterpret_cast<const unsigned char*>(buffer.data()), buffer.size());
327 BufferStream::swap_endian(&sha1.sha1.state[0]);
328 return sha1.sha1.state[0];
329}
This class represents the metadata that a file has inside a PackFile.
Definition Entry.h:14
uint64_t offset
Offset, format-specific meaning - 0 if unused, or if the offset genuinely is 0.
Definition Entry.h:33
uint32_t archiveIndex
Which external archive this entry is in.
Definition Entry.h:23
uint32_t crc32
CRC32 checksum - 0 if unused.
Definition Entry.h:40
uint64_t length
Length in bytes (in formats with compression, this is the uncompressed length).
Definition Entry.h:26
EntryCallbackBase< void > EntryCallback
Definition PackFile.h:38
void mergeUnbakedEntries()
Definition PackFile.cpp:685
std::optional< Entry > findEntry(const std::string &path_, bool includeUnbaked=true) const
Try to find an entry given the file path.
Definition PackFile.cpp:172
std::string fullFilePath
Definition PackFile.h:231
EntryTrie entries
Definition PackFile.h:232
void runForAllEntriesInternal(const std::function< void(const std::string &, Entry &)> &operation, bool includeUnbaked=true)
Definition PackFile.cpp:571
std::string getFilestem() const
/home/user/pak01_dir.vpk -> pak01_dir
Definition PackFile.cpp:630
bool bake()
If output folder is an empty string, it will overwrite the original.
Definition PackFile.cpp:369
std::string getFilename() const
/home/user/pak01_dir.vpk -> pak01_dir.vpk
Definition PackFile.cpp:621
std::string getBakeOutputDir(const std::string &outputDir) const
Definition PackFile.cpp:670
void setFullFilePath(const std::string &outputDir)
Definition PackFile.cpp:701
std::string cleanEntryPath(const std::string &path) const
Definition PackFile.cpp:706
static Entry createNewEntry()
Definition PackFile.cpp:715
std::string_view getFilepath() const
/home/user/pak01_dir.vpk
Definition PackFile.cpp:613
static std::optional< std::vector< std::byte > > readUnbakedEntry(const Entry &entry)
Definition PackFile.cpp:719
static std::unique_ptr< PackFile > open(const std::string &path, const EntryCallback &callback=nullptr)
Open a TAB file.
Definition TAB.cpp:39
void setVersion(Version version_)
Definition TAB.cpp:288
uint32_t getSectorSize() const
Definition TAB.cpp:292
uint32_t numArchives
Definition TAB.h:59
uint32_t sectorSize
Definition TAB.h:58
void addEntryInternal(Entry &entry, const std::string &path, std::vector< std::byte > &buffer, EntryOptions options) override
Definition TAB.cpp:149
void setSectorSize(uint32_t sectorSize_)
Definition TAB.cpp:296
Attribute getSupportedEntryAttributes() const override
Returns a list of supported entry attributes. Mostly for GUI programs that show entries and their meta...
Definition TAB.cpp:275
std::optional< std::vector< std::byte > > readEntry(const std::string &path_) const override
Try to read the entry's data to a bytebuffer.
Definition TAB.cpp:130
static uint32_t hashFilePath(const std::string &filepath)
Definition TAB.cpp:300
Version version
Definition TAB.h:57
Version
Definition TAB.h:23
Version getVersion() const
Definition TAB.cpp:284
static std::unique_ptr< PackFile > create(const std::string &path, Version version=Version::JC1_LE, uint32_t sectorSize=2048)
Create a TAB file.
Definition TAB.cpp:27
bool writeFileText(const std::filesystem::path &filepath, std::string_view text)
Definition FS.cpp:46
constexpr uint16_t paddingForAlignment(uint16_t alignment, uint64_t n)
Definition Math.h:64
void normalizeSlashes(std::string &path, bool stripSlashPrefix=false, bool stripSlashSuffix=true)
Definition String.cpp:227
void trim(std::string &s)
Definition String.cpp:91
void toLower(std::string &input)
Definition String.cpp:167
std::string encodeHex(std::span< const std::byte > hex)
Definition String.cpp:281
constexpr std::string_view ARC_EXTENSION
Definition TAB.h:16
constexpr uint32_t ARC_CHUNK_SIZE
Chunk size in bytes (1gb).
Definition TAB.h:19
constexpr auto TAB_FILENAME_MAX_SIZE
Definition TAB.h:13
Attribute
Definition Attribute.h:7
constexpr std::string_view TAB_HASHED_FILEPATH_PREFIX
Definition TAB.h:14