Optimize archive reading
This should be almost as fast as reading unencrypted files from disk now. I also don't see any possible further optimizations, so this is probably as fast as it gets.
This commit is contained in:
parent
a9fc44f79d
commit
958af38442
|
@ -148,26 +148,92 @@ RGSS_ioRead(PHYSFS_Io *self, void *buffer, PHYSFS_uint64 len)
|
||||||
{
|
{
|
||||||
RGSS_entryHandle *entry = static_cast<RGSS_entryHandle*>(self->opaque);
|
RGSS_entryHandle *entry = static_cast<RGSS_entryHandle*>(self->opaque);
|
||||||
|
|
||||||
|
PHYSFS_Io *io = entry->io;
|
||||||
|
|
||||||
uint64_t toRead = min<uint64_t>(entry->data.size - entry->currentOffset, len);
|
uint64_t toRead = min<uint64_t>(entry->data.size - entry->currentOffset, len);
|
||||||
uint64_t offs = entry->currentOffset;
|
uint64_t offs = entry->currentOffset;
|
||||||
|
|
||||||
entry->io->seek(entry->io, entry->data.offset + offs);
|
io->seek(io, entry->data.offset + offs);
|
||||||
|
|
||||||
uint64_t buffI = 0;
|
/* We divide up the bytes to be read in 3 categories:
|
||||||
for (uint64_t o = offs; o < offs + toRead;)
|
*
|
||||||
|
* preAlign: If the current read address is not dword
|
||||||
|
* aligned, this is the number of bytes to read til
|
||||||
|
* we reach alignment again (therefore can only be
|
||||||
|
* 3 or less).
|
||||||
|
*
|
||||||
|
* align: The number of aligned dwords we can read
|
||||||
|
* times 4 (= number of bytes).
|
||||||
|
*
|
||||||
|
* postAlign: The number of bytes to read after the
|
||||||
|
* last aligned dword. Always 3 or less.
|
||||||
|
*
|
||||||
|
* Treating the pre- and post aligned reads specially,
|
||||||
|
* we can read all aligned dwords in one syscall directly
|
||||||
|
* into the write buffer and then run the xor chain on
|
||||||
|
* it afterwards. */
|
||||||
|
|
||||||
|
uint8_t preAlign = 4 - (offs % 4);
|
||||||
|
|
||||||
|
if (preAlign == 4)
|
||||||
|
preAlign = 0;
|
||||||
|
else
|
||||||
|
preAlign = min<uint64_t>(preAlign, len);
|
||||||
|
|
||||||
|
uint8_t postAlign = (len > preAlign) ? (offs + len) % 4 : 0;
|
||||||
|
|
||||||
|
uint64_t align = len - (preAlign + postAlign);
|
||||||
|
|
||||||
|
/* Byte buffer pointer */
|
||||||
|
uint8_t *bBufferP = static_cast<uint8_t*>(buffer);
|
||||||
|
|
||||||
|
if (preAlign > 0)
|
||||||
{
|
{
|
||||||
uint8_t bitOffset = (0x8 * (o % 4));
|
uint32_t dword;
|
||||||
uint8_t magicByte = (entry->currentMagic >> bitOffset) & 0xFF;
|
io->read(io, &dword, preAlign);
|
||||||
|
|
||||||
uint8_t byte;
|
/* Need to align the bytes with the
|
||||||
entry->io->read(entry->io, &byte, 1);
|
* magic before xoring */
|
||||||
|
dword <<= 8 * (offs % 4);
|
||||||
|
dword ^= entry->currentMagic;
|
||||||
|
|
||||||
((uint8_t*) buffer)[buffI++] = byte ^ magicByte;
|
/* Shift them back to normal */
|
||||||
|
dword >>= 8 * (offs % 4);
|
||||||
|
memcpy(bBufferP, &dword, preAlign);
|
||||||
|
|
||||||
if (++o % 4 == 0)
|
bBufferP += preAlign;
|
||||||
|
|
||||||
|
/* Only advance the magic if we actually
|
||||||
|
* reached the next alignment */
|
||||||
|
if ((offs+preAlign) % 4 == 0)
|
||||||
advanceMagic(entry->currentMagic);
|
advanceMagic(entry->currentMagic);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (align > 0)
|
||||||
|
{
|
||||||
|
/* Double word buffer pointer */
|
||||||
|
uint32_t *dwBufferP = reinterpret_cast<uint32_t*>(bBufferP);
|
||||||
|
|
||||||
|
/* Read aligned dwords in one go */
|
||||||
|
io->read(io, bBufferP, align);
|
||||||
|
|
||||||
|
/* Then xor them */
|
||||||
|
for (uint64_t i = 0; i < (align / 4); ++i)
|
||||||
|
dwBufferP[i] ^= advanceMagic(entry->currentMagic);
|
||||||
|
|
||||||
|
bBufferP += align;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (postAlign > 0)
|
||||||
|
{
|
||||||
|
uint32_t dword;
|
||||||
|
io->read(io, &dword, postAlign);
|
||||||
|
|
||||||
|
/* Bytes are already aligned with magic */
|
||||||
|
dword ^= entry->currentMagic;
|
||||||
|
memcpy(bBufferP, &dword, postAlign);
|
||||||
|
}
|
||||||
|
|
||||||
entry->currentOffset += toRead;
|
entry->currentOffset += toRead;
|
||||||
|
|
||||||
return toRead;
|
return toRead;
|
||||||
|
@ -191,10 +257,10 @@ RGSS_ioSeek(PHYSFS_Io *self, PHYSFS_uint64 offset)
|
||||||
entry->currentMagic = entry->data.startMagic;
|
entry->currentMagic = entry->data.startMagic;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* For each 4 bytes sought, advance magic */
|
/* For each overstepped alignment, advance magic */
|
||||||
uint64_t currentDword = entry->currentOffset / 4;
|
uint64_t currentDword = entry->currentOffset / 4;
|
||||||
uint64_t soughtDword = offset / 4;
|
uint64_t targetDword = offset / 4;
|
||||||
uint64_t dwordsSought = soughtDword - currentDword;
|
uint64_t dwordsSought = targetDword - currentDword;
|
||||||
|
|
||||||
for (uint64_t i = 0; i < dwordsSought; ++i)
|
for (uint64_t i = 0; i < dwordsSought; ++i)
|
||||||
advanceMagic(entry->currentMagic);
|
advanceMagic(entry->currentMagic);
|
||||||
|
|
Loading…
Reference in New Issue