Optimize archive reading
Reading from encrypted archives should now be almost as fast as reading unencrypted files from disk. I also don't see any further possible optimizations, so this is probably as fast as it gets.
This commit is contained in:
		
							parent
							
								
									a9fc44f79d
								
							
						
					
					
						commit
						958af38442
					
				
					 1 changed files with 78 additions and 12 deletions
				
			
		| 
						 | 
					@ -148,26 +148,92 @@ RGSS_ioRead(PHYSFS_Io *self, void *buffer, PHYSFS_uint64 len)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	RGSS_entryHandle *entry = static_cast<RGSS_entryHandle*>(self->opaque);
 | 
						RGSS_entryHandle *entry = static_cast<RGSS_entryHandle*>(self->opaque);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						PHYSFS_Io *io = entry->io;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	uint64_t toRead = min<uint64_t>(entry->data.size - entry->currentOffset, len);
 | 
						uint64_t toRead = min<uint64_t>(entry->data.size - entry->currentOffset, len);
 | 
				
			||||||
	uint64_t offs = entry->currentOffset;
 | 
						uint64_t offs = entry->currentOffset;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	entry->io->seek(entry->io, entry->data.offset + offs);
 | 
						io->seek(io, entry->data.offset + offs);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	uint64_t buffI = 0;
 | 
						/* We divide up the bytes to be read in 3 categories:
 | 
				
			||||||
	for (uint64_t o = offs; o < offs + toRead;)
 | 
						 *
 | 
				
			||||||
 | 
						 * preAlign: If the current read address is not dword
 | 
				
			||||||
 | 
						 *   aligned, this is the number of bytes to read until
 | 
				
			||||||
 | 
						 *   we reach alignment again (therefore can only be
 | 
				
			||||||
 | 
						 *   3 or less).
 | 
				
			||||||
 | 
						 *
 | 
				
			||||||
 | 
						 * align: The number of aligned dwords we can read
 | 
				
			||||||
 | 
						 *   times 4 (= number of bytes).
 | 
				
			||||||
 | 
						 *
 | 
				
			||||||
 | 
						 * postAlign: The number of bytes to read after the
 | 
				
			||||||
 | 
						 *   last aligned dword. Always 3 or less.
 | 
				
			||||||
 | 
						 *
 | 
				
			||||||
 | 
						 * Treating the pre- and post-aligned reads specially,
 | 
				
			||||||
 | 
						 * we can read all aligned dwords in one syscall directly
 | 
				
			||||||
 | 
						 * into the write buffer and then run the xor chain on
 | 
				
			||||||
 | 
						 * it afterwards. */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						uint8_t preAlign = 4 - (offs % 4);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (preAlign == 4)
 | 
				
			||||||
 | 
							preAlign = 0;
 | 
				
			||||||
 | 
						else
 | 
				
			||||||
 | 
							preAlign = min<uint64_t>(preAlign, len);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						uint8_t postAlign = (len > preAlign) ? (offs + len) % 4 : 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						uint64_t align = len - (preAlign + postAlign);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/* Byte buffer pointer */
 | 
				
			||||||
 | 
						uint8_t *bBufferP = static_cast<uint8_t*>(buffer);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (preAlign > 0)
 | 
				
			||||||
	{
 | 
						{
 | 
				
			||||||
		uint8_t bitOffset = (0x8 * (o % 4));
 | 
							uint32_t dword;
 | 
				
			||||||
		uint8_t magicByte = (entry->currentMagic >> bitOffset) & 0xFF;
 | 
							io->read(io, &dword, preAlign);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		uint8_t byte;
 | 
							/* Need to align the bytes with the
 | 
				
			||||||
		entry->io->read(entry->io, &byte, 1);
 | 
							 * magic before xoring */
 | 
				
			||||||
 | 
							dword <<= 8 * (offs % 4);
 | 
				
			||||||
 | 
							dword ^= entry->currentMagic;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		((uint8_t*) buffer)[buffI++] = byte ^ magicByte;
 | 
							/* Shift them back to normal */
 | 
				
			||||||
 | 
							dword >>= 8 * (offs % 4);
 | 
				
			||||||
 | 
							memcpy(bBufferP, &dword, preAlign);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		if (++o % 4 == 0)
 | 
							bBufferP += preAlign;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							/* Only advance the magic if we actually
 | 
				
			||||||
 | 
							 * reached the next alignment */
 | 
				
			||||||
 | 
							if ((offs+preAlign) % 4 == 0)
 | 
				
			||||||
			advanceMagic(entry->currentMagic);
 | 
								advanceMagic(entry->currentMagic);
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (align > 0)
 | 
				
			||||||
 | 
						{
 | 
				
			||||||
 | 
							/* Double word buffer pointer */
 | 
				
			||||||
 | 
							uint32_t *dwBufferP = reinterpret_cast<uint32_t*>(bBufferP);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							/* Read aligned dwords in one go */
 | 
				
			||||||
 | 
							io->read(io, bBufferP, align);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							/* Then xor them */
 | 
				
			||||||
 | 
							for (uint64_t i = 0; i < (align / 4); ++i)
 | 
				
			||||||
 | 
								dwBufferP[i] ^= advanceMagic(entry->currentMagic);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							bBufferP += align;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (postAlign > 0)
 | 
				
			||||||
 | 
						{
 | 
				
			||||||
 | 
							uint32_t dword;
 | 
				
			||||||
 | 
							io->read(io, &dword, postAlign);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							/* Bytes are already aligned with magic */
 | 
				
			||||||
 | 
							dword ^= entry->currentMagic;
 | 
				
			||||||
 | 
							memcpy(bBufferP, &dword, postAlign);
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	entry->currentOffset += toRead;
 | 
						entry->currentOffset += toRead;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	return toRead;
 | 
						return toRead;
 | 
				
			||||||
| 
						 | 
					@ -191,10 +257,10 @@ RGSS_ioSeek(PHYSFS_Io *self, PHYSFS_uint64 offset)
 | 
				
			||||||
		entry->currentMagic = entry->data.startMagic;
 | 
							entry->currentMagic = entry->data.startMagic;
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	/* For each 4 bytes sought, advance magic */
 | 
						/* For each overstepped alignment, advance magic */
 | 
				
			||||||
	uint64_t currentDword = entry->currentOffset / 4;
 | 
						uint64_t currentDword = entry->currentOffset / 4;
 | 
				
			||||||
	uint64_t soughtDword  = offset / 4;
 | 
						uint64_t targetDword  = offset / 4;
 | 
				
			||||||
	uint64_t dwordsSought = soughtDword - currentDword;
 | 
						uint64_t dwordsSought = targetDword - currentDword;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	for (uint64_t i = 0; i < dwordsSought; ++i)
 | 
						for (uint64_t i = 0; i < dwordsSought; ++i)
 | 
				
			||||||
		advanceMagic(entry->currentMagic);
 | 
							advanceMagic(entry->currentMagic);
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue