Wrum, wrum, wrum. Java is awesome at parsing binary files.
parent
0252245893
commit
c3df7b0c97
|
@ -0,0 +1,23 @@
|
|||
/*
|
||||
* To change this template, choose Tools | Templates
|
||||
* and open the template in the editor.
|
||||
*/
|
||||
package mobireader;
|
||||
|
||||
import nl.flotsam.preon.annotation.BoundNumber;
|
||||
|
||||
/**
|
||||
*
|
||||
* @author att
|
||||
*/
|
||||
public class EXTHHeader {
|
||||
//headerfmt = '>III'
|
||||
@BoundNumber(size="32")
|
||||
long identifier;
|
||||
@BoundNumber(size="32")
|
||||
long headerLength;
|
||||
@BoundNumber(size="32")
|
||||
long recordCount;
|
||||
|
||||
|
||||
}
|
|
@ -0,0 +1,29 @@
|
|||
/*
|
||||
* To change this template, choose Tools | Templates
|
||||
* and open the template in the editor.
|
||||
*/
|
||||
package mobireader;
|
||||
|
||||
import nl.flotsam.preon.annotation.BoundNumber;
|
||||
import nl.flotsam.preon.annotation.BoundString;
|
||||
|
||||
/**
|
||||
*
|
||||
* @author att
|
||||
*/
|
||||
public class Header {
|
||||
public @BoundString(size="32") String name;
|
||||
@BoundNumber int attributes;
|
||||
@BoundNumber int version;
|
||||
@BoundNumber int created;
|
||||
@BoundNumber int modified;
|
||||
@BoundNumber int backup;
|
||||
@BoundNumber int modnum;
|
||||
@BoundNumber int appInfoId;
|
||||
@BoundNumber int sortInfoId;
|
||||
@BoundString(size="4") String type;
|
||||
@BoundString(size="4") String creator;
|
||||
@BoundNumber int uniqueIDseed;
|
||||
@BoundNumber int nextRecordListId;
|
||||
@BoundNumber int number_of_records;
|
||||
}
|
|
@ -0,0 +1,172 @@
|
|||
/*
|
||||
* To change this template, choose Tools | Templates
|
||||
* and open the template in the editor.
|
||||
*/
|
||||
package mobireader;
|
||||
|
||||
import com.google.common.io.Files;
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.IOError;
|
||||
import java.io.IOException;
|
||||
import java.io.ObjectInputStream;
|
||||
import java.util.HashMap;
|
||||
import nl.flotsam.preon.Codec;
|
||||
import nl.flotsam.preon.Codecs;
|
||||
import nl.flotsam.preon.DecodingException;
|
||||
import nl.flotsam.preon.annotation.BoundNumber;
|
||||
import nl.flotsam.preon.annotation.BoundString;
|
||||
/**
|
||||
*
|
||||
* @author att
|
||||
*/
|
||||
|
||||
|
||||
|
||||
public class Mobi {
|
||||
File file;
|
||||
int offset = 0;
|
||||
String contents;
|
||||
Header header;
|
||||
public Mobi(String filename)
|
||||
{
|
||||
try {
|
||||
this.file = new File(filename);
|
||||
}
|
||||
catch (IOError e) {
|
||||
throw e;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void parse() throws IOException
|
||||
{
|
||||
byte compressed [] = Files.toByteArray(this.file);
|
||||
this.contents = new String(compressed);
|
||||
this.header = parseHeader();
|
||||
//this.records = self.parseRecordInfoList();
|
||||
//this..readRecord0()
|
||||
}
|
||||
|
||||
public int calcsize(String headerFormat)
|
||||
{
|
||||
int size = 0;
|
||||
boolean is_number = false;
|
||||
String number = "";
|
||||
String type = "";
|
||||
for(int i = 0; i < headerFormat.length(); i++)
|
||||
{
|
||||
char c = headerFormat.charAt(i);
|
||||
if(Character.isDigit(c))
|
||||
{
|
||||
number += c;
|
||||
is_number = true;
|
||||
}
|
||||
else if (is_number){
|
||||
size += addNumberOfBytes(Integer.parseInt(number), c);
|
||||
is_number = false;
|
||||
number = "";
|
||||
}
|
||||
else {
|
||||
size += addNumberOfBytes(1, c);
|
||||
}
|
||||
}
|
||||
return size;
|
||||
}
|
||||
|
||||
int addNumberOfBytes(int n, char c)
|
||||
{
|
||||
int base;
|
||||
|
||||
switch (c) {
|
||||
case 'c': base = 1;
|
||||
break;
|
||||
case 's': base = 1;
|
||||
break;
|
||||
case 'b': base = 1;
|
||||
break;
|
||||
case 'h': base = 2;
|
||||
break;
|
||||
case 'H': base = 2;
|
||||
break;
|
||||
case 'i': base = 4;
|
||||
break;
|
||||
case 'I': base = 4;
|
||||
break;
|
||||
case 'l': base = 4;
|
||||
break;
|
||||
case 'L': base = 4;
|
||||
break;
|
||||
case 'f': base = 4;
|
||||
break;
|
||||
case 'd': base = 8;
|
||||
break;
|
||||
default: base = 0;
|
||||
break;
|
||||
}
|
||||
return base * n;
|
||||
}
|
||||
|
||||
Header parseHeader(){
|
||||
String headerfmt = "32shhIIIIII4s4sIIH";
|
||||
int headerlen = calcsize(headerfmt);
|
||||
String headerData = this.contents.substring(this.offset,
|
||||
this.offset+headerlen);
|
||||
|
||||
Header parsedHeader = new Header(); //createHeaderBasedOn(headerData);
|
||||
|
||||
/*
|
||||
# unpack header, zip up into list of tuples
|
||||
results = zip(fields, unpack(headerfmt, self.contents[self.offset:self.offset+headerlen]))
|
||||
|
||||
# increment offset into file
|
||||
this.offset += headerlen;
|
||||
|
||||
# convert tuple array to dictionary
|
||||
resultsDict = utils.toDict(results);
|
||||
*/
|
||||
return parsedHeader;
|
||||
}
|
||||
|
||||
public Header createHeaderBasedOn(File file)
|
||||
{
|
||||
Header headerFromText;
|
||||
try {
|
||||
Codec<Header> codec = Codecs.create(Header.class);
|
||||
headerFromText = Codecs.decode(codec, file);
|
||||
}
|
||||
catch( IOException e ){
|
||||
System.out.println(e.getCause());
|
||||
headerFromText = new Header();
|
||||
}
|
||||
catch (DecodingException e)
|
||||
{
|
||||
System.out.println(e.getCause());
|
||||
headerFromText = new Header();
|
||||
}
|
||||
return headerFromText;
|
||||
}
|
||||
/*
|
||||
def readRecord(self, recordnum, disable_compression=False):
|
||||
if self.config:
|
||||
if self.config['palmdoc']['Compression'] == 1 or disable_compression:
|
||||
return self.contents[self.records[recordnum]['record Data Offset']:self.records[recordnum+1]['record Data Offset']];
|
||||
elif self.config['palmdoc']['Compression'] == 2:
|
||||
result = uncompress_lz77(self.contents[self.records[recordnum]['record Data Offset']:self.records[recordnum+1]['record Data Offset']-self.config['mobi']['extra bytes']])
|
||||
return result
|
||||
|
||||
def readImageRecord(self, imgnum):
|
||||
if self.config:
|
||||
recordnum = self.config['mobi']['First Image index'] + imgnum;
|
||||
return self.readRecord(recordnum, disable_compression=True);
|
||||
|
||||
def author(self):
|
||||
"Returns the author of the book"
|
||||
return self.config['exth']['records'][100]
|
||||
|
||||
def title(self):
|
||||
"Returns the title of the book"
|
||||
return self.config['mobi']['Full Name']
|
||||
*/
|
||||
}
|
|
@ -0,0 +1,92 @@
|
|||
/*
|
||||
* To change this template, choose Tools | Templates
|
||||
* and open the template in the editor.
|
||||
*/
|
||||
package mobireader;
|
||||
|
||||
import nl.flotsam.preon.annotation.BoundNumber;
|
||||
import nl.flotsam.preon.annotation.BoundString;
|
||||
|
||||
/**
|
||||
*
|
||||
* @author att
|
||||
*/
|
||||
public class MobiHeader {
|
||||
//headerfmt = '> IIII II 40s III IIIII IIII I 36s IIII 8s HHIIIII'
|
||||
@BoundNumber(size="32")
|
||||
long identifier;
|
||||
@BoundNumber(size="32")
|
||||
long header_length;
|
||||
@BoundNumber(size="32")
|
||||
long Mobi_type;
|
||||
@BoundNumber(size="32")
|
||||
long text_Encoding;
|
||||
|
||||
@BoundNumber(size="32")
|
||||
long Unique_ID;
|
||||
@BoundNumber(size="32")
|
||||
long Generator_version;
|
||||
|
||||
@BoundString(size="40")
|
||||
String Reserved;
|
||||
|
||||
@BoundNumber(size="32")
|
||||
long FirstNonBookIndex;
|
||||
@BoundNumber(size="32")
|
||||
long FullNameOffset;
|
||||
@BoundNumber(size="32")
|
||||
long FullNameLength;
|
||||
|
||||
@BoundNumber(size="32")
|
||||
long Language;
|
||||
@BoundNumber(size="32")
|
||||
long InputLanguage;
|
||||
@BoundNumber(size="32")
|
||||
long OutputLanguage;
|
||||
@BoundNumber(size="32")
|
||||
long FormatVersion;
|
||||
@BoundNumber(size="32")
|
||||
long FirstImageIndex;
|
||||
|
||||
@BoundNumber(size="32")
|
||||
long FirstHuffRecord;
|
||||
@BoundNumber(size="32")
|
||||
long HuffRecordCount;
|
||||
@BoundNumber(size="32")
|
||||
long FirstDATPRecord;
|
||||
@BoundNumber(size="32")
|
||||
long DATPRecordCount;
|
||||
|
||||
@BoundNumber(size="32")
|
||||
long EXTHFlags;
|
||||
|
||||
@BoundString(size="36")
|
||||
String unknown36Bytes;
|
||||
|
||||
@BoundNumber(size="32")
|
||||
long DRMOffset;
|
||||
@BoundNumber(size="32")
|
||||
long DRMCount;
|
||||
@BoundNumber(size="32")
|
||||
long DRMSize;
|
||||
@BoundNumber(size="32")
|
||||
long DRMFlags;
|
||||
|
||||
@BoundString(size="8")
|
||||
String unknown8Bytes;
|
||||
|
||||
@BoundNumber(size="16")
|
||||
int Unknown1;
|
||||
@BoundNumber(size="16")
|
||||
int LastImageRecord;
|
||||
@BoundNumber(size="32")
|
||||
long Unknown2;
|
||||
@BoundNumber(size="32")
|
||||
long FCISRecord;
|
||||
@BoundNumber(size="32")
|
||||
long Unknown3;
|
||||
@BoundNumber(size="32")
|
||||
long FLISRecord;
|
||||
@BoundNumber(size="32")
|
||||
long Unknown4;
|
||||
}
|
|
@ -34,5 +34,6 @@ public class MobiReader extends Application {
|
|||
|
||||
public static void main(String[] args) {
|
||||
launch(args);
|
||||
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,30 @@
|
|||
/*
|
||||
* To change this template, choose Tools | Templates
|
||||
* and open the template in the editor.
|
||||
*/
|
||||
package mobireader;
|
||||
|
||||
import nl.flotsam.preon.annotation.BoundNumber;
|
||||
|
||||
/**
|
||||
*
|
||||
* @author att
|
||||
*/
|
||||
public class PalmDocHeader {
|
||||
//headerfmt = '>HHIHHHH'
|
||||
@BoundNumber(size="16")
|
||||
int Compression;
|
||||
@BoundNumber(size="16")
|
||||
int Unused;
|
||||
@BoundNumber(size="32")
|
||||
long textLength;
|
||||
@BoundNumber(size="16")
|
||||
int recordCount;
|
||||
@BoundNumber(size="16")
|
||||
int recordSize;
|
||||
@BoundNumber(size="16")
|
||||
int encryptionType;
|
||||
@BoundNumber(size="16")
|
||||
int unknown;
|
||||
|
||||
}
|
|
@ -0,0 +1,289 @@
|
|||
/*
|
||||
* The MIT License
|
||||
*
|
||||
* Copyright (c) 2009 Olle Törnström studiomediatech.com
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
*
|
||||
* CREDIT: Initially implemented by Diogo Kollross and made publicly available
|
||||
* on the website http://www.geocities.com/diogok_br/lz77. Edited here
|
||||
* to provide two flavours for JavaScript usage, either as standalone
|
||||
* compressor/decompressor or as class for copy/paste use.
|
||||
*/
|
||||
|
||||
/**
|
||||
* This class provides simple LZ77 compression and decompression.
|
||||
*
|
||||
* USAGE: Place in your own project package of choice adding preferred package
|
||||
* setting.
|
||||
*
|
||||
* @author Olle Törnström olle[at]studiomediatech[dot]com
|
||||
* @created 2009-02-18
|
||||
*/
|
||||
package unzipping;
|
||||
|
||||
/**
 * Text-oriented LZ77 compressor/decompressor.
 *
 * <p>Compressed output is itself a string. Literal characters are copied
 * through; a back-reference is written as four characters: the reference
 * prefix (a backtick), two base-96 digits holding the distance and one
 * base-96 digit holding the length minus the minimum match length. A literal
 * backtick is escaped by doubling it.
 */
public class LZ77 {

    /** Number of characters a back-reference occupies: prefix + 2 distance digits + 1 length digit. */
    private static final int REFERENCE_TOKEN_LENGTH = 4;

    private final char referencePrefix;
    private final int referenceIntBase;
    private final int referenceIntFloorCode;
    private final int referenceIntCeilCode;
    private final int maxStringDistance;
    private final int minStringLength;
    private final int maxStringLength;
    private final int defaultWindowLength;
    private final int maxWindowLength;

    // CONSTRUCTOR

    /** Creates a codec with the fixed parameters of this format. */
    public LZ77() {
        referencePrefix = '`';
        referenceIntBase = 96;
        referenceIntFloorCode = (int) ' ';
        referenceIntCeilCode = referenceIntFloorCode + referenceIntBase;
        maxStringDistance = referenceIntBase * referenceIntBase - 1;
        minStringLength = 5;
        maxStringLength = referenceIntBase - 1 + minStringLength;
        defaultWindowLength = 144;
        maxWindowLength = maxStringDistance + minStringLength;
    }

    // LAZY STATIC METHODS - ADDED BY: DAN!

    /**
     * Compresses {@code data} with a fresh codec and the default window.
     *
     * @param data string to compress
     * @return LZ77 compressed string
     */
    public static String compressStr(String data) {
        return new LZ77().compress(data, null);
    }

    /**
     * Decompresses {@code data} with a fresh codec.
     *
     * @param data string produced by {@link #compress(String)}
     * @return the original string
     */
    public static String decompressStr(String data) {
        return new LZ77().decompress(data);
    }

    // PUBLIC METHODS

    /**
     * Compress string data using the LZ77 algorithm and the default window.
     *
     * @param data
     *            String data to compress
     * @return LZ77 compressed string
     */
    public String compress(String data) {
        return compress(data, null);
    }

    /**
     * Compress string data using the LZ77 algorithm.
     *
     * @param data
     *            String data to compress
     * @param windowLength
     *            Optional window length; {@code null} selects the default
     * @return LZ77 compressed string
     * @throws IllegalArgumentException
     *             if {@code windowLength} exceeds the maximum window length
     */
    public String compress(String data, Integer windowLength) {

        int window = (windowLength == null) ? defaultWindowLength
                : windowLength.intValue();

        if (window > maxWindowLength)
            throw new IllegalArgumentException("Window length too large");

        StringBuilder compressed = new StringBuilder(data.length());

        int pos = 0;
        int lastPos = data.length() - minStringLength;

        while (pos < lastPos) {

            int searchStart = Math.max(pos - window, 0);
            int matchLength = minStringLength;
            boolean foundMatch = false;
            int bestMatchDistance = maxStringDistance;
            int bestMatchLength = 0;

            while ((searchStart + matchLength) < pos) {

                // regionMatches is false when the target region would run
                // past the end of data, which is exactly the case in which
                // the two clipped substrings used to differ in length.
                boolean isValidMatch = matchLength < maxStringLength
                        && data.regionMatches(searchStart, data, pos,
                                matchLength);

                if (isValidMatch) {

                    matchLength++;
                    foundMatch = true;

                } else {

                    // The last length that still matched.
                    int realMatchLength = matchLength - 1;

                    if (foundMatch && (realMatchLength > bestMatchLength)) {
                        bestMatchDistance = pos - searchStart - realMatchLength;
                        bestMatchLength = realMatchLength;
                    }

                    matchLength = minStringLength;
                    searchStart++;
                    foundMatch = false;
                }
            }

            if (bestMatchLength != 0) {

                compressed.append(referencePrefix)
                        .append(encodeReferenceInt(bestMatchDistance, 2))
                        .append(encodeReferenceLength(bestMatchLength));

                pos += bestMatchLength;

            } else {

                char literal = data.charAt(pos);
                compressed.append(literal);
                if (literal == referencePrefix) {
                    // Escape a literal prefix by doubling it.
                    compressed.append(referencePrefix);
                }

                pos++;
            }
        }

        // The tail is too short to be matched but must still be escaped.
        // The previous replaceAll("/`/g", ...) was a JavaScript regex literal
        // that never matched a backtick in Java, so tails containing the
        // prefix produced output that could not be decompressed.
        String prefix = String.valueOf(referencePrefix);
        compressed.append(data.substring(pos).replace(prefix, prefix + prefix));

        return compressed.toString();
    }

    /**
     * Decompress a string produced by {@link #compress(String, Integer)}.
     *
     * @param data
     *            LZ77 compressed string
     * @return the decompressed string
     * @throws StringIndexOutOfBoundsException
     *             if the input ends inside a reference or refers to text
     *             before the start of the output
     * @throws RuntimeException
     *             if a reference contains a character outside the digit range
     */
    public String decompress(String data) {

        StringBuilder decompressed = new StringBuilder();
        int pos = 0;

        while (pos < data.length()) {

            char currentChar = data.charAt(pos);

            if (currentChar != referencePrefix) {
                decompressed.append(currentChar);
                pos++;
                continue;
            }

            char nextChar = data.charAt(pos + 1);

            if (nextChar == referencePrefix) {
                // Doubled prefix: an escaped literal.
                decompressed.append(referencePrefix);
                pos += 2;
                continue;
            }

            int distance = decodeReferenceInt(data.substring(pos + 1, pos + 3), 2);
            int length = decodeReferenceLength(data.substring(pos + 3, pos + 4));

            // distance is measured from the end of the referenced text to
            // the current end of the output.
            int start = decompressed.length() - distance - length;
            int end = start + length;
            decompressed.append(decompressed.substring(start, end));
            pos += REFERENCE_TOKEN_LENGTH;
        }

        return decompressed.toString();
    }

    // PRIVATE METHODS

    /**
     * Encodes {@code value} as exactly {@code width} base-96 digits, most
     * significant first, each digit offset by the floor character code.
     */
    private String encodeReferenceInt(int value, int width) {

        long limit = 1;
        for (int i = 0; i < width; i++) {
            limit *= referenceIntBase;
        }

        if ((value < 0) || (value >= (limit - 1))) {
            throw new IllegalArgumentException("Reference int out of range: "
                    + value + " (width = " + width + ")");
        }

        char[] digits = new char[width];
        int remaining = value;
        for (int i = width - 1; i >= 0; i--) {
            digits[i] = (char) ((remaining % referenceIntBase) + referenceIntFloorCode);
            remaining /= referenceIntBase;
        }
        return new String(digits);
    }

    /** Encodes a match length as one digit, relative to the minimum match length. */
    private String encodeReferenceLength(int length) {
        return encodeReferenceInt(length - minStringLength, 1);
    }

    /** Decodes the first {@code width} characters of {@code data} as base-96 digits. */
    private int decodeReferenceInt(String data, int width) {

        int value = 0;

        for (int i = 0; i < width; i++) {

            value *= referenceIntBase;

            int charCode = (int) data.charAt(i);

            if ((charCode >= referenceIntFloorCode)
                    && (charCode <= referenceIntCeilCode)) {

                value += charCode - referenceIntFloorCode;

            } else {

                throw new RuntimeException(
                        "Invalid char code in reference int: " + charCode);
            }
        }

        return value;
    }

    /** Decodes a one-digit match length written by {@link #encodeReferenceLength(int)}. */
    private int decodeReferenceLength(String data) {
        return decodeReferenceInt(data, 1) + minStringLength;
    }
}
|
|
@ -0,0 +1,101 @@
|
|||
/*
|
||||
* To change this template, choose Tools | Templates
|
||||
* and open the template in the editor.
|
||||
*/
|
||||
package unzipping;
|
||||
|
||||
/**
|
||||
*
|
||||
* @author att
|
||||
*/
|
||||
public class lz77Unzipper
{
    // Intentionally empty for now: the Python routine kept in the comment
    // that follows this class is the reference for a future port.
}
|
||||
/*
|
||||
import struct
|
||||
# ported directly from the PalmDoc Perl library
|
||||
# http://kobesearch.cpan.org/htdocs/EBook-Tools/EBook/Tools/PalmDoc.pm.html
|
||||
|
||||
def uncompress_lz77(data):
|
||||
length = len(data);
|
||||
offset = 0; # Current offset into data
|
||||
# char; # Character being examined
|
||||
# ord; # Ordinal of $char
|
||||
# lz77; # 16-bit Lempel-Ziv 77 length-offset pair
|
||||
# lz77offset; # LZ77 offset
|
||||
# lz77length; # LZ77 length
|
||||
# lz77pos; # Position inside $lz77length
|
||||
text = ''; # Output (uncompressed) text
|
||||
# textlength; # Length of uncompressed text during LZ77 pass
|
||||
# textpos; # Position inside $text during LZ77 pass
|
||||
|
||||
while offset < length:
|
||||
# char = substr($data,$offset++,1);
|
||||
char = data[offset];
|
||||
offset += 1;
|
||||
ord_ = ord(char);
|
||||
|
||||
# print " ".join([repr(char), hex(ord_)])
|
||||
|
||||
# The long if-elsif chain is the best logic for $ord handling
|
||||
## no critic (Cascading if-elsif chain)
|
||||
if (ord_ == 0):
|
||||
# Nulls are literal
|
||||
text += char;
|
||||
elif (ord_ <= 8):
|
||||
# Next $ord bytes are literal
|
||||
text += data[offset:offset+ord_] # text .=substr($data,$offset,ord);
|
||||
offset += ord_;
|
||||
elif (ord_ <= 0x7f):
|
||||
# Values from 0x09 through 0x7f are literal
|
||||
text += char;
|
||||
elif (ord_ <= 0xbf):
|
||||
# Data is LZ77-compressed
|
||||
|
||||
# From Wikipedia:
|
||||
# "A length-distance pair is always encoded by a two-byte
|
||||
# sequence. Of the 16 bits that make up these two bytes,
|
||||
# 11 bits go to encoding the distance, 3 go to encoding
|
||||
# the length, and the remaining two are used to make sure
|
||||
# the decoder can identify the first byte as the beginning
|
||||
# of such a two-byte sequence."
|
||||
|
||||
offset += 1;
|
||||
if (offset > len(data)):
|
||||
print("WARNING: offset to LZ77 bits is outside of the data: %d" % offset);
|
||||
return text;
|
||||
|
||||
lz77, = struct.unpack('>H', data[offset-2:offset])
|
||||
|
||||
# Leftmost two bits are ID bits and need to be dropped
|
||||
lz77 &= 0x3fff;
|
||||
|
||||
# Length is rightmost 3 bits + 3
|
||||
lz77length = (lz77 & 0x0007) + 3;
|
||||
|
||||
# Remaining 11 bits are offset
|
||||
lz77offset = lz77 >> 3;
|
||||
if (lz77offset < 1):
|
||||
print("WARNING: LZ77 decompression offset is invalid!");
|
||||
return text;
|
||||
|
||||
# Getting text from the offset is a little tricky, because
|
||||
# in theory you can be referring to characters you haven't
|
||||
# actually decompressed yet. You therefore have to check
|
||||
# the reference one character at a time.
|
||||
textlength = len(text);
|
||||
for lz77pos in range(lz77length): # for($lz77pos = 0; $lz77pos < $lz77length; $lz77pos++)
|
||||
textpos = textlength - lz77offset;
|
||||
if (textpos < 0):
|
||||
print("WARNING: LZ77 decompression reference is before"+
|
||||
" beginning of text! %x" % lz77);
|
||||
return;
|
||||
|
||||
text += text[textpos:textpos+1]; #text .= substr($text,$textpos,1);
|
||||
textlength+=1;
|
||||
else:
|
||||
# 0xc0 - 0xff are single characters (XOR 0x80) preceded by
|
||||
# a space
|
||||
text += ' ' + chr(ord_ ^ 0x80);
|
||||
return text;
|
||||
*/
|
Loading…
Reference in New Issue