#define unused __attribute__((unused)) #include "Flips.h" //Module name: Floating IPS, shared core for all frontends //Author: Alcaro //Date: See Git history //Licence: GPL v3.0 or higher #ifdef __MINGW32__ //get rid of dependencies on libstdc++, they waste 200KB on this platform void* operator new(size_t n) { return malloc(n); } // forget allocation failures, let them segfault. void operator delete(void * p) { free(p); } void operator delete(void * p, size_t n) { free(p); } extern "C" void __cxa_pure_virtual() { abort(); } #if __GNUC__ && (__cpp_rtti || __cpp_exceptions) #warning "Consider building with -fno-exceptions -fno-rtti, to avoid dependencies on libgcc_s_sjlj-1.dll and libstdc++-6.dll." #endif #endif //TODO: source ROM chooser //given a target, from all known source ROMs with same extension, find the most similar // read 1MB from each; check how many bytes are same and same location as target // (only counting offsets where both source and target bytes are different from previous, to avoid false positives on long runs of 00) // if one is >80% same (not counting repeated runs), and all others are <20%, use that //if no match, multiple matches, or one or more half-matches: // read first 64K from each of the listed sources, create BPS, and see if one is <20% of size of bps with blank source, while others are >70% //if still nothing unambiguous: // fail //this goes on a separate thread //TODO: more manual GUI // has three text fields, for patch / source / target, and 'pick' buttons nearby that open file dialogs // also Create / Apply / ApplyRun buttons at the bottom //auto selection (only if target field is blank): // patch -> source (crc32 database) // target -> source (algorithm above) // target -> patch (set extension to bps) // (source&patch) -> target (source extension, name/path from patch; also happens if triggering patch->source) //all text fields are scrolled to the end on creation //no auto selection in simple UI //applying or creating clears patch/target, but keeps source //ips creation is allowed, but only by changing filetype in picker, or editing text field; it does not keep last used type // ips application remains unchanged // multi-patching is allowed, each filename is wrapped in <>; if filename doesn't start or end with right char, it's a single name // colon-separated or semicolon-separated are more common, but they're plausible (uncommon, but possible) in filenames // (Super Mario World 2: Yoshi's Island, or Marios;Gate, for example), and I don't want to fail on that // multi-creation is not allowed; technically feasible, but super rare, should probably be CLI instead // if first file isn't found, it tries using the entire field as filename, in case it actually contains semicolon //TODO: delete struct mem ReadWholeFile(LPCWSTR filename) { return file::read(filename); } bool WriteWholeFile(LPCWSTR filename, struct mem data) { return filewrite::write(filename, data); } bool WriteWholeFileWithHeader(LPCWSTR filename, struct mem header, struct mem data) { filewrite* f = filewrite::create(filename); if (!f) return false; bool ret = (f->append(header.ptr, 512) && f->append(data.ptr, data.len)); // do not use header.len, that'd prepend the entire file delete f; return ret; } void FreeFileMemory(struct mem mem) { free(mem.ptr); } class file_libc : public file { size_t size; FILE* io; public: static file* create(const char * filename) { FILE* f = fopen(filename, "rb"); if (!f) return NULL; return new file_libc(f); } private: file_libc(FILE* io) : io(io) { fseek(io, 0, SEEK_END); size = ftell(io); } public: size_t len() { return size; } bool read(uint8_t* target, size_t start, size_t len) { fseek(io, start, SEEK_SET); return (fread(target, 1,len, io) == len); } ~file_libc() { fclose(io); } }; file* file::create_libc(const char * filename) { return file_libc::create(filename); } bool file::exists_libc(const char * filename) { FILE* f = fopen(filename, "rb"); if (f) fclose(f); return (bool)f; } class filewrite_libc : public filewrite { FILE* io; public: static filewrite* create(const char * filename) { FILE* f = fopen(filename, "wb"); if (!f) return NULL; return new filewrite_libc(f); } private: filewrite_libc(FILE* io) : io(io) {} public: bool append(const uint8_t* data, size_t len) { return (fwrite(data, 1,len, io)==len); } ~filewrite_libc() { fclose(io); } }; filewrite* filewrite::create_libc(const char * filename) { return filewrite_libc::create(filename); } class filemap_fallback : public filemap { public: size_t m_len; uint8_t* m_ptr; static filemap* create(file* f) { if (!f) return NULL; size_t len = f->len(); uint8_t* ptr = (uint8_t*)malloc(len); if (!ptr) return NULL; if (!f->read(ptr, 0, len)) { free(ptr); return NULL; } return new filemap_fallback(f, len, ptr); } size_t len() { return m_len; } const uint8_t * ptr() { return m_ptr; } //delete the file early, to avoid file sharing issues on Windows (and because keeping it is useless) // https://github.com/Alcaro/Flips/pull/14 filemap_fallback(file* f, size_t len, uint8_t* ptr) : m_len(len), m_ptr(ptr) { delete f; } ~filemap_fallback() { free(m_ptr); } }; filemap* filemap::create_fallback(LPCWSTR filename) { return filemap_fallback::create(file::create(filename)); } LPWSTR GetExtension(LPCWSTR fname) { LPWSTR ptr1=(LPWSTR)fname; LPWSTR ptr2; ptr2=wcsrchr(ptr1, '/'); if (ptr2) ptr1=ptr2; ptr2=wcsrchr(ptr1, '.'); if (ptr2) ptr1=ptr2; if (*ptr1=='.') return ptr1; else return wcsrchr(ptr1, '\0'); } LPWSTR GetBaseName(LPCWSTR fname) { LPWSTR ptr1=(LPWSTR)fname; LPWSTR ptr2; ptr2=wcsrchr(ptr1, '/'); if (ptr2) ptr1=ptr2+1; return ptr1; } bool forceKeepHeader=false; struct mem file::read() { struct mem out; out.len = len(); out.ptr = (uint8_t*)malloc(out.len + sizeof(WCHAR)); memset(out.ptr + out.len, 0, sizeof(WCHAR)); if (!read(out.ptr, 0, out.len)) { free(out.ptr); struct mem err = {NULL, 0}; return err; } return out; } struct mem file::read(LPCWSTR filename) { struct mem err = {NULL, 0}; file* f = file::create(filename); if (!f) return err; struct mem ret = f->read(); delete f; return ret; } bool filewrite::write(LPCWSTR filename, struct mem data) { filewrite* f = filewrite::create(filename); if (!f) return false; bool ret = f->append(data.ptr, data.len); delete f; return ret; } class fileheader : public file { file* child; public: fileheader(file* child) : child(child) {} size_t len() { return child->len()-512; } bool read(uint8_t* target, size_t start, size_t len) { return child->read(target, start+512, len); } ~fileheader() { delete child; } }; class fileheadermap : public filemap { filemap* child; public: fileheadermap(filemap* child) : child(child) {} size_t len() { return child->len()-512; } //bool read(uint8_t* target, size_t start, size_t len) { return child->read(target, start+512, len); } const uint8_t* ptr() { return child->ptr()+512; } ~fileheadermap() { delete child; } }; const struct errorinfo ipserrors[]={ { el_ok, NULL },//ips_ok { el_unlikelythis, "The patch was applied, but is most likely not intended for this ROM." },//ips_notthis { el_unlikelythis, "The patch was applied, but did nothing. You most likely already had the output file of this patch." },//ips_thisout { el_warning, "The patch was applied, but appears scrambled or malformed." },//ips_suspicious { el_broken, "The patch is broken and can't be used." },//ips_invalid { el_broken, "The IPS format does not support files larger than 16MB." },//ips_16MB { el_warning, "The files are identical! The patch will do nothing." },//ips_identical }; const struct errorinfo bpserrors[]={ { el_ok, NULL },//bps_ok, { el_notthis, "That's the output file already." },//bps_to_output { el_notthis, "This patch is not intended for this ROM." },//bps_not_this { el_broken, "This patch is broken and can't be used." },//bps_broken { el_broken, "Couldn't read input patch." },//bps_io { el_warning, "The files are identical! The patch will do nothing." },//bps_identical { el_broken, "These files are too big for this program to handle." },//bps_too_big { el_broken, "These files are too big for this program to handle." },//bps_out_of_mem (same message as above, it's accurate for both.) { el_broken, "Patch creation was canceled." },//bps_canceled }; LPCWSTR GetManifestName(LPCWSTR romname) { //static WCHAR manifestname[MAX_PATH]; //wcscpy(manifestname, romname); //LPWSTR manifestext=GetExtension(manifestname); //if (!manifestext) manifestext=wcschr(manifestname, '\0'); //wcscpy(manifestext, TEXT(".xml")); //return manifestname; static WCHAR * manifestname=NULL; if (manifestname) free(manifestname); manifestname=(WCHAR*)malloc((wcslen(romname)+1+4)*sizeof(WCHAR)); wcscpy(manifestname, romname); LPWSTR manifestext=GetExtension(manifestname); if (manifestext) wcscpy(manifestext, TEXT(".xml")); return manifestname; } enum patchtype IdentifyPatch(file* patch) { size_t len = patch->len(); uint8_t data[16]; if (len>16) len=16; patch->read(data, 0, len); if (len>=5 && !memcmp(data, "PATCH", 5)) return ty_ips; if (len>=4 && !memcmp(data, "BPS1", 4)) return ty_bps; if (len>=4 && !memcmp(data, "UPS1", 4)) return ty_ups; return ty_null; } //this is the most inefficient possible implementation, but since it only needs about 10 entries, //performance is irrelevant void config::init_raw(LPWSTR contents) { LPCWSTR header = TEXT("[Flips]\n"); if (wcsncmp(contents, header, wcslen(header)) != 0) return; contents += wcslen(header); //I need to somehow ensure that stepping backwards across whitespace doesn't go before the original string. //This can be done with while (iswspace(*contents)) contents++;, but demanding the header above works just as well. while (true) { LPWSTR key; LPWSTR keyend; LPWSTR val; LPWSTR valend; LPWSTR nextline = wcschr(contents, '\n'); if (nextline != NULL) valend = nextline; else valend = wcschr(contents, '\0'); //do not move inside the conditional, it screws up the strchr while (iswspace(valend[-1])) valend--; *valend = '\0'; LPWSTR sep = wcschr(contents, '='); if (sep != NULL) { key = contents; keyend = sep; val = sep+1; while (iswspace(key[0])) key++; while (iswspace(keyend[-1])) keyend--; *keyend = '\0'; while (iswspace(val[0])) val++; if (valend>val && keyend>key && iswalnum(key[0])) { set(key, val); } } if (!nextline) break; contents = nextline+1; while (contents && iswspace(*contents)) contents++; } for (size_t i=0;i 0 && data.len%sizeof(WCHAR) == 0) { this->init_raw((LPWSTR)(data.ptr)); } free(data.ptr); this->filename = wcsdup(filename); } void config::sort() { //bubble sort, and called for every insertion... super fun //but it's easy, it works, and it's fast for numentries=10 and there's no reason to go much higher than that for (size_t i=0;i 0) { LPWSTR tmp = names[i]; names[i] = names[j]; names[j] = tmp; tmp = values[i]; values[i] = values[j]; values[j] = tmp; } } } void config::set(LPCWSTR name, LPCWSTR value) { for (size_t i=0;inumentries;i++) { if (!wcscmp(name, this->names[i])) { if (value == this->values[i]) return; free(this->values[i]); if (value!=NULL) { this->values[i] = wcsdup(value); } else { free(this->names[i]); this->names[i] = this->names[this->numentries-1]; this->values[i] = this->values[this->numentries-1]; this->numentries--; } return; } } this->numentries++; this->names = (LPWSTR*)realloc(this->names, sizeof(LPWSTR)*this->numentries); this->values = (LPWSTR*)realloc(this->values, sizeof(LPWSTR)*this->numentries); this->names[this->numentries-1] = wcsdup(name); this->values[this->numentries-1] = wcsdup(value); sort(); } LPCWSTR config::get(LPCWSTR name, LPCWSTR def) { for (size_t i=0;inumentries;i++) { if (!wcscmp(name, this->names[i])) { if (this->values[i]) return this->values[i]; else return def; } } return def; } LPWSTR config::flatten() { LPCWSTR header = TEXT("[Flips]\n#Changing this file may void your warranty. Do not report any bugs if you do.\n"); size_t len = wcslen(header); for (size_t i=0;inumentries;i++) { if (this->values[i]!=NULL) { len += wcslen(this->names[i]) + 1 + wcslen(this->values[i]) + 1; } } LPWSTR ret = (LPWSTR)malloc((len+1)*sizeof(WCHAR)); LPWSTR at = ret; at += wsprintf(at, TEXT("%s"), header); for (size_t i=0;inumentries;i++) { if (this->values[i]!=NULL) { at += wsprintf(at, TEXT("%s=%s\n"), this->names[i], this->values[i]); } } return ret; } config::~config() { if (this->filename) { LPWSTR data = this->flatten(); //puts(data); struct mem m = { (uint8_t*)data, wcslen(data)*sizeof(WCHAR) }; filewrite::write(this->filename, m); free(data); free(this->filename); } for (size_t i=0;inumentries;i++) { //printf("#(%s)(%s)\n",this->names[i],this->values[i]); free(this->names[i]); free(this->values[i]); } free(this->names); free(this->values); } config cfg; static LPWSTR EmuGetKey(LPCWSTR filename) { static WCHAR ret[64]; wsprintf(ret, TEXT("emu%s"), GetExtension(filename)); return ret; } LPCWSTR GetEmuFor(LPCWSTR filename) { return cfg.get(EmuGetKey(filename)); } void SetEmuFor(LPCWSTR filename, LPCWSTR emu) { cfg.set(EmuGetKey(filename), emu); } enum { ch_crc32, ch_last }; static LPCWSTR checkmap_typenames[] = { TEXT("rom.crc32.") }; // sizeof rather than strlen to ensure compile-time evaluation; -1 for NUL static const int checkmap_typenames_maxlen = sizeof("rom.crc32.")-1; struct checkmap { uint8_t* sum; LPWSTR name; }; static struct checkmap * checkmap[ch_last]={NULL}; static uint32_t checkmap_len[ch_last]={0}; static const uint8_t checkmap_sum_size[]={ 4 }; static const uint8_t checkmap_sum_size_max = 4; static const int CfgSumNameMaxLen = checkmap_typenames_maxlen + checkmap_sum_size_max*2 + 1; static void CfgSumName(WCHAR* out, int type, const void* sum) { const uint8_t* sum8 = (uint8_t*)sum; wcscpy(out, checkmap_typenames[type]); WCHAR* end = out + wcslen(checkmap_typenames[type]); for (int i=0;isum=(uint8_t*)malloc(checkmap_sum_size[type]); memcpy(item->sum, sum, checkmap_sum_size[type]); item->name=wcsdup(filename); WCHAR cfgname[CfgSumNameMaxLen]; CfgSumName(cfgname, type, sum); cfg.set(cfgname, filename); } struct mem GetRomList() { struct mem out={NULL, 0}; for (unsigned int type=0;type data.len) return; \ memcpy(target, data.ptr, bytes); \ data.ptr += bytes; \ data.len -= bytes #define read_discard(bytes) \ if (bytes > data.len) return; \ data.ptr += bytes; \ data.len -= bytes uint32_t count; read(&count, sizeof(count)); checkmap[type]=(struct checkmap*)malloc(sizeof(struct checkmap)*count*2);//overallocate so I won't need to round the count while (count--) { uint8_t hashlen; read(&hashlen, sizeof(hashlen)); uint16_t strlen; read(&strlen, sizeof(strlen)); if (hashlen==checkmap_sum_size[type]) { if (data.len < hashlen+strlen) return; struct checkmap* item=&checkmap[type][checkmap_len[type]++]; item->sum=(uint8_t*)malloc(checkmap_sum_size[type]); read(item->sum, hashlen); item->name=(WCHAR*)malloc(strlen+sizeof(WCHAR)); read(item->name, strlen); memset((uint8_t*)item->name + strlen, 0, sizeof(WCHAR)); } else { read_discard(hashlen); read_discard(strlen); } } #undef read } } LPCWSTR FindRomForPatch(file* patch, bool * possibleToFind) { if (possibleToFind) *possibleToFind=false; enum patchtype patchtype=IdentifyPatch(patch); if (patchtype==ty_bps) { struct bpsinfo info = bps_get_info(patch, false); if (info.error) return NULL; if (possibleToFind) *possibleToFind=true; return FindRomForSum(ch_crc32, &info.crc_in); } //UPS has checksums too, but screw UPS. Nobody cares. return NULL; } void AddToRomList(file* patch, LPCWSTR path) { enum patchtype patchtype=IdentifyPatch(patch); if (patchtype==ty_bps) { struct bpsinfo info = bps_get_info(patch, false); if (info.error) return; AddRomForSum(ch_crc32, &info.crc_in, path); } } void AddConfigToRomList(LPCWSTR key, LPCWSTR value) { int type; uint8_t sum[checkmap_sum_size_max]; if (CfgSumParseName(&type, sum, key)) AddRomForSum(type, sum, value); } void DeleteRomFromList(LPCWSTR path) { for (unsigned int type=0;typeread(); // There's no real reason to remove this, no patcher knows how to handle these file objects. enum patchtype patchtype=IdentifyPatch(patch); struct errorinfo errinf; removeheader=(removeheader && patchtype==ty_bps); if (removeheader) { inrom.ptr+=512; inrom.len-=512; } struct mem outrom={NULL,0}; struct mem manifest={NULL,0}; errinf=error(el_broken, "Unknown patch format."); if (patchtype==ty_bps) { errinf=bpserrors[bps_apply(patchmem, inrom, &outrom, &manifest, !verifyinput)]; if (errinf.level==el_notthis && !verifyinput && outrom.ptr) errinf = error(el_warning, "This patch is not intended for this ROM (output created anyways)"); if (errinf.level==el_notthis) { bpsinfo inf = bps_get_info(patch, false); static char errtextbuf[2][256]; // ugly trick to get the nested invocation for the header remover to not screw up the error static int errtextid=0; // makes it impossible to save the error strings, but Flips doesn't do that anyways char* errtext=errtextbuf[errtextid]; if (++errtextid == 2) errtextid=0; if (inf.size_in != inrom.len) { //http://msdn.microsoft.com/en-us/library/vstudio/tcxf1dw6.aspx says %zX is not supported //this is true up to and including Windows Vista; 7 adds support for it //I could define it to "I", but my GCC does not acknowledge its legitimacy and throws bogus warnings //nor does my GCC accept any supported alternative, so let's just nuke the entire warning. //it's a poor solution, but it's the best I can find # define z "z" sprintf(errtext, "This patch is not intended for this ROM. Expected file size %" z "u, got %" z "u.", inf.size_in, inrom.len); errinf.description=errtext; } else { uint32_t crc = crc32(inrom.ptr, inrom.len); if (inf.crc_in != crc) { sprintf(errtext, "This patch is not intended for this ROM. Expected checksum %.8X, got %.8X.", inf.crc_in, crc); errinf.description=errtext; } } } } if (errinf.level==el_ok) errinf.description="The patch was applied successfully!"; struct manifestinfo defmanifestinfo={true,false,NULL}; if (!manifestinfo) manifestinfo=&defmanifestinfo; if (manifestinfo->use) { if (manifest.ptr) { LPCWSTR manifestname; if (manifestinfo->name) manifestname=manifestinfo->name; else manifestname=GetManifestName(outromname); if (!WriteWholeFile(manifestname, manifest) && manifestinfo->required) { if (errinf.level==el_ok) errinf=error(el_warning, "The patch was applied, but the manifest could not be created."); } } else if (manifestinfo->required && errinf.level==el_ok) { errinf=error(el_warning, "The patch was applied, but there was no manifest present."); } } if (removeheader) { inrom.ptr-=512; inrom.len+=512; if (errinf.levelget(), verifyinput, shouldRemoveHeader(inromname, inrom->len()), outromname, manifestinfo); if (update_rom_list && errinf.level==el_ok) AddToRomList(patch, inromname); delete inrom; return errinf; } struct errorinfo ApplyPatch(LPCWSTR patchname, LPCWSTR inromname, bool verifyinput, LPCWSTR outromname, struct manifestinfo * manifestinfo, bool update_rom_list) { file* patch = file::create(patchname); if (!patch) { return error(el_broken, "Couldn't read input patch"); } struct errorinfo errinf=ApplyPatchMem(patch, inromname, verifyinput, outromname, manifestinfo, update_rom_list); delete patch; return errinf; } char bpsdProgStr[24]; int bpsdLastPromille=-1; bool bpsdeltaGetProgress(size_t done, size_t total) { if (total<1000) total=1000;//avoid div by zero int promille=done/(total/1000);//don't set this to done*1000/total, it'd just give overflows on huge stuff. 100% is handled later if (promille==bpsdLastPromille) return false; bpsdLastPromille=promille; if (promille>=1000) return false; strcpy(bpsdProgStr, "Please wait... "); bpsdProgStr[15]='0'+promille/100; int digit1=((promille<100)?15:16); bpsdProgStr[digit1+0]='0'+promille/10%10; bpsdProgStr[digit1+1]='.'; bpsdProgStr[digit1+2]='0'+promille%10; bpsdProgStr[digit1+3]='%'; bpsdProgStr[digit1+4]='\0'; return true; } bool bpsdeltaProgressCLI(unused void* userdata, size_t done, size_t total) { if (!bpsdeltaGetProgress(done, total)) return true; fputs(bpsdProgStr, stdout); putchar('\r'); fflush(stdout); return true; } struct errorinfo CreatePatchToMem(LPCWSTR inromname, LPCWSTR outromname, enum patchtype patchtype, struct manifestinfo * manifestinfo, struct mem * patchmem) { bool usemmap = (patchtype!=ty_bps && patchtype!=ty_bps_moremem); //pick roms filemap* romsmap[2]={NULL, NULL}; file* roms[2]={NULL, NULL}; unused size_t lens[2]; for (int i=0;i<2;i++) { LPCWSTR romname=((i==0)?inromname:outromname); if (usemmap) { romsmap[i] = filemap::create(romname); if (!romsmap[i]) { if (i==1) delete romsmap[0]; return error(el_broken, "Couldn't read this ROM."); } if (shouldRemoveHeader(romname, romsmap[i]->len()) && (patchtype==ty_bps || patchtype==ty_bps_linear || patchtype==ty_bps_moremem)) { romsmap[i] = new fileheadermap(romsmap[i]); } lens[i] = romsmap[i]->len(); } else { roms[i] = file::create(romname); if (!roms[i]) { if (i==1) delete roms[0]; return error(el_broken, "Couldn't read this ROM."); } if (shouldRemoveHeader(romname, roms[i]->len()) && (patchtype==ty_bps || patchtype==ty_bps_linear || patchtype==ty_bps_moremem)) { roms[i] = new fileheader(roms[i]); } lens[i] = roms[i]->len(); } } struct mem manifest={NULL,0}; struct errorinfo manifesterr={el_ok, NULL}; struct manifestinfo defmanifestinfo={true,false,NULL}; if (!manifestinfo) manifestinfo=&defmanifestinfo; if (patchtype==ty_bps || patchtype==ty_bps_linear || patchtype==ty_bps_moremem) { LPCWSTR manifestname; if (manifestinfo->name) manifestname=manifestinfo->name; else manifestname=GetManifestName(outromname); manifest=ReadWholeFile(manifestname); if (!manifest.ptr) manifesterr=error(el_warning, "The patch was created, but the manifest could not be read."); } else manifesterr=error(el_warning, "The patch was created, but this patch format does not support manifests."); struct errorinfo errinf={ el_broken, "Unknown patch format." }; if (patchtype==ty_bps || patchtype==ty_bps_moremem) { { errinf=bpserrors[bps_create_delta(roms[0], roms[1], manifest, patchmem, bpsdeltaProgressCLI, NULL, (patchtype==ty_bps_moremem))]; } } if (patchtype==ty_bps_linear) { errinf=bpserrors[bps_create_linear(romsmap[0]->get(), romsmap[1]->get(), manifest, patchmem)]; } FreeFileMemory(manifest); if (errinf.level==el_ok) errinf.description="The patch was created successfully!"; if (manifestinfo->required && errinf.level==el_ok && manifesterr.level!=el_ok) errinf=manifesterr; // if (errinf.level==el_ok && lens[0] > lens[1]) // { // errinf=error(el_warning, "The patch was created, but the input ROM is larger than the " // "output ROM. Double check whether you've gotten them backwards."); // } if (usemmap) { delete romsmap[0]; delete romsmap[1]; } else { delete roms[0]; delete roms[1]; } return errinf; } struct errorinfo CreatePatch(LPCWSTR inromname, LPCWSTR outromname, enum patchtype patchtype, struct manifestinfo * manifestinfo, LPCWSTR patchname) { struct mem patch={NULL,0}; struct errorinfo errinf = CreatePatchToMem(inromname, outromname, patchtype, manifestinfo, &patch); if (errinf.levelread(meta.ptr, info.meta_start, info.meta_size); if (manifestinfo->required) { if (manifestinfo->name) { filewrite::write(manifestinfo->name, meta); } else { fwrite(meta.ptr, 1,meta.len, stdout); free(meta.ptr); return el_ok; } } } LPCWSTR inromname = FindRomForPatch(patch, NULL); //'z' macro defined above printf("Input ROM: %" z "u bytes, CRC32 %.8X", info.size_in, info.crc_in); if (inromname) wprintf(TEXT(", %s"), inromname); puts(""); printf("Output ROM: %" z "u bytes, CRC32 %.8X\n", info.size_out, info.crc_out); //floating point may lose a little precision, but it's easier than dodging overflows, and this //is the output of inaccurate heuristics anyways, losing a little more makes no difference. //Windows MulDiv could also work, but it's kinda nonportable. //printf("Change index: %i / 1000\n", (int)(info.change_num / (float)info.change_denom * 1000)); if (info.meta_size) { printf("Metadata: %" z "u bytes:\n", info.meta_size); char* meta_iter = (char*)meta.ptr; char* meta_end = meta_iter + meta.len; for (int i=0;i<3;i++) { int n_chars = meta_end-meta_iter; if (n_chars > 75) n_chars = 75; char* nextline = (char*)memchr(meta_iter, '\n', n_chars); if (nextline && nextline-meta_iter < n_chars) n_chars = nextline-meta_iter; if (!nextline && !n_chars) break; // wipe trailing linebreaks printf(" %.*s\n", n_chars, meta_iter); if (!nextline) break; meta_iter = nextline+1; } } if (verbosity >= 1) { puts("Disassembly:"); struct mem patchmem = patch->read(); bps_disassemble(patchmem, stdout); free(patchmem.ptr); } free(meta.ptr); return el_ok; } puts("No information available for this patch type"); return el_broken; } void usage() { GUIClaimConsole(); fputs( // 12345678901234567890123456789012345678901234567890123456789012345678901234567890 flipsversion "\n" "usage:\n" " " "flips [--apply] [--exact] patch.bps rom.smc [outrom.smc]\n" "or flips [--create] [--exact] [--bps | etc] clean.smc hack.smc [patch.bps]\n" "\n" // 12345678901234567890123456789012345678901234567890123456789012345678901234567890 "options:\n" "-a --apply: apply IPS, BPS or UPS patch (default if given two arguments)\n" " if output filename is not given, Flips defaults to patch.smc beside the patch\n" "-c --create: create IPS or BPS patch (default if given three arguments)\n" "-I --info: BPS files contain information about input and output roms, print it\n" " with --verbose, disassemble the entire patch\n" //" also estimates how much of the source file is retained\n" //" anything under 400 is fine, anything over 600 should be treated with suspicion\n" //(TODO: --info --verbose) "-i --ips, -b -B --bps --bps-delta, --bps-delta-moremem, --bps-linear:\n" " create this patch format instead of guessing based on file extension\n" " ignored when applying\n" " bps creation styles:\n" " delta is the recommended and default one; it's a good balance between creation\n" " performance and patch size\n" " delta-moremem is usually slightly (~3%) faster than delta, but uses about\n" " twice as much memory; it gives identical patches to delta\n" " linear is the fastest, but tends to give pretty big patches\n" " all BPS patchers can apply all patch styles, the only difference is file size\n" " and creation performance\n" "--exact: do not remove SMC headers when applying or creating a BPS patch\n" " not recommended, may affect patcher compatibility\n" "--ignore-checksum: accept checksum mismatches (BPS only)\n" "-m or --manifest: emit or insert a manifest file as romname.xml (BPS only)\n" "-mfilename or --manifest=filename: emit or insert a manifest file exactly here\n" "-h -? --help: show this information\n" "-v --version: show application version\n" "\n" // 12345678901234567890123456789012345678901234567890123456789012345678901234567890 , stdout); exit(0); } int error_to_exit(errorlevel level) { return (level >= el_notthis ? EXIT_FAILURE : EXIT_SUCCESS); } int flipsmain(int argc, WCHAR * argv[]) { enum patchtype patchtype=ty_null; enum { a_default, a_apply_filepicker, a_apply_given, a_create, a_info } action=a_default; int numargs=0; LPCWSTR arg[3]={NULL,NULL,NULL}; bool hasFlags=false; int verbosity = 0; bool ignoreChecksum=false; struct manifestinfo manifestinfo={false, false, NULL}; // { // bool use; // bool required; // LPCWSTR name; // bool success; //}; for (int i=1;i>4); crc = crctable_4bits[(crc^(data[i]>>4))&0x0F] ^ (crc>>4); } return ~crc; } /* * divsufsort.c for libdivsufsort-lite * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved. * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, * copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following * conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. */ #include #include #include # include /*- Constants -*/ #define INLINE __inline #if defined(ALPHABET_SIZE) && (ALPHABET_SIZE < 1) # undef ALPHABET_SIZE #endif #if !defined(ALPHABET_SIZE) # define ALPHABET_SIZE (256) #endif #define BUCKET_A_SIZE (ALPHABET_SIZE) #define BUCKET_B_SIZE (ALPHABET_SIZE * ALPHABET_SIZE) #if defined(SS_INSERTIONSORT_THRESHOLD) # if SS_INSERTIONSORT_THRESHOLD < 1 # undef SS_INSERTIONSORT_THRESHOLD # define SS_INSERTIONSORT_THRESHOLD (1) # endif #else # define SS_INSERTIONSORT_THRESHOLD (8) #endif #if defined(SS_BLOCKSIZE) # if SS_BLOCKSIZE < 0 # undef SS_BLOCKSIZE # define SS_BLOCKSIZE (0) # elif 32768 <= SS_BLOCKSIZE # undef SS_BLOCKSIZE # define SS_BLOCKSIZE (32767) # endif #else # define SS_BLOCKSIZE (1024) #endif /* minstacksize = log(SS_BLOCKSIZE) / log(3) * 2 */ #if SS_BLOCKSIZE == 0 # define SS_MISORT_STACKSIZE (96) #elif SS_BLOCKSIZE <= 4096 # define SS_MISORT_STACKSIZE (16) #else # define SS_MISORT_STACKSIZE (24) #endif #define SS_SMERGE_STACKSIZE (32) #define TR_INSERTIONSORT_THRESHOLD (8) #define TR_STACKSIZE (64) /*- Macros -*/ #ifndef SWAP # define SWAP(_a, _b) do { t = (_a); (_a) = (_b); (_b) = t; } while(0) #endif /* SWAP */ #ifndef MIN # define MIN(_a, _b) (((_a) < (_b)) ? (_a) : (_b)) #endif /* MIN */ #ifndef MAX # define MAX(_a, _b) (((_a) > (_b)) ? (_a) : (_b)) #endif /* MAX */ #define STACK_PUSH(_a, _b, _c, _d)\ do {\ assert(ssize < STACK_SIZE);\ stack[ssize].a = (_a), stack[ssize].b = (_b),\ stack[ssize].c = (_c), stack[ssize++].d = (_d);\ } while(0) #define STACK_PUSH5(_a, _b, _c, _d, _e)\ do {\ assert(ssize < STACK_SIZE);\ stack[ssize].a = (_a), stack[ssize].b = (_b),\ stack[ssize].c = (_c), stack[ssize].d = (_d), stack[ssize++].e = (_e);\ } while(0) #define STACK_POP(_a, _b, _c, _d)\ do {\ assert(0 <= ssize);\ if(ssize == 0) { return; }\ (_a) = stack[--ssize].a, (_b) = stack[ssize].b,\ (_c) = stack[ssize].c, (_d) = stack[ssize].d;\ } while(0) #define STACK_POP5(_a, _b, _c, _d, _e)\ do {\ assert(0 <= ssize);\ if(ssize == 0) { return; }\ (_a) = stack[--ssize].a, (_b) = stack[ssize].b,\ (_c) = stack[ssize].c, (_d) = stack[ssize].d, (_e) = stack[ssize].e;\ } while(0) #define BUCKET_A(_c0) bucket_A[(_c0)] #if ALPHABET_SIZE == 256 #define BUCKET_B(_c0, _c1) (bucket_B[((_c1) << 8) | (_c0)]) #define BUCKET_BSTAR(_c0, _c1) (bucket_B[((_c0) << 8) | (_c1)]) #else #define BUCKET_B(_c0, _c1) (bucket_B[(_c1) * ALPHABET_SIZE + (_c0)]) #define BUCKET_BSTAR(_c0, _c1) (bucket_B[(_c0) * ALPHABET_SIZE + (_c1)]) #endif /*- Private Functions -*/ static const int lg_table[256]= { -1,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4, 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7 }; #if (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) static INLINE int ss_ilg(int n) { #if SS_BLOCKSIZE == 0 return (n & 0xffff0000) ? ((n & 0xff000000) ? 24 + lg_table[(n >> 24) & 0xff] : 16 + lg_table[(n >> 16) & 0xff]) : ((n & 0x0000ff00) ? 8 + lg_table[(n >> 8) & 0xff] : 0 + lg_table[(n >> 0) & 0xff]); #elif SS_BLOCKSIZE < 256 return lg_table[n]; #else return (n & 0xff00) ? 8 + lg_table[(n >> 8) & 0xff] : 0 + lg_table[(n >> 0) & 0xff]; #endif } #endif /* (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) */ #if SS_BLOCKSIZE != 0 static const int sqq_table[256] = { 0, 16, 22, 27, 32, 35, 39, 42, 45, 48, 50, 53, 55, 57, 59, 61, 64, 65, 67, 69, 71, 73, 75, 76, 78, 80, 81, 83, 84, 86, 87, 89, 90, 91, 93, 94, 96, 97, 98, 99, 101, 102, 103, 104, 106, 107, 108, 109, 110, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 128, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 144, 145, 146, 147, 148, 149, 150, 150, 151, 152, 153, 154, 155, 155, 156, 157, 158, 159, 160, 160, 161, 162, 163, 163, 164, 165, 166, 167, 167, 168, 169, 170, 170, 171, 172, 173, 173, 174, 175, 176, 176, 177, 178, 178, 179, 180, 181, 181, 182, 183, 183, 184, 185, 185, 186, 187, 187, 188, 189, 189, 190, 191, 192, 192, 193, 193, 194, 195, 195, 196, 197, 197, 198, 199, 199, 200, 201, 201, 202, 203, 203, 204, 204, 205, 206, 206, 207, 208, 208, 209, 209, 210, 211, 211, 212, 212, 213, 214, 214, 215, 215, 216, 217, 217, 218, 218, 219, 219, 220, 221, 221, 222, 222, 223, 224, 224, 225, 225, 226, 226, 227, 227, 228, 229, 229, 230, 230, 231, 231, 232, 232, 233, 234, 234, 235, 235, 236, 236, 237, 237, 238, 238, 239, 240, 240, 241, 241, 242, 242, 243, 243, 244, 244, 245, 245, 246, 246, 247, 247, 248, 248, 249, 249, 250, 250, 251, 251, 252, 252, 253, 253, 254, 254, 255 }; static INLINE int ss_isqrt(int x) { int y, e; if(x >= (SS_BLOCKSIZE * SS_BLOCKSIZE)) { return SS_BLOCKSIZE; } e = (x & 0xffff0000) ? ((x & 0xff000000) ? 24 + lg_table[(x >> 24) & 0xff] : 16 + lg_table[(x >> 16) & 0xff]) : ((x & 0x0000ff00) ? 8 + lg_table[(x >> 8) & 0xff] : 0 + lg_table[(x >> 0) & 0xff]); if(e >= 16) { y = sqq_table[x >> ((e - 6) - (e & 1))] << ((e >> 1) - 7); if(e >= 24) { y = (y + 1 + x / y) >> 1; } y = (y + 1 + x / y) >> 1; } else if(e >= 8) { y = (sqq_table[x >> ((e - 6) - (e & 1))] >> (7 - (e >> 1))) + 1; } else { return sqq_table[x] >> 4; } return (x < (y * y)) ? y - 1 : y; } #endif /* SS_BLOCKSIZE != 0 */ /*---------------------------------------------------------------------------*/ /* Compares two suffixes. */ static INLINE int ss_compare(const unsigned char *T, const int *p1, const int *p2, int depth) { const unsigned char *U1, *U2, *U1n, *U2n; for(U1 = T + depth + *p1, U2 = T + depth + *p2, U1n = T + *(p1 + 1) + 2, U2n = T + *(p2 + 1) + 2; (U1 < U1n) && (U2 < U2n) && (*U1 == *U2); ++U1, ++U2) { } return U1 < U1n ? (U2 < U2n ? *U1 - *U2 : 1) : (U2 < U2n ? -1 : 0); } /*---------------------------------------------------------------------------*/ #if (SS_BLOCKSIZE != 1) && (SS_INSERTIONSORT_THRESHOLD != 1) /* Insertionsort for small size groups */ static void ss_insertionsort(const unsigned char *T, const int *PA, int *first, int *last, int depth) { int *i, *j; int t; int r; for(i = last - 2; first <= i; --i) { for(t = *i, j = i + 1; 0 < (r = ss_compare(T, PA + t, PA + *j, depth));) { do { *(j - 1) = *j; } while((++j < last) && (*j < 0)); if(last <= j) { break; } } if(r == 0) { *j = ~*j; } *(j - 1) = t; } } #endif /* (SS_BLOCKSIZE != 1) && (SS_INSERTIONSORT_THRESHOLD != 1) */ /*---------------------------------------------------------------------------*/ #if (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) static INLINE void ss_fixdown(const unsigned char *Td, const int *PA, int *SA, int i, int size) { int j, k; int v; int c, d, e; for(v = SA[i], c = Td[PA[v]]; (j = 2 * i + 1) < size; SA[i] = SA[k], i = k) { d = Td[PA[SA[k = j++]]]; if(d < (e = Td[PA[SA[j]]])) { k = j; d = e; } if(d <= c) { break; } } SA[i] = v; } /* Simple top-down heapsort. */ static void ss_heapsort(const unsigned char *Td, const int *PA, int *SA, int size) { int i, m; int t; m = size; if((size % 2) == 0) { m--; if(Td[PA[SA[m / 2]]] < Td[PA[SA[m]]]) { SWAP(SA[m], SA[m / 2]); } } for(i = m / 2 - 1; 0 <= i; --i) { ss_fixdown(Td, PA, SA, i, m); } if((size % 2) == 0) { SWAP(SA[0], SA[m]); ss_fixdown(Td, PA, SA, 0, m); } for(i = m - 1; 0 < i; --i) { t = SA[0], SA[0] = SA[i]; ss_fixdown(Td, PA, SA, 0, i); SA[i] = t; } } /*---------------------------------------------------------------------------*/ /* Returns the median of three elements. */ static INLINE int * ss_median3(const unsigned char *Td, const int *PA, int *v1, int *v2, int *v3) { int *t; if(Td[PA[*v1]] > Td[PA[*v2]]) { SWAP(v1, v2); } if(Td[PA[*v2]] > Td[PA[*v3]]) { if(Td[PA[*v1]] > Td[PA[*v3]]) { return v1; } else { return v3; } } return v2; } /* Returns the median of five elements. */ static INLINE int * ss_median5(const unsigned char *Td, const int *PA, int *v1, int *v2, int *v3, int *v4, int *v5) { int *t; if(Td[PA[*v2]] > Td[PA[*v3]]) { SWAP(v2, v3); } if(Td[PA[*v4]] > Td[PA[*v5]]) { SWAP(v4, v5); } if(Td[PA[*v2]] > Td[PA[*v4]]) { SWAP(v2, v4); SWAP(v3, v5); } if(Td[PA[*v1]] > Td[PA[*v3]]) { SWAP(v1, v3); } if(Td[PA[*v1]] > Td[PA[*v4]]) { SWAP(v1, v4); SWAP(v3, v5); } if(Td[PA[*v3]] > Td[PA[*v4]]) { return v4; } return v3; } /* Returns the pivot element. */ static INLINE int * ss_pivot(const unsigned char *Td, const int *PA, int *first, int *last) { int *middle; int t; t = last - first; middle = first + t / 2; if(t <= 512) { if(t <= 32) { return ss_median3(Td, PA, first, middle, last - 1); } else { t >>= 2; return ss_median5(Td, PA, first, first + t, middle, last - 1 - t, last - 1); } } t >>= 3; first = ss_median3(Td, PA, first, first + t, first + (t << 1)); middle = ss_median3(Td, PA, middle - t, middle, middle + t); last = ss_median3(Td, PA, last - 1 - (t << 1), last - 1 - t, last - 1); return ss_median3(Td, PA, first, middle, last); } /*---------------------------------------------------------------------------*/ /* Binary partition for substrings. */ static INLINE int * ss_partition(const int *PA, int *first, int *last, int depth) { int *a, *b; int t; for(a = first - 1, b = last;;) { for(; (++a < b) && ((PA[*a] + depth) >= (PA[*a + 1] + 1));) { *a = ~*a; } for(; (a < --b) && ((PA[*b] + depth) < (PA[*b + 1] + 1));) { } if(b <= a) { break; } t = ~*b; *b = *a; *a = t; } if(first < a) { *first = ~*first; } return a; } /* Multikey introsort for medium size groups. */ static void ss_mintrosort(const unsigned char *T, const int *PA, int *first, int *last, int depth) { #define STACK_SIZE SS_MISORT_STACKSIZE struct { int *a, *b, c; int d; } stack[STACK_SIZE]; const unsigned char *Td; int *a, *b, *c, *d, *e, *f; int s, t; int ssize; int limit; int v, x = 0; for(ssize = 0, limit = ss_ilg(last - first);;) { if((last - first) <= SS_INSERTIONSORT_THRESHOLD) { #if 1 < SS_INSERTIONSORT_THRESHOLD if(1 < (last - first)) { ss_insertionsort(T, PA, first, last, depth); } #endif STACK_POP(first, last, depth, limit); continue; } Td = T + depth; if(limit-- == 0) { ss_heapsort(Td, PA, first, last - first); } if(limit < 0) { for(a = first + 1, v = Td[PA[*first]]; a < last; ++a) { if((x = Td[PA[*a]]) != v) { if(1 < (a - first)) { break; } v = x; first = a; } } if(Td[PA[*first] - 1] < v) { first = ss_partition(PA, first, a, depth); } if((a - first) <= (last - a)) { if(1 < (a - first)) { STACK_PUSH(a, last, depth, -1); last = a, depth += 1, limit = ss_ilg(a - first); } else { first = a, limit = -1; } } else { if(1 < (last - a)) { STACK_PUSH(first, a, depth + 1, ss_ilg(a - first)); first = a, limit = -1; } else { last = a, depth += 1, limit = ss_ilg(a - first); } } continue; } /* choose pivot */ a = ss_pivot(Td, PA, first, last); v = Td[PA[*a]]; SWAP(*first, *a); /* partition */ for(b = first; (++b < last) && ((x = Td[PA[*b]]) == v);) { } if(((a = b) < last) && (x < v)) { for(; (++b < last) && ((x = Td[PA[*b]]) <= v);) { if(x == v) { SWAP(*b, *a); ++a; } } } for(c = last; (b < --c) && ((x = Td[PA[*c]]) == v);) { } if((b < (d = c)) && (x > v)) { for(; (b < --c) && ((x = Td[PA[*c]]) >= v);) { if(x == v) { SWAP(*c, *d); --d; } } } for(; b < c;) { SWAP(*b, *c); for(; (++b < c) && ((x = Td[PA[*b]]) <= v);) { if(x == v) { SWAP(*b, *a); ++a; } } for(; (b < --c) && ((x = Td[PA[*c]]) >= v);) { if(x == v) { SWAP(*c, *d); --d; } } } if(a <= d) { c = b - 1; if((s = a - first) > (t = b - a)) { s = t; } for(e = first, f = b - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); } if((s = d - c) > (t = last - d - 1)) { s = t; } for(e = b, f = last - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); } a = first + (b - a), c = last - (d - c); b = (v <= Td[PA[*a] - 1]) ? a : ss_partition(PA, a, c, depth); if((a - first) <= (last - c)) { if((last - c) <= (c - b)) { STACK_PUSH(b, c, depth + 1, ss_ilg(c - b)); STACK_PUSH(c, last, depth, limit); last = a; } else if((a - first) <= (c - b)) { STACK_PUSH(c, last, depth, limit); STACK_PUSH(b, c, depth + 1, ss_ilg(c - b)); last = a; } else { STACK_PUSH(c, last, depth, limit); STACK_PUSH(first, a, depth, limit); first = b, last = c, depth += 1, limit = ss_ilg(c - b); } } else { if((a - first) <= (c - b)) { STACK_PUSH(b, c, depth + 1, ss_ilg(c - b)); STACK_PUSH(first, a, depth, limit); first = c; } else if((last - c) <= (c - b)) { STACK_PUSH(first, a, depth, limit); STACK_PUSH(b, c, depth + 1, ss_ilg(c - b)); first = c; } else { STACK_PUSH(first, a, depth, limit); STACK_PUSH(c, last, depth, limit); first = b, last = c, depth += 1, limit = ss_ilg(c - b); } } } else { limit += 1; if(Td[PA[*first] - 1] < v) { first = ss_partition(PA, first, last, depth); limit = ss_ilg(last - first); } depth += 1; } } #undef STACK_SIZE } #endif /* (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) */ /*---------------------------------------------------------------------------*/ #if SS_BLOCKSIZE != 0 static INLINE void ss_blockswap(int *a, int *b, int n) { int t; for(; 0 < n; --n, ++a, ++b) { t = *a, *a = *b, *b = t; } } static INLINE void ss_rotate(int *first, int *middle, int *last) { int *a, *b, t; int l, r; l = middle - first, r = last - middle; for(; (0 < l) && (0 < r);) { if(l == r) { ss_blockswap(first, middle, l); break; } if(l < r) { a = last - 1, b = middle - 1; t = *a; do { *a-- = *b, *b-- = *a; if(b < first) { *a = t; last = a; if((r -= l + 1) <= l) { break; } a -= 1, b = middle - 1; t = *a; } } while(1); } else { a = first, b = middle; t = *a; do { *a++ = *b, *b++ = *a; if(last <= b) { *a = t; first = a + 1; if((l -= r + 1) <= r) { break; } a += 1, b = middle; t = *a; } } while(1); } } } /*---------------------------------------------------------------------------*/ static void ss_inplacemerge(const unsigned char *T, const int *PA, int *first, int *middle, int *last, int depth) { const int *p; int *a, *b; int len, half; int q, r; int x; for(;;) { if(*(last - 1) < 0) { x = 1; p = PA + ~*(last - 1); } else { x = 0; p = PA + *(last - 1); } for(a = first, len = middle - first, half = len >> 1, r = -1; 0 < len; len = half, half >>= 1) { b = a + half; q = ss_compare(T, PA + ((0 <= *b) ? *b : ~*b), p, depth); if(q < 0) { a = b + 1; half -= (len & 1) ^ 1; } else { r = q; } } if(a < middle) { if(r == 0) { *a = ~*a; } ss_rotate(a, middle, last); last -= middle - a; middle = a; if(first == middle) { break; } } --last; if(x != 0) { while(*--last < 0) { } } if(middle == last) { break; } } } /*---------------------------------------------------------------------------*/ /* Merge-forward with internal buffer. */ static void ss_mergeforward(const unsigned char *T, const int *PA, int *first, int *middle, int *last, int *buf, int depth) { int *a, *b, *c, *bufend; int t; int r; bufend = buf + (middle - first) - 1; ss_blockswap(buf, first, middle - first); for(t = *(a = first), b = buf, c = middle;;) { r = ss_compare(T, PA + *b, PA + *c, depth); if(r < 0) { do { *a++ = *b; if(bufend <= b) { *bufend = t; return; } *b++ = *a; } while(*b < 0); } else if(r > 0) { do { *a++ = *c, *c++ = *a; if(last <= c) { while(b < bufend) { *a++ = *b, *b++ = *a; } *a = *b, *b = t; return; } } while(*c < 0); } else { *c = ~*c; do { *a++ = *b; if(bufend <= b) { *bufend = t; return; } *b++ = *a; } while(*b < 0); do { *a++ = *c, *c++ = *a; if(last <= c) { while(b < bufend) { *a++ = *b, *b++ = *a; } *a = *b, *b = t; return; } } while(*c < 0); } } } /* Merge-backward with internal buffer. */ static void ss_mergebackward(const unsigned char *T, const int *PA, int *first, int *middle, int *last, int *buf, int depth) { const int *p1, *p2; int *a, *b, *c, *bufend; int t; int r; int x; bufend = buf + (last - middle) - 1; ss_blockswap(buf, middle, last - middle); x = 0; if(*bufend < 0) { p1 = PA + ~*bufend; x |= 1; } else { p1 = PA + *bufend; } if(*(middle - 1) < 0) { p2 = PA + ~*(middle - 1); x |= 2; } else { p2 = PA + *(middle - 1); } for(t = *(a = last - 1), b = bufend, c = middle - 1;;) { r = ss_compare(T, p1, p2, depth); if(0 < r) { if(x & 1) { do { *a-- = *b, *b-- = *a; } while(*b < 0); x ^= 1; } *a-- = *b; if(b <= buf) { *buf = t; break; } *b-- = *a; if(*b < 0) { p1 = PA + ~*b; x |= 1; } else { p1 = PA + *b; } } else if(r < 0) { if(x & 2) { do { *a-- = *c, *c-- = *a; } while(*c < 0); x ^= 2; } *a-- = *c, *c-- = *a; if(c < first) { while(buf < b) { *a-- = *b, *b-- = *a; } *a = *b, *b = t; break; } if(*c < 0) { p2 = PA + ~*c; x |= 2; } else { p2 = PA + *c; } } else { if(x & 1) { do { *a-- = *b, *b-- = *a; } while(*b < 0); x ^= 1; } *a-- = ~*b; if(b <= buf) { *buf = t; break; } *b-- = *a; if(x & 2) { do { *a-- = *c, *c-- = *a; } while(*c < 0); x ^= 2; } *a-- = *c, *c-- = *a; if(c < first) { while(buf < b) { *a-- = *b, *b-- = *a; } *a = *b, *b = t; break; } if(*b < 0) { p1 = PA + ~*b; x |= 1; } else { p1 = PA + *b; } if(*c < 0) { p2 = PA + ~*c; x |= 2; } else { p2 = PA + *c; } } } } /* D&C based merge. */ static void ss_swapmerge(const unsigned char *T, const int *PA, int *first, int *middle, int *last, int *buf, int bufsize, int depth) { #define STACK_SIZE SS_SMERGE_STACKSIZE #define GETIDX(a) ((0 <= (a)) ? (a) : (~(a))) #define MERGE_CHECK(a, b, c)\ do {\ if(((c) & 1) ||\ (((c) & 2) && (ss_compare(T, PA + GETIDX(*((a) - 1)), PA + *(a), depth) == 0))) {\ *(a) = ~*(a);\ }\ if(((c) & 4) && ((ss_compare(T, PA + GETIDX(*((b) - 1)), PA + *(b), depth) == 0))) {\ *(b) = ~*(b);\ }\ } while(0) struct { int *a, *b, *c; int d; } stack[STACK_SIZE]; int *l, *r, *lm, *rm; int m, len, half; int ssize; int check, next; for(check = 0, ssize = 0;;) { if((last - middle) <= bufsize) { if((first < middle) && (middle < last)) { ss_mergebackward(T, PA, first, middle, last, buf, depth); } MERGE_CHECK(first, last, check); STACK_POP(first, middle, last, check); continue; } if((middle - first) <= bufsize) { if(first < middle) { ss_mergeforward(T, PA, first, middle, last, buf, depth); } MERGE_CHECK(first, last, check); STACK_POP(first, middle, last, check); continue; } for(m = 0, len = MIN(middle - first, last - middle), half = len >> 1; 0 < len; len = half, half >>= 1) { if(ss_compare(T, PA + GETIDX(*(middle + m + half)), PA + GETIDX(*(middle - m - half - 1)), depth) < 0) { m += half + 1; half -= (len & 1) ^ 1; } } if(0 < m) { lm = middle - m, rm = middle + m; ss_blockswap(lm, middle, m); l = r = middle, next = 0; if(rm < last) { if(*rm < 0) { *rm = ~*rm; if(first < lm) { for(; *--l < 0;) { } next |= 4; } next |= 1; } else if(first < lm) { for(; *r < 0; ++r) { } next |= 2; } } if((l - first) <= (last - r)) { STACK_PUSH(r, rm, last, (next & 3) | (check & 4)); middle = lm, last = l, check = (check & 3) | (next & 4); } else { if((next & 2) && (r == middle)) { next ^= 6; } STACK_PUSH(first, lm, l, (check & 3) | (next & 4)); first = r, middle = rm, check = (next & 3) | (check & 4); } } else { if(ss_compare(T, PA + GETIDX(*(middle - 1)), PA + *middle, depth) == 0) { *middle = ~*middle; } MERGE_CHECK(first, last, check); STACK_POP(first, middle, last, check); } } #undef STACK_SIZE } #endif /* SS_BLOCKSIZE != 0 */ /*---------------------------------------------------------------------------*/ /* Substring sort */ static void sssort(const unsigned char *T, const int *PA, int *first, int *last, int *buf, int bufsize, int depth, int n, int lastsuffix) { int *a; #if SS_BLOCKSIZE != 0 int *b, *middle, *curbuf; int j, k, curbufsize, limit; #endif int i; if(lastsuffix != 0) { ++first; } #if SS_BLOCKSIZE == 0 ss_mintrosort(T, PA, first, last, depth); #else if((bufsize < SS_BLOCKSIZE) && (bufsize < (last - first)) && (bufsize < (limit = ss_isqrt(last - first)))) { if(SS_BLOCKSIZE < limit) { limit = SS_BLOCKSIZE; } buf = middle = last - limit, bufsize = limit; } else { middle = last, limit = 0; } for(a = first, i = 0; SS_BLOCKSIZE < (middle - a); a += SS_BLOCKSIZE, ++i) { #if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE ss_mintrosort(T, PA, a, a + SS_BLOCKSIZE, depth); #elif 1 < SS_BLOCKSIZE ss_insertionsort(T, PA, a, a + SS_BLOCKSIZE, depth); #endif curbufsize = last - (a + SS_BLOCKSIZE); curbuf = a + SS_BLOCKSIZE; if(curbufsize <= bufsize) { curbufsize = bufsize, curbuf = buf; } for(b = a, k = SS_BLOCKSIZE, j = i; j & 1; b -= k, k <<= 1, j >>= 1) { ss_swapmerge(T, PA, b - k, b, b + k, curbuf, curbufsize, depth); } } #if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE ss_mintrosort(T, PA, a, middle, depth); #elif 1 < SS_BLOCKSIZE ss_insertionsort(T, PA, a, middle, depth); #endif for(k = SS_BLOCKSIZE; i != 0; k <<= 1, i >>= 1) { if(i & 1) { ss_swapmerge(T, PA, a - k, a, middle, buf, bufsize, depth); a -= k; } } if(limit != 0) { #if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE ss_mintrosort(T, PA, middle, last, depth); #elif 1 < SS_BLOCKSIZE ss_insertionsort(T, PA, middle, last, depth); #endif ss_inplacemerge(T, PA, first, middle, last, depth); } #endif if(lastsuffix != 0) { /* Insert last type B* suffix. */ int PAi[2]; PAi[0] = PA[*(first - 1)], PAi[1] = n - 2; for(a = first, i = *(first - 1); (a < last) && ((*a < 0) || (0 < ss_compare(T, &(PAi[0]), PA + *a, depth))); ++a) { *(a - 1) = *a; } *(a - 1) = i; } } /*---------------------------------------------------------------------------*/ static INLINE int tr_ilg(int n) { return (n & 0xffff0000) ? ((n & 0xff000000) ? 24 + lg_table[(n >> 24) & 0xff] : 16 + lg_table[(n >> 16) & 0xff]) : ((n & 0x0000ff00) ? 8 + lg_table[(n >> 8) & 0xff] : 0 + lg_table[(n >> 0) & 0xff]); } /*---------------------------------------------------------------------------*/ /* Simple insertionsort for small size groups. */ static void tr_insertionsort(const int *ISAd, int *first, int *last) { int *a, *b; int t, r; for(a = first + 1; a < last; ++a) { for(t = *a, b = a - 1; 0 > (r = ISAd[t] - ISAd[*b]);) { do { *(b + 1) = *b; } while((first <= --b) && (*b < 0)); if(b < first) { break; } } if(r == 0) { *b = ~*b; } *(b + 1) = t; } } /*---------------------------------------------------------------------------*/ static INLINE void tr_fixdown(const int *ISAd, int *SA, int i, int size) { int j, k; int v; int c, d, e; for(v = SA[i], c = ISAd[v]; (j = 2 * i + 1) < size; SA[i] = SA[k], i = k) { d = ISAd[SA[k = j++]]; if(d < (e = ISAd[SA[j]])) { k = j; d = e; } if(d <= c) { break; } } SA[i] = v; } /* Simple top-down heapsort. */ static void tr_heapsort(const int *ISAd, int *SA, int size) { int i, m; int t; m = size; if((size % 2) == 0) { m--; if(ISAd[SA[m / 2]] < ISAd[SA[m]]) { SWAP(SA[m], SA[m / 2]); } } for(i = m / 2 - 1; 0 <= i; --i) { tr_fixdown(ISAd, SA, i, m); } if((size % 2) == 0) { SWAP(SA[0], SA[m]); tr_fixdown(ISAd, SA, 0, m); } for(i = m - 1; 0 < i; --i) { t = SA[0], SA[0] = SA[i]; tr_fixdown(ISAd, SA, 0, i); SA[i] = t; } } /*---------------------------------------------------------------------------*/ /* Returns the median of three elements. */ static INLINE int * tr_median3(const int *ISAd, int *v1, int *v2, int *v3) { int *t; if(ISAd[*v1] > ISAd[*v2]) { SWAP(v1, v2); } if(ISAd[*v2] > ISAd[*v3]) { if(ISAd[*v1] > ISAd[*v3]) { return v1; } else { return v3; } } return v2; } /* Returns the median of five elements. */ static INLINE int * tr_median5(const int *ISAd, int *v1, int *v2, int *v3, int *v4, int *v5) { int *t; if(ISAd[*v2] > ISAd[*v3]) { SWAP(v2, v3); } if(ISAd[*v4] > ISAd[*v5]) { SWAP(v4, v5); } if(ISAd[*v2] > ISAd[*v4]) { SWAP(v2, v4); SWAP(v3, v5); } if(ISAd[*v1] > ISAd[*v3]) { SWAP(v1, v3); } if(ISAd[*v1] > ISAd[*v4]) { SWAP(v1, v4); SWAP(v3, v5); } if(ISAd[*v3] > ISAd[*v4]) { return v4; } return v3; } /* Returns the pivot element. */ static INLINE int * tr_pivot(const int *ISAd, int *first, int *last) { int *middle; int t; t = last - first; middle = first + t / 2; if(t <= 512) { if(t <= 32) { return tr_median3(ISAd, first, middle, last - 1); } else { t >>= 2; return tr_median5(ISAd, first, first + t, middle, last - 1 - t, last - 1); } } t >>= 3; first = tr_median3(ISAd, first, first + t, first + (t << 1)); middle = tr_median3(ISAd, middle - t, middle, middle + t); last = tr_median3(ISAd, last - 1 - (t << 1), last - 1 - t, last - 1); return tr_median3(ISAd, first, middle, last); } /*---------------------------------------------------------------------------*/ typedef struct _trbudget_t trbudget_t; struct _trbudget_t { int chance; int remain; int incval; int count; }; static INLINE void trbudget_init(trbudget_t *budget, int chance, int incval) { budget->chance = chance; budget->remain = budget->incval = incval; } static INLINE int trbudget_check(trbudget_t *budget, int size) { if(size <= budget->remain) { budget->remain -= size; return 1; } if(budget->chance == 0) { budget->count += size; return 0; } budget->remain += budget->incval - size; budget->chance -= 1; return 1; } /*---------------------------------------------------------------------------*/ static INLINE void tr_partition(const int *ISAd, int *first, int *middle, int *last, int **pa, int **pb, int v) { int *a, *b, *c, *d, *e, *f; int t, s; int x = 0; for(b = middle - 1; (++b < last) && ((x = ISAd[*b]) == v);) { } if(((a = b) < last) && (x < v)) { for(; (++b < last) && ((x = ISAd[*b]) <= v);) { if(x == v) { SWAP(*b, *a); ++a; } } } for(c = last; (b < --c) && ((x = ISAd[*c]) == v);) { } if((b < (d = c)) && (x > v)) { for(; (b < --c) && ((x = ISAd[*c]) >= v);) { if(x == v) { SWAP(*c, *d); --d; } } } for(; b < c;) { SWAP(*b, *c); for(; (++b < c) && ((x = ISAd[*b]) <= v);) { if(x == v) { SWAP(*b, *a); ++a; } } for(; (b < --c) && ((x = ISAd[*c]) >= v);) { if(x == v) { SWAP(*c, *d); --d; } } } if(a <= d) { c = b - 1; if((s = a - first) > (t = b - a)) { s = t; } for(e = first, f = b - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); } if((s = d - c) > (t = last - d - 1)) { s = t; } for(e = b, f = last - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); } first += (b - a), last -= (d - c); } *pa = first, *pb = last; } static void tr_copy(int *ISA, const int *SA, int *first, int *a, int *b, int *last, int depth) { /* sort suffixes of middle partition by using sorted order of suffixes of left and right partition. */ int *c, *d, *e; int s, v; v = b - SA - 1; for(c = first, d = a - 1; c <= d; ++c) { if((0 <= (s = *c - depth)) && (ISA[s] == v)) { *++d = s; ISA[s] = d - SA; } } for(c = last - 1, e = d + 1, d = b; e < d; --c) { if((0 <= (s = *c - depth)) && (ISA[s] == v)) { *--d = s; ISA[s] = d - SA; } } } static void tr_partialcopy(int *ISA, const int *SA, int *first, int *a, int *b, int *last, int depth) { int *c, *d, *e; int s, v; int rank, lastrank, newrank = -1; v = b - SA - 1; lastrank = -1; for(c = first, d = a - 1; c <= d; ++c) { if((0 <= (s = *c - depth)) && (ISA[s] == v)) { *++d = s; rank = ISA[s + depth]; if(lastrank != rank) { lastrank = rank; newrank = d - SA; } ISA[s] = newrank; } } lastrank = -1; for(e = d; first <= e; --e) { rank = ISA[*e]; if(lastrank != rank) { lastrank = rank; newrank = e - SA; } if(newrank != rank) { ISA[*e] = newrank; } } lastrank = -1; for(c = last - 1, e = d + 1, d = b; e < d; --c) { if((0 <= (s = *c - depth)) && (ISA[s] == v)) { *--d = s; rank = ISA[s + depth]; if(lastrank != rank) { lastrank = rank; newrank = d - SA; } ISA[s] = newrank; } } } static void tr_introsort(int *ISA, const int *ISAd, int *SA, int *first, int *last, trbudget_t *budget) { #define STACK_SIZE TR_STACKSIZE struct { const int *a; int *b, *c; int d, e; }stack[STACK_SIZE]; int *a, *b, *c; int t; int v, x = 0; int incr = ISAd - ISA; int limit, next; int ssize, trlink = -1; for(ssize = 0, limit = tr_ilg(last - first);;) { if(limit < 0) { if(limit == -1) { /* tandem repeat partition */ tr_partition(ISAd - incr, first, first, last, &a, &b, last - SA - 1); /* update ranks */ if(a < last) { for(c = first, v = a - SA - 1; c < a; ++c) { ISA[*c] = v; } } if(b < last) { for(c = a, v = b - SA - 1; c < b; ++c) { ISA[*c] = v; } } /* push */ if(1 < (b - a)) { STACK_PUSH5(NULL, a, b, 0, 0); STACK_PUSH5(ISAd - incr, first, last, -2, trlink); trlink = ssize - 2; } if((a - first) <= (last - b)) { if(1 < (a - first)) { STACK_PUSH5(ISAd, b, last, tr_ilg(last - b), trlink); last = a, limit = tr_ilg(a - first); } else if(1 < (last - b)) { first = b, limit = tr_ilg(last - b); } else { STACK_POP5(ISAd, first, last, limit, trlink); } } else { if(1 < (last - b)) { STACK_PUSH5(ISAd, first, a, tr_ilg(a - first), trlink); first = b, limit = tr_ilg(last - b); } else if(1 < (a - first)) { last = a, limit = tr_ilg(a - first); } else { STACK_POP5(ISAd, first, last, limit, trlink); } } } else if(limit == -2) { /* tandem repeat copy */ a = stack[--ssize].b, b = stack[ssize].c; if(stack[ssize].d == 0) { tr_copy(ISA, SA, first, a, b, last, ISAd - ISA); } else { if(0 <= trlink) { stack[trlink].d = -1; } tr_partialcopy(ISA, SA, first, a, b, last, ISAd - ISA); } STACK_POP5(ISAd, first, last, limit, trlink); } else { /* sorted partition */ if(0 <= *first) { a = first; do { ISA[*a] = a - SA; } while((++a < last) && (0 <= *a)); first = a; } if(first < last) { a = first; do { *a = ~*a; } while(*++a < 0); next = (ISA[*a] != ISAd[*a]) ? tr_ilg(a - first + 1) : -1; if(++a < last) { for(b = first, v = a - SA - 1; b < a; ++b) { ISA[*b] = v; } } /* push */ if(trbudget_check(budget, a - first)) { if((a - first) <= (last - a)) { STACK_PUSH5(ISAd, a, last, -3, trlink); ISAd += incr, last = a, limit = next; } else { if(1 < (last - a)) { STACK_PUSH5(ISAd + incr, first, a, next, trlink); first = a, limit = -3; } else { ISAd += incr, last = a, limit = next; } } } else { if(0 <= trlink) { stack[trlink].d = -1; } if(1 < (last - a)) { first = a, limit = -3; } else { STACK_POP5(ISAd, first, last, limit, trlink); } } } else { STACK_POP5(ISAd, first, last, limit, trlink); } } continue; } if((last - first) <= TR_INSERTIONSORT_THRESHOLD) { tr_insertionsort(ISAd, first, last); limit = -3; continue; } if(limit-- == 0) { tr_heapsort(ISAd, first, last - first); for(a = last - 1; first < a; a = b) { for(x = ISAd[*a], b = a - 1; (first <= b) && (ISAd[*b] == x); --b) { *b = ~*b; } } limit = -3; continue; } /* choose pivot */ a = tr_pivot(ISAd, first, last); SWAP(*first, *a); v = ISAd[*first]; /* partition */ tr_partition(ISAd, first, first + 1, last, &a, &b, v); if((last - first) != (b - a)) { next = (ISA[*a] != v) ? tr_ilg(b - a) : -1; /* update ranks */ for(c = first, v = a - SA - 1; c < a; ++c) { ISA[*c] = v; } if(b < last) { for(c = a, v = b - SA - 1; c < b; ++c) { ISA[*c] = v; } } /* push */ if((1 < (b - a)) && (trbudget_check(budget, b - a))) { if((a - first) <= (last - b)) { if((last - b) <= (b - a)) { if(1 < (a - first)) { STACK_PUSH5(ISAd + incr, a, b, next, trlink); STACK_PUSH5(ISAd, b, last, limit, trlink); last = a; } else if(1 < (last - b)) { STACK_PUSH5(ISAd + incr, a, b, next, trlink); first = b; } else { ISAd += incr, first = a, last = b, limit = next; } } else if((a - first) <= (b - a)) { if(1 < (a - first)) { STACK_PUSH5(ISAd, b, last, limit, trlink); STACK_PUSH5(ISAd + incr, a, b, next, trlink); last = a; } else { STACK_PUSH5(ISAd, b, last, limit, trlink); ISAd += incr, first = a, last = b, limit = next; } } else { STACK_PUSH5(ISAd, b, last, limit, trlink); STACK_PUSH5(ISAd, first, a, limit, trlink); ISAd += incr, first = a, last = b, limit = next; } } else { if((a - first) <= (b - a)) { if(1 < (last - b)) { STACK_PUSH5(ISAd + incr, a, b, next, trlink); STACK_PUSH5(ISAd, first, a, limit, trlink); first = b; } else if(1 < (a - first)) { STACK_PUSH5(ISAd + incr, a, b, next, trlink); last = a; } else { ISAd += incr, first = a, last = b, limit = next; } } else if((last - b) <= (b - a)) { if(1 < (last - b)) { STACK_PUSH5(ISAd, first, a, limit, trlink); STACK_PUSH5(ISAd + incr, a, b, next, trlink); first = b; } else { STACK_PUSH5(ISAd, first, a, limit, trlink); ISAd += incr, first = a, last = b, limit = next; } } else { STACK_PUSH5(ISAd, first, a, limit, trlink); STACK_PUSH5(ISAd, b, last, limit, trlink); ISAd += incr, first = a, last = b, limit = next; } } } else { if((1 < (b - a)) && (0 <= trlink)) { stack[trlink].d = -1; } if((a - first) <= (last - b)) { if(1 < (a - first)) { STACK_PUSH5(ISAd, b, last, limit, trlink); last = a; } else if(1 < (last - b)) { first = b; } else { STACK_POP5(ISAd, first, last, limit, trlink); } } else { if(1 < (last - b)) { STACK_PUSH5(ISAd, first, a, limit, trlink); first = b; } else if(1 < (a - first)) { last = a; } else { STACK_POP5(ISAd, first, last, limit, trlink); } } } } else { if(trbudget_check(budget, last - first)) { limit = tr_ilg(last - first), ISAd += incr; } else { if(0 <= trlink) { stack[trlink].d = -1; } STACK_POP5(ISAd, first, last, limit, trlink); } } } #undef STACK_SIZE } /*---------------------------------------------------------------------------*/ /* Tandem repeat sort */ static void trsort(int *ISA, int *SA, int n, int depth) { int *ISAd; int *first, *last; trbudget_t budget; int t, skip, unsorted; trbudget_init(&budget, tr_ilg(n) * 2 / 3, n); /* trbudget_init(&budget, tr_ilg(n) * 3 / 4, n); */ for(ISAd = ISA + depth; -n < *SA; ISAd += ISAd - ISA) { first = SA; skip = 0; unsorted = 0; do { if((t = *first) < 0) { first -= t; skip += t; } else { if(skip != 0) { *(first + skip) = skip; skip = 0; } last = SA + ISA[t] + 1; if(1 < (last - first)) { budget.count = 0; tr_introsort(ISA, ISAd, SA, first, last, &budget); if(budget.count != 0) { unsorted += budget.count; } else { skip = first - last; } } else if((last - first) == 1) { skip = -1; } first = last; } } while(first < (SA + n)); if(skip != 0) { *(first + skip) = skip; } if(unsorted == 0) { break; } } } /*---------------------------------------------------------------------------*/ /* Sorts suffixes of type B*. */ static int sort_typeBstar(const unsigned char *T, int *SA, int *bucket_A, int *bucket_B, int n) { int *PAb, *ISAb, *buf; int *curbuf; int l; int i, j, k, t, m, bufsize; int c0, c1; int d0, d1; int tmp; /* Initialize bucket arrays. */ for(i = 0; i < BUCKET_A_SIZE; ++i) { bucket_A[i] = 0; } for(i = 0; i < BUCKET_B_SIZE; ++i) { bucket_B[i] = 0; } /* Count the number of occurrences of the first one or two characters of each type A, B and B* suffix. Moreover, store the beginning position of all type B* suffixes into the array SA. */ for(i = n - 1, m = n, c0 = T[n - 1]; 0 <= i;) { /* type A suffix. */ do { ++BUCKET_A(c1 = c0); } while((0 <= --i) && ((c0 = T[i]) >= c1)); if(0 <= i) { /* type B* suffix. */ ++BUCKET_BSTAR(c0, c1); SA[--m] = i; /* type B suffix. */ for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) <= c1); --i, c1 = c0) { ++BUCKET_B(c0, c1); } } } m = n - m; /* note: A type B* suffix is lexicographically smaller than a type B suffix that begins with the same first two characters. */ /* Calculate the index of start/end point of each bucket. */ for(c0 = 0, i = 0, j = 0; c0 < ALPHABET_SIZE; ++c0) { t = i + BUCKET_A(c0); BUCKET_A(c0) = i + j; /* start point */ i = t + BUCKET_B(c0, c0); for(c1 = c0 + 1; c1 < ALPHABET_SIZE; ++c1) { j += BUCKET_BSTAR(c0, c1); BUCKET_BSTAR(c0, c1) = j; /* end point */ i += BUCKET_B(c0, c1); } } if(0 < m) { /* Sort the type B* suffixes by their first two characters. */ PAb = SA + n - m; ISAb = SA + m; for(i = m - 2; 0 <= i; --i) { t = PAb[i], c0 = T[t], c1 = T[t + 1]; SA[--BUCKET_BSTAR(c0, c1)] = i; } t = PAb[m - 1], c0 = T[t], c1 = T[t + 1]; SA[--BUCKET_BSTAR(c0, c1)] = m - 1; /* Sort the type B* substrings using sssort. */ tmp = omp_get_max_threads(); buf = SA + m, bufsize = (n - (2 * m)) / tmp; c0 = ALPHABET_SIZE - 2, c1 = ALPHABET_SIZE - 1, j = m; #pragma omp parallel default(shared) private(curbuf, k, l, d0, d1, tmp) { tmp = omp_get_thread_num(); curbuf = buf + tmp * bufsize; k = 0; for(;;) { #pragma omp critical(sssort_lock) { if(0 < (l = j)) { d0 = c0, d1 = c1; do { k = BUCKET_BSTAR(d0, d1); if(--d1 <= d0) { d1 = ALPHABET_SIZE - 1; if(--d0 < 0) { break; } } } while(((l - k) <= 1) && (0 < (l = k))); c0 = d0, c1 = d1, j = k; } } if(l == 0) { break; } sssort(T, PAb, SA + k, SA + l, curbuf, bufsize, 2, n, *(SA + k) == (m - 1)); } } /* Compute ranks of type B* substrings. */ for(i = m - 1; 0 <= i; --i) { if(0 <= SA[i]) { j = i; do { ISAb[SA[i]] = i; } while((0 <= --i) && (0 <= SA[i])); SA[i + 1] = i - j; if(i <= 0) { break; } } j = i; do { ISAb[SA[i] = ~SA[i]] = j; } while(SA[--i] < 0); ISAb[SA[i]] = j; } /* Construct the inverse suffix array of type B* suffixes using trsort. */ trsort(ISAb, SA, m, 1); /* Set the sorted order of tyoe B* suffixes. */ for(i = n - 1, j = m, c0 = T[n - 1]; 0 <= i;) { for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) >= c1); --i, c1 = c0) { } if(0 <= i) { t = i; for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) <= c1); --i, c1 = c0) { } SA[ISAb[--j]] = ((t == 0) || (1 < (t - i))) ? t : ~t; } } /* Calculate the index of start/end point of each bucket. */ BUCKET_B(ALPHABET_SIZE - 1, ALPHABET_SIZE - 1) = n; /* end point */ for(c0 = ALPHABET_SIZE - 2, k = m - 1; 0 <= c0; --c0) { i = BUCKET_A(c0 + 1) - 1; for(c1 = ALPHABET_SIZE - 1; c0 < c1; --c1) { t = i - BUCKET_B(c0, c1); BUCKET_B(c0, c1) = i; /* end point */ /* Move all type B* suffixes to the correct position. */ for(i = t, j = BUCKET_BSTAR(c0, c1); j <= k; --i, --k) { SA[i] = SA[k]; } } BUCKET_BSTAR(c0, c0 + 1) = i - BUCKET_B(c0, c0) + 1; /* start point */ BUCKET_B(c0, c0) = i; /* end point */ } } return m; } /* Constructs the suffix array by using the sorted order of type B* suffixes. */ static void construct_SA(const unsigned char *T, int *SA, int *bucket_A, int *bucket_B, int n, int m) { int *i, *j, *k; int s; int c0, c1, c2; if(0 < m) { /* Construct the sorted order of type B suffixes by using the sorted order of type B* suffixes. */ for(c1 = ALPHABET_SIZE - 2; 0 <= c1; --c1) { /* Scan the suffix array from right to left. */ for(i = SA + BUCKET_BSTAR(c1, c1 + 1), j = SA + BUCKET_A(c1 + 1) - 1, k = NULL, c2 = -1; i <= j; --j) { if(0 < (s = *j)) { assert(T[s] == c1); assert(((s + 1) < n) && (T[s] <= T[s + 1])); assert(T[s - 1] <= T[s]); *j = ~s; c0 = T[--s]; if((0 < s) && (T[s - 1] > c0)) { s = ~s; } if(c0 != c2) { if(0 <= c2) { BUCKET_B(c2, c1) = k - SA; } k = SA + BUCKET_B(c2 = c0, c1); } assert(k < j); *k-- = s; } else { assert(((s == 0) && (T[s] == c1)) || (s < 0)); *j = ~s; } } } } /* Construct the suffix array by using the sorted order of type B suffixes. */ k = SA + BUCKET_A(c2 = T[n - 1]); *k++ = (T[n - 2] < c2) ? ~(n - 1) : (n - 1); /* Scan the suffix array from left to right. */ for(i = SA, j = SA + n; i < j; ++i) { if(0 < (s = *i)) { assert(T[s - 1] >= T[s]); c0 = T[--s]; if((s == 0) || (T[s - 1] < c0)) { s = ~s; } if(c0 != c2) { BUCKET_A(c2) = k - SA; k = SA + BUCKET_A(c2 = c0); } assert(i < k); *k++ = s; } else { assert(s < 0); *i = ~s; } } } /* Constructs the burrows-wheeler transformed string directly by using the sorted order of type B* suffixes. */ static int construct_BWT(const unsigned char *T, int *SA, int *bucket_A, int *bucket_B, int n, int m) { int *i, *j, *k, *orig; int s; int c0, c1, c2; if(0 < m) { /* Construct the sorted order of type B suffixes by using the sorted order of type B* suffixes. */ for(c1 = ALPHABET_SIZE - 2; 0 <= c1; --c1) { /* Scan the suffix array from right to left. */ for(i = SA + BUCKET_BSTAR(c1, c1 + 1), j = SA + BUCKET_A(c1 + 1) - 1, k = NULL, c2 = -1; i <= j; --j) { if(0 < (s = *j)) { assert(T[s] == c1); assert(((s + 1) < n) && (T[s] <= T[s + 1])); assert(T[s - 1] <= T[s]); c0 = T[--s]; *j = ~((int)c0); if((0 < s) && (T[s - 1] > c0)) { s = ~s; } if(c0 != c2) { if(0 <= c2) { BUCKET_B(c2, c1) = k - SA; } k = SA + BUCKET_B(c2 = c0, c1); } assert(k < j); *k-- = s; } else if(s != 0) { *j = ~s; #ifndef NDEBUG } else { assert(T[s] == c1); #endif } } } } /* Construct the BWTed string by using the sorted order of type B suffixes. */ k = SA + BUCKET_A(c2 = T[n - 1]); *k++ = (T[n - 2] < c2) ? ~((int)T[n - 2]) : (n - 1); /* Scan the suffix array from left to right. */ for(i = SA, j = SA + n, orig = SA; i < j; ++i) { if(0 < (s = *i)) { assert(T[s - 1] >= T[s]); c0 = T[--s]; *i = c0; if((0 < s) && (T[s - 1] < c0)) { s = ~((int)T[s - 1]); } if(c0 != c2) { BUCKET_A(c2) = k - SA; k = SA + BUCKET_A(c2 = c0); } assert(i < k); *k++ = s; } else if(s != 0) { *i = ~s; } else { orig = i; } } return orig - SA; } /*---------------------------------------------------------------------------*/ /*- Function -*/ int divsufsort(const unsigned char *T, int *SA, int n) { int *bucket_A, *bucket_B; int m; int err = 0; /* Check arguments. */ if((T == NULL) || (SA == NULL) || (n < 0)) { return -1; } else if(n == 0) { return 0; } else if(n == 1) { SA[0] = 0; return 0; } else if(n == 2) { m = (T[0] < T[1]); SA[m ^ 1] = 0, SA[m] = 1; return 0; } bucket_A = (int *)malloc(BUCKET_A_SIZE * sizeof(int)); bucket_B = (int *)malloc(BUCKET_B_SIZE * sizeof(int)); /* Suffixsort. */ if((bucket_A != NULL) && (bucket_B != NULL)) { m = sort_typeBstar(T, SA, bucket_A, bucket_B, n); construct_SA(T, SA, bucket_A, bucket_B, n, m); } else { err = -2; } free(bucket_B); free(bucket_A); return err; } int divbwt(const unsigned char *T, unsigned char *U, int *A, int n) { int *B; int *bucket_A, *bucket_B; int m, pidx, i; /* Check arguments. */ if((T == NULL) || (U == NULL) || (n < 0)) { return -1; } else if(n <= 1) { if(n == 1) { U[0] = T[0]; } return n; } if((B = A) == NULL) { B = (int *)malloc((size_t)(n + 1) * sizeof(int)); } bucket_A = (int *)malloc(BUCKET_A_SIZE * sizeof(int)); bucket_B = (int *)malloc(BUCKET_B_SIZE * sizeof(int)); /* Burrows-Wheeler Transform. */ if((B != NULL) && (bucket_A != NULL) && (bucket_B != NULL)) { m = sort_typeBstar(T, B, bucket_A, bucket_B, n); pidx = construct_BWT(T, B, bucket_A, bucket_B, n, m); /* Copy to output string. */ U[0] = T[n - 1]; for(i = 0; i < pidx; ++i) { U[i + 1] = (unsigned char)B[i]; } for(i += 1; i < n; ++i) { U[i] = (unsigned char)B[i]; } pidx += 1; } else { pidx = -2; } free(bucket_B); free(bucket_A); if(A == NULL) { free(B); } return pidx; } //Module name: libbps //Author: Alcaro //Date: November 7, 2015 //Licence: GPL v3.0 or higher #include //malloc, realloc, free #include //memcpy, memset #include //uint8_t, uint32_t static uint32_t read32(uint8_t * ptr) { uint32_t out; out =ptr[0]; out|=ptr[1]<<8; out|=ptr[2]<<16; out|=ptr[3]<<24; return out; } enum { SourceRead, TargetRead, SourceCopy, TargetCopy }; static bool try_add(size_t& a, size_t b) { if (SIZE_MAX-a < b) return false; a+=b; return true; } static bool try_shift(size_t& a, size_t b) { if (SIZE_MAX>>b < a) return false; a<<=b; return true; } static bool decodenum(const uint8_t*& ptr, size_t& out) { out=0; unsigned int shift=0; while (true) { uint8_t next=*ptr++; size_t addthis=(next&0x7F); if (shift) addthis++; if (!try_shift(addthis, shift)) return false; // unchecked because if it was shifted, the lowest bit is zero, and if not, it's <=0x7F. if (!try_add(out, addthis)) return false; if (next&0x80) return true; shift+=7; } } #define error(which) do { error=which; goto exit; } while(0) #define assert_sum(a,b) do { if (SIZE_MAX-(a)<(b)) error(bps_too_big); } while(0) #define assert_shift(a,b) do { if (SIZE_MAX>>(b)<(a)) error(bps_too_big); } while(0) enum bpserror bps_apply(struct mem patch, struct mem in, struct mem * out, struct mem * metadata, bool accept_wrong_input) { enum bpserror error = bps_ok; out->len=0; out->ptr=NULL; if (metadata) { metadata->len=0; metadata->ptr=NULL; } if (patch.len<4+3+12) return bps_broken; if (true) { #define read8() (*(patchat++)) #define decodeto(var) \ do { \ if (!decodenum(patchat, var)) error(bps_too_big); \ } while(false) #define write8(byte) (*(outat++)=byte) const uint8_t * patchat=patch.ptr; const uint8_t * patchend=patch.ptr+patch.len-12; if (read8()!='B') error(bps_broken); if (read8()!='P') error(bps_broken); if (read8()!='S') error(bps_broken); if (read8()!='1') error(bps_broken); uint32_t crc_in_e = read32(patch.ptr+patch.len-12); uint32_t crc_out_e = read32(patch.ptr+patch.len-8); uint32_t crc_patch_e = read32(patch.ptr+patch.len-4); uint32_t crc_in_a = crc32(in.ptr, in.len); uint32_t crc_patch_a = crc32(patch.ptr, patch.len-4); if (crc_patch_a != crc_patch_e) error(bps_broken); size_t inlen; decodeto(inlen); size_t outlen; decodeto(outlen); if (inlen!=in.len || crc_in_a!=crc_in_e) { if (in.len==outlen && crc_in_a==crc_out_e) error=bps_to_output; else error=bps_not_this; if (!accept_wrong_input) goto exit; } out->len=outlen; out->ptr=(uint8_t*)malloc(outlen); const uint8_t * instart=in.ptr; const uint8_t * inreadat=in.ptr; const uint8_t * inend=in.ptr+in.len; uint8_t * outstart=out->ptr; uint8_t * outreadat=out->ptr; uint8_t * outat=out->ptr; uint8_t * outend=out->ptr+out->len; size_t metadatalen; decodeto(metadatalen); if (metadata && metadatalen) { metadata->len=metadatalen; metadata->ptr=(uint8_t*)malloc(metadatalen+1); for (size_t i=0;iptr[i]=read8(); metadata->ptr[metadatalen]='\0';//just to be on the safe side - that metadata is assumed to be text, might as well terminate it } else { for (size_t i=0;i>2)+1; int action=(thisinstr&3); if (outat+length>outend) error(bps_broken); switch (action) { case SourceRead: { if (outat-outstart+length > in.len) error(bps_broken); for (size_t i=0;ipatchend) error(bps_broken); for (size_t i=0;i>1; if ((encodeddistance&1)==0) inreadat+=distance; else inreadat-=distance; if (inreadatinend) error(bps_broken); for (size_t i=0;i>1; if ((encodeddistance&1)==0) outreadat+=distance; else outreadat-=distance; if (outreadat=outat || outreadat+length>outend) error(bps_broken); for (size_t i=0;iptr, out->len); if (crc_out_a!=crc_out_e) { error=bps_not_this; if (!accept_wrong_input) goto exit; } return error; #undef read8 #undef decodeto #undef write8 } exit: free(out->ptr); out->len=0; out->ptr=NULL; if (metadata) { free(metadata->ptr); metadata->len=0; metadata->ptr=NULL; } return error; } #define write(val) \ do { \ out[outlen++]=(val); \ if (outlen==outbuflen) \ { \ outbuflen*=2; \ uint8_t* newout=(uint8_t*)realloc(out, outbuflen); \ if (!newout) { free(out); return bps_out_of_mem; } \ out=newout; \ } \ } while(0) #define write32(val) \ do { \ uint32_t tmp=(val); \ write(tmp); \ write(tmp>>8); \ write(tmp>>16); \ write(tmp>>24); \ } while(0) #define writenum(val) \ do { \ size_t tmpval=(val); \ while (true) \ { \ uint8_t tmpbyte=(tmpval&0x7F); \ tmpval>>=7; \ if (!tmpval) \ { \ write(tmpbyte|0x80); \ break; \ } \ write(tmpbyte); \ tmpval--; \ } \ } while(0) enum bpserror bps_create_linear(struct mem sourcemem, struct mem targetmem, struct mem metadata, struct mem * patchmem) { if (sourcemem.len>=(SIZE_MAX>>2) - 16) return bps_too_big;//the 16 is just to be on the safe side, I don't think it's needed. if (targetmem.len>=(SIZE_MAX>>2) - 16) return bps_too_big; const uint8_t * source=sourcemem.ptr; const uint8_t * sourceend=sourcemem.ptr+sourcemem.len; if (sourcemem.len>targetmem.len) sourceend=sourcemem.ptr+targetmem.len; const uint8_t * targetbegin=targetmem.ptr; const uint8_t * target=targetmem.ptr; const uint8_t * targetend=targetmem.ptr+targetmem.len; const uint8_t * targetcopypos=targetbegin; size_t outbuflen=4096; uint8_t * out=(uint8_t*)malloc(outbuflen); if (!out) return bps_out_of_mem; size_t outlen=0; write('B'); write('P'); write('S'); write('1'); writenum(sourcemem.len); writenum(targetmem.len); writenum(metadata.len); for (size_t i=0;i1 || numunchanged == (uintptr_t)(targetend-target)) { //assert_shift((numunchanged-1), 2); writenum((numunchanged-1)<<2 | 0);//SourceRead source+=numunchanged; target+=numunchanged; } size_t numchanged=0; if (lastknownchange>target) numchanged=lastknownchange-target; while ((source+numchanged>=sourceend || source[numchanged]!=target[numchanged] || source[numchanged+1]!=target[numchanged+1] || source[numchanged+2]!=target[numchanged+2]) && target+numchanged=sourceend) numchanged=targetend-target; } lastknownchange=target+numchanged; if (numchanged) { //assert_shift((numchanged-1), 2); size_t rle1start=(target==targetbegin); while (true) { if ( target[rle1start-1]==target[rle1start+0] && target[rle1start+0]==target[rle1start+1] && target[rle1start+1]==target[rle1start+2] && target[rle1start+2]==target[rle1start+3]) { numchanged=rle1start; break; } if ( target[rle1start-2]==target[rle1start+0] && target[rle1start-1]==target[rle1start+1] && target[rle1start+0]==target[rle1start+2] && target[rle1start+1]==target[rle1start+3] && target[rle1start+2]==target[rle1start+4]) { numchanged=rle1start; break; } if (rle1start+3>=numchanged) break; rle1start++; } if (numchanged) { writenum((numchanged-1)<<2 | TargetRead); for (size_t i=0;iptr=out; patchmem->len=outlen; //while this may look like it can be fooled by a patch containing exactly one of any other command, it // can't, because the ones that aren't SourceRead requires an argument size_t i; for (i=mainContentPos;(out[i]&0x80)==0x00;i++) {} if (i==outlen-12-1) return bps_identical; return bps_ok; } #undef write_nocrc #undef write #undef writenum void bps_free(struct mem mem) { free(mem.ptr); } #undef error struct bpsinfo bps_get_info(file* patch, bool changefrac) { #define error(why) do { ret.error=why; return ret; } while(0) struct bpsinfo ret; size_t len = patch->len(); if (len<4+3+12) error(bps_broken); uint8_t top[256]; if (!patch->read(top, 0, len>256 ? 256 : len)) error(bps_io); if (memcmp(top, "BPS1", 4)) error(bps_broken); const uint8_t* patchdat=top+4; if (!decodenum(patchdat, ret.size_in)) error(bps_too_big); if (!decodenum(patchdat, ret.size_out)) error(bps_too_big); if (!decodenum(patchdat, ret.meta_size)) error(bps_too_big); ret.meta_start = patchdat - top; uint8_t checksums[12]; if (!patch->read(checksums, len-12, 12)) error(bps_io); ret.crc_in = read32(checksums+0); ret.crc_out = read32(checksums+4); ret.crc_patch=read32(checksums+8); if (changefrac && ret.size_in>0) { //algorithm: each command adds its length to the numerator, unless it's above 32, in which case // it adds 32; or if it's SourceRead, in which case it adds 0 //denominator is just input length uint8_t* patchbin=(uint8_t*)malloc(len); patch->read(patchbin, 0, len); size_t outpos=0; // position in the output file size_t changeamt=0; // change score const uint8_t* patchat=patchbin+(patchdat-top); size_t metasize; if (!decodenum(patchat, metasize)) error(bps_too_big); patchat+=metasize; const uint8_t* patchend=patchbin+len-12; while (patchat>2)+1; int action=(thisinstr&3); int min_len_32 = (length<32 ? length : 32); switch (action) { case SourceRead: { changeamt+=0; } break; case TargetRead: { changeamt+=min_len_32; patchat+=length; } break; case SourceCopy: case TargetCopy: { changeamt+=min_len_32; size_t ignore; decodenum(patchat, ignore); } break; } outpos+=length; } if (patchat>patchend || outpos>ret.size_out) error(bps_broken); ret.change_num = (changeamt # define z "z" void bps_disassemble(struct mem patch, FILE* out) { #define read8() (*(patchat++)) #define decodeto(var) decodenum(patchat, var) const uint8_t * patchat=patch.ptr; const uint8_t * patchend=patch.ptr+patch.len-12; if (read8() != 'B' || read8() != 'P' || read8() != 'S' || read8() != '1') { fprintf(out, "Not a BPS patch\n"); return; } size_t inreadat = 0; size_t inlen; decodeto(inlen); size_t outat = 0; size_t outreadat = 0; size_t outlen; decodeto(outlen); size_t metadatalen; decodeto(metadatalen); patchat += metadatalen; while (patchat>2)+1; int action=(thisinstr&3); switch (action) { case SourceRead: { fprintf(out, "SourceRead %" z "u from %" z "u to %" z "u\n", length, outat, outat); outat += length; } break; case TargetRead: { fprintf(out, "TargetRead %" z "u to %" z "u\n", length, outat); patchat += length; outat += length; } break; case SourceCopy: { size_t encodeddistance; decodeto(encodeddistance); size_t distance=encodeddistance>>1; if ((encodeddistance&1)==0) inreadat+=distance; else inreadat-=distance; fprintf(out, "SourceCopy %" z "u from %" z "u to %" z "u (rel %+" z "i)\n", length, inreadat, outat, inreadat-outat); inreadat += length; outat += length; } break; case TargetCopy: { size_t encodeddistance; decodeto(encodeddistance); size_t distance=encodeddistance>>1; if ((encodeddistance&1)==0) outreadat+=distance; else outreadat-=distance; fprintf(out, "TargetCopy %" z "u from %" z "u to %" z "u (rel %+" z "i)\n", length, outreadat, outat, outreadat-outat); outreadat += length; outat += length; } break; } } if (patchat != patchend) fprintf(out, "WARNING: patch pointer at %" z "u != %" z "u, corrupt patch?\n", patchat-patch.ptr, patchend-patch.ptr); if (outat != outlen) fprintf(out, "WARNING: output pointer at %" z "u != %" z "u, corrupt patch?\n", outat, outlen); #undef read8 #undef decodeto } //Module name: libbps-suf //Author: Alcaro //Date: See Git history //Licence: GPL v3.0 or higher #include #include #include #include //These two give minor performance penalties and will print some random stuff to stdout. //The former will verify the correctness of the output patch, the latter will print some performance data. //Can be useful for debugging, but should be disabled for release builds. #ifdef BPS_STANDALONE #endif //#define TEST_CORRECT //#define TEST_PERF //If the suffix array of [0, 0, 0, 0] is [3, 2, 1, 0], set to true. If it's [0, 1, 2, 3], this is false. //If it's [4, 3, 2, 1, 0] or [0, 1, 2, 3, 4], remove the 4 (easily done with some pointer math), and follow the above. //If it's something else, get a non-broken array calculator. #define EOF_IS_LAST false #if defined(TEST_CORRECT) || defined(TEST_PERF) #include #endif //Algorithm description: // //This is heavily built upon suffix sorting; the implementation I use, libdivsufsort, claims // O(n log n) complexity, so I'll believe that. There is also SA-IS, which claims O(n), but if that // is true, its constant factors are ridiculously high. // //The program starts by taking an equal amount of the source file and target file, concatenates that // with target first, and suffix sorts it. //It also calculates a reverse index, such that reverse[sorted[i]]==i. // //To find a match, it goes to reverse[outpos], and scans sorted[] up and down for the closest entry // that either starts before the current output position, or is somewhere in the source file. //As the source file comes last, the end-of-file marker (whose value is outside the range of a byte) // is guaranteed to not be in the way for a better match. //This is called O(n) times, and averages O(1) as at least 50% of sorted[] is in range. However, it // is worst-case O(n) for sorted inputs, giving a total of O(n^2). // //It then checks which of the two candidates are superior, by checking how far they match each // other, and then checking if the upper one has another correct byte. //This is potentially O(n), but for each matched byte, another iteration is removed from the outer // loop, so the sum of all calls is O(n). // //When the program approaches the end of the sorted area, it re-sorts twice as much as last time. // This gives O(log n) calls to the suffix sorter. //Given O(n log n) for one sorting step, the time taken is O(n/1 log n/1 + n/2 log n/2 + // n/4 log n/4 + ...), which is strictly less than O(n/1 log n + n/2 log n + n/4 log n + ...), which // equals O(2n log n), which is O(n log n). (The exact value of that infinite sum is 2n*log(n/2).) // //Many details were omitted from the above, but that's the basic setup. // //Thus, the program is O(max(n log n, n, n) = n log n) average and O(max(n log n, n^2, n) = n^2) // worst case. // //I conclude that the task of finding, understanding and implementing a sub-O(n^2) algorithm for // delta patching is resolved. //Known cases where this function does not emit the optimal encoding: //If a match in the target file would extend further than target_search_size, it is often skipped. // Penalty: O(log n), with extremely low constants (it'd require a >256B match to be exactly there). // Even for big files, the penalty is very likely to remain zero; even hitting double-digit bytes // would require a file designed exactly for that. //If multiple matches are equally good, it picks one at random, not the one that's cheaper to encode. // Penalty: Likely O(n) or O(n log log n), with low constants. I'd guess ~1.4% for my 48MB test file. //However, due to better heuristics and others' performance optimizations, this one still beats its // competitors. //Possible optimizations: //divsufsort() takes approximately 2/3 of the total time. create_reverse_index() takes roughly a third of the remainder. //Each iteration takes four times as long as the previous one. //If each iteration takes 4 times as long as the previous one, then the last one takes 3/4 of the total time. //Since divsufsort+create_reverse_index doesn't depend on anything else, the last iteration can be split off to its own thread. //This would split it to //Search, non-final: 2/9 * 1/4 = 2/36 //Search, final: 2/9 * 3/4 = 6/36 //Sort+rev, non-final: 7/9 * 1/4 = 7/36 //Sort+rev, final: 7/9 * 3/4 = 21/36 //All non-final must be done sequentially. Both Sort Final and non-final must be done before Search Final can start. //This means the final time, if Sort Final is split off, is //max(7/36+2/36, 21/36) + 6/36 = 27/36 = 3/4 //of the original time. //Due to //- the considerable complexity costs (OpenMP doesn't seem able to represent the "insert a wait in // the middle of this while loop" I would need) //- the added memory use, approximately 25% higher - it's already high enough //- libdivsufsort already using threads, which would make the gains lower // and would increase complexity, as I have to ensure the big one remains threaded - // and that the small ones are not, as that'd starve the big one //I deem a possible 25% boost not worthwhile. //Both sorting algorithms claim O(1) memory use (in addition to the bytes and the output). In // addition to that, this algorithm uses (source.len*target.len)*(sizeof(uint8_t)+2*sizeof(off_t)) // bytes of memory, plus the patch (the input/output files are read from disk). //For most hardware, this is 9*(source.len+target.len), or 5*(source+target) for the slim one. //I don't need 64bit support, it'd take 20GB RAM and way too long. //template //static void sufsort(sais_index_type* SA, const uint8_t* T, sais_index_type n) { // if(n <= 1) { if(n == 1) SA[0] = 0; return; } // sais_main(T, SA, 0, n, 256); //} //According to , divsufsort achieves // approximately half the time of SAIS for nearly all files, despite SAIS' promises of linear // performance (divsufsort claims O(n log n)). //divsufsort only allocates O(1) for some radix/bucket sorting. SAIS seems constant too. //I'd prefer to let them allocate from an array I give it, but divsuf doesn't allow that, and there // are only half a dozen allocations per call anyways. static void sufsort(int32_t* SA, uint8_t* T, int32_t n) { divsufsort(T, SA, n); } #ifdef USE_DIVSUFSORT64 static void sufsort(int64_t* SA, uint8_t* T, int64_t n) { divsufsort(T, SA, n); } #endif template static T min(T a, T b) { return a static T max(T a, T b) { return a outbuflen) { if (!outbuflen) outbuflen = 128; while (outlen+len > outbuflen) outbuflen *= 2; out = (uint8_t*)realloc(out, outbuflen); } } void append(const uint8_t * data, size_t len) { reserve(len); memcpy(out+outlen, data, len); outlen+=len; } void appendnum(size_t num) { #ifdef TEST_CORRECT if (num > 1000000000) printf("ERROR: Attempt to write %.8lX\n",(unsigned long)num),abort(); #endif reserve(sizeof(size_t)*8/7+1); while (num >= 128) { out[outlen++]=(num&0x7F); num>>=7; num--; } out[outlen++]=num|0x80; } void appendnum32(uint32_t num) { reserve(4); out[outlen++] = num>>0; out[outlen++] = num>>8; out[outlen++] = num>>16; out[outlen++] = num>>24; } static size_t maxsize() { return SIZE_MAX>>2; // can be reduced to SIZE_MAX>>1 by amending append_cmd, but the mallocs overflow at that point anyways. } size_t sourcelen; size_t targetlen; const uint8_t* targetmem; enum bpscmd { SourceRead, TargetRead, SourceCopy, TargetCopy }; size_t outpos; size_t sourcecopypos; size_t targetcopypos; size_t numtargetread; bps_creator(file* source, file* target, struct mem metadata) { outlen = 0; outbuflen = 128; out = (uint8_t*)malloc(outbuflen); outpos = 0; sourcelen = source->len(); targetlen = target->len(); sourcecopypos = 0; targetcopypos = 0; numtargetread = 0; append((const uint8_t*)"BPS1", 4); appendnum(sourcelen); appendnum(targetlen); appendnum(metadata.len); append(metadata.ptr, metadata.len); setProgress(NULL, NULL); } void move_target(const uint8_t* ptr) { targetmem = ptr; } size_t encode_delta(size_t prev, size_t next) { bool negative = (next= 1+cost+hastargetread+(len==1); } //Return value is how many bytes were used. If you believe the given one sucks, use TargetRead and return 1. size_t match(bool is_target, size_t pos, size_t len) { if (!use_match( numtargetread, (!is_target && pos==outpos) ? 1 : // SourceRead (num_cost(abs_diff(pos, (is_target ? targetcopypos : sourcecopypos)))+1), len )) { return emit_target_read(); } if (is_target) return emit_target_copy(pos, len); else return emit_source_copy(pos, len); } bool (*prog_func)(void* userdata, size_t done, size_t total); void* prog_dat; static bool prog_func_null( unused void* userdata, unused size_t done, unused size_t total) { return true; } void setProgress(bool (*progress)(void* userdata, size_t done, size_t total), void* userdata) { if (!progress) progress = prog_func_null; prog_func=progress; prog_dat=userdata; } bool progress(size_t done, size_t total) { return prog_func(prog_dat, done, total); } void finish(const uint8_t* source, const uint8_t* target) { flush_target_read(); #ifdef TEST_CORRECT if (outpos != targetlen) puts("ERROR: patch creates wrong ROM size"),abort(); #endif appendnum32(crc32(source, sourcelen)); appendnum32(crc32(target, targetlen)); appendnum32(crc32(out, outlen)); } struct mem getpatch() { struct mem ret = { out, outlen }; out = NULL; return ret; } ~bps_creator() { free(out); } }; } #ifdef TEST_PERF static int match_len_n=0; static int match_len_tot=0; #endif template static off_t match_len(const uint8_t* a, const uint8_t* b, off_t len) { off_t i; for (i=0;i static off_t pick_best_of_two(const uint8_t* search, off_t searchlen, const uint8_t* data, off_t datalen, off_t a, off_t b, off_t* bestlen) { off_t commonlen = match_len(data+a, data+b, min(datalen-a, datalen-b)); if (commonlen>=searchlen) { *bestlen=searchlen; return a; } if (a+commonlen static off_t adjust_match(off_t match, const uint8_t* search, off_t searchlen, const uint8_t* data,off_t datalen, off_t maxstart,off_t minstart, const off_t* sorted, off_t sortedlen, off_t* bestlen) { off_t match_up = match; off_t match_dn = match; while (match_up>=0 && sorted[match_up]>=maxstart && sorted[match_up]=maxstart && sorted[match_dn]=sortedlen) { if (match_up<0 && match_dn>=sortedlen) { *bestlen=0; return 0; } off_t pos = sorted[match_up<0 ? match_dn : match_up]; *bestlen = match_len(search, data+pos, min(searchlen, datalen-pos)); return pos; } return pick_best_of_two(search,searchlen, data,datalen, sorted[match_up],sorted[match_dn], bestlen); } static uint16_t read2_uc(const uint8_t* data) { return data[0]<<8 | data[1]; } template static uint16_t read2(const uint8_t* data, off_t len) { if (len>=2) return read2_uc(data); else { uint16_t out = (EOF_IS_LAST ? 0xFFFF : 0x0000); if (len==1) out = (data[0]<<8) | (out&0x00FF); return out; } } template static void create_buckets(const uint8_t* data, off_t* index, off_t len, off_t* buckets) { off_t low = 0; off_t high; for (int n=0;n<65536;n++) { //'low' remains from the previous iteration and is a known minimum high = low+(len/131072)+1; // optimal value: slightly above a third of the distance to the next one while (true) { if (high > len-1) break; off_t pos = index[high]; uint16_t here = read2(data+pos, len-pos); if (here >= n) break; else { off_t diff = high-low; low = high; high = high+diff*2; } } if (high > len-1) high = len-1; while (low < high) { off_t mid = low + (high-low)/2; off_t midpos = index[mid]; uint16_t here = read2(data+midpos, len-midpos); if (here < n) low = mid+1; else high = mid; } buckets[n] = low; } buckets[65536] = len; #ifdef TEST_CORRECT if (buckets[0]!=0) { printf("e: buckets suck, [0]=%i\n", buckets[0]); abort(); } for (int n=0;n<65536;n++) { off_t low = buckets[n]; off_t high = buckets[n+1]; for (off_t i=low;i static off_t find_index(off_t pos, const uint8_t* data, off_t datalen, const off_t* index, const off_t* reverse, off_t* buckets) { if (reverse) return reverse[pos]; uint16_t bucket = read2(data+pos, datalen-pos); //printf("p=%i b=%i\n",pos,bucket); off_t low = buckets[bucket]; off_t high = buckets[bucket+1]-1; off_t lowmatch = 2; off_t highmatch = 2; //printf("b=%i r=%i(%i)-%i(%i)\n",bucket,low,read2(data+index[low],datalen-index[low]),high,read2(data+index[high],datalen-index[high])); //fflush(stdout); while (true) { off_t mid = low + (high-low)/2; off_t midpos = index[mid]; if (midpos == pos) return mid; //printf("r=[%i]%i-%i \n",high-low,low,high,); //fflush(stdout); #ifdef TEST_CORRECT if (low >= high) { printf("E: [%i](%i): stuck at %i(%i)-%i(%i)\n", pos, read2_uc(data+pos), low, read2_uc(data+index[low]), high, read2_uc(data+index[high])); int n=0; while (index[n]!=pos) n++; printf("correct one is %i(%i)\n",n, read2_uc(data+index[n])); abort(); } #endif off_t matchlenstart = min(lowmatch, highmatch); off_t len = datalen - max(pos, midpos) - matchlenstart; const uint8_t* search = data+pos+matchlenstart; const uint8_t* here = data+midpos+matchlenstart; while (len>0 && *search==*here) { search++; here++; len--; } off_t matchlen = search-data-pos; bool less; if (len > 0) less = (*here<*search); else less = (here > search) ^ EOF_IS_LAST; if (less) { low = mid+1; lowmatch = matchlen; } else { high = mid-1; highmatch = matchlen; } if (low+256 > high) { off_t i=low; while (true) { if (index[i]==pos) return i; i++; } } } } template static void create_reverse_index(off_t* index, off_t* reverse, off_t len) { //testcase: linux 3.18.14 -> 4.0.4 .xz //without: real23.544 user32.930 //with: real22.636 user40.168 //'user' jumps up quite a lot, while 'real' only moves a short bit //I'm not sure why the tradeoff is so bad (do the cachelines bounce THAT badly?), but I deem it not worth it. //#pragma omp parallel for for (off_t i=0;i static off_t nextsize(off_t outpos, off_t sortedsize, off_t targetlen) { while (outpos >= sortedsize-256 && sortedsize < targetlen) sortedsize = min(sortedsize*4+3, targetlen); return sortedsize; } template off_t lerp(off_t x, off_t y, float frac) { return x + (y-x)*frac; } template static bpserror bps_create_suf_core(file* source, file* target, bool moremem, struct bps_creator * out) { #define error(which) do { err = which; goto error; } while(0) bpserror err; size_t realsourcelen = source->len(); size_t realtargetlen = target->len(); size_t overflowtest = realsourcelen + realtargetlen; //source+target length is bigger than size_t (how did that manage to get allocated?) if (overflowtest < realsourcelen) return bps_too_big; //source+target doesn't fit in unsigned off_t if ((size_t)(off_t)overflowtest != overflowtest) return bps_too_big; //source+target doesn't fit in signed off_t if ((off_t)overflowtest < 0) return bps_too_big; //the mallocs would overflow if (realsourcelen+realtargetlen >= SIZE_MAX/sizeof(off_t)) return bps_too_big; if (realsourcelen+realtargetlen >= out->maxsize()) return bps_too_big; off_t sourcelen = realsourcelen; off_t targetlen = realtargetlen; uint8_t* mem_joined = (uint8_t*)malloc(sizeof(uint8_t)*(realsourcelen+realtargetlen)); off_t* sorted = (off_t*)malloc(sizeof(off_t)*(realsourcelen+realtargetlen)); off_t* sorted_inverse = NULL; if (moremem) sorted_inverse = (off_t*)malloc(sizeof(off_t)*(realsourcelen+realtargetlen)); off_t* buckets = NULL; if (!sorted_inverse) buckets = (off_t*)malloc(sizeof(off_t)*65537); if (!sorted || !mem_joined || (!sorted_inverse && !buckets)) { free(mem_joined); free(sorted); free(sorted_inverse); free(buckets); return bps_out_of_mem; } //sortedsize is how much of the target file is sorted off_t sortedsize = targetlen; //divide by 4 for each iteration, to avoid sorting 50% of the file (the sorter is slow) while (sortedsize/4 > sourcelen && sortedsize > 1024) sortedsize >>= 2; off_t prevsortedsize = 0; off_t outpos = 0; goto reindex; // jump into the middle so I won't need a special case to enter it while (outpos < targetlen) { if (outpos >= sortedsize-256 && sortedsize < targetlen) { sortedsize = nextsize(outpos, sortedsize, targetlen); reindex: //this isn't an exact science const float percSort = sorted_inverse ? 0.67 : 0.50; const float percInv = sorted_inverse ? 0.11 : 0.10; //const float percFind = sorted_inverse ? 0.22 : 0.40; // unused const size_t progPreSort = lerp(prevsortedsize, sortedsize, 0); const size_t progPreInv = lerp(prevsortedsize, sortedsize, percSort); const size_t progPreFind = lerp(prevsortedsize, sortedsize, percSort+percInv); prevsortedsize = sortedsize; if (!out->progress(progPreSort, targetlen)) error(bps_canceled); if (!target->read(mem_joined, 0, sortedsize)) error(bps_io); if (!source->read(mem_joined+sortedsize, 0, sourcelen)) error(bps_io); out->move_target(mem_joined); sufsort(sorted, mem_joined, sortedsize+sourcelen); if (!out->progress(progPreInv, targetlen)) error(bps_canceled); if (sorted_inverse) create_reverse_index(sorted, sorted_inverse, sortedsize+sourcelen); else create_buckets(mem_joined, sorted, sortedsize+sourcelen, buckets); if (!out->progress(progPreFind, targetlen)) error(bps_canceled); } off_t matchlen = 0; off_t matchpos = adjust_match(find_index(outpos, mem_joined, sortedsize+sourcelen, sorted, sorted_inverse, buckets), mem_joined+outpos, sortedsize-outpos, mem_joined,sortedsize+sourcelen, outpos,sortedsize, sorted, sortedsize+sourcelen, &matchlen); #ifdef TEST_CORRECT if (matchlen && matchpos >= outpos && matchpos < sortedsize) puts("ERROR: found match in invalid location"),abort(); if (memcmp(mem_joined+matchpos, mem_joined+outpos, matchlen)) puts("ERROR: found match doesn't match"),abort(); #endif off_t taken; if (matchpos >= sortedsize) taken = out->match(false, matchpos-sortedsize, matchlen); else taken = out->match(true, matchpos, matchlen); #ifdef TEST_CORRECT if (taken < 0) puts("ERROR: match() returned negative"),abort(); if (matchlen >= 7 && taken < matchlen) printf("ERROR: match() took %i bytes, offered %i\n", taken, matchlen),abort(); #endif outpos += taken; } out->finish(mem_joined+sortedsize, mem_joined); err = bps_ok; error: free(buckets); free(sorted_inverse); free(sorted); free(mem_joined); return err; #undef error } //template static bpserror bps_create_suf_pick(file* source, file* target, bool moremem, struct bps_creator * bps); //template<> bpserror bps_create_suf_pick(file* source, file* target, bool moremem, struct bps_creator * bps) //{ // return bps_create_suf_core(source, target, moremem, bps); //} //template<> bpserror bps_create_suf_pick(file* source, file* target, bool moremem, struct bps_creator * bps) //{ // bpserror err = bps_create_suf_core(source, target, moremem, bps); // if (err==bps_too_big) err = bps_create_suf_core(source, target, moremem, bps); // return err; //} //This one picks a function based on 32-bit integers if that fits. This halves memory use for common inputs. //It also handles some stuff related to the BPS headers and footers. bpserror bps_create_delta(file* source, file* target, struct mem metadata, struct mem * patchmem, bool (*progress)(void* userdata, size_t done, size_t total), void* userdata, bool moremem) { bps_creator bps(source, target, metadata); bps.setProgress(progress, userdata); size_t maindata = bps.outlen; //off_t must be signed bpserror err = bps_create_suf_core(source, target, moremem, &bps); if (err!=bps_ok) return err; *patchmem = bps.getpatch(); while ((patchmem->ptr[maindata]&0x80) == 0x00) maindata++; if (maindata==patchmem->len-12-1) return bps_identical; return bps_ok; } enum bpserror bps_create_delta_inmem(struct mem source, struct mem target, struct mem metadata, struct mem * patch, bool (*progress)(void* userdata, size_t done, size_t total), void* userdata, bool moremem) { class memfile : public file { public: const uint8_t * m_ptr; size_t m_len; size_t len() { return m_len; } bool read(uint8_t* target, size_t start, size_t len) { memcpy(target, m_ptr+start, len); return true; } memfile(const uint8_t * ptr, size_t len) : m_ptr(ptr), m_len(len) {} }; memfile sourcef(source.ptr, source.len); memfile targetf(target.ptr, target.len); return bps_create_delta(&sourcef, &targetf, metadata, patch, progress, userdata, moremem); } #ifdef BPS_STANDALONE #include static struct mem ReadWholeFile(const char * filename) { struct mem null = {NULL, 0}; FILE * file=fopen(filename, "rb"); if (!file) return null; fseek(file, 0, SEEK_END); size_t len=ftell(file); fseek(file, 0, SEEK_SET); unsigned char * data=(unsigned char*)malloc(len); size_t truelen=fread(data, 1,len, file); fclose(file); if (len!=truelen) { free(data); return null; } struct mem ret = { (unsigned char*)data, len }; return ret; } static bool WriteWholeFile(const char * filename, struct mem data) { FILE * file=fopen(filename, "wb"); if (!file) return false; unsigned int truelen=fwrite(data.ptr, 1,data.len, file); fclose(file); return (truelen==data.len); } int main(int argc, char * argv[]) { //struct mem out = ReadWholeFile(argv[2]); //printf("check=%.8X\n",crc32(out.ptr, out.len)); struct mem in = ReadWholeFile(argv[1]); struct mem out = ReadWholeFile(argv[2]); struct mem null = {NULL, 0}; struct mem p={NULL,0}; //int n=50; //for(int i=0;i