diff --git a/tools/gzinject/.gitattributes b/tools/gzinject/.gitattributes new file mode 100644 index 000000000..66cccc58a --- /dev/null +++ b/tools/gzinject/.gitattributes @@ -0,0 +1,65 @@ +############################################################################### +# Set default behavior to automatically normalize line endings. +############################################################################### +* text=auto + +############################################################################### +# Set default behavior for command prompt diff. +# +# This is need for earlier builds of msysgit that does not have it on by +# default for csharp files. +# Note: This is only used by command line +############################################################################### +#*.cs diff=csharp + +############################################################################### +# Set the merge driver for project and solution files +# +# Merging from the command prompt will add diff markers to the files if there +# are conflicts (Merging from VS is not affected by the settings below, in VS +# the diff markers are never inserted). Diff markers may cause the following +# file extensions to fail to load in VS. An alternative would be to treat +# these files as binary and thus will always conflict and require user +# intervention with every merge. To do so, just uncomment the entries below +############################################################################### +#*.sln merge=binary +#*.csproj merge=binary +#*.vbproj merge=binary +#*.vcxproj merge=binary +#*.vcproj merge=binary +#*.dbproj merge=binary +#*.fsproj merge=binary +#*.lsproj merge=binary +#*.wixproj merge=binary +#*.modelproj merge=binary +#*.sqlproj merge=binary +#*.wwaproj merge=binary + +############################################################################### +# behavior for image files +# +# image files are treated as binary by default. 
+############################################################################### +#*.jpg binary +#*.png binary +#*.gif binary + +############################################################################### +# diff behavior for common document formats +# +# Convert binary document formats to text before diffing them. This feature +# is only available from the command line. Turn it on by uncommenting the +# entries below. +############################################################################### +#*.doc diff=astextplain +#*.DOC diff=astextplain +#*.docx diff=astextplain +#*.DOCX diff=astextplain +#*.dot diff=astextplain +#*.DOT diff=astextplain +#*.pdf diff=astextplain +#*.PDF diff=astextplain +#*.rtf diff=astextplain +#*.RTF diff=astextplain +*.h linguist-language=C +*.c linguist-language=C diff --git a/tools/gzinject/.gitignore b/tools/gzinject/.gitignore new file mode 100644 index 000000000..9ed774981 --- /dev/null +++ b/tools/gzinject/.gitignore @@ -0,0 +1,279 @@ +## Ignore Visual Studio temporary files, build results, and +## files generated by popular Visual Studio add-ons. 
+ +# User-specific files +*.suo +*.user +*.userosscache +*.sln.docstates + +# User-specific files (MonoDevelop/Xamarin Studio) +*.userprefs + +# Build results +[Dd]ebug/ +[Dd]ebugPublic/ +[Rr]elease/ +[Rr]eleases/ +x64/ +x86/ +bld/ +[Bb]in/ +[Oo]bj/ +[Ll]og/ + +# Visual Studio 2015 cache/options directory +.vs/ +# Uncomment if you have tasks that create the project's static files in wwwroot +#wwwroot/ + +# MSTest test Results +[Tt]est[Rr]esult*/ +[Bb]uild[Ll]og.* + +# NUNIT +*.VisualState.xml +TestResult.xml + +# Build Results of an ATL Project +[Dd]ebugPS/ +[Rr]eleasePS/ +dlldata.c + +# DNX +project.lock.json +project.fragment.lock.json +artifacts/ + +*_i.c +*_p.c +*_i.h +*.ilk +*.meta +*.obj +*.pch +*.pdb +*.pgc +*.pgd +*.rsp +*.sbr +*.tlb +*.tli +*.tlh +*.tmp +*.tmp_proj +*.log +*.vspscc +*.vssscc +.builds +*.pidb +*.svclog +*.scc + +# Chutzpah Test files +_Chutzpah* + +# Visual C++ cache files +ipch/ +*.aps +*.ncb +*.opendb +*.opensdf +*.sdf +*.cachefile +*.VC.db +*.VC.VC.opendb + +# Visual Studio profiler +*.psess +*.vsp +*.vspx +*.sap + +# TFS 2012 Local Workspace +$tf/ + +# Guidance Automation Toolkit +*.gpState + +# ReSharper is a .NET coding add-in +_ReSharper*/ +*.[Rr]e[Ss]harper +*.DotSettings.user + +# JustCode is a .NET coding add-in +.JustCode + +# TeamCity is a build add-in +_TeamCity* + +# DotCover is a Code Coverage Tool +*.dotCover + +# NCrunch +_NCrunch_* +.*crunch*.local.xml +nCrunchTemp_* + +# MightyMoose +*.mm.* +AutoTest.Net/ + +# Web workbench (sass) +.sass-cache/ + +# Installshield output folder +[Ee]xpress/ + +# DocProject is a documentation generator add-in +DocProject/buildhelp/ +DocProject/Help/*.HxT +DocProject/Help/*.HxC +DocProject/Help/*.hhc +DocProject/Help/*.hhk +DocProject/Help/*.hhp +DocProject/Help/Html2 +DocProject/Help/html + +# Click-Once directory +publish/ + +# Publish Web Output +*.[Pp]ublish.xml +*.azurePubxml +# TODO: Comment the next line if you want to checkin your web deploy settings +# but database connection 
strings (with potential passwords) will be unencrypted +#*.pubxml +*.publishproj + +# Microsoft Azure Web App publish settings. Comment the next line if you want to +# checkin your Azure Web App publish settings, but sensitive information contained +# in these scripts will be unencrypted +PublishScripts/ + +# NuGet Packages +*.nupkg +# The packages folder can be ignored because of Package Restore +**/packages/* +# except build/, which is used as an MSBuild target. +!**/packages/build/ +# Uncomment if necessary however generally it will be regenerated when needed +#!**/packages/repositories.config +# NuGet v3's project.json files produces more ignoreable files +*.nuget.props +*.nuget.targets + +# Microsoft Azure Build Output +csx/ +*.build.csdef + +# Microsoft Azure Emulator +ecf/ +rcf/ + +# Windows Store app package directories and files +AppPackages/ +BundleArtifacts/ +Package.StoreAssociation.xml +_pkginfo.txt + +# Visual Studio cache files +# files ending in .cache can be ignored +*.[Cc]ache +# but keep track of directories ending in .cache +!*.[Cc]ache/ + +# Others +ClientBin/ +~$* +*~ +*.dbmdl +*.dbproj.schemaview +*.jfm +*.pfx +*.publishsettings +node_modules/ +orleans.codegen.cs + +# Since there are multiple workflows, uncomment next line to ignore bower_components +# (https://github.com/github/gitignore/pull/1529#issuecomment-104372622) +#bower_components/ + +# RIA/Silverlight projects +Generated_Code/ + +# Backup & report files from converting an old project file +# to a newer Visual Studio version. 
Backup files are not needed, +# because we have git ;-) +_UpgradeReport_Files/ +Backup*/ +UpgradeLog*.XML +UpgradeLog*.htm + +# SQL Server files +*.mdf +*.ldf + +# Business Intelligence projects +*.rdl.data +*.bim.layout +*.bim_*.settings + +# Microsoft Fakes +FakesAssemblies/ + +# GhostDoc plugin setting file +*.GhostDoc.xml + +# Node.js Tools for Visual Studio +.ntvs_analysis.dat + +# Visual Studio 6 build log +*.plg + +# Visual Studio 6 workspace options file +*.opt + +# Visual Studio LightSwitch build output +**/*.HTMLClient/GeneratedArtifacts +**/*.DesktopClient/GeneratedArtifacts +**/*.DesktopClient/ModelManifest.xml +**/*.Server/GeneratedArtifacts +**/*.Server/ModelManifest.xml +_Pvt_Extensions + +# Paket dependency manager +.paket/paket.exe +paket-files/ + +# FAKE - F# Make +.fake/ + +# JetBrains Rider +.idea/ +*.sln.iml + +# CodeRush +.cr/ + +# Python Tools for Visual Studio (PTVS) +__pycache__/ +*.pyc + +# Testing File +*.Wad +TestExtract/ +*.exe +*.stackdump +/gzinject/Debug +/gzinject.zip +*.bin +Debug/ +CppProperties.json +wadextract/ +*.o +Makefile +config.* +gzinject +*.zip +autom4te.cache/ \ No newline at end of file diff --git a/tools/gzinject/.gitrepo b/tools/gzinject/.gitrepo new file mode 100644 index 000000000..ccddd36d5 --- /dev/null +++ b/tools/gzinject/.gitrepo @@ -0,0 +1,12 @@ +; DO NOT EDIT (unless you know what you are doing) +; +; This subdirectory is a git "subrepo", and this file is maintained by the +; git-subrepo command. 
See https://github.com/ingydotnet/git-subrepo#readme +; +[subrepo] + remote = https://github.com/krimtonz/gzinject.git + branch = master + commit = ee44efce5d842e5d4488ee47c16da8b673da5086 + parent = 53941daac4bb1482a9125f3595642df1bafb5f6d + method = merge + cmdver = 0.4.5 diff --git a/tools/gzinject/BUILDING.md b/tools/gzinject/BUILDING.md new file mode 100644 index 000000000..7fb64a1c3 --- /dev/null +++ b/tools/gzinject/BUILDING.md @@ -0,0 +1,8 @@ +## Prerequisites + +gcc, make + +## Building +Run `./configure`. You can use `--prefix=DIR` to specify the output directory; `make install` will install gzinject to `DIR/bin`. Then run `make` to build the executable, and `make install` to install it to `DIR/bin` + +By default gzinject will use the crypto library provided by OpenSSL; to disable this and use builtin (slower) crypto functions, remove `-D_USE_LIBCRYPTO` from the makefile, and change SRC = gzinject.c to SRC = *.c diff --git a/tools/gzinject/LICENSE b/tools/gzinject/LICENSE new file mode 100644 index 000000000..94a9ed024 --- /dev/null +++ b/tools/gzinject/LICENSE @@ -0,0 +1,674 @@ + GNU GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The GNU General Public License is a free, copyleft license for +software and other kinds of works. + + The licenses for most software and other practical works are designed +to take away your freedom to share and change the works. By contrast, +the GNU General Public License is intended to guarantee your freedom to +share and change all versions of a program--to make sure it remains free +software for all its users. We, the Free Software Foundation, use the +GNU General Public License for most of our software; it applies also to +any other work released this way by its authors. You can apply it to +your programs, too. 
+ + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +them if you wish), that you receive source code or can get it if you +want it, that you can change the software or use pieces of it in new +free programs, and that you know you can do these things. + + To protect your rights, we need to prevent others from denying you +these rights or asking you to surrender the rights. Therefore, you have +certain responsibilities if you distribute copies of the software, or if +you modify it: responsibilities to respect the freedom of others. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must pass on to the recipients the same +freedoms that you received. You must make sure that they, too, receive +or can get the source code. And you must show them these terms so they +know their rights. + + Developers that use the GNU GPL protect your rights with two steps: +(1) assert copyright on the software, and (2) offer you this License +giving you legal permission to copy, distribute and/or modify it. + + For the developers' and authors' protection, the GPL clearly explains +that there is no warranty for this free software. For both users' and +authors' sake, the GPL requires that modified versions be marked as +changed, so that their problems will not be attributed erroneously to +authors of previous versions. + + Some devices are designed to deny users access to install or run +modified versions of the software inside them, although the manufacturer +can do so. This is fundamentally incompatible with the aim of +protecting users' freedom to change the software. The systematic +pattern of such abuse occurs in the area of products for individuals to +use, which is precisely where it is most unacceptable. 
Therefore, we +have designed this version of the GPL to prohibit the practice for those +products. If such problems arise substantially in other domains, we +stand ready to extend this provision to those domains in future versions +of the GPL, as needed to protect the freedom of users. + + Finally, every program is threatened constantly by software patents. +States should not allow patents to restrict development and use of +software on general-purpose computers, but in those that do, we wish to +avoid the special danger that patents applied to a free program could +make it effectively proprietary. To prevent this, the GPL assures that +patents cannot be used to render the program non-free. + + The precise terms and conditions for copying, distribution and +modification follow. + + TERMS AND CONDITIONS + + 0. Definitions. + + "This License" refers to version 3 of the GNU General Public License. + + "Copyright" also means copyright-like laws that apply to other kinds of +works, such as semiconductor masks. + + "The Program" refers to any copyrightable work licensed under this +License. Each licensee is addressed as "you". "Licensees" and +"recipients" may be individuals or organizations. + + To "modify" a work means to copy from or adapt all or part of the work +in a fashion requiring copyright permission, other than the making of an +exact copy. The resulting work is called a "modified version" of the +earlier work or a work "based on" the earlier work. + + A "covered work" means either the unmodified Program or a work based +on the Program. + + To "propagate" a work means to do anything with it that, without +permission, would make you directly or secondarily liable for +infringement under applicable copyright law, except executing it on a +computer or modifying a private copy. Propagation includes copying, +distribution (with or without modification), making available to the +public, and in some countries other activities as well. 
+ + To "convey" a work means any kind of propagation that enables other +parties to make or receive copies. Mere interaction with a user through +a computer network, with no transfer of a copy, is not conveying. + + An interactive user interface displays "Appropriate Legal Notices" +to the extent that it includes a convenient and prominently visible +feature that (1) displays an appropriate copyright notice, and (2) +tells the user that there is no warranty for the work (except to the +extent that warranties are provided), that licensees may convey the +work under this License, and how to view a copy of this License. If +the interface presents a list of user commands or options, such as a +menu, a prominent item in the list meets this criterion. + + 1. Source Code. + + The "source code" for a work means the preferred form of the work +for making modifications to it. "Object code" means any non-source +form of a work. + + A "Standard Interface" means an interface that either is an official +standard defined by a recognized standards body, or, in the case of +interfaces specified for a particular programming language, one that +is widely used among developers working in that language. + + The "System Libraries" of an executable work include anything, other +than the work as a whole, that (a) is included in the normal form of +packaging a Major Component, but which is not part of that Major +Component, and (b) serves only to enable use of the work with that +Major Component, or to implement a Standard Interface for which an +implementation is available to the public in source code form. A +"Major Component", in this context, means a major essential component +(kernel, window system, and so on) of the specific operating system +(if any) on which the executable work runs, or a compiler used to +produce the work, or an object code interpreter used to run it. 
+ + The "Corresponding Source" for a work in object code form means all +the source code needed to generate, install, and (for an executable +work) run the object code and to modify the work, including scripts to +control those activities. However, it does not include the work's +System Libraries, or general-purpose tools or generally available free +programs which are used unmodified in performing those activities but +which are not part of the work. For example, Corresponding Source +includes interface definition files associated with source files for +the work, and the source code for shared libraries and dynamically +linked subprograms that the work is specifically designed to require, +such as by intimate data communication or control flow between those +subprograms and other parts of the work. + + The Corresponding Source need not include anything that users +can regenerate automatically from other parts of the Corresponding +Source. + + The Corresponding Source for a work in source code form is that +same work. + + 2. Basic Permissions. + + All rights granted under this License are granted for the term of +copyright on the Program, and are irrevocable provided the stated +conditions are met. This License explicitly affirms your unlimited +permission to run the unmodified Program. The output from running a +covered work is covered by this License only if the output, given its +content, constitutes a covered work. This License acknowledges your +rights of fair use or other equivalent, as provided by copyright law. + + You may make, run and propagate covered works that you do not +convey, without conditions so long as your license otherwise remains +in force. You may convey covered works to others for the sole purpose +of having them make modifications exclusively for you, or provide you +with facilities for running those works, provided that you comply with +the terms of this License in conveying all material for which you do +not control copyright. 
Those thus making or running the covered works +for you must do so exclusively on your behalf, under your direction +and control, on terms that prohibit them from making any copies of +your copyrighted material outside their relationship with you. + + Conveying under any other circumstances is permitted solely under +the conditions stated below. Sublicensing is not allowed; section 10 +makes it unnecessary. + + 3. Protecting Users' Legal Rights From Anti-Circumvention Law. + + No covered work shall be deemed part of an effective technological +measure under any applicable law fulfilling obligations under article +11 of the WIPO copyright treaty adopted on 20 December 1996, or +similar laws prohibiting or restricting circumvention of such +measures. + + When you convey a covered work, you waive any legal power to forbid +circumvention of technological measures to the extent such circumvention +is effected by exercising rights under this License with respect to +the covered work, and you disclaim any intention to limit operation or +modification of the work as a means of enforcing, against the work's +users, your or third parties' legal rights to forbid circumvention of +technological measures. + + 4. Conveying Verbatim Copies. + + You may convey verbatim copies of the Program's source code as you +receive it, in any medium, provided that you conspicuously and +appropriately publish on each copy an appropriate copyright notice; +keep intact all notices stating that this License and any +non-permissive terms added in accord with section 7 apply to the code; +keep intact all notices of the absence of any warranty; and give all +recipients a copy of this License along with the Program. + + You may charge any price or no price for each copy that you convey, +and you may offer support or warranty protection for a fee. + + 5. Conveying Modified Source Versions. 
+ + You may convey a work based on the Program, or the modifications to +produce it from the Program, in the form of source code under the +terms of section 4, provided that you also meet all of these conditions: + + a) The work must carry prominent notices stating that you modified + it, and giving a relevant date. + + b) The work must carry prominent notices stating that it is + released under this License and any conditions added under section + 7. This requirement modifies the requirement in section 4 to + "keep intact all notices". + + c) You must license the entire work, as a whole, under this + License to anyone who comes into possession of a copy. This + License will therefore apply, along with any applicable section 7 + additional terms, to the whole of the work, and all its parts, + regardless of how they are packaged. This License gives no + permission to license the work in any other way, but it does not + invalidate such permission if you have separately received it. + + d) If the work has interactive user interfaces, each must display + Appropriate Legal Notices; however, if the Program has interactive + interfaces that do not display Appropriate Legal Notices, your + work need not make them do so. + + A compilation of a covered work with other separate and independent +works, which are not by their nature extensions of the covered work, +and which are not combined with it such as to form a larger program, +in or on a volume of a storage or distribution medium, is called an +"aggregate" if the compilation and its resulting copyright are not +used to limit the access or legal rights of the compilation's users +beyond what the individual works permit. Inclusion of a covered work +in an aggregate does not cause this License to apply to the other +parts of the aggregate. + + 6. Conveying Non-Source Forms. 
+ + You may convey a covered work in object code form under the terms +of sections 4 and 5, provided that you also convey the +machine-readable Corresponding Source under the terms of this License, +in one of these ways: + + a) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by the + Corresponding Source fixed on a durable physical medium + customarily used for software interchange. + + b) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by a + written offer, valid for at least three years and valid for as + long as you offer spare parts or customer support for that product + model, to give anyone who possesses the object code either (1) a + copy of the Corresponding Source for all the software in the + product that is covered by this License, on a durable physical + medium customarily used for software interchange, for a price no + more than your reasonable cost of physically performing this + conveying of source, or (2) access to copy the + Corresponding Source from a network server at no charge. + + c) Convey individual copies of the object code with a copy of the + written offer to provide the Corresponding Source. This + alternative is allowed only occasionally and noncommercially, and + only if you received the object code with such an offer, in accord + with subsection 6b. + + d) Convey the object code by offering access from a designated + place (gratis or for a charge), and offer equivalent access to the + Corresponding Source in the same way through the same place at no + further charge. You need not require recipients to copy the + Corresponding Source along with the object code. 
If the place to + copy the object code is a network server, the Corresponding Source + may be on a different server (operated by you or a third party) + that supports equivalent copying facilities, provided you maintain + clear directions next to the object code saying where to find the + Corresponding Source. Regardless of what server hosts the + Corresponding Source, you remain obligated to ensure that it is + available for as long as needed to satisfy these requirements. + + e) Convey the object code using peer-to-peer transmission, provided + you inform other peers where the object code and Corresponding + Source of the work are being offered to the general public at no + charge under subsection 6d. + + A separable portion of the object code, whose source code is excluded +from the Corresponding Source as a System Library, need not be +included in conveying the object code work. + + A "User Product" is either (1) a "consumer product", which means any +tangible personal property which is normally used for personal, family, +or household purposes, or (2) anything designed or sold for incorporation +into a dwelling. In determining whether a product is a consumer product, +doubtful cases shall be resolved in favor of coverage. For a particular +product received by a particular user, "normally used" refers to a +typical or common use of that class of product, regardless of the status +of the particular user or of the way in which the particular user +actually uses, or expects or is expected to use, the product. A product +is a consumer product regardless of whether the product has substantial +commercial, industrial or non-consumer uses, unless such uses represent +the only significant mode of use of the product. 
+ + "Installation Information" for a User Product means any methods, +procedures, authorization keys, or other information required to install +and execute modified versions of a covered work in that User Product from +a modified version of its Corresponding Source. The information must +suffice to ensure that the continued functioning of the modified object +code is in no case prevented or interfered with solely because +modification has been made. + + If you convey an object code work under this section in, or with, or +specifically for use in, a User Product, and the conveying occurs as +part of a transaction in which the right of possession and use of the +User Product is transferred to the recipient in perpetuity or for a +fixed term (regardless of how the transaction is characterized), the +Corresponding Source conveyed under this section must be accompanied +by the Installation Information. But this requirement does not apply +if neither you nor any third party retains the ability to install +modified object code on the User Product (for example, the work has +been installed in ROM). + + The requirement to provide Installation Information does not include a +requirement to continue to provide support service, warranty, or updates +for a work that has been modified or installed by the recipient, or for +the User Product in which it has been modified or installed. Access to a +network may be denied when the modification itself materially and +adversely affects the operation of the network or violates the rules and +protocols for communication across the network. + + Corresponding Source conveyed, and Installation Information provided, +in accord with this section must be in a format that is publicly +documented (and with an implementation available to the public in +source code form), and must require no special password or key for +unpacking, reading or copying. + + 7. Additional Terms. 
+ + "Additional permissions" are terms that supplement the terms of this +License by making exceptions from one or more of its conditions. +Additional permissions that are applicable to the entire Program shall +be treated as though they were included in this License, to the extent +that they are valid under applicable law. If additional permissions +apply only to part of the Program, that part may be used separately +under those permissions, but the entire Program remains governed by +this License without regard to the additional permissions. + + When you convey a copy of a covered work, you may at your option +remove any additional permissions from that copy, or from any part of +it. (Additional permissions may be written to require their own +removal in certain cases when you modify the work.) You may place +additional permissions on material, added by you to a covered work, +for which you have or can give appropriate copyright permission. + + Notwithstanding any other provision of this License, for material you +add to a covered work, you may (if authorized by the copyright holders of +that material) supplement the terms of this License with terms: + + a) Disclaiming warranty or limiting liability differently from the + terms of sections 15 and 16 of this License; or + + b) Requiring preservation of specified reasonable legal notices or + author attributions in that material or in the Appropriate Legal + Notices displayed by works containing it; or + + c) Prohibiting misrepresentation of the origin of that material, or + requiring that modified versions of such material be marked in + reasonable ways as different from the original version; or + + d) Limiting the use for publicity purposes of names of licensors or + authors of the material; or + + e) Declining to grant rights under trademark law for use of some + trade names, trademarks, or service marks; or + + f) Requiring indemnification of licensors and authors of that + material by anyone who conveys the 
material (or modified versions of + it) with contractual assumptions of liability to the recipient, for + any liability that these contractual assumptions directly impose on + those licensors and authors. + + All other non-permissive additional terms are considered "further +restrictions" within the meaning of section 10. If the Program as you +received it, or any part of it, contains a notice stating that it is +governed by this License along with a term that is a further +restriction, you may remove that term. If a license document contains +a further restriction but permits relicensing or conveying under this +License, you may add to a covered work material governed by the terms +of that license document, provided that the further restriction does +not survive such relicensing or conveying. + + If you add terms to a covered work in accord with this section, you +must place, in the relevant source files, a statement of the +additional terms that apply to those files, or a notice indicating +where to find the applicable terms. + + Additional terms, permissive or non-permissive, may be stated in the +form of a separately written license, or stated as exceptions; +the above requirements apply either way. + + 8. Termination. + + You may not propagate or modify a covered work except as expressly +provided under this License. Any attempt otherwise to propagate or +modify it is void, and will automatically terminate your rights under +this License (including any patent licenses granted under the third +paragraph of section 11). + + However, if you cease all violation of this License, then your +license from a particular copyright holder is reinstated (a) +provisionally, unless and until the copyright holder explicitly and +finally terminates your license, and (b) permanently, if the copyright +holder fails to notify you of the violation by some reasonable means +prior to 60 days after the cessation. 
+ + Moreover, your license from a particular copyright holder is +reinstated permanently if the copyright holder notifies you of the +violation by some reasonable means, this is the first time you have +received notice of violation of this License (for any work) from that +copyright holder, and you cure the violation prior to 30 days after +your receipt of the notice. + + Termination of your rights under this section does not terminate the +licenses of parties who have received copies or rights from you under +this License. If your rights have been terminated and not permanently +reinstated, you do not qualify to receive new licenses for the same +material under section 10. + + 9. Acceptance Not Required for Having Copies. + + You are not required to accept this License in order to receive or +run a copy of the Program. Ancillary propagation of a covered work +occurring solely as a consequence of using peer-to-peer transmission +to receive a copy likewise does not require acceptance. However, +nothing other than this License grants you permission to propagate or +modify any covered work. These actions infringe copyright if you do +not accept this License. Therefore, by modifying or propagating a +covered work, you indicate your acceptance of this License to do so. + + 10. Automatic Licensing of Downstream Recipients. + + Each time you convey a covered work, the recipient automatically +receives a license from the original licensors, to run, modify and +propagate that work, subject to this License. You are not responsible +for enforcing compliance by third parties with this License. + + An "entity transaction" is a transaction transferring control of an +organization, or substantially all assets of one, or subdividing an +organization, or merging organizations. 
If propagation of a covered +work results from an entity transaction, each party to that +transaction who receives a copy of the work also receives whatever +licenses to the work the party's predecessor in interest had or could +give under the previous paragraph, plus a right to possession of the +Corresponding Source of the work from the predecessor in interest, if +the predecessor has it or can get it with reasonable efforts. + + You may not impose any further restrictions on the exercise of the +rights granted or affirmed under this License. For example, you may +not impose a license fee, royalty, or other charge for exercise of +rights granted under this License, and you may not initiate litigation +(including a cross-claim or counterclaim in a lawsuit) alleging that +any patent claim is infringed by making, using, selling, offering for +sale, or importing the Program or any portion of it. + + 11. Patents. + + A "contributor" is a copyright holder who authorizes use under this +License of the Program or a work on which the Program is based. The +work thus licensed is called the contributor's "contributor version". + + A contributor's "essential patent claims" are all patent claims +owned or controlled by the contributor, whether already acquired or +hereafter acquired, that would be infringed by some manner, permitted +by this License, of making, using, or selling its contributor version, +but do not include claims that would be infringed only as a +consequence of further modification of the contributor version. For +purposes of this definition, "control" includes the right to grant +patent sublicenses in a manner consistent with the requirements of +this License. + + Each contributor grants you a non-exclusive, worldwide, royalty-free +patent license under the contributor's essential patent claims, to +make, use, sell, offer for sale, import and otherwise run, modify and +propagate the contents of its contributor version. 
+ + In the following three paragraphs, a "patent license" is any express +agreement or commitment, however denominated, not to enforce a patent +(such as an express permission to practice a patent or covenant not to +sue for patent infringement). To "grant" such a patent license to a +party means to make such an agreement or commitment not to enforce a +patent against the party. + + If you convey a covered work, knowingly relying on a patent license, +and the Corresponding Source of the work is not available for anyone +to copy, free of charge and under the terms of this License, through a +publicly available network server or other readily accessible means, +then you must either (1) cause the Corresponding Source to be so +available, or (2) arrange to deprive yourself of the benefit of the +patent license for this particular work, or (3) arrange, in a manner +consistent with the requirements of this License, to extend the patent +license to downstream recipients. "Knowingly relying" means you have +actual knowledge that, but for the patent license, your conveying the +covered work in a country, or your recipient's use of the covered work +in a country, would infringe one or more identifiable patents in that +country that you have reason to believe are valid. + + If, pursuant to or in connection with a single transaction or +arrangement, you convey, or propagate by procuring conveyance of, a +covered work, and grant a patent license to some of the parties +receiving the covered work authorizing them to use, propagate, modify +or convey a specific copy of the covered work, then the patent license +you grant is automatically extended to all recipients of the covered +work and works based on it. + + A patent license is "discriminatory" if it does not include within +the scope of its coverage, prohibits the exercise of, or is +conditioned on the non-exercise of one or more of the rights that are +specifically granted under this License. 
You may not convey a covered +work if you are a party to an arrangement with a third party that is +in the business of distributing software, under which you make payment +to the third party based on the extent of your activity of conveying +the work, and under which the third party grants, to any of the +parties who would receive the covered work from you, a discriminatory +patent license (a) in connection with copies of the covered work +conveyed by you (or copies made from those copies), or (b) primarily +for and in connection with specific products or compilations that +contain the covered work, unless you entered into that arrangement, +or that patent license was granted, prior to 28 March 2007. + + Nothing in this License shall be construed as excluding or limiting +any implied license or other defenses to infringement that may +otherwise be available to you under applicable patent law. + + 12. No Surrender of Others' Freedom. + + If conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot convey a +covered work so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you may +not convey it at all. For example, if you agree to terms that obligate you +to collect a royalty for further conveying from those to whom you convey +the Program, the only way you could satisfy both those terms and this +License would be to refrain entirely from conveying the Program. + + 13. Use with the GNU Affero General Public License. + + Notwithstanding any other provision of this License, you have +permission to link or combine any covered work with a work licensed +under version 3 of the GNU Affero General Public License into a single +combined work, and to convey the resulting work. 
The terms of this +License will continue to apply to the part which is the covered work, +but the special requirements of the GNU Affero General Public License, +section 13, concerning interaction through a network will apply to the +combination as such. + + 14. Revised Versions of this License. + + The Free Software Foundation may publish revised and/or new versions of +the GNU General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + + Each version is given a distinguishing version number. If the +Program specifies that a certain numbered version of the GNU General +Public License "or any later version" applies to it, you have the +option of following the terms and conditions either of that numbered +version or of any later version published by the Free Software +Foundation. If the Program does not specify a version number of the +GNU General Public License, you may choose any version ever published +by the Free Software Foundation. + + If the Program specifies that a proxy can decide which future +versions of the GNU General Public License can be used, that proxy's +public statement of acceptance of a version permanently authorizes you +to choose that version for the Program. + + Later license versions may give you additional or different +permissions. However, no additional obligations are imposed on any +author or copyright holder as a result of your choosing to follow a +later version. + + 15. Disclaimer of Warranty. + + THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY +APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT +HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY +OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. 
THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM +IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF +ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. Limitation of Liability. + + IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS +THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY +GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE +USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF +DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD +PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), +EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF +SUCH DAMAGES. + + 17. Interpretation of Sections 15 and 16. + + If the disclaimer of warranty and limitation of liability provided +above cannot be given local legal effect according to their terms, +reviewing courts shall apply local law that most closely approximates +an absolute waiver of all civil liability in connection with the +Program, unless a warranty or assumption of liability accompanies a +copy of the Program in return for a fee. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +state the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. 
+
+    <one line to give the program's name and a brief idea of what it does.>
+    Copyright (C) <year>  <name of author>
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+Also add information on how to contact you by electronic and paper mail.
+
+  If the program does terminal interaction, make it output a short
+notice like this when it starts in an interactive mode:
+
+    <program>  Copyright (C) <year>  <name of author>
+    This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+    This is free software, and you are welcome to redistribute it
+    under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License.  Of course, your program's commands
+might be different; for a GUI interface, you would use an "about box".
+
+  You should also get your employer (if you work as a programmer) or school,
+if any, to sign a "copyright disclaimer" for the program, if necessary.
+For more information on this, and how to apply and follow the GNU GPL, see
+<http://www.gnu.org/licenses/>.
+
+  The GNU General Public License does not permit incorporating your program
+into proprietary programs.  If your program is a subroutine library, you
+may consider it more useful to permit linking proprietary applications with
+the library.  If this is what you want to do, use the GNU Lesser General
+Public License instead of this License.  But first, please read
+<http://www.gnu.org/philosophy/why-not-lgpl.html>.
diff --git a/tools/gzinject/Makefile.in b/tools/gzinject/Makefile.in new file mode 100644 index 000000000..92b57f7ca --- /dev/null +++ b/tools/gzinject/Makefile.in @@ -0,0 +1,39 @@ +CC = @CC@ +LD = @CC@ +INSTALL = @INSTALL@ +CFLAGS = -Wall -Wno-unused-result @CFLAGS@ +CPPFLAGS = @CPPFLAGS@ +prefix = @prefix@ +exec_prefix = @exec_prefix@ +bindir = @bindir@ +PROGNAME = gzinject +LDFLAGS = -s @LDFLAGS@ +CFILES = *.c +SRCDIR = src +CSRC := $(foreach s,$(CFILES),$(wildcard $(SRCDIR)/$(s))) +COBJ = $(patsubst $(SRCDIR)/%,$(OBJDIR)/%.o,$(CSRC)) +LIBS = +OBJDIR = obj +OUTDIR = $(OBJDIR) + +.PHONY : all install clean distclean + +all : $(PROGNAME) + +clean : + rm -rf $(PROGNAME) obj + +distclean : clean + rm -f Makefile + +install : $(PROGNAME) + $(INSTALL) -p -D --target-directory=$(DESTDIR)$(bindir) $(PROGNAME) + +$(PROGNAME) : $(COBJ) + $(LD) $(LDFLAGS) $^ -o $@ $(LIBS) + +$(OUTDIR) : + mkdir -p $@ + +$(COBJ) : $(OBJDIR)/%.o: $(SRCDIR)/% | $(OBJDIR) + $(CC) -c $(CPPFLAGS) $(CFLAGS) $< -o $@ diff --git a/tools/gzinject/README.md b/tools/gzinject/README.md new file mode 100644 index 000000000..a3893a7d2 --- /dev/null +++ b/tools/gzinject/README.md @@ -0,0 +1,74 @@ +## About + +gzinject is a wad editing utility, primarily used for patching N64 VC Emulators, and replacing the rom inside. gzinject uses patch files to patch content files within the wad. A description of the patch file format can be seen in the [Patch](#Patch) section. + +## Executable + +To build your own, run ./configure, then make, and make install. 
See BUILDING for more instructions + +Prebuilt Windows executable is contained under releases (https://github.com/krimtonz/gzinject/releases/latest) + +## Usage +``` +Usage: + gzinject -a extract -w SOURCEWAD [options] + gzinject -a pack -w DESTWAD [options] + gzinject -a inject -w SOURCEWAD -m ROM [options] + gzinject -a genkey [options] + gzinject --help + gzinject --version + +Actions: + extract extracts SOURCEWAD to directory + pack packs directory into DESTWAD + inject injects rom into SOURCEWAD + genkey generates wii common-key + +Options: + -i, --channelid=ID New Channel ID For Pack and Inject actions (default: none) + -t, --title=title New Channel name for pack and inject actions (default: none) + -h, --help Prints this help message + -k, --key=keyfile Location of the common-key file (default: common-key.bin) + -r, --region=1-3 Region to use (default: 3) + --verbose Print out verbose program execution information + -d, --directory=directory Directory to extract contents to, or directory to read contents from (default: wadextract) + --cleanup Remove files before performing actions + --version Prints the current version + -m, --rom=rom Rom to inject for inject action (default: none) + -o, --outputwad=outwad The output wad for inject actions (default: SOURCEWAD-inject.wad) + -p, --patch-file=patchfile gzi file to use for applying patches (default: none) + -c, --content=contentfile the primary content file (default: 5) + --dol-inject Binary data to inject into the emulator program, requires --dol-loading + --dol-loading The loading address for the binary specified by --dol-inject + --dol-after After which patch file to inject the dol, default: after all patches +``` + +## Patch +gzi files are text files with a command on each line. A # starting the line indicates a comment. + +line format: +ccss oooooooo dddddddd\ +Where c indicates the command, s indicates the data size, o indicates the offset into the current file, and d indicates the data to replace with. 
+ +``` +Commands: + 00: Begin using content file specified by d, offset and size are not used for this command + 01: lz77 decompress the current content file. offset, size, and data are not used for this command + 02: lz77 compress the current content file. offset, size, and data are not used for this command + 03: apply patch to currently selected file. If offset is higher than the file sizes, or a current file has not been selected, the patch is not applied + +Sizes: + 01: a one byte value. data & 0x000000FF is applied to content + offset + 02: a two byte value. data & 0x0000FFFF is applied to content + offset + 04: a four byte value. data is applied to content + offset +``` + + +## Thanks/Authors + +gzinject was primarily written by me.\n +Thanks to glankk (https://github.com/glankk) for providing memory/controller fixes for OOT as well as debugging, testing, and providing fixes for various errors\ +The general workflow of extracting/packing the wad was taken from showmiiwads (https://github.com/dnasdw/showmiiwads/)\ +AES encryption/decryption was taken from kokke (https://github.com/kokke/tiny-AES-c)\ +SHA1 taken from clibs (https://github.com/clibs/sha1)\ +MD5 taken from Alexander Peslyak http://openwall.info/wiki/people/solar/software/public-domain-source-code/md5 diff --git a/tools/gzinject/configure b/tools/gzinject/configure new file mode 100644 index 000000000..b9f5c308f --- /dev/null +++ b/tools/gzinject/configure @@ -0,0 +1,4468 @@ +#! /bin/sh +# Guess values for system-dependent variables and create Makefiles. +# Generated by GNU Autoconf 2.69. +# +# +# Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc. +# +# +# This configure script is free software; the Free Software Foundation +# gives unlimited permission to copy, distribute and modify it. +## -------------------- ## +## M4sh Initialization. 
## +## -------------------- ## + +# Be more Bourne compatible +DUALCASE=1; export DUALCASE # for MKS sh +if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then : + emulate sh + NULLCMD=: + # Pre-4.2 versions of Zsh do word splitting on ${1+"$@"}, which + # is contrary to our usage. Disable this feature. + alias -g '${1+"$@"}'='"$@"' + setopt NO_GLOB_SUBST +else + case `(set -o) 2>/dev/null` in #( + *posix*) : + set -o posix ;; #( + *) : + ;; +esac +fi + + +as_nl=' +' +export as_nl +# Printing a long string crashes Solaris 7 /usr/bin/printf. +as_echo='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\' +as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo +as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo$as_echo +# Prefer a ksh shell builtin over an external printf program on Solaris, +# but without wasting forks for bash or zsh. +if test -z "$BASH_VERSION$ZSH_VERSION" \ + && (test "X`print -r -- $as_echo`" = "X$as_echo") 2>/dev/null; then + as_echo='print -r --' + as_echo_n='print -rn --' +elif (test "X`printf %s $as_echo`" = "X$as_echo") 2>/dev/null; then + as_echo='printf %s\n' + as_echo_n='printf %s' +else + if test "X`(/usr/ucb/echo -n -n $as_echo) 2>/dev/null`" = "X-n $as_echo"; then + as_echo_body='eval /usr/ucb/echo -n "$1$as_nl"' + as_echo_n='/usr/ucb/echo -n' + else + as_echo_body='eval expr "X$1" : "X\\(.*\\)"' + as_echo_n_body='eval + arg=$1; + case $arg in #( + *"$as_nl"*) + expr "X$arg" : "X\\(.*\\)$as_nl"; + arg=`expr "X$arg" : ".*$as_nl\\(.*\\)"`;; + esac; + expr "X$arg" : "X\\(.*\\)" | tr -d "$as_nl" + ' + export as_echo_n_body + as_echo_n='sh -c $as_echo_n_body as_echo' + fi + export as_echo_body + as_echo='sh -c $as_echo_body as_echo' +fi + +# The user is always right. 
+if test "${PATH_SEPARATOR+set}" != set; then + PATH_SEPARATOR=: + (PATH='/bin;/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 && { + (PATH='/bin:/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 || + PATH_SEPARATOR=';' + } +fi + + +# IFS +# We need space, tab and new line, in precisely that order. Quoting is +# there to prevent editors from complaining about space-tab. +# (If _AS_PATH_WALK were called with IFS unset, it would disable word +# splitting by setting IFS to empty value.) +IFS=" "" $as_nl" + +# Find who we are. Look in the path if we contain no directory separator. +as_myself= +case $0 in #(( + *[\\/]* ) as_myself=$0 ;; + *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break + done +IFS=$as_save_IFS + + ;; +esac +# We did not find ourselves, most probably we were run as `sh COMMAND' +# in which case we are not to be found in the path. +if test "x$as_myself" = x; then + as_myself=$0 +fi +if test ! -f "$as_myself"; then + $as_echo "$as_myself: error: cannot find myself; rerun with an absolute file name" >&2 + exit 1 +fi + +# Unset variables that we do not need and which cause bugs (e.g. in +# pre-3.0 UWIN ksh). But do not cause bugs in bash 2.01; the "|| exit 1" +# suppresses any "Segmentation fault" message there. '((' could +# trigger a bug in pdksh 5.2.14. +for as_var in BASH_ENV ENV MAIL MAILPATH +do eval test x\${$as_var+set} = xset \ + && ( (unset $as_var) || exit 1) >/dev/null 2>&1 && unset $as_var || : +done +PS1='$ ' +PS2='> ' +PS4='+ ' + +# NLS nuisances. +LC_ALL=C +export LC_ALL +LANGUAGE=C +export LANGUAGE + +# CDPATH. +(unset CDPATH) >/dev/null 2>&1 && unset CDPATH + +# Use a proper internal environment variable to ensure we don't fall + # into an infinite loop, continuously re-executing ourselves. 
+ if test x"${_as_can_reexec}" != xno && test "x$CONFIG_SHELL" != x; then + _as_can_reexec=no; export _as_can_reexec; + # We cannot yet assume a decent shell, so we have to provide a +# neutralization value for shells without unset; and this also +# works around shells that cannot unset nonexistent variables. +# Preserve -v and -x to the replacement shell. +BASH_ENV=/dev/null +ENV=/dev/null +(unset BASH_ENV) >/dev/null 2>&1 && unset BASH_ENV ENV +case $- in # (((( + *v*x* | *x*v* ) as_opts=-vx ;; + *v* ) as_opts=-v ;; + *x* ) as_opts=-x ;; + * ) as_opts= ;; +esac +exec $CONFIG_SHELL $as_opts "$as_myself" ${1+"$@"} +# Admittedly, this is quite paranoid, since all the known shells bail +# out after a failed `exec'. +$as_echo "$0: could not re-execute with $CONFIG_SHELL" >&2 +as_fn_exit 255 + fi + # We don't want this to propagate to other subprocesses. + { _as_can_reexec=; unset _as_can_reexec;} +if test "x$CONFIG_SHELL" = x; then + as_bourne_compatible="if test -n \"\${ZSH_VERSION+set}\" && (emulate sh) >/dev/null 2>&1; then : + emulate sh + NULLCMD=: + # Pre-4.2 versions of Zsh do word splitting on \${1+\"\$@\"}, which + # is contrary to our usage. Disable this feature. + alias -g '\${1+\"\$@\"}'='\"\$@\"' + setopt NO_GLOB_SUBST +else + case \`(set -o) 2>/dev/null\` in #( + *posix*) : + set -o posix ;; #( + *) : + ;; +esac +fi +" + as_required="as_fn_return () { (exit \$1); } +as_fn_success () { as_fn_return 0; } +as_fn_failure () { as_fn_return 1; } +as_fn_ret_success () { return 0; } +as_fn_ret_failure () { return 1; } + +exitcode=0 +as_fn_success || { exitcode=1; echo as_fn_success failed.; } +as_fn_failure && { exitcode=1; echo as_fn_failure succeeded.; } +as_fn_ret_success || { exitcode=1; echo as_fn_ret_success failed.; } +as_fn_ret_failure && { exitcode=1; echo as_fn_ret_failure succeeded.; } +if ( set x; as_fn_ret_success y && test x = \"\$1\" ); then : + +else + exitcode=1; echo positional parameters were not saved. 
+fi +test x\$exitcode = x0 || exit 1 +test -x / || exit 1" + as_suggested=" as_lineno_1=";as_suggested=$as_suggested$LINENO;as_suggested=$as_suggested" as_lineno_1a=\$LINENO + as_lineno_2=";as_suggested=$as_suggested$LINENO;as_suggested=$as_suggested" as_lineno_2a=\$LINENO + eval 'test \"x\$as_lineno_1'\$as_run'\" != \"x\$as_lineno_2'\$as_run'\" && + test \"x\`expr \$as_lineno_1'\$as_run' + 1\`\" = \"x\$as_lineno_2'\$as_run'\"' || exit 1 +test \$(( 1 + 1 )) = 2 || exit 1" + if (eval "$as_required") 2>/dev/null; then : + as_have_required=yes +else + as_have_required=no +fi + if test x$as_have_required = xyes && (eval "$as_suggested") 2>/dev/null; then : + +else + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +as_found=false +for as_dir in /bin$PATH_SEPARATOR/usr/bin$PATH_SEPARATOR$PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + as_found=: + case $as_dir in #( + /*) + for as_base in sh bash ksh sh5; do + # Try only shells that exist, to save several forks. + as_shell=$as_dir/$as_base + if { test -f "$as_shell" || test -f "$as_shell.exe"; } && + { $as_echo "$as_bourne_compatible""$as_required" | as_run=a "$as_shell"; } 2>/dev/null; then : + CONFIG_SHELL=$as_shell as_have_required=yes + if { $as_echo "$as_bourne_compatible""$as_suggested" | as_run=a "$as_shell"; } 2>/dev/null; then : + break 2 +fi +fi + done;; + esac + as_found=false +done +$as_found || { if { test -f "$SHELL" || test -f "$SHELL.exe"; } && + { $as_echo "$as_bourne_compatible""$as_required" | as_run=a "$SHELL"; } 2>/dev/null; then : + CONFIG_SHELL=$SHELL as_have_required=yes +fi; } +IFS=$as_save_IFS + + + if test "x$CONFIG_SHELL" != x; then : + export CONFIG_SHELL + # We cannot yet assume a decent shell, so we have to provide a +# neutralization value for shells without unset; and this also +# works around shells that cannot unset nonexistent variables. +# Preserve -v and -x to the replacement shell. 
+BASH_ENV=/dev/null +ENV=/dev/null +(unset BASH_ENV) >/dev/null 2>&1 && unset BASH_ENV ENV +case $- in # (((( + *v*x* | *x*v* ) as_opts=-vx ;; + *v* ) as_opts=-v ;; + *x* ) as_opts=-x ;; + * ) as_opts= ;; +esac +exec $CONFIG_SHELL $as_opts "$as_myself" ${1+"$@"} +# Admittedly, this is quite paranoid, since all the known shells bail +# out after a failed `exec'. +$as_echo "$0: could not re-execute with $CONFIG_SHELL" >&2 +exit 255 +fi + + if test x$as_have_required = xno; then : + $as_echo "$0: This script requires a shell more modern than all" + $as_echo "$0: the shells that I found on your system." + if test x${ZSH_VERSION+set} = xset ; then + $as_echo "$0: In particular, zsh $ZSH_VERSION has bugs and should" + $as_echo "$0: be upgraded to zsh 4.3.4 or later." + else + $as_echo "$0: Please tell bug-autoconf@gnu.org about your system, +$0: including any error possibly output before this +$0: message. Then install a modern shell, or manually run +$0: the script under such a shell if you do have one." + fi + exit 1 +fi +fi +fi +SHELL=${CONFIG_SHELL-/bin/sh} +export SHELL +# Unset more variables known to interfere with behavior of common tools. +CLICOLOR_FORCE= GREP_OPTIONS= +unset CLICOLOR_FORCE GREP_OPTIONS + +## --------------------- ## +## M4sh Shell Functions. ## +## --------------------- ## +# as_fn_unset VAR +# --------------- +# Portably unset VAR. +as_fn_unset () +{ + { eval $1=; unset $1;} +} +as_unset=as_fn_unset + +# as_fn_set_status STATUS +# ----------------------- +# Set $? to STATUS, without forking. +as_fn_set_status () +{ + return $1 +} # as_fn_set_status + +# as_fn_exit STATUS +# ----------------- +# Exit the shell with STATUS, even in a "trap 0" or "set -e" context. +as_fn_exit () +{ + set +e + as_fn_set_status $1 + exit $1 +} # as_fn_exit + +# as_fn_mkdir_p +# ------------- +# Create "$as_dir" as a directory, including parents if necessary. 
+as_fn_mkdir_p () +{ + + case $as_dir in #( + -*) as_dir=./$as_dir;; + esac + test -d "$as_dir" || eval $as_mkdir_p || { + as_dirs= + while :; do + case $as_dir in #( + *\'*) as_qdir=`$as_echo "$as_dir" | sed "s/'/'\\\\\\\\''/g"`;; #'( + *) as_qdir=$as_dir;; + esac + as_dirs="'$as_qdir' $as_dirs" + as_dir=`$as_dirname -- "$as_dir" || +$as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$as_dir" : 'X\(//\)[^/]' \| \ + X"$as_dir" : 'X\(//\)$' \| \ + X"$as_dir" : 'X\(/\)' \| . 2>/dev/null || +$as_echo X"$as_dir" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ + s//\1/ + q + } + /^X\(\/\/\)[^/].*/{ + s//\1/ + q + } + /^X\(\/\/\)$/{ + s//\1/ + q + } + /^X\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + test -d "$as_dir" && break + done + test -z "$as_dirs" || eval "mkdir $as_dirs" + } || test -d "$as_dir" || as_fn_error $? "cannot create directory $as_dir" + + +} # as_fn_mkdir_p + +# as_fn_executable_p FILE +# ----------------------- +# Test if FILE is an executable regular file. +as_fn_executable_p () +{ + test -f "$1" && test -x "$1" +} # as_fn_executable_p +# as_fn_append VAR VALUE +# ---------------------- +# Append the text in VALUE to the end of the definition contained in VAR. Take +# advantage of any shell optimizations that allow amortized linear growth over +# repeated appends, instead of the typical quadratic growth present in naive +# implementations. +if (eval "as_var=1; as_var+=2; test x\$as_var = x12") 2>/dev/null; then : + eval 'as_fn_append () + { + eval $1+=\$2 + }' +else + as_fn_append () + { + eval $1=\$$1\$2 + } +fi # as_fn_append + +# as_fn_arith ARG... +# ------------------ +# Perform arithmetic evaluation on the ARGs, and store the result in the +# global $as_val. Take advantage of shells that can avoid forks. The arguments +# must be portable across $(()) and expr. +if (eval "test \$(( 1 + 1 )) = 2") 2>/dev/null; then : + eval 'as_fn_arith () + { + as_val=$(( $* )) + }' +else + as_fn_arith () + { + as_val=`expr "$@" || test $? 
-eq 1` + } +fi # as_fn_arith + + +# as_fn_error STATUS ERROR [LINENO LOG_FD] +# ---------------------------------------- +# Output "`basename $0`: error: ERROR" to stderr. If LINENO and LOG_FD are +# provided, also output the error to LOG_FD, referencing LINENO. Then exit the +# script with STATUS, using 1 if that was 0. +as_fn_error () +{ + as_status=$1; test $as_status -eq 0 && as_status=1 + if test "$4"; then + as_lineno=${as_lineno-"$3"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + $as_echo "$as_me:${as_lineno-$LINENO}: error: $2" >&$4 + fi + $as_echo "$as_me: error: $2" >&2 + as_fn_exit $as_status +} # as_fn_error + +if expr a : '\(a\)' >/dev/null 2>&1 && + test "X`expr 00001 : '.*\(...\)'`" = X001; then + as_expr=expr +else + as_expr=false +fi + +if (basename -- /) >/dev/null 2>&1 && test "X`basename -- / 2>&1`" = "X/"; then + as_basename=basename +else + as_basename=false +fi + +if (as_dir=`dirname -- /` && test "X$as_dir" = X/) >/dev/null 2>&1; then + as_dirname=dirname +else + as_dirname=false +fi + +as_me=`$as_basename -- "$0" || +$as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \ + X"$0" : 'X\(//\)$' \| \ + X"$0" : 'X\(/\)' \| . 2>/dev/null || +$as_echo X/"$0" | + sed '/^.*\/\([^/][^/]*\)\/*$/{ + s//\1/ + q + } + /^X\/\(\/\/\)$/{ + s//\1/ + q + } + /^X\/\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + +# Avoid depending upon Character Ranges. +as_cr_letters='abcdefghijklmnopqrstuvwxyz' +as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ' +as_cr_Letters=$as_cr_letters$as_cr_LETTERS +as_cr_digits='0123456789' +as_cr_alnum=$as_cr_Letters$as_cr_digits + + + as_lineno_1=$LINENO as_lineno_1a=$LINENO + as_lineno_2=$LINENO as_lineno_2a=$LINENO + eval 'test "x$as_lineno_1'$as_run'" != "x$as_lineno_2'$as_run'" && + test "x`expr $as_lineno_1'$as_run' + 1`" = "x$as_lineno_2'$as_run'"' || { + # Blame Lee E. McMahon (1931-1989) for sed's syntax. 
:-) + sed -n ' + p + /[$]LINENO/= + ' <$as_myself | + sed ' + s/[$]LINENO.*/&-/ + t lineno + b + :lineno + N + :loop + s/[$]LINENO\([^'$as_cr_alnum'_].*\n\)\(.*\)/\2\1\2/ + t loop + s/-\n.*// + ' >$as_me.lineno && + chmod +x "$as_me.lineno" || + { $as_echo "$as_me: error: cannot create $as_me.lineno; rerun with a POSIX shell" >&2; as_fn_exit 1; } + + # If we had to re-execute with $CONFIG_SHELL, we're ensured to have + # already done that, so ensure we don't try to do so again and fall + # in an infinite loop. This has already happened in practice. + _as_can_reexec=no; export _as_can_reexec + # Don't try to exec as it changes $[0], causing all sort of problems + # (the dirname of $[0] is not the place where we might find the + # original and so on. Autoconf is especially sensitive to this). + . "./$as_me.lineno" + # Exit status is that of the last command. + exit +} + +ECHO_C= ECHO_N= ECHO_T= +case `echo -n x` in #((((( +-n*) + case `echo 'xy\c'` in + *c*) ECHO_T=' ';; # ECHO_T is single tab character. + xy) ECHO_C='\c';; + *) echo `echo ksh88 bug on AIX 6.1` > /dev/null + ECHO_T=' ';; + esac;; +*) + ECHO_N='-n';; +esac + +rm -f conf$$ conf$$.exe conf$$.file +if test -d conf$$.dir; then + rm -f conf$$.dir/conf$$.file +else + rm -f conf$$.dir + mkdir conf$$.dir 2>/dev/null +fi +if (echo >conf$$.file) 2>/dev/null; then + if ln -s conf$$.file conf$$ 2>/dev/null; then + as_ln_s='ln -s' + # ... but there are two gotchas: + # 1) On MSYS, both `ln -s file dir' and `ln file dir' fail. + # 2) DJGPP < 2.04 has no symlinks; `ln -s' creates a wrapper executable. + # In both cases, we have to default to `cp -pR'. + ln -s conf$$.file conf$$.dir 2>/dev/null && test ! -f conf$$.exe || + as_ln_s='cp -pR' + elif ln conf$$.file conf$$ 2>/dev/null; then + as_ln_s=ln + else + as_ln_s='cp -pR' + fi +else + as_ln_s='cp -pR' +fi +rm -f conf$$ conf$$.exe conf$$.dir/conf$$.file conf$$.file +rmdir conf$$.dir 2>/dev/null + +if mkdir -p . 
2>/dev/null; then
  as_mkdir_p='mkdir -p "$as_dir"'
else
  test -d ./-p && rmdir ./-p
  as_mkdir_p=false
fi

as_test_x='test -x'
as_executable_p=as_fn_executable_p

# Sed expression to map a string onto a valid CPP name.
as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'"

# Sed expression to map a string onto a valid variable name.
as_tr_sh="eval sed 'y%*+%pp%;s%[^_$as_cr_alnum]%_%g'"


# Detach stdin (fd 7 keeps the original for later use), and make sure fd 6
# is an alias of stdout for the script's "checking ..." messages.
# NOTE(fix): the extracted text read `exec 7<&0 &1`, which backgrounds the
# exec and never opens fd 6; restored to the upstream Autoconf 2.69 form.
test -n "$DJDIR" || exec 7<&0 </dev/null 6>&1

# Name of the host.
# hostname on some systems (SVR3.2, old GNU/Linux) returns a bogus exit status,
# so uname gets run too.
ac_hostname=`(hostname || uname -n) 2>/dev/null | sed 1q`

#
# Initializations.
#
ac_default_prefix=/usr/local
ac_clean_files=
ac_config_libobj_dir=.
LIBOBJS=
cross_compiling=no
subdirs=
MFLAGS=
MAKEFLAGS=

# Identity of this package.
PACKAGE_NAME=
PACKAGE_TARNAME=
PACKAGE_VERSION=
PACKAGE_STRING=
PACKAGE_BUGREPORT=
PACKAGE_URL=

ac_default_prefix=/usr/local
# Factoring default headers for most tests.
ac_includes_default="\
#include <stdio.h>
#ifdef HAVE_SYS_TYPES_H
# include <sys/types.h>
#endif
#ifdef HAVE_SYS_STAT_H
# include <sys/stat.h>
#endif
#ifdef STDC_HEADERS
# include <stdlib.h>
# include <stddef.h>
#else
# ifdef HAVE_STDLIB_H
#  include <stdlib.h>
# endif
#endif
#ifdef HAVE_STRING_H
# if !defined STDC_HEADERS && defined HAVE_MEMORY_H
#  include <memory.h>
# endif
# include <string.h>
#endif
#ifdef HAVE_STRINGS_H
# include <strings.h>
#endif
#ifdef HAVE_INTTYPES_H
# include <inttypes.h>
#endif
#ifdef HAVE_STDINT_H
# include <stdint.h>
#endif
#ifdef HAVE_UNISTD_H
# include <unistd.h>
#endif"
# NOTE(fix): every header name above had been stripped (bare `#include`
# lines), which would make every header-check conftest fail to compile.
# Restored to the standard AC_INCLUDES_DEFAULT list from Autoconf 2.69.

ac_subst_vars='LTLIBOBJS
LIBOBJS
EGREP
GREP
CPP
INSTALL_DATA
INSTALL_SCRIPT
INSTALL_PROGRAM
OBJEXT
EXEEXT
ac_ct_CC
CPPFLAGS
LDFLAGS
CFLAGS
CC
target_alias
host_alias
build_alias
LIBS
ECHO_T
ECHO_N
ECHO_C
DEFS
mandir
localedir
libdir
psdir
pdfdir
dvidir
htmldir
infodir
docdir
oldincludedir
includedir
runstatedir
localstatedir
sharedstatedir
sysconfdir
datadir
datarootdir
libexecdir
sbindir
bindir
program_transform_name
prefix
exec_prefix
PACKAGE_URL
PACKAGE_BUGREPORT
PACKAGE_STRING
PACKAGE_VERSION
PACKAGE_TARNAME
PACKAGE_NAME
PATH_SEPARATOR
SHELL'
ac_subst_files=''
ac_user_opts='
enable_option_checking
'
      ac_precious_vars='build_alias
host_alias
target_alias
CC
CFLAGS
LDFLAGS
LIBS
CPPFLAGS
CPP'


# Initialize some variables set by options.
ac_init_help=
ac_init_version=false
ac_unrecognized_opts=
ac_unrecognized_sep=
# The variables have the same names as the options, with
# dashes changed to underlines.
cache_file=/dev/null
exec_prefix=NONE
no_create=
no_recursion=
prefix=NONE
program_prefix=NONE
program_suffix=NONE
program_transform_name=s,x,x,
silent=
site=
srcdir=
verbose=
x_includes=NONE
x_libraries=NONE

# Installation directory options.
# These are left unexpanded so users can "make install exec_prefix=/foo"
# and all the variables that are supposed to be based on exec_prefix
# by default will actually change.
# Use braces instead of parens because sh, perl, etc.
also accept them. +# (The list follows the same order as the GNU Coding Standards.) +bindir='${exec_prefix}/bin' +sbindir='${exec_prefix}/sbin' +libexecdir='${exec_prefix}/libexec' +datarootdir='${prefix}/share' +datadir='${datarootdir}' +sysconfdir='${prefix}/etc' +sharedstatedir='${prefix}/com' +localstatedir='${prefix}/var' +runstatedir='${localstatedir}/run' +includedir='${prefix}/include' +oldincludedir='/usr/include' +docdir='${datarootdir}/doc/${PACKAGE}' +infodir='${datarootdir}/info' +htmldir='${docdir}' +dvidir='${docdir}' +pdfdir='${docdir}' +psdir='${docdir}' +libdir='${exec_prefix}/lib' +localedir='${datarootdir}/locale' +mandir='${datarootdir}/man' + +ac_prev= +ac_dashdash= +for ac_option +do + # If the previous option needs an argument, assign it. + if test -n "$ac_prev"; then + eval $ac_prev=\$ac_option + ac_prev= + continue + fi + + case $ac_option in + *=?*) ac_optarg=`expr "X$ac_option" : '[^=]*=\(.*\)'` ;; + *=) ac_optarg= ;; + *) ac_optarg=yes ;; + esac + + # Accept the important Cygnus configure options, so we can diagnose typos. 
+ + case $ac_dashdash$ac_option in + --) + ac_dashdash=yes ;; + + -bindir | --bindir | --bindi | --bind | --bin | --bi) + ac_prev=bindir ;; + -bindir=* | --bindir=* | --bindi=* | --bind=* | --bin=* | --bi=*) + bindir=$ac_optarg ;; + + -build | --build | --buil | --bui | --bu) + ac_prev=build_alias ;; + -build=* | --build=* | --buil=* | --bui=* | --bu=*) + build_alias=$ac_optarg ;; + + -cache-file | --cache-file | --cache-fil | --cache-fi \ + | --cache-f | --cache- | --cache | --cach | --cac | --ca | --c) + ac_prev=cache_file ;; + -cache-file=* | --cache-file=* | --cache-fil=* | --cache-fi=* \ + | --cache-f=* | --cache-=* | --cache=* | --cach=* | --cac=* | --ca=* | --c=*) + cache_file=$ac_optarg ;; + + --config-cache | -C) + cache_file=config.cache ;; + + -datadir | --datadir | --datadi | --datad) + ac_prev=datadir ;; + -datadir=* | --datadir=* | --datadi=* | --datad=*) + datadir=$ac_optarg ;; + + -datarootdir | --datarootdir | --datarootdi | --datarootd | --dataroot \ + | --dataroo | --dataro | --datar) + ac_prev=datarootdir ;; + -datarootdir=* | --datarootdir=* | --datarootdi=* | --datarootd=* \ + | --dataroot=* | --dataroo=* | --dataro=* | --datar=*) + datarootdir=$ac_optarg ;; + + -disable-* | --disable-*) + ac_useropt=`expr "x$ac_option" : 'x-*disable-\(.*\)'` + # Reject names that are not valid shell variable names. + expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && + as_fn_error $? 
"invalid feature name: $ac_useropt" + ac_useropt_orig=$ac_useropt + ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'` + case $ac_user_opts in + *" +"enable_$ac_useropt" +"*) ;; + *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--disable-$ac_useropt_orig" + ac_unrecognized_sep=', ';; + esac + eval enable_$ac_useropt=no ;; + + -docdir | --docdir | --docdi | --doc | --do) + ac_prev=docdir ;; + -docdir=* | --docdir=* | --docdi=* | --doc=* | --do=*) + docdir=$ac_optarg ;; + + -dvidir | --dvidir | --dvidi | --dvid | --dvi | --dv) + ac_prev=dvidir ;; + -dvidir=* | --dvidir=* | --dvidi=* | --dvid=* | --dvi=* | --dv=*) + dvidir=$ac_optarg ;; + + -enable-* | --enable-*) + ac_useropt=`expr "x$ac_option" : 'x-*enable-\([^=]*\)'` + # Reject names that are not valid shell variable names. + expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && + as_fn_error $? "invalid feature name: $ac_useropt" + ac_useropt_orig=$ac_useropt + ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'` + case $ac_user_opts in + *" +"enable_$ac_useropt" +"*) ;; + *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--enable-$ac_useropt_orig" + ac_unrecognized_sep=', ';; + esac + eval enable_$ac_useropt=\$ac_optarg ;; + + -exec-prefix | --exec_prefix | --exec-prefix | --exec-prefi \ + | --exec-pref | --exec-pre | --exec-pr | --exec-p | --exec- \ + | --exec | --exe | --ex) + ac_prev=exec_prefix ;; + -exec-prefix=* | --exec_prefix=* | --exec-prefix=* | --exec-prefi=* \ + | --exec-pref=* | --exec-pre=* | --exec-pr=* | --exec-p=* | --exec-=* \ + | --exec=* | --exe=* | --ex=*) + exec_prefix=$ac_optarg ;; + + -gas | --gas | --ga | --g) + # Obsolete; use --with-gas. 
+ with_gas=yes ;; + + -help | --help | --hel | --he | -h) + ac_init_help=long ;; + -help=r* | --help=r* | --hel=r* | --he=r* | -hr*) + ac_init_help=recursive ;; + -help=s* | --help=s* | --hel=s* | --he=s* | -hs*) + ac_init_help=short ;; + + -host | --host | --hos | --ho) + ac_prev=host_alias ;; + -host=* | --host=* | --hos=* | --ho=*) + host_alias=$ac_optarg ;; + + -htmldir | --htmldir | --htmldi | --htmld | --html | --htm | --ht) + ac_prev=htmldir ;; + -htmldir=* | --htmldir=* | --htmldi=* | --htmld=* | --html=* | --htm=* \ + | --ht=*) + htmldir=$ac_optarg ;; + + -includedir | --includedir | --includedi | --included | --include \ + | --includ | --inclu | --incl | --inc) + ac_prev=includedir ;; + -includedir=* | --includedir=* | --includedi=* | --included=* | --include=* \ + | --includ=* | --inclu=* | --incl=* | --inc=*) + includedir=$ac_optarg ;; + + -infodir | --infodir | --infodi | --infod | --info | --inf) + ac_prev=infodir ;; + -infodir=* | --infodir=* | --infodi=* | --infod=* | --info=* | --inf=*) + infodir=$ac_optarg ;; + + -libdir | --libdir | --libdi | --libd) + ac_prev=libdir ;; + -libdir=* | --libdir=* | --libdi=* | --libd=*) + libdir=$ac_optarg ;; + + -libexecdir | --libexecdir | --libexecdi | --libexecd | --libexec \ + | --libexe | --libex | --libe) + ac_prev=libexecdir ;; + -libexecdir=* | --libexecdir=* | --libexecdi=* | --libexecd=* | --libexec=* \ + | --libexe=* | --libex=* | --libe=*) + libexecdir=$ac_optarg ;; + + -localedir | --localedir | --localedi | --localed | --locale) + ac_prev=localedir ;; + -localedir=* | --localedir=* | --localedi=* | --localed=* | --locale=*) + localedir=$ac_optarg ;; + + -localstatedir | --localstatedir | --localstatedi | --localstated \ + | --localstate | --localstat | --localsta | --localst | --locals) + ac_prev=localstatedir ;; + -localstatedir=* | --localstatedir=* | --localstatedi=* | --localstated=* \ + | --localstate=* | --localstat=* | --localsta=* | --localst=* | --locals=*) + localstatedir=$ac_optarg ;; + + 
-mandir | --mandir | --mandi | --mand | --man | --ma | --m) + ac_prev=mandir ;; + -mandir=* | --mandir=* | --mandi=* | --mand=* | --man=* | --ma=* | --m=*) + mandir=$ac_optarg ;; + + -nfp | --nfp | --nf) + # Obsolete; use --without-fp. + with_fp=no ;; + + -no-create | --no-create | --no-creat | --no-crea | --no-cre \ + | --no-cr | --no-c | -n) + no_create=yes ;; + + -no-recursion | --no-recursion | --no-recursio | --no-recursi \ + | --no-recurs | --no-recur | --no-recu | --no-rec | --no-re | --no-r) + no_recursion=yes ;; + + -oldincludedir | --oldincludedir | --oldincludedi | --oldincluded \ + | --oldinclude | --oldinclud | --oldinclu | --oldincl | --oldinc \ + | --oldin | --oldi | --old | --ol | --o) + ac_prev=oldincludedir ;; + -oldincludedir=* | --oldincludedir=* | --oldincludedi=* | --oldincluded=* \ + | --oldinclude=* | --oldinclud=* | --oldinclu=* | --oldincl=* | --oldinc=* \ + | --oldin=* | --oldi=* | --old=* | --ol=* | --o=*) + oldincludedir=$ac_optarg ;; + + -prefix | --prefix | --prefi | --pref | --pre | --pr | --p) + ac_prev=prefix ;; + -prefix=* | --prefix=* | --prefi=* | --pref=* | --pre=* | --pr=* | --p=*) + prefix=$ac_optarg ;; + + -program-prefix | --program-prefix | --program-prefi | --program-pref \ + | --program-pre | --program-pr | --program-p) + ac_prev=program_prefix ;; + -program-prefix=* | --program-prefix=* | --program-prefi=* \ + | --program-pref=* | --program-pre=* | --program-pr=* | --program-p=*) + program_prefix=$ac_optarg ;; + + -program-suffix | --program-suffix | --program-suffi | --program-suff \ + | --program-suf | --program-su | --program-s) + ac_prev=program_suffix ;; + -program-suffix=* | --program-suffix=* | --program-suffi=* \ + | --program-suff=* | --program-suf=* | --program-su=* | --program-s=*) + program_suffix=$ac_optarg ;; + + -program-transform-name | --program-transform-name \ + | --program-transform-nam | --program-transform-na \ + | --program-transform-n | --program-transform- \ + | --program-transform | 
--program-transfor \ + | --program-transfo | --program-transf \ + | --program-trans | --program-tran \ + | --progr-tra | --program-tr | --program-t) + ac_prev=program_transform_name ;; + -program-transform-name=* | --program-transform-name=* \ + | --program-transform-nam=* | --program-transform-na=* \ + | --program-transform-n=* | --program-transform-=* \ + | --program-transform=* | --program-transfor=* \ + | --program-transfo=* | --program-transf=* \ + | --program-trans=* | --program-tran=* \ + | --progr-tra=* | --program-tr=* | --program-t=*) + program_transform_name=$ac_optarg ;; + + -pdfdir | --pdfdir | --pdfdi | --pdfd | --pdf | --pd) + ac_prev=pdfdir ;; + -pdfdir=* | --pdfdir=* | --pdfdi=* | --pdfd=* | --pdf=* | --pd=*) + pdfdir=$ac_optarg ;; + + -psdir | --psdir | --psdi | --psd | --ps) + ac_prev=psdir ;; + -psdir=* | --psdir=* | --psdi=* | --psd=* | --ps=*) + psdir=$ac_optarg ;; + + -q | -quiet | --quiet | --quie | --qui | --qu | --q \ + | -silent | --silent | --silen | --sile | --sil) + silent=yes ;; + + -runstatedir | --runstatedir | --runstatedi | --runstated \ + | --runstate | --runstat | --runsta | --runst | --runs \ + | --run | --ru | --r) + ac_prev=runstatedir ;; + -runstatedir=* | --runstatedir=* | --runstatedi=* | --runstated=* \ + | --runstate=* | --runstat=* | --runsta=* | --runst=* | --runs=* \ + | --run=* | --ru=* | --r=*) + runstatedir=$ac_optarg ;; + + -sbindir | --sbindir | --sbindi | --sbind | --sbin | --sbi | --sb) + ac_prev=sbindir ;; + -sbindir=* | --sbindir=* | --sbindi=* | --sbind=* | --sbin=* \ + | --sbi=* | --sb=*) + sbindir=$ac_optarg ;; + + -sharedstatedir | --sharedstatedir | --sharedstatedi \ + | --sharedstated | --sharedstate | --sharedstat | --sharedsta \ + | --sharedst | --shareds | --shared | --share | --shar \ + | --sha | --sh) + ac_prev=sharedstatedir ;; + -sharedstatedir=* | --sharedstatedir=* | --sharedstatedi=* \ + | --sharedstated=* | --sharedstate=* | --sharedstat=* | --sharedsta=* \ + | --sharedst=* | --shareds=* | 
--shared=* | --share=* | --shar=* \ + | --sha=* | --sh=*) + sharedstatedir=$ac_optarg ;; + + -site | --site | --sit) + ac_prev=site ;; + -site=* | --site=* | --sit=*) + site=$ac_optarg ;; + + -srcdir | --srcdir | --srcdi | --srcd | --src | --sr) + ac_prev=srcdir ;; + -srcdir=* | --srcdir=* | --srcdi=* | --srcd=* | --src=* | --sr=*) + srcdir=$ac_optarg ;; + + -sysconfdir | --sysconfdir | --sysconfdi | --sysconfd | --sysconf \ + | --syscon | --sysco | --sysc | --sys | --sy) + ac_prev=sysconfdir ;; + -sysconfdir=* | --sysconfdir=* | --sysconfdi=* | --sysconfd=* | --sysconf=* \ + | --syscon=* | --sysco=* | --sysc=* | --sys=* | --sy=*) + sysconfdir=$ac_optarg ;; + + -target | --target | --targe | --targ | --tar | --ta | --t) + ac_prev=target_alias ;; + -target=* | --target=* | --targe=* | --targ=* | --tar=* | --ta=* | --t=*) + target_alias=$ac_optarg ;; + + -v | -verbose | --verbose | --verbos | --verbo | --verb) + verbose=yes ;; + + -version | --version | --versio | --versi | --vers | -V) + ac_init_version=: ;; + + -with-* | --with-*) + ac_useropt=`expr "x$ac_option" : 'x-*with-\([^=]*\)'` + # Reject names that are not valid shell variable names. + expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && + as_fn_error $? "invalid package name: $ac_useropt" + ac_useropt_orig=$ac_useropt + ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'` + case $ac_user_opts in + *" +"with_$ac_useropt" +"*) ;; + *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--with-$ac_useropt_orig" + ac_unrecognized_sep=', ';; + esac + eval with_$ac_useropt=\$ac_optarg ;; + + -without-* | --without-*) + ac_useropt=`expr "x$ac_option" : 'x-*without-\(.*\)'` + # Reject names that are not valid shell variable names. + expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && + as_fn_error $? 
"invalid package name: $ac_useropt" + ac_useropt_orig=$ac_useropt + ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'` + case $ac_user_opts in + *" +"with_$ac_useropt" +"*) ;; + *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--without-$ac_useropt_orig" + ac_unrecognized_sep=', ';; + esac + eval with_$ac_useropt=no ;; + + --x) + # Obsolete; use --with-x. + with_x=yes ;; + + -x-includes | --x-includes | --x-include | --x-includ | --x-inclu \ + | --x-incl | --x-inc | --x-in | --x-i) + ac_prev=x_includes ;; + -x-includes=* | --x-includes=* | --x-include=* | --x-includ=* | --x-inclu=* \ + | --x-incl=* | --x-inc=* | --x-in=* | --x-i=*) + x_includes=$ac_optarg ;; + + -x-libraries | --x-libraries | --x-librarie | --x-librari \ + | --x-librar | --x-libra | --x-libr | --x-lib | --x-li | --x-l) + ac_prev=x_libraries ;; + -x-libraries=* | --x-libraries=* | --x-librarie=* | --x-librari=* \ + | --x-librar=* | --x-libra=* | --x-libr=* | --x-lib=* | --x-li=* | --x-l=*) + x_libraries=$ac_optarg ;; + + -*) as_fn_error $? "unrecognized option: \`$ac_option' +Try \`$0 --help' for more information" + ;; + + *=*) + ac_envvar=`expr "x$ac_option" : 'x\([^=]*\)='` + # Reject names that are not valid shell variable names. + case $ac_envvar in #( + '' | [0-9]* | *[!_$as_cr_alnum]* ) + as_fn_error $? "invalid variable name: \`$ac_envvar'" ;; + esac + eval $ac_envvar=\$ac_optarg + export $ac_envvar ;; + + *) + # FIXME: should be removed in autoconf 3.0. + $as_echo "$as_me: WARNING: you should use --build, --host, --target" >&2 + expr "x$ac_option" : ".*[^-._$as_cr_alnum]" >/dev/null && + $as_echo "$as_me: WARNING: invalid host type: $ac_option" >&2 + : "${build_alias=$ac_option} ${host_alias=$ac_option} ${target_alias=$ac_option}" + ;; + + esac +done + +if test -n "$ac_prev"; then + ac_option=--`echo $ac_prev | sed 's/_/-/g'` + as_fn_error $? 
"missing argument to $ac_option" +fi + +if test -n "$ac_unrecognized_opts"; then + case $enable_option_checking in + no) ;; + fatal) as_fn_error $? "unrecognized options: $ac_unrecognized_opts" ;; + *) $as_echo "$as_me: WARNING: unrecognized options: $ac_unrecognized_opts" >&2 ;; + esac +fi + +# Check all directory arguments for consistency. +for ac_var in exec_prefix prefix bindir sbindir libexecdir datarootdir \ + datadir sysconfdir sharedstatedir localstatedir includedir \ + oldincludedir docdir infodir htmldir dvidir pdfdir psdir \ + libdir localedir mandir runstatedir +do + eval ac_val=\$$ac_var + # Remove trailing slashes. + case $ac_val in + */ ) + ac_val=`expr "X$ac_val" : 'X\(.*[^/]\)' \| "X$ac_val" : 'X\(.*\)'` + eval $ac_var=\$ac_val;; + esac + # Be sure to have absolute directory names. + case $ac_val in + [\\/$]* | ?:[\\/]* ) continue;; + NONE | '' ) case $ac_var in *prefix ) continue;; esac;; + esac + as_fn_error $? "expected an absolute directory name for --$ac_var: $ac_val" +done + +# There might be people who depend on the old broken behavior: `$host' +# used to hold the argument of --host etc. +# FIXME: To remove some day. +build=$build_alias +host=$host_alias +target=$target_alias + +# FIXME: To remove some day. +if test "x$host_alias" != x; then + if test "x$build_alias" = x; then + cross_compiling=maybe + elif test "x$build_alias" != "x$host_alias"; then + cross_compiling=yes + fi +fi + +ac_tool_prefix= +test -n "$host_alias" && ac_tool_prefix=$host_alias- + +test "$silent" = yes && exec 6>/dev/null + + +ac_pwd=`pwd` && test -n "$ac_pwd" && +ac_ls_di=`ls -di .` && +ac_pwd_ls_di=`cd "$ac_pwd" && ls -di .` || + as_fn_error $? "working directory cannot be determined" +test "X$ac_ls_di" = "X$ac_pwd_ls_di" || + as_fn_error $? "pwd does not report name of working directory" + + +# Find the source files, if location was not specified. 
+if test -z "$srcdir"; then + ac_srcdir_defaulted=yes + # Try the directory containing this script, then the parent directory. + ac_confdir=`$as_dirname -- "$as_myself" || +$as_expr X"$as_myself" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$as_myself" : 'X\(//\)[^/]' \| \ + X"$as_myself" : 'X\(//\)$' \| \ + X"$as_myself" : 'X\(/\)' \| . 2>/dev/null || +$as_echo X"$as_myself" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ + s//\1/ + q + } + /^X\(\/\/\)[^/].*/{ + s//\1/ + q + } + /^X\(\/\/\)$/{ + s//\1/ + q + } + /^X\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + srcdir=$ac_confdir + if test ! -r "$srcdir/$ac_unique_file"; then + srcdir=.. + fi +else + ac_srcdir_defaulted=no +fi +if test ! -r "$srcdir/$ac_unique_file"; then + test "$ac_srcdir_defaulted" = yes && srcdir="$ac_confdir or .." + as_fn_error $? "cannot find sources ($ac_unique_file) in $srcdir" +fi +ac_msg="sources are in $srcdir, but \`cd $srcdir' does not work" +ac_abs_confdir=`( + cd "$srcdir" && test -r "./$ac_unique_file" || as_fn_error $? "$ac_msg" + pwd)` +# When building in place, set srcdir=. +if test "$ac_abs_confdir" = "$ac_pwd"; then + srcdir=. +fi +# Remove unnecessary trailing slashes from srcdir. +# Double slashes in file names in object file debugging info +# mess up M-x gdb in Emacs. +case $srcdir in +*/) srcdir=`expr "X$srcdir" : 'X\(.*[^/]\)' \| "X$srcdir" : 'X\(.*\)'`;; +esac +for ac_var in $ac_precious_vars; do + eval ac_env_${ac_var}_set=\${${ac_var}+set} + eval ac_env_${ac_var}_value=\$${ac_var} + eval ac_cv_env_${ac_var}_set=\${${ac_var}+set} + eval ac_cv_env_${ac_var}_value=\$${ac_var} +done + +# +# Report the --help message. +# +if test "$ac_init_help" = "long"; then + # Omit some internal or obsolete options to make the list less imposing. + # This message is too long to be a string in the A/UX 3.1 sh. + cat <<_ACEOF +\`configure' configures this package to adapt to many kinds of systems. + +Usage: $0 [OPTION]... [VAR=VALUE]... 
+ +To assign environment variables (e.g., CC, CFLAGS...), specify them as +VAR=VALUE. See below for descriptions of some of the useful variables. + +Defaults for the options are specified in brackets. + +Configuration: + -h, --help display this help and exit + --help=short display options specific to this package + --help=recursive display the short help of all the included packages + -V, --version display version information and exit + -q, --quiet, --silent do not print \`checking ...' messages + --cache-file=FILE cache test results in FILE [disabled] + -C, --config-cache alias for \`--cache-file=config.cache' + -n, --no-create do not create output files + --srcdir=DIR find the sources in DIR [configure dir or \`..'] + +Installation directories: + --prefix=PREFIX install architecture-independent files in PREFIX + [$ac_default_prefix] + --exec-prefix=EPREFIX install architecture-dependent files in EPREFIX + [PREFIX] + +By default, \`make install' will install all the files in +\`$ac_default_prefix/bin', \`$ac_default_prefix/lib' etc. You can specify +an installation prefix other than \`$ac_default_prefix' using \`--prefix', +for instance \`--prefix=\$HOME'. + +For better control, use the options below. 
+ +Fine tuning of the installation directories: + --bindir=DIR user executables [EPREFIX/bin] + --sbindir=DIR system admin executables [EPREFIX/sbin] + --libexecdir=DIR program executables [EPREFIX/libexec] + --sysconfdir=DIR read-only single-machine data [PREFIX/etc] + --sharedstatedir=DIR modifiable architecture-independent data [PREFIX/com] + --localstatedir=DIR modifiable single-machine data [PREFIX/var] + --runstatedir=DIR modifiable per-process data [LOCALSTATEDIR/run] + --libdir=DIR object code libraries [EPREFIX/lib] + --includedir=DIR C header files [PREFIX/include] + --oldincludedir=DIR C header files for non-gcc [/usr/include] + --datarootdir=DIR read-only arch.-independent data root [PREFIX/share] + --datadir=DIR read-only architecture-independent data [DATAROOTDIR] + --infodir=DIR info documentation [DATAROOTDIR/info] + --localedir=DIR locale-dependent data [DATAROOTDIR/locale] + --mandir=DIR man documentation [DATAROOTDIR/man] + --docdir=DIR documentation root [DATAROOTDIR/doc/PACKAGE] + --htmldir=DIR html documentation [DOCDIR] + --dvidir=DIR dvi documentation [DOCDIR] + --pdfdir=DIR pdf documentation [DOCDIR] + --psdir=DIR ps documentation [DOCDIR] +_ACEOF + + cat <<\_ACEOF +_ACEOF +fi + +if test -n "$ac_init_help"; then + + cat <<\_ACEOF + +Some influential environment variables: + CC C compiler command + CFLAGS C compiler flags + LDFLAGS linker flags, e.g. -L if you have libraries in a + nonstandard directory + LIBS libraries to pass to the linker, e.g. -l + CPPFLAGS (Objective) C/C++ preprocessor flags, e.g. -I if + you have headers in a nonstandard directory + CPP C preprocessor + +Use these variables to override the choices made by `configure' or to help +it to find libraries and programs with nonstandard names/locations. + +Report bugs to the package provider. +_ACEOF +ac_status=$? +fi + +if test "$ac_init_help" = "recursive"; then + # If there are subdirs, report their specific --help. 
+ for ac_dir in : $ac_subdirs_all; do test "x$ac_dir" = x: && continue + test -d "$ac_dir" || + { cd "$srcdir" && ac_pwd=`pwd` && srcdir=. && test -d "$ac_dir"; } || + continue + ac_builddir=. + +case "$ac_dir" in +.) ac_dir_suffix= ac_top_builddir_sub=. ac_top_build_prefix= ;; +*) + ac_dir_suffix=/`$as_echo "$ac_dir" | sed 's|^\.[\\/]||'` + # A ".." for each directory in $ac_dir_suffix. + ac_top_builddir_sub=`$as_echo "$ac_dir_suffix" | sed 's|/[^\\/]*|/..|g;s|/||'` + case $ac_top_builddir_sub in + "") ac_top_builddir_sub=. ac_top_build_prefix= ;; + *) ac_top_build_prefix=$ac_top_builddir_sub/ ;; + esac ;; +esac +ac_abs_top_builddir=$ac_pwd +ac_abs_builddir=$ac_pwd$ac_dir_suffix +# for backward compatibility: +ac_top_builddir=$ac_top_build_prefix + +case $srcdir in + .) # We are building in place. + ac_srcdir=. + ac_top_srcdir=$ac_top_builddir_sub + ac_abs_top_srcdir=$ac_pwd ;; + [\\/]* | ?:[\\/]* ) # Absolute name. + ac_srcdir=$srcdir$ac_dir_suffix; + ac_top_srcdir=$srcdir + ac_abs_top_srcdir=$srcdir ;; + *) # Relative name. + ac_srcdir=$ac_top_build_prefix$srcdir$ac_dir_suffix + ac_top_srcdir=$ac_top_build_prefix$srcdir + ac_abs_top_srcdir=$ac_pwd/$srcdir ;; +esac +ac_abs_srcdir=$ac_abs_top_srcdir$ac_dir_suffix + + cd "$ac_dir" || { ac_status=$?; continue; } + # Check for guested configure. + if test -f "$ac_srcdir/configure.gnu"; then + echo && + $SHELL "$ac_srcdir/configure.gnu" --help=recursive + elif test -f "$ac_srcdir/configure"; then + echo && + $SHELL "$ac_srcdir/configure" --help=recursive + else + $as_echo "$as_me: WARNING: no configuration information is in $ac_dir" >&2 + fi || ac_status=$? + cd "$ac_pwd" || { ac_status=$?; break; } + done +fi + +test -n "$ac_init_help" && exit $ac_status +if $ac_init_version; then + cat <<\_ACEOF +configure +generated by GNU Autoconf 2.69 + +Copyright (C) 2012 Free Software Foundation, Inc. 
+This configure script is free software; the Free Software Foundation +gives unlimited permission to copy, distribute and modify it. +_ACEOF + exit +fi + +## ------------------------ ## +## Autoconf initialization. ## +## ------------------------ ## + +# ac_fn_c_try_compile LINENO +# -------------------------- +# Try to compile conftest.$ac_ext, and return whether this succeeded. +ac_fn_c_try_compile () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + rm -f conftest.$ac_objext + if { { ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_compile") 2>conftest.err + ac_status=$? + if test -s conftest.err; then + grep -v '^ *+' conftest.err >conftest.er1 + cat conftest.er1 >&5 + mv -f conftest.er1 conftest.err + fi + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } && { + test -z "$ac_c_werror_flag" || + test ! -s conftest.err + } && test -s conftest.$ac_objext; then : + ac_retval=0 +else + $as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_retval=1 +fi + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + as_fn_set_status $ac_retval + +} # ac_fn_c_try_compile + +# ac_fn_c_try_cpp LINENO +# ---------------------- +# Try to preprocess conftest.$ac_ext, and return whether this succeeded. +ac_fn_c_try_cpp () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + if { { ac_try="$ac_cpp conftest.$ac_ext" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_cpp conftest.$ac_ext") 2>conftest.err + ac_status=$? 
+ if test -s conftest.err; then + grep -v '^ *+' conftest.err >conftest.er1 + cat conftest.er1 >&5 + mv -f conftest.er1 conftest.err + fi + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } > conftest.i && { + test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" || + test ! -s conftest.err + }; then : + ac_retval=0 +else + $as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_retval=1 +fi + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + as_fn_set_status $ac_retval + +} # ac_fn_c_try_cpp + +# ac_fn_c_check_header_mongrel LINENO HEADER VAR INCLUDES +# ------------------------------------------------------- +# Tests whether HEADER exists, giving a warning if it cannot be compiled using +# the include files in INCLUDES and setting the cache variable VAR +# accordingly. +ac_fn_c_check_header_mongrel () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + if eval \${$3+:} false; then : + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5 +$as_echo_n "checking for $2... " >&6; } +if eval \${$3+:} false; then : + $as_echo_n "(cached) " >&6 +fi +eval ac_res=\$$3 + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 +$as_echo "$ac_res" >&6; } +else + # Is the header compilable? +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking $2 usability" >&5 +$as_echo_n "checking $2 usability... " >&6; } +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +$4 +#include <$2> +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + ac_header_compiler=yes +else + ac_header_compiler=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_header_compiler" >&5 +$as_echo "$ac_header_compiler" >&6; } + +# Is the header present? +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking $2 presence" >&5 +$as_echo_n "checking $2 presence... 
" >&6; } +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include <$2> +_ACEOF +if ac_fn_c_try_cpp "$LINENO"; then : + ac_header_preproc=yes +else + ac_header_preproc=no +fi +rm -f conftest.err conftest.i conftest.$ac_ext +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_header_preproc" >&5 +$as_echo "$ac_header_preproc" >&6; } + +# So? What about this header? +case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in #(( + yes:no: ) + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: accepted by the compiler, rejected by the preprocessor!" >&5 +$as_echo "$as_me: WARNING: $2: accepted by the compiler, rejected by the preprocessor!" >&2;} + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: proceeding with the compiler's result" >&5 +$as_echo "$as_me: WARNING: $2: proceeding with the compiler's result" >&2;} + ;; + no:yes:* ) + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: present but cannot be compiled" >&5 +$as_echo "$as_me: WARNING: $2: present but cannot be compiled" >&2;} + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: check for missing prerequisite headers?" >&5 +$as_echo "$as_me: WARNING: $2: check for missing prerequisite headers?" >&2;} + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: see the Autoconf documentation" >&5 +$as_echo "$as_me: WARNING: $2: see the Autoconf documentation" >&2;} + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: section \"Present But Cannot Be Compiled\"" >&5 +$as_echo "$as_me: WARNING: $2: section \"Present But Cannot Be Compiled\"" >&2;} + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: proceeding with the compiler's result" >&5 +$as_echo "$as_me: WARNING: $2: proceeding with the compiler's result" >&2;} + ;; +esac + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5 +$as_echo_n "checking for $2... 
" >&6; } +if eval \${$3+:} false; then : + $as_echo_n "(cached) " >&6 +else + eval "$3=\$ac_header_compiler" +fi +eval ac_res=\$$3 + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 +$as_echo "$ac_res" >&6; } +fi + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + +} # ac_fn_c_check_header_mongrel + +# ac_fn_c_try_run LINENO +# ---------------------- +# Try to link conftest.$ac_ext, and return whether this succeeded. Assumes +# that executables *can* be run. +ac_fn_c_try_run () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + if { { ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_link") 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } && { ac_try='./conftest$ac_exeext' + { { case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; }; then : + ac_retval=0 +else + $as_echo "$as_me: program exited with status $ac_status" >&5 + $as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_retval=$ac_status +fi + rm -rf conftest.dSYM conftest_ipa8_conftest.oo + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + as_fn_set_status $ac_retval + +} # ac_fn_c_try_run + +# ac_fn_c_check_header_compile LINENO HEADER VAR INCLUDES +# ------------------------------------------------------- +# Tests whether HEADER exists and can be compiled using the include files in +# INCLUDES, setting the cache variable VAR accordingly. 
+ac_fn_c_check_header_compile () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5 +$as_echo_n "checking for $2... " >&6; } +if eval \${$3+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +$4 +#include <$2> +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + eval "$3=yes" +else + eval "$3=no" +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +eval ac_res=\$$3 + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 +$as_echo "$ac_res" >&6; } + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + +} # ac_fn_c_check_header_compile +cat >config.log <<_ACEOF +This file contains any messages produced by compilers while +running configure, to aid debugging if configure makes a mistake. + +It was created by $as_me, which was +generated by GNU Autoconf 2.69. Invocation command line was + + $ $0 $@ + +_ACEOF +exec 5>>config.log +{ +cat <<_ASUNAME +## --------- ## +## Platform. 
## +## --------- ## + +hostname = `(hostname || uname -n) 2>/dev/null | sed 1q` +uname -m = `(uname -m) 2>/dev/null || echo unknown` +uname -r = `(uname -r) 2>/dev/null || echo unknown` +uname -s = `(uname -s) 2>/dev/null || echo unknown` +uname -v = `(uname -v) 2>/dev/null || echo unknown` + +/usr/bin/uname -p = `(/usr/bin/uname -p) 2>/dev/null || echo unknown` +/bin/uname -X = `(/bin/uname -X) 2>/dev/null || echo unknown` + +/bin/arch = `(/bin/arch) 2>/dev/null || echo unknown` +/usr/bin/arch -k = `(/usr/bin/arch -k) 2>/dev/null || echo unknown` +/usr/convex/getsysinfo = `(/usr/convex/getsysinfo) 2>/dev/null || echo unknown` +/usr/bin/hostinfo = `(/usr/bin/hostinfo) 2>/dev/null || echo unknown` +/bin/machine = `(/bin/machine) 2>/dev/null || echo unknown` +/usr/bin/oslevel = `(/usr/bin/oslevel) 2>/dev/null || echo unknown` +/bin/universe = `(/bin/universe) 2>/dev/null || echo unknown` + +_ASUNAME + +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + $as_echo "PATH: $as_dir" + done +IFS=$as_save_IFS + +} >&5 + +cat >&5 <<_ACEOF + + +## ----------- ## +## Core tests. ## +## ----------- ## + +_ACEOF + + +# Keep a trace of the command line. +# Strip out --no-create and --no-recursion so they do not pile up. +# Strip out --silent because we don't want to record it for future runs. +# Also quote any args containing shell meta-characters. +# Make two passes to allow for proper duplicate-argument suppression. 
+ac_configure_args= +ac_configure_args0= +ac_configure_args1= +ac_must_keep_next=false +for ac_pass in 1 2 +do + for ac_arg + do + case $ac_arg in + -no-create | --no-c* | -n | -no-recursion | --no-r*) continue ;; + -q | -quiet | --quiet | --quie | --qui | --qu | --q \ + | -silent | --silent | --silen | --sile | --sil) + continue ;; + *\'*) + ac_arg=`$as_echo "$ac_arg" | sed "s/'/'\\\\\\\\''/g"` ;; + esac + case $ac_pass in + 1) as_fn_append ac_configure_args0 " '$ac_arg'" ;; + 2) + as_fn_append ac_configure_args1 " '$ac_arg'" + if test $ac_must_keep_next = true; then + ac_must_keep_next=false # Got value, back to normal. + else + case $ac_arg in + *=* | --config-cache | -C | -disable-* | --disable-* \ + | -enable-* | --enable-* | -gas | --g* | -nfp | --nf* \ + | -q | -quiet | --q* | -silent | --sil* | -v | -verb* \ + | -with-* | --with-* | -without-* | --without-* | --x) + case "$ac_configure_args0 " in + "$ac_configure_args1"*" '$ac_arg' "* ) continue ;; + esac + ;; + -* ) ac_must_keep_next=true ;; + esac + fi + as_fn_append ac_configure_args " '$ac_arg'" + ;; + esac + done +done +{ ac_configure_args0=; unset ac_configure_args0;} +{ ac_configure_args1=; unset ac_configure_args1;} + +# When interrupted or exit'd, cleanup temporary files, and complete +# config.log. We remove comments because anyway the quotes in there +# would cause problems or look ugly. +# WARNING: Use '\'' to represent an apostrophe within the trap. +# WARNING: Do not start the trap code with a newline, due to a FreeBSD 4.0 bug. +trap 'exit_status=$? + # Save into config.log some information that might help in debugging. + { + echo + + $as_echo "## ---------------- ## +## Cache variables. 
## +## ---------------- ##" + echo + # The following way of writing the cache mishandles newlines in values, +( + for ac_var in `(set) 2>&1 | sed -n '\''s/^\([a-zA-Z_][a-zA-Z0-9_]*\)=.*/\1/p'\''`; do + eval ac_val=\$$ac_var + case $ac_val in #( + *${as_nl}*) + case $ac_var in #( + *_cv_*) { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: cache variable $ac_var contains a newline" >&5 +$as_echo "$as_me: WARNING: cache variable $ac_var contains a newline" >&2;} ;; + esac + case $ac_var in #( + _ | IFS | as_nl) ;; #( + BASH_ARGV | BASH_SOURCE) eval $ac_var= ;; #( + *) { eval $ac_var=; unset $ac_var;} ;; + esac ;; + esac + done + (set) 2>&1 | + case $as_nl`(ac_space='\'' '\''; set) 2>&1` in #( + *${as_nl}ac_space=\ *) + sed -n \ + "s/'\''/'\''\\\\'\'''\''/g; + s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1='\''\\2'\''/p" + ;; #( + *) + sed -n "/^[_$as_cr_alnum]*_cv_[_$as_cr_alnum]*=/p" + ;; + esac | + sort +) + echo + + $as_echo "## ----------------- ## +## Output variables. ## +## ----------------- ##" + echo + for ac_var in $ac_subst_vars + do + eval ac_val=\$$ac_var + case $ac_val in + *\'\''*) ac_val=`$as_echo "$ac_val" | sed "s/'\''/'\''\\\\\\\\'\'''\''/g"`;; + esac + $as_echo "$ac_var='\''$ac_val'\''" + done | sort + echo + + if test -n "$ac_subst_files"; then + $as_echo "## ------------------- ## +## File substitutions. ## +## ------------------- ##" + echo + for ac_var in $ac_subst_files + do + eval ac_val=\$$ac_var + case $ac_val in + *\'\''*) ac_val=`$as_echo "$ac_val" | sed "s/'\''/'\''\\\\\\\\'\'''\''/g"`;; + esac + $as_echo "$ac_var='\''$ac_val'\''" + done | sort + echo + fi + + if test -s confdefs.h; then + $as_echo "## ----------- ## +## confdefs.h. 
## +## ----------- ##" + echo + cat confdefs.h + echo + fi + test "$ac_signal" != 0 && + $as_echo "$as_me: caught signal $ac_signal" + $as_echo "$as_me: exit $exit_status" + } >&5 + rm -f core *.core core.conftest.* && + rm -f -r conftest* confdefs* conf$$* $ac_clean_files && + exit $exit_status +' 0 +for ac_signal in 1 2 13 15; do + trap 'ac_signal='$ac_signal'; as_fn_exit 1' $ac_signal +done +ac_signal=0 + +# confdefs.h avoids OS command line length limits that DEFS can exceed. +rm -f -r conftest* confdefs.h + +$as_echo "/* confdefs.h */" > confdefs.h + +# Predefined preprocessor variables. + +cat >>confdefs.h <<_ACEOF +#define PACKAGE_NAME "$PACKAGE_NAME" +_ACEOF + +cat >>confdefs.h <<_ACEOF +#define PACKAGE_TARNAME "$PACKAGE_TARNAME" +_ACEOF + +cat >>confdefs.h <<_ACEOF +#define PACKAGE_VERSION "$PACKAGE_VERSION" +_ACEOF + +cat >>confdefs.h <<_ACEOF +#define PACKAGE_STRING "$PACKAGE_STRING" +_ACEOF + +cat >>confdefs.h <<_ACEOF +#define PACKAGE_BUGREPORT "$PACKAGE_BUGREPORT" +_ACEOF + +cat >>confdefs.h <<_ACEOF +#define PACKAGE_URL "$PACKAGE_URL" +_ACEOF + + +# Let the site file select an alternate cache file if it wants to. +# Prefer an explicitly selected file to automatically selected ones. +ac_site_file1=NONE +ac_site_file2=NONE +if test -n "$CONFIG_SITE"; then + # We do not want a PATH search for config.site. 
+ case $CONFIG_SITE in #(( + -*) ac_site_file1=./$CONFIG_SITE;; + */*) ac_site_file1=$CONFIG_SITE;; + *) ac_site_file1=./$CONFIG_SITE;; + esac +elif test "x$prefix" != xNONE; then + ac_site_file1=$prefix/share/config.site + ac_site_file2=$prefix/etc/config.site +else + ac_site_file1=$ac_default_prefix/share/config.site + ac_site_file2=$ac_default_prefix/etc/config.site +fi +for ac_site_file in "$ac_site_file1" "$ac_site_file2" +do + test "x$ac_site_file" = xNONE && continue + if test /dev/null != "$ac_site_file" && test -r "$ac_site_file"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: loading site script $ac_site_file" >&5 +$as_echo "$as_me: loading site script $ac_site_file" >&6;} + sed 's/^/| /' "$ac_site_file" >&5 + . "$ac_site_file" \ + || { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error $? "failed to load site script $ac_site_file +See \`config.log' for more details" "$LINENO" 5; } + fi +done + +if test -r "$cache_file"; then + # Some versions of bash will fail to source /dev/null (special files + # actually), so we avoid doing that. DJGPP emulates it as a regular file. + if test /dev/null != "$cache_file" && test -f "$cache_file"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: loading cache $cache_file" >&5 +$as_echo "$as_me: loading cache $cache_file" >&6;} + case $cache_file in + [\\/]* | ?:[\\/]* ) . "$cache_file";; + *) . "./$cache_file";; + esac + fi +else + { $as_echo "$as_me:${as_lineno-$LINENO}: creating cache $cache_file" >&5 +$as_echo "$as_me: creating cache $cache_file" >&6;} + >$cache_file +fi + +# Check that the precious variables saved in the cache have kept the same +# value. 
+ac_cache_corrupted=false +for ac_var in $ac_precious_vars; do + eval ac_old_set=\$ac_cv_env_${ac_var}_set + eval ac_new_set=\$ac_env_${ac_var}_set + eval ac_old_val=\$ac_cv_env_${ac_var}_value + eval ac_new_val=\$ac_env_${ac_var}_value + case $ac_old_set,$ac_new_set in + set,) + { $as_echo "$as_me:${as_lineno-$LINENO}: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&5 +$as_echo "$as_me: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&2;} + ac_cache_corrupted=: ;; + ,set) + { $as_echo "$as_me:${as_lineno-$LINENO}: error: \`$ac_var' was not set in the previous run" >&5 +$as_echo "$as_me: error: \`$ac_var' was not set in the previous run" >&2;} + ac_cache_corrupted=: ;; + ,);; + *) + if test "x$ac_old_val" != "x$ac_new_val"; then + # differences in whitespace do not lead to failure. + ac_old_val_w=`echo x $ac_old_val` + ac_new_val_w=`echo x $ac_new_val` + if test "$ac_old_val_w" != "$ac_new_val_w"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: error: \`$ac_var' has changed since the previous run:" >&5 +$as_echo "$as_me: error: \`$ac_var' has changed since the previous run:" >&2;} + ac_cache_corrupted=: + else + { $as_echo "$as_me:${as_lineno-$LINENO}: warning: ignoring whitespace changes in \`$ac_var' since the previous run:" >&5 +$as_echo "$as_me: warning: ignoring whitespace changes in \`$ac_var' since the previous run:" >&2;} + eval $ac_var=\$ac_old_val + fi + { $as_echo "$as_me:${as_lineno-$LINENO}: former value: \`$ac_old_val'" >&5 +$as_echo "$as_me: former value: \`$ac_old_val'" >&2;} + { $as_echo "$as_me:${as_lineno-$LINENO}: current value: \`$ac_new_val'" >&5 +$as_echo "$as_me: current value: \`$ac_new_val'" >&2;} + fi;; + esac + # Pass precious variables to config.status. + if test "$ac_new_set" = set; then + case $ac_new_val in + *\'*) ac_arg=$ac_var=`$as_echo "$ac_new_val" | sed "s/'/'\\\\\\\\''/g"` ;; + *) ac_arg=$ac_var=$ac_new_val ;; + esac + case " $ac_configure_args " in + *" '$ac_arg' "*) ;; # Avoid dups. 
Use of quotes ensures accuracy. + *) as_fn_append ac_configure_args " '$ac_arg'" ;; + esac + fi +done +if $ac_cache_corrupted; then + { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} + { $as_echo "$as_me:${as_lineno-$LINENO}: error: changes in the environment can compromise the build" >&5 +$as_echo "$as_me: error: changes in the environment can compromise the build" >&2;} + as_fn_error $? "run \`make distclean' and/or \`rm $cache_file' and start over" "$LINENO" 5 +fi +## -------------------- ## +## Main body of script. ## +## -------------------- ## + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu +if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}gcc", so it can be a program name with args. +set dummy ${ac_tool_prefix}gcc; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_CC+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_CC="${ac_tool_prefix}gcc" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +CC=$ac_cv_prog_CC +if test -n "$CC"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 +$as_echo "$CC" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_CC"; then + ac_ct_CC=$CC + # Extract the first word of "gcc", so it can be a program name with args. +set dummy gcc; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_ac_ct_CC+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$ac_ct_CC"; then + ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_CC="gcc" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_CC=$ac_cv_prog_ac_ct_CC +if test -n "$ac_ct_CC"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CC" >&5 +$as_echo "$ac_ct_CC" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + if test "x$ac_ct_CC" = x; then + CC="" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + CC=$ac_ct_CC + fi +else + CC="$ac_cv_prog_CC" +fi + +if test -z "$CC"; then + if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}cc", so it can be a program name with args. +set dummy ${ac_tool_prefix}cc; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_CC+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_CC="${ac_tool_prefix}cc" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +CC=$ac_cv_prog_CC +if test -n "$CC"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 +$as_echo "$CC" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + + fi +fi +if test -z "$CC"; then + # Extract the first word of "cc", so it can be a program name with args. +set dummy cc; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_CC+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. +else + ac_prog_rejected=no +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + if test "$as_dir/$ac_word$ac_exec_ext" = "/usr/ucb/cc"; then + ac_prog_rejected=yes + continue + fi + ac_cv_prog_CC="cc" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +if test $ac_prog_rejected = yes; then + # We found a bogon in the path, so make sure we never use it. + set dummy $ac_cv_prog_CC + shift + if test $# != 0; then + # We chose a different compiler from the bogus one. + # However, it has the same basename, so the bogon will be chosen + # first if we set CC to just the basename; use the full file name. 
+ shift + ac_cv_prog_CC="$as_dir/$ac_word${1+' '}$@" + fi +fi +fi +fi +CC=$ac_cv_prog_CC +if test -n "$CC"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 +$as_echo "$CC" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + +fi +if test -z "$CC"; then + if test -n "$ac_tool_prefix"; then + for ac_prog in cl.exe + do + # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args. +set dummy $ac_tool_prefix$ac_prog; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_CC+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_CC="$ac_tool_prefix$ac_prog" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +CC=$ac_cv_prog_CC +if test -n "$CC"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 +$as_echo "$CC" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + + test -n "$CC" && break + done +fi +if test -z "$CC"; then + ac_ct_CC=$CC + for ac_prog in cl.exe +do + # Extract the first word of "$ac_prog", so it can be a program name with args. +set dummy $ac_prog; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_ac_ct_CC+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$ac_ct_CC"; then + ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_CC="$ac_prog" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_CC=$ac_cv_prog_ac_ct_CC +if test -n "$ac_ct_CC"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CC" >&5 +$as_echo "$ac_ct_CC" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + + test -n "$ac_ct_CC" && break +done + + if test "x$ac_ct_CC" = x; then + CC="" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + CC=$ac_ct_CC + fi +fi + +fi + + +test -z "$CC" && { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error $? "no acceptable C compiler found in \$PATH +See \`config.log' for more details" "$LINENO" 5; } + +# Provide some information about the compiler. +$as_echo "$as_me:${as_lineno-$LINENO}: checking for C compiler version" >&5 +set X $ac_compile +ac_compiler=$2 +for ac_option in --version -v -V -qversion; do + { { ac_try="$ac_compiler $ac_option >&5" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_compiler $ac_option >&5") 2>conftest.err + ac_status=$? + if test -s conftest.err; then + sed '10a\ +... rest of stderr output deleted ... 
+ 10q' conftest.err >conftest.er1 + cat conftest.er1 >&5 + fi + rm -f conftest.er1 conftest.err + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } +done + +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +ac_clean_files_save=$ac_clean_files +ac_clean_files="$ac_clean_files a.out a.out.dSYM a.exe b.out" +# Try to create an executable without -o first, disregard a.out. +# It will help us diagnose broken compilers, and finding out an intuition +# of exeext. +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether the C compiler works" >&5 +$as_echo_n "checking whether the C compiler works... " >&6; } +ac_link_default=`$as_echo "$ac_link" | sed 's/ -o *conftest[^ ]*//'` + +# The possible output files: +ac_files="a.out conftest.exe conftest a.exe a_out.exe b.out conftest.*" + +ac_rmfiles= +for ac_file in $ac_files +do + case $ac_file in + *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM | *.o | *.obj ) ;; + * ) ac_rmfiles="$ac_rmfiles $ac_file";; + esac +done +rm -f $ac_rmfiles + +if { { ac_try="$ac_link_default" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_link_default") 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; then : + # Autoconf-2.13 could set the ac_cv_exeext variable to `no'. +# So ignore a value of `no', otherwise this would lead to `EXEEXT = no' +# in a Makefile. We should not override ac_cv_exeext if it was cached, +# so that the user can short-circuit this test for compilers unknown to +# Autoconf. 
+for ac_file in $ac_files '' +do + test -f "$ac_file" || continue + case $ac_file in + *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM | *.o | *.obj ) + ;; + [ab].out ) + # We found the default executable, but exeext='' is most + # certainly right. + break;; + *.* ) + if test "${ac_cv_exeext+set}" = set && test "$ac_cv_exeext" != no; + then :; else + ac_cv_exeext=`expr "$ac_file" : '[^.]*\(\..*\)'` + fi + # We set ac_cv_exeext here because the later test for it is not + # safe: cross compilers may not add the suffix if given an `-o' + # argument, so we may need to know it at that point already. + # Even if this section looks crufty: it has the advantage of + # actually working. + break;; + * ) + break;; + esac +done +test "$ac_cv_exeext" = no && ac_cv_exeext= + +else + ac_file='' +fi +if test -z "$ac_file"; then : + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +$as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +{ { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error 77 "C compiler cannot create executables +See \`config.log' for more details" "$LINENO" 5; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for C compiler default output file name" >&5 +$as_echo_n "checking for C compiler default output file name... " >&6; } +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_file" >&5 +$as_echo "$ac_file" >&6; } +ac_exeext=$ac_cv_exeext + +rm -f -r a.out a.out.dSYM a.exe conftest$ac_cv_exeext b.out +ac_clean_files=$ac_clean_files_save +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for suffix of executables" >&5 +$as_echo_n "checking for suffix of executables... 
" >&6; } +if { { ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_link") 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; then : + # If both `conftest.exe' and `conftest' are `present' (well, observable) +# catch `conftest.exe'. For instance with Cygwin, `ls conftest' will +# work properly (i.e., refer to `conftest.exe'), while it won't with +# `rm'. +for ac_file in conftest.exe conftest conftest.*; do + test -f "$ac_file" || continue + case $ac_file in + *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM | *.o | *.obj ) ;; + *.* ) ac_cv_exeext=`expr "$ac_file" : '[^.]*\(\..*\)'` + break;; + * ) break;; + esac +done +else + { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error $? "cannot compute suffix of executables: cannot compile and link +See \`config.log' for more details" "$LINENO" 5; } +fi +rm -f conftest conftest$ac_cv_exeext +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_exeext" >&5 +$as_echo "$ac_cv_exeext" >&6; } + +rm -f conftest.$ac_ext +EXEEXT=$ac_cv_exeext +ac_exeext=$EXEEXT +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include +int +main () +{ +FILE *f = fopen ("conftest.out", "w"); + return ferror (f) || fclose (f) != 0; + + ; + return 0; +} +_ACEOF +ac_clean_files="$ac_clean_files conftest.out" +# Check that the compiler produces executables we can run. If not, either +# the compiler is broken, or we cross compile. +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether we are cross compiling" >&5 +$as_echo_n "checking whether we are cross compiling... 
" >&6; } +if test "$cross_compiling" != yes; then + { { ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_link") 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } + if { ac_try='./conftest$ac_cv_exeext' + { { case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; }; then + cross_compiling=no + else + if test "$cross_compiling" = maybe; then + cross_compiling=yes + else + { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error $? "cannot run C compiled programs. +If you meant to cross compile, use \`--host'. +See \`config.log' for more details" "$LINENO" 5; } + fi + fi +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $cross_compiling" >&5 +$as_echo "$cross_compiling" >&6; } + +rm -f conftest.$ac_ext conftest$ac_cv_exeext conftest.out +ac_clean_files=$ac_clean_files_save +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for suffix of object files" >&5 +$as_echo_n "checking for suffix of object files... " >&6; } +if ${ac_cv_objext+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +rm -f conftest.o conftest.obj +if { { ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_compile") 2>&5 + ac_status=$? 
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; then : + for ac_file in conftest.o conftest.obj conftest.*; do + test -f "$ac_file" || continue; + case $ac_file in + *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM ) ;; + *) ac_cv_objext=`expr "$ac_file" : '.*\.\(.*\)'` + break;; + esac +done +else + $as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +{ { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error $? "cannot compute suffix of object files: cannot compile +See \`config.log' for more details" "$LINENO" 5; } +fi +rm -f conftest.$ac_cv_objext conftest.$ac_ext +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_objext" >&5 +$as_echo "$ac_cv_objext" >&6; } +OBJEXT=$ac_cv_objext +ac_objext=$OBJEXT +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether we are using the GNU C compiler" >&5 +$as_echo_n "checking whether we are using the GNU C compiler... " >&6; } +if ${ac_cv_c_compiler_gnu+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ +#ifndef __GNUC__ + choke me +#endif + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + ac_compiler_gnu=yes +else + ac_compiler_gnu=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +ac_cv_c_compiler_gnu=$ac_compiler_gnu + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_c_compiler_gnu" >&5 +$as_echo "$ac_cv_c_compiler_gnu" >&6; } +if test $ac_compiler_gnu = yes; then + GCC=yes +else + GCC= +fi +ac_test_CFLAGS=${CFLAGS+set} +ac_save_CFLAGS=$CFLAGS +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $CC accepts -g" >&5 +$as_echo_n "checking whether $CC accepts -g... 
" >&6; } +if ${ac_cv_prog_cc_g+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_save_c_werror_flag=$ac_c_werror_flag + ac_c_werror_flag=yes + ac_cv_prog_cc_g=no + CFLAGS="-g" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + ac_cv_prog_cc_g=yes +else + CFLAGS="" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + +else + ac_c_werror_flag=$ac_save_c_werror_flag + CFLAGS="-g" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + ac_cv_prog_cc_g=yes +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + ac_c_werror_flag=$ac_save_c_werror_flag +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_g" >&5 +$as_echo "$ac_cv_prog_cc_g" >&6; } +if test "$ac_test_CFLAGS" = set; then + CFLAGS=$ac_save_CFLAGS +elif test $ac_cv_prog_cc_g = yes; then + if test "$GCC" = yes; then + CFLAGS="-g -O2" + else + CFLAGS="-g" + fi +else + if test "$GCC" = yes; then + CFLAGS="-O2" + else + CFLAGS= + fi +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $CC option to accept ISO C89" >&5 +$as_echo_n "checking for $CC option to accept ISO C89... " >&6; } +if ${ac_cv_prog_cc_c89+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_cv_prog_cc_c89=no +ac_save_CC=$CC +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include +#include +struct stat; +/* Most of the following tests are stolen from RCS 5.7's src/conf.sh. 
*/ +struct buf { int x; }; +FILE * (*rcsopen) (struct buf *, struct stat *, int); +static char *e (p, i) + char **p; + int i; +{ + return p[i]; +} +static char *f (char * (*g) (char **, int), char **p, ...) +{ + char *s; + va_list v; + va_start (v,p); + s = g (p, va_arg (v,int)); + va_end (v); + return s; +} + +/* OSF 4.0 Compaq cc is some sort of almost-ANSI by default. It has + function prototypes and stuff, but not '\xHH' hex character constants. + These don't provoke an error unfortunately, instead are silently treated + as 'x'. The following induces an error, until -std is added to get + proper ANSI mode. Curiously '\x00'!='x' always comes out true, for an + array size at least. It's necessary to write '\x00'==0 to get something + that's true only with -std. */ +int osf4_cc_array ['\x00' == 0 ? 1 : -1]; + +/* IBM C 6 for AIX is almost-ANSI by default, but it replaces macro parameters + inside strings and character constants. */ +#define FOO(x) 'x' +int xlc6_cc_array[FOO(a) == 'x' ? 
1 : -1]; + +int test (int i, double x); +struct s1 {int (*f) (int a);}; +struct s2 {int (*f) (double a);}; +int pairnames (int, char **, FILE *(*)(struct buf *, struct stat *, int), int, int); +int argc; +char **argv; +int +main () +{ +return f (e, argv, 0) != argv[0] || f (e, argv, 1) != argv[1]; + ; + return 0; +} +_ACEOF +for ac_arg in '' -qlanglvl=extc89 -qlanglvl=ansi -std \ + -Ae "-Aa -D_HPUX_SOURCE" "-Xc -D__EXTENSIONS__" +do + CC="$ac_save_CC $ac_arg" + if ac_fn_c_try_compile "$LINENO"; then : + ac_cv_prog_cc_c89=$ac_arg +fi +rm -f core conftest.err conftest.$ac_objext + test "x$ac_cv_prog_cc_c89" != "xno" && break +done +rm -f conftest.$ac_ext +CC=$ac_save_CC + +fi +# AC_CACHE_VAL +case "x$ac_cv_prog_cc_c89" in + x) + { $as_echo "$as_me:${as_lineno-$LINENO}: result: none needed" >&5 +$as_echo "none needed" >&6; } ;; + xno) + { $as_echo "$as_me:${as_lineno-$LINENO}: result: unsupported" >&5 +$as_echo "unsupported" >&6; } ;; + *) + CC="$CC $ac_cv_prog_cc_c89" + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_c89" >&5 +$as_echo "$ac_cv_prog_cc_c89" >&6; } ;; +esac +if test "x$ac_cv_prog_cc_c89" != xno; then : + +fi + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + +ac_aux_dir= +for ac_dir in "$srcdir" "$srcdir/.." "$srcdir/../.."; do + if test -f "$ac_dir/install-sh"; then + ac_aux_dir=$ac_dir + ac_install_sh="$ac_aux_dir/install-sh -c" + break + elif test -f "$ac_dir/install.sh"; then + ac_aux_dir=$ac_dir + ac_install_sh="$ac_aux_dir/install.sh -c" + break + elif test -f "$ac_dir/shtool"; then + ac_aux_dir=$ac_dir + ac_install_sh="$ac_aux_dir/shtool install -c" + break + fi +done +if test -z "$ac_aux_dir"; then + as_fn_error $? 
"cannot find install-sh, install.sh, or shtool in \"$srcdir\" \"$srcdir/..\" \"$srcdir/../..\"" "$LINENO" 5 +fi + +# These three variables are undocumented and unsupported, +# and are intended to be withdrawn in a future Autoconf release. +# They can cause serious problems if a builder's source tree is in a directory +# whose full name contains unusual characters. +ac_config_guess="$SHELL $ac_aux_dir/config.guess" # Please don't use this var. +ac_config_sub="$SHELL $ac_aux_dir/config.sub" # Please don't use this var. +ac_configure="$SHELL $ac_aux_dir/configure" # Please don't use this var. + + +# Find a good install program. We prefer a C program (faster), +# so one script is as good as another. But avoid the broken or +# incompatible versions: +# SysV /etc/install, /usr/sbin/install +# SunOS /usr/etc/install +# IRIX /sbin/install +# AIX /bin/install +# AmigaOS /C/install, which installs bootblocks on floppy discs +# AIX 4 /usr/bin/installbsd, which doesn't work without a -g flag +# AFS /usr/afsws/bin/install, which mishandles nonexistent args +# SVR4 /usr/ucb/install, which tries to use the nonexistent group "staff" +# OS/2's system install, which has a completely different semantic +# ./install, which can be erroneously created by make from ./install.sh. +# Reject install programs that cannot install multiple files. +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for a BSD-compatible install" >&5 +$as_echo_n "checking for a BSD-compatible install... " >&6; } +if test -z "$INSTALL"; then +if ${ac_cv_path_install+:} false; then : + $as_echo_n "(cached) " >&6 +else + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + # Account for people who put trailing slashes in PATH elements. 
+case $as_dir/ in #(( + ./ | .// | /[cC]/* | \ + /etc/* | /usr/sbin/* | /usr/etc/* | /sbin/* | /usr/afsws/bin/* | \ + ?:[\\/]os2[\\/]install[\\/]* | ?:[\\/]OS2[\\/]INSTALL[\\/]* | \ + /usr/ucb/* ) ;; + *) + # OSF1 and SCO ODT 3.0 have their own names for install. + # Don't use installbsd from OSF since it installs stuff as root + # by default. + for ac_prog in ginstall scoinst install; do + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_prog$ac_exec_ext"; then + if test $ac_prog = install && + grep dspmsg "$as_dir/$ac_prog$ac_exec_ext" >/dev/null 2>&1; then + # AIX install. It has an incompatible calling convention. + : + elif test $ac_prog = install && + grep pwplus "$as_dir/$ac_prog$ac_exec_ext" >/dev/null 2>&1; then + # program-specific install script used by HP pwplus--don't use. + : + else + rm -rf conftest.one conftest.two conftest.dir + echo one > conftest.one + echo two > conftest.two + mkdir conftest.dir + if "$as_dir/$ac_prog$ac_exec_ext" -c conftest.one conftest.two "`pwd`/conftest.dir" && + test -s conftest.one && test -s conftest.two && + test -s conftest.dir/conftest.one && + test -s conftest.dir/conftest.two + then + ac_cv_path_install="$as_dir/$ac_prog$ac_exec_ext -c" + break 3 + fi + fi + fi + done + done + ;; +esac + + done +IFS=$as_save_IFS + +rm -rf conftest.one conftest.two conftest.dir + +fi + if test "${ac_cv_path_install+set}" = set; then + INSTALL=$ac_cv_path_install + else + # As a last resort, use the slow shell script. Don't cache a + # value for INSTALL within a source directory, because that will + # break other packages using the cache if that directory is + # removed, or if the value is a relative name. + INSTALL=$ac_install_sh + fi +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $INSTALL" >&5 +$as_echo "$INSTALL" >&6; } + +# Use test -z because SunOS4 sh mishandles braces in ${var-val}. +# It thinks the first close brace ends the variable substitution. 
+test -z "$INSTALL_PROGRAM" && INSTALL_PROGRAM='${INSTALL}' + +test -z "$INSTALL_SCRIPT" && INSTALL_SCRIPT='${INSTALL}' + +test -z "$INSTALL_DATA" && INSTALL_DATA='${INSTALL} -m 644' + +CFLAGS="-O3" + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking how to run the C preprocessor" >&5 +$as_echo_n "checking how to run the C preprocessor... " >&6; } +# On Suns, sometimes $CPP names a directory. +if test -n "$CPP" && test -d "$CPP"; then + CPP= +fi +if test -z "$CPP"; then + if ${ac_cv_prog_CPP+:} false; then : + $as_echo_n "(cached) " >&6 +else + # Double quotes because CPP needs to be expanded + for CPP in "$CC -E" "$CC -E -traditional-cpp" "/lib/cpp" + do + ac_preproc_ok=false +for ac_c_preproc_warn_flag in '' yes +do + # Use a header file that comes with gcc, so configuring glibc + # with a fresh cross-compiler works. + # Prefer to if __STDC__ is defined, since + # exists even on freestanding compilers. + # On the NeXT, cc -E runs the code through the compiler's parser, + # not just through cpp. "Syntax error" is here to catch this case. + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#ifdef __STDC__ +# include +#else +# include +#endif + Syntax error +_ACEOF +if ac_fn_c_try_cpp "$LINENO"; then : + +else + # Broken: fails on valid input. +continue +fi +rm -f conftest.err conftest.i conftest.$ac_ext + + # OK, works on sane cases. Now check whether nonexistent headers + # can be detected and how. + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include +_ACEOF +if ac_fn_c_try_cpp "$LINENO"; then : + # Broken: success on invalid input. +continue +else + # Passes both tests. 
+ac_preproc_ok=: +break +fi +rm -f conftest.err conftest.i conftest.$ac_ext + +done +# Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped. +rm -f conftest.i conftest.err conftest.$ac_ext +if $ac_preproc_ok; then : + break +fi + + done + ac_cv_prog_CPP=$CPP + +fi + CPP=$ac_cv_prog_CPP +else + ac_cv_prog_CPP=$CPP +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $CPP" >&5 +$as_echo "$CPP" >&6; } +ac_preproc_ok=false +for ac_c_preproc_warn_flag in '' yes +do + # Use a header file that comes with gcc, so configuring glibc + # with a fresh cross-compiler works. + # Prefer to if __STDC__ is defined, since + # exists even on freestanding compilers. + # On the NeXT, cc -E runs the code through the compiler's parser, + # not just through cpp. "Syntax error" is here to catch this case. + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#ifdef __STDC__ +# include +#else +# include +#endif + Syntax error +_ACEOF +if ac_fn_c_try_cpp "$LINENO"; then : + +else + # Broken: fails on valid input. +continue +fi +rm -f conftest.err conftest.i conftest.$ac_ext + + # OK, works on sane cases. Now check whether nonexistent headers + # can be detected and how. + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include +_ACEOF +if ac_fn_c_try_cpp "$LINENO"; then : + # Broken: success on invalid input. +continue +else + # Passes both tests. +ac_preproc_ok=: +break +fi +rm -f conftest.err conftest.i conftest.$ac_ext + +done +# Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped. +rm -f conftest.i conftest.err conftest.$ac_ext +if $ac_preproc_ok; then : + +else + { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error $? 
"C preprocessor \"$CPP\" fails sanity check +See \`config.log' for more details" "$LINENO" 5; } +fi + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for grep that handles long lines and -e" >&5 +$as_echo_n "checking for grep that handles long lines and -e... " >&6; } +if ${ac_cv_path_GREP+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -z "$GREP"; then + ac_path_GREP_found=false + # Loop through the user's path and test for each of PROGNAME-LIST + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH$PATH_SEPARATOR/usr/xpg4/bin +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_prog in grep ggrep; do + for ac_exec_ext in '' $ac_executable_extensions; do + ac_path_GREP="$as_dir/$ac_prog$ac_exec_ext" + as_fn_executable_p "$ac_path_GREP" || continue +# Check for GNU ac_path_GREP and select it if it is found. 
+ # Check for GNU $ac_path_GREP +case `"$ac_path_GREP" --version 2>&1` in +*GNU*) + ac_cv_path_GREP="$ac_path_GREP" ac_path_GREP_found=:;; +*) + ac_count=0 + $as_echo_n 0123456789 >"conftest.in" + while : + do + cat "conftest.in" "conftest.in" >"conftest.tmp" + mv "conftest.tmp" "conftest.in" + cp "conftest.in" "conftest.nl" + $as_echo 'GREP' >> "conftest.nl" + "$ac_path_GREP" -e 'GREP$' -e '-(cannot match)-' < "conftest.nl" >"conftest.out" 2>/dev/null || break + diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break + as_fn_arith $ac_count + 1 && ac_count=$as_val + if test $ac_count -gt ${ac_path_GREP_max-0}; then + # Best one so far, save it but keep looking for a better one + ac_cv_path_GREP="$ac_path_GREP" + ac_path_GREP_max=$ac_count + fi + # 10*(2^10) chars as input seems more than enough + test $ac_count -gt 10 && break + done + rm -f conftest.in conftest.tmp conftest.nl conftest.out;; +esac + + $ac_path_GREP_found && break 3 + done + done + done +IFS=$as_save_IFS + if test -z "$ac_cv_path_GREP"; then + as_fn_error $? "no acceptable grep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" "$LINENO" 5 + fi +else + ac_cv_path_GREP=$GREP +fi + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_GREP" >&5 +$as_echo "$ac_cv_path_GREP" >&6; } + GREP="$ac_cv_path_GREP" + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for egrep" >&5 +$as_echo_n "checking for egrep... " >&6; } +if ${ac_cv_path_EGREP+:} false; then : + $as_echo_n "(cached) " >&6 +else + if echo a | $GREP -E '(a|b)' >/dev/null 2>&1 + then ac_cv_path_EGREP="$GREP -E" + else + if test -z "$EGREP"; then + ac_path_EGREP_found=false + # Loop through the user's path and test for each of PROGNAME-LIST + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH$PATH_SEPARATOR/usr/xpg4/bin +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_prog in egrep; do + for ac_exec_ext in '' $ac_executable_extensions; do + ac_path_EGREP="$as_dir/$ac_prog$ac_exec_ext" + as_fn_executable_p "$ac_path_EGREP" || continue +# Check for GNU ac_path_EGREP and select it if it is found. + # Check for GNU $ac_path_EGREP +case `"$ac_path_EGREP" --version 2>&1` in +*GNU*) + ac_cv_path_EGREP="$ac_path_EGREP" ac_path_EGREP_found=:;; +*) + ac_count=0 + $as_echo_n 0123456789 >"conftest.in" + while : + do + cat "conftest.in" "conftest.in" >"conftest.tmp" + mv "conftest.tmp" "conftest.in" + cp "conftest.in" "conftest.nl" + $as_echo 'EGREP' >> "conftest.nl" + "$ac_path_EGREP" 'EGREP$' < "conftest.nl" >"conftest.out" 2>/dev/null || break + diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break + as_fn_arith $ac_count + 1 && ac_count=$as_val + if test $ac_count -gt ${ac_path_EGREP_max-0}; then + # Best one so far, save it but keep looking for a better one + ac_cv_path_EGREP="$ac_path_EGREP" + ac_path_EGREP_max=$ac_count + fi + # 10*(2^10) chars as input seems more than enough + test $ac_count -gt 10 && break + done + rm -f conftest.in conftest.tmp conftest.nl conftest.out;; +esac + + $ac_path_EGREP_found && break 3 + done + done + done +IFS=$as_save_IFS + if test -z "$ac_cv_path_EGREP"; then + as_fn_error $? "no acceptable egrep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" "$LINENO" 5 + fi +else + ac_cv_path_EGREP=$EGREP +fi + + fi +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_EGREP" >&5 +$as_echo "$ac_cv_path_EGREP" >&6; } + EGREP="$ac_cv_path_EGREP" + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for ANSI C header files" >&5 +$as_echo_n "checking for ANSI C header files... " >&6; } +if ${ac_cv_header_stdc+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +#include +#include +#include +#include + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + ac_cv_header_stdc=yes +else + ac_cv_header_stdc=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + +if test $ac_cv_header_stdc = yes; then + # SunOS 4.x string.h does not declare mem*, contrary to ANSI. + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include + +_ACEOF +if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | + $EGREP "memchr" >/dev/null 2>&1; then : + +else + ac_cv_header_stdc=no +fi +rm -f conftest* + +fi + +if test $ac_cv_header_stdc = yes; then + # ISC 2.0.2 stdlib.h does not declare free, contrary to ANSI. + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include + +_ACEOF +if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | + $EGREP "free" >/dev/null 2>&1; then : + +else + ac_cv_header_stdc=no +fi +rm -f conftest* + +fi + +if test $ac_cv_header_stdc = yes; then + # /bin/cc in Irix-4.0.5 gets non-ANSI ctype macros unless using -ansi. + if test "$cross_compiling" = yes; then : + : +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include +#include +#if ((' ' & 0x0FF) == 0x020) +# define ISLOWER(c) ('a' <= (c) && (c) <= 'z') +# define TOUPPER(c) (ISLOWER(c) ? 'A' + ((c) - 'a') : (c)) +#else +# define ISLOWER(c) \ + (('a' <= (c) && (c) <= 'i') \ + || ('j' <= (c) && (c) <= 'r') \ + || ('s' <= (c) && (c) <= 'z')) +# define TOUPPER(c) (ISLOWER(c) ? 
((c) | 0x40) : (c)) +#endif + +#define XOR(e, f) (((e) && !(f)) || (!(e) && (f))) +int +main () +{ + int i; + for (i = 0; i < 256; i++) + if (XOR (islower (i), ISLOWER (i)) + || toupper (i) != TOUPPER (i)) + return 2; + return 0; +} +_ACEOF +if ac_fn_c_try_run "$LINENO"; then : + +else + ac_cv_header_stdc=no +fi +rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \ + conftest.$ac_objext conftest.beam conftest.$ac_ext +fi + +fi +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_header_stdc" >&5 +$as_echo "$ac_cv_header_stdc" >&6; } +if test $ac_cv_header_stdc = yes; then + +$as_echo "#define STDC_HEADERS 1" >>confdefs.h + +fi + +# On IRIX 5.3, sys/types and inttypes.h are conflicting. +for ac_header in sys/types.h sys/stat.h stdlib.h string.h memory.h strings.h \ + inttypes.h stdint.h unistd.h +do : + as_ac_Header=`$as_echo "ac_cv_header_$ac_header" | $as_tr_sh` +ac_fn_c_check_header_compile "$LINENO" "$ac_header" "$as_ac_Header" "$ac_includes_default +" +if eval test \"x\$"$as_ac_Header"\" = x"yes"; then : + cat >>confdefs.h <<_ACEOF +#define `$as_echo "HAVE_$ac_header" | $as_tr_cpp` 1 +_ACEOF + +fi + +done + + +for ac_header in wmmintrin.h +do : + ac_fn_c_check_header_mongrel "$LINENO" "wmmintrin.h" "ac_cv_header_wmmintrin_h" "$ac_includes_default" +if test "x$ac_cv_header_wmmintrin_h" = xyes; then : + cat >>confdefs.h <<_ACEOF +#define HAVE_WMMINTRIN_H 1 +_ACEOF + + CFLAGS+=" -maes -msse -msse2" + CPPFLAGS="-DFASTAES" + + +fi + +done + +ac_config_files="$ac_config_files Makefile" + +cat >confcache <<\_ACEOF +# This file is a shell script that caches the results of configure +# tests run on this system so they can be shared between configure +# scripts and configure runs, see configure's option --config-cache. +# It is not useful on other systems. If it contains results you don't +# want to keep, you may remove or edit it. 
+# +# config.status only pays attention to the cache file if you give it +# the --recheck option to rerun configure. +# +# `ac_cv_env_foo' variables (set or unset) will be overridden when +# loading this file, other *unset* `ac_cv_foo' will be assigned the +# following values. + +_ACEOF + +# The following way of writing the cache mishandles newlines in values, +# but we know of no workaround that is simple, portable, and efficient. +# So, we kill variables containing newlines. +# Ultrix sh set writes to stderr and can't be redirected directly, +# and sets the high bit in the cache file unless we assign to the vars. +( + for ac_var in `(set) 2>&1 | sed -n 's/^\([a-zA-Z_][a-zA-Z0-9_]*\)=.*/\1/p'`; do + eval ac_val=\$$ac_var + case $ac_val in #( + *${as_nl}*) + case $ac_var in #( + *_cv_*) { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: cache variable $ac_var contains a newline" >&5 +$as_echo "$as_me: WARNING: cache variable $ac_var contains a newline" >&2;} ;; + esac + case $ac_var in #( + _ | IFS | as_nl) ;; #( + BASH_ARGV | BASH_SOURCE) eval $ac_var= ;; #( + *) { eval $ac_var=; unset $ac_var;} ;; + esac ;; + esac + done + + (set) 2>&1 | + case $as_nl`(ac_space=' '; set) 2>&1` in #( + *${as_nl}ac_space=\ *) + # `set' does not quote correctly, so add quotes: double-quote + # substitution turns \\\\ into \\, and sed turns \\ into \. + sed -n \ + "s/'/'\\\\''/g; + s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1='\\2'/p" + ;; #( + *) + # `set' quotes correctly as required by POSIX, so do not add quotes. 
+ sed -n "/^[_$as_cr_alnum]*_cv_[_$as_cr_alnum]*=/p" + ;; + esac | + sort +) | + sed ' + /^ac_cv_env_/b end + t clear + :clear + s/^\([^=]*\)=\(.*[{}].*\)$/test "${\1+set}" = set || &/ + t end + s/^\([^=]*\)=\(.*\)$/\1=${\1=\2}/ + :end' >>confcache +if diff "$cache_file" confcache >/dev/null 2>&1; then :; else + if test -w "$cache_file"; then + if test "x$cache_file" != "x/dev/null"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: updating cache $cache_file" >&5 +$as_echo "$as_me: updating cache $cache_file" >&6;} + if test ! -f "$cache_file" || test -h "$cache_file"; then + cat confcache >"$cache_file" + else + case $cache_file in #( + */* | ?:*) + mv -f confcache "$cache_file"$$ && + mv -f "$cache_file"$$ "$cache_file" ;; #( + *) + mv -f confcache "$cache_file" ;; + esac + fi + fi + else + { $as_echo "$as_me:${as_lineno-$LINENO}: not updating unwritable cache $cache_file" >&5 +$as_echo "$as_me: not updating unwritable cache $cache_file" >&6;} + fi +fi +rm -f confcache + +test "x$prefix" = xNONE && prefix=$ac_default_prefix +# Let make expand exec_prefix. +test "x$exec_prefix" = xNONE && exec_prefix='${prefix}' + +# Transform confdefs.h into DEFS. +# Protect against shell expansion while executing Makefile rules. +# Protect against Makefile macro expansion. +# +# If the first sed substitution is executed (which looks for macros that +# take arguments), then branch to the quote section. Otherwise, +# look for a macro that doesn't take arguments. +ac_script=' +:mline +/\\$/{ + N + s,\\\n,, + b mline +} +t clear +:clear +s/^[ ]*#[ ]*define[ ][ ]*\([^ (][^ (]*([^)]*)\)[ ]*\(.*\)/-D\1=\2/g +t quote +s/^[ ]*#[ ]*define[ ][ ]*\([^ ][^ ]*\)[ ]*\(.*\)/-D\1=\2/g +t quote +b any +:quote +s/[ `~#$^&*(){}\\|;'\''"<>?]/\\&/g +s/\[/\\&/g +s/\]/\\&/g +s/\$/$$/g +H +:any +${ + g + s/^\n// + s/\n/ /g + p +} +' +DEFS=`sed -n "$ac_script" confdefs.h` + + +ac_libobjs= +ac_ltlibobjs= +U= +for ac_i in : $LIBOBJS; do test "x$ac_i" = x: && continue + # 1. 
Remove the extension, and $U if already installed. + ac_script='s/\$U\././;s/\.o$//;s/\.obj$//' + ac_i=`$as_echo "$ac_i" | sed "$ac_script"` + # 2. Prepend LIBOBJDIR. When used with automake>=1.10 LIBOBJDIR + # will be set to the directory where LIBOBJS objects are built. + as_fn_append ac_libobjs " \${LIBOBJDIR}$ac_i\$U.$ac_objext" + as_fn_append ac_ltlibobjs " \${LIBOBJDIR}$ac_i"'$U.lo' +done +LIBOBJS=$ac_libobjs + +LTLIBOBJS=$ac_ltlibobjs + + + +: "${CONFIG_STATUS=./config.status}" +ac_write_fail=0 +ac_clean_files_save=$ac_clean_files +ac_clean_files="$ac_clean_files $CONFIG_STATUS" +{ $as_echo "$as_me:${as_lineno-$LINENO}: creating $CONFIG_STATUS" >&5 +$as_echo "$as_me: creating $CONFIG_STATUS" >&6;} +as_write_fail=0 +cat >$CONFIG_STATUS <<_ASEOF || as_write_fail=1 +#! $SHELL +# Generated by $as_me. +# Run this file to recreate the current configuration. +# Compiler output produced by configure, useful for debugging +# configure, is in config.log if it exists. + +debug=false +ac_cs_recheck=false +ac_cs_silent=false + +SHELL=\${CONFIG_SHELL-$SHELL} +export SHELL +_ASEOF +cat >>$CONFIG_STATUS <<\_ASEOF || as_write_fail=1 +## -------------------- ## +## M4sh Initialization. ## +## -------------------- ## + +# Be more Bourne compatible +DUALCASE=1; export DUALCASE # for MKS sh +if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then : + emulate sh + NULLCMD=: + # Pre-4.2 versions of Zsh do word splitting on ${1+"$@"}, which + # is contrary to our usage. Disable this feature. + alias -g '${1+"$@"}'='"$@"' + setopt NO_GLOB_SUBST +else + case `(set -o) 2>/dev/null` in #( + *posix*) : + set -o posix ;; #( + *) : + ;; +esac +fi + + +as_nl=' +' +export as_nl +# Printing a long string crashes Solaris 7 /usr/bin/printf. 
+as_echo='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\' +as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo +as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo$as_echo +# Prefer a ksh shell builtin over an external printf program on Solaris, +# but without wasting forks for bash or zsh. +if test -z "$BASH_VERSION$ZSH_VERSION" \ + && (test "X`print -r -- $as_echo`" = "X$as_echo") 2>/dev/null; then + as_echo='print -r --' + as_echo_n='print -rn --' +elif (test "X`printf %s $as_echo`" = "X$as_echo") 2>/dev/null; then + as_echo='printf %s\n' + as_echo_n='printf %s' +else + if test "X`(/usr/ucb/echo -n -n $as_echo) 2>/dev/null`" = "X-n $as_echo"; then + as_echo_body='eval /usr/ucb/echo -n "$1$as_nl"' + as_echo_n='/usr/ucb/echo -n' + else + as_echo_body='eval expr "X$1" : "X\\(.*\\)"' + as_echo_n_body='eval + arg=$1; + case $arg in #( + *"$as_nl"*) + expr "X$arg" : "X\\(.*\\)$as_nl"; + arg=`expr "X$arg" : ".*$as_nl\\(.*\\)"`;; + esac; + expr "X$arg" : "X\\(.*\\)" | tr -d "$as_nl" + ' + export as_echo_n_body + as_echo_n='sh -c $as_echo_n_body as_echo' + fi + export as_echo_body + as_echo='sh -c $as_echo_body as_echo' +fi + +# The user is always right. +if test "${PATH_SEPARATOR+set}" != set; then + PATH_SEPARATOR=: + (PATH='/bin;/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 && { + (PATH='/bin:/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 || + PATH_SEPARATOR=';' + } +fi + + +# IFS +# We need space, tab and new line, in precisely that order. Quoting is +# there to prevent editors from complaining about space-tab. +# (If _AS_PATH_WALK were called with IFS unset, it would disable word +# splitting by setting IFS to empty value.) +IFS=" "" $as_nl" + +# Find who we are. Look in the path if we contain no directory separator. +as_myself= +case $0 in #(( + *[\\/]* ) as_myself=$0 ;; + *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break + done +IFS=$as_save_IFS + + ;; +esac +# We did not find ourselves, most probably we were run as `sh COMMAND' +# in which case we are not to be found in the path. +if test "x$as_myself" = x; then + as_myself=$0 +fi +if test ! -f "$as_myself"; then + $as_echo "$as_myself: error: cannot find myself; rerun with an absolute file name" >&2 + exit 1 +fi + +# Unset variables that we do not need and which cause bugs (e.g. in +# pre-3.0 UWIN ksh). But do not cause bugs in bash 2.01; the "|| exit 1" +# suppresses any "Segmentation fault" message there. '((' could +# trigger a bug in pdksh 5.2.14. +for as_var in BASH_ENV ENV MAIL MAILPATH +do eval test x\${$as_var+set} = xset \ + && ( (unset $as_var) || exit 1) >/dev/null 2>&1 && unset $as_var || : +done +PS1='$ ' +PS2='> ' +PS4='+ ' + +# NLS nuisances. +LC_ALL=C +export LC_ALL +LANGUAGE=C +export LANGUAGE + +# CDPATH. +(unset CDPATH) >/dev/null 2>&1 && unset CDPATH + + +# as_fn_error STATUS ERROR [LINENO LOG_FD] +# ---------------------------------------- +# Output "`basename $0`: error: ERROR" to stderr. If LINENO and LOG_FD are +# provided, also output the error to LOG_FD, referencing LINENO. Then exit the +# script with STATUS, using 1 if that was 0. +as_fn_error () +{ + as_status=$1; test $as_status -eq 0 && as_status=1 + if test "$4"; then + as_lineno=${as_lineno-"$3"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + $as_echo "$as_me:${as_lineno-$LINENO}: error: $2" >&$4 + fi + $as_echo "$as_me: error: $2" >&2 + as_fn_exit $as_status +} # as_fn_error + + +# as_fn_set_status STATUS +# ----------------------- +# Set $? to STATUS, without forking. +as_fn_set_status () +{ + return $1 +} # as_fn_set_status + +# as_fn_exit STATUS +# ----------------- +# Exit the shell with STATUS, even in a "trap 0" or "set -e" context. +as_fn_exit () +{ + set +e + as_fn_set_status $1 + exit $1 +} # as_fn_exit + +# as_fn_unset VAR +# --------------- +# Portably unset VAR. 
+as_fn_unset () +{ + { eval $1=; unset $1;} +} +as_unset=as_fn_unset +# as_fn_append VAR VALUE +# ---------------------- +# Append the text in VALUE to the end of the definition contained in VAR. Take +# advantage of any shell optimizations that allow amortized linear growth over +# repeated appends, instead of the typical quadratic growth present in naive +# implementations. +if (eval "as_var=1; as_var+=2; test x\$as_var = x12") 2>/dev/null; then : + eval 'as_fn_append () + { + eval $1+=\$2 + }' +else + as_fn_append () + { + eval $1=\$$1\$2 + } +fi # as_fn_append + +# as_fn_arith ARG... +# ------------------ +# Perform arithmetic evaluation on the ARGs, and store the result in the +# global $as_val. Take advantage of shells that can avoid forks. The arguments +# must be portable across $(()) and expr. +if (eval "test \$(( 1 + 1 )) = 2") 2>/dev/null; then : + eval 'as_fn_arith () + { + as_val=$(( $* )) + }' +else + as_fn_arith () + { + as_val=`expr "$@" || test $? -eq 1` + } +fi # as_fn_arith + + +if expr a : '\(a\)' >/dev/null 2>&1 && + test "X`expr 00001 : '.*\(...\)'`" = X001; then + as_expr=expr +else + as_expr=false +fi + +if (basename -- /) >/dev/null 2>&1 && test "X`basename -- / 2>&1`" = "X/"; then + as_basename=basename +else + as_basename=false +fi + +if (as_dir=`dirname -- /` && test "X$as_dir" = X/) >/dev/null 2>&1; then + as_dirname=dirname +else + as_dirname=false +fi + +as_me=`$as_basename -- "$0" || +$as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \ + X"$0" : 'X\(//\)$' \| \ + X"$0" : 'X\(/\)' \| . 2>/dev/null || +$as_echo X/"$0" | + sed '/^.*\/\([^/][^/]*\)\/*$/{ + s//\1/ + q + } + /^X\/\(\/\/\)$/{ + s//\1/ + q + } + /^X\/\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + +# Avoid depending upon Character Ranges. 
+as_cr_letters='abcdefghijklmnopqrstuvwxyz' +as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ' +as_cr_Letters=$as_cr_letters$as_cr_LETTERS +as_cr_digits='0123456789' +as_cr_alnum=$as_cr_Letters$as_cr_digits + +ECHO_C= ECHO_N= ECHO_T= +case `echo -n x` in #((((( +-n*) + case `echo 'xy\c'` in + *c*) ECHO_T=' ';; # ECHO_T is single tab character. + xy) ECHO_C='\c';; + *) echo `echo ksh88 bug on AIX 6.1` > /dev/null + ECHO_T=' ';; + esac;; +*) + ECHO_N='-n';; +esac + +rm -f conf$$ conf$$.exe conf$$.file +if test -d conf$$.dir; then + rm -f conf$$.dir/conf$$.file +else + rm -f conf$$.dir + mkdir conf$$.dir 2>/dev/null +fi +if (echo >conf$$.file) 2>/dev/null; then + if ln -s conf$$.file conf$$ 2>/dev/null; then + as_ln_s='ln -s' + # ... but there are two gotchas: + # 1) On MSYS, both `ln -s file dir' and `ln file dir' fail. + # 2) DJGPP < 2.04 has no symlinks; `ln -s' creates a wrapper executable. + # In both cases, we have to default to `cp -pR'. + ln -s conf$$.file conf$$.dir 2>/dev/null && test ! -f conf$$.exe || + as_ln_s='cp -pR' + elif ln conf$$.file conf$$ 2>/dev/null; then + as_ln_s=ln + else + as_ln_s='cp -pR' + fi +else + as_ln_s='cp -pR' +fi +rm -f conf$$ conf$$.exe conf$$.dir/conf$$.file conf$$.file +rmdir conf$$.dir 2>/dev/null + + +# as_fn_mkdir_p +# ------------- +# Create "$as_dir" as a directory, including parents if necessary. +as_fn_mkdir_p () +{ + + case $as_dir in #( + -*) as_dir=./$as_dir;; + esac + test -d "$as_dir" || eval $as_mkdir_p || { + as_dirs= + while :; do + case $as_dir in #( + *\'*) as_qdir=`$as_echo "$as_dir" | sed "s/'/'\\\\\\\\''/g"`;; #'( + *) as_qdir=$as_dir;; + esac + as_dirs="'$as_qdir' $as_dirs" + as_dir=`$as_dirname -- "$as_dir" || +$as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$as_dir" : 'X\(//\)[^/]' \| \ + X"$as_dir" : 'X\(//\)$' \| \ + X"$as_dir" : 'X\(/\)' \| . 
2>/dev/null || +$as_echo X"$as_dir" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ + s//\1/ + q + } + /^X\(\/\/\)[^/].*/{ + s//\1/ + q + } + /^X\(\/\/\)$/{ + s//\1/ + q + } + /^X\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + test -d "$as_dir" && break + done + test -z "$as_dirs" || eval "mkdir $as_dirs" + } || test -d "$as_dir" || as_fn_error $? "cannot create directory $as_dir" + + +} # as_fn_mkdir_p +if mkdir -p . 2>/dev/null; then + as_mkdir_p='mkdir -p "$as_dir"' +else + test -d ./-p && rmdir ./-p + as_mkdir_p=false +fi + + +# as_fn_executable_p FILE +# ----------------------- +# Test if FILE is an executable regular file. +as_fn_executable_p () +{ + test -f "$1" && test -x "$1" +} # as_fn_executable_p +as_test_x='test -x' +as_executable_p=as_fn_executable_p + +# Sed expression to map a string onto a valid CPP name. +as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'" + +# Sed expression to map a string onto a valid variable name. +as_tr_sh="eval sed 'y%*+%pp%;s%[^_$as_cr_alnum]%_%g'" + + +exec 6>&1 +## ----------------------------------- ## +## Main body of $CONFIG_STATUS script. ## +## ----------------------------------- ## +_ASEOF +test $as_write_fail = 0 && chmod +x $CONFIG_STATUS || ac_write_fail=1 + +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +# Save the log message, to keep $0 and so on meaningful, and to +# report actual input values of CONFIG_FILES etc. instead of their +# values after options handling. +ac_log=" +This file was extended by $as_me, which was +generated by GNU Autoconf 2.69. Invocation command line was + + CONFIG_FILES = $CONFIG_FILES + CONFIG_HEADERS = $CONFIG_HEADERS + CONFIG_LINKS = $CONFIG_LINKS + CONFIG_COMMANDS = $CONFIG_COMMANDS + $ $0 $@ + +on `(hostname || uname -n) 2>/dev/null | sed 1q` +" + +_ACEOF + +case $ac_config_files in *" +"*) set x $ac_config_files; shift; ac_config_files=$*;; +esac + + + +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +# Files that config.status was made for. 
+config_files="$ac_config_files" + +_ACEOF + +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +ac_cs_usage="\ +\`$as_me' instantiates files and other configuration actions +from templates according to the current configuration. Unless the files +and actions are specified as TAGs, all are instantiated by default. + +Usage: $0 [OPTION]... [TAG]... + + -h, --help print this help, then exit + -V, --version print version number and configuration settings, then exit + --config print configuration, then exit + -q, --quiet, --silent + do not print progress messages + -d, --debug don't remove temporary files + --recheck update $as_me by reconfiguring in the same conditions + --file=FILE[:TEMPLATE] + instantiate the configuration file FILE + +Configuration files: +$config_files + +Report bugs to the package provider." + +_ACEOF +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`" +ac_cs_version="\\ +config.status +configured by $0, generated by GNU Autoconf 2.69, + with options \\"\$ac_cs_config\\" + +Copyright (C) 2012 Free Software Foundation, Inc. +This config.status script is free software; the Free Software Foundation +gives unlimited permission to copy, distribute and modify it." + +ac_pwd='$ac_pwd' +srcdir='$srcdir' +INSTALL='$INSTALL' +test -n "\$AWK" || AWK=awk +_ACEOF + +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +# The default lists apply if the user does not specify any file. +ac_need_defaults=: +while test $# != 0 +do + case $1 in + --*=?*) + ac_option=`expr "X$1" : 'X\([^=]*\)='` + ac_optarg=`expr "X$1" : 'X[^=]*=\(.*\)'` + ac_shift=: + ;; + --*=) + ac_option=`expr "X$1" : 'X\([^=]*\)='` + ac_optarg= + ac_shift=: + ;; + *) + ac_option=$1 + ac_optarg=$2 + ac_shift=shift + ;; + esac + + case $ac_option in + # Handling of the options. 
+ -recheck | --recheck | --rechec | --reche | --rech | --rec | --re | --r) + ac_cs_recheck=: ;; + --version | --versio | --versi | --vers | --ver | --ve | --v | -V ) + $as_echo "$ac_cs_version"; exit ;; + --config | --confi | --conf | --con | --co | --c ) + $as_echo "$ac_cs_config"; exit ;; + --debug | --debu | --deb | --de | --d | -d ) + debug=: ;; + --file | --fil | --fi | --f ) + $ac_shift + case $ac_optarg in + *\'*) ac_optarg=`$as_echo "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"` ;; + '') as_fn_error $? "missing file argument" ;; + esac + as_fn_append CONFIG_FILES " '$ac_optarg'" + ac_need_defaults=false;; + --he | --h | --help | --hel | -h ) + $as_echo "$ac_cs_usage"; exit ;; + -q | -quiet | --quiet | --quie | --qui | --qu | --q \ + | -silent | --silent | --silen | --sile | --sil | --si | --s) + ac_cs_silent=: ;; + + # This is an error. + -*) as_fn_error $? "unrecognized option: \`$1' +Try \`$0 --help' for more information." ;; + + *) as_fn_append ac_config_targets " $1" + ac_need_defaults=false ;; + + esac + shift +done + +ac_configure_extra_args= + +if $ac_cs_silent; then + exec 6>/dev/null + ac_configure_extra_args="$ac_configure_extra_args --silent" +fi + +_ACEOF +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +if \$ac_cs_recheck; then + set X $SHELL '$0' $ac_configure_args \$ac_configure_extra_args --no-create --no-recursion + shift + \$as_echo "running CONFIG_SHELL=$SHELL \$*" >&6 + CONFIG_SHELL='$SHELL' + export CONFIG_SHELL + exec "\$@" +fi + +_ACEOF +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +exec 5>>config.log +{ + echo + sed 'h;s/./-/g;s/^.../## /;s/...$/ ##/;p;x;p;x' <<_ASBOX +## Running $as_me. ## +_ASBOX + $as_echo "$ac_log" +} >&5 + +_ACEOF +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +_ACEOF + +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 + +# Handling of arguments. +for ac_config_target in $ac_config_targets +do + case $ac_config_target in + "Makefile") CONFIG_FILES="$CONFIG_FILES Makefile" ;; + + *) as_fn_error $? 
"invalid argument: \`$ac_config_target'" "$LINENO" 5;; + esac +done + + +# If the user did not use the arguments to specify the items to instantiate, +# then the envvar interface is used. Set only those that are not. +# We use the long form for the default assignment because of an extremely +# bizarre bug on SunOS 4.1.3. +if $ac_need_defaults; then + test "${CONFIG_FILES+set}" = set || CONFIG_FILES=$config_files +fi + +# Have a temporary directory for convenience. Make it in the build tree +# simply because there is no reason against having it here, and in addition, +# creating and moving files from /tmp can sometimes cause problems. +# Hook for its removal unless debugging. +# Note that there is a small window in which the directory will not be cleaned: +# after its creation but before its name has been assigned to `$tmp'. +$debug || +{ + tmp= ac_tmp= + trap 'exit_status=$? + : "${ac_tmp:=$tmp}" + { test ! -d "$ac_tmp" || rm -fr "$ac_tmp"; } && exit $exit_status +' 0 + trap 'as_fn_exit 1' 1 2 13 15 +} +# Create a (secure) tmp directory for tmp files. + +{ + tmp=`(umask 077 && mktemp -d "./confXXXXXX") 2>/dev/null` && + test -d "$tmp" +} || +{ + tmp=./conf$$-$RANDOM + (umask 077 && mkdir "$tmp") +} || as_fn_error $? "cannot create a temporary directory in ." "$LINENO" 5 +ac_tmp=$tmp + +# Set up the scripts for CONFIG_FILES section. +# No need to generate them if there are no CONFIG_FILES. +# This happens for instance with `./config.status config.h'. +if test -n "$CONFIG_FILES"; then + + +ac_cr=`echo X | tr X '\015'` +# On cygwin, bash can eat \r inside `` if the user requested igncr. +# But we know of no other shell where ac_cr would be empty at this +# point, so we can use a bashism as a fallback. 
+if test "x$ac_cr" = x; then + eval ac_cr=\$\'\\r\' +fi +ac_cs_awk_cr=`$AWK 'BEGIN { print "a\rb" }' /dev/null` +if test "$ac_cs_awk_cr" = "a${ac_cr}b"; then + ac_cs_awk_cr='\\r' +else + ac_cs_awk_cr=$ac_cr +fi + +echo 'BEGIN {' >"$ac_tmp/subs1.awk" && +_ACEOF + + +{ + echo "cat >conf$$subs.awk <<_ACEOF" && + echo "$ac_subst_vars" | sed 's/.*/&!$&$ac_delim/' && + echo "_ACEOF" +} >conf$$subs.sh || + as_fn_error $? "could not make $CONFIG_STATUS" "$LINENO" 5 +ac_delim_num=`echo "$ac_subst_vars" | grep -c '^'` +ac_delim='%!_!# ' +for ac_last_try in false false false false false :; do + . ./conf$$subs.sh || + as_fn_error $? "could not make $CONFIG_STATUS" "$LINENO" 5 + + ac_delim_n=`sed -n "s/.*$ac_delim\$/X/p" conf$$subs.awk | grep -c X` + if test $ac_delim_n = $ac_delim_num; then + break + elif $ac_last_try; then + as_fn_error $? "could not make $CONFIG_STATUS" "$LINENO" 5 + else + ac_delim="$ac_delim!$ac_delim _$ac_delim!! " + fi +done +rm -f conf$$subs.sh + +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +cat >>"\$ac_tmp/subs1.awk" <<\\_ACAWK && +_ACEOF +sed -n ' +h +s/^/S["/; s/!.*/"]=/ +p +g +s/^[^!]*!// +:repl +t repl +s/'"$ac_delim"'$// +t delim +:nl +h +s/\(.\{148\}\)..*/\1/ +t more1 +s/["\\]/\\&/g; s/^/"/; s/$/\\n"\\/ +p +n +b repl +:more1 +s/["\\]/\\&/g; s/^/"/; s/$/"\\/ +p +g +s/.\{148\}// +t nl +:delim +h +s/\(.\{148\}\)..*/\1/ +t more2 +s/["\\]/\\&/g; s/^/"/; s/$/"/ +p +b +:more2 +s/["\\]/\\&/g; s/^/"/; s/$/"\\/ +p +g +s/.\{148\}// +t delim +' >$CONFIG_STATUS || ac_write_fail=1 +rm -f conf$$subs.awk +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +_ACAWK +cat >>"\$ac_tmp/subs1.awk" <<_ACAWK && + for (key in S) S_is_set[key] = 1 + FS = "" + +} +{ + line = $ 0 + nfields = split(line, field, "@") + substed = 0 + len = length(field[1]) + for (i = 2; i < nfields; i++) { + key = field[i] + keylen = length(key) + if (S_is_set[key]) { + value = S[key] + line = substr(line, 1, len) "" value "" substr(line, len + keylen + 3) + len += length(value) + 
length(field[++i]) + substed = 1 + } else + len += 1 + keylen + } + + print line +} + +_ACAWK +_ACEOF +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +if sed "s/$ac_cr//" < /dev/null > /dev/null 2>&1; then + sed "s/$ac_cr\$//; s/$ac_cr/$ac_cs_awk_cr/g" +else + cat +fi < "$ac_tmp/subs1.awk" > "$ac_tmp/subs.awk" \ + || as_fn_error $? "could not setup config files machinery" "$LINENO" 5 +_ACEOF + +# VPATH may cause trouble with some makes, so we remove sole $(srcdir), +# ${srcdir} and @srcdir@ entries from VPATH if srcdir is ".", strip leading and +# trailing colons and then remove the whole line if VPATH becomes empty +# (actually we leave an empty line to preserve line numbers). +if test "x$srcdir" = x.; then + ac_vpsub='/^[ ]*VPATH[ ]*=[ ]*/{ +h +s/// +s/^/:/ +s/[ ]*$/:/ +s/:\$(srcdir):/:/g +s/:\${srcdir}:/:/g +s/:@srcdir@:/:/g +s/^:*// +s/:*$// +x +s/\(=[ ]*\).*/\1/ +G +s/\n// +s/^[^=]*=[ ]*$// +}' +fi + +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +fi # test -n "$CONFIG_FILES" + + +eval set X " :F $CONFIG_FILES " +shift +for ac_tag +do + case $ac_tag in + :[FHLC]) ac_mode=$ac_tag; continue;; + esac + case $ac_mode$ac_tag in + :[FHL]*:*);; + :L* | :C*:*) as_fn_error $? "invalid tag \`$ac_tag'" "$LINENO" 5;; + :[FH]-) ac_tag=-:-;; + :[FH]*) ac_tag=$ac_tag:$ac_tag.in;; + esac + ac_save_IFS=$IFS + IFS=: + set x $ac_tag + IFS=$ac_save_IFS + shift + ac_file=$1 + shift + + case $ac_mode in + :L) ac_source=$1;; + :[FH]) + ac_file_inputs= + for ac_f + do + case $ac_f in + -) ac_f="$ac_tmp/stdin";; + *) # Look for the file first in the build tree, then in the source tree + # (if the path is not absolute). The absolute path cannot be DOS-style, + # because $ac_f cannot contain `:'. 
+ test -f "$ac_f" || + case $ac_f in + [\\/$]*) false;; + *) test -f "$srcdir/$ac_f" && ac_f="$srcdir/$ac_f";; + esac || + as_fn_error 1 "cannot find input file: \`$ac_f'" "$LINENO" 5;; + esac + case $ac_f in *\'*) ac_f=`$as_echo "$ac_f" | sed "s/'/'\\\\\\\\''/g"`;; esac + as_fn_append ac_file_inputs " '$ac_f'" + done + + # Let's still pretend it is `configure' which instantiates (i.e., don't + # use $as_me), people would be surprised to read: + # /* config.h. Generated by config.status. */ + configure_input='Generated from '` + $as_echo "$*" | sed 's|^[^:]*/||;s|:[^:]*/|, |g' + `' by configure.' + if test x"$ac_file" != x-; then + configure_input="$ac_file. $configure_input" + { $as_echo "$as_me:${as_lineno-$LINENO}: creating $ac_file" >&5 +$as_echo "$as_me: creating $ac_file" >&6;} + fi + # Neutralize special characters interpreted by sed in replacement strings. + case $configure_input in #( + *\&* | *\|* | *\\* ) + ac_sed_conf_input=`$as_echo "$configure_input" | + sed 's/[\\\\&|]/\\\\&/g'`;; #( + *) ac_sed_conf_input=$configure_input;; + esac + + case $ac_tag in + *:-:* | *:-) cat >"$ac_tmp/stdin" \ + || as_fn_error $? "could not create $ac_file" "$LINENO" 5 ;; + esac + ;; + esac + + ac_dir=`$as_dirname -- "$ac_file" || +$as_expr X"$ac_file" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$ac_file" : 'X\(//\)[^/]' \| \ + X"$ac_file" : 'X\(//\)$' \| \ + X"$ac_file" : 'X\(/\)' \| . 2>/dev/null || +$as_echo X"$ac_file" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ + s//\1/ + q + } + /^X\(\/\/\)[^/].*/{ + s//\1/ + q + } + /^X\(\/\/\)$/{ + s//\1/ + q + } + /^X\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + as_dir="$ac_dir"; as_fn_mkdir_p + ac_builddir=. + +case "$ac_dir" in +.) ac_dir_suffix= ac_top_builddir_sub=. ac_top_build_prefix= ;; +*) + ac_dir_suffix=/`$as_echo "$ac_dir" | sed 's|^\.[\\/]||'` + # A ".." for each directory in $ac_dir_suffix. 
+ ac_top_builddir_sub=`$as_echo "$ac_dir_suffix" | sed 's|/[^\\/]*|/..|g;s|/||'` + case $ac_top_builddir_sub in + "") ac_top_builddir_sub=. ac_top_build_prefix= ;; + *) ac_top_build_prefix=$ac_top_builddir_sub/ ;; + esac ;; +esac +ac_abs_top_builddir=$ac_pwd +ac_abs_builddir=$ac_pwd$ac_dir_suffix +# for backward compatibility: +ac_top_builddir=$ac_top_build_prefix + +case $srcdir in + .) # We are building in place. + ac_srcdir=. + ac_top_srcdir=$ac_top_builddir_sub + ac_abs_top_srcdir=$ac_pwd ;; + [\\/]* | ?:[\\/]* ) # Absolute name. + ac_srcdir=$srcdir$ac_dir_suffix; + ac_top_srcdir=$srcdir + ac_abs_top_srcdir=$srcdir ;; + *) # Relative name. + ac_srcdir=$ac_top_build_prefix$srcdir$ac_dir_suffix + ac_top_srcdir=$ac_top_build_prefix$srcdir + ac_abs_top_srcdir=$ac_pwd/$srcdir ;; +esac +ac_abs_srcdir=$ac_abs_top_srcdir$ac_dir_suffix + + + case $ac_mode in + :F) + # + # CONFIG_FILE + # + + case $INSTALL in + [\\/$]* | ?:[\\/]* ) ac_INSTALL=$INSTALL ;; + *) ac_INSTALL=$ac_top_build_prefix$INSTALL ;; + esac +_ACEOF + +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +# If the template does not know about datarootdir, expand it. +# FIXME: This hack should be removed a few years after 2.60. 
+ac_datarootdir_hack=; ac_datarootdir_seen= +ac_sed_dataroot=' +/datarootdir/ { + p + q +} +/@datadir@/p +/@docdir@/p +/@infodir@/p +/@localedir@/p +/@mandir@/p' +case `eval "sed -n \"\$ac_sed_dataroot\" $ac_file_inputs"` in +*datarootdir*) ac_datarootdir_seen=yes;; +*@datadir@*|*@docdir@*|*@infodir@*|*@localedir@*|*@mandir@*) + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $ac_file_inputs seems to ignore the --datarootdir setting" >&5 +$as_echo "$as_me: WARNING: $ac_file_inputs seems to ignore the --datarootdir setting" >&2;} +_ACEOF +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 + ac_datarootdir_hack=' + s&@datadir@&$datadir&g + s&@docdir@&$docdir&g + s&@infodir@&$infodir&g + s&@localedir@&$localedir&g + s&@mandir@&$mandir&g + s&\\\${datarootdir}&$datarootdir&g' ;; +esac +_ACEOF + +# Neutralize VPATH when `$srcdir' = `.'. +# Shell code in configure.ac might set extrasub. +# FIXME: do we really want to maintain this feature? +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +ac_sed_extra="$ac_vpsub +$extrasub +_ACEOF +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +:t +/@[a-zA-Z_][a-zA-Z_0-9]*@/!b +s|@configure_input@|$ac_sed_conf_input|;t t +s&@top_builddir@&$ac_top_builddir_sub&;t t +s&@top_build_prefix@&$ac_top_build_prefix&;t t +s&@srcdir@&$ac_srcdir&;t t +s&@abs_srcdir@&$ac_abs_srcdir&;t t +s&@top_srcdir@&$ac_top_srcdir&;t t +s&@abs_top_srcdir@&$ac_abs_top_srcdir&;t t +s&@builddir@&$ac_builddir&;t t +s&@abs_builddir@&$ac_abs_builddir&;t t +s&@abs_top_builddir@&$ac_abs_top_builddir&;t t +s&@INSTALL@&$ac_INSTALL&;t t +$ac_datarootdir_hack +" +eval sed \"\$ac_sed_extra\" "$ac_file_inputs" | $AWK -f "$ac_tmp/subs.awk" \ + >$ac_tmp/out || as_fn_error $? 
"could not create $ac_file" "$LINENO" 5 + +test -z "$ac_datarootdir_hack$ac_datarootdir_seen" && + { ac_out=`sed -n '/\${datarootdir}/p' "$ac_tmp/out"`; test -n "$ac_out"; } && + { ac_out=`sed -n '/^[ ]*datarootdir[ ]*:*=/p' \ + "$ac_tmp/out"`; test -z "$ac_out"; } && + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $ac_file contains a reference to the variable \`datarootdir' +which seems to be undefined. Please make sure it is defined" >&5 +$as_echo "$as_me: WARNING: $ac_file contains a reference to the variable \`datarootdir' +which seems to be undefined. Please make sure it is defined" >&2;} + + rm -f "$ac_tmp/stdin" + case $ac_file in + -) cat "$ac_tmp/out" && rm -f "$ac_tmp/out";; + *) rm -f "$ac_file" && mv "$ac_tmp/out" "$ac_file";; + esac \ + || as_fn_error $? "could not create $ac_file" "$LINENO" 5 + ;; + + + + esac + +done # for ac_tag + + +as_fn_exit 0 +_ACEOF +ac_clean_files=$ac_clean_files_save + +test $ac_write_fail = 0 || + as_fn_error $? "write failure creating $CONFIG_STATUS" "$LINENO" 5 + + +# configure is writing to config.log, and then calls config.status. +# config.status does its own redirection, appending to config.log. +# Unfortunately, on DOS this fails, as config.log is still kept open +# by configure, so config.status won't be able to write to it; its +# output is simply discarded. So we exec the FD to /dev/null, +# effectively closing config.log, so it can be properly (re)opened and +# appended to by config.status. When coming back to configure, we +# need to make the FD available again. +if test "$no_create" != yes; then + ac_cs_success=: + ac_config_status_args= + test "$silent" = yes && + ac_config_status_args="$ac_config_status_args --quiet" + exec 5>/dev/null + $SHELL $CONFIG_STATUS $ac_config_status_args || ac_cs_success=false + exec 5>>config.log + # Use ||, not &&, to avoid exiting from the if with $? = 1, which + # would make configure fail if this is the last instruction. 
+ $ac_cs_success || as_fn_exit 1 +fi +if test -n "$ac_unrecognized_opts" && test "$enable_option_checking" != no; then + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: unrecognized options: $ac_unrecognized_opts" >&5 +$as_echo "$as_me: WARNING: unrecognized options: $ac_unrecognized_opts" >&2;} +fi + diff --git a/tools/gzinject/configure.ac b/tools/gzinject/configure.ac new file mode 100644 index 000000000..bf8d7008d --- /dev/null +++ b/tools/gzinject/configure.ac @@ -0,0 +1,14 @@ +AC_PREREQ([2.69]) +AC_INIT +AC_PREFIX_DEFAULT([/usr/local]) +AC_PROG_CC +AC_PROG_INSTALL +CFLAGS="-O3" +AC_CHECK_HEADERS([wmmintrin.h], + [ + CFLAGS+=" -maes -msse -msse2" + CPPFLAGS="-DFASTAES" + ] +) +AC_CONFIG_FILES([Makefile]) +AC_OUTPUT diff --git a/tools/gzinject/install-sh b/tools/gzinject/install-sh new file mode 100644 index 000000000..0360b79e7 --- /dev/null +++ b/tools/gzinject/install-sh @@ -0,0 +1,501 @@ +#!/bin/sh +# install - install a program, script, or datafile + +scriptversion=2016-01-11.22; # UTC + +# This originates from X11R5 (mit/util/scripts/install.sh), which was +# later released in X11R6 (xc/config/util/install.sh) with the +# following copyright and license. +# +# Copyright (C) 1994 X Consortium +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to +# deal in the Software without restriction, including without limitation the +# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +# sell copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. 
+# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# X CONSORTIUM BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN +# AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNEC- +# TION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +# +# Except as contained in this notice, the name of the X Consortium shall not +# be used in advertising or otherwise to promote the sale, use or other deal- +# ings in this Software without prior written authorization from the X Consor- +# tium. +# +# +# FSF changes to this file are in the public domain. +# +# Calling this script install-sh is preferred over install.sh, to prevent +# 'make' implicit rules from creating a file called install from it +# when there is no Makefile. +# +# This script is compatible with the BSD install script, but was written +# from scratch. + +tab=' ' +nl=' +' +IFS=" $tab$nl" + +# Set DOITPROG to "echo" to test this script. + +doit=${DOITPROG-} +doit_exec=${doit:-exec} + +# Put in absolute file names if you don't have them in your path; +# or use environment vars. + +chgrpprog=${CHGRPPROG-chgrp} +chmodprog=${CHMODPROG-chmod} +chownprog=${CHOWNPROG-chown} +cmpprog=${CMPPROG-cmp} +cpprog=${CPPROG-cp} +mkdirprog=${MKDIRPROG-mkdir} +mvprog=${MVPROG-mv} +rmprog=${RMPROG-rm} +stripprog=${STRIPPROG-strip} + +posix_mkdir= + +# Desired mode of installed file. +mode=0755 + +chgrpcmd= +chmodcmd=$chmodprog +chowncmd= +mvcmd=$mvprog +rmcmd="$rmprog -f" +stripcmd= + +src= +dst= +dir_arg= +dst_arg= + +copy_on_change=false +is_target_a_directory=possibly + +usage="\ +Usage: $0 [OPTION]... [-T] SRCFILE DSTFILE + or: $0 [OPTION]... SRCFILES... DIRECTORY + or: $0 [OPTION]... -t DIRECTORY SRCFILES... + or: $0 [OPTION]... -d DIRECTORIES... + +In the 1st form, copy SRCFILE to DSTFILE. 
+In the 2nd and 3rd, copy all SRCFILES to DIRECTORY. +In the 4th, create DIRECTORIES. + +Options: + --help display this help and exit. + --version display version info and exit. + + -c (ignored) + -C install only if different (preserve the last data modification time) + -d create directories instead of installing files. + -g GROUP $chgrpprog installed files to GROUP. + -m MODE $chmodprog installed files to MODE. + -o USER $chownprog installed files to USER. + -s $stripprog installed files. + -t DIRECTORY install into DIRECTORY. + -T report an error if DSTFILE is a directory. + +Environment variables override the default commands: + CHGRPPROG CHMODPROG CHOWNPROG CMPPROG CPPROG MKDIRPROG MVPROG + RMPROG STRIPPROG +" + +while test $# -ne 0; do + case $1 in + -c) ;; + + -C) copy_on_change=true;; + + -d) dir_arg=true;; + + -g) chgrpcmd="$chgrpprog $2" + shift;; + + --help) echo "$usage"; exit $?;; + + -m) mode=$2 + case $mode in + *' '* | *"$tab"* | *"$nl"* | *'*'* | *'?'* | *'['*) + echo "$0: invalid mode: $mode" >&2 + exit 1;; + esac + shift;; + + -o) chowncmd="$chownprog $2" + shift;; + + -s) stripcmd=$stripprog;; + + -t) + is_target_a_directory=always + dst_arg=$2 + # Protect names problematic for 'test' and other utilities. + case $dst_arg in + -* | [=\(\)!]) dst_arg=./$dst_arg;; + esac + shift;; + + -T) is_target_a_directory=never;; + + --version) echo "$0 $scriptversion"; exit $?;; + + --) shift + break;; + + -*) echo "$0: invalid option: $1" >&2 + exit 1;; + + *) break;; + esac + shift +done + +# We allow the use of options -d and -T together, by making -d +# take the precedence; this is for compatibility with GNU install. + +if test -n "$dir_arg"; then + if test -n "$dst_arg"; then + echo "$0: target directory not allowed when installing a directory." >&2 + exit 1 + fi +fi + +if test $# -ne 0 && test -z "$dir_arg$dst_arg"; then + # When -d is used, all remaining arguments are directories to create. + # When -t is used, the destination is already specified. 
+ # Otherwise, the last argument is the destination. Remove it from $@. + for arg + do + if test -n "$dst_arg"; then + # $@ is not empty: it contains at least $arg. + set fnord "$@" "$dst_arg" + shift # fnord + fi + shift # arg + dst_arg=$arg + # Protect names problematic for 'test' and other utilities. + case $dst_arg in + -* | [=\(\)!]) dst_arg=./$dst_arg;; + esac + done +fi + +if test $# -eq 0; then + if test -z "$dir_arg"; then + echo "$0: no input file specified." >&2 + exit 1 + fi + # It's OK to call 'install-sh -d' without argument. + # This can happen when creating conditional directories. + exit 0 +fi + +if test -z "$dir_arg"; then + if test $# -gt 1 || test "$is_target_a_directory" = always; then + if test ! -d "$dst_arg"; then + echo "$0: $dst_arg: Is not a directory." >&2 + exit 1 + fi + fi +fi + +if test -z "$dir_arg"; then + do_exit='(exit $ret); exit $ret' + trap "ret=129; $do_exit" 1 + trap "ret=130; $do_exit" 2 + trap "ret=141; $do_exit" 13 + trap "ret=143; $do_exit" 15 + + # Set umask so as not to create temps with too-generous modes. + # However, 'strip' requires both read and write access to temps. + case $mode in + # Optimize common cases. + *644) cp_umask=133;; + *755) cp_umask=22;; + + *[0-7]) + if test -z "$stripcmd"; then + u_plus_rw= + else + u_plus_rw='% 200' + fi + cp_umask=`expr '(' 777 - $mode % 1000 ')' $u_plus_rw`;; + *) + if test -z "$stripcmd"; then + u_plus_rw= + else + u_plus_rw=,u+rw + fi + cp_umask=$mode$u_plus_rw;; + esac +fi + +for src +do + # Protect names problematic for 'test' and other utilities. + case $src in + -* | [=\(\)!]) src=./$src;; + esac + + if test -n "$dir_arg"; then + dst=$src + dstdir=$dst + test -d "$dstdir" + dstdir_status=$? + else + + # Waiting for this to be detected by the "$cpprog $src $dsttmp" command + # might cause directories to be created, which would be especially bad + # if $src (and thus $dsttmp) contains '*'. + if test ! -f "$src" && test ! -d "$src"; then + echo "$0: $src does not exist." 
>&2 + exit 1 + fi + + if test -z "$dst_arg"; then + echo "$0: no destination specified." >&2 + exit 1 + fi + dst=$dst_arg + + # If destination is a directory, append the input filename; won't work + # if double slashes aren't ignored. + if test -d "$dst"; then + if test "$is_target_a_directory" = never; then + echo "$0: $dst_arg: Is a directory" >&2 + exit 1 + fi + dstdir=$dst + dst=$dstdir/`basename "$src"` + dstdir_status=0 + else + dstdir=`dirname "$dst"` + test -d "$dstdir" + dstdir_status=$? + fi + fi + + obsolete_mkdir_used=false + + if test $dstdir_status != 0; then + case $posix_mkdir in + '') + # Create intermediate dirs using mode 755 as modified by the umask. + # This is like FreeBSD 'install' as of 1997-10-28. + umask=`umask` + case $stripcmd.$umask in + # Optimize common cases. + *[2367][2367]) mkdir_umask=$umask;; + .*0[02][02] | .[02][02] | .[02]) mkdir_umask=22;; + + *[0-7]) + mkdir_umask=`expr $umask + 22 \ + - $umask % 100 % 40 + $umask % 20 \ + - $umask % 10 % 4 + $umask % 2 + `;; + *) mkdir_umask=$umask,go-w;; + esac + + # With -d, create the new directory with the user-specified mode. + # Otherwise, rely on $mkdir_umask. + if test -n "$dir_arg"; then + mkdir_mode=-m$mode + else + mkdir_mode= + fi + + posix_mkdir=false + case $umask in + *[123567][0-7][0-7]) + # POSIX mkdir -p sets u+wx bits regardless of umask, which + # is incompatible with FreeBSD 'install' when (umask & 300) != 0. + ;; + *) + tmpdir=${TMPDIR-/tmp}/ins$RANDOM-$$ + trap 'ret=$?; rmdir "$tmpdir/d" "$tmpdir" 2>/dev/null; exit $ret' 0 + + if (umask $mkdir_umask && + exec $mkdirprog $mkdir_mode -p -- "$tmpdir/d") >/dev/null 2>&1 + then + if test -z "$dir_arg" || { + # Check for POSIX incompatibilities with -m. + # HP-UX 11.23 and IRIX 6.5 mkdir -m -p sets group- or + # other-writable bit of parent directory when it shouldn't. + # FreeBSD 6.1 mkdir -m -p sets mode of existing directory. 
+ ls_ld_tmpdir=`ls -ld "$tmpdir"` + case $ls_ld_tmpdir in + d????-?r-*) different_mode=700;; + d????-?--*) different_mode=755;; + *) false;; + esac && + $mkdirprog -m$different_mode -p -- "$tmpdir" && { + ls_ld_tmpdir_1=`ls -ld "$tmpdir"` + test "$ls_ld_tmpdir" = "$ls_ld_tmpdir_1" + } + } + then posix_mkdir=: + fi + rmdir "$tmpdir/d" "$tmpdir" + else + # Remove any dirs left behind by ancient mkdir implementations. + rmdir ./$mkdir_mode ./-p ./-- 2>/dev/null + fi + trap '' 0;; + esac;; + esac + + if + $posix_mkdir && ( + umask $mkdir_umask && + $doit_exec $mkdirprog $mkdir_mode -p -- "$dstdir" + ) + then : + else + + # The umask is ridiculous, or mkdir does not conform to POSIX, + # or it failed possibly due to a race condition. Create the + # directory the slow way, step by step, checking for races as we go. + + case $dstdir in + /*) prefix='/';; + [-=\(\)!]*) prefix='./';; + *) prefix='';; + esac + + oIFS=$IFS + IFS=/ + set -f + set fnord $dstdir + shift + set +f + IFS=$oIFS + + prefixes= + + for d + do + test X"$d" = X && continue + + prefix=$prefix$d + if test -d "$prefix"; then + prefixes= + else + if $posix_mkdir; then + (umask=$mkdir_umask && + $doit_exec $mkdirprog $mkdir_mode -p -- "$dstdir") && break + # Don't fail if two instances are running concurrently. + test -d "$prefix" || exit 1 + else + case $prefix in + *\'*) qprefix=`echo "$prefix" | sed "s/'/'\\\\\\\\''/g"`;; + *) qprefix=$prefix;; + esac + prefixes="$prefixes '$qprefix'" + fi + fi + prefix=$prefix/ + done + + if test -n "$prefixes"; then + # Don't fail if two instances are running concurrently. 
+ (umask $mkdir_umask && + eval "\$doit_exec \$mkdirprog $prefixes") || + test -d "$dstdir" || exit 1 + obsolete_mkdir_used=true + fi + fi + fi + + if test -n "$dir_arg"; then + { test -z "$chowncmd" || $doit $chowncmd "$dst"; } && + { test -z "$chgrpcmd" || $doit $chgrpcmd "$dst"; } && + { test "$obsolete_mkdir_used$chowncmd$chgrpcmd" = false || + test -z "$chmodcmd" || $doit $chmodcmd $mode "$dst"; } || exit 1 + else + + # Make a couple of temp file names in the proper directory. + dsttmp=$dstdir/_inst.$$_ + rmtmp=$dstdir/_rm.$$_ + + # Trap to clean up those temp files at exit. + trap 'ret=$?; rm -f "$dsttmp" "$rmtmp" && exit $ret' 0 + + # Copy the file name to the temp name. + (umask $cp_umask && $doit_exec $cpprog "$src" "$dsttmp") && + + # and set any options; do chmod last to preserve setuid bits. + # + # If any of these fail, we abort the whole thing. If we want to + # ignore errors from any of these, just make sure not to ignore + # errors from the above "$doit $cpprog $src $dsttmp" command. + # + { test -z "$chowncmd" || $doit $chowncmd "$dsttmp"; } && + { test -z "$chgrpcmd" || $doit $chgrpcmd "$dsttmp"; } && + { test -z "$stripcmd" || $doit $stripcmd "$dsttmp"; } && + { test -z "$chmodcmd" || $doit $chmodcmd $mode "$dsttmp"; } && + + # If -C, don't bother to copy if it wouldn't change the file. + if $copy_on_change && + old=`LC_ALL=C ls -dlL "$dst" 2>/dev/null` && + new=`LC_ALL=C ls -dlL "$dsttmp" 2>/dev/null` && + set -f && + set X $old && old=:$2:$4:$5:$6 && + set X $new && new=:$2:$4:$5:$6 && + set +f && + test "$old" = "$new" && + $cmpprog "$dst" "$dsttmp" >/dev/null 2>&1 + then + rm -f "$dsttmp" + else + # Rename the file to the real destination. + $doit $mvcmd -f "$dsttmp" "$dst" 2>/dev/null || + + # The rename failed, perhaps because mv can't rename something else + # to itself, or perhaps because mv is so ancient that it does not + # support -f. + { + # Now remove or move aside any old file at destination location. 
+ # We try this two ways since rm can't unlink itself on some + # systems and the destination file might be busy for other + # reasons. In this case, the final cleanup might fail but the new + # file should still install successfully. + { + test ! -f "$dst" || + $doit $rmcmd -f "$dst" 2>/dev/null || + { $doit $mvcmd -f "$dst" "$rmtmp" 2>/dev/null && + { $doit $rmcmd -f "$rmtmp" 2>/dev/null; :; } + } || + { echo "$0: cannot unlink or rename $dst" >&2 + (exit 1); exit 1 + } + } && + + # Now rename the file to the real destination. + $doit $mvcmd "$dsttmp" "$dst" + } + fi || exit 1 + + trap '' 0 + fi +done + +# Local variables: +# eval: (add-hook 'write-file-hooks 'time-stamp) +# time-stamp-start: "scriptversion=" +# time-stamp-format: "%:y-%02m-%02d.%02H" +# time-stamp-time-zone: "UTC0" +# time-stamp-end: "; # UTC" +# End: diff --git a/tools/gzinject/patches/NACE.gzi b/tools/gzinject/patches/NACE.gzi new file mode 100644 index 000000000..30ab26f55 --- /dev/null +++ b/tools/gzinject/patches/NACE.gzi @@ -0,0 +1,6 @@ +# default gz patches for NACE +0000 00000000 00000001 +# use 8MB memory +0304 00002EB0 60000000 +# allocate 32MB for rom +0304 0005BFD4 3C807200 diff --git a/tools/gzinject/patches/NACJ.gzi b/tools/gzinject/patches/NACJ.gzi new file mode 100644 index 000000000..28415cb8f --- /dev/null +++ b/tools/gzinject/patches/NACJ.gzi @@ -0,0 +1,6 @@ +# default gz patches for NACJ +0000 00000000 00000001 +# use 8MB memory +0304 00002EB0 60000000 +# allocate 32MB for rom +0304 0005BF44 3C807200 diff --git a/tools/gzinject/patches/NKZE.gzi b/tools/gzinject/patches/NKZE.gzi new file mode 100644 index 000000000..440cbee08 --- /dev/null +++ b/tools/gzinject/patches/NKZE.gzi @@ -0,0 +1,16 @@ +# NKZE kz-NZSE +0000 00000000 00000001 +# decompress content1 +0100 00000000 00000000 +# apply 12MB fixes +0304 00010B58 3C8000C0 +0304 0004BD20 67E47000 +0304 0004BC80 3CA00100 +# apply controller remappings +0302 00148514 00000800 +0302 00148518 00000400 +0302 0014851C 00000200 +0302 
00148520 00000100 +0302 00148528 00000020 +# compress content1 +0200 00000000 00000000 \ No newline at end of file diff --git a/tools/gzinject/patches/NKZJ.gzi b/tools/gzinject/patches/NKZJ.gzi new file mode 100644 index 000000000..d2ee665dd --- /dev/null +++ b/tools/gzinject/patches/NKZJ.gzi @@ -0,0 +1,16 @@ +# NKZJ kz-NZSJ +0000 00000000 00000001 +# decompress content1 +0100 00000000 00000000 +# apply 12MB fixes +0304 00010B58 3C8000C0 +0304 0004BD94 67E47000 +0304 0004BCF4 3CA00100 +# apply controller remappings +0302 0014AA54 00000800 +0302 0014AA58 00000400 +0302 0014AA5C 00000200 +0302 0014AA60 00000100 +0302 0014AA68 00000020 +# compress content1 +0200 00000000 00000000 \ No newline at end of file diff --git a/tools/gzinject/patches/gz_default_remap.gzi b/tools/gzinject/patches/gz_default_remap.gzi new file mode 100644 index 000000000..22b857ac4 --- /dev/null +++ b/tools/gzinject/patches/gz_default_remap.gzi @@ -0,0 +1,9 @@ +# gz standard remapping for NACE and NACJ +0000 00000000 00000001 +# apply d-pad remappings +0302 0016BAF0 00000800 +0302 0016BAF4 00000400 +0302 0016BAF8 00000200 +0302 0016BAFC 00000100 +# apply c-stick remapping +0302 0016BB04 00000020 diff --git a/tools/gzinject/patches/gz_raphnet_remap.gzi b/tools/gzinject/patches/gz_raphnet_remap.gzi new file mode 100644 index 000000000..61e1a968c --- /dev/null +++ b/tools/gzinject/patches/gz_raphnet_remap.gzi @@ -0,0 +1,9 @@ +# gz raphnet remapping for NACE and NACJ +0000 00000000 00000001 +# apply d-pad remappings +0302 0016BAF0 00000800 +0302 0016BAF4 00000400 +0302 0016BAF8 00000200 +0302 0016BAFC 00000100 +# apply z-trigger remapping +0302 0016BAD8 00000020 diff --git a/tools/gzinject/patches/hb_NACE.gzi b/tools/gzinject/patches/hb_NACE.gzi new file mode 100644 index 000000000..dccad2e18 --- /dev/null +++ b/tools/gzinject/patches/hb_NACE.gzi @@ -0,0 +1,15 @@ +# homeboy patches for NACE +0000 00000000 00000001 +# resize MEM2 heap for homeboy +0302 00085732 00009010 +0304 00085738 60000000 +0304 
00085744 60000000 +# homeboy hook +0304 00002EA8 3c809000 +0304 00002EAC 38840800 +0304 00002EB0 7c8903a6 +0304 00002EB4 80630018 +0304 00002EB8 4e800421 +# Change iOS to 61 +0000 00000000 00000064 +0301 0000018B 0000003D \ No newline at end of file diff --git a/tools/gzinject/patches/hb_NACJ.gzi b/tools/gzinject/patches/hb_NACJ.gzi new file mode 100644 index 000000000..0e852695d --- /dev/null +++ b/tools/gzinject/patches/hb_NACJ.gzi @@ -0,0 +1,15 @@ +# homeboy patches for NACJ +0000 00000000 00000001 +# resize MEM2 heap for homeboy +0302 00085726 00009010 +0304 0008572C 60000000 +0304 00085738 60000000 +# homeboy hook +0304 00002EA8 3c809000 +0304 00002EAC 38840800 +0304 00002EB0 7c8903a6 +0304 00002EB4 80630018 +0304 00002EB8 4e800421 +# Change iOS to 61 +0000 00000000 00000064 +0301 0000018B 0000003D \ No newline at end of file diff --git a/tools/gzinject/patches/ootr_dpad_remap.gzi b/tools/gzinject/patches/ootr_dpad_remap.gzi new file mode 100644 index 000000000..57fcb9256 --- /dev/null +++ b/tools/gzinject/patches/ootr_dpad_remap.gzi @@ -0,0 +1,6 @@ +# ootr remapping for NACE and NACJ +0000 00000000 00000001 +# apply d-pad remappings +0302 0016BAF4 00000400 +0302 0016BAF8 00000200 +0302 0016BAFC 00000100 diff --git a/tools/gzinject/src/aes.c b/tools/gzinject/src/aes.c new file mode 100644 index 000000000..f7701f0a8 --- /dev/null +++ b/tools/gzinject/src/aes.c @@ -0,0 +1,567 @@ +/* + +This is an implementation of the AES algorithm, specifically ECB, CTR and CBC mode. +Block size can be chosen in aes.h - available choices are AES128, AES192, AES256. 
+ +The implementation is verified against the test vectors in: +National Institute of Standards and Technology Special Publication 800-38A 2001 ED + +ECB-AES128 +---------- + +plain-text: +6bc1bee22e409f96e93d7e117393172a +ae2d8a571e03ac9c9eb76fac45af8e51 +30c81c46a35ce411e5fbc1191a0a52ef +f69f2445df4f9b17ad2b417be66c3710 + +key: +2b7e151628aed2a6abf7158809cf4f3c + +resulting cipher +3ad77bb40d7a3660a89ecaf32466ef97 +f5d3d58503b9699de785895a96fdbaaf +43b1cd7f598ece23881b00e3ed030688 +7b0c785e27e8ad3f8223207104725dd4 + + +NOTE: String length must be evenly divisible by 16byte (str_len % 16 == 0) +You should pad the end of the string with zeros if this is not the case. +For AES192/256 the key size is proportionally larger. + +*/ + + +/*****************************************************************************/ +/* Includes: */ +/*****************************************************************************/ +#include +#include // CBC mode, for memset +#include "aes.h" + +/*****************************************************************************/ +/* Defines: */ +/*****************************************************************************/ +// The number of columns comprising a state in AES. This is a constant in AES. Value=4 +#define Nb 4 + +#if defined(AES256) && (AES256 == 1) +#define Nk 8 +#define Nr 14 +#elif defined(AES192) && (AES192 == 1) +#define Nk 6 +#define Nr 12 +#else +#define Nk 4 // The number of 32 bit words in a key. +#define Nr 10 // The number of rounds in AES Cipher. +#endif + +// jcallan@github points out that declaring Multiply as a function +// reduces code size considerably with the Keil ARM compiler. 
+// See this link for more information: https://github.com/kokke/tiny-AES-C/pull/3 +#ifndef MULTIPLY_AS_A_FUNCTION +#define MULTIPLY_AS_A_FUNCTION 0 +#endif + + + + +/*****************************************************************************/ +/* Private variables: */ +/*****************************************************************************/ +// state - array holding the intermediate results during decryption. +typedef uint8_t state_t[4][4]; + + + +// The lookup-tables are marked const so they can be placed in read-only storage instead of RAM +// The numbers below can be computed dynamically trading ROM for RAM - +// This can be useful in (embedded) bootloader applications, where ROM is often limited. +static const uint8_t sbox[256] = { + //0 1 2 3 4 5 6 7 8 9 A B C D E F + 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76, + 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0, + 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15, + 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75, + 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84, + 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf, + 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8, + 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2, + 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73, + 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb, + 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79, + 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08, + 0xba, 0x78, 0x25, 0x2e, 0x1c, 
0xa6, 0xb4, 0xc6, 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a, + 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e, + 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf, + 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16 }; + +static const uint8_t rsbox[256] = { + 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb, + 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb, + 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d, 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e, + 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2, 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25, + 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16, 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92, + 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda, 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84, + 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a, 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06, + 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02, 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b, + 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea, 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73, + 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85, 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e, + 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89, 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b, + 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20, 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4, + 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31, 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f, + 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d, 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef, + 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0, 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61, + 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26, 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d }; + +// The round constant 
word array, Rcon[i], contains the values given by +// x to the power (i-1) being powers of x (x is denoted as {02}) in the field GF(2^8) +static const uint8_t Rcon[11] = { + 0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36 }; + +/* +* Jordan Goulder points out in PR #12 (https://github.com/kokke/tiny-AES-C/pull/12), +* that you can remove most of the elements in the Rcon array, because they are unused. +* +* From Wikipedia's article on the Rijndael key schedule @ https://en.wikipedia.org/wiki/Rijndael_key_schedule#Rcon +* +* "Only the first some of these constants are actually used – up to rcon[10] for AES-128 (as 11 round keys are needed), +* up to rcon[8] for AES-192, up to rcon[7] for AES-256. rcon[0] is not used in AES algorithm." +*/ + + +/*****************************************************************************/ +/* Private functions: */ +/*****************************************************************************/ +/* +static uint8_t getSBoxValue(uint8_t num) +{ +return sbox[num]; +} +*/ +#define getSBoxValue(num) (sbox[(num)]) +/* +static uint8_t getSBoxInvert(uint8_t num) +{ +return rsbox[num]; +} +*/ +#define getSBoxInvert(num) (rsbox[(num)]) + +// This function produces Nb(Nr+1) round keys. The round keys are used in each round to decrypt the states. +static void KeyExpansion(uint8_t* RoundKey, const uint8_t* Key) +{ + unsigned i, j, k; + uint8_t tempa[4]; // Used for the column/row operations + + // The first round key is the key itself. + for (i = 0; i < Nk; ++i) + { + RoundKey[(i * 4) + 0] = Key[(i * 4) + 0]; + RoundKey[(i * 4) + 1] = Key[(i * 4) + 1]; + RoundKey[(i * 4) + 2] = Key[(i * 4) + 2]; + RoundKey[(i * 4) + 3] = Key[(i * 4) + 3]; + } + + // All other round keys are found from the previous round keys. 
+ for (i = Nk; i < Nb * (Nr + 1); ++i) + { + { + k = (i - 1) * 4; + tempa[0] = RoundKey[k + 0]; + tempa[1] = RoundKey[k + 1]; + tempa[2] = RoundKey[k + 2]; + tempa[3] = RoundKey[k + 3]; + + } + + if (i % Nk == 0) + { + // This function shifts the 4 bytes in a word to the left once. + // [a0,a1,a2,a3] becomes [a1,a2,a3,a0] + + // Function RotWord() + { + k = tempa[0]; + tempa[0] = tempa[1]; + tempa[1] = tempa[2]; + tempa[2] = tempa[3]; + tempa[3] = k; + } + + // SubWord() is a function that takes a four-byte input word and + // applies the S-box to each of the four bytes to produce an output word. + + // Function Subword() + { + tempa[0] = getSBoxValue(tempa[0]); + tempa[1] = getSBoxValue(tempa[1]); + tempa[2] = getSBoxValue(tempa[2]); + tempa[3] = getSBoxValue(tempa[3]); + } + + tempa[0] = tempa[0] ^ Rcon[i / Nk]; + } +#if defined(AES256) && (AES256 == 1) + if (i % Nk == 4) + { + // Function Subword() + { + tempa[0] = getSBoxValue(tempa[0]); + tempa[1] = getSBoxValue(tempa[1]); + tempa[2] = getSBoxValue(tempa[2]); + tempa[3] = getSBoxValue(tempa[3]); + } + } +#endif + j = i * 4; k = (i - Nk) * 4; + RoundKey[j + 0] = RoundKey[k + 0] ^ tempa[0]; + RoundKey[j + 1] = RoundKey[k + 1] ^ tempa[1]; + RoundKey[j + 2] = RoundKey[k + 2] ^ tempa[2]; + RoundKey[j + 3] = RoundKey[k + 3] ^ tempa[3]; + } +} + +void AES_init_ctx(struct AES_ctx* ctx, const uint8_t* key) +{ + KeyExpansion(ctx->RoundKey, key); +} +#if defined(CBC) && (CBC == 1) +void AES_init_ctx_iv(struct AES_ctx* ctx, const uint8_t* key, const uint8_t* iv) +{ + KeyExpansion(ctx->RoundKey, key); + memcpy(ctx->Iv, iv, AES_BLOCKLEN); +} +void AES_ctx_set_iv(struct AES_ctx* ctx, const uint8_t* iv) +{ + memcpy(ctx->Iv, iv, AES_BLOCKLEN); +} +#endif + +// This function adds the round key to state. +// The round key is added to the state by an XOR function. 
+static void AddRoundKey(uint8_t round, state_t* state, uint8_t* RoundKey) +{ + uint8_t i, j; + for (i = 0; i < 4; ++i) + { + for (j = 0; j < 4; ++j) + { + (*state)[i][j] ^= RoundKey[(round * Nb * 4) + (i * Nb) + j]; + } + } +} + +// The SubBytes Function Substitutes the values in the +// state matrix with values in an S-box. +static void SubBytes(state_t* state) +{ + uint8_t i, j; + for (i = 0; i < 4; ++i) + { + for (j = 0; j < 4; ++j) + { + (*state)[j][i] = getSBoxValue((*state)[j][i]); + } + } +} + +// The ShiftRows() function shifts the rows in the state to the left. +// Each row is shifted with different offset. +// Offset = Row number. So the first row is not shifted. +static void ShiftRows(state_t* state) +{ + uint8_t temp; + + // Rotate first row 1 columns to left + temp = (*state)[0][1]; + (*state)[0][1] = (*state)[1][1]; + (*state)[1][1] = (*state)[2][1]; + (*state)[2][1] = (*state)[3][1]; + (*state)[3][1] = temp; + + // Rotate second row 2 columns to left + temp = (*state)[0][2]; + (*state)[0][2] = (*state)[2][2]; + (*state)[2][2] = temp; + + temp = (*state)[1][2]; + (*state)[1][2] = (*state)[3][2]; + (*state)[3][2] = temp; + + // Rotate third row 3 columns to left + temp = (*state)[0][3]; + (*state)[0][3] = (*state)[3][3]; + (*state)[3][3] = (*state)[2][3]; + (*state)[2][3] = (*state)[1][3]; + (*state)[1][3] = temp; +} + +static uint8_t xtime(uint8_t x) +{ + return ((x << 1) ^ (((x >> 7) & 1) * 0x1b)); +} + +// MixColumns function mixes the columns of the state matrix +static void MixColumns(state_t* state) +{ + uint8_t i; + uint8_t Tmp, Tm, t; + for (i = 0; i < 4; ++i) + { + t = (*state)[i][0]; + Tmp = (*state)[i][0] ^ (*state)[i][1] ^ (*state)[i][2] ^ (*state)[i][3]; + Tm = (*state)[i][0] ^ (*state)[i][1]; Tm = xtime(Tm); (*state)[i][0] ^= Tm ^ Tmp; + Tm = (*state)[i][1] ^ (*state)[i][2]; Tm = xtime(Tm); (*state)[i][1] ^= Tm ^ Tmp; + Tm = (*state)[i][2] ^ (*state)[i][3]; Tm = xtime(Tm); (*state)[i][2] ^= Tm ^ Tmp; + Tm = (*state)[i][3] ^ t; Tm = 
xtime(Tm); (*state)[i][3] ^= Tm ^ Tmp; + } +} + +// Multiply is used to multiply numbers in the field GF(2^8) +#if MULTIPLY_AS_A_FUNCTION +static uint8_t Multiply(uint8_t x, uint8_t y) +{ + return (((y & 1) * x) ^ + ((y >> 1 & 1) * xtime(x)) ^ + ((y >> 2 & 1) * xtime(xtime(x))) ^ + ((y >> 3 & 1) * xtime(xtime(xtime(x)))) ^ + ((y >> 4 & 1) * xtime(xtime(xtime(xtime(x)))))); +} +#else +#define Multiply(x, y) \ + ( ((y & 1) * x) ^ \ + ((y>>1 & 1) * xtime(x)) ^ \ + ((y>>2 & 1) * xtime(xtime(x))) ^ \ + ((y>>3 & 1) * xtime(xtime(xtime(x)))) ^ \ + ((y>>4 & 1) * xtime(xtime(xtime(xtime(x)))))) \ + +#endif + +// MixColumns function mixes the columns of the state matrix. +// The method used to multiply may be difficult to understand for the inexperienced. +// Please use the references to gain more information. +static void InvMixColumns(state_t* state) +{ + int i; + uint8_t a, b, c, d; + for (i = 0; i < 4; ++i) + { + a = (*state)[i][0]; + b = (*state)[i][1]; + c = (*state)[i][2]; + d = (*state)[i][3]; + + (*state)[i][0] = Multiply(a, 0x0e) ^ Multiply(b, 0x0b) ^ Multiply(c, 0x0d) ^ Multiply(d, 0x09); + (*state)[i][1] = Multiply(a, 0x09) ^ Multiply(b, 0x0e) ^ Multiply(c, 0x0b) ^ Multiply(d, 0x0d); + (*state)[i][2] = Multiply(a, 0x0d) ^ Multiply(b, 0x09) ^ Multiply(c, 0x0e) ^ Multiply(d, 0x0b); + (*state)[i][3] = Multiply(a, 0x0b) ^ Multiply(b, 0x0d) ^ Multiply(c, 0x09) ^ Multiply(d, 0x0e); + } +} + + +// The SubBytes Function Substitutes the values in the +// state matrix with values in an S-box. 
+static void InvSubBytes(state_t* state) +{ + uint8_t i, j; + for (i = 0; i < 4; ++i) + { + for (j = 0; j < 4; ++j) + { + (*state)[j][i] = getSBoxInvert((*state)[j][i]); + } + } +} + +static void InvShiftRows(state_t* state) +{ + uint8_t temp; + + // Rotate first row 1 columns to right + temp = (*state)[3][1]; + (*state)[3][1] = (*state)[2][1]; + (*state)[2][1] = (*state)[1][1]; + (*state)[1][1] = (*state)[0][1]; + (*state)[0][1] = temp; + + // Rotate second row 2 columns to right + temp = (*state)[0][2]; + (*state)[0][2] = (*state)[2][2]; + (*state)[2][2] = temp; + + temp = (*state)[1][2]; + (*state)[1][2] = (*state)[3][2]; + (*state)[3][2] = temp; + + // Rotate third row 3 columns to right + temp = (*state)[0][3]; + (*state)[0][3] = (*state)[1][3]; + (*state)[1][3] = (*state)[2][3]; + (*state)[2][3] = (*state)[3][3]; + (*state)[3][3] = temp; +} + + +// Cipher is the main function that encrypts the PlainText. +static void Cipher(state_t* state, uint8_t* RoundKey) +{ + uint8_t round = 0; + + // Add the First round key to the state before starting the rounds. + AddRoundKey(0, state, RoundKey); + + // There will be Nr rounds. + // The first Nr-1 rounds are identical. + // These Nr-1 rounds are executed in the loop below. + for (round = 1; round < Nr; ++round) + { + SubBytes(state); + ShiftRows(state); + MixColumns(state); + AddRoundKey(round, state, RoundKey); + } + + // The last round is given below. + // The MixColumns function is not here in the last round. + SubBytes(state); + ShiftRows(state); + AddRoundKey(Nr, state, RoundKey); +} + +static void InvCipher(state_t* state, uint8_t* RoundKey) +{ + uint8_t round = 0; + + // Add the First round key to the state before starting the rounds. + AddRoundKey(Nr, state, RoundKey); + + // There will be Nr rounds. + // The first Nr-1 rounds are identical. + // These Nr-1 rounds are executed in the loop below. 
+ for (round = (Nr - 1); round > 0; --round) + { + InvShiftRows(state); + InvSubBytes(state); + AddRoundKey(round, state, RoundKey); + InvMixColumns(state); + } + + // The last round is given below. + // The MixColumns function is not here in the last round. + InvShiftRows(state); + InvSubBytes(state); + AddRoundKey(0, state, RoundKey); +} + + +/*****************************************************************************/ +/* Public functions: */ +/*****************************************************************************/ +#if defined(ECB) && (ECB == 1) + + +void AES_ECB_encrypt(struct AES_ctx *ctx, const uint8_t* buf) +{ + // The next function call encrypts the PlainText with the Key using AES algorithm. + Cipher((state_t*)buf, ctx->RoundKey); +} + +void AES_ECB_decrypt(struct AES_ctx* ctx, const uint8_t* buf) +{ + // The next function call decrypts the PlainText with the Key using AES algorithm. + InvCipher((state_t*)buf, ctx->RoundKey); +} + + +#endif // #if defined(ECB) && (ECB == 1) + + + + + +#if defined(CBC) && (CBC == 1) + + +static void XorWithIv(uint8_t* buf, uint8_t* Iv) +{ + uint8_t i; + for (i = 0; i < AES_BLOCKLEN; ++i) // The block in AES is always 128bit no matter the key size + { + buf[i] ^= Iv[i]; + } +} + +void AES_CBC_encrypt_buffer(struct AES_ctx *ctx, uint8_t* buf, uint32_t length) +{ + uintptr_t i; + uint8_t *Iv = ctx->Iv; + for (i = 0; i < length; i += AES_BLOCKLEN) + { + XorWithIv(buf, Iv); + Cipher((state_t*)buf, ctx->RoundKey); + Iv = buf; + buf += AES_BLOCKLEN; + //printf("Step %d - %d", i/16, i); + } + /* store Iv in ctx for next call */ + memcpy(ctx->Iv, Iv, AES_BLOCKLEN); +} + +void AES_CBC_decrypt_buffer(struct AES_ctx* ctx, uint8_t* buf, uint32_t length) +{ + uintptr_t i; + uint8_t storeNextIv[AES_BLOCKLEN]; + for (i = 0; i < length; i += AES_BLOCKLEN) + { + memcpy(storeNextIv, buf, AES_BLOCKLEN); + InvCipher((state_t*)buf, ctx->RoundKey); + XorWithIv(buf, ctx->Iv); + memcpy(ctx->Iv, storeNextIv, AES_BLOCKLEN); + buf += 
AES_BLOCKLEN; + } + +} + +#endif // #if defined(CBC) && (CBC == 1) + + + +#if defined(CTR) && (CTR == 1) + +/* Symmetrical operation: same function for encrypting as for decrypting. Note any IV/nonce should never be reused with the same key */ +void AES_CTR_xcrypt_buffer(struct AES_ctx* ctx, uint8_t* buf, uint32_t length) +{ + uint8_t buffer[AES_BLOCKLEN]; + + unsigned i; + int bi; + for (i = 0, bi = AES_BLOCKLEN; i < length; ++i, ++bi) + { + if (bi == AES_BLOCKLEN) /* we need to regen xor compliment in buffer */ + { + + memcpy(buffer, ctx->Iv, AES_BLOCKLEN); + Cipher((state_t*)buffer, ctx->RoundKey); + + /* Increment Iv and handle overflow */ + for (bi = (AES_BLOCKLEN - 1); bi >= 0; --bi) + { + /* inc will owerflow */ + if (ctx->Iv[bi] == 255) + { + ctx->Iv[bi] = 0; + continue; + } + ctx->Iv[bi] += 1; + break; + } + bi = 0; + } + + buf[i] = (buf[i] ^ buffer[bi]); + } +} + +#endif // #if defined(CTR) && (CTR == 1) + diff --git a/tools/gzinject/src/aes.h b/tools/gzinject/src/aes.h new file mode 100644 index 000000000..d1a468630 --- /dev/null +++ b/tools/gzinject/src/aes.h @@ -0,0 +1,90 @@ +#ifndef _AES_H_ +#define _AES_H_ + +#include + +// #define the macros below to 1/0 to enable/disable the mode of operation. +// +// CBC enables AES encryption in CBC-mode of operation. +// CTR enables encryption in counter-mode. +// ECB enables the basic ECB 16-byte block algorithm. All can be enabled simultaneously. + +// The #ifndef-guard allows it to be configured before #include'ing or at compile time. 
+#ifndef CBC +#define CBC 1 +#endif + +#ifndef ECB +#define ECB 1 +#endif + +#ifndef CTR +#define CTR 1 +#endif + + +#define AES128 1 +//#define AES192 1 +//#define AES256 1 + +#define AES_BLOCKLEN 16 //Block length in bytes AES is 128b block only + +#if defined(AES256) && (AES256 == 1) +#define AES_KEYLEN 32 +#define AES_keyExpSize 240 +#elif defined(AES192) && (AES192 == 1) +#define AES_KEYLEN 24 +#define AES_keyExpSize 208 +#else +#define AES_KEYLEN 16 // Key length in bytes +#define AES_keyExpSize 176 +#endif + +struct AES_ctx +{ + uint8_t RoundKey[AES_keyExpSize]; +#if (defined(CBC) && (CBC == 1)) || (defined(CTR) && (CTR == 1)) + uint8_t Iv[AES_BLOCKLEN]; +#endif +}; + +void AES_init_ctx(struct AES_ctx* ctx, const uint8_t* key); +#if defined(CBC) && (CBC == 1) +void AES_init_ctx_iv(struct AES_ctx* ctx, const uint8_t* key, const uint8_t* iv); +void AES_ctx_set_iv(struct AES_ctx* ctx, const uint8_t* iv); +#endif + +#if defined(ECB) && (ECB == 1) +// buffer size is exactly AES_BLOCKLEN bytes; +// you need only AES_init_ctx as IV is not used in ECB +// NB: ECB is considered insecure for most uses +void AES_ECB_encrypt(struct AES_ctx* ctx, const uint8_t* buf); +void AES_ECB_decrypt(struct AES_ctx* ctx, const uint8_t* buf); + +#endif // #if defined(ECB) && (ECB == !) + + +#if defined(CBC) && (CBC == 1) +// buffer size MUST be mutile of AES_BLOCKLEN; +// Suggest https://en.wikipedia.org/wiki/Padding_(cryptography)#PKCS7 for padding scheme +// NOTES: you need to set IV in ctx via AES_init_ctx_iv() or AES_ctx_set_iv() +// no IV should ever be reused with the same key +void AES_CBC_encrypt_buffer(struct AES_ctx* ctx, uint8_t* buf, uint32_t length); +void AES_CBC_decrypt_buffer(struct AES_ctx* ctx, uint8_t* buf, uint32_t length); + +#endif // #if defined(CBC) && (CBC == 1) + + +#if defined(CTR) && (CTR == 1) + +// Same function for encrypting as for decrypting. 
+// IV is incremented for every block, and used after encryption as XOR-compliment for output +// Suggesting https://en.wikipedia.org/wiki/Padding_(cryptography)#PKCS7 for padding scheme +// NOTES: you need to set IV in ctx with AES_init_ctx_iv() or AES_ctx_set_iv() +// no IV should ever be reused with the same key +void AES_CTR_xcrypt_buffer(struct AES_ctx* ctx, uint8_t* buf, uint32_t length); + +#endif // #if defined(CTR) && (CTR == 1) + + +#endif //_AES_H_ \ No newline at end of file diff --git a/tools/gzinject/src/doltool.c b/tools/gzinject/src/doltool.c new file mode 100644 index 000000000..dbc48e451 --- /dev/null +++ b/tools/gzinject/src/doltool.c @@ -0,0 +1,99 @@ +#include +#include +#include +#include + +#include "doltool.h" +#include "gzinject.h" + +void dol_load(doltool_ctxt_t *ctxt, uint8_t **file_data, uint32_t *file_size){ + + ctxt->file_data = file_data; + ctxt->file_size = file_size; + + memcpy(&ctxt->hdr,*(ctxt->file_data),sizeof(ctxt->hdr)); + + for(int i=0;i<7;i++){ + ctxt->hdr.text_size[i] = REVERSEENDIAN32(ctxt->hdr.text_size[i]); + ctxt->hdr.text_offset[i] = REVERSEENDIAN32(ctxt->hdr.text_offset[i]); + ctxt->hdr.text_loading[i] = REVERSEENDIAN32(ctxt->hdr.text_loading[i]); + if(ctxt->hdr.text_size[i]>0){ + ctxt->text_sections[i] = *(ctxt->file_data) + ctxt->hdr.text_offset[i]; + } + } + for(int i=0;i<11;i++){ + ctxt->hdr.data_size[i] = REVERSEENDIAN32(ctxt->hdr.data_size[i]); + ctxt->hdr.data_offset[i] = REVERSEENDIAN32(ctxt->hdr.data_offset[i]); + ctxt->hdr.data_loading[i] = REVERSEENDIAN32(ctxt->hdr.data_loading[i]); + if(ctxt->hdr.data_size[i]>0){ + ctxt->data_sections[i] = *(ctxt->file_data) + ctxt->hdr.data_offset[i]; + } + } +} + +void dol_inject(doltool_ctxt_t *ctxt, uint8_t *text, size_t size, uint32_t loading_addr){ + int injection_idx = 0; + size = addpadding(size,16); + for(int i = 0;i<7;i++){ + if(ctxt->text_sections[i]) continue; + injection_idx = i; + ctxt->text_sections[i] = text; + ctxt->hdr.text_loading[i] = loading_addr; + 
ctxt->hdr.text_offset[i] = ctxt->hdr.data_offset[0]; + ctxt->hdr.text_size[i] = size; + break; + } + for(int i=0;i<11;i++){ + if(ctxt->data_sections[i]){ + ctxt->hdr.data_offset[i] += ctxt->hdr.text_size[injection_idx]; + }else{ + break; + } + } +} + +size_t dol_save(doltool_ctxt_t *ctxt){ + uint32_t text_sizes[7]; + uint32_t data_sizes[11]; + memcpy(text_sizes,ctxt->hdr.text_size,sizeof(ctxt->hdr.text_size)); + memcpy(data_sizes,ctxt->hdr.data_size,sizeof(ctxt->hdr.data_size)); + uint32_t totalsize = 0x100; + for(int i=0;i<7;i++){ + totalsize += ctxt->hdr.text_size[i]; + ctxt->hdr.text_size[i] = REVERSEENDIAN32(ctxt->hdr.text_size[i]); + ctxt->hdr.text_offset[i] = REVERSEENDIAN32(ctxt->hdr.text_offset[i]); + ctxt->hdr.text_loading[i] = REVERSEENDIAN32(ctxt->hdr.text_loading[i]); + } + for(int i=0;i<11;i++){ + totalsize += ctxt->hdr.data_size[i]; + ctxt->hdr.data_size[i] = REVERSEENDIAN32(ctxt->hdr.data_size[i]); + ctxt->hdr.data_offset[i] = REVERSEENDIAN32(ctxt->hdr.data_offset[i]); + ctxt->hdr.data_loading[i] = REVERSEENDIAN32(ctxt->hdr.data_loading[i]); + } + + uint8_t *new_data = malloc(totalsize); + if(!new_data){ + perror("Could not allocate new dol"); + return 0; + } + memcpy(new_data,&ctxt->hdr,sizeof(ctxt->hdr)); + uint8_t *p = new_data + sizeof(ctxt->hdr); + for(int i=0;i<7;i++){ + if(ctxt->text_sections[i]){ + memcpy(p,ctxt->text_sections[i],text_sizes[i]); + p += text_sizes[i]; + } + } + for(int i=0;i<11;i++){ + if(ctxt->data_sections[i]){ + memcpy(p,ctxt->data_sections[i],data_sizes[i]); + p += data_sizes[i]; + } + } + free(*(ctxt->file_data)); + *(ctxt->file_data) = new_data; + if(ctxt->file_size){ + *(ctxt->file_size) = totalsize; + } + return totalsize; +} \ No newline at end of file diff --git a/tools/gzinject/src/doltool.h b/tools/gzinject/src/doltool.h new file mode 100644 index 000000000..3c30194e9 --- /dev/null +++ b/tools/gzinject/src/doltool.h @@ -0,0 +1,31 @@ +#ifndef _DOLTOOL_H +#define _DOLTOOL_H + +#include + +typedef struct { + uint32_t 
text_offset[7]; /* 0x000 */ + uint32_t data_offset[11]; /* 0x01C */ + uint32_t text_loading[7]; /* 0x048 */ + uint32_t data_loading[11]; /* 0x064 */ + uint32_t text_size[7]; /* 0x090 */ + uint32_t data_size[11]; /* 0x0AC */ + uint32_t bss_loading; /* 0x0D8 */ + uint32_t bss_size; /* 0x0DC */ + uint32_t entry; /* 0x0E0 */ + char padding[0x1C]; /* 0x0E4 */ +} dol_hdr_t; /* 0x100 */ + +typedef struct{ + dol_hdr_t hdr; + uint8_t *text_sections[7]; + uint8_t *data_sections[11]; + uint8_t **file_data; + uint32_t *file_size; +} doltool_ctxt_t; + +void dol_load(doltool_ctxt_t *ctxt, uint8_t **file_data, uint32_t *file_size); +void dol_inject(doltool_ctxt_t *ctxt, uint8_t *text, size_t size, uint32_t loading_addr); +size_t dol_save(doltool_ctxt_t *ctxt); + +#endif \ No newline at end of file diff --git a/tools/gzinject/src/fastaes.c b/tools/gzinject/src/fastaes.c new file mode 100644 index 000000000..ed6e9ae4b --- /dev/null +++ b/tools/gzinject/src/fastaes.c @@ -0,0 +1,115 @@ +#ifdef FASTAES +#include +#include "fastaes.h" + +static __m128i do_key_exp(__m128i a, __m128i b) { + __m128i tmp; + + b = _mm_shuffle_epi32(b, 0xFF); + tmp = _mm_slli_si128(a, 4); + a = _mm_xor_si128(a, tmp); + tmp = _mm_slli_si128(a, 4); + a = _mm_xor_si128(a, tmp); + tmp = _mm_slli_si128(a, 4); + a = _mm_xor_si128(a, tmp); + a = _mm_xor_si128(a, b); + + return a; +} + +static void key_expansion(const uint8_t *key, __m128i *key_sched, __m128i *dkey_sched) { + key_sched[0] = _mm_loadu_si128((const __m128i_u*)key); + key_sched[1] = do_key_exp(key_sched[0], _mm_aeskeygenassist_si128(key_sched[0], 0x01)); + key_sched[2] = do_key_exp(key_sched[1], _mm_aeskeygenassist_si128(key_sched[1], 0x02)); + key_sched[3] = do_key_exp(key_sched[2], _mm_aeskeygenassist_si128(key_sched[2], 0x04)); + key_sched[4] = do_key_exp(key_sched[3], _mm_aeskeygenassist_si128(key_sched[3], 0x08)); + key_sched[5] = do_key_exp(key_sched[4], _mm_aeskeygenassist_si128(key_sched[4], 0x10)); + key_sched[6] = do_key_exp(key_sched[5], 
_mm_aeskeygenassist_si128(key_sched[5], 0x20)); + key_sched[7] = do_key_exp(key_sched[6], _mm_aeskeygenassist_si128(key_sched[6], 0x40)); + key_sched[8] = do_key_exp(key_sched[7], _mm_aeskeygenassist_si128(key_sched[7], 0x80)); + key_sched[9] = do_key_exp(key_sched[8], _mm_aeskeygenassist_si128(key_sched[8], 0x1B)); + key_sched[10] = do_key_exp(key_sched[9], _mm_aeskeygenassist_si128(key_sched[9], 0x36)); + + dkey_sched[0] = key_sched[0]; + dkey_sched[1] = _mm_aesimc_si128(key_sched[1]); + dkey_sched[2] = _mm_aesimc_si128(key_sched[2]); + dkey_sched[3] = _mm_aesimc_si128(key_sched[3]); + dkey_sched[4] = _mm_aesimc_si128(key_sched[4]); + dkey_sched[5] = _mm_aesimc_si128(key_sched[5]); + dkey_sched[6] = _mm_aesimc_si128(key_sched[6]); + dkey_sched[7] = _mm_aesimc_si128(key_sched[7]); + dkey_sched[8] = _mm_aesimc_si128(key_sched[8]); + dkey_sched[9] = _mm_aesimc_si128(key_sched[9]); + dkey_sched[10] = key_sched[10]; + +} + +void aes_ctx_init(aes_ctxt_t *ctx, const uint8_t *key, const uint8_t *iv) { + memcpy(ctx->iv, iv, sizeof(ctx->iv)); + key_expansion(key, ctx->key_schedule, ctx->dkey_schedule); +} + +static __m128i cipher(__m128i state, __m128i *key_sched) { + state = _mm_xor_si128(state, key_sched[0]); + + for(int i = 1; i < 10; i++) { + state = _mm_aesenc_si128(state, key_sched[i]); + } + + return _mm_aesenclast_si128(state, key_sched[10]); +} + +static __m128i inv_cipher(__m128i state, __m128i *key_sched) { + state = _mm_xor_si128(state, key_sched[10]); + state = _mm_aesdec_si128(state, key_sched[9]); + state = _mm_aesdec_si128(state, key_sched[8]); + state = _mm_aesdec_si128(state, key_sched[7]); + state = _mm_aesdec_si128(state, key_sched[6]); + state = _mm_aesdec_si128(state, key_sched[5]); + state = _mm_aesdec_si128(state, key_sched[4]); + state = _mm_aesdec_si128(state, key_sched[3]); + state = _mm_aesdec_si128(state, key_sched[2]); + state = _mm_aesdec_si128(state, key_sched[1]); + + state = _mm_aesdeclast_si128(state, key_sched[0]); + + return state; +} + 
+void aes_encrypt_buffer(aes_ctxt_t *ctx, uint8_t *buffer, size_t len) { + __m128i iv = _mm_loadu_si128((const __m128i*)ctx->iv); + __m128i state; + + for(int i = 0; i < len; i += 16) { + state = _mm_loadu_si128((const __m128i*)buffer); + state = _mm_xor_si128(state, iv); + state = cipher(state, ctx->key_schedule); + _mm_storeu_si128((__m128i_u*)buffer, state); + iv = state; + + buffer += 16; + } + + _mm_storeu_si128((__m128i_u*)&ctx->state, state); + _mm_storeu_si128((__m128i_u*)ctx->iv, iv); +} + +void aes_decrypt_buffer(aes_ctxt_t *ctx, uint8_t *buffer, size_t len) { + __m128i state; + __m128i iv = _mm_loadu_si128((const __m128i_u*)ctx->iv); + __m128i next_iv; + + for(int i = 0; i < len; i += 16) { + state = _mm_loadu_si128((const __m128i_u*)buffer); + next_iv = state; + state = inv_cipher(state, ctx->dkey_schedule); + state = _mm_xor_si128(state, iv); + iv = next_iv; + _mm_storeu_si128((__m128i_u*)buffer, state); + buffer += 16; + } + + _mm_storeu_si128((__m128i_u*)&ctx->state, state); + _mm_storeu_si128((__m128i_u*)ctx->iv, iv); +} +#endif diff --git a/tools/gzinject/src/fastaes.h b/tools/gzinject/src/fastaes.h new file mode 100644 index 000000000..3080751fb --- /dev/null +++ b/tools/gzinject/src/fastaes.h @@ -0,0 +1,27 @@ +#ifdef FASTAES +#ifndef _FASTAES_H +#define _FASTAES_H + +#include +#include + +/** + * fast aes for x86/x86-64 processors. 
+ */ + +typedef uint8_t state_t[4][4]; + +typedef struct { + state_t state; + uint8_t iv[16]; + // gzinject only cares about aes128 + __m128i key_schedule[11]; + __m128i dkey_schedule[11]; +} aes_ctxt_t; + +void aes_ctx_init(aes_ctxt_t *ctx, const uint8_t *key, const uint8_t *iv); +void aes_encrypt_buffer(aes_ctxt_t *ctx, uint8_t *buffer, size_t len); +void aes_decrypt_buffer(aes_ctxt_t *ctx, uint8_t *buffer, size_t len); + +#endif +#endif diff --git a/tools/gzinject/src/gzi.c b/tools/gzinject/src/gzi.c new file mode 100644 index 000000000..95768c217 --- /dev/null +++ b/tools/gzinject/src/gzi.c @@ -0,0 +1,218 @@ +#include +#include +#include +#include +#include +#include + +#include "gzi.h" +#include "lz77.h" +#include "gzinject.h" + +typedef int (*gzi_action_t)(gzi_ctxt_t *ctxt, int pos); + +static int gzi_cmd_file(gzi_ctxt_t *ctxt, int pos){ + ctxt->curfile = ctxt->codes[pos].data & 0xFF; + if(verbose){ + printf("Setting current file to %d\n",ctxt->curfile); + } + return 1; +} + +static int gzi_cmd_lz77_decomp(gzi_ctxt_t *ctxt, int pos){ + int32_t curfile = ctxt->curfile; + if(curfile<0){ + printf("Warning: No file Selected, not decompressing.\n"); + return 0; + } + if(verbose){ + printf("LZ77 Decompressing %d\n",curfile); + } + int decompsize = addpadding(lz77_decompressed_size(ctxt->file_ptrs[curfile]),16); + uint8_t *decomp = calloc(decompsize,1); + lz77_decompress(ctxt->file_ptrs[curfile],decomp); + free(ctxt->file_ptrs[curfile]); + ctxt->file_ptrs[curfile] = decomp; + ctxt->file_sizes[curfile] = decompsize; + return 1; +} + +static int gzi_cmd_lz77_comp(gzi_ctxt_t *ctxt, int pos){ + int32_t curfile = ctxt->curfile; + if(curfile<0){ + printf("Warning: No file selected, not compressing.\n"); + return 0; + } + if(verbose){ + printf("LZ77 Compressing %d\n",curfile); + } + uint8_t *comp = NULL; + uint32_t len = ctxt->file_sizes[curfile]; + + // I hate this, but it works for now. 
+    // NOTE(review): this shrinks len by a data-dependent amount; `len & 0x8`
+    // looks like it was meant as an 8-byte alignment adjustment (& 0x7) -
+    // confirm before changing, the author flagged it themselves above.
+    len -= (8 - (len & 0x8));
+    int complen = lz77_compress(ctxt->file_ptrs[curfile],&comp,len,&len);
+    free(ctxt->file_ptrs[curfile]);
+    ctxt->file_ptrs[curfile] = comp;
+    ctxt->file_sizes[curfile] = complen;
+    return 1;
+}
+
+// GZI "apply patch" command: write a 1/2/4-byte value (byte-swapped, i.e.
+// big-endian on little-endian hosts) at code.offset into the selected
+// content file, or into the tmd/tik/cert pseudo-files (ids 100-102).
+static int gzi_cmd_apply_patch(gzi_ctxt_t *ctxt, int pos){
+    int32_t curfile = ctxt->curfile;
+    if(curfile<0){
+        printf("Warning: No file selected, not applying patch.\n");
+        // BUGFIX: without this return, curfile == -1 fell through to the
+        // default case below and indexed file_ptrs[-1] (out of bounds).
+        return 0;
+    }
+    gzi_code_t code = ctxt->codes[pos];
+    uint32_t val = code.data;
+    if(verbose){
+        printf("Apply patch to %d. offset 0x%x = 0x%x\n",curfile,code.offset,code.data);
+    }
+    uint8_t *p;
+    switch(curfile){
+        case GZI_FILE_TMD:
+            p = ctxt->tmd;
+            break;
+        case GZI_FILE_TIK:
+            p = ctxt->tik;
+            break;
+        case GZI_FILE_CERT:
+            p = ctxt->cert;
+            break;
+        default:
+            if(curfile>ctxt->filecnt-1){
+                return -1;
+            }
+            p = ctxt->file_ptrs[curfile];
+            break;
+    }
+    switch(code.len){
+        case 1:
+            *((uint8_t*)(p + code.offset)) = (uint8_t)val;
+            break;
+        case 2:
+            *((uint16_t*)(p + code.offset)) = REVERSEENDIAN16((uint16_t)val);
+            break;
+        case 4:
+        default:
+            *((uint32_t*)(p + code.offset)) = REVERSEENDIAN32(val);
+            break;
+    }
+    return 1;
+}
+
+// Dispatch table: gzi command byte 00-03 indexes this array (see gzi_run).
+static gzi_action_t commands[] = {
+    gzi_cmd_file,
+    gzi_cmd_lz77_decomp,
+    gzi_cmd_lz77_comp,
+    gzi_cmd_apply_patch,
+};
+
+// Read one '\n'-terminated line (terminator stripped) into a heap buffer
+// grown in 256-byte steps. Returns NULL on allocation failure; at EOF the
+// caller receives an empty string, which gzi_parse_file treats as a no-op.
+static char *readline(FILE *fle){
+    char *line = NULL;
+    int buflen=256;
+    for(int i=0;;++i){
+        int c = fgetc(fle);
+
+        if(i%buflen==0){
+            char *new = realloc(line,i+buflen);
+            // BUGFIX: the realloc result was previously used unchecked; a
+            // failure led to a NULL (or stale) pointer dereference below.
+            if(!new){
+                free(line);
+                return NULL;
+            }
+            line = new;
+        }
+        if(c==EOF || c=='\n'){
+            line[i] = 0;
+            return line;
+        }else{
+            line[i] = c;
+        }
+    }
+}
+
+// Return 1 iff `string` is exactly `len` hexadecimal digits.
+int ishexstring(const char *string, size_t len){
+    const char *s;
+    for(s = string; *s!=0;s++){
+        // Cast to unsigned char: passing a negative plain char to
+        // isxdigit() is undefined behavior.
+        if(!isxdigit((unsigned char)*s)){
+            return 0;
+        }
+    }
+    return s - string == len;
+}
+
+// Parse one "CCLL OOOOOOOO DDDDDDDD" gzi line and append it to ctxt->codes.
+// Malformed lines are silently ignored.
+void parseline(gzi_ctxt_t *ctxt, const char *line){
+    char command[6]={0};
+    char offset[10]={0};
+    char data[10]={0};
+    sscanf(line,"%5s %9s %9s",command,offset,data);
+    // BUGFIX: the third check used to re-test `offset`, so a malformed
+    // data field was silently accepted.
+    if(!ishexstring(command,4) || !ishexstring(offset,8) || !ishexstring(data,8))
+        return;
+    ctxt->codecnt++;
+
gzi_code_t *new_codes = realloc(ctxt->codes,sizeof(gzi_code_t) * ctxt->codecnt);
+    if(new_codes){
+        ctxt->codes = new_codes;
+    }else{
+        // BUGFIX: on realloc failure the old code kept going and memcpy'd
+        // one element past the end of the old (smaller) array. Drop the
+        // new code instead; the old array is still valid.
+        ctxt->codecnt--;
+        return;
+    }
+    gzi_code_t code;
+    uint16_t cmd;
+    sscanf(command,"%"SCNx16,&cmd);
+    code.command = (cmd & 0xFF00) >> 8;
+    code.len = cmd & 0xFF;
+    sscanf(offset,"%"SCNx32,&code.offset);
+    sscanf(data,"%"SCNx32,&code.data);
+    memcpy(ctxt->codes + (ctxt->codecnt - 1),&code,sizeof(code));
+}
+
+// Parse every non-comment, non-empty line of a .gzi file into ctxt->codes.
+// Returns 1 on success, 0 on error.
+int gzi_parse_file(gzi_ctxt_t *ctxt, const char *file){
+    FILE *fle = fopen(file,"r");
+    if(!fle){
+        fprintf(stderr,"Could not open %s, cannot parse file.\n",file);
+        // BUGFIX: previously fell through and passed NULL to feof().
+        return 0;
+    }
+    if(verbose){
+        printf("Parsing gzi file %s\n",file);
+    }
+    // readline() hands back an empty string at EOF, which is skipped below,
+    // so this feof-driven loop terminates cleanly.
+    while(!feof(fle)){
+        char *line = readline(fle);
+        if(!line){
+            fprintf(stderr,"Could not readline from gzi file %s.\n",file);
+            // BUGFIX: do not leak the stream on the error path.
+            fclose(fle);
+            return 0;
+        }
+        if(line[0]=='#' || line[0]==0){
+            free(line);
+            continue;
+        }
+        parseline(ctxt,line);
+        free(line);
+    }
+    fclose(fle);
+    return 1;
+}
+
+// Execute every parsed command in order via the `commands` dispatch table.
+int gzi_run(gzi_ctxt_t *ctxt){
+    if(verbose){
+        printf("Running gzi commands\n");
+    }
+    for(int i=0;i<ctxt->codecnt;i++){
+        // Guard: the command byte comes from an external file; skip ids
+        // outside the table instead of indexing out of bounds.
+        if(ctxt->codes[i].command >= sizeof(commands)/sizeof(commands[0])){
+            fprintf(stderr,"Skipping unknown gzi command %02x\n",ctxt->codes[i].command);
+            continue;
+        }
+        commands[ctxt->codes[i].command](ctxt,i);
+    }
+    return 1;
+}
+
+// Wire a context to caller-owned buffers. The tmd/tik/cert pointers and the
+// content file array are borrowed, not copied; only ctxt->codes is owned.
+int gzi_init(gzi_ctxt_t *ctxt, uint8_t **files, uint32_t *filesizes, int filecnt,
+    uint8_t *tmd, uint8_t *tik, uint8_t *cert,
+    uint32_t *tmd_size, uint32_t *tik_size, uint32_t *cert_size){
+    ctxt->codes = NULL;
+    ctxt->codecnt=0;
+    ctxt->curfile=-1;
+    ctxt->file_ptrs = files;
+    ctxt->file_sizes = filesizes;
+    ctxt->filecnt = filecnt;
+    ctxt->tmd = tmd;
+    ctxt->tik = tik;
+    ctxt->cert = cert;
+    ctxt->tmd_size = tmd_size;
+    ctxt->tik_size = tik_size;
+    ctxt->cert_size = cert_size;
+    return 1;
+}
+
+// Release the parsed code list (the only allocation the context owns).
+int gzi_destroy(gzi_ctxt_t *ctxt){
+    free(ctxt->codes);  // free(NULL) is a no-op; the old guard was redundant
+    return 1;
+}
\ No newline at end of file
diff --git a/tools/gzinject/src/gzi.h b/tools/gzinject/src/gzi.h
new file mode 100644
index 000000000..1b49fcfa9
--- /dev/null
+++ b/tools/gzinject/src/gzi.h
@@ -0,0 +1,39 @@
+#ifndef GZINJECT_GZI_H
+#define GZINJECT_GZI_H
+
+#include <stdint.h>
+
+#define GZI_FILE_TMD 100
+#define GZI_FILE_TIK 101
+#define GZI_FILE_CERT 102 + +typedef struct { + uint8_t command; + uint8_t len; + uint32_t offset; + uint32_t data; +} gzi_code_t; + +typedef struct{ + gzi_code_t *codes; + int codecnt; + int8_t curfile; + uint8_t **file_ptrs; + uint32_t *file_sizes; + uint8_t filecnt; + uint8_t *tmd; + uint8_t *tik; + uint8_t *cert; + uint32_t *tmd_size; + uint32_t *tik_size; + uint32_t *cert_size; +} gzi_ctxt_t; + +int gzi_parse_file(gzi_ctxt_t *ctxt, const char *file); +int gzi_run(gzi_ctxt_t *ctxt); +int gzi_init(gzi_ctxt_t *ctxt, uint8_t **files, uint32_t *filesizes, int filecnt, + uint8_t *tmd, uint8_t *tik, uint8_t *cert, + uint32_t *tmd_size, uint32_t *tik_size, uint32_t *cert_size); +int gzi_destroy(gzi_ctxt_t *ctxt); + +#endif \ No newline at end of file diff --git a/tools/gzinject/src/gzinject.c b/tools/gzinject/src/gzinject.c new file mode 100644 index 000000000..1b15397ae --- /dev/null +++ b/tools/gzinject/src/gzinject.c @@ -0,0 +1,1329 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "gzinject.h" +#include "lz77.h" +#include "u8.h" +#include "gzi.h" +#include "aes.h" +#include "sha1.h" +#include "md5.h" +#include "romchu.h" +#include "doltool.h" +#include "fastaes.h" + +static uint8_t key[16] = {0}; +static uint8_t region = 0x03; + +static int cleanup = 0; +static int content_num = 5; + +int verbose = 0; +int dol_after = -1; + +static char *wad = NULL; +static char *directory = NULL; +static char *keyfile = NULL; +static char *workingdirectory = NULL; +static char *rom = NULL; +static char *outwad = NULL; +static patch_list_t *patch = NULL; +static patch_list_t **patch_link = &patch; +static dol_list_t *dol = NULL; +static dol_list_t **dol_link = &dol; +static dol_loading_list_t *dol_loading = NULL; +static dol_loading_list_t **dol_loading_link = &dol_loading; +static char *titleid = NULL; +static char *channelname = NULL; + +uint16_t be16(const uint8_t *p) +{ + return (p[0] << 8) | p[1]; +} + +uint32_t 
be32(const uint8_t *p) +{ + return (p[0] << 24) | (p[1] << 16) | (p[2] << 8) | p[3]; +} + +static const struct option cmdoptions[] = { + { "action",required_argument,0,'a' }, + { "wad",required_argument,0,'w' }, + { "channelid",required_argument,0,'i' }, + { "channeltitle",required_argument,0,'t' }, + { "help",no_argument,0,'h' }, + { "key",required_argument,0,'k' }, + { "region",required_argument,0,'r' }, + { "verbose",no_argument,&verbose,1 }, + { "directory",required_argument,0,'d' }, + { "cleanup", no_argument,&cleanup,1}, + { "version",no_argument,0,'v'}, + { "rom",required_argument,0,'m'}, + { "outputwad",required_argument,0,'o'}, + { "patch-file",required_argument,0,'p'}, + { "content-num",required_argument,0,'c'}, + { "dol-inject",required_argument,0,'f'}, + { "dol-loading",required_argument,0,'l'}, + { "dol-after", required_argument,0,'e'}, + { 0,0,0,0} +}; + +const uint8_t newkey[16] = { + 0x47, 0x5a, 0x49, 0x73, 0x4c, 0x69, 0x66, 0x65, 0x41, 0x6e, 0x64, 0x42, 0x65, 0x65, 0x72, 0x21 +}; + +static SHA1_CTX sha1; +static MD5_CTX md5; + +#ifdef FASTAES +static aes_ctxt_t aes; +static void do_encrypt(uint8_t *input, size_t size, const uint8_t *key, const uint8_t *iv) { + aes_ctx_init(&aes, key, iv); + aes_encrypt_buffer(&aes, input, size); +} + +static void do_decrypt(uint8_t *input, size_t size, const uint8_t *key, const uint8_t *iv) { + aes_ctx_init(&aes, key, iv); + aes_decrypt_buffer(&aes, input, size); +} + +#else + +static struct AES_ctx aes; +static void do_encrypt(uint8_t *input, size_t size, const uint8_t *key, const uint8_t *iv) { + AES_init_ctx_iv(&aes, key, iv); + AES_CBC_encrypt_buffer(&aes, input, size); +} + +static void do_decrypt(uint8_t *input, size_t size, const uint8_t *key, const uint8_t *iv) { + AES_init_ctx_iv(&aes, key, iv); + AES_CBC_decrypt_buffer(&aes, input, size); +} +#endif + + static void do_sha1(uint8_t *input, uint8_t *output, size_t size) { + SHA1Init(&sha1); + SHA1Update(&sha1, input, size); + SHA1Final(output, &sha1); +} + 
+ static void do_md5(uint8_t *input, uint8_t *output, size_t size) { + MD5_Init(&md5); + MD5_Update(&md5, input, size); + MD5_Final(output, &md5); +} + +uint32_t addpadding(uint32_t inp, uint32_t padding) { + int ret = inp; + if (inp % padding != 0) { + ret = inp + (padding - (inp % padding)); + } + return ret; +} + +static uint32_t getcontentlength(uint8_t *tmd, uint32_t contentnum) { + uint32_t off = 0x1ec + (36 * contentnum); + return tmd[off + 4] << 24 | + tmd[off + 5] << 16 | + tmd[off + 6] << 8 | + tmd[off + 7]; +} + +static void setcontentlength(uint8_t *tmd, uint32_t contentnum, uint32_t size){ + uint32_t off = 0x1ec + (36 * contentnum) + 4; + *((uint32_t*)(tmd + off)) = REVERSEENDIAN32(size); +} + +static void removedir(const char *file); + +static void removefile(const char* file) { + struct stat sbuffer; + if (stat(file, &sbuffer) == 0) { + if ((sbuffer.st_mode & S_IFMT) == S_IFDIR) { + removedir(file); + } + else if ((sbuffer.st_mode & S_IFMT) == S_IFREG) { + if (verbose) { + printf("Removing %s\n", file); + } + remove(file); + } + + } +} + +static void removedir(const char *file) { + DIR *dir; + struct dirent *ent; + if ((dir = opendir(file)) != NULL) { + while ((ent = readdir(dir)) != NULL) { + if (strcmp(ent->d_name, ".") == 0 || strcmp(ent->d_name, "..") == 0) + continue; + char *path = malloc(1000); + snprintf(path, 1000, "%s/%s", file, ent->d_name); + removefile(path); + free(path); + } + if (verbose) { + printf("Removing %s\n", file); + } + rmdir(file); + } + +} + +static char *removeext(char* mystr) { + char *retstr; + char *lastdot; + if (mystr == NULL) + return NULL; + if ((retstr = malloc(strlen(mystr) + 1)) == NULL) + return NULL; + strcpy(retstr, mystr); + lastdot = strrchr(retstr, '.'); + if (lastdot != NULL) + *lastdot = '\0'; + return retstr; +} + +static void print_usage() { + puts("Usage:\n" + " gzinject -a extract -w SOURCEWAD [options]\n" + " gzinject -a pack -w DESTWAD [options]\n" + " gzinject -a inject -w SOURCEWAD -m ROM 
[options]\n" + " gzinject -a romc -m INROM -o OUTROM [options]\n" + " gzinject -a genkey [options]\n" + " gzinject --help\n" + " gzinject --version\n\n" + "Actions:\n" + " extract extracts SOURCEWAD to directory\n" + " pack packs directory into DESTWAD\n" + " inject injects rom into SOURCEWAD\n" + " romc decompresses a romc compressed rom\n" + " genkey generates wii common-key\n\n" + "Options:\n" + " -i, --channelid=ID New Channel ID For Pack and Inject actions (default: none)\n" + " -t, --title=title New Channel name for pack and inject actions (default: none)\n" + " -h, --help Prints this help message\n" + " -k, --key=keyfile Location of the common-key file (default: ./common-key.bin)\n" + " -r, --region=1-3 Region to use (default: 3)\n" + " --verbose Print out verbose program execution information\n" + " -d, --directory=directory Directory to extract contents to, or directory to read contents from (default: ./wadextract)\n" + " --cleanup Remove files before performing actions\n" + " --version Prints the current version\n" + " -m, --rom=rom Rom to inject for inject action (default: none), also rom to romc decompress\n" + " -o, --outputwad=outwad The output wad for inject actions (default: SOURCEWAD-inject.wad), also output for romc decompression\n" + " -p, --patch-file=patchfile gzi file to use for applying patches (default: none)\n" + " -c, --content=contentfile the primary content file (default: 5)\n" + " --dol-inject Binary data to inject into the emulator program, requires --dol-loading\n" + " --dol-loading The loading address for the binary specified by --dol-inject\n" + " --dol-after After which patch file to inject the dol, default: after all patches\n" + ); +} + +static void print_version(const char* prog) { + printf("%s Version ", prog); + printf(GZINJECT_VERSION); + printf("\n"); +} + +static void truchasign(uint8_t *data, uint8_t type, size_t len) { + uint16_t pos = 0x1f2; + if (type == W_TMD) { + pos = 0x1d4; + } + + uint8_t digest[20]; + do_sha1(data 
+ 0x140, digest, len - 0x140); + + uint16_t i; + if (digest[0] != 0x00) { + for (i = 4; i < 260; i++) { + data[i] = 0x00; + } + for (i = 0; i < 0xFFFF; i++) { + uint16_t revi = REVERSEENDIAN16(i); + memcpy(data + pos, &revi, 2); + + do_sha1(data + 0x140, digest, len - 0x140); + + if (digest[0] == 0x00) { + break; + } + } + } +} + +static int do_extract() { + struct stat sbuffer; + + if (stat(wad, &sbuffer) != 0) { + printf("Could not open %s\n", wad); + return 0; + } + + if (verbose) { + printf("Extracting %s to %s\n", wad, directory); + } + + uint8_t *data = (uint8_t*)malloc(sbuffer.st_size); + if(!data){ + fprintf(stderr,"Could not allocate %ld bytes for wad\n",sbuffer.st_size); + return 0; + } + FILE *wadfile = fopen(wad, "rb"); + if(!wadfile){ + fprintf(stderr,"Could not open %s wad file\n",wad); + free(data); + return 0; + } + int bytesread = fread(data, 1, sbuffer.st_size, wadfile); + if(bytesread!=sbuffer.st_size || ferror(wadfile)){ + fprintf(stderr,"Could not read total wad, or file error occured"); + free(data); + fclose(wadfile); + return 0; + } + fclose(wadfile); + if (be32(&data[3]) != 0x20497300) { + fprintf(stderr,"%s is an invalid wad file!\n",wad); + free(data); + return 0; + } + + uint32_t certsize = be32(data + 0x08); + uint32_t tiksize = be32(data + 0x10); + uint32_t tmdsize = be32(data + 0x14); + uint32_t datasize = be32(data + 0x18); + uint32_t footersize = be32(data + 0x1C); + + uint32_t certpos = 0x40; + uint32_t tikpos = 0x40 + addpadding(certsize, 64); + uint32_t tmdpos = tikpos + addpadding(tiksize, 64); + uint32_t datapos = tmdpos + addpadding(tmdsize, 64); + uint32_t footerpos = datapos + addpadding(datasize,64); + + if (cleanup == 1) removedir(directory); + + stat(directory,&sbuffer); + if(S_ISDIR(sbuffer.st_mode)){ + if(verbose){ + printf("%s exists, not creating.\n",directory); + } + }else{ + if(verbose) + printf("Creating %s\n",directory); + if(mkdir(directory, 0755)==-1){ + fprintf(stderr,"Could not mkdir %s\n",directory); + 
free(data); + return 0; + } + } + + if(chdir(directory)==-1){ + fprintf(stderr,"Could not chdir to %s\n",directory); + free(data); + return 0; + } + + uint16_t contentcount = be16(data + tmdpos + 0x1de); + + if (verbose) { + printf("Writing cert.cert.\n"); + } + FILE* outfile = fopen("cert.cert", "wb"); + if(!outfile){ + perror("Could not open cert.cert for writing\n"); + free(data); + return 0; + } + + fwrite(data + certpos, 1, certsize, outfile); + if(ferror(outfile)){ + perror("Could not write to cert.cert\n"); + free(data); + return 0; + } + fclose(outfile); + + if (verbose) { + printf("Writing ticket.tik.\n"); + } + outfile = fopen("ticket.tik", "wb"); + if(!outfile){ + perror("Could not open ticket.tik for writing.\n"); + free(data); + return 0; + } + fwrite(data + tikpos, 1, tiksize, outfile); + if(ferror(outfile)){ + perror("Could not write to ticket.tik\n"); + free(data); + return 0; + } + fclose(outfile); + + if (verbose) { + printf("Writing metadata.tmd.\n"); + } + outfile = fopen("metadata.tmd", "wb"); + if(!outfile){ + perror("Could not open metadata.tmd for writing\n"); + free(data); + return 0; + } + fwrite(data + tmdpos, 1, tmdsize, outfile); + if(ferror(outfile)){ + perror("Could not write to metadata.tmd\n"); + free(data); + return 0; + } + fclose(outfile); + + if(verbose){ + printf("Writing footer.bin\n"); + } + outfile = fopen("footer.bin","wb"); + if(!outfile){ + perror("Could not open footer.bin for writing.\n"); + free(data); + return 0; + } + fwrite(data + footerpos, 1, footersize, outfile); + if(ferror(outfile)){ + perror("Could not write to footer.bin\n"); + free(data); + return 0; + } + fclose(outfile); + + uint8_t encryptedkey[16], iv[16]; + + uint8_t i, j; + for (i = 0; i < 16; i++) { + encryptedkey[i] = data[tikpos + 0x1bf + i]; + } + for (i = 0; i < 8; i++) { + iv[i] = data[tikpos + 0x1dc + i]; + iv[i + 8] = 0x00; + } + ; + do_decrypt(encryptedkey, 16, key, iv); + + for (j = 2; j < 16; j++) iv[j] = 0x00; + + uint8_t *contentpos = data 
+ datapos; + + for (i = 0; i < contentcount; i++) { + + iv[0] = data[tmdpos + 0x1e8 + (0x24 * i)]; + iv[1] = data[tmdpos + 0x1e9 + (0x24 * i)]; + + uint32_t size = addpadding(getcontentlength(data + tmdpos, i), 16); + + if (verbose) { + printf("Decrypting contents %d.\n", i); + } + + do_decrypt(contentpos, size, encryptedkey, iv); + + // Main rom content file + if (i == content_num) { + if (verbose) { + printf("Extracting content %d uint8_t Archive.\n",content_num); + } + char dbuf[100]; + snprintf(dbuf,100,"content%d",content_num); + if(!extract_u8_archive(contentpos,dbuf)){ + perror("Could not extract u8 archive"); + free(data); + return 0; + } + } + + char contentname[100]; + snprintf(contentname, 100, "content%d.app", i); + if (verbose) { + printf("Writing %s.\n", contentname); + } + outfile = fopen(contentname, "wb"); + if(!outfile){ + fprintf(stderr,"Could not open %s for writing\n",contentname); + free(data); + return 0; + } + fwrite(contentpos, 1, getcontentlength(data + tmdpos, i), outfile); + if(ferror(outfile)){ + fprintf(stderr,"Could not write to %s\n",contentname); + free(data); + return 0; + } + fclose(outfile); + contentpos += addpadding(size, 64); + } + chdir(".."); + free(data); + return 1; +} + +static int apply_dol_patch(const char *dol_file, uint32_t loading_address, uint8_t **data, uint32_t *size){ + if(verbose){ + printf("Injecting dol file %s\n",dol_file); + } + struct stat sbuffer; + chdir(workingdirectory); + doltool_ctxt_t *dolctxt = calloc(1,sizeof(*dolctxt)); + if(!dolctxt){ + perror("Could not create dol ctxt"); + errno = ENOMEM; + return -1; + } + dol_load(dolctxt,data,size); + FILE *inject_file = fopen(dol_file,"rb"); + if(!inject_file){ + free(dolctxt); + perror(dol_file); + errno = ENOENT; + return -1; + } + stat(dol_file,&sbuffer); + uint8_t *inject_data = malloc(sbuffer.st_size); + fread(inject_data,1,sbuffer.st_size,inject_file); + fclose(inject_file); + dol_inject(dolctxt,inject_data,sbuffer.st_size,loading_address); + 
dol_save(dolctxt); + free(dolctxt); + free(inject_data); + chdir(directory); + return 0; +} + +static int do_pack() { + DIR *testdir = opendir(directory); + if (testdir) { + closedir(testdir); + } + else { + fprintf(stderr,"%s doesn't exist, or is not a directory!\n", directory); + return 0; + } + + if (verbose) { + printf("Packing %s into %s\n", directory, wad); + } + if(chdir(directory)==-1){ + fprintf(stderr,"Could not change directory to %s",directory); + return 0; + } + + if (verbose) { + printf("Gathering WAD Header Information\n"); + } + + struct stat sbuffer; + if(stat("cert.cert", &sbuffer)!=0){ + perror("Could not stat cert.cert\n"); + return 0; + } + uint32_t certsize = sbuffer.st_size; + + if(stat("ticket.tik", &sbuffer)!=0){ + perror("Could not stat ticket.tik\n"); + return 0; + } + uint32_t tiksize = sbuffer.st_size; + + if(stat("metadata.tmd", &sbuffer)!=0){ + perror("Could not stat metadata.tmd\n"); + return 0; + } + uint32_t tmdsize = sbuffer.st_size; + + if (verbose) { + printf("Reading cert.cert\n"); + } + FILE *infile = fopen("cert.cert", "rb"); + if(!infile){ + perror("Could not open cert.cert for reading\n"); + return 0; + } + uint8_t *cert = calloc(addpadding(certsize, 64), sizeof(uint8_t)); + if(!cert){ + fprintf(stderr,"Could not allocate %d bytes for cert\n",certsize); + return 0; + } + int bytesread = fread(cert, 1, certsize, infile); + if(bytesread!=certsize || ferror(infile)){ + perror("Error reading from cert.cert\n"); + free(cert); + return 0; + } + fclose(infile); + + if (verbose) { + printf("Reading ticket.cert\n"); + } + infile = fopen("ticket.tik", "rb"); + if(!infile){ + perror("Could not open ticket.tik for reading\n"); + free(cert); + return 0; + } + uint8_t *tik = calloc(addpadding(tiksize, 64), sizeof(uint8_t)); + if(!tik){ + fprintf(stderr,"Could not allocate %d bytes for ticket\n",tiksize); + free(cert); + return 0; + } + bytesread = fread(tik, 1, tiksize, infile); + if(bytesread!=tiksize || ferror(infile)){ + perror("Error 
reading from ticket.tik\n"); + free(cert); + free(tik); + return 0; + } + fclose(infile); + + if (verbose) { + printf("Reading metadata.tmd\n"); + } + infile = fopen("metadata.tmd", "rb"); + if(!infile){ + perror("Could not open metadata.tmd for reading\n"); + free(cert); + free(tik); + return 0; + } + uint8_t *tmd = calloc(addpadding(tmdsize, 64), sizeof(uint8_t)); + if(!tmd){ + fprintf(stderr,"Could not allocate %d bytes for tmd\n",tmdsize); + free(cert); + free(tik); + return 0; + } + bytesread = fread(tmd, 1, tmdsize, infile); + if(bytesread!=tmdsize || ferror(infile)){ + perror("Error reading from tmddata.tmd\n"); + free(cert); + free(tik); + free(tmd); + return 0; + } + fclose(infile); + + if (verbose) { + printf("Generating Footer signature\n"); + } + char footer[0x40] = {0}; + sprintf(footer,"gzinject v%s https://github.com/krimtonz/gzinject", GZINJECT_VERSION); + uint32_t footersize = 0x40; + + // Build Content5 into a .app file first + char dbuf[100], nbuf[100] = {0}; + snprintf(dbuf,100,"content%d",content_num); + strcpy(nbuf,dbuf); + strcat(nbuf,".app"); + if(verbose){ + printf("Generating %s u8 archive\n",nbuf); + } + + int content5len = create_u8_archive(dbuf,nbuf); + if(!content5len){ + fprintf(stderr,"Could not create u8 archive from %s into %s\n",dbuf,nbuf); + free(cert); + free(tik); + free(tmd); + return 0; + } + chdir(workingdirectory); + chdir(directory); + if (verbose) { + printf("Modifying content metadata in the TMD\n"); + } + uint16_t contentsc = be16(tmd + 0x1DE); + int i; + + char cfname[100]; + uint8_t **fileptrs = malloc(sizeof(*fileptrs) * contentsc); + if(!fileptrs){ + perror("Could not allocate filepointers.\n"); + free(cert); + free(tik); + free(tmd); + return 0; + } + uint32_t *filesizes = malloc(sizeof(*filesizes) * contentsc); + if(!filesizes){ + perror("Could not allocate filesizes\n"); + free(cert); + free(tik); + free(tmd); + free(fileptrs); + return 0; + } + + for (i = 0; i < contentsc; i++) { + snprintf(cfname, 30, 
"content%d.app", i); + stat(cfname, &sbuffer); + filesizes[i] = addpadding(sbuffer.st_size,16); + fileptrs[i] = calloc(filesizes[i],1); + if(!fileptrs[i]){ + fprintf(stderr,"Could not allocate %ld bytes for %s\n",sbuffer.st_size,cfname); + goto error; + } + infile = fopen(cfname,"rb"); + if(!infile){ + fprintf(stderr,"Could not open %s for reading\n",cfname); + goto error; + } + bytesread = fread(fileptrs[i],1,sbuffer.st_size,infile); + if(bytesread!=sbuffer.st_size || ferror(infile)){ + fprintf(stderr,"Error reading from %s\n",cfname); + goto error; + } + fclose(infile); + setcontentlength(tmd,i,filesizes[i]); + } + + int patch_idx = 0; + int dol_applied = 0; + if(dol_after>=101) dol_after-=101; + + while(patch){ + if(verbose){ + printf("Applying %s gzi patches\n",patch->filename); + } + + if(chdir(workingdirectory)!=0){ + fprintf(stderr,"Could not change directory to %s",workingdirectory); + } + gzi_ctxt_t gzi; + if(!gzi_init(&gzi,fileptrs,filesizes,contentsc,tmd,tik,cert,&tmdsize,&tiksize,&certsize)){ + perror("Could not initialize patch file"); + goto error; + + } + if(!gzi_parse_file(&gzi,patch->filename)){ + perror("Could not parse gzi patch file"); + goto error; + } + if(!gzi_run(&gzi)){ + perror("Could not run gzi patch file"); + goto error; + } + if(chdir(directory)!=0){ + fprintf(stderr,"Could not change directory to %s",directory); + goto error; + } + + for(int i=0;inext; + free(old_patch); + if(dol_after == patch_idx){ + while(dol && dol_loading){ + if (apply_dol_patch(dol->filename,dol_loading->loading_address,&fileptrs[1],&filesizes[1]) != 0) { + fprintf(stderr, "Could not inject dol patch\n"); + goto error; + } + dol_list_t *old_dol = dol; + dol = dol->next; + free(old_dol); + dol_loading_list_t *old_loading = dol_loading; + dol_loading = dol_loading->next; + free(old_loading); + } + dol_applied = 1; + setcontentlength(tmd,1,filesizes[1]); + } + patch_idx++; + } + + if(!dol_applied && dol && dol_loading){ + while(dol && dol_loading){ + if 
(apply_dol_patch(dol->filename,dol_loading->loading_address,&fileptrs[1],&filesizes[1]) != 0) { + fprintf(stderr, "Could not inject dol patch\n"); + goto error; + } + dol_list_t *old_dol = dol; + dol = dol->next; + free(old_dol); + dol_loading_list_t *old_loading = dol_loading; + dol_loading = dol_loading->next; + free(old_loading); + } + setcontentlength(tmd,1,filesizes[1]); + } + + // Change Title ID + if (titleid != NULL) { + if (verbose) { + printf("Changing Channel ID\n"); + } + memcpy(tik + 0x1e0, titleid, 4); + memcpy(tmd + 0x190, titleid, 4); + } + + if (verbose) { + printf("Changing region in the TMD\n"); + } + // Change the Region + tmd[0x19d] = region; + + if (verbose) { + printf("Changing encryption key in the ticket\n"); + } + // New key + memcpy(tik + 0x1bf, &newkey, 16); + + //Decrypt the new key + uint8_t newenc[16]; + uint8_t iv[16]; + + for (i = 0; i < 16; i++) { + newenc[i] = *(tik + 0x1bf + i); + } + for (i = 0; i < 8; i++) { + iv[i] = *(tik + 0x1dc + i); + iv[i + 8] = 0x00; + } + + do_decrypt(newenc, 16, key, iv); + + int j; + + for (j = 2; j < 15; j++) { + iv[j] = 0x00; + } + + for (i = 0; i < contentsc; i++) { + uint8_t *contents = fileptrs[i]; + + if (i == 0) { + if (channelname != NULL) { + if (verbose) { + printf("Changing the Channel Name in content0.app\n"); + } + + uint16_t imetpos = -1; + for (j = 0; j < 400; j++) { + if (strcmp((char*)(contents + j),"IMET")==0) { + imetpos = j; + break; + } + } + if(imetpos!=-1){ + uint16_t count = 0; + size_t cnamelen = strlen(channelname); + char namebuf[40] = {0}; + for(j=0,count=0;count0){ + fwrite(&padding,1,padcnt,outwadfile); + if(ferror(outwadfile)){ + perror("Could not write write content padding\n"); + goto error; + } + } + } + if (verbose) { + printf("Writing footer\n"); + } + fwrite(footer, 1, 0x40, outwadfile); + if(ferror(outwadfile)){ + perror("Could not write footer\n"); + goto error; + } + fclose(outwadfile); + + + free(cert); + free(tik); + free(tmd); + for(i=0;ifilename = optarg; + 
new_patch->next = NULL; + *patch_link = new_patch; + patch_link = &new_patch->next; + break; + } + case 'c': + content_num = optarg[0] - 0x30; + if(content_num<0 || content_num>9) content_num=5; + break; + case 'f': + { + dol_list_t *new_dol = malloc(sizeof(*new_dol)); + if(new_dol == NULL){ + perror("Could not allocate dol list"); + exit(1); + } + new_dol->filename = optarg; + new_dol->next = NULL; + *dol_link = new_dol; + dol_link = &new_dol->next; + break; + } + case 'l':{ + char loading_address[10]; + sscanf(optarg,"%9s",loading_address); + uint32_t addr; + sscanf(loading_address,"%"SCNx32,&addr); + dol_loading_list_t *new_dol_loading = malloc(sizeof(*new_dol_loading)); + if(new_dol_loading == NULL){ + perror("Could not allocate dol loading address."); + exit(1); + } + new_dol_loading->loading_address = addr; + new_dol_loading->next = NULL; + *dol_loading_link = new_dol_loading; + dol_loading_link = &new_dol_loading->next; + break; + } + case 'e': { + char dol_after_str[10]; + sscanf(optarg, "%s", dol_after_str); + sscanf(dol_after_str, "%"SCNu32, &dol_after); + break; + } + default: + break; + } + + } + + if (action == NULL) { + print_usage(); + exit(1); + } + + if(strcmp(action, "romc") == 0){ + romc(); + return 0; + } + + if (strcmp(action, "genkey") == 0){ + genkey(); + return 0; + } + + if (strcmp(action, "extract") != 0 && strcmp(action, "pack") != 0 && strcmp(action, "inject") != 0) { + print_usage(); + exit(1); + } + + if (wad == NULL) { + print_usage(); + exit(1); + } + + if (directory == NULL) directory = "wadextract"; + + struct stat sbuffer; + if (keyfile == NULL) { + if (stat("key.bin", &sbuffer) == 0) { + keyfile = "key.bin"; + } + else if (stat("common-key.bin", &sbuffer) == 0) { + keyfile = "common-key.bin"; + } + else { + printf("Cannot find key.bin or common-key.bin.\n"); + exit(1); + } + } + else { + if (stat(keyfile, &sbuffer) != 0) { + printf("Cannot find keyfile specified.\n"); + exit(1); + } + } + + FILE *fkeyfile = fopen(keyfile, "rb"); 
+ if(!fkeyfile){ + perror("Could not open keyfile"); + exit(1); + } + + fread(&key, 1, 16, fkeyfile); + if(ferror(fkeyfile)){ + perror("Could not read from keyfile."); + exit(1); + } + fclose(fkeyfile); + + workingdirectory = malloc(200); + if(!workingdirectory){ + perror("Could not allocate for working directory"); + exit(1); + } + workingdirectory = getcwd(workingdirectory, 200); + + if (strcmp(action, "extract") == 0) { + if(!do_extract()){ + exit(1); + } + } + else if (strcmp(action, "pack") == 0) { + if(!do_pack()){ + exit(1); + } + } + else if (strcmp(action, "inject") == 0) { + if (rom == NULL) { + printf("-a inject specified, but no rom to inject\n"); + free(workingdirectory); + exit(1); + + } + if(!do_extract()){ + perror("Could not extract wad\n"); + free(workingdirectory); + exit(1); + } + + if (verbose) { + printf("Copying %s to %s/content%d/rom\n", rom, directory,content_num); + } + FILE *from = fopen(rom, "rb"); + fseek(from, 0, SEEK_END); + size_t fromlen = ftell(from); + fseek(from, 0, SEEK_SET); + uint8_t *inrom = malloc(fromlen); + if(!inrom){ + perror("could not allocate input rom\n"); + free(workingdirectory); + exit(1); + } + fread(inrom, 1, fromlen, from); + fclose(from); + + char *orom = malloc(200); + if(!orom){ + perror("Could not allocate output rom name\n"); + free(workingdirectory); + free(inrom); + exit(1); + } + snprintf(orom, 200, "%s/content%d/rom", directory,content_num); + from = fopen(orom, "wb"); + fwrite(inrom, 1, fromlen, from); + fclose(from); + free(inrom); + free(orom); + + + char *wadname = removeext(wad), + *outname = malloc(strlen(wadname) + 12); + if(!outname){ + perror("could not allocate for output wad name\n"); + free(workingdirectory); + exit(1); + } + sprintf(outname, "%s-inject.wad", wadname); + free(wadname); + if (outwad == NULL) { + wad = outname; + } + else { + wad = outwad; + } + + if(!do_pack()){ + perror("Could not pack wad\n"); + free(outname); + free(workingdirectory); + exit(1); + } + free(outname); + } + 
+ free(workingdirectory); + return 0; +} diff --git a/tools/gzinject/src/gzinject.h b/tools/gzinject/src/gzinject.h new file mode 100644 index 000000000..493557caf --- /dev/null +++ b/tools/gzinject/src/gzinject.h @@ -0,0 +1,50 @@ +#ifndef _GZINJECT_H_ +#define _GZINJECT_H_ + +#include +#include + +#define REVERSEENDIAN32(X) (((X) >> 24) & 0xff) | (((X)<<8) & 0xFF0000) | (((X) >> 8) & 0xff00) | (((X)<<24) & 0xff000000) +#define REVERSEENDIAN16(X) (((X)>>8) & 0xff) | (((X)<<8) & 0xFF00) + +#define W_TIK 0x00 +#define W_TMD 0x01 +#define GZINJECT_VERSION "0.3.3" + +#if _WIN32 +#define mkdir(X,Y) mkdir(X) +#define getcwd(X,Y) _getcwd(X,Y) +#endif + +typedef enum{ + FILE_DIRECTORY, + FILE_NORMAL +}filetype_t; + +typedef struct patch_list patch_list_t; +struct patch_list { + const char *filename; + patch_list_t *next; +}; + +typedef struct dol_list dol_list_t; +struct dol_list{ + const char *filename; + dol_list_t *next; +}; + +typedef struct dol_loading_list dol_loading_list_t; +struct dol_loading_list{ + uint32_t loading_address; + dol_loading_list_t *next; +}; + + uint16_t be16(const uint8_t *p); + + uint32_t be32(const uint8_t *p); + +uint32_t addpadding(uint32_t inp, uint32_t padding); + +extern int verbose; + +#endif diff --git a/tools/gzinject/src/lz77.c b/tools/gzinject/src/lz77.c new file mode 100644 index 000000000..5f9063ab3 --- /dev/null +++ b/tools/gzinject/src/lz77.c @@ -0,0 +1,175 @@ +#include +#include +#include +#include "lz77.h" +#include "gzinject.h" + +int lz77_compressed_length(uint8_t *src){ + if(*src!=0x10){ + return -1; + } + uint32_t size = *(uint32_t*)src >> 8; + uint32_t pos = 0; + int idx = 4; +cloop: + while(pos>4 & 0xF) + 3; + if(((src[idx] & 0xF)<<8) + (src[idx + 1] + 1) <= pos){ + pos+=n; + idx+=2; + }else{ + break; + } + }else{ + pos++; + idx++; + } + flags <<= 1; + lab++; + }else{ + goto cloop; + } + + } + return -1; + } + if(idx%4!=0){ + idx+=4-idx%4; + } + return idx; +} + +int lz77_decompressed_size(uint8_t *source){ + return 
*(uint32_t*)(source + 1); +} + +int lz77_decompress(uint8_t *src, uint8_t *dest){ + if(*src++ != 0x10){ + return -1; + } + int index1 = 0; + int num1 = src[0] + (src[1] << 8) + (src[2]<<16); + src+=3; + while(index1>4); + int num4 = 1 + ((src[0] & 0xF) << 8) + src[1]; + src+=2; + if(num4>num1){ + return -1; + } + for(int index3 = 0;index3=len){ + dest[0] = -1; + dest[1] = 0; + return; + } + if(pos<2 || len-pos<2){ + dest[0] = 0; + dest[1] = 0; + return; + } + + int didx = 0; + for(int index = 1; index<0x1000 && index=0;--index){ + if(source[pos+num]!=source[pos-d[index] + num % d[index]]){ + if(didx>1){ + memmove((void*)d + (sizeof(int) * index),(void*)d + (sizeof(int) * (index+1)),sizeof(int) * (didx - index - 1)); + didx--; + }else{ + flag = 0; + } + } + } + } + dest[0] = num; + dest[1] = d[0]; +} + +int lz77_compress(uint8_t *src, uint8_t **dest, uint32_t len, uint32_t *lenp){ + int pos = 0; + int cpos = 0; + uint8_t *comp = calloc(len,1); + comp[cpos++] = 0x10; + uint8_t *cp = (uint8_t*)lenp; + for(int index=0;index<3;++index){ + comp[cpos++]=*(uint8_t*)cp++; + } + int d[2]; + int dbuf[0x4000]; + while(pos 2){ + uint8_t num2 = ((((d[0] - 3) & 0xF) << 4) + ((d[1] - 1) >> 8 & 0xF)); + comp2[bpos++] = num2; + uint8_t num3 = (d[1] - 1) & 0xFF; + comp2[bpos++] = num3; + pos+=d[0]; + num1 |= 1 << (8 - (index+1)); + }else if(d[0]>=0){ + comp2[bpos++] = src[pos++]; + }else{ + break; + } + } + comp[cpos++] = num1; + for(int i=0;i +#include + +int lz77_compressed_length(uint8_t *source); +int lz77_decompress(uint8_t *source, uint8_t *dest); +int lz77_decompressed_size(uint8_t *source); +int lz77_compress(uint8_t *src, uint8_t **dest, uint32_t len, uint32_t *intp); + +#endif \ No newline at end of file diff --git a/tools/gzinject/src/md5.c b/tools/gzinject/src/md5.c new file mode 100644 index 000000000..e0affaaf0 --- /dev/null +++ b/tools/gzinject/src/md5.c @@ -0,0 +1,291 @@ +/* +* This is an OpenSSL-compatible implementation of the RSA Data Security, Inc. 
+* MD5 Message-Digest Algorithm (RFC 1321). +* +* Homepage: +* http://openwall.info/wiki/people/solar/software/public-domain-source-code/md5 +* +* Author: +* Alexander Peslyak, better known as Solar Designer +* +* This software was written by Alexander Peslyak in 2001. No copyright is +* claimed, and the software is hereby placed in the public domain. +* In case this attempt to disclaim copyright and place the software in the +* public domain is deemed null and void, then the software is +* Copyright (c) 2001 Alexander Peslyak and it is hereby released to the +* general public under the following terms: +* +* Redistribution and use in source and binary forms, with or without +* modification, are permitted. +* +* There's ABSOLUTELY NO WARRANTY, express or implied. +* +* (This is a heavily cut-down "BSD license".) +* +* This differs from Colin Plumb's older public domain implementation in that +* no exactly 32-bit integer data type is required (any 32-bit or wider +* unsigned integer data type will do), there's no compile-time endianness +* configuration, and the function prototypes match OpenSSL's. No code from +* Colin Plumb's implementation has been reused; this comment merely compares +* the properties of the two independent implementations. +* +* The primary goals of this implementation are portability and ease of use. +* It is meant to be fast, but not as fast as possible. Some known +* optimizations are not included to reduce source code size and avoid +* compile-time configuration. +*/ + +#ifndef HAVE_OPENSSL + +#include + +#include "md5.h" + +/* +* The basic MD5 functions. +* +* F and G are optimized compared to their RFC 1321 definitions for +* architectures that lack an AND-NOT instruction, just like in Colin Plumb's +* implementation. 
+*/ +#define F(x, y, z) ((z) ^ ((x) & ((y) ^ (z)))) +#define G(x, y, z) ((y) ^ ((z) & ((x) ^ (y)))) +#define H(x, y, z) (((x) ^ (y)) ^ (z)) +#define H2(x, y, z) ((x) ^ ((y) ^ (z))) +#define I(x, y, z) ((y) ^ ((x) | ~(z))) + +/* +* The MD5 transformation for all four rounds. +*/ +#define STEP(f, a, b, c, d, x, t, s) \ + (a) += f((b), (c), (d)) + (x) + (t); \ + (a) = (((a) << (s)) | (((a) & 0xffffffff) >> (32 - (s)))); \ + (a) += (b); + +/* +* SET reads 4 input bytes in little-endian byte order and stores them in a +* properly aligned word in host byte order. +* +* The check for little-endian architectures that tolerate unaligned memory +* accesses is just an optimization. Nothing will break if it fails to detect +* a suitable architecture. +* +* Unfortunately, this optimization may be a C strict aliasing rules violation +* if the caller's data buffer has effective type that cannot be aliased by +* MD5_u32plus. In practice, this problem may occur if these MD5 routines are +* inlined into a calling function, or with future and dangerously advanced +* link-time optimizations. For the time being, keeping these MD5 routines in +* their own translation unit avoids the problem. +*/ +#if defined(__i386__) || defined(__x86_64__) || defined(__vax__) +#define SET(n) \ + (*(MD5_u32plus *)&ptr[(n) * 4]) +#define GET(n) \ + SET(n) +#else +#define SET(n) \ + (ctx->block[(n)] = \ + (MD5_u32plus)ptr[(n) * 4] | \ + ((MD5_u32plus)ptr[(n) * 4 + 1] << 8) | \ + ((MD5_u32plus)ptr[(n) * 4 + 2] << 16) | \ + ((MD5_u32plus)ptr[(n) * 4 + 3] << 24)) +#define GET(n) \ + (ctx->block[(n)]) +#endif + +/* +* This processes one or more 64-byte data blocks, but does NOT update the bit +* counters. There are no alignment requirements. 
+*/ +static const void *body(MD5_CTX *ctx, const void *data, unsigned long size) +{ + const unsigned char *ptr; + MD5_u32plus a, b, c, d; + MD5_u32plus saved_a, saved_b, saved_c, saved_d; + + ptr = (const unsigned char *)data; + + a = ctx->a; + b = ctx->b; + c = ctx->c; + d = ctx->d; + + do { + saved_a = a; + saved_b = b; + saved_c = c; + saved_d = d; + + /* Round 1 */ + STEP(F, a, b, c, d, SET(0), 0xd76aa478, 7) + STEP(F, d, a, b, c, SET(1), 0xe8c7b756, 12) + STEP(F, c, d, a, b, SET(2), 0x242070db, 17) + STEP(F, b, c, d, a, SET(3), 0xc1bdceee, 22) + STEP(F, a, b, c, d, SET(4), 0xf57c0faf, 7) + STEP(F, d, a, b, c, SET(5), 0x4787c62a, 12) + STEP(F, c, d, a, b, SET(6), 0xa8304613, 17) + STEP(F, b, c, d, a, SET(7), 0xfd469501, 22) + STEP(F, a, b, c, d, SET(8), 0x698098d8, 7) + STEP(F, d, a, b, c, SET(9), 0x8b44f7af, 12) + STEP(F, c, d, a, b, SET(10), 0xffff5bb1, 17) + STEP(F, b, c, d, a, SET(11), 0x895cd7be, 22) + STEP(F, a, b, c, d, SET(12), 0x6b901122, 7) + STEP(F, d, a, b, c, SET(13), 0xfd987193, 12) + STEP(F, c, d, a, b, SET(14), 0xa679438e, 17) + STEP(F, b, c, d, a, SET(15), 0x49b40821, 22) + + /* Round 2 */ + STEP(G, a, b, c, d, GET(1), 0xf61e2562, 5) + STEP(G, d, a, b, c, GET(6), 0xc040b340, 9) + STEP(G, c, d, a, b, GET(11), 0x265e5a51, 14) + STEP(G, b, c, d, a, GET(0), 0xe9b6c7aa, 20) + STEP(G, a, b, c, d, GET(5), 0xd62f105d, 5) + STEP(G, d, a, b, c, GET(10), 0x02441453, 9) + STEP(G, c, d, a, b, GET(15), 0xd8a1e681, 14) + STEP(G, b, c, d, a, GET(4), 0xe7d3fbc8, 20) + STEP(G, a, b, c, d, GET(9), 0x21e1cde6, 5) + STEP(G, d, a, b, c, GET(14), 0xc33707d6, 9) + STEP(G, c, d, a, b, GET(3), 0xf4d50d87, 14) + STEP(G, b, c, d, a, GET(8), 0x455a14ed, 20) + STEP(G, a, b, c, d, GET(13), 0xa9e3e905, 5) + STEP(G, d, a, b, c, GET(2), 0xfcefa3f8, 9) + STEP(G, c, d, a, b, GET(7), 0x676f02d9, 14) + STEP(G, b, c, d, a, GET(12), 0x8d2a4c8a, 20) + + /* Round 3 */ + STEP(H, a, b, c, d, GET(5), 0xfffa3942, 4) + STEP(H2, d, a, b, c, GET(8), 0x8771f681, 11) + STEP(H, c, d, a, b, 
GET(11), 0x6d9d6122, 16) + STEP(H2, b, c, d, a, GET(14), 0xfde5380c, 23) + STEP(H, a, b, c, d, GET(1), 0xa4beea44, 4) + STEP(H2, d, a, b, c, GET(4), 0x4bdecfa9, 11) + STEP(H, c, d, a, b, GET(7), 0xf6bb4b60, 16) + STEP(H2, b, c, d, a, GET(10), 0xbebfbc70, 23) + STEP(H, a, b, c, d, GET(13), 0x289b7ec6, 4) + STEP(H2, d, a, b, c, GET(0), 0xeaa127fa, 11) + STEP(H, c, d, a, b, GET(3), 0xd4ef3085, 16) + STEP(H2, b, c, d, a, GET(6), 0x04881d05, 23) + STEP(H, a, b, c, d, GET(9), 0xd9d4d039, 4) + STEP(H2, d, a, b, c, GET(12), 0xe6db99e5, 11) + STEP(H, c, d, a, b, GET(15), 0x1fa27cf8, 16) + STEP(H2, b, c, d, a, GET(2), 0xc4ac5665, 23) + + /* Round 4 */ + STEP(I, a, b, c, d, GET(0), 0xf4292244, 6) + STEP(I, d, a, b, c, GET(7), 0x432aff97, 10) + STEP(I, c, d, a, b, GET(14), 0xab9423a7, 15) + STEP(I, b, c, d, a, GET(5), 0xfc93a039, 21) + STEP(I, a, b, c, d, GET(12), 0x655b59c3, 6) + STEP(I, d, a, b, c, GET(3), 0x8f0ccc92, 10) + STEP(I, c, d, a, b, GET(10), 0xffeff47d, 15) + STEP(I, b, c, d, a, GET(1), 0x85845dd1, 21) + STEP(I, a, b, c, d, GET(8), 0x6fa87e4f, 6) + STEP(I, d, a, b, c, GET(15), 0xfe2ce6e0, 10) + STEP(I, c, d, a, b, GET(6), 0xa3014314, 15) + STEP(I, b, c, d, a, GET(13), 0x4e0811a1, 21) + STEP(I, a, b, c, d, GET(4), 0xf7537e82, 6) + STEP(I, d, a, b, c, GET(11), 0xbd3af235, 10) + STEP(I, c, d, a, b, GET(2), 0x2ad7d2bb, 15) + STEP(I, b, c, d, a, GET(9), 0xeb86d391, 21) + + a += saved_a; + b += saved_b; + c += saved_c; + d += saved_d; + + ptr += 64; + } while (size -= 64); + + ctx->a = a; + ctx->b = b; + ctx->c = c; + ctx->d = d; + + return ptr; +} + +void MD5_Init(MD5_CTX *ctx) +{ + ctx->a = 0x67452301; + ctx->b = 0xefcdab89; + ctx->c = 0x98badcfe; + ctx->d = 0x10325476; + + ctx->lo = 0; + ctx->hi = 0; +} + +void MD5_Update(MD5_CTX *ctx, const void *data, unsigned long size) +{ + MD5_u32plus saved_lo; + unsigned long used, available; + + saved_lo = ctx->lo; + if ((ctx->lo = (saved_lo + size) & 0x1fffffff) < saved_lo) + ctx->hi++; + ctx->hi += size >> 29; + + used = 
saved_lo & 0x3f; + + if (used) { + available = 64 - used; + + if (size < available) { + memcpy(&ctx->buffer[used], data, size); + return; + } + + memcpy(&ctx->buffer[used], data, available); + data = (const unsigned char *)data + available; + size -= available; + body(ctx, ctx->buffer, 64); + } + + if (size >= 64) { + data = body(ctx, data, size & ~(unsigned long)0x3f); + size &= 0x3f; + } + + memcpy(ctx->buffer, data, size); +} + +#define OUT(dst, src) \ + (dst)[0] = (unsigned char)(src); \ + (dst)[1] = (unsigned char)((src) >> 8); \ + (dst)[2] = (unsigned char)((src) >> 16); \ + (dst)[3] = (unsigned char)((src) >> 24); + +void MD5_Final(unsigned char *result, MD5_CTX *ctx) +{ + unsigned long used, available; + + used = ctx->lo & 0x3f; + + ctx->buffer[used++] = 0x80; + + available = 64 - used; + + if (available < 8) { + memset(&ctx->buffer[used], 0, available); + body(ctx, ctx->buffer, 64); + used = 0; + available = 64; + } + + memset(&ctx->buffer[used], 0, available - 8); + + ctx->lo <<= 3; + OUT(&ctx->buffer[56], ctx->lo) + OUT(&ctx->buffer[60], ctx->hi) + + body(ctx, ctx->buffer, 64); + + OUT(&result[0], ctx->a) + OUT(&result[4], ctx->b) + OUT(&result[8], ctx->c) + OUT(&result[12], ctx->d) + + memset(ctx, 0, sizeof(*ctx)); +} + +#endif \ No newline at end of file diff --git a/tools/gzinject/src/md5.h b/tools/gzinject/src/md5.h new file mode 100644 index 000000000..f51d33e6d --- /dev/null +++ b/tools/gzinject/src/md5.h @@ -0,0 +1,45 @@ +/* +* This is an OpenSSL-compatible implementation of the RSA Data Security, Inc. +* MD5 Message-Digest Algorithm (RFC 1321). +* +* Homepage: +* http://openwall.info/wiki/people/solar/software/public-domain-source-code/md5 +* +* Author: +* Alexander Peslyak, better known as Solar Designer +* +* This software was written by Alexander Peslyak in 2001. No copyright is +* claimed, and the software is hereby placed in the public domain. 
+* In case this attempt to disclaim copyright and place the software in the +* public domain is deemed null and void, then the software is +* Copyright (c) 2001 Alexander Peslyak and it is hereby released to the +* general public under the following terms: +* +* Redistribution and use in source and binary forms, with or without +* modification, are permitted. +* +* There's ABSOLUTELY NO WARRANTY, express or implied. +* +* See md5.c for more information. +*/ + +#ifdef HAVE_OPENSSL +#include +#elif !defined(_MD5_H) +#define _MD5_H + +/* Any 32-bit or wider unsigned integer data type will do */ +typedef unsigned int MD5_u32plus; + +typedef struct { + MD5_u32plus lo, hi; + MD5_u32plus a, b, c, d; + unsigned char buffer[64]; + MD5_u32plus block[16]; +} MD5_CTX; + +extern void MD5_Init(MD5_CTX *ctx); +extern void MD5_Update(MD5_CTX *ctx, const void *data, unsigned long size); +extern void MD5_Final(unsigned char *result, MD5_CTX *ctx); + +#endif \ No newline at end of file diff --git a/tools/gzinject/src/romchu.c b/tools/gzinject/src/romchu.c new file mode 100644 index 000000000..aa37bb9cc --- /dev/null +++ b/tools/gzinject/src/romchu.c @@ -0,0 +1,543 @@ +#include +#include +#include +#include +#include + +/* romchu 0.6 */ +/* a decompressor for type 2 romc */ +/* reversed by hcs from the Wii VC wad for Super Smash Bros EU. 
*/ +/* this code is public domain, have at it */ +/* Taken from https://forum.xentax.com/viewtopic.php?t=5364 */ + +#define VERSION "0.6" + +struct bitstream; + +struct bitstream *init_bitstream(const unsigned char *pool, unsigned long pool_size); +uint32_t get_bits(struct bitstream *bs, int bits); +int bitstream_eof(struct bitstream *bs); +void free_bitstream(struct bitstream *bs); + +struct huftable; + +struct huftable *load_table(struct bitstream *bs, int symbols); +int huf_lookup(struct bitstream *bs, struct huftable *ht); +void free_table(struct huftable *); + +struct { + unsigned int bits; + unsigned int base; +} backref_len[0x1D], backref_disp[0x1E]; + +uint8_t *romchu_decompress(uint8_t *compressed, size_t comp_size, size_t *decomp_size){ + + unsigned char head_buf[4]; + unsigned char payload_buf[0x10000]; + int block_count = 0; + long out_offset = 0; + uint8_t *decompressed; + + uint64_t nominal_size; + int romc_type; + uint8_t *comp = compressed; + // read header + { + memcpy(head_buf,compressed,4); + nominal_size = head_buf[0]; + nominal_size *= 0x100; + nominal_size |= head_buf[1]; + nominal_size *= 0x100; + nominal_size |= head_buf[2]; + nominal_size *= 0x40; + nominal_size |= head_buf[3]>>2; + romc_type = head_buf[3]&0x3; + decompressed = malloc(nominal_size); + if(decomp_size) *decomp_size = nominal_size; + if (!decompressed) + { + perror("malloc big outbuf buffer"); + return NULL; + } + + switch(romc_type) { + case 0: + memcpy(decompressed, compressed + 4, *decomp_size); + return decompressed; + case 2: + break; + default: + fprintf(stderr, "Unsupported romc type. 
%d\n", romc_type); + return NULL; + + } + } + + // initialize backreference lookup tables + { + for (unsigned int i = 0; i < 8; i++) + { + backref_len[i].bits = 0; + backref_len[i].base = i; + } + + for (unsigned int i = 8, scale = 1; scale < 6; scale++) + { + for (unsigned int k = (1<<(scale+2)); + k < (1<<(scale+3)); + k += (1< 0) + { + read_size ++; + } + + if (read_size > sizeof(payload_buf)) + { + fprintf(stderr, "payload too large\n"); + free(decompressed); + return NULL; + } + memcpy(payload_buf,compressed,read_size); + compressed+=read_size; + + /* attempt to parse... */ + + if (compression_flag) + { + uint16_t tab1_size, tab2_size; + uint32_t body_size; + unsigned long tab1_offset, tab2_offset, body_offset; + struct bitstream *bs; + struct huftable *table1, *table2; + + /* read table 1 size */ + tab1_offset = 0; + bs = init_bitstream(payload_buf + tab1_offset, payload_bytes*8+payload_bits); + tab1_size = get_bits(bs, 16); + free_bitstream(bs); + + /* load table 1 */ + bs = init_bitstream(payload_buf + tab1_offset + 2, tab1_size); + table1 = load_table(bs, 0x11D); + free_bitstream(bs); + + /* read table 2 size */ + tab2_offset = tab1_offset + 2 + (tab1_size+7) / 8; + bs = init_bitstream(payload_buf + tab2_offset, 2*8); + tab2_size = get_bits(bs, 16); + free_bitstream(bs); + + /* load table 2 */ + bs = init_bitstream(payload_buf + tab2_offset + 2, tab2_size); + table2 = load_table(bs, 0x1E); + free_bitstream(bs); + + /* decode body */ + body_offset = tab2_offset + 2 + (tab2_size+7) / 8; + body_size = payload_bytes*8 + payload_bits - body_offset*8; + bs = init_bitstream(payload_buf + body_offset, body_size); + + while (!bitstream_eof(bs)) + { + int symbol = huf_lookup(bs, table1); + + if (symbol < 0x100) + { + /* byte literal */ + unsigned char b = symbol; + if (out_offset >= nominal_size) + { + fprintf(stderr, "generated too many bytes\n"); + free(decompressed); + return NULL; + } + decompressed[out_offset++] = b; + } + else + { + /* backreference */ + 
unsigned int len_bits = backref_len[symbol-0x100].bits; + unsigned int len = backref_len[symbol-0x100].base; + if (len_bits > 0) + { + len += get_bits(bs, len_bits); + } + len += 3; + + int symbol2 = huf_lookup(bs, table2); + + unsigned int disp_bits = backref_disp[symbol2].bits; + unsigned int disp = backref_disp[symbol2].base; + if (disp_bits > 0) + { + disp += get_bits(bs, disp_bits); + } + disp ++; + + if (disp > out_offset) + { + fprintf(stderr, "backreference too far\n"); + free(decompressed); + return NULL; + } + if (out_offset+len > nominal_size) + { + fprintf(stderr, "generated too many bytes\n"); + free(decompressed); + return NULL; + } + for (unsigned int i = 0; i < len; i++, out_offset++) + { + decompressed[out_offset] = decompressed[out_offset-disp]; + } + } + } + + free_table(table1); + free_table(table2); + free_bitstream(bs); + } + else + { + if (out_offset + payload_bytes > nominal_size) + { + fprintf(stderr, "generated too many bytes\n"); + free(decompressed); + return NULL; + } + memcpy(decompressed+out_offset, payload_buf, payload_bytes); + out_offset += payload_bytes; + } + + block_count ++; + } + return decompressed; +} + +/* bitstream reader */ +struct bitstream +{ + const unsigned char *pool; + long bits_left; + uint8_t first_byte; + int first_byte_bits; +}; + +struct bitstream *init_bitstream(const unsigned char *pool, unsigned long pool_size) +{ + struct bitstream *bs = malloc(sizeof(struct bitstream)); + if (!bs) + { + perror("bitstream malloc"); + exit(EXIT_FAILURE); + } + + bs->pool = pool; + bs->bits_left = pool_size; + bs->first_byte_bits = 0; + + /* check that padding bits are 0 (to ensure we aren't ignoring anything) */ + if (pool_size%8) + { + if (pool[pool_size/8] & ~((1<<(pool_size%8))-1)) + { + fprintf(stderr, "nonzero padding at end of bitstream\n"); + exit(EXIT_FAILURE); + } + } + + return bs; +} + +uint32_t get_bits(struct bitstream *bs, int bits) +{ + uint32_t accum = 0; + + if (bits > 32) + { + fprintf(stderr, "get_bits() 
supports max 32\n"); + exit(EXIT_FAILURE); + } + if (bits > bs->bits_left + bs->first_byte_bits) + { + fprintf(stderr, "get_bits() underflow\n"); + exit(EXIT_FAILURE); + } + + for (int i = 0; i < bits; i++) + { + if (bs->first_byte_bits == 0) + { + bs->first_byte = *bs->pool; + bs->pool ++; + if (bs->bits_left >= 8) + { + bs->first_byte_bits = 8; + bs->bits_left -= 8; + } + else + { + bs->first_byte_bits = bs->bits_left; + bs->bits_left = 0; + } + } + + accum >>= 1; + accum |= (bs->first_byte & 1)<<31; + bs->first_byte >>= 1; + bs->first_byte_bits --; + } + + return accum>>(32-bits); +} + +int bitstream_eof(struct bitstream *bs) +{ + return (bs->bits_left + bs->first_byte_bits == 0); +} + +void free_bitstream(struct bitstream *bs) +{ + free(bs); +} + +/* Huffman code handling */ +struct hufnode { + int is_leaf; + union { + struct { + int left, right; + } inner; + struct { + int symbol; + } leaf; + } u; +}; +struct huftable { + int symbols; + struct hufnode *t; +}; + +struct huftable *load_table(struct bitstream *bs, int symbols) +{ + int len_count[32] = {0}; + uint32_t codes[32]; + int *length_of = malloc(sizeof(*length_of) * symbols); + struct huftable *ht; + int next_free_node; + + for (int i = 0; i < symbols; ) + { + if (get_bits(bs, 1)) + { + /* run of equal lengths */ + int count = get_bits(bs, 7) + 2; + int length = get_bits(bs, 5); + + len_count[length] += count; + for (int j = 0; j < count; j++, i++) + { + length_of[i] = length; + } + } + else + { + /* set of inequal lengths */ + int count = get_bits(bs, 7) + 1; + + for (int j = 0; j < count; j++, i++) + { + int length = get_bits(bs, 5); + length_of[i] = length; + len_count[length] ++; + } + } + } + + if (!bitstream_eof(bs)) + { + fprintf(stderr, "did not exhaust bitstream reading table\n"); + exit(EXIT_FAILURE); + } + + /* compute the first canonical Huffman code for each length */ + len_count[0] = 0; // not strictly necessary + for (uint32_t i = 1, accum = 0; i < 32; i++) + { + accum = codes[i] = (accum + 
len_count[i-1]) << 1; + } + + /* allocate space for the tree */ + ht = malloc(sizeof(struct huftable)); + if (!ht) + { + perror("malloc of huftable"); + exit(EXIT_FAILURE); + } + ht->symbols = symbols; + ht->t = malloc(sizeof(struct hufnode) * symbols * 2); + if (!ht->t) + { + perror("malloc of hufnodes"); + exit(EXIT_FAILURE); + } + + /* determine codes and build a tree */ + for (int i = 0; i < symbols*2; i++) + { + ht->t[i].is_leaf = 0; + ht->t[i].u.inner.left = ht->t[i].u.inner.right = 0; + } + next_free_node = 1; + for (int i = 0; i < symbols; i++) + { + int cur = 0; + if (0 == length_of[i]) + { + // 0 length indicates absent symbol + continue; + } + + for (int j = length_of[i]-1; j >= 0; j --) + { + int next; + if (ht->t[cur].is_leaf) + { + fprintf(stderr, "oops, walked onto a leaf\n"); + exit(EXIT_FAILURE); + } + + if (codes[length_of[i]]&(1<t[cur].u.inner.right; + if (0 == next) + { + next = ht->t[cur].u.inner.right = next_free_node ++; + } + } + else + { + // 0 == left + next = ht->t[cur].u.inner.left ; + if (0 == next) + { + next = ht->t[cur].u.inner.left = next_free_node ++; + } + } + + cur = next; + } + + ht->t[cur].is_leaf = 1; + ht->t[cur].u.leaf.symbol = i; + + codes[length_of[i]] ++; + } + free(length_of); + return ht; +} + +int huf_lookup(struct bitstream *bs, struct huftable *ht) +{ + int cur = 0; + while (!ht->t[cur].is_leaf) + { + if (get_bits(bs, 1)) + { + // 1 == right + cur = ht->t[cur].u.inner.right; + } + else + { + // 0 == left + cur = ht->t[cur].u.inner.left; + } + } + + return ht->t[cur].u.leaf.symbol; +} + +void free_table(struct huftable *ht) +{ + if (ht) + { + free(ht->t); + } + free(ht); +} diff --git a/tools/gzinject/src/romchu.h b/tools/gzinject/src/romchu.h new file mode 100644 index 000000000..1c2f8b838 --- /dev/null +++ b/tools/gzinject/src/romchu.h @@ -0,0 +1,8 @@ +#ifndef _ROMCHU_H +#define _ROMCHU_H + +#include + +uint8_t *romchu_decompress(uint8_t *compressed, size_t comp_size, size_t *decomp_size); + +#endif \ No newline at 
end of file diff --git a/tools/gzinject/src/sha1.c b/tools/gzinject/src/sha1.c new file mode 100644 index 000000000..73794062a --- /dev/null +++ b/tools/gzinject/src/sha1.c @@ -0,0 +1,296 @@ +/* +SHA-1 in C +By Steve Reid +100% Public Domain + +Test Vectors (from FIPS PUB 180-1) +"abc" +A9993E36 4706816A BA3E2571 7850C26C 9CD0D89D +"abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq" +84983E44 1C3BD26E BAAE4AA1 F95129E5 E54670F1 +A million repetitions of "a" +34AA973C D4C4DAA4 F61EEB2B DBAD2731 6534016F +*/ + +/* #define LITTLE_ENDIAN * This should be #define'd already, if true. */ +/* #define SHA1HANDSOFF * Copies data before messing with it. */ + +#define SHA1HANDSOFF + +#include +#include + +/* for uint32_t */ +#include + +#include "sha1.h" + + +#define rol(value, bits) (((value) << (bits)) | ((value) >> (32 - (bits)))) + +/* blk0() and blk() perform the initial expand. */ +/* I got the idea of expanding during the round function from SSLeay */ +#if BYTE_ORDER == LITTLE_ENDIAN +#define blk0(i) (block->l[i] = (rol(block->l[i],24)&0xFF00FF00) \ + |(rol(block->l[i],8)&0x00FF00FF)) +#elif BYTE_ORDER == BIG_ENDIAN +#define blk0(i) block->l[i] +#else +#error "Endianness not defined!" +#endif +#define blk(i) (block->l[i&15] = rol(block->l[(i+13)&15]^block->l[(i+8)&15] \ + ^block->l[(i+2)&15]^block->l[i&15],1)) + +/* (R0+R1), R2, R3, R4 are the different operations used in SHA1 */ +#define R0(v,w,x,y,z,i) z+=((w&(x^y))^y)+blk0(i)+0x5A827999+rol(v,5);w=rol(w,30); +#define R1(v,w,x,y,z,i) z+=((w&(x^y))^y)+blk(i)+0x5A827999+rol(v,5);w=rol(w,30); +#define R2(v,w,x,y,z,i) z+=(w^x^y)+blk(i)+0x6ED9EBA1+rol(v,5);w=rol(w,30); +#define R3(v,w,x,y,z,i) z+=(((w|x)&y)|(w&x))+blk(i)+0x8F1BBCDC+rol(v,5);w=rol(w,30); +#define R4(v,w,x,y,z,i) z+=(w^x^y)+blk(i)+0xCA62C1D6+rol(v,5);w=rol(w,30); + + +/* Hash a single 512-bit block. This is the core of the algorithm. 
*/ + +void SHA1Transform( + uint32_t state[5], + const unsigned char buffer[64] +) +{ + uint32_t a, b, c, d, e; + + typedef union + { + unsigned char c[64]; + uint32_t l[16]; + } CHAR64LONG16; + +#ifdef SHA1HANDSOFF + CHAR64LONG16 block[1]; /* use array to appear as a pointer */ + + memcpy(block, buffer, 64); +#else + /* The following had better never be used because it causes the + * pointer-to-const buffer to be cast into a pointer to non-const. + * And the result is written through. I threw a "const" in, hoping + * this will cause a diagnostic. + */ + CHAR64LONG16 *block = (const CHAR64LONG16 *)buffer; +#endif + /* Copy context->state[] to working vars */ + a = state[0]; + b = state[1]; + c = state[2]; + d = state[3]; + e = state[4]; + /* 4 rounds of 20 operations each. Loop unrolled. */ + R0(a, b, c, d, e, 0); + R0(e, a, b, c, d, 1); + R0(d, e, a, b, c, 2); + R0(c, d, e, a, b, 3); + R0(b, c, d, e, a, 4); + R0(a, b, c, d, e, 5); + R0(e, a, b, c, d, 6); + R0(d, e, a, b, c, 7); + R0(c, d, e, a, b, 8); + R0(b, c, d, e, a, 9); + R0(a, b, c, d, e, 10); + R0(e, a, b, c, d, 11); + R0(d, e, a, b, c, 12); + R0(c, d, e, a, b, 13); + R0(b, c, d, e, a, 14); + R0(a, b, c, d, e, 15); + R1(e, a, b, c, d, 16); + R1(d, e, a, b, c, 17); + R1(c, d, e, a, b, 18); + R1(b, c, d, e, a, 19); + R2(a, b, c, d, e, 20); + R2(e, a, b, c, d, 21); + R2(d, e, a, b, c, 22); + R2(c, d, e, a, b, 23); + R2(b, c, d, e, a, 24); + R2(a, b, c, d, e, 25); + R2(e, a, b, c, d, 26); + R2(d, e, a, b, c, 27); + R2(c, d, e, a, b, 28); + R2(b, c, d, e, a, 29); + R2(a, b, c, d, e, 30); + R2(e, a, b, c, d, 31); + R2(d, e, a, b, c, 32); + R2(c, d, e, a, b, 33); + R2(b, c, d, e, a, 34); + R2(a, b, c, d, e, 35); + R2(e, a, b, c, d, 36); + R2(d, e, a, b, c, 37); + R2(c, d, e, a, b, 38); + R2(b, c, d, e, a, 39); + R3(a, b, c, d, e, 40); + R3(e, a, b, c, d, 41); + R3(d, e, a, b, c, 42); + R3(c, d, e, a, b, 43); + R3(b, c, d, e, a, 44); + R3(a, b, c, d, e, 45); + R3(e, a, b, c, d, 46); + R3(d, e, a, b, c, 47); + R3(c, 
d, e, a, b, 48); + R3(b, c, d, e, a, 49); + R3(a, b, c, d, e, 50); + R3(e, a, b, c, d, 51); + R3(d, e, a, b, c, 52); + R3(c, d, e, a, b, 53); + R3(b, c, d, e, a, 54); + R3(a, b, c, d, e, 55); + R3(e, a, b, c, d, 56); + R3(d, e, a, b, c, 57); + R3(c, d, e, a, b, 58); + R3(b, c, d, e, a, 59); + R4(a, b, c, d, e, 60); + R4(e, a, b, c, d, 61); + R4(d, e, a, b, c, 62); + R4(c, d, e, a, b, 63); + R4(b, c, d, e, a, 64); + R4(a, b, c, d, e, 65); + R4(e, a, b, c, d, 66); + R4(d, e, a, b, c, 67); + R4(c, d, e, a, b, 68); + R4(b, c, d, e, a, 69); + R4(a, b, c, d, e, 70); + R4(e, a, b, c, d, 71); + R4(d, e, a, b, c, 72); + R4(c, d, e, a, b, 73); + R4(b, c, d, e, a, 74); + R4(a, b, c, d, e, 75); + R4(e, a, b, c, d, 76); + R4(d, e, a, b, c, 77); + R4(c, d, e, a, b, 78); + R4(b, c, d, e, a, 79); + /* Add the working vars back into context.state[] */ + state[0] += a; + state[1] += b; + state[2] += c; + state[3] += d; + state[4] += e; + /* Wipe variables */ + a = b = c = d = e = 0; +#ifdef SHA1HANDSOFF + memset(block, '\0', sizeof(block)); +#endif +} + + +/* SHA1Init - Initialize new context */ + +void SHA1Init( + SHA1_CTX * context +) +{ + /* SHA1 initialization constants */ + context->state[0] = 0x67452301; + context->state[1] = 0xEFCDAB89; + context->state[2] = 0x98BADCFE; + context->state[3] = 0x10325476; + context->state[4] = 0xC3D2E1F0; + context->count[0] = context->count[1] = 0; +} + + +/* Run your data through this. 
*/ + +void SHA1Update( + SHA1_CTX * context, + const unsigned char *data, + uint32_t len +) +{ + uint32_t i; + + uint32_t j; + + j = context->count[0]; + if ((context->count[0] += len << 3) < j) + context->count[1]++; + context->count[1] += (len >> 29); + j = (j >> 3) & 63; + if ((j + len) > 63) + { + memcpy(&context->buffer[j], data, (i = 64 - j)); + SHA1Transform(context->state, context->buffer); + for (; i + 63 < len; i += 64) + { + SHA1Transform(context->state, &data[i]); + } + j = 0; + } + else + i = 0; + memcpy(&context->buffer[j], &data[i], len - i); +} + + +/* Add padding and return the message digest. */ + +void SHA1Final( + unsigned char digest[20], + SHA1_CTX * context +) +{ + unsigned i; + + unsigned char finalcount[8]; + + unsigned char c; + +#if 0 /* untested "improvement" by DHR */ + /* Convert context->count to a sequence of bytes + * in finalcount. Second element first, but + * big-endian order within element. + * But we do it all backwards. + */ + unsigned char *fcp = &finalcount[8]; + + for (i = 0; i < 2; i++) + { + uint32_t t = context->count[i]; + + int j; + + for (j = 0; j < 4; t >>= 8, j++) + *--fcp = (unsigned char)t + } +#else + for (i = 0; i < 8; i++) + { + finalcount[i] = (unsigned char)((context->count[(i >= 4 ? 
0 : 1)] >> ((3 - (i & 3)) * 8)) & 255); /* Endian independent */ + } +#endif + c = 0200; + SHA1Update(context, &c, 1); + while ((context->count[0] & 504) != 448) + { + c = 0000; + SHA1Update(context, &c, 1); + } + SHA1Update(context, finalcount, 8); /* Should cause a SHA1Transform() */ + for (i = 0; i < 20; i++) + { + digest[i] = (unsigned char) + ((context->state[i >> 2] >> ((3 - (i & 3)) * 8)) & 255); + } + /* Wipe variables */ + memset(context, '\0', sizeof(*context)); + memset(&finalcount, '\0', sizeof(finalcount)); +} + +void SHA1( + char *hash_out, + const char *str, + int len) +{ + SHA1_CTX ctx; + unsigned int ii; + + SHA1Init(&ctx); + for (ii = 0; ii +100% Public Domain +*/ + +#include "stdint.h" + +typedef struct +{ + uint32_t state[5]; + uint32_t count[2]; + unsigned char buffer[64]; +} SHA1_CTX; + +void SHA1Transform( + uint32_t state[5], + const unsigned char buffer[64] +); + +void SHA1Init( + SHA1_CTX * context +); + +void SHA1Update( + SHA1_CTX * context, + const unsigned char *data, + uint32_t len +); + +void SHA1Final( + unsigned char digest[20], + SHA1_CTX * context +); + +void SHA1( + char *hash_out, + const char *str, + int len); + +#endif /* SHA1_H */ \ No newline at end of file diff --git a/tools/gzinject/src/u8.c b/tools/gzinject/src/u8.c new file mode 100644 index 000000000..fec215538 --- /dev/null +++ b/tools/gzinject/src/u8.c @@ -0,0 +1,248 @@ +#include +#include +#include +#include +#include +#include +#include +#include "u8.h" + +void free_nodes(node_entry_t **nodes, uint8_t nodec){ + for(int i=0;ifilename) free(nodes[i]->filename); + free(nodes[i]); + } + } +} + +void get_dir_contents_recursive(const char *dirname, node_entry_t ***nodes, uint8_t *idx, node_entry_t *directory, int recursion){ + struct stat sbuffer; + node_entry_t **node_array = *nodes; + DIR *dir; + struct dirent *ent; + chdir(dirname); + if ((dir = opendir(".")) != NULL) { + while ((ent = readdir(dir)) != NULL) { + if(ent->d_name[0]=='.') continue; + uint8_t this_idx = 
*idx; + node_entry_t **new_nodes = realloc(node_array, ((this_idx+1) * sizeof(*node_array))); + node_array = new_nodes; + size_t len = strlen(ent->d_name); + char *name = malloc(strlen(ent->d_name) + 1); + strcpy(name,ent->d_name); + name[len] = 0; + node_entry_t *node = malloc(sizeof(node_entry_t)); + node->filename = name; + node->directory = directory; + stat(name, &sbuffer); + node->node.size = sbuffer.st_size; + (*idx)++; + directory->node.size++; + if ((sbuffer.st_mode & S_IFMT) == S_IFDIR) { + node->node.type = 0x0001; + node->node.data_offset=recursion; + node->node.size = 0; + get_dir_contents_recursive(name,&node_array,idx,node,recursion+1); + + }else{ + node->node.type = 0x0000; + } + node_array[this_idx] = node; + } + closedir(dir); + } + chdir(".."); + *nodes = node_array; +} + +void sort_dir(node_entry_t **src, node_entry_t **dest, node_entry_t *dir, size_t total_cnt,int start, int *pos){ + for(int i=start;idirectory == dir && node->node.type==0x0000){ + dest[(*pos)++] = node; + } + } + for(int i=start;idirectory == dir && node->node.type==0x0001){ + dest[(*pos)++] = node; + sort_dir(src,dest,node,total_cnt,start,pos); + node->node.size = *pos; + } + } +} + +int create_u8_archive(const char *dir, const char *output){ + // Root Directory node. 
+ node_entry_t rootdirnode; + rootdirnode.node.data_offset = 0; + rootdirnode.node.type=0x0001; + rootdirnode.node.name_offset=0; + rootdirnode.node.size=0; + rootdirnode.directory=NULL; + rootdirnode.filename="."; + + uint8_t nodec = 1; + + node_entry_t **dirnodes = malloc(sizeof(*dirnodes)); + dirnodes[0] = &rootdirnode; + get_dir_contents_recursive(dir,&dirnodes,&nodec,dirnodes[0],0); + dirnodes[0]->node.size = nodec+1; + node_entry_t **sorted = malloc(sizeof(*sorted) * nodec); + sorted[0] = dirnodes[0]; + int pos = 1; + + sort_dir(dirnodes,sorted,sorted[0],nodec,pos,&pos); + free(dirnodes); + uint8_t *string_table = malloc(1); + string_table[0] = 0; + + int npos = 1 , dpos = 0, dirdepth = 0; + + uint8_t *data = NULL; + chdir(dir); + + for(int i=0;inode.name_offset = npos; + size_t nlen = strlen(sorted[i]->filename) + 1; + uint8_t *new_table = realloc(string_table,npos + nlen); + if(new_table!=NULL){ + string_table = new_table; + } + + memcpy(string_table + npos,sorted[i]->filename,nlen); + string_table[npos+nlen-1]=0; + npos+=nlen; + if(sorted[i]->node.type==0x0001){ + chdir(sorted[i]->filename); + dirdepth++; + }else{ + uint32_t padlen = addpadding(sorted[i]->node.size,32); + uint8_t *new_data = realloc(data,dpos + padlen); + if(new_data!=NULL){ + data = new_data; + } + memset(data + dpos,0,padlen); + FILE *fle = fopen(sorted[i]->filename, "rb"); + fread(data + dpos, 1, sorted[i]->node.size, fle); + fclose(fle); + sorted[i]->node.data_offset = dpos; + dpos+=padlen; + } + } + + for(int i=0;inode; + if(node.type==0x0000){ + node.data_offset+=dataoffset; + } + node.data_offset = REVERSEENDIAN32(node.data_offset); + node.size = REVERSEENDIAN32(node.size); + node.name_offset = REVERSEENDIAN16(node.name_offset); + fwrite(&node, 1, sizeof(u8_node), foutfile); + } + free_nodes(sorted + 1,nodec - 1); + free(sorted); + fwrite(string_table, 1, npos, foutfile); + free(string_table); + + uint8_t *padding = calloc(padcount, sizeof(uint8_t)); + fwrite(padding, 1, padcount, 
foutfile); + free(padding); + + fwrite(data, 1, dpos, foutfile); + free(data); + + fclose(foutfile); + + return 1; +} + +int extract_u8_archive(uint8_t *data, const char *outdir){ + mkdir(outdir, 0755); + chdir(outdir); + u8_header header; + uint32_t data_offset; + uint8_t *string_table; + size_t rest_size; + + memcpy(&header, data, sizeof(header)); + + int curpos = sizeof(header); + + u8_node root_node; + memcpy(&root_node, data + curpos, sizeof(u8_node)); + curpos += sizeof(u8_node); + + uint32_t nodec = be32((uint8_t*)&root_node.size) - 1; + u8_node *nodes = malloc(sizeof(u8_node)*nodec); + memcpy(nodes, data + curpos, sizeof(u8_node)*nodec); + curpos += sizeof(u8_node)*nodec; + + data_offset = be32((uint8_t*)&header.data_offset); + rest_size = data_offset - sizeof(header) - (nodec + 1) * sizeof(u8_node); + string_table = malloc(rest_size); + memcpy(string_table, data + curpos, rest_size); + + u8_node *node; + int dir_depth = 0; + FILE *outfile; + for (int j = 0; j < nodec; j++) { + node = &nodes[j]; + uint32_t doffset = be32((uint8_t*)&node->data_offset); + uint32_t dsize = be32((uint8_t*)&node->size); + uint16_t name_offset = be16((uint8_t*)&node->name_offset); + uint16_t type = be16((uint8_t*)&node->type); + char *name = (char*)&string_table[name_offset]; + if (type == 0x0000) { // Regular file + outfile = fopen(name, "wb"); + fwrite(data + doffset, 1, dsize, outfile); + fclose(outfile); + }else if(type==0x0100){ // Directory + while(dir_depth>doffset+1){ + chdir(".."); + dir_depth--; + } + mkdir(name,0755); + chdir(name); + dir_depth++; + } + } + do{ + chdir(".."); + dir_depth--; + }while(dir_depth>0); + free(string_table); + free(nodes); + return 1; +} \ No newline at end of file diff --git a/tools/gzinject/src/u8.h b/tools/gzinject/src/u8.h new file mode 100644 index 000000000..7519c692a --- /dev/null +++ b/tools/gzinject/src/u8.h @@ -0,0 +1,34 @@ +#ifndef U8_H_ +#define U8_H_ + +#include +#include "gzinject.h" + +typedef struct { + uint16_t type; + 
uint16_t name_offset; + uint32_t data_offset; + uint32_t size; +}u8_node; + +typedef struct +{ + uint32_t tag; + uint32_t rootnode_offset; + uint32_t header_size; + uint32_t data_offset; + uint8_t padding[16]; +} u8_header; + +typedef struct node_entry_s node_entry_t; + +struct node_entry_s { + u8_node node; + char *filename; + node_entry_t *directory; +}; + +int create_u8_archive(const char *dir, const char *output); +int extract_u8_archive(uint8_t *data, const char *outdir); + +#endif \ No newline at end of file diff --git a/tools/z64compress/.editorconfig b/tools/z64compress/.editorconfig new file mode 100644 index 000000000..342ff359c --- /dev/null +++ b/tools/z64compress/.editorconfig @@ -0,0 +1,15 @@ +root = true + +[*] +end_of_line = lf +insert_final_newline = true + +# Matches multiple files with brace expansion notation +[*.{c,h,ch}] +charset = utf-8 +indent_style = tab +indent_size = 3 +trim_trailing_whitespace = false + +[*.md] +trim_trailing_whitespace = false diff --git a/tools/z64compress/.gitignore b/tools/z64compress/.gitignore new file mode 100644 index 000000000..6a47e0517 --- /dev/null +++ b/tools/z64compress/.gitignore @@ -0,0 +1,3 @@ +bin/ +o/ +z64compress diff --git a/tools/z64compress/.gitrepo b/tools/z64compress/.gitrepo new file mode 100644 index 000000000..0165907a9 --- /dev/null +++ b/tools/z64compress/.gitrepo @@ -0,0 +1,12 @@ +; DO NOT EDIT (unless you know what you are doing) +; +; This subdirectory is a git "subrepo", and this file is maintained by the +; git-subrepo command. 
See https://github.com/ingydotnet/git-subrepo#readme +; +[subrepo] + remote = https://github.com/z64tools/z64compress.git + branch = main + commit = 331039828b0e9c995b8727a64b5bc083c78d1476 + parent = ce3fe6d65dd1b46509f3bbcb538e9bcc56f2cfa3 + method = merge + cmdver = 0.4.5 diff --git a/tools/z64compress/LICENSE b/tools/z64compress/LICENSE new file mode 100644 index 000000000..f288702d2 --- /dev/null +++ b/tools/z64compress/LICENSE @@ -0,0 +1,674 @@ + GNU GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The GNU General Public License is a free, copyleft license for +software and other kinds of works. + + The licenses for most software and other practical works are designed +to take away your freedom to share and change the works. By contrast, +the GNU General Public License is intended to guarantee your freedom to +share and change all versions of a program--to make sure it remains free +software for all its users. We, the Free Software Foundation, use the +GNU General Public License for most of our software; it applies also to +any other work released this way by its authors. You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +them if you wish), that you receive source code or can get it if you +want it, that you can change the software or use pieces of it in new +free programs, and that you know you can do these things. + + To protect your rights, we need to prevent others from denying you +these rights or asking you to surrender the rights. 
Therefore, you have +certain responsibilities if you distribute copies of the software, or if +you modify it: responsibilities to respect the freedom of others. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must pass on to the recipients the same +freedoms that you received. You must make sure that they, too, receive +or can get the source code. And you must show them these terms so they +know their rights. + + Developers that use the GNU GPL protect your rights with two steps: +(1) assert copyright on the software, and (2) offer you this License +giving you legal permission to copy, distribute and/or modify it. + + For the developers' and authors' protection, the GPL clearly explains +that there is no warranty for this free software. For both users' and +authors' sake, the GPL requires that modified versions be marked as +changed, so that their problems will not be attributed erroneously to +authors of previous versions. + + Some devices are designed to deny users access to install or run +modified versions of the software inside them, although the manufacturer +can do so. This is fundamentally incompatible with the aim of +protecting users' freedom to change the software. The systematic +pattern of such abuse occurs in the area of products for individuals to +use, which is precisely where it is most unacceptable. Therefore, we +have designed this version of the GPL to prohibit the practice for those +products. If such problems arise substantially in other domains, we +stand ready to extend this provision to those domains in future versions +of the GPL, as needed to protect the freedom of users. + + Finally, every program is threatened constantly by software patents. +States should not allow patents to restrict development and use of +software on general-purpose computers, but in those that do, we wish to +avoid the special danger that patents applied to a free program could +make it effectively proprietary. 
To prevent this, the GPL assures that +patents cannot be used to render the program non-free. + + The precise terms and conditions for copying, distribution and +modification follow. + + TERMS AND CONDITIONS + + 0. Definitions. + + "This License" refers to version 3 of the GNU General Public License. + + "Copyright" also means copyright-like laws that apply to other kinds of +works, such as semiconductor masks. + + "The Program" refers to any copyrightable work licensed under this +License. Each licensee is addressed as "you". "Licensees" and +"recipients" may be individuals or organizations. + + To "modify" a work means to copy from or adapt all or part of the work +in a fashion requiring copyright permission, other than the making of an +exact copy. The resulting work is called a "modified version" of the +earlier work or a work "based on" the earlier work. + + A "covered work" means either the unmodified Program or a work based +on the Program. + + To "propagate" a work means to do anything with it that, without +permission, would make you directly or secondarily liable for +infringement under applicable copyright law, except executing it on a +computer or modifying a private copy. Propagation includes copying, +distribution (with or without modification), making available to the +public, and in some countries other activities as well. + + To "convey" a work means any kind of propagation that enables other +parties to make or receive copies. Mere interaction with a user through +a computer network, with no transfer of a copy, is not conveying. + + An interactive user interface displays "Appropriate Legal Notices" +to the extent that it includes a convenient and prominently visible +feature that (1) displays an appropriate copyright notice, and (2) +tells the user that there is no warranty for the work (except to the +extent that warranties are provided), that licensees may convey the +work under this License, and how to view a copy of this License. 
If +the interface presents a list of user commands or options, such as a +menu, a prominent item in the list meets this criterion. + + 1. Source Code. + + The "source code" for a work means the preferred form of the work +for making modifications to it. "Object code" means any non-source +form of a work. + + A "Standard Interface" means an interface that either is an official +standard defined by a recognized standards body, or, in the case of +interfaces specified for a particular programming language, one that +is widely used among developers working in that language. + + The "System Libraries" of an executable work include anything, other +than the work as a whole, that (a) is included in the normal form of +packaging a Major Component, but which is not part of that Major +Component, and (b) serves only to enable use of the work with that +Major Component, or to implement a Standard Interface for which an +implementation is available to the public in source code form. A +"Major Component", in this context, means a major essential component +(kernel, window system, and so on) of the specific operating system +(if any) on which the executable work runs, or a compiler used to +produce the work, or an object code interpreter used to run it. + + The "Corresponding Source" for a work in object code form means all +the source code needed to generate, install, and (for an executable +work) run the object code and to modify the work, including scripts to +control those activities. However, it does not include the work's +System Libraries, or general-purpose tools or generally available free +programs which are used unmodified in performing those activities but +which are not part of the work. 
For example, Corresponding Source +includes interface definition files associated with source files for +the work, and the source code for shared libraries and dynamically +linked subprograms that the work is specifically designed to require, +such as by intimate data communication or control flow between those +subprograms and other parts of the work. + + The Corresponding Source need not include anything that users +can regenerate automatically from other parts of the Corresponding +Source. + + The Corresponding Source for a work in source code form is that +same work. + + 2. Basic Permissions. + + All rights granted under this License are granted for the term of +copyright on the Program, and are irrevocable provided the stated +conditions are met. This License explicitly affirms your unlimited +permission to run the unmodified Program. The output from running a +covered work is covered by this License only if the output, given its +content, constitutes a covered work. This License acknowledges your +rights of fair use or other equivalent, as provided by copyright law. + + You may make, run and propagate covered works that you do not +convey, without conditions so long as your license otherwise remains +in force. You may convey covered works to others for the sole purpose +of having them make modifications exclusively for you, or provide you +with facilities for running those works, provided that you comply with +the terms of this License in conveying all material for which you do +not control copyright. Those thus making or running the covered works +for you must do so exclusively on your behalf, under your direction +and control, on terms that prohibit them from making any copies of +your copyrighted material outside their relationship with you. + + Conveying under any other circumstances is permitted solely under +the conditions stated below. Sublicensing is not allowed; section 10 +makes it unnecessary. + + 3. 
Protecting Users' Legal Rights From Anti-Circumvention Law. + + No covered work shall be deemed part of an effective technological +measure under any applicable law fulfilling obligations under article +11 of the WIPO copyright treaty adopted on 20 December 1996, or +similar laws prohibiting or restricting circumvention of such +measures. + + When you convey a covered work, you waive any legal power to forbid +circumvention of technological measures to the extent such circumvention +is effected by exercising rights under this License with respect to +the covered work, and you disclaim any intention to limit operation or +modification of the work as a means of enforcing, against the work's +users, your or third parties' legal rights to forbid circumvention of +technological measures. + + 4. Conveying Verbatim Copies. + + You may convey verbatim copies of the Program's source code as you +receive it, in any medium, provided that you conspicuously and +appropriately publish on each copy an appropriate copyright notice; +keep intact all notices stating that this License and any +non-permissive terms added in accord with section 7 apply to the code; +keep intact all notices of the absence of any warranty; and give all +recipients a copy of this License along with the Program. + + You may charge any price or no price for each copy that you convey, +and you may offer support or warranty protection for a fee. + + 5. Conveying Modified Source Versions. + + You may convey a work based on the Program, or the modifications to +produce it from the Program, in the form of source code under the +terms of section 4, provided that you also meet all of these conditions: + + a) The work must carry prominent notices stating that you modified + it, and giving a relevant date. + + b) The work must carry prominent notices stating that it is + released under this License and any conditions added under section + 7. 
This requirement modifies the requirement in section 4 to + "keep intact all notices". + + c) You must license the entire work, as a whole, under this + License to anyone who comes into possession of a copy. This + License will therefore apply, along with any applicable section 7 + additional terms, to the whole of the work, and all its parts, + regardless of how they are packaged. This License gives no + permission to license the work in any other way, but it does not + invalidate such permission if you have separately received it. + + d) If the work has interactive user interfaces, each must display + Appropriate Legal Notices; however, if the Program has interactive + interfaces that do not display Appropriate Legal Notices, your + work need not make them do so. + + A compilation of a covered work with other separate and independent +works, which are not by their nature extensions of the covered work, +and which are not combined with it such as to form a larger program, +in or on a volume of a storage or distribution medium, is called an +"aggregate" if the compilation and its resulting copyright are not +used to limit the access or legal rights of the compilation's users +beyond what the individual works permit. Inclusion of a covered work +in an aggregate does not cause this License to apply to the other +parts of the aggregate. + + 6. Conveying Non-Source Forms. + + You may convey a covered work in object code form under the terms +of sections 4 and 5, provided that you also convey the +machine-readable Corresponding Source under the terms of this License, +in one of these ways: + + a) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by the + Corresponding Source fixed on a durable physical medium + customarily used for software interchange. 
+ + b) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by a + written offer, valid for at least three years and valid for as + long as you offer spare parts or customer support for that product + model, to give anyone who possesses the object code either (1) a + copy of the Corresponding Source for all the software in the + product that is covered by this License, on a durable physical + medium customarily used for software interchange, for a price no + more than your reasonable cost of physically performing this + conveying of source, or (2) access to copy the + Corresponding Source from a network server at no charge. + + c) Convey individual copies of the object code with a copy of the + written offer to provide the Corresponding Source. This + alternative is allowed only occasionally and noncommercially, and + only if you received the object code with such an offer, in accord + with subsection 6b. + + d) Convey the object code by offering access from a designated + place (gratis or for a charge), and offer equivalent access to the + Corresponding Source in the same way through the same place at no + further charge. You need not require recipients to copy the + Corresponding Source along with the object code. If the place to + copy the object code is a network server, the Corresponding Source + may be on a different server (operated by you or a third party) + that supports equivalent copying facilities, provided you maintain + clear directions next to the object code saying where to find the + Corresponding Source. Regardless of what server hosts the + Corresponding Source, you remain obligated to ensure that it is + available for as long as needed to satisfy these requirements. 
+ + e) Convey the object code using peer-to-peer transmission, provided + you inform other peers where the object code and Corresponding + Source of the work are being offered to the general public at no + charge under subsection 6d. + + A separable portion of the object code, whose source code is excluded +from the Corresponding Source as a System Library, need not be +included in conveying the object code work. + + A "User Product" is either (1) a "consumer product", which means any +tangible personal property which is normally used for personal, family, +or household purposes, or (2) anything designed or sold for incorporation +into a dwelling. In determining whether a product is a consumer product, +doubtful cases shall be resolved in favor of coverage. For a particular +product received by a particular user, "normally used" refers to a +typical or common use of that class of product, regardless of the status +of the particular user or of the way in which the particular user +actually uses, or expects or is expected to use, the product. A product +is a consumer product regardless of whether the product has substantial +commercial, industrial or non-consumer uses, unless such uses represent +the only significant mode of use of the product. + + "Installation Information" for a User Product means any methods, +procedures, authorization keys, or other information required to install +and execute modified versions of a covered work in that User Product from +a modified version of its Corresponding Source. The information must +suffice to ensure that the continued functioning of the modified object +code is in no case prevented or interfered with solely because +modification has been made. 
+ + If you convey an object code work under this section in, or with, or +specifically for use in, a User Product, and the conveying occurs as +part of a transaction in which the right of possession and use of the +User Product is transferred to the recipient in perpetuity or for a +fixed term (regardless of how the transaction is characterized), the +Corresponding Source conveyed under this section must be accompanied +by the Installation Information. But this requirement does not apply +if neither you nor any third party retains the ability to install +modified object code on the User Product (for example, the work has +been installed in ROM). + + The requirement to provide Installation Information does not include a +requirement to continue to provide support service, warranty, or updates +for a work that has been modified or installed by the recipient, or for +the User Product in which it has been modified or installed. Access to a +network may be denied when the modification itself materially and +adversely affects the operation of the network or violates the rules and +protocols for communication across the network. + + Corresponding Source conveyed, and Installation Information provided, +in accord with this section must be in a format that is publicly +documented (and with an implementation available to the public in +source code form), and must require no special password or key for +unpacking, reading or copying. + + 7. Additional Terms. + + "Additional permissions" are terms that supplement the terms of this +License by making exceptions from one or more of its conditions. +Additional permissions that are applicable to the entire Program shall +be treated as though they were included in this License, to the extent +that they are valid under applicable law. 
If additional permissions +apply only to part of the Program, that part may be used separately +under those permissions, but the entire Program remains governed by +this License without regard to the additional permissions. + + When you convey a copy of a covered work, you may at your option +remove any additional permissions from that copy, or from any part of +it. (Additional permissions may be written to require their own +removal in certain cases when you modify the work.) You may place +additional permissions on material, added by you to a covered work, +for which you have or can give appropriate copyright permission. + + Notwithstanding any other provision of this License, for material you +add to a covered work, you may (if authorized by the copyright holders of +that material) supplement the terms of this License with terms: + + a) Disclaiming warranty or limiting liability differently from the + terms of sections 15 and 16 of this License; or + + b) Requiring preservation of specified reasonable legal notices or + author attributions in that material or in the Appropriate Legal + Notices displayed by works containing it; or + + c) Prohibiting misrepresentation of the origin of that material, or + requiring that modified versions of such material be marked in + reasonable ways as different from the original version; or + + d) Limiting the use for publicity purposes of names of licensors or + authors of the material; or + + e) Declining to grant rights under trademark law for use of some + trade names, trademarks, or service marks; or + + f) Requiring indemnification of licensors and authors of that + material by anyone who conveys the material (or modified versions of + it) with contractual assumptions of liability to the recipient, for + any liability that these contractual assumptions directly impose on + those licensors and authors. + + All other non-permissive additional terms are considered "further +restrictions" within the meaning of section 10. 
If the Program as you +received it, or any part of it, contains a notice stating that it is +governed by this License along with a term that is a further +restriction, you may remove that term. If a license document contains +a further restriction but permits relicensing or conveying under this +License, you may add to a covered work material governed by the terms +of that license document, provided that the further restriction does +not survive such relicensing or conveying. + + If you add terms to a covered work in accord with this section, you +must place, in the relevant source files, a statement of the +additional terms that apply to those files, or a notice indicating +where to find the applicable terms. + + Additional terms, permissive or non-permissive, may be stated in the +form of a separately written license, or stated as exceptions; +the above requirements apply either way. + + 8. Termination. + + You may not propagate or modify a covered work except as expressly +provided under this License. Any attempt otherwise to propagate or +modify it is void, and will automatically terminate your rights under +this License (including any patent licenses granted under the third +paragraph of section 11). + + However, if you cease all violation of this License, then your +license from a particular copyright holder is reinstated (a) +provisionally, unless and until the copyright holder explicitly and +finally terminates your license, and (b) permanently, if the copyright +holder fails to notify you of the violation by some reasonable means +prior to 60 days after the cessation. + + Moreover, your license from a particular copyright holder is +reinstated permanently if the copyright holder notifies you of the +violation by some reasonable means, this is the first time you have +received notice of violation of this License (for any work) from that +copyright holder, and you cure the violation prior to 30 days after +your receipt of the notice. 
+ + Termination of your rights under this section does not terminate the +licenses of parties who have received copies or rights from you under +this License. If your rights have been terminated and not permanently +reinstated, you do not qualify to receive new licenses for the same +material under section 10. + + 9. Acceptance Not Required for Having Copies. + + You are not required to accept this License in order to receive or +run a copy of the Program. Ancillary propagation of a covered work +occurring solely as a consequence of using peer-to-peer transmission +to receive a copy likewise does not require acceptance. However, +nothing other than this License grants you permission to propagate or +modify any covered work. These actions infringe copyright if you do +not accept this License. Therefore, by modifying or propagating a +covered work, you indicate your acceptance of this License to do so. + + 10. Automatic Licensing of Downstream Recipients. + + Each time you convey a covered work, the recipient automatically +receives a license from the original licensors, to run, modify and +propagate that work, subject to this License. You are not responsible +for enforcing compliance by third parties with this License. + + An "entity transaction" is a transaction transferring control of an +organization, or substantially all assets of one, or subdividing an +organization, or merging organizations. If propagation of a covered +work results from an entity transaction, each party to that +transaction who receives a copy of the work also receives whatever +licenses to the work the party's predecessor in interest had or could +give under the previous paragraph, plus a right to possession of the +Corresponding Source of the work from the predecessor in interest, if +the predecessor has it or can get it with reasonable efforts. + + You may not impose any further restrictions on the exercise of the +rights granted or affirmed under this License. 
For example, you may +not impose a license fee, royalty, or other charge for exercise of +rights granted under this License, and you may not initiate litigation +(including a cross-claim or counterclaim in a lawsuit) alleging that +any patent claim is infringed by making, using, selling, offering for +sale, or importing the Program or any portion of it. + + 11. Patents. + + A "contributor" is a copyright holder who authorizes use under this +License of the Program or a work on which the Program is based. The +work thus licensed is called the contributor's "contributor version". + + A contributor's "essential patent claims" are all patent claims +owned or controlled by the contributor, whether already acquired or +hereafter acquired, that would be infringed by some manner, permitted +by this License, of making, using, or selling its contributor version, +but do not include claims that would be infringed only as a +consequence of further modification of the contributor version. For +purposes of this definition, "control" includes the right to grant +patent sublicenses in a manner consistent with the requirements of +this License. + + Each contributor grants you a non-exclusive, worldwide, royalty-free +patent license under the contributor's essential patent claims, to +make, use, sell, offer for sale, import and otherwise run, modify and +propagate the contents of its contributor version. + + In the following three paragraphs, a "patent license" is any express +agreement or commitment, however denominated, not to enforce a patent +(such as an express permission to practice a patent or covenant not to +sue for patent infringement). To "grant" such a patent license to a +party means to make such an agreement or commitment not to enforce a +patent against the party. 
+ + If you convey a covered work, knowingly relying on a patent license, +and the Corresponding Source of the work is not available for anyone +to copy, free of charge and under the terms of this License, through a +publicly available network server or other readily accessible means, +then you must either (1) cause the Corresponding Source to be so +available, or (2) arrange to deprive yourself of the benefit of the +patent license for this particular work, or (3) arrange, in a manner +consistent with the requirements of this License, to extend the patent +license to downstream recipients. "Knowingly relying" means you have +actual knowledge that, but for the patent license, your conveying the +covered work in a country, or your recipient's use of the covered work +in a country, would infringe one or more identifiable patents in that +country that you have reason to believe are valid. + + If, pursuant to or in connection with a single transaction or +arrangement, you convey, or propagate by procuring conveyance of, a +covered work, and grant a patent license to some of the parties +receiving the covered work authorizing them to use, propagate, modify +or convey a specific copy of the covered work, then the patent license +you grant is automatically extended to all recipients of the covered +work and works based on it. + + A patent license is "discriminatory" if it does not include within +the scope of its coverage, prohibits the exercise of, or is +conditioned on the non-exercise of one or more of the rights that are +specifically granted under this License. 
You may not convey a covered +work if you are a party to an arrangement with a third party that is +in the business of distributing software, under which you make payment +to the third party based on the extent of your activity of conveying +the work, and under which the third party grants, to any of the +parties who would receive the covered work from you, a discriminatory +patent license (a) in connection with copies of the covered work +conveyed by you (or copies made from those copies), or (b) primarily +for and in connection with specific products or compilations that +contain the covered work, unless you entered into that arrangement, +or that patent license was granted, prior to 28 March 2007. + + Nothing in this License shall be construed as excluding or limiting +any implied license or other defenses to infringement that may +otherwise be available to you under applicable patent law. + + 12. No Surrender of Others' Freedom. + + If conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot convey a +covered work so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you may +not convey it at all. For example, if you agree to terms that obligate you +to collect a royalty for further conveying from those to whom you convey +the Program, the only way you could satisfy both those terms and this +License would be to refrain entirely from conveying the Program. + + 13. Use with the GNU Affero General Public License. + + Notwithstanding any other provision of this License, you have +permission to link or combine any covered work with a work licensed +under version 3 of the GNU Affero General Public License into a single +combined work, and to convey the resulting work. 
The terms of this +License will continue to apply to the part which is the covered work, +but the special requirements of the GNU Affero General Public License, +section 13, concerning interaction through a network will apply to the +combination as such. + + 14. Revised Versions of this License. + + The Free Software Foundation may publish revised and/or new versions of +the GNU General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + + Each version is given a distinguishing version number. If the +Program specifies that a certain numbered version of the GNU General +Public License "or any later version" applies to it, you have the +option of following the terms and conditions either of that numbered +version or of any later version published by the Free Software +Foundation. If the Program does not specify a version number of the +GNU General Public License, you may choose any version ever published +by the Free Software Foundation. + + If the Program specifies that a proxy can decide which future +versions of the GNU General Public License can be used, that proxy's +public statement of acceptance of a version permanently authorizes you +to choose that version for the Program. + + Later license versions may give you additional or different +permissions. However, no additional obligations are imposed on any +author or copyright holder as a result of your choosing to follow a +later version. + + 15. Disclaimer of Warranty. + + THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY +APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT +HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY +OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. 
THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM +IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF +ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. Limitation of Liability. + + IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS +THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY +GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE +USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF +DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD +PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), +EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF +SUCH DAMAGES. + + 17. Interpretation of Sections 15 and 16. + + If the disclaimer of warranty and limitation of liability provided +above cannot be given local legal effect according to their terms, +reviewing courts shall apply local law that most closely approximates +an absolute waiver of all civil liability in connection with the +Program, unless a warranty or assumption of liability accompanies a +copy of the Program in return for a fee. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +state the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. 
+
+
+    <one line to give the program's name and a brief idea of what it does.>
+    Copyright (C) <year>  <name of author>
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <https://www.gnu.org/licenses/>.
+
+Also add information on how to contact you by electronic and paper mail.
+
+  If the program does terminal interaction, make it output a short
+notice like this when it starts in an interactive mode:
+
+    <program>  Copyright (C) <year>  <name of author>
+    This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+    This is free software, and you are welcome to redistribute it
+    under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License.  Of course, your program's commands
+might be different; for a GUI interface, you would use an "about box".
+
+  You should also get your employer (if you work as a programmer) or school,
+if any, to sign a "copyright disclaimer" for the program, if necessary.
+For more information on this, and how to apply and follow the GNU GPL, see
+<https://www.gnu.org/licenses/>.
+
+  The GNU General Public License does not permit incorporating your program
+into proprietary programs.  If your program is a subroutine library, you
+may consider it more useful to permit linking proprietary applications with
+the library.  If this is what you want to do, use the GNU Lesser General
+Public License instead of this License.  But first, please read
+<https://www.gnu.org/licenses/why-not-lgpl.html>.
diff --git a/tools/z64compress/Makefile b/tools/z64compress/Makefile new file mode 100644 index 000000000..7a83c6190 --- /dev/null +++ b/tools/z64compress/Makefile @@ -0,0 +1,49 @@ +CC := gcc +CFLAGS := -DNDEBUG -s -Os -flto -Wall -Wextra + +# Target platform, specify with TARGET= on the command line, linux64 is default. +# Currently supported: linux64, linux32, win32 +TARGET ?= linux64 + +ifeq ($(TARGET),linux32) + TARGET_CFLAGS := -m32 +else ifeq ($(TARGET),win32) +# If using a cross compiler, specify the compiler executable on the command line. +# make TARGET=win32 CC=~/c/mxe/usr/bin/i686-w64-mingw32.static-gcc + TARGET_LIBS := -mconsole -municode +else ifneq ($(TARGET),linux64) + $(error Supported targets: linux64, linux32, win32) +endif + +# Whether to use native optimizations, specify with NATIVE_OPT=0/1 on the command line, default is 0. +# This is not supported by all compilers which is particularly an issue on Mac, and may inhibit tests. +NATIVE_OPT ?= 0 +ifeq ($(NATIVE_OPT),1) + TARGET_CFLAGS += -march=native -mtune=native +endif + +OBJ_DIR := o/$(TARGET) + +$(OBJ_DIR)/src/enc/%.o: CFLAGS := -DNDEBUG -s -Ofast -flto -Wall -Isrc/enc/libdeflate + +SRC_DIRS := $(shell find src -type d) +C_DIRS := $(shell find src -type d -not -path "src/enc/libdeflate/*") +C_FILES := $(foreach dir,$(C_DIRS),$(wildcard $(dir)/*.c)) +C_FILES += src/enc/libdeflate/lib/deflate_compress.c src/enc/libdeflate/lib/utils.c +O_FILES := $(foreach f,$(C_FILES:.c=.o),$(OBJ_DIR)/$f) + +# Make build directories +$(shell mkdir -p $(foreach dir,$(SRC_DIRS),$(OBJ_DIR)/$(dir))) + +.PHONY: all clean + +all: z64compress + +z64compress: $(O_FILES) + $(CC) $(TARGET_CFLAGS) $(CFLAGS) $(O_FILES) -lm -lpthread -lz $(TARGET_LIBS) -o z64compress + +$(OBJ_DIR)/%.o: %.c + $(CC) -c $(TARGET_CFLAGS) $(CFLAGS) $< -o $@ + +clean: + $(RM) -rf z64compress bin o diff --git a/tools/z64compress/README.md b/tools/z64compress/README.md new file mode 100644 index 000000000..4e9a6ba86 --- /dev/null +++ 
b/tools/z64compress/README.md @@ -0,0 +1,102 @@ +# z64compress + +`z64compress` is a program for compressing Zelda 64 roms: be they retail, hacked traditionally, or custom-built from the [`Ocarina of Time`](https://github.com/zeldaret/oot) or [`Majora's Mask`](https://github.com/zeldaret/mm) reverse engineering projects. It is written in highly efficient C and leverages the power of multithreading to make compression as fast as possible. To reduce overhead on subsequent compressions, an optional cache directory can be specified. + +In addition to the default `yaz`, it supports some faster and more compact algorithms such as `DEFLATE`, `lzo`, `ucl`, and `aplib`. In order to use these, grab patches or code from my [`z64enc` repository](https://github.com/z64me/z64enc). + +If you add an algorithm, please make sure `valgrind` reports no memory leaks or other errors before making a pull request. Thank you! + +(By the way, `valgrind` works better without the `-march=native -mtune=native` optimizations, so turn those off when testing `valgrind`.) + +## Usage +This is a command line application. 
Learn from these common examples and adapt the arguments to your needs: +``` + compressing oot debug + --in "path/to/in.z64" + --out "path/to/out.z64" + --mb 32 + --codec yaz + --cache "path/to/cache" + --dma "0x12F70,1548" + --compress "9-14,28-END" + --threads 4 + + compressing oot ntsc 1.0 + --in "path/to/in.z64" + --out "path/to/out.z64" + --mb 32 + --codec yaz + --cache "path/to/cache" + --dma "0x7430,1526" + --compress "10-14,27-END" + --threads 4 + + compressing mm usa + --in "path/to/in.z64" + --out "path/to/out.z64" + --mb 32 + --codec yaz + --cache "path/to/cache" + --dma "0x1A500,1568" + --compress "10-14,23,24,31-END" + --skip "1127" + --repack "15-20,22" + --threads 4 +``` + +## Arguments +``` + --in uncompressed input rom + + --out compressed output rom + + --matching attempt matching compression at the cost of + some optimizations and reduced performance + + --mb how many mb the compressed rom should be + + --codec currently supported codecs + yaz + ucl + lzo + zlib + aplib + * to use non-yaz codecs, find patches + and code on my z64enc repo + + --cache is optional and won't be created if + no path is specified (having a cache + makes subsequent compressions faster) + * pro-tip: linux users who don't want a + cache to persist across power cycles + can use the path "/tmp/z64compress" + + --dma specify dmadata address and count + + --compress enable compression on specified files + + --skip disable compression on specified files + + --repack handles Majora's Mask archives + + --threads optional multithreading; + exclude this argument to disable it + + --only-stdout reserve stderr for errors and print + everything else to stdout + + arguments are executed as they + are parsed, so order matters! +``` + +## Building +First, clone the repository and initialize its submodules: +``` +git clone https://github.com/z64me/z64compress.git +cd z64compress +git submodule update --init +``` + +A Makefile-based build system is provided. 
Choose the target platform with `make TARGET=linux64|linux32|win32`, default is linux64. If building for windows with a cross compiler, specify the compiler executable with `make TARGET=win32 CC=/path/to/executable`. + +Alternatively, I have included shell scripts for building Linux and Windows binaries. Windows binaries are built using a cross compiler ([I recommend `MXE`](https://mxe.cc/)). diff --git a/tools/z64compress/release-linux.sh b/tools/z64compress/release-linux.sh new file mode 100644 index 000000000..bdac70dcc --- /dev/null +++ b/tools/z64compress/release-linux.sh @@ -0,0 +1,14 @@ +# build compression functions (slow) +gcc -DNDEBUG -s -Ofast -flto -lm -c -Wall -march=native -mtune=native src/enc/*.c src/enc/lzo/*.c src/enc/ucl/comp/*.c src/enc/apultra/*.c +mkdir -p o +mv *.o o + +# build everything else +gcc -o z64compress -DNDEBUG src/*.c o/*.o src/enc/libdeflate/lib/deflate_compress.c src/enc/libdeflate/lib/utils.c -Isrc/enc/libdeflate -Wall -Wextra -s -Os -flto -lpthread -lz -march=native -mtune=native + +# move to bin directory +mkdir -p bin/linux64 +mv z64compress bin/linux64 + + + diff --git a/tools/z64compress/release-linux32.sh b/tools/z64compress/release-linux32.sh new file mode 100644 index 000000000..06d829a7d --- /dev/null +++ b/tools/z64compress/release-linux32.sh @@ -0,0 +1,14 @@ +# build compression functions (slow) +gcc -m32 -DNDEBUG -s -Ofast -flto -lm -c -Wall -march=native -mtune=native src/enc/*.c src/enc/lzo/*.c src/enc/ucl/comp/*.c src/enc/apultra/*.c +mkdir -p o +mv *.o o + +# build everything else +gcc -m32 -o z64compress -DNDEBUG src/*.c o/*.o src/enc/libdeflate/lib/deflate_compress.c src/enc/libdeflate/lib/utils.c -Isrc/enc/libdeflate -Wall -Wextra -s -Os -flto -lpthread -lz -march=native -mtune=native + +# move to bin directory +mkdir -p bin/linux32 +mv z64compress bin/linux32 + + + diff --git a/tools/z64compress/release-win32.sh b/tools/z64compress/release-win32.sh new file mode 100644 index 000000000..94fc245c9 --- 
/dev/null
+++ b/tools/z64compress/release-win32.sh
@@ -0,0 +1,12 @@
+# build compression functions (slow)
+i686-w64-mingw32.static-gcc -DNDEBUG -s -Ofast -flto -lm -c -Wall src/enc/*.c src/enc/lzo/*.c src/enc/ucl/comp/*.c src/enc/apultra/*.c
+mkdir -p o
+mv *.o o
+
+# build everything else
+i686-w64-mingw32.static-gcc -o z64compress.exe -DNDEBUG src/*.c o/*.o src/enc/libdeflate/lib/deflate_compress.c src/enc/libdeflate/lib/utils.c -Isrc/enc/libdeflate -Wall -Wextra -s -Os -flto -lpthread -lz -mconsole -municode
+
+# move to bin directory
+mkdir -p bin/win32
+mv z64compress.exe bin/win32
+
diff --git a/tools/z64compress/src/enc/aplib.c b/tools/z64compress/src/enc/aplib.c
new file mode 100644
index 000000000..c2e720a7b
--- /dev/null
+++ b/tools/z64compress/src/enc/aplib.c
@@ -0,0 +1,48 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "apultra/libapultra.h"
+
+static void compression_progress(long long nOriginalSize, long long nCompressedSize) {
+	/* do nothing */
+}
+
+int
+aplenc(
+	void *_src
+	, unsigned src_sz
+	, void *_dst
+	, unsigned *dst_sz
+	, void *_ctx
+)
+{
+	unsigned char *src = _src;
+	unsigned char *dst = _dst;
+	int nMaxCompressedSize = apultra_get_max_compressed_size(src_sz);
+	apultra_stats stats;
+
+	extern int g_hlen; /* header length */
+	memset(dst, 0, g_hlen);
+	memcpy(dst, "APL0", 4);
+	dst[4] = (src_sz >> 24);
+	dst[5] = (src_sz >> 16);
+	dst[6] = (src_sz >> 8);
+	dst[7] = (src_sz >> 0);
+
+	*dst_sz = apultra_compress(
+		src
+		, dst + g_hlen
+		, src_sz
+		, nMaxCompressedSize
+		, 0 /* flags */
+		, 0 /* nMaxWindowSize */
+		, 0 /* nDictionarySize */
+		, compression_progress
+		, &stats
+	);
+
+	*dst_sz = *dst_sz + g_hlen;
+
+	return 0;
+}
+
diff --git a/tools/z64compress/src/enc/apultra/apultra.c b/tools/z64compress/src/enc/apultra/apultra.c
new file mode 100644
index 000000000..24dc2b692
--- /dev/null
+++ b/tools/z64compress/src/enc/apultra/apultra.c
@@ -0,0 +1,1225 @@
+#if 0
+/*
+ * apultra.c - command line compression utility for the apultra
library + * + * Copyright (C) 2019 Emmanuel Marty + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution. + */ + +/* + * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori + * + * Inspired by cap by Sven-Åke Dahl. https://github.com/svendahl/cap + * Also inspired by Charles Bloom's compression blog. http://cbloomrants.blogspot.com/ + * With ideas from LZ4 by Yann Collet. 
https://github.com/lz4/lz4 + * With help and support from spke + * + */ + +#include +#include +#include +#ifdef _WIN32 +#include +#include +#else +#include +#endif +#include "libapultra.h" + +#define OPT_VERBOSE 1 +#define OPT_STATS 2 +#define OPT_BACKWARD 4 + +#define TOOL_VERSION "1.4.1" + +/*---------------------------------------------------------------------------*/ + +#ifdef _WIN32 +LARGE_INTEGER hpc_frequency; +BOOL hpc_available = FALSE; +#endif + +static void do_init_time() { +#ifdef _WIN32 + hpc_frequency.QuadPart = 0; + hpc_available = QueryPerformanceFrequency(&hpc_frequency); +#endif +} + +static long long do_get_time() { + long long nTime; + +#ifdef _WIN32 + if (hpc_available) { + LARGE_INTEGER nCurTime; + + /* Use HPC hardware for best precision */ + QueryPerformanceCounter(&nCurTime); + nTime = (long long)(nCurTime.QuadPart * 1000000LL / hpc_frequency.QuadPart); + } + else { + struct _timeb tb; + _ftime(&tb); + + nTime = ((long long)tb.time * 1000LL + (long long)tb.millitm) * 1000LL; + } +#else + struct timeval tm; + gettimeofday(&tm, NULL); + + nTime = (long long)tm.tv_sec * 1000000LL + (long long)tm.tv_usec; +#endif + return nTime; +} + +static void do_reverse_buffer(unsigned char *pBuffer, size_t nBufferSize) { + size_t nMidPoint = nBufferSize / 2; + size_t i, j; + + for (i = 0, j = nBufferSize - 1; i < nMidPoint; i++, j--) { + unsigned char c = pBuffer[i]; + pBuffer[i] = pBuffer[j]; + pBuffer[j] = c; + } +} + +/*---------------------------------------------------------------------------*/ + +static void compression_progress(long long nOriginalSize, long long nCompressedSize) { + if (nOriginalSize >= 512 * 1024) { + fprintf(stdout, "\r%lld => %lld (%g %%) \b\b\b\b\b", nOriginalSize, nCompressedSize, (double)(nCompressedSize * 100.0 / nOriginalSize)); + fflush(stdout); + } +} + +static int do_compress(const char *pszInFilename, const char *pszOutFilename, const char *pszDictionaryFilename, const unsigned int nOptions, const unsigned int 
nMaxWindowSize) { + long long nStartTime = 0LL, nEndTime = 0LL; + size_t nOriginalSize = 0L, nCompressedSize = 0L, nMaxCompressedSize; + int nFlags = 0; + apultra_stats stats; + unsigned char *pDecompressedData; + unsigned char *pCompressedData; + + if (nOptions & OPT_VERBOSE) { + nStartTime = do_get_time(); + } + + FILE* f_dict = NULL; + size_t nDictionarySize = 0; + if (pszDictionaryFilename) { + /* Open the dictionary */ + f_dict = fopen(pszDictionaryFilename, "rb"); + if (!f_dict) { + fprintf(stderr, "error opening dictionary '%s' for reading\n", pszDictionaryFilename); + return 100; + } + + /* Get dictionary size */ + fseek(f_dict, 0, SEEK_END); + nDictionarySize = (size_t)ftell(f_dict); + fseek(f_dict, 0, SEEK_SET); + + if (nDictionarySize > BLOCK_SIZE) nDictionarySize = BLOCK_SIZE; + } + + /* Read the whole original file in memory */ + + FILE *f_in = fopen(pszInFilename, "rb"); + if (!f_in) { + if (f_dict) fclose(f_dict); + fprintf(stderr, "error opening '%s' for reading\n", pszInFilename); + return 100; + } + + fseek(f_in, 0, SEEK_END); + nOriginalSize = (size_t)ftell(f_in); + fseek(f_in, 0, SEEK_SET); + + pDecompressedData = (unsigned char*)malloc(nDictionarySize + nOriginalSize); + if (!pDecompressedData) { + fclose(f_in); + if (f_dict) fclose(f_dict); + fprintf(stderr, "out of memory for reading '%s', %zd bytes needed\n", pszInFilename, nOriginalSize); + return 100; + } + + if (f_dict) { + /* Read dictionary data */ + if (fread(pDecompressedData + ((nOptions & OPT_BACKWARD) ? nOriginalSize : 0), 1, nDictionarySize, f_dict) != nDictionarySize) { + free(pDecompressedData); + fclose(f_in); + fclose(f_dict); + fprintf(stderr, "I/O error while reading dictionary '%s'\n", pszDictionaryFilename); + return 100; + } + + fclose(f_dict); + f_dict = NULL; + } + + /* Read input file data */ + if (fread(pDecompressedData + ((nOptions & OPT_BACKWARD) ? 
0 : nDictionarySize), 1, nOriginalSize, f_in) != nOriginalSize) { + free(pDecompressedData); + fclose(f_in); + fprintf(stderr, "I/O error while reading '%s'\n", pszInFilename); + return 100; + } + + fclose(f_in); + + if (nOptions & OPT_BACKWARD) + do_reverse_buffer(pDecompressedData, nDictionarySize + nOriginalSize); + + /* Allocate max compressed size */ + + nMaxCompressedSize = apultra_get_max_compressed_size(nDictionarySize + nOriginalSize); + + pCompressedData = (unsigned char*)malloc(nMaxCompressedSize); + if (!pCompressedData) { + free(pDecompressedData); + fprintf(stderr, "out of memory for compressing '%s', %zd bytes needed\n", pszInFilename, nMaxCompressedSize); + return 100; + } + + memset(pCompressedData, 0, nMaxCompressedSize); + + nCompressedSize = apultra_compress(pDecompressedData, pCompressedData, nDictionarySize + nOriginalSize, nMaxCompressedSize, nFlags, nMaxWindowSize, nDictionarySize, compression_progress, &stats); + + if ((nOptions & OPT_VERBOSE)) { + nEndTime = do_get_time(); + } + + if (nCompressedSize == -1) { + free(pCompressedData); + free(pDecompressedData); + fprintf(stderr, "compression error for '%s'\n", pszInFilename); + return 100; + } + + if (nOptions & OPT_BACKWARD) + do_reverse_buffer(pCompressedData, nCompressedSize); + + if (pszOutFilename) { + FILE *f_out; + + /* Write whole compressed file out */ + + f_out = fopen(pszOutFilename, "wb"); + if (f_out) { + fwrite(pCompressedData, 1, nCompressedSize, f_out); + fclose(f_out); + } + } + + free(pCompressedData); + free(pDecompressedData); + + if ((nOptions & OPT_VERBOSE)) { + double fDelta = ((double)(nEndTime - nStartTime)) / 1000000.0; + double fSpeed = ((double)nOriginalSize / 1048576.0) / fDelta; + fprintf(stdout, "\rCompressed '%s' in %g seconds, %.02g Mb/s, %d tokens (%g bytes/token), %d into %d bytes ==> %g %%\n", + pszInFilename, fDelta, fSpeed, stats.commands_divisor, (double)nOriginalSize / (double)stats.commands_divisor, + (int)nOriginalSize, (int)nCompressedSize, 
(double)(nCompressedSize * 100.0 / nOriginalSize)); + } + + if (nOptions & OPT_STATS) { + fprintf(stdout, "Tokens: literals: %d short matches: %d normal matches: %d large matches: %d rep matches: %d EOD: %d\n", + stats.num_literals, stats.num_4bit_matches, stats.num_7bit_matches, stats.num_variable_matches, stats.num_rep_matches, stats.num_eod); + if (stats.match_divisor > 0) { + fprintf(stdout, "Offsets: min: %d avg: %d max: %d count: %d\n", stats.min_offset, (int)(stats.total_offsets / (long long)stats.match_divisor), stats.max_offset, stats.match_divisor); + fprintf(stdout, "Match lens: min: %d avg: %d max: %d count: %d\n", stats.min_match_len, stats.total_match_lens / stats.match_divisor, stats.max_match_len, stats.match_divisor); + } + else { + fprintf(stdout, "Offsets: none\n"); + fprintf(stdout, "Match lens: none\n"); + } + if (stats.rle1_divisor > 0) { + fprintf(stdout, "RLE1 lens: min: %d avg: %d max: %d count: %d\n", stats.min_rle1_len, stats.total_rle1_lens / stats.rle1_divisor, stats.max_rle1_len, stats.rle1_divisor); + } + else { + fprintf(stdout, "RLE1 lens: none\n"); + } + if (stats.rle2_divisor > 0) { + fprintf(stdout, "RLE2 lens: min: %d avg: %d max: %d count: %d\n", stats.min_rle2_len, stats.total_rle2_lens / stats.rle2_divisor, stats.max_rle2_len, stats.rle2_divisor); + } + else { + fprintf(stdout, "RLE2 lens: none\n"); + } + fprintf(stdout, "Safe distance: %d (0x%X)\n", stats.safe_dist, stats.safe_dist); + } + return 0; +} + +/*---------------------------------------------------------------------------*/ + +static int do_decompress(const char *pszInFilename, const char *pszOutFilename, const char *pszDictionaryFilename, const unsigned int nOptions) { + long long nStartTime = 0LL, nEndTime = 0LL; + size_t nCompressedSize, nMaxDecompressedSize, nOriginalSize; + unsigned char *pCompressedData; + unsigned char *pDecompressedData; + int nFlags = 0; + + /* Read the whole compressed file in memory */ + + FILE *f_in = fopen(pszInFilename, "rb"); + if 
(!f_in) { + fprintf(stderr, "error opening '%s' for reading\n", pszInFilename); + return 100; + } + + fseek(f_in, 0, SEEK_END); + nCompressedSize = (size_t)ftell(f_in); + fseek(f_in, 0, SEEK_SET); + + pCompressedData = (unsigned char*)malloc(nCompressedSize); + if (!pCompressedData) { + fclose(f_in); + fprintf(stderr, "out of memory for reading '%s', %zd bytes needed\n", pszInFilename, nCompressedSize); + return 100; + } + + if (fread(pCompressedData, 1, nCompressedSize, f_in) != nCompressedSize) { + free(pCompressedData); + fclose(f_in); + fprintf(stderr, "I/O error while reading '%s'\n", pszInFilename); + return 100; + } + + fclose(f_in); + + if (nOptions & OPT_BACKWARD) + do_reverse_buffer(pCompressedData, nCompressedSize); + + /* Get max decompressed size */ + + nMaxDecompressedSize = apultra_get_max_decompressed_size(pCompressedData, nCompressedSize, nFlags); + if (nMaxDecompressedSize == -1) { + free(pCompressedData); + fprintf(stderr, "invalid compressed format for file '%s'\n", pszInFilename); + return 100; + } + + FILE* f_dict = NULL; + size_t nDictionarySize = 0; + if (pszDictionaryFilename) { + /* Open the dictionary */ + f_dict = fopen(pszDictionaryFilename, "rb"); + if (!f_dict) { + fprintf(stderr, "error opening dictionary '%s' for reading\n", pszDictionaryFilename); + return 100; + } + + /* Get dictionary size */ + fseek(f_dict, 0, SEEK_END); + nDictionarySize = (size_t)ftell(f_dict); + fseek(f_dict, 0, SEEK_SET); + + if (nDictionarySize > BLOCK_SIZE) nDictionarySize = BLOCK_SIZE; + } + + /* Allocate max decompressed size */ + + pDecompressedData = (unsigned char*)malloc(nDictionarySize + nMaxDecompressedSize); + if (!pDecompressedData) { + free(pCompressedData); + if (f_dict) fclose(f_dict); + fprintf(stderr, "out of memory for decompressing '%s', %zd bytes needed\n", pszInFilename, nMaxDecompressedSize); + return 100; + } + + memset(pDecompressedData, 0, nDictionarySize + nMaxDecompressedSize); + + if (f_dict) { + /* Read dictionary data */ + if 
(fread(pDecompressedData, 1, nDictionarySize, f_dict) != nDictionarySize) { + free(pDecompressedData); + fclose(f_in); + fclose(f_dict); + fprintf(stderr, "I/O error while reading dictionary '%s'\n", pszDictionaryFilename); + return 100; + } + + fclose(f_dict); + f_dict = NULL; + + if (nOptions & OPT_BACKWARD) + do_reverse_buffer(pDecompressedData, nDictionarySize); + } + + if (nOptions & OPT_VERBOSE) { + nStartTime = do_get_time(); + } + + nOriginalSize = apultra_decompress(pCompressedData, pDecompressedData, nCompressedSize, nMaxDecompressedSize, nDictionarySize, nFlags); + if (nOriginalSize == -1) { + free(pDecompressedData); + free(pCompressedData); + + fprintf(stderr, "decompression error for '%s'\n", pszInFilename); + return 100; + } + + if (nOptions & OPT_BACKWARD) + do_reverse_buffer(pDecompressedData + nDictionarySize, nOriginalSize); + + if (pszOutFilename) { + FILE *f_out; + + /* Write whole decompressed file out */ + + f_out = fopen(pszOutFilename, "wb"); + if (f_out) { + fwrite(pDecompressedData + nDictionarySize, 1, nOriginalSize, f_out); + fclose(f_out); + } + } + + free(pDecompressedData); + free(pCompressedData); + + if (nOptions & OPT_VERBOSE) { + nEndTime = do_get_time(); + double fDelta = ((double)(nEndTime - nStartTime)) / 1000000.0; + double fSpeed = ((double)nOriginalSize / 1048576.0) / fDelta; + fprintf(stdout, "Decompressed '%s' in %g seconds, %g Mb/s\n", + pszInFilename, fDelta, fSpeed); + } + + return 0; +} + +/*---------------------------------------------------------------------------*/ + +static int do_compare(const char *pszInFilename, const char *pszOutFilename, const char *pszDictionaryFilename, const unsigned int nOptions) { + long long nStartTime = 0LL, nEndTime = 0LL; + size_t nCompressedSize, nMaxDecompressedSize, nOriginalSize, nDecompressedSize; + unsigned char *pCompressedData = NULL; + unsigned char *pOriginalData = NULL; + unsigned char *pDecompressedData = NULL; + int nFlags = 0; + + /* Read the whole compressed file in 
memory */ + + FILE *f_in = fopen(pszInFilename, "rb"); + if (!f_in) { + fprintf(stderr, "error opening '%s' for reading\n", pszInFilename); + return 100; + } + + fseek(f_in, 0, SEEK_END); + nCompressedSize = (size_t)ftell(f_in); + fseek(f_in, 0, SEEK_SET); + + pCompressedData = (unsigned char*)malloc(nCompressedSize); + if (!pCompressedData) { + fclose(f_in); + fprintf(stderr, "out of memory for reading '%s', %zd bytes needed\n", pszInFilename, nCompressedSize); + return 100; + } + + if (fread(pCompressedData, 1, nCompressedSize, f_in) != nCompressedSize) { + free(pCompressedData); + fclose(f_in); + fprintf(stderr, "I/O error while reading '%s'\n", pszInFilename); + return 100; + } + + fclose(f_in); + + if (nOptions & OPT_BACKWARD) + do_reverse_buffer(pCompressedData, nCompressedSize); + + /* Read the whole original file in memory */ + + f_in = fopen(pszOutFilename, "rb"); + if (!f_in) { + free(pCompressedData); + fprintf(stderr, "error opening '%s' for reading\n", pszInFilename); + return 100; + } + + fseek(f_in, 0, SEEK_END); + nOriginalSize = (size_t)ftell(f_in); + fseek(f_in, 0, SEEK_SET); + + pOriginalData = (unsigned char*)malloc(nOriginalSize); + if (!pOriginalData) { + fclose(f_in); + free(pCompressedData); + fprintf(stderr, "out of memory for reading '%s', %zd bytes needed\n", pszInFilename, nOriginalSize); + return 100; + } + + if (fread(pOriginalData, 1, nOriginalSize, f_in) != nOriginalSize) { + free(pOriginalData); + fclose(f_in); + free(pCompressedData); + fprintf(stderr, "I/O error while reading '%s'\n", pszInFilename); + return 100; + } + + fclose(f_in); + + /* Get max decompressed size */ + + nMaxDecompressedSize = apultra_get_max_decompressed_size(pCompressedData, nCompressedSize, nFlags); + if (nMaxDecompressedSize == -1) { + free(pOriginalData); + free(pCompressedData); + fprintf(stderr, "invalid compressed format for file '%s'\n", pszInFilename); + return 100; + } + + FILE* f_dict = NULL; + size_t nDictionarySize = 0; + if 
(pszDictionaryFilename) { + /* Open the dictionary */ + f_dict = fopen(pszDictionaryFilename, "rb"); + if (!f_dict) { + fprintf(stderr, "error opening dictionary '%s' for reading\n", pszDictionaryFilename); + return 100; + } + + /* Get dictionary size */ + fseek(f_dict, 0, SEEK_END); + nDictionarySize = (size_t)ftell(f_dict); + fseek(f_dict, 0, SEEK_SET); + + if (nDictionarySize > BLOCK_SIZE) nDictionarySize = BLOCK_SIZE; + } + + /* Allocate max decompressed size */ + + pDecompressedData = (unsigned char*)malloc(nDictionarySize + nMaxDecompressedSize); + if (!pDecompressedData) { + free(pOriginalData); + free(pCompressedData); + if (f_dict) fclose(f_dict); + fprintf(stderr, "out of memory for decompressing '%s', %zd bytes needed\n", pszInFilename, nMaxDecompressedSize); + return 100; + } + + memset(pDecompressedData, 0, nDictionarySize + nMaxDecompressedSize); + + if (f_dict) { + /* Read dictionary data */ + if (fread(pDecompressedData, 1, nDictionarySize, f_dict) != nDictionarySize) { + free(pDecompressedData); + fclose(f_in); + fclose(f_dict); + fprintf(stderr, "I/O error while reading dictionary '%s'\n", pszDictionaryFilename); + return 100; + } + + fclose(f_dict); + f_dict = NULL; + + if (nOptions & OPT_BACKWARD) + do_reverse_buffer(pDecompressedData, nDictionarySize); + } + + if (nOptions & OPT_VERBOSE) { + nStartTime = do_get_time(); + } + + nDecompressedSize = apultra_decompress(pCompressedData, pDecompressedData, nCompressedSize, nMaxDecompressedSize, nDictionarySize, nFlags); + if (nDecompressedSize == -1) { + free(pDecompressedData); + free(pOriginalData); + free(pCompressedData); + + fprintf(stderr, "decompression error for '%s'\n", pszInFilename); + return 100; + } + + if (nOptions & OPT_BACKWARD) + do_reverse_buffer(pDecompressedData + nDictionarySize, nDecompressedSize); + + if (nDecompressedSize != nOriginalSize || memcmp(pDecompressedData + nDictionarySize, pOriginalData, nOriginalSize)) { + fprintf(stderr, "error comparing compressed file '%s' with 
original '%s'\n", pszInFilename, pszOutFilename); + return 100; + } + + free(pDecompressedData); + free(pOriginalData); + free(pCompressedData); + + if (nOptions & OPT_VERBOSE) { + nEndTime = do_get_time(); + double fDelta = ((double)(nEndTime - nStartTime)) / 1000000.0; + double fSpeed = ((double)nOriginalSize / 1048576.0) / fDelta; + fprintf(stdout, "Compared '%s' in %g seconds, %g Mb/s\n", + pszInFilename, fDelta, fSpeed); + } + + return 0; +} + +/*---------------------------------------------------------------------------*/ + +static void generate_compressible_data(unsigned char *pBuffer, size_t nBufferSize, unsigned int nSeed, int nNumLiteralValues, float fMatchProbability) { + size_t nIndex = 0; + int nMatchProbability = (int)(fMatchProbability * 1023.0f); + + srand(nSeed); + + if (nIndex >= nBufferSize) return; + pBuffer[nIndex++] = rand() % nNumLiteralValues; + + while (nIndex < nBufferSize) { + if ((rand() & 1023) >= nMatchProbability) { + size_t nLiteralCount = rand() & 127; + if (nLiteralCount > (nBufferSize - nIndex)) + nLiteralCount = nBufferSize - nIndex; + + while (nLiteralCount--) + pBuffer[nIndex++] = rand() % nNumLiteralValues; + } + else { + size_t nMatchLength = MIN_MATCH_SIZE + (rand() & 1023); + size_t nMatchOffset; + + if (nMatchLength > (nBufferSize - nIndex)) + nMatchLength = nBufferSize - nIndex; + if (nMatchLength > nIndex) + nMatchLength = nIndex; + + if (nMatchLength < nIndex) + nMatchOffset = rand() % (nIndex - nMatchLength); + else + nMatchOffset = 0; + + while (nMatchLength--) { + pBuffer[nIndex] = pBuffer[nIndex - nMatchOffset]; + nIndex++; + } + } + } +} + +static void xor_data(unsigned char *pBuffer, size_t nBufferSize, unsigned int nSeed, float fXorProbability) { + size_t nIndex = 0; + int nXorProbability = (int)(fXorProbability * 1023.0f); + + srand(nSeed); + + if (nIndex >= nBufferSize) return; + + while (nIndex < nBufferSize) { + if ((rand() & 1023) < nXorProbability) { + pBuffer[nIndex] ^= 0xff; + } + nIndex++; + } +} + 
+static int do_self_test(const unsigned int nOptions, const unsigned int nMaxWindowSize, const int nIsQuickTest) { + unsigned char *pGeneratedData; + unsigned char *pCompressedData; + unsigned char *pTmpCompressedData; + unsigned char *pTmpDecompressedData; + size_t nGeneratedDataSize; + size_t nMaxCompressedDataSize; + unsigned int nSeed = 123; + int nFlags = 0; + int i; + + pGeneratedData = (unsigned char*)malloc(4 * BLOCK_SIZE); + if (!pGeneratedData) { + fprintf(stderr, "out of memory, %d bytes needed\n", 4 * BLOCK_SIZE); + return 100; + } + + nMaxCompressedDataSize = apultra_get_max_compressed_size(4 * BLOCK_SIZE); + pCompressedData = (unsigned char*)malloc(nMaxCompressedDataSize); + if (!pCompressedData) { + free(pGeneratedData); + pGeneratedData = NULL; + + fprintf(stderr, "out of memory, %zd bytes needed\n", nMaxCompressedDataSize); + return 100; + } + + pTmpCompressedData = (unsigned char*)malloc(nMaxCompressedDataSize); + if (!pTmpCompressedData) { + free(pCompressedData); + pCompressedData = NULL; + free(pGeneratedData); + pGeneratedData = NULL; + + fprintf(stderr, "out of memory, %zd bytes needed\n", nMaxCompressedDataSize); + return 100; + } + + pTmpDecompressedData = (unsigned char*)malloc(4 * BLOCK_SIZE); + if (!pTmpDecompressedData) { + free(pTmpCompressedData); + pTmpCompressedData = NULL; + free(pCompressedData); + pCompressedData = NULL; + free(pGeneratedData); + pGeneratedData = NULL; + + fprintf(stderr, "out of memory, %d bytes needed\n", 4 * BLOCK_SIZE); + return 100; + } + + memset(pGeneratedData, 0, 4 * BLOCK_SIZE); + memset(pCompressedData, 0, nMaxCompressedDataSize); + memset(pTmpCompressedData, 0, nMaxCompressedDataSize); + + /* Test compressing with a too small buffer to do anything, expect to fail cleanly */ + for (i = 0; i < 12; i++) { + generate_compressible_data(pGeneratedData, i, nSeed, 256, 0.5f); + apultra_compress(pGeneratedData, pCompressedData, i, i, nFlags, nMaxWindowSize, 0 /* dictionary size */, NULL, NULL); + } + + size_t 
nDataSizeStep = 128; + float fProbabilitySizeStep = nIsQuickTest ? 0.005f : 0.0005f; + + for (nGeneratedDataSize = 1024; nGeneratedDataSize <= (nIsQuickTest ? 1024U : (4U * BLOCK_SIZE)); nGeneratedDataSize += nDataSizeStep) { + float fMatchProbability; + + fprintf(stdout, "size %zd", nGeneratedDataSize); + for (fMatchProbability = 0; fMatchProbability <= 0.995f; fMatchProbability += fProbabilitySizeStep) { + int nNumLiteralValues[12] = { 1, 2, 3, 15, 30, 56, 96, 137, 178, 191, 255, 256 }; + float fXorProbability; + + fputc('.', stdout); + fflush(stdout); + + for (i = 0; i < 12; i++) { + /* Generate data to compress */ + generate_compressible_data(pGeneratedData, nGeneratedDataSize, nSeed, nNumLiteralValues[i], fMatchProbability); + + /* Try to compress it, expected to succeed */ + size_t nActualCompressedSize = apultra_compress(pGeneratedData, pCompressedData, nGeneratedDataSize, apultra_get_max_compressed_size(nGeneratedDataSize), + nFlags, nMaxWindowSize, 0 /* dictionary size */, NULL, NULL); + if (nActualCompressedSize == -1 || nActualCompressedSize < (1 + 1 + 1 /* footer */)) { + free(pTmpDecompressedData); + pTmpDecompressedData = NULL; + free(pTmpCompressedData); + pTmpCompressedData = NULL; + free(pCompressedData); + pCompressedData = NULL; + free(pGeneratedData); + pGeneratedData = NULL; + + fprintf(stderr, "\nself-test: error compressing size %zd, seed %d, match probability %f, literals range %d\n", nGeneratedDataSize, nSeed, fMatchProbability, nNumLiteralValues[i]); + return 100; + } + + /* Try to decompress it, expected to succeed */ + size_t nActualDecompressedSize; + nActualDecompressedSize = apultra_decompress(pCompressedData, pTmpDecompressedData, nActualCompressedSize, nGeneratedDataSize, 0 /* dictionary size */, nFlags); + if (nActualDecompressedSize == -1) { + free(pTmpDecompressedData); + pTmpDecompressedData = NULL; + free(pTmpCompressedData); + pTmpCompressedData = NULL; + free(pCompressedData); + pCompressedData = NULL; + free(pGeneratedData); 
+ pGeneratedData = NULL; + + fprintf(stderr, "\nself-test: error decompressing size %zd, seed %d, match probability %f, literals range %d\n", nGeneratedDataSize, nSeed, fMatchProbability, nNumLiteralValues[i]); + return 100; + } + + if (memcmp(pGeneratedData, pTmpDecompressedData, nGeneratedDataSize)) { + free(pTmpDecompressedData); + pTmpDecompressedData = NULL; + free(pTmpCompressedData); + pTmpCompressedData = NULL; + free(pCompressedData); + pCompressedData = NULL; + free(pGeneratedData); + pGeneratedData = NULL; + + fprintf(stderr, "\nself-test: error comparing decompressed and original data, size %zd, seed %d, match probability %f, literals range %d\n", nGeneratedDataSize, nSeed, fMatchProbability, nNumLiteralValues[i]); + return 100; + } + + /* Try to decompress corrupted data, expected to fail cleanly, without crashing or corrupting memory outside the output buffer */ + for (fXorProbability = 0.05f; fXorProbability <= 0.5f; fXorProbability += 0.05f) { + memcpy(pTmpCompressedData, pCompressedData, nActualCompressedSize); + xor_data(pTmpCompressedData, nActualCompressedSize, nSeed, fXorProbability); + apultra_decompress(pTmpCompressedData, pGeneratedData, nActualCompressedSize, nGeneratedDataSize, 0 /* dictionary size */, nFlags); + } + } + + nSeed++; + } + + fputc(10, stdout); + fflush(stdout); + + nDataSizeStep <<= 1; + if (nDataSizeStep > (128 * 4096)) + nDataSizeStep = 128 * 4096; + fProbabilitySizeStep *= 1.25; + if (fProbabilitySizeStep > (0.0005f * 4096)) + fProbabilitySizeStep = 0.0005f * 4096; + } + + free(pTmpDecompressedData); + pTmpDecompressedData = NULL; + + free(pTmpCompressedData); + pTmpCompressedData = NULL; + + free(pCompressedData); + pCompressedData = NULL; + + free(pGeneratedData); + pGeneratedData = NULL; + + fprintf(stdout, "All tests passed.\n"); + return 0; +} + +/*---------------------------------------------------------------------------*/ + +static int do_compr_benchmark(const char *pszInFilename, const char *pszOutFilename, const 
char *pszDictionaryFilename, const unsigned int nOptions, const unsigned int nMaxWindowSize) { + size_t nFileSize, nMaxCompressedSize; + unsigned char *pFileData; + unsigned char *pCompressedData; + int nFlags = 0; + int i; + + if (pszDictionaryFilename) { + fprintf(stderr, "in-memory benchmarking does not support dictionaries\n"); + return 100; + } + + /* Read the whole original file in memory */ + + FILE *f_in = fopen(pszInFilename, "rb"); + if (!f_in) { + fprintf(stderr, "error opening '%s' for reading\n", pszInFilename); + return 100; + } + + fseek(f_in, 0, SEEK_END); + nFileSize = (size_t)ftell(f_in); + fseek(f_in, 0, SEEK_SET); + + pFileData = (unsigned char*)malloc(nFileSize); + if (!pFileData) { + fclose(f_in); + fprintf(stderr, "out of memory for reading '%s', %zd bytes needed\n", pszInFilename, nFileSize); + return 100; + } + + if (fread(pFileData, 1, nFileSize, f_in) != nFileSize) { + free(pFileData); + fclose(f_in); + fprintf(stderr, "I/O error while reading '%s'\n", pszInFilename); + return 100; + } + + fclose(f_in); + + if (nOptions & OPT_BACKWARD) + do_reverse_buffer(pFileData, nFileSize); + + /* Allocate max compressed size */ + + nMaxCompressedSize = apultra_get_max_compressed_size(nFileSize); + + pCompressedData = (unsigned char*)malloc(nMaxCompressedSize + 2048); + if (!pCompressedData) { + free(pFileData); + fprintf(stderr, "out of memory for compressing '%s', %zd bytes needed\n", pszInFilename, nMaxCompressedSize); + return 100; + } + + memset(pCompressedData + 1024, 0, nMaxCompressedSize); + + long long nBestCompTime = -1; + + size_t nActualCompressedSize = 0; + size_t nRightGuardPos = nMaxCompressedSize; + + for (i = 0; i < 5; i++) { + unsigned char nGuard = 0x33 + i; + int j; + + /* Write guard bytes around the output buffer, to help check for writes outside of it by the compressor */ + memset(pCompressedData, nGuard, 1024); + memset(pCompressedData + 1024 + nRightGuardPos, nGuard, 1024); + + long long t0 = do_get_time(); + 
nActualCompressedSize = apultra_compress(pFileData, pCompressedData + 1024, nFileSize, nRightGuardPos, nFlags, nMaxWindowSize, 0 /* dictionary size */, NULL, NULL); + long long t1 = do_get_time(); + if (nActualCompressedSize == -1) { + free(pCompressedData); + free(pFileData); + fprintf(stderr, "compression error\n"); + return 100; + } + + long long nCurDecTime = t1 - t0; + if (nBestCompTime == -1 || nBestCompTime > nCurDecTime) + nBestCompTime = nCurDecTime; + + /* Check guard bytes before the output buffer */ + for (j = 0; j < 1024; j++) { + if (pCompressedData[j] != nGuard) { + free(pCompressedData); + free(pFileData); + fprintf(stderr, "error, wrote outside of output buffer at %d!\n", j - 1024); + return 100; + } + } + + /* Check guard bytes after the output buffer */ + for (j = 0; j < 1024; j++) { + if (pCompressedData[1024 + nRightGuardPos + j] != nGuard) { + free(pCompressedData); + free(pFileData); + fprintf(stderr, "error, wrote outside of output buffer at %d!\n", j); + return 100; + } + } + + nRightGuardPos = nActualCompressedSize; + } + + if (nOptions & OPT_BACKWARD) + do_reverse_buffer(pCompressedData + 1024, nActualCompressedSize); + + if (pszOutFilename) { + FILE *f_out; + + /* Write whole compressed file out */ + + f_out = fopen(pszOutFilename, "wb"); + if (f_out) { + fwrite(pCompressedData + 1024, 1, nActualCompressedSize, f_out); + fclose(f_out); + } + } + + free(pCompressedData); + free(pFileData); + + fprintf(stdout, "compressed size: %zd bytes\n", nActualCompressedSize); + fprintf(stdout, "compression time: %lld microseconds (%g Mb/s)\n", nBestCompTime, ((double)nActualCompressedSize / 1024.0) / ((double)nBestCompTime / 1000.0)); + + return 0; +} + +/*---------------------------------------------------------------------------*/ + +static int do_dec_benchmark(const char *pszInFilename, const char *pszOutFilename, const char *pszDictionaryFilename, const unsigned int nOptions) { + size_t nFileSize, nMaxDecompressedSize; + unsigned char *pFileData; 
+ unsigned char *pDecompressedData; + int nFlags = 0; + int i; + + if (pszDictionaryFilename) { + fprintf(stderr, "in-memory benchmarking does not support dictionaries\n"); + return 100; + } + + /* Read the whole compressed file in memory */ + + FILE *f_in = fopen(pszInFilename, "rb"); + if (!f_in) { + fprintf(stderr, "error opening '%s' for reading\n", pszInFilename); + return 100; + } + + fseek(f_in, 0, SEEK_END); + nFileSize = (size_t)ftell(f_in); + fseek(f_in, 0, SEEK_SET); + + pFileData = (unsigned char*)malloc(nFileSize); + if (!pFileData) { + fclose(f_in); + fprintf(stderr, "out of memory for reading '%s', %zd bytes needed\n", pszInFilename, nFileSize); + return 100; + } + + if (fread(pFileData, 1, nFileSize, f_in) != nFileSize) { + free(pFileData); + fclose(f_in); + fprintf(stderr, "I/O error while reading '%s'\n", pszInFilename); + return 100; + } + + fclose(f_in); + + if (nOptions & OPT_BACKWARD) + do_reverse_buffer(pFileData, nFileSize); + + /* Allocate max decompressed size */ + + nMaxDecompressedSize = apultra_get_max_decompressed_size(pFileData, nFileSize, nFlags); + if (nMaxDecompressedSize == -1) { + free(pFileData); + fprintf(stderr, "invalid compressed format for file '%s'\n", pszInFilename); + return 100; + } + + pDecompressedData = (unsigned char*)malloc(nMaxDecompressedSize); + if (!pDecompressedData) { + free(pFileData); + fprintf(stderr, "out of memory for decompressing '%s', %zd bytes needed\n", pszInFilename, nMaxDecompressedSize); + return 100; + } + + memset(pDecompressedData, 0, nMaxDecompressedSize); + + long long nBestDecTime = -1; + + size_t nActualDecompressedSize = 0; + for (i = 0; i < 50; i++) { + long long t0 = do_get_time(); + nActualDecompressedSize = apultra_decompress(pFileData, pDecompressedData, nFileSize, nMaxDecompressedSize, 0 /* dictionary size */, nFlags); + long long t1 = do_get_time(); + if (nActualDecompressedSize == -1) { + free(pDecompressedData); + free(pFileData); + fprintf(stderr, "decompression error\n"); + 
return 100; + } + + long long nCurDecTime = t1 - t0; + if (nBestDecTime == -1 || nBestDecTime > nCurDecTime) + nBestDecTime = nCurDecTime; + } + + if (nOptions & OPT_BACKWARD) + do_reverse_buffer(pDecompressedData, nActualDecompressedSize); + + if (pszOutFilename) { + FILE *f_out; + + /* Write whole decompressed file out */ + + f_out = fopen(pszOutFilename, "wb"); + if (f_out) { + fwrite(pDecompressedData, 1, nActualDecompressedSize, f_out); + fclose(f_out); + } + } + + free(pDecompressedData); + free(pFileData); + + fprintf(stdout, "decompressed size: %zd bytes\n", nActualDecompressedSize); + fprintf(stdout, "decompression time: %lld microseconds (%g Mb/s)\n", nBestDecTime, ((double)nActualDecompressedSize / 1024.0) / ((double)nBestDecTime / 1000.0)); + + return 0; +} + +/*---------------------------------------------------------------------------*/ + +int main(int argc, char **argv) { + int i; + const char *pszInFilename = NULL; + const char *pszOutFilename = NULL; + const char *pszDictionaryFilename = NULL; + int nArgsError = 0; + int nCommandDefined = 0; + int nVerifyCompression = 0; + char cCommand = 'z'; + unsigned int nOptions = 0; + unsigned int nMaxWindowSize = 0; + + for (i = 1; i < argc; i++) { + if (!strcmp(argv[i], "-d")) { + if (!nCommandDefined) { + nCommandDefined = 1; + cCommand = 'd'; + } + else + nArgsError = 1; + } + else if (!strcmp(argv[i], "-z")) { + if (!nCommandDefined) { + nCommandDefined = 1; + cCommand = 'z'; + } + else + nArgsError = 1; + } + else if (!strcmp(argv[i], "-c")) { + if (!nVerifyCompression) { + nVerifyCompression = 1; + } + else + nArgsError = 1; + } + else if (!strcmp(argv[i], "-cbench")) { + if (!nCommandDefined) { + nCommandDefined = 1; + cCommand = 'B'; + } + else + nArgsError = 1; + } + else if (!strcmp(argv[i], "-dbench")) { + if (!nCommandDefined) { + nCommandDefined = 1; + cCommand = 'b'; + } + else + nArgsError = 1; + } + else if (!strcmp(argv[i], "-test")) { + if (!nCommandDefined) { + nCommandDefined = 1; + 
cCommand = 't'; + } + else + nArgsError = 1; + } + else if (!strcmp(argv[i], "-quicktest")) { + if (!nCommandDefined) { + nCommandDefined = 1; + cCommand = 'T'; + } + else + nArgsError = 1; + } + else if (!strcmp(argv[i], "-D")) { + if (!pszDictionaryFilename && (i + 1) < argc) { + pszDictionaryFilename = argv[i + 1]; + i++; + } + else + nArgsError = 1; + } + else if (!strncmp(argv[i], "-D", 2)) { + if (!pszDictionaryFilename) { + pszDictionaryFilename = argv[i] + 2; + } + else + nArgsError = 1; + } + else if (!strcmp(argv[i], "-v")) { + if ((nOptions & OPT_VERBOSE) == 0) { + nOptions |= OPT_VERBOSE; + } + else + nArgsError = 1; + } + else if (!strcmp(argv[i], "-w")) { + if (!nMaxWindowSize && (i + 1) < argc) { + char *pEnd = NULL; + nMaxWindowSize = (int)strtol(argv[i + 1], &pEnd, 10); + if (pEnd && pEnd != argv[i + 1] && (nMaxWindowSize >= 16 && nMaxWindowSize <= 0x200000)) { + i++; + } + else { + nArgsError = 1; + } + } + else + nArgsError = 1; + } + else if (!strncmp(argv[i], "-w", 2)) { + if (!nMaxWindowSize) { + char *pEnd = NULL; + nMaxWindowSize = (int)strtol(argv[i] + 2, &pEnd, 10); + if (!(pEnd && pEnd != (argv[i] + 2) && (nMaxWindowSize >= 16 && nMaxWindowSize <= 0x200000))) { + nArgsError = 1; + } + } + else + nArgsError = 1; + } + else if (!strcmp(argv[i], "-stats")) { + if ((nOptions & OPT_STATS) == 0) { + nOptions |= OPT_STATS; + } + else + nArgsError = 1; + } + else if (!strcmp(argv[i], "-b")) { + if ((nOptions & OPT_BACKWARD) == 0) { + nOptions |= OPT_BACKWARD; + } + else + nArgsError = 1; + } + else { + if (!pszInFilename) + pszInFilename = argv[i]; + else { + if (!pszOutFilename) + pszOutFilename = argv[i]; + else + nArgsError = 1; + } + } + } + + if (!nArgsError && cCommand == 't') { + return do_self_test(nOptions, nMaxWindowSize, 0); + } + else if (!nArgsError && cCommand == 'T') { + return do_self_test(nOptions, nMaxWindowSize, 1); + } + + if (nArgsError || !pszInFilename || !pszOutFilename) { + fprintf(stderr, "apultra command-line tool v" 
TOOL_VERSION " by Emmanuel Marty and spke\n"); + fprintf(stderr, "usage: %s [-c] [-d] [-v] [-b] \n", argv[0]); + fprintf(stderr, " -c: check resulting stream after compressing\n"); + fprintf(stderr, " -d: decompress (default: compress)\n"); + fprintf(stderr, " -b: backwards compression or decompression\n"); + fprintf(stderr, " -w : maximum window size, in bytes (16..2097152), defaults to maximum\n"); + fprintf(stderr, " -D : use dictionary file\n"); + fprintf(stderr, " -cbench: benchmark in-memory compression\n"); + fprintf(stderr, " -dbench: benchmark in-memory decompression\n"); + fprintf(stderr, " -test: run full automated self-tests\n"); + fprintf(stderr, "-quicktest: run quick automated self-tests\n"); + fprintf(stderr, " -stats: show compressed data stats\n"); + fprintf(stderr, " -v: be verbose\n"); + return 100; + } + + do_init_time(); + + if (cCommand == 'z') { + int nResult = do_compress(pszInFilename, pszOutFilename, pszDictionaryFilename, nOptions, nMaxWindowSize); + if (nResult == 0 && nVerifyCompression) { + return do_compare(pszOutFilename, pszInFilename, pszDictionaryFilename, nOptions); + } else { + return nResult; + } + } + else if (cCommand == 'd') { + return do_decompress(pszInFilename, pszOutFilename, pszDictionaryFilename, nOptions); + } + else if (cCommand == 'B') { + return do_compr_benchmark(pszInFilename, pszOutFilename, pszDictionaryFilename, nOptions, nMaxWindowSize); + } + else if (cCommand == 'b') { + return do_dec_benchmark(pszInFilename, pszOutFilename, pszDictionaryFilename, nOptions); + } + else { + return 100; + } +} +#endif diff --git a/tools/z64compress/src/enc/apultra/divsufsort.c b/tools/z64compress/src/enc/apultra/divsufsort.c new file mode 100644 index 000000000..3a1c75304 --- /dev/null +++ b/tools/z64compress/src/enc/apultra/divsufsort.c @@ -0,0 +1,460 @@ +/* + * divsufsort.c for libdivsufsort + * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "divsufsort_private.h" +#ifdef _OPENMP +# include +#endif + + +/*- Private Functions -*/ + +/* Sorts suffixes of type B*. */ +static +saidx_t +sort_typeBstar(const sauchar_t *T, saidx_t *SA, + saidx_t *bucket_A, saidx_t *bucket_B, + saidx_t n) { + saidx_t *PAb, *ISAb, *buf; +#ifdef _OPENMP + saidx_t *curbuf; + saidx_t l; +#endif + saidx_t i, j, k, t, m, bufsize; + saint_t c0, c1; +#ifdef _OPENMP + saint_t d0, d1; + int tmp; +#endif + + /* Initialize bucket arrays. */ + for(i = 0; i < BUCKET_A_SIZE; ++i) { bucket_A[i] = 0; } + for(i = 0; i < BUCKET_B_SIZE; ++i) { bucket_B[i] = 0; } + + /* Count the number of occurrences of the first one or two characters of each + type A, B and B* suffix. Moreover, store the beginning position of all + type B* suffixes into the array SA. */ + for(i = n - 1, m = n, c0 = T[n - 1]; 0 <= i;) { + /* type A suffix. 
*/ + do { ++BUCKET_A(c1 = c0); } while((0 <= --i) && ((c0 = T[i]) >= c1)); + if(0 <= i) { + /* type B* suffix. */ + ++BUCKET_BSTAR(c0, c1); + SA[--m] = i; + /* type B suffix. */ + for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) <= c1); --i, c1 = c0) { + ++BUCKET_B(c0, c1); + } + } + } + m = n - m; +/* +note: + A type B* suffix is lexicographically smaller than a type B suffix that + begins with the same first two characters. +*/ + + /* Calculate the index of start/end point of each bucket. */ + for(c0 = 0, i = 0, j = 0; c0 < ALPHABET_SIZE; ++c0) { + t = i + BUCKET_A(c0); + BUCKET_A(c0) = i + j; /* start point */ + i = t + BUCKET_B(c0, c0); + for(c1 = c0 + 1; c1 < ALPHABET_SIZE; ++c1) { + j += BUCKET_BSTAR(c0, c1); + BUCKET_BSTAR(c0, c1) = j; /* end point */ + i += BUCKET_B(c0, c1); + } + } + + if(0 < m) { + /* Sort the type B* suffixes by their first two characters. */ + PAb = SA + n - m; ISAb = SA + m; + for(i = m - 2; 0 <= i; --i) { + t = PAb[i], c0 = T[t], c1 = T[t + 1]; + SA[--BUCKET_BSTAR(c0, c1)] = i; + } + t = PAb[m - 1], c0 = T[t], c1 = T[t + 1]; + SA[--BUCKET_BSTAR(c0, c1)] = m - 1; + + /* Sort the type B* substrings using sssort. 
*/ +#ifdef _OPENMP + tmp = omp_get_max_threads(); + buf = SA + m, bufsize = (n - (2 * m)) / tmp; + c0 = ALPHABET_SIZE - 2, c1 = ALPHABET_SIZE - 1, j = m; +#pragma omp parallel default(shared) private(curbuf, k, l, d0, d1, tmp) + { + tmp = omp_get_thread_num(); + curbuf = buf + tmp * bufsize; + k = 0; + for(;;) { + #pragma omp critical(sssort_lock) + { + if(0 < (l = j)) { + d0 = c0, d1 = c1; + do { + k = BUCKET_BSTAR(d0, d1); + if(--d1 <= d0) { + d1 = ALPHABET_SIZE - 1; + if(--d0 < 0) { break; } + } + } while(((l - k) <= 1) && (0 < (l = k))); + c0 = d0, c1 = d1, j = k; + } + } + if(l == 0) { break; } + sssort(T, PAb, SA + k, SA + l, + curbuf, bufsize, 2, n, *(SA + k) == (m - 1)); + } + } +#else + buf = SA + m, bufsize = n - (2 * m); + for(c0 = ALPHABET_SIZE - 2, j = m; 0 < j; --c0) { + for(c1 = ALPHABET_SIZE - 1; c0 < c1; j = i, --c1) { + i = BUCKET_BSTAR(c0, c1); + if(1 < (j - i)) { + sssort(T, PAb, SA + i, SA + j, + buf, bufsize, 2, n, *(SA + i) == (m - 1)); + } + } + } +#endif + + /* Compute ranks of type B* substrings. */ + for(i = m - 1; 0 <= i; --i) { + if(0 <= SA[i]) { + j = i; + do { ISAb[SA[i]] = i; } while((0 <= --i) && (0 <= SA[i])); + SA[i + 1] = i - j; + if(i <= 0) { break; } + } + j = i; + do { ISAb[SA[i] = ~SA[i]] = j; } while(SA[--i] < 0); + ISAb[SA[i]] = j; + } + + /* Construct the inverse suffix array of type B* suffixes using trsort. */ + trsort(ISAb, SA, m, 1); + + /* Set the sorted order of tyoe B* suffixes. */ + for(i = n - 1, j = m, c0 = T[n - 1]; 0 <= i;) { + for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) >= c1); --i, c1 = c0) { } + if(0 <= i) { + t = i; + for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) <= c1); --i, c1 = c0) { } + SA[ISAb[--j]] = ((t == 0) || (1 < (t - i))) ? t : ~t; + } + } + + /* Calculate the index of start/end point of each bucket. 
*/ + BUCKET_B(ALPHABET_SIZE - 1, ALPHABET_SIZE - 1) = n; /* end point */ + for(c0 = ALPHABET_SIZE - 2, k = m - 1; 0 <= c0; --c0) { + i = BUCKET_A(c0 + 1) - 1; + for(c1 = ALPHABET_SIZE - 1; c0 < c1; --c1) { + t = i - BUCKET_B(c0, c1); + BUCKET_B(c0, c1) = i; /* end point */ + + /* Move all type B* suffixes to the correct position. */ + for(i = t, j = BUCKET_BSTAR(c0, c1); + j <= k; + --i, --k) { SA[i] = SA[k]; } + } + BUCKET_BSTAR(c0, c0 + 1) = i - BUCKET_B(c0, c0) + 1; /* start point */ + BUCKET_B(c0, c0) = i; /* end point */ + } + } + + return m; +} + +/* Constructs the suffix array by using the sorted order of type B* suffixes. */ +static +void +construct_SA(const sauchar_t *T, saidx_t *SA, + saidx_t *bucket_A, saidx_t *bucket_B, + saidx_t n, saidx_t m) { + saidx_t *i, *j, *k; + saidx_t s; + saint_t c0, c1, c2; + + if(0 < m) { + /* Construct the sorted order of type B suffixes by using + the sorted order of type B* suffixes. */ + for(c1 = ALPHABET_SIZE - 2; 0 <= c1; --c1) { + /* Scan the suffix array from right to left. */ + for(i = SA + BUCKET_BSTAR(c1, c1 + 1), + j = SA + BUCKET_A(c1 + 1) - 1, k = NULL, c2 = -1; + i <= j; + --j) { + if(0 < (s = *j)) { + assert(T[s] == c1); + assert(((s + 1) < n) && (T[s] <= T[s + 1])); + assert(T[s - 1] <= T[s]); + *j = ~s; + c0 = T[--s]; + if((0 < s) && (T[s - 1] > c0)) { s = ~s; } + if(c0 != c2) { + if(0 <= c2) { BUCKET_B(c2, c1) = k - SA; } + k = SA + BUCKET_B(c2 = c0, c1); + } + assert(k < j); + *k-- = s; + } else { + assert(((s == 0) && (T[s] == c1)) || (s < 0)); + *j = ~s; + } + } + } + } + + /* Construct the suffix array by using + the sorted order of type B suffixes. */ + k = SA + BUCKET_A(c2 = T[n - 1]); + *k++ = (T[n - 2] < c2) ? ~(n - 1) : (n - 1); + /* Scan the suffix array from left to right. 
*/ + for(i = SA, j = SA + n; i < j; ++i) { + if(0 < (s = *i)) { + assert(T[s - 1] >= T[s]); + c0 = T[--s]; + if((s == 0) || (T[s - 1] < c0)) { s = ~s; } + if(c0 != c2) { + BUCKET_A(c2) = k - SA; + k = SA + BUCKET_A(c2 = c0); + } + assert(i < k); + *k++ = s; + } else { + assert(s < 0); + *i = ~s; + } + } +} + +#if 0 +/* Constructs the burrows-wheeler transformed string directly + by using the sorted order of type B* suffixes. */ +static +saidx_t +construct_BWT(const sauchar_t *T, saidx_t *SA, + saidx_t *bucket_A, saidx_t *bucket_B, + saidx_t n, saidx_t m) { + saidx_t *i, *j, *k, *orig; + saidx_t s; + saint_t c0, c1, c2; + + if(0 < m) { + /* Construct the sorted order of type B suffixes by using + the sorted order of type B* suffixes. */ + for(c1 = ALPHABET_SIZE - 2; 0 <= c1; --c1) { + /* Scan the suffix array from right to left. */ + for(i = SA + BUCKET_BSTAR(c1, c1 + 1), + j = SA + BUCKET_A(c1 + 1) - 1, k = NULL, c2 = -1; + i <= j; + --j) { + if(0 < (s = *j)) { + assert(T[s] == c1); + assert(((s + 1) < n) && (T[s] <= T[s + 1])); + assert(T[s - 1] <= T[s]); + c0 = T[--s]; + *j = ~((saidx_t)c0); + if((0 < s) && (T[s - 1] > c0)) { s = ~s; } + if(c0 != c2) { + if(0 <= c2) { BUCKET_B(c2, c1) = k - SA; } + k = SA + BUCKET_B(c2 = c0, c1); + } + assert(k < j); + *k-- = s; + } else if(s != 0) { + *j = ~s; +#ifndef NDEBUG + } else { + assert(T[s] == c1); +#endif + } + } + } + } + + /* Construct the BWTed string by using + the sorted order of type B suffixes. */ + k = SA + BUCKET_A(c2 = T[n - 1]); + *k++ = (T[n - 2] < c2) ? ~((saidx_t)T[n - 2]) : (n - 1); + /* Scan the suffix array from left to right. 
*/ + for(i = SA, j = SA + n, orig = SA; i < j; ++i) { + if(0 < (s = *i)) { + assert(T[s - 1] >= T[s]); + c0 = T[--s]; + *i = c0; + if((0 < s) && (T[s - 1] < c0)) { s = ~((saidx_t)T[s - 1]); } + if(c0 != c2) { + BUCKET_A(c2) = k - SA; + k = SA + BUCKET_A(c2 = c0); + } + assert(i < k); + *k++ = s; + } else if(s != 0) { + *i = ~s; + } else { + orig = i; + } + } + + return orig - SA; +} +#endif + +/*---------------------------------------------------------------------------*/ + +/** + * Initialize suffix array context + * + * @return 0 for success, or non-zero in case of an error + */ +int divsufsort_init(divsufsort_ctx_t *ctx) { + ctx->bucket_A = (saidx_t *)malloc(BUCKET_A_SIZE * sizeof(saidx_t)); + ctx->bucket_B = NULL; + + if (ctx->bucket_A) { + ctx->bucket_B = (saidx_t *)malloc(BUCKET_B_SIZE * sizeof(saidx_t)); + + if (ctx->bucket_B) + return 0; + } + + divsufsort_destroy(ctx); + return -1; +} + +/** + * Destroy suffix array context + * + * @param ctx suffix array context to destroy + */ +void divsufsort_destroy(divsufsort_ctx_t *ctx) { + if (ctx->bucket_B) { + free(ctx->bucket_B); + ctx->bucket_B = NULL; + } + + if (ctx->bucket_A) { + free(ctx->bucket_A); + ctx->bucket_A = NULL; + } +} + +/*- Function -*/ + +saint_t +divsufsort_build_array(divsufsort_ctx_t *ctx, const sauchar_t *T, saidx_t *SA, saidx_t n) { + saidx_t m; + saint_t err = 0; + + /* Check arguments. */ + if((T == NULL) || (SA == NULL) || (n < 0)) { return -1; } + else if(n == 0) { return 0; } + else if(n == 1) { SA[0] = 0; return 0; } + else if(n == 2) { m = (T[0] < T[1]); SA[m ^ 1] = 0, SA[m] = 1; return 0; } + + /* Suffixsort. 
*/ + if((ctx->bucket_A != NULL) && (ctx->bucket_B != NULL)) { + m = sort_typeBstar(T, SA, ctx->bucket_A, ctx->bucket_B, n); + construct_SA(T, SA, ctx->bucket_A, ctx->bucket_B, n, m); + } else { + err = -2; + } + + return err; +} + +#if 0 +saidx_t +divbwt(const sauchar_t *T, sauchar_t *U, saidx_t *A, saidx_t n) { + saidx_t *B; + saidx_t *bucket_A, *bucket_B; + saidx_t m, pidx, i; + + /* Check arguments. */ + if((T == NULL) || (U == NULL) || (n < 0)) { return -1; } + else if(n <= 1) { if(n == 1) { U[0] = T[0]; } return n; } + + if((B = A) == NULL) { B = (saidx_t *)malloc((size_t)(n + 1) * sizeof(saidx_t)); } + bucket_A = (saidx_t *)malloc(BUCKET_A_SIZE * sizeof(saidx_t)); + bucket_B = (saidx_t *)malloc(BUCKET_B_SIZE * sizeof(saidx_t)); + + /* Burrows-Wheeler Transform. */ + if((B != NULL) && (bucket_A != NULL) && (bucket_B != NULL)) { + m = sort_typeBstar(T, B, bucket_A, bucket_B, n); + pidx = construct_BWT(T, B, bucket_A, bucket_B, n, m); + + /* Copy to output string. */ + U[0] = T[n - 1]; + for(i = 0; i < pidx; ++i) { U[i + 1] = (sauchar_t)B[i]; } + for(i += 1; i < n; ++i) { U[i] = (sauchar_t)B[i]; } + pidx += 1; + } else { + pidx = -2; + } + + free(bucket_B); + free(bucket_A); + if(A == NULL) { free(B); } + + return pidx; +} + +const char * +divsufsort_version(void) { + return PROJECT_VERSION_FULL; +} +#endif + +saint_t +divsufsort(const sauchar_t *T, saidx_t *SA, saidx_t n) { + saidx_t *bucket_A, *bucket_B; + saidx_t m; + saint_t err = 0; + + /* Check arguments. */ + if((T == NULL) || (SA == NULL) || (n < 0)) { return -1; } + else if(n == 0) { return 0; } + else if(n == 1) { SA[0] = 0; return 0; } + else if(n == 2) { m = (T[0] < T[1]); SA[m ^ 1] = 0, SA[m] = 1; return 0; } + + bucket_A = (saidx_t *)malloc(BUCKET_A_SIZE * sizeof(saidx_t)); + bucket_B = (saidx_t *)malloc(BUCKET_B_SIZE * sizeof(saidx_t)); + + /* Suffixsort. 
*/ + if((bucket_A != NULL) && (bucket_B != NULL)) { + m = sort_typeBstar(T, SA, bucket_A, bucket_B, n); + construct_SA(T, SA, bucket_A, bucket_B, n, m); + } else { + err = -2; + } + + free(bucket_B); + free(bucket_A); + + return err; +} diff --git a/tools/z64compress/src/enc/apultra/divsufsort.h b/tools/z64compress/src/enc/apultra/divsufsort.h new file mode 100644 index 000000000..5c617ee73 --- /dev/null +++ b/tools/z64compress/src/enc/apultra/divsufsort.h @@ -0,0 +1,192 @@ +/* + * divsufsort.h for libdivsufsort + * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef _DIVSUFSORT_H +#define _DIVSUFSORT_H 1 + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + +#define DIVSUFSORT_API + +/*- Datatypes -*/ +#ifndef SAUCHAR_T +#define SAUCHAR_T +typedef unsigned char sauchar_t; +#endif /* SAUCHAR_T */ +#ifndef SAINT_T +#define SAINT_T +typedef int saint_t; +#endif /* SAINT_T */ +#ifndef SAIDX_T +#define SAIDX_T +typedef int saidx_t; +#endif /* SAIDX_T */ +#ifndef PRIdSAIDX_T +#define PRIdSAIDX_T "d" +#endif + +/*- divsufsort context */ +typedef struct _divsufsort_ctx_t { + saidx_t *bucket_A; + saidx_t *bucket_B; +} divsufsort_ctx_t; + +/*- Prototypes -*/ + +/** + * Initialize suffix array context + * + * @return 0 for success, or non-zero in case of an error + */ +int divsufsort_init(divsufsort_ctx_t *ctx); + +/** + * Destroy suffix array context + * + * @param ctx suffix array context to destroy + */ +void divsufsort_destroy(divsufsort_ctx_t *ctx); + +/** + * Constructs the suffix array of a given string. + * @param ctx suffix array context + * @param T[0..n-1] The input string. + * @param SA[0..n-1] The output array of suffixes. + * @param n The length of the given string. + * @return 0 if no error occurred, -1 or -2 otherwise. + */ +DIVSUFSORT_API +saint_t divsufsort_build_array(divsufsort_ctx_t *ctx, const sauchar_t *T, saidx_t *SA, saidx_t n); + +#if 0 +/** + * Constructs the burrows-wheeler transformed string of a given string. + * @param T[0..n-1] The input string. + * @param U[0..n-1] The output string. (can be T) + * @param A[0..n-1] The temporary array. (can be NULL) + * @param n The length of the given string. + * @return The primary index if no error occurred, -1 or -2 otherwise. + */ +DIVSUFSORT_API +saidx_t +divbwt(const sauchar_t *T, sauchar_t *U, saidx_t *A, saidx_t n); + +/** + * Returns the version of the divsufsort library. + * @return The version number string. 
+ */ +DIVSUFSORT_API +const char * +divsufsort_version(void); + + +/** + * Constructs the burrows-wheeler transformed string of a given string and suffix array. + * @param T[0..n-1] The input string. + * @param U[0..n-1] The output string. (can be T) + * @param SA[0..n-1] The suffix array. (can be NULL) + * @param n The length of the given string. + * @param idx The output primary index. + * @return 0 if no error occurred, -1 or -2 otherwise. + */ +DIVSUFSORT_API +saint_t +bw_transform(const sauchar_t *T, sauchar_t *U, + saidx_t *SA /* can NULL */, + saidx_t n, saidx_t *idx); + +/** + * Inverse BW-transforms a given BWTed string. + * @param T[0..n-1] The input string. + * @param U[0..n-1] The output string. (can be T) + * @param A[0..n-1] The temporary array. (can be NULL) + * @param n The length of the given string. + * @param idx The primary index. + * @return 0 if no error occurred, -1 or -2 otherwise. + */ +DIVSUFSORT_API +saint_t +inverse_bw_transform(const sauchar_t *T, sauchar_t *U, + saidx_t *A /* can NULL */, + saidx_t n, saidx_t idx); + +/** + * Checks the correctness of a given suffix array. + * @param T[0..n-1] The input string. + * @param SA[0..n-1] The input suffix array. + * @param n The length of the given string. + * @param verbose The verbose mode. + * @return 0 if no error occurred. + */ +DIVSUFSORT_API +saint_t +sufcheck(const sauchar_t *T, const saidx_t *SA, saidx_t n, saint_t verbose); + +/** + * Search for the pattern P in the string T. + * @param T[0..Tsize-1] The input string. + * @param Tsize The length of the given string. + * @param P[0..Psize-1] The input pattern string. + * @param Psize The length of the given pattern string. + * @param SA[0..SAsize-1] The input suffix array. + * @param SAsize The length of the given suffix array. + * @param idx The output index. + * @return The count of matches if no error occurred, -1 otherwise. 
+ */ +DIVSUFSORT_API +saidx_t +sa_search(const sauchar_t *T, saidx_t Tsize, + const sauchar_t *P, saidx_t Psize, + const saidx_t *SA, saidx_t SAsize, + saidx_t *left); + +/** + * Search for the character c in the string T. + * @param T[0..Tsize-1] The input string. + * @param Tsize The length of the given string. + * @param SA[0..SAsize-1] The input suffix array. + * @param SAsize The length of the given suffix array. + * @param c The input character. + * @param idx The output index. + * @return The count of matches if no error occurred, -1 otherwise. + */ +DIVSUFSORT_API +saidx_t +sa_simplesearch(const sauchar_t *T, saidx_t Tsize, + const saidx_t *SA, saidx_t SAsize, + saint_t c, saidx_t *left); +#endif + +saint_t +divsufsort(const sauchar_t *T, saidx_t *SA, saidx_t n); + +#ifdef __cplusplus +} /* extern "C" */ +#endif /* __cplusplus */ + +#endif /* _DIVSUFSORT_H */ diff --git a/tools/z64compress/src/enc/apultra/divsufsort_config.h b/tools/z64compress/src/enc/apultra/divsufsort_config.h new file mode 100644 index 000000000..f112983cf --- /dev/null +++ b/tools/z64compress/src/enc/apultra/divsufsort_config.h @@ -0,0 +1,9 @@ +#define HAVE_STRING_H 1 +#define HAVE_STDLIB_H 1 +#define HAVE_MEMORY_H 1 +#define HAVE_STDINT_H 1 +#define INLINE inline + +#ifdef _MSC_VER +#pragma warning( disable : 4244 ) +#endif /* _MSC_VER */ diff --git a/tools/z64compress/src/enc/apultra/divsufsort_private.h b/tools/z64compress/src/enc/apultra/divsufsort_private.h new file mode 100644 index 000000000..b4d97ad4b --- /dev/null +++ b/tools/z64compress/src/enc/apultra/divsufsort_private.h @@ -0,0 +1,205 @@ +/* + * divsufsort_private.h for libdivsufsort + * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef _DIVSUFSORT_PRIVATE_H +#define _DIVSUFSORT_PRIVATE_H 1 + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + +#include "divsufsort_config.h" +#include <assert.h> +#include <stdio.h> +#if HAVE_STRING_H +# include <string.h> +#endif +#if HAVE_STDLIB_H +# include <stdlib.h> +#endif +#if HAVE_MEMORY_H +# include <memory.h> +#endif +#if HAVE_STDDEF_H +# include <stddef.h> +#endif +#if HAVE_STRINGS_H +# include <strings.h> +#endif +#if HAVE_INTTYPES_H +# include <inttypes.h> +#else +# if HAVE_STDINT_H +# include <stdint.h> +# endif +#endif +#if defined(BUILD_DIVSUFSORT64) +# include "divsufsort64.h" +# ifndef SAIDX_T +# define SAIDX_T +# define saidx_t saidx64_t +# endif /* SAIDX_T */ +# ifndef PRIdSAIDX_T +# define PRIdSAIDX_T PRIdSAIDX64_T +# endif /* PRIdSAIDX_T */ +# define divsufsort divsufsort64 +# define divbwt divbwt64 +# define divsufsort_version divsufsort64_version +# define bw_transform bw_transform64 +# define inverse_bw_transform inverse_bw_transform64 +# define sufcheck sufcheck64 +# define sa_search sa_search64 +# define sa_simplesearch sa_simplesearch64 +# define sssort sssort64 +# define trsort trsort64 +#else +# include "divsufsort.h" +#endif + + +/*- Constants -*/ +#if !defined(UINT8_MAX) +# define UINT8_MAX (255) +#endif /* UINT8_MAX */ +#if defined(ALPHABET_SIZE) && (ALPHABET_SIZE < 1) +# undef ALPHABET_SIZE +#endif +#if !defined(ALPHABET_SIZE) +# define ALPHABET_SIZE (UINT8_MAX + 1) +#endif +/* for divsufsort.c */ +#define BUCKET_A_SIZE (ALPHABET_SIZE) +#define BUCKET_B_SIZE (ALPHABET_SIZE * ALPHABET_SIZE) +/* for sssort.c */ +#if defined(SS_INSERTIONSORT_THRESHOLD) +# if SS_INSERTIONSORT_THRESHOLD < 1 +# undef SS_INSERTIONSORT_THRESHOLD +# define SS_INSERTIONSORT_THRESHOLD (1) +# endif +#else +# define SS_INSERTIONSORT_THRESHOLD (8) +#endif +#if defined(SS_BLOCKSIZE) +# if SS_BLOCKSIZE < 0 +# undef SS_BLOCKSIZE +# define SS_BLOCKSIZE (0) +# elif 32768 <= SS_BLOCKSIZE +# undef SS_BLOCKSIZE +# define SS_BLOCKSIZE (32767) +# endif +#else +# define SS_BLOCKSIZE (1024) +#endif +/* minstacksize = log(SS_BLOCKSIZE) / log(3) * 2 
*/ +#if SS_BLOCKSIZE == 0 +# if defined(BUILD_DIVSUFSORT64) +# define SS_MISORT_STACKSIZE (96) +# else +# define SS_MISORT_STACKSIZE (64) +# endif +#elif SS_BLOCKSIZE <= 4096 +# define SS_MISORT_STACKSIZE (16) +#else +# define SS_MISORT_STACKSIZE (24) +#endif +#if defined(BUILD_DIVSUFSORT64) +# define SS_SMERGE_STACKSIZE (64) +#else +# define SS_SMERGE_STACKSIZE (32) +#endif +/* for trsort.c */ +#define TR_INSERTIONSORT_THRESHOLD (8) +#if defined(BUILD_DIVSUFSORT64) +# define TR_STACKSIZE (96) +#else +# define TR_STACKSIZE (64) +#endif + + +/*- Macros -*/ +#ifndef SWAP +# define SWAP(_a, _b) do { t = (_a); (_a) = (_b); (_b) = t; } while(0) +#endif /* SWAP */ +#ifndef MIN +# define MIN(_a, _b) (((_a) < (_b)) ? (_a) : (_b)) +#endif /* MIN */ +#ifndef MAX +# define MAX(_a, _b) (((_a) > (_b)) ? (_a) : (_b)) +#endif /* MAX */ +#define STACK_PUSH(_a, _b, _c, _d)\ + do {\ + assert(ssize < STACK_SIZE);\ + stack[ssize].a = (_a), stack[ssize].b = (_b),\ + stack[ssize].c = (_c), stack[ssize++].d = (_d);\ + } while(0) +#define STACK_PUSH5(_a, _b, _c, _d, _e)\ + do {\ + assert(ssize < STACK_SIZE);\ + stack[ssize].a = (_a), stack[ssize].b = (_b),\ + stack[ssize].c = (_c), stack[ssize].d = (_d), stack[ssize++].e = (_e);\ + } while(0) +#define STACK_POP(_a, _b, _c, _d)\ + do {\ + assert(0 <= ssize);\ + if(ssize == 0) { return; }\ + (_a) = stack[--ssize].a, (_b) = stack[ssize].b,\ + (_c) = stack[ssize].c, (_d) = stack[ssize].d;\ + } while(0) +#define STACK_POP5(_a, _b, _c, _d, _e)\ + do {\ + assert(0 <= ssize);\ + if(ssize == 0) { return; }\ + (_a) = stack[--ssize].a, (_b) = stack[ssize].b,\ + (_c) = stack[ssize].c, (_d) = stack[ssize].d, (_e) = stack[ssize].e;\ + } while(0) +/* for divsufsort.c */ +#define BUCKET_A(_c0) bucket_A[(_c0)] +#if ALPHABET_SIZE == 256 +#define BUCKET_B(_c0, _c1) (bucket_B[((_c1) << 8) | (_c0)]) +#define BUCKET_BSTAR(_c0, _c1) (bucket_B[((_c0) << 8) | (_c1)]) +#else +#define BUCKET_B(_c0, _c1) (bucket_B[(_c1) * ALPHABET_SIZE + (_c0)]) +#define 
BUCKET_BSTAR(_c0, _c1) (bucket_B[(_c0) * ALPHABET_SIZE + (_c1)]) +#endif + + +/*- Private Prototypes -*/ +/* sssort.c */ +void +sssort(const sauchar_t *Td, const saidx_t *PA, + saidx_t *first, saidx_t *last, + saidx_t *buf, saidx_t bufsize, + saidx_t depth, saidx_t n, saint_t lastsuffix); +/* trsort.c */ +void +trsort(saidx_t *ISA, saidx_t *SA, saidx_t n, saidx_t depth); + + +#ifdef __cplusplus +} /* extern "C" */ +#endif /* __cplusplus */ + +#endif /* _DIVSUFSORT_PRIVATE_H */ diff --git a/tools/z64compress/src/enc/apultra/expand.c b/tools/z64compress/src/enc/apultra/expand.c new file mode 100644 index 000000000..c5ad18229 --- /dev/null +++ b/tools/z64compress/src/enc/apultra/expand.c @@ -0,0 +1,396 @@ +/* + * expand.c - decompressor implementation + * + * Copyright (C) 2019 Emmanuel Marty + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution. + */ + +/* + * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori + * + * Inspired by cap by Sven-Åke Dahl. https://github.com/svendahl/cap + * Also inspired by Charles Bloom's compression blog. http://cbloomrants.blogspot.com/ + * With ideas from LZ4 by Yann Collet. 
https://github.com/lz4/lz4 + * With help and support from spke + * + */ + +#include <stdlib.h> +#include <string.h> +#include "format.h" +#include "expand.h" +#include "libapultra.h" + +#ifdef _MSC_VER +#define FORCE_INLINE __forceinline +#else /* _MSC_VER */ +#define FORCE_INLINE __attribute__((always_inline)) +#endif /* _MSC_VER */ + +static inline FORCE_INLINE int apultra_read_bit(const unsigned char **ppInBlock, const unsigned char *pDataEnd, int *nCurBitMask, unsigned char *bits) { + const unsigned char *pInBlock = *ppInBlock; + int nBit; + + if ((*nCurBitMask) == 0) { + if (pInBlock >= pDataEnd) return -1; + (*bits) = *pInBlock++; + (*nCurBitMask) = 128; + } + + nBit = ((*bits) & 128) ? 1 : 0; + + (*bits) <<= 1; + (*nCurBitMask) >>= 1; + + *ppInBlock = pInBlock; + return nBit; +} + +static inline FORCE_INLINE int apultra_read_gamma2(const unsigned char **ppInBlock, const unsigned char *pDataEnd, int *nCurBitMask, unsigned char *bits) { + int bit; + unsigned int v = 1; + + do { + v = (v << 1) + apultra_read_bit(ppInBlock, pDataEnd, nCurBitMask, bits); + bit = apultra_read_bit(ppInBlock, pDataEnd, nCurBitMask, bits); + if (bit < 0) return bit; + } while (bit); + + return v; +} + +/** + * Get maximum decompressed size of compressed data + * + * @param pInputData compressed data + * @param nInputSize compressed size in bytes + * @param nFlags compression flags (set to 0) + * + * @return maximum decompressed size + */ +size_t apultra_get_max_decompressed_size(const unsigned char *pInputData, size_t nInputSize, const unsigned int nFlags) { + const unsigned char *pInputDataEnd = pInputData + nInputSize; + int nCurBitMask = 0; + unsigned char bits = 0; + int nMatchOffset = -1; + int nFollowsLiteral = 3; + size_t nDecompressedSize = 0; + + if (pInputData >= pInputDataEnd) + return -1; + pInputData++; + nDecompressedSize++; + + while (1) { + int nResult; + + nResult = apultra_read_bit(&pInputData, pInputDataEnd, &nCurBitMask, &bits); + if (nResult < 0) return -1; + + if (!nResult) { + /* 
'0': literal */ + if (pInputData < pInputDataEnd) { + pInputData++; + nDecompressedSize++; + nFollowsLiteral = 3; + } + else { + return -1; + } + } + else { + nResult = apultra_read_bit(&pInputData, pInputDataEnd, &nCurBitMask, &bits); + if (nResult < 0) return -1; + + if (nResult == 0) { + unsigned int nMatchLen; + + /* '10': 8+n bits offset */ + int nMatchOffsetHi = apultra_read_gamma2(&pInputData, pInputDataEnd, &nCurBitMask, &bits); + nMatchOffsetHi -= nFollowsLiteral; + if (nMatchOffsetHi >= 0) { + nMatchOffset = ((unsigned int) nMatchOffsetHi) << 8; + nMatchOffset |= (unsigned int)(*pInputData++); + + nMatchLen = apultra_read_gamma2(&pInputData, pInputDataEnd, &nCurBitMask, &bits); + + if (nMatchOffset < 128 || nMatchOffset >= MINMATCH4_OFFSET) + nMatchLen += 2; + else if (nMatchOffset >= MINMATCH3_OFFSET) + nMatchLen++; + } + else { + /* else rep-match */ + nMatchLen = apultra_read_gamma2(&pInputData, pInputDataEnd, &nCurBitMask, &bits); + } + + nFollowsLiteral = 2; + + nDecompressedSize += nMatchLen; + } + else { + nResult = apultra_read_bit(&pInputData, pInputDataEnd, &nCurBitMask, &bits); + if (nResult < 0) return -1; + + if (nResult == 0) { + unsigned int nCommand; + unsigned int nMatchLen; + + /* '110': 7 bits offset + 1 bit length */ + nCommand = (unsigned int)(*pInputData++); + if (nCommand == 0x00) { + /* EOD. No match len follows. 
*/ + break; + } + + /* Bits 7-1: offset; bit 0: length */ + nMatchOffset = (nCommand >> 1); + nMatchLen = (nCommand & 1) + 2; + + nFollowsLiteral = 2; + nDecompressedSize += nMatchLen; + } + else { + unsigned int nShortMatchOffset; + + /* '111': 4 bit offset */ + nResult = apultra_read_bit(&pInputData, pInputDataEnd, &nCurBitMask, &bits); + if (nResult < 0) return -1; + nShortMatchOffset = nResult << 3; + + nResult = apultra_read_bit(&pInputData, pInputDataEnd, &nCurBitMask, &bits); + if (nResult < 0) return -1; + nShortMatchOffset |= nResult << 2; + + nResult = apultra_read_bit(&pInputData, pInputDataEnd, &nCurBitMask, &bits); + if (nResult < 0) return -1; + nShortMatchOffset |= nResult << 1; + + nResult = apultra_read_bit(&pInputData, pInputDataEnd, &nCurBitMask, &bits); + if (nResult < 0) return -1; + nShortMatchOffset |= nResult << 0; + + nFollowsLiteral = 3; + nDecompressedSize++; + } + } + } + } + + return nDecompressedSize; +} + +/** + * Decompress data in memory + * + * @param pInputData compressed data + * @param pOutBuffer buffer for decompressed data + * @param nInputSize compressed size in bytes + * @param nMaxOutBufferSize maximum capacity of decompression buffer + * @param nDictionarySize size of dictionary in front of input data (0 for none) + * @param nFlags compression flags (set to 0) + * + * @return actual decompressed size, or -1 for error + */ +size_t apultra_decompress(const unsigned char *pInputData, unsigned char *pOutData, size_t nInputSize, size_t nMaxOutBufferSize, size_t nDictionarySize, const unsigned int nFlags) { + const unsigned char *pInputDataEnd = pInputData + nInputSize; + unsigned char *pCurOutData = pOutData + nDictionarySize; + const unsigned char *pOutDataEnd = pCurOutData + nMaxOutBufferSize; + const unsigned char *pOutDataFastEnd = pOutDataEnd - 20; + int nCurBitMask = 0; + unsigned char bits = 0; + int nMatchOffset = -1; + int nFollowsLiteral = 3; + + if (pInputData >= pInputDataEnd && pCurOutData < pOutDataEnd) + return 
-1; + *pCurOutData++ = *pInputData++; + + while (1) { + int nResult; + + nResult = apultra_read_bit(&pInputData, pInputDataEnd, &nCurBitMask, &bits); + if (nResult < 0) return -1; + + if (!nResult) { + /* '0': literal */ + if (pInputData < pInputDataEnd && pCurOutData < pOutDataEnd) { + *pCurOutData++ = *pInputData++; + nFollowsLiteral = 3; + } + else { + return -1; + } + } + else { + nResult = apultra_read_bit(&pInputData, pInputDataEnd, &nCurBitMask, &bits); + if (nResult < 0) return -1; + + if (nResult == 0) { + unsigned int nMatchLen; + + /* '10': 8+n bits offset */ + int nMatchOffsetHi = apultra_read_gamma2(&pInputData, pInputDataEnd, &nCurBitMask, &bits); + nMatchOffsetHi -= nFollowsLiteral; + if (nMatchOffsetHi >= 0) { + nMatchOffset = ((unsigned int) nMatchOffsetHi) << 8; + nMatchOffset |= (unsigned int)(*pInputData++); + + nMatchLen = apultra_read_gamma2(&pInputData, pInputDataEnd, &nCurBitMask, &bits); + + if (nMatchOffset < 128 || nMatchOffset >= MINMATCH4_OFFSET) + nMatchLen += 2; + else if (nMatchOffset >= MINMATCH3_OFFSET) + nMatchLen++; + } + else { + /* else rep-match */ + nMatchLen = apultra_read_gamma2(&pInputData, pInputDataEnd, &nCurBitMask, &bits); + } + + nFollowsLiteral = 2; + const unsigned char *pSrc = pCurOutData - nMatchOffset; + if (pSrc >= pOutData && (pSrc + nMatchLen) <= pOutDataEnd) { + if (nMatchLen < 11 && nMatchOffset >= 8 && pCurOutData < pOutDataFastEnd) { + memcpy(pCurOutData, pSrc, 8); + memcpy(pCurOutData + 8, pSrc + 8, 2); + pCurOutData += nMatchLen; + } + else { + if ((pCurOutData + nMatchLen) <= pOutDataEnd) { + /* Do a deterministic, left to right byte copy instead of memcpy() so as to handle overlaps */ + + if (nMatchOffset >= 16 && (pCurOutData + nMatchLen) < (pOutDataFastEnd - 15)) { + const unsigned char *pCopySrc = pSrc; + unsigned char *pCopyDst = pCurOutData; + const unsigned char *pCopyEndDst = pCurOutData + nMatchLen; + + do { + memcpy(pCopyDst, pCopySrc, 16); + pCopySrc += 16; + pCopyDst += 16; + } while 
(pCopyDst < pCopyEndDst); + + pCurOutData += nMatchLen; + } + else { + while (nMatchLen) { + *pCurOutData++ = *pSrc++; + nMatchLen--; + } + } + } + else { + return -1; + } + } + } + else { + return -1; + } + } + else { + nResult = apultra_read_bit(&pInputData, pInputDataEnd, &nCurBitMask, &bits); + if (nResult < 0) return -1; + + if (nResult == 0) { + unsigned int nCommand; + unsigned int nMatchLen; + + /* '110': 7 bits offset + 1 bit length */ + nCommand = (unsigned int)(*pInputData++); + if (nCommand == 0x00) { + /* EOD. No match len follows. */ + break; + } + + /* Bits 7-1: offset; bit 0: length */ + nMatchOffset = (nCommand >> 1); + nMatchLen = (nCommand & 1) + 2; + + nFollowsLiteral = 2; + const unsigned char *pSrc = pCurOutData - nMatchOffset; + if (pSrc >= pOutData && (pSrc + nMatchLen) <= pOutDataEnd) { + if (nMatchOffset >= 8 && pCurOutData < pOutDataFastEnd) { + memcpy(pCurOutData, pSrc, 8); + memcpy(pCurOutData + 8, pSrc + 8, 2); + pCurOutData += nMatchLen; + } + else { + if ((pCurOutData + nMatchLen) <= pOutDataEnd) { + while (nMatchLen) { + *pCurOutData++ = *pSrc++; + nMatchLen--; + } + } + else { + return -1; + } + } + } + else { + return -1; + } + } + else { + unsigned int nShortMatchOffset; + + /* '111': 4 bit offset */ + nResult = apultra_read_bit(&pInputData, pInputDataEnd, &nCurBitMask, &bits); + if (nResult < 0) return -1; + nShortMatchOffset = nResult << 3; + + nResult = apultra_read_bit(&pInputData, pInputDataEnd, &nCurBitMask, &bits); + if (nResult < 0) return -1; + nShortMatchOffset |= nResult << 2; + + nResult = apultra_read_bit(&pInputData, pInputDataEnd, &nCurBitMask, &bits); + if (nResult < 0) return -1; + nShortMatchOffset |= nResult << 1; + + nResult = apultra_read_bit(&pInputData, pInputDataEnd, &nCurBitMask, &bits); + if (nResult < 0) return -1; + nShortMatchOffset |= nResult << 0; + + nFollowsLiteral = 3; + if (nShortMatchOffset) { + /* Short offset, 1-15 */ + const unsigned char *pSrc = pCurOutData - nShortMatchOffset; + if (pSrc 
>= pOutData && (pCurOutData + 1) <= pOutDataEnd && (pSrc + 1) <= pOutDataEnd) { + *pCurOutData++ = *pSrc++; + } + else { + return -1; + } + } + else { + /* Write zero */ + if ((pCurOutData + 1) <= pOutDataEnd) { + *pCurOutData++ = 0; + } + else { + return -1; + } + } + } + } + } + } + + return (size_t)(pCurOutData - pOutData) - nDictionarySize; +} diff --git a/tools/z64compress/src/enc/apultra/expand.h b/tools/z64compress/src/enc/apultra/expand.h new file mode 100644 index 000000000..9cd658ad8 --- /dev/null +++ b/tools/z64compress/src/enc/apultra/expand.h @@ -0,0 +1,71 @@ +/* + * expand.h - decompressor definitions + * + * Copyright (C) 2019 Emmanuel Marty + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution. + */ + +/* + * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori + * + * Inspired by cap by Sven-Åke Dahl. https://github.com/svendahl/cap + * Also inspired by Charles Bloom's compression blog. http://cbloomrants.blogspot.com/ + * With ideas from LZ4 by Yann Collet. 
https://github.com/lz4/lz4 + * With help and support from spke + * + */ + +#ifndef _EXPAND_H +#define _EXPAND_H + +#include <stddef.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * Get maximum decompressed size of compressed data + * + * @param pInputData compressed data + * @param nInputSize compressed size in bytes + * @param nFlags compression flags (set to 0) + * + * @return maximum decompressed size + */ +size_t apultra_get_max_decompressed_size(const unsigned char *pInputData, size_t nInputSize, const unsigned int nFlags); + +/** + * Decompress data in memory + * + * @param pInputData compressed data + * @param pOutBuffer buffer for decompressed data + * @param nInputSize compressed size in bytes + * @param nMaxOutBufferSize maximum capacity of decompression buffer + * @param nDictionarySize size of dictionary in front of input data (0 for none) + * @param nFlags compression flags (set to 0) + * + * @return actual decompressed size, or -1 for error + */ +size_t apultra_decompress(const unsigned char *pInputData, unsigned char *pOutBuffer, size_t nInputSize, size_t nMaxOutBufferSize, size_t nDictionarySize, const unsigned int nFlags); + +#ifdef __cplusplus +} +#endif + +#endif /* _EXPAND_H */ diff --git a/tools/z64compress/src/enc/apultra/format.h b/tools/z64compress/src/enc/apultra/format.h new file mode 100644 index 000000000..1e280c1b3 --- /dev/null +++ b/tools/z64compress/src/enc/apultra/format.h @@ -0,0 +1,47 @@ +/* + * format.h - byte stream format definitions + * + * Copyright (C) 2019 Emmanuel Marty + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. 
The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution. + */ + +/* + * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori + * + * Inspired by cap by Sven-Åke Dahl. https://github.com/svendahl/cap + * Also inspired by Charles Bloom's compression blog. http://cbloomrants.blogspot.com/ + * With ideas from LZ4 by Yann Collet. https://github.com/lz4/lz4 + * With help and support from spke + * + */ + +#ifndef _FORMAT_H +#define _FORMAT_H + +#define MIN_OFFSET 1 +#define MAX_OFFSET 0x1fffff + +#define MAX_VARLEN 0x1fffff + +#define BLOCK_SIZE 0x100000 + +#define MIN_MATCH_SIZE 1 +#define MINMATCH3_OFFSET 1280 +#define MINMATCH4_OFFSET 32000 + +#endif /* _FORMAT_H */ diff --git a/tools/z64compress/src/enc/apultra/libapultra.h b/tools/z64compress/src/enc/apultra/libapultra.h new file mode 100644 index 000000000..36fd29555 --- /dev/null +++ b/tools/z64compress/src/enc/apultra/libapultra.h @@ -0,0 +1,40 @@ +/* + * libapultra.h - library definitions + * + * Copyright (C) 2019 Emmanuel Marty + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. 
If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution. + */ + +/* + * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori + * + * Inspired by cap by Sven-Åke Dahl. https://github.com/svendahl/cap + * Also inspired by Charles Bloom's compression blog. http://cbloomrants.blogspot.com/ + * With ideas from LZ4 by Yann Collet. https://github.com/lz4/lz4 + * With help and support from spke + * + */ + +#ifndef _LIB_APULTRA_H +#define _LIB_APULTRA_H + +#include "format.h" +#include "shrink.h" +#include "expand.h" + +#endif /* _LIB_APULTRA_H */ diff --git a/tools/z64compress/src/enc/apultra/matchfinder.c b/tools/z64compress/src/enc/apultra/matchfinder.c new file mode 100644 index 000000000..8d7802a52 --- /dev/null +++ b/tools/z64compress/src/enc/apultra/matchfinder.c @@ -0,0 +1,449 @@ +/* + * matchfinder.c - LZ match finder implementation + * + * The following copying information applies to this specific source code file: + * + * Written in 2019 by Emmanuel Marty + * Portions written in 2014-2015 by Eric Biggers + * + * To the extent possible under law, the author(s) have dedicated all copyright + * and related and neighboring rights to this software to the public domain + * worldwide via the Creative Commons Zero 1.0 Universal Public Domain + * Dedication (the "CC0"). + * + * This software is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the CC0 for more details. + * + * You should have received a copy of the CC0 along with this software; if not + * see . 
+ */ + +/* + * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori + * + * Inspired by cap by Sven-Åke Dahl. https://github.com/svendahl/cap + * Also inspired by Charles Bloom's compression blog. http://cbloomrants.blogspot.com/ + * With ideas from LZ4 by Yann Collet. https://github.com/lz4/lz4 + * With help and support from spke + * + */ + +#include <stdlib.h> +#include <string.h> +#include "matchfinder.h" +#include "format.h" +#include "libapultra.h" + +/** + * Hash index into TAG_BITS + * + * @param nIndex index value + * + * @return hash + */ +static inline int apultra_get_index_tag(unsigned int nIndex) { + return (int)(((unsigned long long)nIndex * 11400714819323198485ULL) >> (64ULL - TAG_BITS)); +} + +/** + * Parse input data, build suffix array and overlaid data structures to speed up match finding + * + * @param pCompressor compression context + * @param pInWindow pointer to input data window (previously compressed bytes + bytes to compress) + * @param nInWindowSize total input size in bytes (previously compressed bytes + bytes to compress) + * + * @return 0 for success, non-zero for failure + */ +int apultra_build_suffix_array(apultra_compressor *pCompressor, const unsigned char *pInWindow, const int nInWindowSize) { + unsigned long long *intervals = pCompressor->intervals; + + /* Build suffix array from input data */ + saidx_t *suffixArray = (saidx_t*)intervals; + if (divsufsort_build_array(&pCompressor->divsufsort_context, pInWindow, suffixArray, nInWindowSize) != 0) { + return 100; + } + + int i, r; + + for (i = nInWindowSize - 1; i >= 0; i--) { + intervals[i] = suffixArray[i]; + } + + int *PLCP = (int*)pCompressor->pos_data; /* Use temporarily */ + int *Phi = PLCP; + int nCurLen = 0; + + /* Compute the permuted LCP first (Kärkkäinen method) */ + Phi[intervals[0]] = -1; + for (i = 1; i < nInWindowSize; i++) + Phi[intervals[i]] = (unsigned int)intervals[i - 1]; + for (i = 0; i < nInWindowSize; i++) { + if (Phi[i] == -1) { + PLCP[i] = 0; + continue; + } + int 
nMaxLen = (i > Phi[i]) ? (nInWindowSize - i) : (nInWindowSize - Phi[i]); + while (nCurLen < nMaxLen && pInWindow[i + nCurLen] == pInWindow[Phi[i] + nCurLen]) nCurLen++; + PLCP[i] = nCurLen; + if (nCurLen > 0) + nCurLen--; + } + + /* Rotate permuted LCP into the LCP. This has better cache locality than the direct Kasai LCP method. This also + * saves us from having to build the inverse suffix array index, as the LCP is calculated without it using this method, + * and the interval builder below doesn't need it either. */ + intervals[0] &= POS_MASK; + + for (i = 1; i < nInWindowSize; i++) { + int nIndex = (int)(intervals[i] & POS_MASK); + int nLen = PLCP[nIndex]; + if (nLen < MIN_MATCH_SIZE) + nLen = 0; + if (nLen > LCP_MAX) + nLen = LCP_MAX; + int nTaggedLen = 0; + if (nLen) + nTaggedLen = (nLen << TAG_BITS) | (apultra_get_index_tag((unsigned int)nIndex) & ((1 << TAG_BITS) - 1)); + intervals[i] = ((unsigned long long)nIndex) | (((unsigned long long)nTaggedLen) << LCP_SHIFT); + } + + /** + * Build intervals for finding matches + * + * Methodology and code fragment taken from wimlib (CC0 license): + * https://wimlib.net/git/?p=wimlib;a=blob_plain;f=src/lcpit_matchfinder.c;h=a2d6a1e0cd95200d1f3a5464d8359d5736b14cbe;hb=HEAD + */ + unsigned long long * const SA_and_LCP = intervals; + unsigned long long *pos_data = pCompressor->pos_data; + unsigned long long next_interval_idx; + unsigned long long *top = pCompressor->open_intervals; + unsigned long long prev_pos = SA_and_LCP[0] & POS_MASK; + + *top = 0; + intervals[0] = 0; + next_interval_idx = 1; + + for (r = 1; r < nInWindowSize; r++) { + const unsigned long long next_pos = SA_and_LCP[r] & POS_MASK; + const unsigned long long next_lcp = SA_and_LCP[r] & LCP_MASK; + const unsigned long long top_lcp = *top & LCP_MASK; + + if (next_lcp == top_lcp) { + /* Continuing the deepest open interval */ + pos_data[prev_pos] = *top; + } + else if (next_lcp > top_lcp) { + /* Opening a new interval */ + *++top = next_lcp | 
next_interval_idx++; + pos_data[prev_pos] = *top; + } + else { + /* Closing the deepest open interval */ + pos_data[prev_pos] = *top; + for (;;) { + const unsigned long long closed_interval_idx = *top-- & POS_MASK; + const unsigned long long superinterval_lcp = *top & LCP_MASK; + + if (next_lcp == superinterval_lcp) { + /* Continuing the superinterval */ + intervals[closed_interval_idx] = *top; + break; + } + else if (next_lcp > superinterval_lcp) { + /* Creating a new interval that is a + * superinterval of the one being + * closed, but still a subinterval of + * its superinterval */ + *++top = next_lcp | next_interval_idx++; + intervals[closed_interval_idx] = *top; + break; + } + else { + /* Also closing the superinterval */ + intervals[closed_interval_idx] = *top; + } + } + } + prev_pos = next_pos; + } + + /* Close any still-open intervals. */ + pos_data[prev_pos] = *top; + for (; top > pCompressor->open_intervals; top--) + intervals[*top & POS_MASK] = *(top - 1); + + /* Success */ + return 0; +} + +/** + * Find matches at the specified offset in the input window + * + * @param pCompressor compression context + * @param nOffset offset to find matches at, in the input window + * @param pMatches pointer to returned matches + * @param pMatchDepth pointer to returned match depths + * @param pMatch1 pointer to 1-byte length, 4 bit offset match + * @param nMaxMatches maximum number of matches to return (0 for none) + * @param nBlockFlags bit 0: 1 for first block, 0 otherwise; bit 1: 1 for last block, 0 otherwise + * + * @return number of matches + */ +int apultra_find_matches_at(apultra_compressor *pCompressor, const int nOffset, apultra_match *pMatches, unsigned short *pMatchDepth, unsigned char *pMatch1, const int nMaxMatches, const int nBlockFlags) { + unsigned long long *intervals = pCompressor->intervals; + unsigned long long *pos_data = pCompressor->pos_data; + unsigned long long ref; + unsigned long long super_ref; + unsigned long long match_pos; + 
   apultra_match *matchptr;
   unsigned short *depthptr;
   const int nMaxOffset = pCompressor->max_offset;

   *pMatch1 = 0;

   /**
    * Find matches using intervals
    *
    * Taken from wimlib (CC0 license):
    * https://wimlib.net/git/?p=wimlib;a=blob_plain;f=src/lcpit_matchfinder.c;h=a2d6a1e0cd95200d1f3a5464d8359d5736b14cbe;hb=HEAD
    */

   /* Get the deepest lcp-interval containing the current suffix. */
   ref = pos_data[nOffset];

   pos_data[nOffset] = 0;

   /* Ascend until we reach a visited interval, the root, or a child of the
    * root. Link unvisited intervals to the current suffix as we go. */
   while ((super_ref = intervals[ref & POS_MASK]) & LCP_MASK) {
      intervals[ref & POS_MASK] = nOffset | VISITED_FLAG;
      ref = super_ref;
   }

   if (super_ref == 0) {
      /* In this case, the current interval may be any of:
       * (1) the root;
       * (2) an unvisited child of the root */

      if (ref != 0) /* Not the root? */
         intervals[ref & POS_MASK] = nOffset | VISITED_FLAG;
      return 0;
   }

   /* Ascend indirectly via pos_data[] links. */
   match_pos = super_ref & EXCL_VISITED_MASK;
   matchptr = pMatches;
   depthptr = pMatchDepth;
   /* Depth-chain compaction state: a run of candidates whose offset and length
    * both shrink by exactly 1 per step is stored as one match entry plus an
    * incrementing depth counter in *cur_depth, instead of separate entries. */
   int nPrevOffset = 0;
   int nPrevLen = 0;
   int nCurDepth = 0;
   unsigned short *cur_depth = NULL;

   /* First candidate: the suffix reached directly from the deepest interval.
    * Matches are only emitted when both block flags are set (single-block case). */
   if (nOffset >= match_pos && (nBlockFlags & 3) == 3) {
      int nMatchOffset = (int)(nOffset - match_pos);
      int nMatchLen = (int)(ref >> (LCP_SHIFT + TAG_BITS));

      if ((matchptr - pMatches) < nMaxMatches) {
         if (nMatchOffset <= nMaxOffset) {
            if (nPrevOffset && nPrevLen > 2 && nMatchOffset == (nPrevOffset - 1) && nMatchLen == (nPrevLen - 1) && cur_depth && nCurDepth < LCP_MAX) {
               /* Extend the current depth chain instead of adding an entry */
               nCurDepth++;
               *cur_depth = nCurDepth;
            }
            else {
               nCurDepth = 0;

               cur_depth = depthptr;
               matchptr->length = nMatchLen;
               matchptr->offset = nMatchOffset;
               *depthptr = 0;
               matchptr++;
               depthptr++;
            }

            nPrevLen = nMatchLen;
            nPrevOffset = nMatchOffset;
         }
      }
   }

   for (;;) {
      /* Candidates found while ascending are tagged with 0x8000 in their depth
       * word; the optimizer applies an extra score penalty to those (it reads
       * match_depth & 0x8000). They are also filtered to offsets at least 128
       * away from the previous candidate, to diversify the offset set. */
      if ((super_ref = pos_data[match_pos]) > ref) {
         match_pos = intervals[super_ref & POS_MASK] & EXCL_VISITED_MASK;

         if (nOffset >= match_pos && (nBlockFlags & 3) == 3) {
            int nMatchOffset = (int)(nOffset - match_pos);
            int nMatchLen = (int)(ref >> (LCP_SHIFT + TAG_BITS));

            if ((matchptr - pMatches) < nMaxMatches) {
               if (nMatchOffset <= nMaxOffset && abs(nMatchOffset - nPrevOffset) >= 128) {
                  if (nPrevOffset && nPrevLen > 2 && nMatchOffset == (nPrevOffset - 1) && nMatchLen == (nPrevLen - 1) && cur_depth && nCurDepth < LCP_MAX) {
                     nCurDepth++;
                     *cur_depth = nCurDepth | 0x8000;
                  }
                  else {
                     nCurDepth = 0;

                     cur_depth = depthptr;
                     matchptr->length = nMatchLen;
                     matchptr->offset = nMatchOffset;
                     *depthptr = 0x8000;
                     matchptr++;
                     depthptr++;
                  }

                  nPrevLen = nMatchLen;
                  nPrevOffset = nMatchOffset;
               }
            }
         }
      }

      while ((super_ref = pos_data[match_pos]) > ref) {
         match_pos = intervals[super_ref & POS_MASK] & EXCL_VISITED_MASK;

         if (nOffset > match_pos && (nBlockFlags & 3) == 3) {
            int nMatchOffset = (int)(nOffset - match_pos);
            int nMatchLen = (int)(ref >> (LCP_SHIFT + TAG_BITS));

            if ((matchptr - pMatches) < nMaxMatches) {
               /* Length-2 candidates are only kept if at least one slot would
                * remain free for a longer match at this position */
               if (nMatchOffset <= nMaxOffset && (nMatchLen >= 3 || (nMatchLen >= 2 && (matchptr - pMatches) < (nMaxMatches - 1))) && nMatchLen < 1280 && abs(nMatchOffset - nPrevOffset) >= 128) {
                  if (nPrevOffset && nPrevLen > 2 && nMatchOffset == (nPrevOffset - 1) && nMatchLen == (nPrevLen - 1) && cur_depth && nCurDepth < LCP_MAX) {
                     nCurDepth++;
                     *cur_depth = nCurDepth | 0x8000;
                  }
                  else {
                     nCurDepth = 0;

                     cur_depth = depthptr;
                     matchptr->length = nMatchLen;
                     matchptr->offset = nMatchOffset;
                     *depthptr = 0x8000;
                     matchptr++;
                     depthptr++;
                  }

                  nPrevLen = nMatchLen;
                  nPrevOffset = nMatchOffset;
               }
            }
         }
      }

      /* Mark the interval visited and re-link it to the current suffix */
      intervals[ref & POS_MASK] = nOffset | VISITED_FLAG;
      pos_data[match_pos] = (unsigned long long)ref;

      /* Primary candidate for this interval (untagged depth word) */
      int nMatchOffset = (int)(nOffset - match_pos);
      int nMatchLen = (int)(ref >> (LCP_SHIFT + TAG_BITS));

      if ((matchptr - pMatches) < nMaxMatches) {
         if (nMatchOffset <= nMaxOffset && nMatchOffset != nPrevOffset) {
            if (nPrevOffset && nPrevLen > 2 && nMatchOffset == (nPrevOffset - 1) && nMatchLen == (nPrevLen - 1) && cur_depth && nCurDepth < LCP_MAX) {
               nCurDepth++;
               *cur_depth = nCurDepth;
            }
            else {
               nCurDepth = 0;

               cur_depth = depthptr;
               matchptr->length = nMatchLen;
               matchptr->offset = nMatchOffset;
               *depthptr = 0;
               matchptr++;
               depthptr++;
            }

            nPrevLen = nMatchLen;
            nPrevOffset = nMatchOffset;
         }
      }

      /* Remember a short-offset (1..15) match for the 4-bit match command */
      if (nMatchOffset && nMatchOffset < 16 && nMatchLen)
         *pMatch1 = nMatchOffset;

      if (super_ref == 0)
         break;
      ref = super_ref;
      match_pos = intervals[ref & POS_MASK] & EXCL_VISITED_MASK;

      if (nOffset > match_pos && (nBlockFlags & 3) == 3) {
         int nMatchOffset = (int)(nOffset - match_pos);
         int nMatchLen = (int)(ref >> (LCP_SHIFT + TAG_BITS));

         if ((matchptr - pMatches) < nMaxMatches) {
            if (nMatchOffset <= nMaxOffset && nMatchLen >= 2 && abs(nMatchOffset - nPrevOffset) >= 128) {
               if (nPrevOffset && nPrevLen > 2 && nMatchOffset == (nPrevOffset - 1) && nMatchLen == (nPrevLen - 1) && cur_depth && nCurDepth < LCP_MAX) {
                  nCurDepth++;
                  *cur_depth = nCurDepth | 0x8000;
               }
               else {
                  nCurDepth = 0;

                  cur_depth = depthptr;
                  matchptr->length = nMatchLen;
                  matchptr->offset = nMatchOffset;
                  *depthptr = 0x8000;
                  matchptr++;
                  depthptr++;
               }

               nPrevLen = nMatchLen;
               nPrevOffset = nMatchOffset;
            }
         }
      }
   }

   return (int)(matchptr - pMatches);
}
    */
   for (i = nStartOffset; i < nEndOffset; i++) {
      apultra_find_matches_at(pCompressor, i, &match, &depth, &match1, 0, 0);
   }
}

/**
 * Find all matches for the data to be compressed
 *
 * @param pCompressor compression context
 * @param nMatchesPerOffset maximum number of matches to store for each offset
 * @param nStartOffset current offset in input window (typically the number of previously compressed bytes)
 * @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes)
 * @param nBlockFlags bit 0: 1 for first block, 0 otherwise; bit 1: 1 for last block, 0 otherwise
 */
void apultra_find_all_matches(apultra_compressor *pCompressor, const int nMatchesPerOffset, const int nStartOffset, const int nEndOffset, const int nBlockFlags) {
   apultra_match *pMatch = pCompressor->match;
   unsigned short *pMatchDepth = pCompressor->match_depth;
   unsigned char *pMatch1 = pCompressor->match1;
   int i;

   for (i = nStartOffset; i < nEndOffset; i++) {
      int nMatches = apultra_find_matches_at(pCompressor, i, pMatch, pMatchDepth, pMatch1, nMatchesPerOffset, nBlockFlags);

      /* Zero-fill the unused match slots for this position */
      while (nMatches < nMatchesPerOffset) {
         pMatch[nMatches].length = 0;
         pMatch[nMatches].offset = 0;
         pMatchDepth[nMatches] = 0;
         nMatches++;
      }

      pMatch += nMatchesPerOffset;
      pMatchDepth += nMatchesPerOffset;
      pMatch1++;
   }
}
diff --git a/tools/z64compress/src/enc/apultra/matchfinder.h b/tools/z64compress/src/enc/apultra/matchfinder.h
new file mode 100644
index 000000000..4a6935435
--- /dev/null
+++ b/tools/z64compress/src/enc/apultra/matchfinder.h
@@ -0,0 +1,94 @@
+/*
+ * matchfinder.h - LZ match finder definitions
+ *
+ * Copyright (C) 2019 Emmanuel Marty
+ *
+ * This software is provided 'as-is', without any express or implied
+ * warranty. In no event will the authors be held liable for any damages
+ * arising from the use of this software.
+ * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution. + */ + +/* + * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori + * + * Inspired by cap by Sven-Åke Dahl. https://github.com/svendahl/cap + * Also inspired by Charles Bloom's compression blog. http://cbloomrants.blogspot.com/ + * With ideas from LZ4 by Yann Collet. https://github.com/lz4/lz4 + * With help and support from spke + * + */ + +#ifndef _MATCHFINDER_H +#define _MATCHFINDER_H + +#ifdef __cplusplus +extern "C" { +#endif + +/* Forward declarations */ +typedef struct _apultra_match apultra_match; +typedef struct _apultra_compressor apultra_compressor; + +/** + * Parse input data, build suffix array and overlaid data structures to speed up match finding + * + * @param pCompressor compression context + * @param pInWindow pointer to input data window (previously compressed bytes + bytes to compress) + * @param nInWindowSize total input size in bytes (previously compressed bytes + bytes to compress) + * + * @return 0 for success, non-zero for failure + */ +int apultra_build_suffix_array(apultra_compressor *pCompressor, const unsigned char *pInWindow, const int nInWindowSize); + +/** + * Find matches at the specified offset in the input window + * + * @param pCompressor compression context + * @param nOffset offset to find matches at, in the input window + * @param pMatches pointer to 
returned matches + * @param pMatchDepth pointer to returned match depths + * @param pMatch1 pointer to 1-byte length, 4 bit offset match + * @param nMaxMatches maximum number of matches to return (0 for none) + * @param nBlockFlags bit 0: 1 for first block, 0 otherwise; bit 1: 1 for last block, 0 otherwise + * + * @return number of matches + */ +int apultra_find_matches_at(apultra_compressor *pCompressor, const int nOffset, apultra_match *pMatches, unsigned short *pMatchDepth, unsigned char *pMatch1, const int nMaxMatches, const int nBlockFlags); + +/** + * Skip previously compressed bytes + * + * @param pCompressor compression context + * @param nStartOffset current offset in input window (typically 0) + * @param nEndOffset offset to skip to in input window (typically the number of previously compressed bytes) + */ +void apultra_skip_matches(apultra_compressor *pCompressor, const int nStartOffset, const int nEndOffset); + +/** + * Find all matches for the data to be compressed + * + * @param pCompressor compression context + * @param nMatchesPerOffset maximum number of matches to store for each offset + * @param nStartOffset current offset in input window (typically the number of previously compressed bytes) + * @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes + * @param nBlockFlags bit 0: 1 for first block, 0 otherwise; bit 1: 1 for last block, 0 otherwise + */ +void apultra_find_all_matches(apultra_compressor *pCompressor, const int nMatchesPerOffset, const int nStartOffset, const int nEndOffset, const int nBlockFlags); + +#ifdef __cplusplus +} +#endif + +#endif /* _MATCHFINDER_H */ diff --git a/tools/z64compress/src/enc/apultra/shrink.c b/tools/z64compress/src/enc/apultra/shrink.c new file mode 100644 index 000000000..ece2144e8 --- /dev/null +++ b/tools/z64compress/src/enc/apultra/shrink.c @@ -0,0 +1,1731 @@ +/* + * shrink.c - compressor implementation + * + * Copyright (C) 2019 Emmanuel Marty + * + * 
This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution. + */ + +/* + * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori + * + * Inspired by cap by Sven-Åke Dahl. https://github.com/svendahl/cap + * Also inspired by Charles Bloom's compression blog. http://cbloomrants.blogspot.com/ + * With ideas from LZ4 by Yann Collet. 
https://github.com/lz4/lz4 + * With help and support from spke + * + */ + +#include +#include +#include +#include "libapultra.h" +#include "matchfinder.h" +#include "shrink.h" +#include "format.h" + +#define TOKEN_CODE_LARGE_MATCH 2 /* 10 */ +#define TOKEN_SIZE_LARGE_MATCH 2 + +#define TOKEN_CODE_7BIT_MATCH 6 /* 110 */ +#define TOKEN_SIZE_7BIT_MATCH 3 + +#define TOKEN_CODE_4BIT_MATCH 7 /* 111 */ +#define TOKEN_SIZE_4BIT_MATCH 3 + +#define CountShift(N,bits) if ((N)>>(bits)) { (N)>>=(bits); (n) += (bits); } + +/** Gamma2 bit counts for common values, up to 255 */ +static char _gamma2_size[256] = { + 0, 0, 2, 2, 4, 4, 4, 4, 6, 6, 6, 6, 6, 6, 6, 6, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, +}; + +/** + * Write bitpacked value to output (compressed) buffer + * + * @param pOutData pointer to output buffer + * @param nOutOffset current write index into output buffer + * @param nMaxOutDataSize maximum size of output buffer, in bytes + * @param nValue value to write + * @param nBits number of least significant bits to write in value + * @param nCurBitsOffset write index into output 
buffer, of current byte being filled with bits
 * @param nCurBitShift bit shift count
 *
 * @return updated write index into output buffer, or -1 in case of an error
 */
static int apultra_write_bits(unsigned char *pOutData, int nOutOffset, const int nMaxOutDataSize, const int nValue, const int nBits, int *nCurBitsOffset, int *nCurBitShift) {
   int i;

   /* Propagate an earlier error (callers chain calls without checking each one) */
   if (nOutOffset < 0) return -1;

   /* Emit bits most-significant first; *nCurBitsOffset == INT_MIN means no
    * partially-filled bit byte is currently open in the stream */
   for (i = nBits - 1; i >= 0; i--) {
      if ((*nCurBitsOffset) == INT_MIN) {
         /* Allocate a new byte in the stream to pack bits in */
         if (nOutOffset >= nMaxOutDataSize) return -1;
         (*nCurBitsOffset) = nOutOffset;
         (*nCurBitShift) = 7;
         pOutData[nOutOffset++] = 0;
      }

      pOutData[(*nCurBitsOffset)] |= ((nValue >> i) & 1) << (*nCurBitShift);

      (*nCurBitShift) --;
      if ((*nCurBitShift) == -1) {
         /* Current byte is full */
         (*nCurBitsOffset) = INT_MIN;
      }
   }

   return nOutOffset;
}

/**
 * Get size of gamma2 encoded value
 *
 * @param nValue value of evaluate (2..n)
 *
 * @return number of bits required
 */
static int apultra_get_gamma2_size(int nValue) {
   /* Fast path: table lookup for values up to 255 */
   if (nValue >= 0 && nValue < 256)
      return _gamma2_size[nValue];
   else {
      /* General path: count significant bits; gamma2 uses 2 bits per value bit */
      unsigned int n = 0;
      CountShift(nValue, 16);
      CountShift(nValue, 8);
      CountShift(nValue, 4);
      CountShift(nValue, 2);
      CountShift(nValue, 1);

      return n << 1;
   }
}

/**
 * Write gamma2 encoded value to output (compressed) buffer
 *
 * @param pOutData pointer to output buffer
 * @param nOutOffset current write index into output buffer
 * @param nMaxOutDataSize maximum size of output buffer, in bytes
 * @param nValue value of write (2..n)
 * @param nCurBitsOffset write index into output buffer, of current byte being filled with bits
 * @param nCurBitShift bit shift count
 *
 * @return updated write index into output buffer, or -1 in case of an error
 */
static int apultra_write_gamma2_value(unsigned char *pOutData, int nOutOffset, const int nMaxOutDataSize, int nValue, int *nCurBitsOffset, int *nCurBitShift) {
   /* Locate the most significant set bit (values are at most 31 bits here) */
   int msb = 30;
   while ((nValue >> msb--) == 0);

   /* Emit each value bit below the leading 1, followed by a continuation bit:
    * 1 = more bits follow, 0 = last bit */
   while (msb > 0) {
      int bit = (nValue >> msb) & 1;

      nOutOffset = apultra_write_bits(pOutData, nOutOffset, nMaxOutDataSize, bit, 1, nCurBitsOffset, nCurBitShift);
      msb--;
      nOutOffset = apultra_write_bits(pOutData, nOutOffset, nMaxOutDataSize, 1, 1, nCurBitsOffset, nCurBitShift);
   }

   nOutOffset = apultra_write_bits(pOutData, nOutOffset, nMaxOutDataSize, nValue & 1, 1, nCurBitsOffset, nCurBitShift);
   nOutOffset = apultra_write_bits(pOutData, nOutOffset, nMaxOutDataSize, 0, 1, nCurBitsOffset, nCurBitShift);
   return nOutOffset;
}

/**
 * Get the number of extra bits required to represent a match offset
 *
 * @param nLength match length
 * @param nMatchOffset match offset
 * @param nFollowsLiteral non-zero if the match follows a literal, zero if it immediately follows another match
 *
 * @return number of extra bits required
 */
static inline int apultra_get_offset_varlen_size(const int nLength, const int nMatchOffset, const int nFollowsLiteral) {
   /* Short match with a 7-bit offset uses the dedicated short command */
   if (nLength <= 3 && nMatchOffset < 128)
      return 8 + TOKEN_SIZE_7BIT_MATCH;
   else {
      /* Large-match command: high offset byte is gamma2 coded; the +3/+2 bias
       * mirrors the apLib format's context-dependent offset encoding */
      if (nFollowsLiteral)
         return 8 + TOKEN_SIZE_LARGE_MATCH + apultra_get_gamma2_size((nMatchOffset >> 8) + 3);
      else
         return 8 + TOKEN_SIZE_LARGE_MATCH + apultra_get_gamma2_size((nMatchOffset >> 8) + 2);
   }
}

/**
 * Get the number of extra bits required to represent a match length
 *
 * @param nLength match length
 * @param nMatchOffset match offset
 *
 * @return number of extra bits required
 */
static inline int apultra_get_match_varlen_size(int nLength, const int nMatchOffset) {
   /* Short 7-bit-offset matches encode the length inside the command itself */
   if (nLength <= 3 && nMatchOffset < 128)
      return 0;
   else {
      /* The length bias depends on the offset range (see apLib format) */
      if (nMatchOffset < 128 || nMatchOffset >= MINMATCH4_OFFSET)
         return apultra_get_gamma2_size(nLength - 2);
      else if (nMatchOffset < MINMATCH3_OFFSET)
         return apultra_get_gamma2_size(nLength);
      else
         return apultra_get_gamma2_size(nLength - 1);
   }
}

/**
 * Insert forward rep candidate
 *
 * @param 
pCompressor compression context
 * @param pInWindow pointer to input data window (previously compressed bytes + bytes to compress)
 * @param i input data window position whose matches are being considered
 * @param nMatchOffset match offset to use as rep candidate
 * @param nStartOffset current offset in input window (typically the number of previously compressed bytes)
 * @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes)
 * @param nArrivalsPerPosition maximum number of arrivals per input buffer position
 * @param nDepth current insertion depth
 */
static void apultra_insert_forward_match(apultra_compressor *pCompressor, const unsigned char *pInWindow, const int i, const int nMatchOffset, const int nStartOffset, const int nEndOffset, const int nArrivalsPerPosition, int nDepth) {
   const apultra_arrival *arrival = pCompressor->arrival + ((i - nStartOffset) * nArrivalsPerPosition);
   /* NOTE: intervals[] and pos_data[] are repurposed as scratch space here:
    * rle_len caches run lengths, visited/visited2 de-duplicate insertions */
   const int *rle_len = (int*)pCompressor->intervals /* reuse */;
   int* visited = ((int*)pCompressor->pos_data) - nStartOffset /* reuse */;
   int* visited2 = visited + (nEndOffset - nStartOffset) /* reuse */;
   int j;

   for (j = 0; j < nArrivalsPerPosition && arrival[j].from_slot; j++) {
      if (arrival[j].follows_literal) {
         int nRepOffset = arrival[j].rep_offset;

         /* Only consider a different, non-zero rep offset than the candidate */
         if (nMatchOffset != nRepOffset && nRepOffset) {
            int nRepPos = arrival[j].rep_pos;

            if (nRepPos >= nStartOffset &&
               nRepPos < nEndOffset &&
               visited[nRepPos] != nMatchOffset) {

               visited[nRepPos] = nMatchOffset;

               /* Skip if already inserted, out of range, or the match list for
                * that position is full (last slot occupied) */
               if (visited2[nRepPos] != nMatchOffset && nRepPos >= nMatchOffset && pCompressor->match[((nRepPos - nStartOffset) << MATCHES_PER_INDEX_SHIFT) + NMATCHES_PER_INDEX - 1].length == 0) {
                  const unsigned char* pInWindowAtRepOffset = pInWindow + nRepPos;

                  if (pInWindowAtRepOffset[0] == pInWindowAtRepOffset[-nMatchOffset]) {
                     /* Use cached run lengths to skip the guaranteed-equal
                      * prefix, then extend the comparison 8/4/1 bytes at a time */
                     int nLen0 = rle_len[nRepPos - nMatchOffset];
                     int nLen1 = rle_len[nRepPos];
                     int nMinLen = (nLen0 < nLen1) ? nLen0 : nLen1;

                     int nMaxRepLen = nEndOffset - nRepPos;
                     if (nMaxRepLen > LCP_MAX)
                        nMaxRepLen = LCP_MAX;

                     if (nMinLen > nMaxRepLen)
                        nMinLen = nMaxRepLen;

                     const unsigned char* pInWindowMax = pInWindowAtRepOffset + nMaxRepLen;
                     pInWindowAtRepOffset += nMinLen;

                     while ((pInWindowAtRepOffset + 8) < pInWindowMax && !memcmp(pInWindowAtRepOffset, pInWindowAtRepOffset - nMatchOffset, 8))
                        pInWindowAtRepOffset += 8;
                     while ((pInWindowAtRepOffset + 4) < pInWindowMax && !memcmp(pInWindowAtRepOffset, pInWindowAtRepOffset - nMatchOffset, 4))
                        pInWindowAtRepOffset += 4;
                     while (pInWindowAtRepOffset < pInWindowMax && pInWindowAtRepOffset[0] == pInWindowAtRepOffset[-nMatchOffset])
                        pInWindowAtRepOffset++;

                     int nCurRepLen = (int)(pInWindowAtRepOffset - (pInWindow + nRepPos));

                     if (nCurRepLen >= 2) {
                        apultra_match* fwd_match = pCompressor->match + ((nRepPos - nStartOffset) << MATCHES_PER_INDEX_SHIFT);
                        unsigned short* fwd_depth = pCompressor->match_depth + ((nRepPos - nStartOffset) << MATCHES_PER_INDEX_SHIFT);
                        int r;

                        /* If this offset is already listed (with no depth-chain
                         * tag in the low 14 bits), just grow its length */
                        for (r = 0; fwd_match[r].length >= MIN_MATCH_SIZE; r++) {
                           if (fwd_match[r].offset == nMatchOffset && (fwd_depth[r] & 0x3fff) == 0) {
                              if ((int)fwd_match[r].length < nCurRepLen) {
                                 fwd_match[r].length = nCurRepLen;
                                 fwd_depth[r] = 0;
                              }
                              r = NMATCHES_PER_INDEX;
                              break;
                           }
                        }

                        /* Otherwise append it to the first free slot and
                         * recurse (bounded depth) to seed further rep matches */
                        if (r < NMATCHES_PER_INDEX) {
                           visited2[nRepPos] = nMatchOffset;

                           fwd_match[r].offset = nMatchOffset;
                           fwd_match[r].length = nCurRepLen;
                           fwd_depth[r] = 0;

                           if (nDepth < 9)
                              apultra_insert_forward_match(pCompressor, pInWindow, nRepPos, nMatchOffset, nStartOffset, nEndOffset, nArrivalsPerPosition, nDepth + 1);
                        }
                     }
                  }
               }
            }
         }
      }
   }
}

/**
 * Attempt to pick optimal matches, so as to produce the smallest possible output that decompresses to the same input
 *
 * @param pCompressor compression context
 * @param pInWindow pointer to input data window (previously compressed bytes + bytes to compress)
 * @param nStartOffset 
current offset in input window (typically the number of previously compressed bytes) + * @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes + * @param nInsertForwardReps non-zero to insert forward repmatch candidates, zero to use the previously inserted candidates + * @param nCurRepMatchOffset starting rep offset for this block + * @param nBlockFlags bit 0: 1 for first block, 0 otherwise; bit 1: 1 for last block, 0 otherwise + * @param nArrivalsPerPosition maximum number of arrivals per input buffer position + */ +static void apultra_optimize_forward(apultra_compressor *pCompressor, const unsigned char *pInWindow, const int nStartOffset, const int nEndOffset, const int nInsertForwardReps, const int *nCurRepMatchOffset, const int nBlockFlags, const int nArrivalsPerPosition) { + apultra_arrival *arrival = pCompressor->arrival - (nStartOffset * nArrivalsPerPosition); + const int* rle_len = (int*)pCompressor->intervals /* reuse */; + int* visited = ((int*)pCompressor->pos_data) - nStartOffset /* reuse */; + int i, j, n; + + if ((nEndOffset - nStartOffset) > pCompressor->block_size) return; + + memset(arrival + (nStartOffset * nArrivalsPerPosition), 0, sizeof(apultra_arrival) * ((nEndOffset - nStartOffset + 1) * nArrivalsPerPosition)); + + arrival[nStartOffset * nArrivalsPerPosition].from_slot = -1; + arrival[nStartOffset * nArrivalsPerPosition].rep_offset = *nCurRepMatchOffset; + + for (i = (nStartOffset * nArrivalsPerPosition); i != ((nEndOffset+1) * nArrivalsPerPosition); i++) { + arrival[i].cost = 0x40000000; + } + + if (nInsertForwardReps) { + memset(visited + nStartOffset, 0, 2 * (nEndOffset - nStartOffset) * sizeof(int)); + } + + for (i = nStartOffset; i != nEndOffset; i++) { + apultra_arrival *cur_arrival = &arrival[i * nArrivalsPerPosition]; + int m; + + const unsigned char nMatch1Offs = pCompressor->match1[i - nStartOffset]; + int nShortOffset; + int nShortLen; + int nLiteralScore; + int nLiteralCost; + + 
if ((pInWindow[i] != 0 && nMatch1Offs == 0) || (i == nStartOffset && (nBlockFlags & 1))) { + nShortOffset = 0; + nShortLen = 0; + nLiteralCost = 9 /* literal bit + literal byte */; + } + else { + nShortOffset = (pInWindow[i] == 0) ? 0 : nMatch1Offs; + nShortLen = 1; + nLiteralCost = 4 + TOKEN_SIZE_4BIT_MATCH /* command and offset cost; no length cost */; + } + + nLiteralScore = nShortOffset ? 3 : 1; + + if (cur_arrival[nArrivalsPerPosition].from_slot) { + for (j = 0; j < nArrivalsPerPosition && cur_arrival[j].from_slot; j++) { + int nPrevCost = cur_arrival[j].cost & 0x3fffffff; + int nCodingChoiceCost = nPrevCost + nLiteralCost; + int nScore = cur_arrival[j].score + nLiteralScore; + + apultra_arrival* pDestSlots = &cur_arrival[nArrivalsPerPosition]; + if (nCodingChoiceCost < pDestSlots[nArrivalsPerPosition - 1].cost || + (nCodingChoiceCost == pDestSlots[nArrivalsPerPosition - 1].cost && nScore < pDestSlots[nArrivalsPerPosition - 1].score)) { + int nRepOffset = cur_arrival[j].rep_offset; + int exists = 0; + + for (n = 0; + pDestSlots[n].cost < nCodingChoiceCost; + n++) { + if (pDestSlots[n].rep_offset == nRepOffset) { + exists = 1; + break; + } + } + + if (!exists) { + for (; + n < nArrivalsPerPosition && pDestSlots[n].cost == nCodingChoiceCost && nScore >= pDestSlots[n].score; + n++) { + if (pDestSlots[n].rep_offset == nRepOffset) { + exists = 1; + break; + } + } + + if (!exists) { + if (n < nArrivalsPerPosition) { + int nn; + + for (nn = n; + nn < nArrivalsPerPosition && pDestSlots[nn].cost == nCodingChoiceCost; + nn++) { + if (pDestSlots[nn].rep_offset == nRepOffset) { + exists = 1; + break; + } + } + + if (!exists) { + int z; + + for (z = n; z < nArrivalsPerPosition - 1 && pDestSlots[z].from_slot; z++) { + if (pDestSlots[z].rep_offset == nRepOffset) + break; + } + + apultra_arrival* pDestArrival = &pDestSlots[n]; + memmove(&pDestSlots[n + 1], + &pDestSlots[n], + sizeof(apultra_arrival) * (z - n)); + + pDestArrival->cost = nCodingChoiceCost; + 
pDestArrival->from_pos = i; + pDestArrival->from_slot = j + 1; + pDestArrival->follows_literal = 1; + pDestArrival->rep_offset = nRepOffset; + pDestArrival->short_offset = nShortOffset; + pDestArrival->rep_pos = cur_arrival[j].rep_pos; + pDestArrival->match_len = nShortLen; + pDestArrival->score = nScore; + } + } + } + } + } + } + } + else { + for (j = 0; j < nArrivalsPerPosition && cur_arrival[j].from_slot; j++) { + int nPrevCost = cur_arrival[j].cost & 0x3fffffff; + int nCodingChoiceCost = nPrevCost + nLiteralCost; + int nScore = cur_arrival[j].score + nLiteralScore; + + apultra_arrival* pDestArrival = &cur_arrival[nArrivalsPerPosition + j]; + + pDestArrival->cost = nCodingChoiceCost; + pDestArrival->from_pos = i; + pDestArrival->from_slot = j + 1; + pDestArrival->follows_literal = 1; + pDestArrival->rep_offset = cur_arrival[j].rep_offset; + pDestArrival->short_offset = nShortOffset; + pDestArrival->rep_pos = cur_arrival[j].rep_pos; + pDestArrival->match_len = nShortLen; + pDestArrival->score = nScore; + } + } + + if (i == nStartOffset && (nBlockFlags & 1)) continue; + + const apultra_match *match = pCompressor->match + ((i - nStartOffset) << MATCHES_PER_INDEX_SHIFT); + const unsigned short *match_depth = pCompressor->match_depth + ((i - nStartOffset) << MATCHES_PER_INDEX_SHIFT); + int nNumArrivalsForThisPos = j, nOverallMinRepLen = 0, nOverallMaxRepLen = 0; + + int nRepLenForArrival[NARRIVALS_PER_POSITION_MAX]; + memset(nRepLenForArrival, 0, nArrivalsPerPosition * sizeof(int)); + + int nRepMatchArrivalIdx[NARRIVALS_PER_POSITION_MAX + 1]; + int nNumRepMatchArrivals = 0; + + int nMaxRepLenForPos = nEndOffset - i; + if (nMaxRepLenForPos > LCP_MAX) + nMaxRepLenForPos = LCP_MAX; + const unsigned char* pInWindowStart = pInWindow + i; + const unsigned char* pInWindowMax = pInWindowStart + nMaxRepLenForPos; + const int nLen1 = rle_len[i]; + + for (j = 0; j < nNumArrivalsForThisPos && (i + 2) <= nEndOffset; j++) { + if (cur_arrival[j].follows_literal) { + int nRepOffset 
= cur_arrival[j].rep_offset; + + if (nRepOffset && i >= nRepOffset) { + if (pInWindowStart[0] == pInWindowStart[-nRepOffset]) { + int nLen0 = rle_len[i - nRepOffset]; + int nMinLen = (nLen0 < nLen1) ? nLen0 : nLen1; + + if (nMinLen > nMaxRepLenForPos) + nMinLen = nMaxRepLenForPos; + + const unsigned char* pInWindowAtRepOffset = pInWindowStart + nMinLen; + while ((pInWindowAtRepOffset + 8) < pInWindowMax && !memcmp(pInWindowAtRepOffset, pInWindowAtRepOffset - nRepOffset, 8)) + pInWindowAtRepOffset += 8; + while ((pInWindowAtRepOffset + 4) < pInWindowMax && !memcmp(pInWindowAtRepOffset, pInWindowAtRepOffset - nRepOffset, 4)) + pInWindowAtRepOffset += 4; + while (pInWindowAtRepOffset < pInWindowMax && pInWindowAtRepOffset[0] == pInWindowAtRepOffset[-nRepOffset]) + pInWindowAtRepOffset++; + + int nCurMaxLen = (int)(pInWindowAtRepOffset - pInWindowStart); + + if (nCurMaxLen >= 2) { + nRepLenForArrival[j] = nCurMaxLen; + nRepMatchArrivalIdx[nNumRepMatchArrivals++] = j; + + if (nOverallMaxRepLen < nCurMaxLen) + nOverallMaxRepLen = nCurMaxLen; + } + } + } + } + } + nRepMatchArrivalIdx[nNumRepMatchArrivals] = -1; + + for (m = 0; m < NMATCHES_PER_INDEX && match[m].length; m++) { + const int nOrigMatchLen = match[m].length; + const int nOrigMatchOffset = match[m].offset; + const unsigned int nOrigMatchDepth = match_depth[m] & 0x3fff; + const int nScorePenalty = 3 + ((match_depth[m] & 0x8000) >> 15); + unsigned int d; + + for (d = 0; d <= nOrigMatchDepth; d += (nOrigMatchDepth ? 
nOrigMatchDepth : 1)) { + const int nMatchOffset = nOrigMatchOffset - d; + int nMatchLen = nOrigMatchLen - d; + + if ((i + nMatchLen) > nEndOffset) + nMatchLen = nEndOffset - i; + + if (nInsertForwardReps) { + apultra_insert_forward_match(pCompressor, pInWindow, i, nMatchOffset, nStartOffset, nEndOffset, nArrivalsPerPosition, 0); + } + + if (nMatchLen >= 2) { + int nStartingMatchLen, nJumpMatchLen, k; + int nNoRepMatchOffsetCostForLit[2], nNoRepMatchOffsetCostDelta; + int nMinMatchLenForOffset; + int nNoRepCostAdjusment = (nMatchLen >= LCP_MAX) ? 1 : 0; + + if (nMatchOffset < MINMATCH3_OFFSET) + nMinMatchLenForOffset = 2; + else { + if (nMatchOffset < MINMATCH4_OFFSET) + nMinMatchLenForOffset = 3; + else + nMinMatchLenForOffset = 4; + } + + if (nMatchLen >= LEAVE_ALONE_MATCH_SIZE && i >= nMatchLen) + nStartingMatchLen = nMatchLen; + else + nStartingMatchLen = 2; + + if ((nBlockFlags & 3) == 3 && nMatchLen > 90 && i >= 90) + nJumpMatchLen = 90; + else + nJumpMatchLen = nMatchLen + 1; + + if (nStartingMatchLen <= 3 && nMatchOffset < 128) { + nNoRepMatchOffsetCostForLit[0] = 8 + TOKEN_SIZE_7BIT_MATCH; + nNoRepMatchOffsetCostForLit[1] = 8 + TOKEN_SIZE_7BIT_MATCH; + } + else { + nNoRepMatchOffsetCostForLit[0] = 8 + TOKEN_SIZE_LARGE_MATCH + apultra_get_gamma2_size((nMatchOffset >> 8) + 2); + nNoRepMatchOffsetCostForLit[1] = 8 + TOKEN_SIZE_LARGE_MATCH + apultra_get_gamma2_size((nMatchOffset >> 8) + 3); + } + nNoRepMatchOffsetCostDelta = nNoRepMatchOffsetCostForLit[1] - nNoRepMatchOffsetCostForLit[0]; + + for (k = nStartingMatchLen; k <= nMatchLen; k++) { + int nRepMatchMatchLenCost = apultra_get_gamma2_size(k); + apultra_arrival *pDestSlots = &cur_arrival[k * nArrivalsPerPosition]; + + /* Insert non-repmatch candidate */ + + if (k >= nMinMatchLenForOffset) { + int nNoRepMatchMatchLenCost; + + if (k <= 3 && nMatchOffset < 128) + nNoRepMatchMatchLenCost = 0; + else { + if (nMatchOffset < 128 || nMatchOffset >= MINMATCH4_OFFSET) + nNoRepMatchMatchLenCost = 
apultra_get_gamma2_size(k - 2); + else if (nMatchOffset < MINMATCH3_OFFSET) + nNoRepMatchMatchLenCost = nRepMatchMatchLenCost; + else + nNoRepMatchMatchLenCost = apultra_get_gamma2_size(k - 1); + } + + for (j = 0; j < nNumArrivalsForThisPos; j++) { + if (nMatchOffset != cur_arrival[j].rep_offset || cur_arrival[j].follows_literal == 0) { + int nPrevCost = cur_arrival[j].cost & 0x3fffffff; + int nMatchCmdCost = nNoRepMatchMatchLenCost + nNoRepMatchOffsetCostForLit[cur_arrival[j].follows_literal]; + int nCodingChoiceCost = nPrevCost + nMatchCmdCost; + + if (nCodingChoiceCost <= (pDestSlots[nArrivalsPerPosition - 1].cost + 1)) { + int nScore = cur_arrival[j].score + nScorePenalty; + + if (nCodingChoiceCost < pDestSlots[nArrivalsPerPosition - 2].cost || + (nCodingChoiceCost == pDestSlots[nArrivalsPerPosition - 2].cost && nScore < pDestSlots[nArrivalsPerPosition - 2].score)) { + int exists = 0; + + for (n = 0; + pDestSlots[n].cost < nCodingChoiceCost; + n++) { + if (pDestSlots[n].rep_offset == nMatchOffset) { + exists = 1; + break; + } + } + + if (!exists) { + int nRevisedCodingChoiceCost = nCodingChoiceCost - nNoRepCostAdjusment; + + for (; + n < nArrivalsPerPosition - 1 && pDestSlots[n].cost == nRevisedCodingChoiceCost && nScore >= pDestSlots[n].score; + n++) { + if (pDestSlots[n].rep_offset == nMatchOffset) { + exists = 1; + break; + } + } + + if (!exists) { + if (n < nArrivalsPerPosition - 1) { + int nn; + + for (nn = n; + nn < nArrivalsPerPosition && pDestSlots[nn].cost == nCodingChoiceCost; + nn++) { + if (pDestSlots[nn].rep_offset == nMatchOffset) { + exists = 1; + break; + } + } + + if (!exists) { + int z; + + for (z = n; z < nArrivalsPerPosition - 1 && pDestSlots[z].from_slot; z++) { + if (pDestSlots[z].rep_offset == nMatchOffset) + break; + } + + apultra_arrival* pDestArrival = &pDestSlots[n]; + memmove(&pDestSlots[n + 1], + &pDestSlots[n], + sizeof(apultra_arrival) * (z - n)); + + pDestArrival->cost = nRevisedCodingChoiceCost; + pDestArrival->from_pos = i; + 
pDestArrival->from_slot = j + 1; + pDestArrival->follows_literal = 0; + pDestArrival->rep_offset = nMatchOffset; + pDestArrival->short_offset = 0; + pDestArrival->rep_pos = i; + pDestArrival->match_len = k; + pDestArrival->score = nScore; + } + } + } + } + else { + if ((nCodingChoiceCost - pDestSlots[n].cost) >= nNoRepMatchOffsetCostDelta) + break; + } + } + if (cur_arrival[j].follows_literal == 0 || nNoRepMatchOffsetCostDelta == 0) + break; + } + else { + break; + } + } + } + } + + /* Insert repmatch candidate */ + + if (k > nOverallMinRepLen && k <= nOverallMaxRepLen) { + int nRepMatchCmdCost = TOKEN_SIZE_LARGE_MATCH + 2 /* apultra_get_gamma2_size(2) */ + nRepMatchMatchLenCost; + int nCurRepMatchArrival; + + if (k <= 90) + nOverallMinRepLen = k; + else if (nOverallMaxRepLen == k) + nOverallMaxRepLen--; + + for (nCurRepMatchArrival = 0; (j = nRepMatchArrivalIdx[nCurRepMatchArrival]) >= 0; nCurRepMatchArrival++) { + if (nRepLenForArrival[j] >= k) { + int nPrevCost = cur_arrival[j].cost & 0x3fffffff; + int nRepCodingChoiceCost = nPrevCost + nRepMatchCmdCost; + int nScore = cur_arrival[j].score + 2; + + if (nRepCodingChoiceCost < pDestSlots[nArrivalsPerPosition - 1].cost || + (nRepCodingChoiceCost == pDestSlots[nArrivalsPerPosition - 1].cost && nScore < pDestSlots[nArrivalsPerPosition - 1].score)) { + int nRepOffset = cur_arrival[j].rep_offset; + int exists = 0; + + for (n = 0; + pDestSlots[n].cost < nRepCodingChoiceCost; + n++) { + if (pDestSlots[n].rep_offset == nRepOffset) { + exists = 1; + break; + } + } + + if (!exists) { + for (; + n < nArrivalsPerPosition && pDestSlots[n].cost == nRepCodingChoiceCost && nScore >= pDestSlots[n].score; + n++) { + if (pDestSlots[n].rep_offset == nRepOffset) { + exists = 1; + break; + } + } + + if (!exists) { + if (n < nArrivalsPerPosition) { + int nn; + + for (nn = n; + nn < nArrivalsPerPosition && pDestSlots[nn].cost == nRepCodingChoiceCost; + nn++) { + if (pDestSlots[nn].rep_offset == nRepOffset) { + exists = 1; + break; + } + 
} + + if (!exists) { + int z; + + for (z = n; z < nArrivalsPerPosition - 1 && pDestSlots[z].from_slot; z++) { + if (pDestSlots[z].rep_offset == nRepOffset) + break; + } + + apultra_arrival* pDestArrival = &pDestSlots[n]; + memmove(&pDestSlots[n + 1], + &pDestSlots[n], + sizeof(apultra_arrival) * (z - n)); + + pDestArrival->cost = nRepCodingChoiceCost; + pDestArrival->from_pos = i; + pDestArrival->from_slot = j + 1; + pDestArrival->follows_literal = 0; + pDestArrival->rep_offset = nRepOffset; + pDestArrival->short_offset = 0; + pDestArrival->rep_pos = i; + pDestArrival->match_len = k; + pDestArrival->score = nScore; + } + } + } + } + } + else { + break; + } + } + } + } + + if (k == 3 && nMatchOffset < 128) { + nNoRepMatchOffsetCostForLit[0] = 8 + TOKEN_SIZE_LARGE_MATCH + 2 /* apultra_get_gamma2_size((nMatchOffset >> 8) + 2) */; + nNoRepMatchOffsetCostForLit[1] = 8 + TOKEN_SIZE_LARGE_MATCH + 2 /* apultra_get_gamma2_size((nMatchOffset >> 8) + 3) */; + } + + if (k == nJumpMatchLen) + k = nMatchLen - 1; + } + } + + if (nOrigMatchLen >= 512) + break; + } + } + } + + if (!nInsertForwardReps) { + const apultra_arrival* end_arrival = &arrival[(i * nArrivalsPerPosition) + 0]; + apultra_final_match* pBestMatch = pCompressor->best_match - nStartOffset; + + while (end_arrival->from_slot > 0 && end_arrival->from_pos >= 0 && (int)end_arrival->from_pos < nEndOffset) { + pBestMatch[end_arrival->from_pos].length = end_arrival->match_len; + if (end_arrival->match_len >= 2) + pBestMatch[end_arrival->from_pos].offset = end_arrival->rep_offset; + else + pBestMatch[end_arrival->from_pos].offset = end_arrival->short_offset; + + end_arrival = &arrival[(end_arrival->from_pos * nArrivalsPerPosition) + (end_arrival->from_slot - 1)]; + } + } +} + +/** + * Attempt to replace matches by literals when it makes the final bitstream smaller, and merge large matches + * + * @param pCompressor compression context + * @param pInWindow pointer to input data window (previously compressed bytes + bytes to 
compress) + * @param pBestMatch optimal matches to evaluate and update + * @param nStartOffset current offset in input window (typically the number of previously compressed bytes) + * @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes + * @param nCurRepMatchOffset starting rep offset for this block + * @param nBlockFlags bit 0: 1 for first block, 0 otherwise; bit 1: 1 for last block, 0 otherwise + * + * @return non-zero if the number of tokens was reduced, 0 if it wasn't + */ +static int apultra_reduce_commands(apultra_compressor *pCompressor, const unsigned char *pInWindow, apultra_final_match *pBestMatch, const int nStartOffset, const int nEndOffset, const int *nCurRepMatchOffset, const int nBlockFlags) { + int i; + int nRepMatchOffset = *nCurRepMatchOffset; + int nFollowsLiteral = 0; + int nDidReduce = 0; + int nLastMatchLen = 0; + const unsigned char *match1 = pCompressor->match1 - nStartOffset; + + for (i = nStartOffset + ((nBlockFlags & 1) ? 1 : 0); i < nEndOffset; ) { + apultra_final_match *pMatch = pBestMatch + i; + + if (pMatch->length <= 1 && + (i + 1) < nEndOffset && + pBestMatch[i + 1].length >= 2 && + pBestMatch[i + 1].length < MAX_VARLEN && + pBestMatch[i + 1].offset && + i >= pBestMatch[i + 1].offset && + (i + pBestMatch[i + 1].length + 1) <= nEndOffset && + !memcmp(pInWindow + i - (pBestMatch[i + 1].offset), pInWindow + i, pBestMatch[i + 1].length + 1)) { + if ((pBestMatch[i + 1].offset < MINMATCH3_OFFSET || (pBestMatch[i + 1].length + 1) >= 3 || (pBestMatch[i + 1].offset == nRepMatchOffset && nFollowsLiteral)) && + (pBestMatch[i + 1].offset < MINMATCH4_OFFSET || (pBestMatch[i + 1].length + 1) >= 4 || (pBestMatch[i + 1].offset == nRepMatchOffset && nFollowsLiteral))) { + + int nCurPartialCommandSize = (pMatch->length == 1) ? 
(TOKEN_SIZE_4BIT_MATCH + 4) : (1 /* literal bit */ + 8 /* literal size */); + if (pBestMatch[i + 1].offset == nRepMatchOffset /* always follows a literal, the one at the current position */) { + nCurPartialCommandSize += TOKEN_SIZE_LARGE_MATCH + 2 /* apultra_get_gamma2_size(2) */ + apultra_get_gamma2_size(pBestMatch[i + 1].length); + } + else { + nCurPartialCommandSize += apultra_get_offset_varlen_size(pBestMatch[i + 1].length, pBestMatch[i + 1].offset, 1) + apultra_get_match_varlen_size(pBestMatch[i + 1].length, pBestMatch[i + 1].offset); + } + + int nReducedPartialCommandSize; + if (pBestMatch[i + 1].offset == nRepMatchOffset && nFollowsLiteral) { + nReducedPartialCommandSize = TOKEN_SIZE_LARGE_MATCH + 2 /* apultra_get_gamma2_size(2) */ + apultra_get_gamma2_size(pBestMatch[i + 1].length); + } + else { + nReducedPartialCommandSize = apultra_get_offset_varlen_size(pBestMatch[i + 1].length, pBestMatch[i + 1].offset, nFollowsLiteral) + apultra_get_match_varlen_size(pBestMatch[i + 1].length, pBestMatch[i + 1].offset); + } + + if (nReducedPartialCommandSize < nCurPartialCommandSize || (nFollowsLiteral == 0 && nLastMatchLen >= LCP_MAX)) { + /* Merge */ + pBestMatch[i].length = pBestMatch[i + 1].length + 1; + pBestMatch[i].offset = pBestMatch[i + 1].offset; + pBestMatch[i + 1].length = 0; + pBestMatch[i + 1].offset = 0; + nDidReduce = 1; + continue; + } + } + } + + if (pMatch->length >= 2) { + if (pMatch->length < 32 && /* Don't waste time considering large matches, they will always win over literals */ + (i + pMatch->length) < nEndOffset /* Don't consider the last match in the block, we can only reduce a match inbetween other tokens */) { + int nNextIndex = i + pMatch->length; + int nNextFollowsLiteral = 0; + int nCannotEncode = 0; + + while (nNextIndex < nEndOffset && pBestMatch[nNextIndex].length < 2) { + nNextIndex++; + nNextFollowsLiteral = 1; + } + + if (nNextIndex < nEndOffset && pBestMatch[nNextIndex].length >= 2) { + if (nRepMatchOffset && nRepMatchOffset != 
pMatch->offset && pBestMatch[nNextIndex].offset && pMatch->offset != pBestMatch[nNextIndex].offset && + nNextFollowsLiteral) { + /* Try to gain a match forward */ + if (i >= pBestMatch[nNextIndex].offset && (i - pBestMatch[nNextIndex].offset + pMatch->length) <= nEndOffset) { + if ((pBestMatch[nNextIndex].offset < MINMATCH3_OFFSET || pMatch->length >= 3) && + (pBestMatch[nNextIndex].offset < MINMATCH4_OFFSET || pMatch->length >= 4)) { + int nMaxLen = 0; + const unsigned char* pInWindowAtPos = pInWindow + i; + while (nMaxLen < pMatch->length && pInWindowAtPos[nMaxLen - pBestMatch[nNextIndex].offset] == pInWindowAtPos[nMaxLen]) + nMaxLen++; + + if (nMaxLen >= pMatch->length) { + /* Replace */ + pMatch->offset = pBestMatch[nNextIndex].offset; + nDidReduce = 1; + } + else if (nMaxLen >= 2) { + if ((nFollowsLiteral && nRepMatchOffset == pBestMatch[nNextIndex].offset) || + ((pBestMatch[nNextIndex].offset < MINMATCH3_OFFSET || nMaxLen >= 3) && + (pBestMatch[nNextIndex].offset < MINMATCH4_OFFSET || nMaxLen >= 4))) { + + int nPartialSizeBefore, nPartialSizeAfter, j; + + nPartialSizeBefore = apultra_get_offset_varlen_size(pMatch->length, pMatch->offset, nFollowsLiteral); + nPartialSizeBefore += apultra_get_match_varlen_size(pMatch->length, pMatch->offset); + + nPartialSizeBefore += apultra_get_offset_varlen_size(pBestMatch[nNextIndex].length, pBestMatch[nNextIndex].offset, 1); + nPartialSizeBefore += apultra_get_match_varlen_size(pBestMatch[nNextIndex].length, pBestMatch[nNextIndex].offset); + + nPartialSizeAfter = apultra_get_offset_varlen_size(nMaxLen, pBestMatch[nNextIndex].offset, nFollowsLiteral); + if (nFollowsLiteral && nRepMatchOffset == pBestMatch[nNextIndex].offset) + nPartialSizeAfter += apultra_get_gamma2_size(nMaxLen); + else + nPartialSizeAfter += apultra_get_match_varlen_size(nMaxLen, pBestMatch[nNextIndex].offset); + + nPartialSizeAfter += TOKEN_SIZE_LARGE_MATCH + 2 /* apultra_get_gamma2_size(2) */; + nPartialSizeAfter += 
apultra_get_gamma2_size(pBestMatch[nNextIndex].length); + + for (j = nMaxLen; j < pMatch->length; j++) { + if (pInWindow[i + j] == 0 || match1[i + j]) + nPartialSizeAfter += TOKEN_SIZE_4BIT_MATCH + 4; + else + nPartialSizeAfter += 1 /* literal bit */ + 8 /* literal byte */; + } + + if (nPartialSizeAfter < nPartialSizeBefore) { + /* We gain a repmatch that is shorter than the original match as this is the best we can do, so it is followed by extra literals, but + * we have calculated that this is shorter */ + + int nOrigLen = pMatch->length; + int j; + + pMatch->offset = pBestMatch[nNextIndex].offset; + pMatch->length = nMaxLen; + + for (j = nMaxLen; j < nOrigLen; j++) { + pBestMatch[i + j].offset = match1[i + j]; + pBestMatch[i + j].length = (pInWindow[i + j] && match1[i+j] == 0) ? 0 : 1; + } + + nDidReduce = 1; + continue; + } + } + } + } + } + } + + /* Calculate this command's current cost */ + + int nCurCommandSize; + if (pMatch->offset == nRepMatchOffset && nFollowsLiteral) { + nCurCommandSize = TOKEN_SIZE_LARGE_MATCH + 2 /* apultra_get_gamma2_size(2) */ + apultra_get_gamma2_size(pMatch->length); + } + else { + nCurCommandSize = apultra_get_offset_varlen_size(pMatch->length, pMatch->offset, nFollowsLiteral) + apultra_get_match_varlen_size(pMatch->length, pMatch->offset); + } + + /* Calculate the next command's current cost */ + int nNextCommandSize; + if (pBestMatch[nNextIndex].offset == pMatch->offset && nNextFollowsLiteral && pBestMatch[nNextIndex].length >= 2) { + nNextCommandSize = TOKEN_SIZE_LARGE_MATCH + 2 /* apultra_get_gamma2_size(2) */ + apultra_get_gamma2_size(pBestMatch[nNextIndex].length); + } + else { + nNextCommandSize = apultra_get_offset_varlen_size(pBestMatch[nNextIndex].length, pBestMatch[nNextIndex].offset, nNextFollowsLiteral) + apultra_get_match_varlen_size(pBestMatch[nNextIndex].length, pBestMatch[nNextIndex].offset); + } + + int nOriginalCombinedCommandSize = nCurCommandSize + nNextCommandSize; + + /* Calculate the cost of replacing this 
match command by literals + the effect on the cost of the next command */ + int nReducedCommandSize = 0; + int j; + + for (j = 0; j < pMatch->length; j++) { + if (pInWindow[i + j] == 0 || match1[i + j]) + nReducedCommandSize += TOKEN_SIZE_4BIT_MATCH + 4; + else + nReducedCommandSize += 1 /* literal bit */ + 8; + } + + if (pBestMatch[nNextIndex].offset == nRepMatchOffset /* the new command would always follow literals, the ones we create */ && pBestMatch[nNextIndex].length >= 2) { + nReducedCommandSize += TOKEN_SIZE_LARGE_MATCH + 2 /* apultra_get_gamma2_size(2) */ + apultra_get_gamma2_size(pBestMatch[nNextIndex].length); + } + else { + if ((pBestMatch[nNextIndex].length < 3 && pBestMatch[nNextIndex].offset >= MINMATCH3_OFFSET) || + (pBestMatch[nNextIndex].length < 4 && pBestMatch[nNextIndex].offset >= MINMATCH4_OFFSET)) { + /* This match length can only be encoded with a rep-match */ + nCannotEncode = 1; + } + else { + nReducedCommandSize += apultra_get_offset_varlen_size(pBestMatch[nNextIndex].length, pBestMatch[nNextIndex].offset, 1 /* follows literals */) + apultra_get_match_varlen_size(pBestMatch[nNextIndex].length, pBestMatch[nNextIndex].offset); + } + } + + if (!nCannotEncode && nOriginalCombinedCommandSize > nReducedCommandSize) { + /* Reduce */ + int nMatchLen = pMatch->length; + int j; + + for (j = 0; j < nMatchLen; j++) { + pBestMatch[i + j].offset = match1[i + j]; + pBestMatch[i + j].length = (pInWindow[i + j] && match1[i + j] == 0) ? 
0 : 1; + } + + nDidReduce = 1; + continue; + } + } + } + + if ((i + pMatch->length) < nEndOffset && pMatch->offset > 0 && + pBestMatch[i + pMatch->length].offset > 0 && + pBestMatch[i + pMatch->length].length >= 2 && + (pMatch->length + pBestMatch[i + pMatch->length].length) >= LEAVE_ALONE_MATCH_SIZE && + (pMatch->length + pBestMatch[i + pMatch->length].length) <= MAX_VARLEN && + (i + pMatch->length) >= pMatch->offset && + (i + pMatch->length) >= pBestMatch[i + pMatch->length].offset && + (i + pMatch->length + pBestMatch[i + pMatch->length].length) <= nEndOffset && + !memcmp(pInWindow + i + pMatch->length - pMatch->offset, + pInWindow + i + pMatch->length - pBestMatch[i + pMatch->length].offset, + pBestMatch[i + pMatch->length].length)) { + int nMatchLen = pMatch->length; + + /* Join large matches */ + + int nNextIndex = i + pMatch->length + pBestMatch[i + pMatch->length].length; + int nNextFollowsLiteral = 0; + int nCannotEncode = 0; + + while (nNextIndex < nEndOffset && pBestMatch[nNextIndex].length < 2) { + nNextIndex++; + nNextFollowsLiteral = 1; + } + + if (nNextIndex < nEndOffset && nNextFollowsLiteral && pBestMatch[nNextIndex].length >= 2 && + pBestMatch[nNextIndex].offset == pBestMatch[i + pMatch->length].offset) { + if ((pBestMatch[nNextIndex].offset >= MINMATCH3_OFFSET && pBestMatch[nNextIndex].length < 3) || + (pBestMatch[nNextIndex].offset >= MINMATCH4_OFFSET && pBestMatch[nNextIndex].length < 4)) { + nCannotEncode = 1; + } + } + + if (!nCannotEncode) { + pMatch->length += pBestMatch[i + nMatchLen].length; + pBestMatch[i + nMatchLen].offset = 0; + pBestMatch[i + nMatchLen].length = -1; + nDidReduce = 1; + continue; + } + } + + nRepMatchOffset = pMatch->offset; + nFollowsLiteral = 0; + nLastMatchLen = pMatch->length; + + i += pMatch->length; + } + else { + /* 4 bits offset (1 byte match) or literal */ + i++; + nFollowsLiteral = 1; + nLastMatchLen = 0; + } + } + + return nDidReduce; +} + +/** + * Emit a block of compressed data + * + * @param pCompressor 
compression context + * @param pBestMatch optimal matches to emit + * @param pInWindow pointer to input data window (previously compressed bytes + bytes to compress) + * @param nStartOffset current offset in input window (typically the number of previously compressed bytes) + * @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes + * @param pOutData pointer to output buffer + * @param nMaxOutDataSize maximum size of output buffer, in bytes + * @param nCurBitsOffset write index into output buffer, of current byte being filled with bits + * @param nCurBitShift bit shift count + * @param nFollowsLiteral non-zero if the next command to be issued follows a literal, 0 if not + * @param nCurRepMatchOffset starting rep offset for this block, updated after the block is compressed successfully + * @param nBlockFlags bit 0: 1 for first block, 0 otherwise; bit 1: 1 for last block, 0 otherwise + * + * @return size of compressed data in output buffer, or -1 if the data is uncompressible + */ +static int apultra_write_block(apultra_compressor *pCompressor, apultra_final_match *pBestMatch, const unsigned char *pInWindow, const int nStartOffset, const int nEndOffset, unsigned char *pOutData, int nOutOffset, const int nMaxOutDataSize, int *nCurBitsOffset, int *nCurBitShift, int *nFollowsLiteral, int *nCurRepMatchOffset, const int nBlockFlags) { + int i; + int nRepMatchOffset = *nCurRepMatchOffset; + const int nMaxOffset = pCompressor->max_offset; + + if (nBlockFlags & 1) { + if (nOutOffset < 0 || nOutOffset >= nMaxOutDataSize) + return -1; + pOutData[nOutOffset++] = pInWindow[nStartOffset]; + *nFollowsLiteral = 1; + } + + for (i = nStartOffset + ((nBlockFlags & 1) ? 
1 : 0); i < nEndOffset; ) { + const apultra_final_match *pMatch = pBestMatch + i; + + if (pMatch->length >= 2) { + int nMatchOffset = pMatch->offset; + int nMatchLen = pMatch->length; + + if (nMatchOffset < MIN_OFFSET || nMatchOffset > nMaxOffset) + return -1; + + if (nMatchOffset == nRepMatchOffset && *nFollowsLiteral) { + /* Rep-match */ + nOutOffset = apultra_write_bits(pOutData, nOutOffset, nMaxOutDataSize, TOKEN_CODE_LARGE_MATCH, TOKEN_SIZE_LARGE_MATCH, nCurBitsOffset, nCurBitShift); + nOutOffset = apultra_write_bits(pOutData, nOutOffset, nMaxOutDataSize, 0 /* length of 2 encoded as gamma 2 */, 2, nCurBitsOffset, nCurBitShift); + + /* The match length isn't encoded in the command, emit elias gamma value */ + nOutOffset = apultra_write_gamma2_value(pOutData, nOutOffset, nMaxOutDataSize, nMatchLen, nCurBitsOffset, nCurBitShift); + if (nOutOffset < 0) return -1; + + *nFollowsLiteral = 0; + + pCompressor->stats.num_rep_matches++; + } + else { + if (nMatchLen <= 3 && nMatchOffset < 128) { + /* 7 bits offset + 1 bit length */ + nOutOffset = apultra_write_bits(pOutData, nOutOffset, nMaxOutDataSize, TOKEN_CODE_7BIT_MATCH, TOKEN_SIZE_7BIT_MATCH, nCurBitsOffset, nCurBitShift); + + if (nOutOffset < 0 || nOutOffset >= nMaxOutDataSize) + return -1; + pOutData[nOutOffset++] = ((nMatchOffset) & 0x7f) << 1 | (nMatchLen - 2); + + *nFollowsLiteral = 0; + nRepMatchOffset = nMatchOffset; + + pCompressor->stats.num_7bit_matches++; + } + else { + /* 8+n bits offset */ + nOutOffset = apultra_write_bits(pOutData, nOutOffset, nMaxOutDataSize, TOKEN_CODE_LARGE_MATCH, TOKEN_SIZE_LARGE_MATCH, nCurBitsOffset, nCurBitShift); + + if (nOutOffset < 0 || nOutOffset >= nMaxOutDataSize) + return -1; + if (*nFollowsLiteral) + nOutOffset = apultra_write_gamma2_value(pOutData, nOutOffset, nMaxOutDataSize, (nMatchOffset >> 8) + 3, nCurBitsOffset, nCurBitShift); + else + nOutOffset = apultra_write_gamma2_value(pOutData, nOutOffset, nMaxOutDataSize, (nMatchOffset >> 8) + 2, nCurBitsOffset, 
nCurBitShift); + pOutData[nOutOffset++] = nMatchOffset & 0xff; + + /* The match length isn't encoded in the command, emit elias gamma value */ + + if (nMatchOffset < 128 || nMatchOffset >= MINMATCH4_OFFSET) + nOutOffset = apultra_write_gamma2_value(pOutData, nOutOffset, nMaxOutDataSize, nMatchLen - 2, nCurBitsOffset, nCurBitShift); + else if (nMatchOffset < MINMATCH3_OFFSET) + nOutOffset = apultra_write_gamma2_value(pOutData, nOutOffset, nMaxOutDataSize, nMatchLen, nCurBitsOffset, nCurBitShift); + else + nOutOffset = apultra_write_gamma2_value(pOutData, nOutOffset, nMaxOutDataSize, nMatchLen - 1, nCurBitsOffset, nCurBitShift); + if (nOutOffset < 0) return -1; + + *nFollowsLiteral = 0; + nRepMatchOffset = nMatchOffset; + + pCompressor->stats.num_variable_matches++; + } + } + + if (nMatchOffset < pCompressor->stats.min_offset || pCompressor->stats.min_offset == -1) + pCompressor->stats.min_offset = nMatchOffset; + if (nMatchOffset > pCompressor->stats.max_offset) + pCompressor->stats.max_offset = nMatchOffset; + pCompressor->stats.total_offsets += (long long)nMatchOffset; + + if (nMatchLen < pCompressor->stats.min_match_len || pCompressor->stats.min_match_len == -1) + pCompressor->stats.min_match_len = nMatchLen; + if (nMatchLen > pCompressor->stats.max_match_len) + pCompressor->stats.max_match_len = nMatchLen; + pCompressor->stats.total_match_lens += nMatchLen; + pCompressor->stats.match_divisor++; + + if (nMatchOffset == 1) { + if (nMatchLen < pCompressor->stats.min_rle1_len || pCompressor->stats.min_rle1_len == -1) + pCompressor->stats.min_rle1_len = nMatchLen; + if (nMatchLen > pCompressor->stats.max_rle1_len) + pCompressor->stats.max_rle1_len = nMatchLen; + pCompressor->stats.total_rle1_lens += nMatchLen; + pCompressor->stats.rle1_divisor++; + } + else if (nMatchOffset == 2) { + if (nMatchLen < pCompressor->stats.min_rle2_len || pCompressor->stats.min_rle2_len == -1) + pCompressor->stats.min_rle2_len = nMatchLen; + if (nMatchLen > 
pCompressor->stats.max_rle2_len) + pCompressor->stats.max_rle2_len = nMatchLen; + pCompressor->stats.total_rle2_lens += nMatchLen; + pCompressor->stats.rle2_divisor++; + } + + i += nMatchLen; + + pCompressor->stats.commands_divisor++; + } + else if (pMatch->length == 1) { + int nMatchOffset = pMatch->offset; + + /* 4 bits offset */ + + if (nMatchOffset < 0 || nMatchOffset > 15) + return -1; + + nOutOffset = apultra_write_bits(pOutData, nOutOffset, nMaxOutDataSize, TOKEN_CODE_4BIT_MATCH, TOKEN_SIZE_4BIT_MATCH, nCurBitsOffset, nCurBitShift); + nOutOffset = apultra_write_bits(pOutData, nOutOffset, nMaxOutDataSize, nMatchOffset, 4, nCurBitsOffset, nCurBitShift); + if (nOutOffset < 0) return -1; + + pCompressor->stats.num_4bit_matches++; + pCompressor->stats.commands_divisor++; + + i++; + *nFollowsLiteral = 1; + } + else { + /* Literal */ + + nOutOffset = apultra_write_bits(pOutData, nOutOffset, nMaxOutDataSize, 0 /* literal */, 1, nCurBitsOffset, nCurBitShift); + + if (nOutOffset < 0 || nOutOffset >= nMaxOutDataSize) + return -1; + pOutData[nOutOffset++] = pInWindow[i]; + + pCompressor->stats.num_literals++; + pCompressor->stats.commands_divisor++; + i++; + *nFollowsLiteral = 1; + } + + int nCurSafeDist = (i - nStartOffset) - nOutOffset; + if (nCurSafeDist >= 0 && pCompressor->stats.safe_dist < nCurSafeDist) + pCompressor->stats.safe_dist = nCurSafeDist; + } + + if (nBlockFlags & 2) { + /* 8 bits offset */ + + nOutOffset = apultra_write_bits(pOutData, nOutOffset, nMaxOutDataSize, TOKEN_CODE_7BIT_MATCH, TOKEN_SIZE_7BIT_MATCH, nCurBitsOffset, nCurBitShift); + + if (nOutOffset < 0 || nOutOffset >= nMaxOutDataSize) + return -1; + pOutData[nOutOffset++] = 0x00; /* Offset: EOD */ + pCompressor->stats.num_eod++; + pCompressor->stats.commands_divisor++; + + int nCurSafeDist = (i - nStartOffset) - nOutOffset; + if (nCurSafeDist >= 0 && pCompressor->stats.safe_dist < nCurSafeDist) + pCompressor->stats.safe_dist = nCurSafeDist; + } + + *nCurRepMatchOffset = nRepMatchOffset; + 
return nOutOffset; +} + +/** + * Select the most optimal matches, reduce the token count if possible, and then emit a block of compressed data + * + * @param pCompressor compression context + * @param pInWindow pointer to input data window (previously compressed bytes + bytes to compress) + * @param nPreviousBlockSize number of previously compressed bytes (or 0 for none) + * @param nInDataSize number of input bytes to compress + * @param pOutData pointer to output buffer + * @param nMaxOutDataSize maximum size of output buffer, in bytes + * @param nCurBitsOffset write index into output buffer, of current byte being filled with bits + * @param nCurBitShift bit shift count + * @param nCurFollowsLiteral non-zero if the next command to be issued follows a literal, 0 if not + * @param nCurRepMatchOffset starting rep offset for this block, updated after the block is compressed successfully + * @param nBlockFlags bit 0: 1 for first block, 0 otherwise; bit 1: 1 for last block, 0 otherwise + * + * @return size of compressed data in output buffer, or -1 if the data is uncompressible + */ +static int apultra_optimize_and_write_block(apultra_compressor *pCompressor, const unsigned char *pInWindow, const int nPreviousBlockSize, const int nInDataSize, unsigned char *pOutData, const int nMaxOutDataSize, int *nCurBitsOffset, int *nCurBitShift, int *nCurFollowsLiteral, int *nCurRepMatchOffset, const int nBlockFlags) { + int nOutOffset = 0; + const int nEndOffset = nPreviousBlockSize + nInDataSize; + const int nArrivalsPerPosition = pCompressor->max_arrivals; + int *rle_len = (int*)pCompressor->intervals /* reuse */; + int i, nPosition; + + memset(pCompressor->best_match, 0, pCompressor->block_size * sizeof(apultra_final_match)); + + if ((nBlockFlags & 3) == 3) { + int *first_offset_for_byte = pCompressor->first_offset_for_byte; + int *next_offset_for_pos = pCompressor->next_offset_for_pos; + + /* Supplement 2 and 3-byte matches */ + + memset(first_offset_for_byte, 0xff, sizeof(int) 
* 65536); + memset(next_offset_for_pos, 0xff, sizeof(int) * nInDataSize); + + for (nPosition = nPreviousBlockSize; nPosition < (nEndOffset - 1); nPosition++) { + next_offset_for_pos[nPosition - nPreviousBlockSize] = first_offset_for_byte[((unsigned int)pInWindow[nPosition]) | (((unsigned int)pInWindow[nPosition + 1]) << 8)]; + first_offset_for_byte[((unsigned int)pInWindow[nPosition]) | (((unsigned int)pInWindow[nPosition + 1]) << 8)] = nPosition; + } + + for (nPosition = nPreviousBlockSize + 1; nPosition < (nEndOffset - 1); nPosition++) { + apultra_match *match = pCompressor->match + ((nPosition - nPreviousBlockSize) << MATCHES_PER_INDEX_SHIFT); + unsigned short *match_depth = pCompressor->match_depth + ((nPosition - nPreviousBlockSize) << MATCHES_PER_INDEX_SHIFT); + int m = 0, nInserted = 0; + int nMatchPos; + + while (m < 15 && match[m].length) + m++; + + for (nMatchPos = next_offset_for_pos[nPosition - nPreviousBlockSize]; m < 15 && nMatchPos >= 0; nMatchPos = next_offset_for_pos[nMatchPos - nPreviousBlockSize]) { + int nMatchOffset = nPosition - nMatchPos; + + if (nMatchOffset <= pCompressor->max_offset) { + int nExistingMatchIdx; + int nAlreadyExists = 0; + + for (nExistingMatchIdx = 0; nExistingMatchIdx < m; nExistingMatchIdx++) { + if (match[nExistingMatchIdx].offset == nMatchOffset || + (match[nExistingMatchIdx].offset - (match_depth[nExistingMatchIdx] & 0x3fff)) == nMatchOffset) { + nAlreadyExists = 1; + break; + } + } + + if (!nAlreadyExists) { + match[m].length = (nPosition < (nEndOffset - 2) && pInWindow[nMatchPos + 2] == pInWindow[nPosition + 2]) ? 
3 : 2; + match[m].offset = nMatchOffset; + match_depth[m] = 0x4000; + m++; + nInserted++; + if (nInserted >= 6) + break; + } + } + else { + break; + } + } + } + } + + i = 0; + while (i < nEndOffset) { + int nRangeStartIdx = i; + unsigned char c = pInWindow[nRangeStartIdx]; + do { + i++; + } + while (i < nEndOffset && pInWindow[i] == c); + while (nRangeStartIdx < i) { + rle_len[nRangeStartIdx] = i - nRangeStartIdx; + nRangeStartIdx++; + } + } + + apultra_optimize_forward(pCompressor, pInWindow, nPreviousBlockSize, nEndOffset, 1 /* nInsertForwardReps */, nCurRepMatchOffset, nBlockFlags, nArrivalsPerPosition); + + if ((nBlockFlags & 3) == 3 && nArrivalsPerPosition == NARRIVALS_PER_POSITION_MAX) { + const int* next_offset_for_pos = pCompressor->next_offset_for_pos; + int* offset_cache = pCompressor->offset_cache; + + /* Supplement matches further */ + + memset(offset_cache, 0xff, sizeof(int) * 2048); + + for (nPosition = nPreviousBlockSize + 1; nPosition < (nEndOffset - 1); nPosition++) { + apultra_match* match = pCompressor->match + ((nPosition - nPreviousBlockSize) << MATCHES_PER_INDEX_SHIFT); + + if (match[0].length < 8) { + unsigned short* match_depth = pCompressor->match_depth + ((nPosition - nPreviousBlockSize) << MATCHES_PER_INDEX_SHIFT); + int m = 0, nInserted = 0; + int nMatchPos; + + while (m < 46 && match[m].length) { + offset_cache[match[m].offset & 2047] = nPosition; + offset_cache[(match[m].offset - (match_depth[m] & 0x3fff)) & 2047] = nPosition; + m++; + } + + for (nMatchPos = next_offset_for_pos[nPosition - nPreviousBlockSize]; m < 46 && nMatchPos >= 0; nMatchPos = next_offset_for_pos[nMatchPos - nPreviousBlockSize]) { + int nMatchOffset = nPosition - nMatchPos; + + if (nMatchOffset <= pCompressor->max_offset) { + int nAlreadyExists = 0; + + if (offset_cache[nMatchOffset & 2047] == nPosition) { + int nExistingMatchIdx; + + for (nExistingMatchIdx = 0; nExistingMatchIdx < m; nExistingMatchIdx++) { + if (match[nExistingMatchIdx].offset == nMatchOffset || + 
(match[nExistingMatchIdx].offset - (match_depth[nExistingMatchIdx] & 0x3fff)) == nMatchOffset) { + nAlreadyExists = 1; + + if (match_depth[nExistingMatchIdx] == 0x4000) { + int nMatchLen = 2; + while (nMatchLen < 16 && nPosition < (nEndOffset - nMatchLen) && pInWindow[nMatchPos + nMatchLen] == pInWindow[nPosition + nMatchLen]) + nMatchLen++; + if (nMatchLen > (int)match[nExistingMatchIdx].length) + match[nExistingMatchIdx].length = nMatchLen; + } + + break; + } + } + } + + if (!nAlreadyExists) { + int nForwardPos = nPosition + 2 + 1; + int nGotMatch = 0; + + while (nForwardPos >= nMatchOffset && (nForwardPos + 2) < nEndOffset && nForwardPos < (nPosition + 2 + 1 + 5)) { + if (!memcmp(pInWindow + nForwardPos, pInWindow + nForwardPos - nMatchOffset, 2)) { + nGotMatch = 1; + break; + } + nForwardPos++; + } + + if (nGotMatch) { + int nMatchLen = 2; + while (nMatchLen < 16 && nPosition < (nEndOffset - nMatchLen) && pInWindow[nMatchPos + nMatchLen] == pInWindow[nPosition + nMatchLen]) + nMatchLen++; + match[m].length = nMatchLen; + match[m].offset = nMatchOffset; + match_depth[m] = 0; + m++; + + apultra_insert_forward_match(pCompressor, pInWindow, nPosition, nMatchOffset, nPreviousBlockSize, nEndOffset, nArrivalsPerPosition, 8); + + nInserted++; + if (nInserted >= 18 || (nInserted >= 15 && m >= 38)) + break; + } + } + } + else { + break; + } + } + } + } + } + + /* Pick optimal matches */ + apultra_optimize_forward(pCompressor, pInWindow, nPreviousBlockSize, nEndOffset, 0 /* nInsertForwardReps */, nCurRepMatchOffset, nBlockFlags, nArrivalsPerPosition); + + /* Apply reduction and merge pass */ + int nDidReduce; + int nPasses = 0; + do { + nDidReduce = apultra_reduce_commands(pCompressor, pInWindow, pCompressor->best_match - nPreviousBlockSize, nPreviousBlockSize, nEndOffset, nCurRepMatchOffset, nBlockFlags); + nPasses++; + } while (nDidReduce && nPasses < 20); + + /* Write compressed block */ + + return apultra_write_block(pCompressor, pCompressor->best_match - 
nPreviousBlockSize, pInWindow, nPreviousBlockSize, nEndOffset, pOutData, nOutOffset, nMaxOutDataSize, nCurBitsOffset, nCurBitShift, nCurFollowsLiteral, nCurRepMatchOffset, nBlockFlags); +} + +/* Forward declaration */ +static void apultra_compressor_destroy(apultra_compressor *pCompressor); + +/** + * Initialize compression context + * + * @param pCompressor compression context to initialize + * @param nBlockSize maximum size of input data (bytes to compress only) + * @param nMaxWindowSize maximum size of input data window (previously compressed bytes + bytes to compress) + * @param nMaxArrivals maximum number of arrivals per position + * @param nFlags compression flags + * + * @return 0 for success, non-zero for failure + */ +static int apultra_compressor_init(apultra_compressor *pCompressor, const int nBlockSize, const int nMaxWindowSize, const int nMaxArrivals, const int nFlags) { + int nResult; + + nResult = divsufsort_init(&pCompressor->divsufsort_context); + pCompressor->intervals = NULL; + pCompressor->pos_data = NULL; + pCompressor->open_intervals = NULL; + pCompressor->match = NULL; + pCompressor->match_depth = NULL; + pCompressor->match1 = NULL; + pCompressor->best_match = NULL; + pCompressor->arrival = NULL; + pCompressor->first_offset_for_byte = NULL; + pCompressor->next_offset_for_pos = NULL; + pCompressor->offset_cache = NULL; + pCompressor->flags = nFlags; + pCompressor->block_size = nBlockSize; + pCompressor->max_arrivals = nMaxArrivals; + + memset(&pCompressor->stats, 0, sizeof(pCompressor->stats)); + pCompressor->stats.min_match_len = -1; + pCompressor->stats.min_offset = -1; + pCompressor->stats.min_rle1_len = -1; + pCompressor->stats.min_rle2_len = -1; + + if (!nResult) { + pCompressor->intervals = (unsigned long long *)malloc(nMaxWindowSize * sizeof(unsigned long long)); + + if (pCompressor->intervals) { + pCompressor->pos_data = (unsigned long long *)malloc(nMaxWindowSize * sizeof(unsigned long long)); + + if (pCompressor->pos_data) { + 
pCompressor->open_intervals = (unsigned long long *)malloc((LCP_AND_TAG_MAX + 1) * sizeof(unsigned long long)); + + if (pCompressor->open_intervals) { + pCompressor->arrival = (apultra_arrival *)malloc((nBlockSize + 1) * nMaxArrivals * sizeof(apultra_arrival)); + + if (pCompressor->arrival) { + pCompressor->best_match = (apultra_final_match *)malloc(nBlockSize * sizeof(apultra_final_match)); + + if (pCompressor->best_match) { + pCompressor->match = (apultra_match *)malloc(nBlockSize * NMATCHES_PER_INDEX * sizeof(apultra_match)); + if (pCompressor->match) { + pCompressor->match_depth = (unsigned short *)malloc(nBlockSize * NMATCHES_PER_INDEX * sizeof(unsigned short)); + if (pCompressor->match_depth) { + pCompressor->match1 = (unsigned char *)malloc(nBlockSize * sizeof(unsigned char)); + if (pCompressor->match1) { + pCompressor->first_offset_for_byte = (int*)malloc(65536 * sizeof(int)); + if (pCompressor->first_offset_for_byte) { + pCompressor->next_offset_for_pos = (int*)malloc(nBlockSize * sizeof(int)); + if (pCompressor->next_offset_for_pos) { + if (nMaxArrivals == NARRIVALS_PER_POSITION_MAX) { + pCompressor->offset_cache = (int*)malloc(2048 * sizeof(int)); + if (pCompressor->offset_cache) { + return 0; + } + } + else { + return 0; + } + } + } + } + } + } + } + } + } + } + } + } + + apultra_compressor_destroy(pCompressor); + return 100; +} + +/** + * Clean up compression context and free up any associated resources + * + * @param pCompressor compression context to clean up + */ +static void apultra_compressor_destroy(apultra_compressor *pCompressor) { + divsufsort_destroy(&pCompressor->divsufsort_context); + + if (pCompressor->offset_cache) { + free(pCompressor->offset_cache); + pCompressor->offset_cache = NULL; + } + + if (pCompressor->next_offset_for_pos) { + free(pCompressor->next_offset_for_pos); + pCompressor->next_offset_for_pos = NULL; + } + + if (pCompressor->first_offset_for_byte) { + free(pCompressor->first_offset_for_byte); + 
pCompressor->first_offset_for_byte = NULL; + } + + if (pCompressor->match1) { + free(pCompressor->match1); + pCompressor->match1 = NULL; + } + + if (pCompressor->match_depth) { + free(pCompressor->match_depth); + pCompressor->match_depth = NULL; + } + + if (pCompressor->match) { + free(pCompressor->match); + pCompressor->match = NULL; + } + + if (pCompressor->arrival) { + free(pCompressor->arrival); + pCompressor->arrival = NULL; + } + + if (pCompressor->best_match) { + free(pCompressor->best_match); + pCompressor->best_match = NULL; + } + + if (pCompressor->open_intervals) { + free(pCompressor->open_intervals); + pCompressor->open_intervals = NULL; + } + + if (pCompressor->pos_data) { + free(pCompressor->pos_data); + pCompressor->pos_data = NULL; + } + + if (pCompressor->intervals) { + free(pCompressor->intervals); + pCompressor->intervals = NULL; + } +} + +/** + * Compress one block of data + * + * @param pCompressor compression context + * @param pInWindow pointer to input data window (previously compressed bytes + bytes to compress) + * @param nPreviousBlockSize number of previously compressed bytes (or 0 for none) + * @param nInDataSize number of input bytes to compress + * @param pOutData pointer to output buffer + * @param nMaxOutDataSize maximum size of output buffer, in bytes + * @param nCurBitsOffset write index into output buffer, of current byte being filled with bits + * @param nCurBitShift bit shift count + * @param nCurFollowsLiteral non-zero if the next command to be issued follows a literal, 0 if not + * @param nCurRepMatchOffset starting rep offset for this block, updated after the block is compressed successfully + * @param nBlockFlags bit 0: 1 for first block, 0 otherwise; bit 1: 1 for last block, 0 otherwise + * + * @return size of compressed data in output buffer, or -1 if the data is uncompressible + */ +static int apultra_compressor_shrink_block(apultra_compressor *pCompressor, const unsigned char *pInWindow, const int nPreviousBlockSize, 
const int nInDataSize, unsigned char *pOutData, const int nMaxOutDataSize, int *nCurBitsOffset, int *nCurBitShift, int *nCurFollowsLiteral, int *nCurRepMatchOffset, const int nBlockFlags) { + int nCompressedSize; + + if (apultra_build_suffix_array(pCompressor, pInWindow, nPreviousBlockSize + nInDataSize)) + nCompressedSize = -1; + else { + if (nPreviousBlockSize) { + apultra_skip_matches(pCompressor, 0, nPreviousBlockSize); + } + apultra_find_all_matches(pCompressor, NMATCHES_PER_INDEX, nPreviousBlockSize, nPreviousBlockSize + nInDataSize, nBlockFlags); + + nCompressedSize = apultra_optimize_and_write_block(pCompressor, pInWindow, nPreviousBlockSize, nInDataSize, pOutData, nMaxOutDataSize, nCurBitsOffset, nCurBitShift, nCurFollowsLiteral, nCurRepMatchOffset, nBlockFlags); + } + + return nCompressedSize; +} + +/** + * Get maximum compressed size of input(source) data + * + * @param nInputSize input(source) size in bytes + * + * @return maximum compressed size + */ +size_t apultra_get_max_compressed_size(size_t nInputSize) { + return ((nInputSize * 9 /* literals + literal bits */ + 1 /* match bit */ + 2 /* 7+1 command bits */ + 8 /* EOD offset bits */) + 7) >> 3; +} + +/** + * Compress memory + * + * @param pInputData pointer to input(source) data to compress + * @param pOutBuffer buffer for compressed data + * @param nInputSize input(source) size in bytes + * @param nMaxOutBufferSize maximum capacity of compression buffer + * @param nFlags compression flags (set to 0) + * @param nMaxWindowSize maximum window size to use (0 for default) + * @param nDictionarySize size of dictionary in front of input data (0 for none) + * @param progress progress function, called after compressing each block, or NULL for none + * @param pStats pointer to compression stats that are filled if this function is successful, or NULL + * + * @return actual compressed size, or -1 for error + */ +size_t apultra_compress(const unsigned char *pInputData, unsigned char *pOutBuffer, size_t 
nInputSize, size_t nMaxOutBufferSize, + const unsigned int nFlags, size_t nMaxWindowSize, size_t nDictionarySize, void(*progress)(long long nOriginalSize, long long nCompressedSize), apultra_stats *pStats) { + apultra_compressor compressor; + size_t nOriginalSize = 0; + size_t nCompressedSize = 0L; + int nResult; + int nMaxArrivals = NARRIVALS_PER_POSITION_SMALL; + int nError = 0; + const int nBlockSize = (nInputSize < BLOCK_SIZE) ? ((nInputSize < 1024) ? 1024 : (int)nInputSize) : BLOCK_SIZE; + const int nMaxOutBlockSize = (int)apultra_get_max_compressed_size(nBlockSize); + + if (nDictionarySize < nInputSize) { + int nInDataSize = (int)(nInputSize - nDictionarySize); + if (nInDataSize > nBlockSize) + nInDataSize = nBlockSize; + + if (nInDataSize > 0 && (nDictionarySize + nInDataSize) >= nInputSize) { + if (nInputSize <= 262144) + nMaxArrivals = NARRIVALS_PER_POSITION_MAX; + else + nMaxArrivals = NARRIVALS_PER_POSITION_NORMAL; + } + } + + nResult = apultra_compressor_init(&compressor, nBlockSize, nBlockSize * 2, nMaxArrivals, nFlags); + if (nResult != 0) { + return -1; + } + + compressor.max_offset = nMaxWindowSize ? 
(int)nMaxWindowSize : MAX_OFFSET; + + int nPreviousBlockSize = 0; + int nNumBlocks = 0; + int nCurBitsOffset = INT_MIN, nCurBitShift = 0, nCurFollowsLiteral = 0; + int nBlockFlags = 1; + int nCurRepMatchOffset = 0; + + if (nDictionarySize) { + nOriginalSize = (int)nDictionarySize; + nPreviousBlockSize = (int)nDictionarySize; + } + + while (nOriginalSize < nInputSize && !nError) { + int nInDataSize; + + nInDataSize = (int)(nInputSize - nOriginalSize); + if (nInDataSize > nBlockSize) + nInDataSize = nBlockSize; + + if (nInDataSize > 0) { + int nOutDataSize; + int nOutDataEnd = (int)(nMaxOutBufferSize - nCompressedSize); + + if (nOutDataEnd > nMaxOutBlockSize) + nOutDataEnd = nMaxOutBlockSize; + + if ((nOriginalSize + nInDataSize) >= nInputSize) + nBlockFlags |= 2; + nOutDataSize = apultra_compressor_shrink_block(&compressor, pInputData + nOriginalSize - nPreviousBlockSize, nPreviousBlockSize, nInDataSize, pOutBuffer + nCompressedSize, nOutDataEnd, + &nCurBitsOffset, &nCurBitShift, &nCurFollowsLiteral, &nCurRepMatchOffset, nBlockFlags); + nBlockFlags &= (~1); + + if (nOutDataSize >= 0) { + /* Write compressed block */ + + if (!nError) { + nOriginalSize += nInDataSize; + nCompressedSize += nOutDataSize; + if (nCurBitsOffset != INT_MIN) + nCurBitsOffset -= nOutDataSize; + } + } + else { + nError = -1; + } + + nPreviousBlockSize = nInDataSize; + nNumBlocks++; + } + + if (!nError && nOriginalSize < nInputSize) { + if (progress) + progress(nOriginalSize, nCompressedSize); + } + } + + if (progress) + progress(nOriginalSize, nCompressedSize); + if (pStats) + *pStats = compressor.stats; + + apultra_compressor_destroy(&compressor); + + if (nError) { + return -1; + } + else { + return nCompressedSize; + } +} diff --git a/tools/z64compress/src/enc/apultra/shrink.h b/tools/z64compress/src/enc/apultra/shrink.h new file mode 100644 index 000000000..bd905936f --- /dev/null +++ b/tools/z64compress/src/enc/apultra/shrink.h @@ -0,0 +1,174 @@ +/* + * shrink.h - compressor definitions + 
* + * Copyright (C) 2019 Emmanuel Marty + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution. + */ + +/* + * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori + * + * Inspired by cap by Sven-Åke Dahl. https://github.com/svendahl/cap + * Also inspired by Charles Bloom's compression blog. http://cbloomrants.blogspot.com/ + * With ideas from LZ4 by Yann Collet. https://github.com/lz4/lz4 + * With help and support from spke + * + */ + +#ifndef _SHRINK_H +#define _SHRINK_H + +#include "divsufsort.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define LCP_BITS 15 +#define TAG_BITS 4 +#define LCP_MAX ((1U<<(LCP_BITS - TAG_BITS)) - 1) +#define LCP_AND_TAG_MAX ((1U< + * With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard + * Also with ideas from smallz4 by Stephan Brumme. 
https://create.stephan-brumme.com/smallz4/ + * + */ + +#include +#include +//#include "shrink_context.h" +//#include "shrink_block.h" +#include "format.h" +#include "matchfinder.h" +//#include "lib.h" diff --git a/tools/z64compress/src/enc/apultra/sssort.c b/tools/z64compress/src/enc/apultra/sssort.c new file mode 100644 index 000000000..4a18fd2ab --- /dev/null +++ b/tools/z64compress/src/enc/apultra/sssort.c @@ -0,0 +1,815 @@ +/* + * sssort.c for libdivsufsort + * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#include "divsufsort_private.h" + + +/*- Private Functions -*/ + +static const saint_t lg_table[256]= { + -1,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4, + 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, + 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, + 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7 +}; + +#if (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) + +static INLINE +saint_t +ss_ilg(saidx_t n) { +#if SS_BLOCKSIZE == 0 +# if defined(BUILD_DIVSUFSORT64) + return (n >> 32) ? + ((n >> 48) ? + ((n >> 56) ? + 56 + lg_table[(n >> 56) & 0xff] : + 48 + lg_table[(n >> 48) & 0xff]) : + ((n >> 40) ? + 40 + lg_table[(n >> 40) & 0xff] : + 32 + lg_table[(n >> 32) & 0xff])) : + ((n & 0xffff0000) ? + ((n & 0xff000000) ? + 24 + lg_table[(n >> 24) & 0xff] : + 16 + lg_table[(n >> 16) & 0xff]) : + ((n & 0x0000ff00) ? + 8 + lg_table[(n >> 8) & 0xff] : + 0 + lg_table[(n >> 0) & 0xff])); +# else + return (n & 0xffff0000) ? + ((n & 0xff000000) ? + 24 + lg_table[(n >> 24) & 0xff] : + 16 + lg_table[(n >> 16) & 0xff]) : + ((n & 0x0000ff00) ? + 8 + lg_table[(n >> 8) & 0xff] : + 0 + lg_table[(n >> 0) & 0xff]); +# endif +#elif SS_BLOCKSIZE < 256 + return lg_table[n]; +#else + return (n & 0xff00) ? 
+ 8 + lg_table[(n >> 8) & 0xff] : + 0 + lg_table[(n >> 0) & 0xff]; +#endif +} + +#endif /* (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) */ + +#if SS_BLOCKSIZE != 0 + +static const saint_t sqq_table[256] = { + 0, 16, 22, 27, 32, 35, 39, 42, 45, 48, 50, 53, 55, 57, 59, 61, + 64, 65, 67, 69, 71, 73, 75, 76, 78, 80, 81, 83, 84, 86, 87, 89, + 90, 91, 93, 94, 96, 97, 98, 99, 101, 102, 103, 104, 106, 107, 108, 109, +110, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, +128, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, +143, 144, 144, 145, 146, 147, 148, 149, 150, 150, 151, 152, 153, 154, 155, 155, +156, 157, 158, 159, 160, 160, 161, 162, 163, 163, 164, 165, 166, 167, 167, 168, +169, 170, 170, 171, 172, 173, 173, 174, 175, 176, 176, 177, 178, 178, 179, 180, +181, 181, 182, 183, 183, 184, 185, 185, 186, 187, 187, 188, 189, 189, 190, 191, +192, 192, 193, 193, 194, 195, 195, 196, 197, 197, 198, 199, 199, 200, 201, 201, +202, 203, 203, 204, 204, 205, 206, 206, 207, 208, 208, 209, 209, 210, 211, 211, +212, 212, 213, 214, 214, 215, 215, 216, 217, 217, 218, 218, 219, 219, 220, 221, +221, 222, 222, 223, 224, 224, 225, 225, 226, 226, 227, 227, 228, 229, 229, 230, +230, 231, 231, 232, 232, 233, 234, 234, 235, 235, 236, 236, 237, 237, 238, 238, +239, 240, 240, 241, 241, 242, 242, 243, 243, 244, 244, 245, 245, 246, 246, 247, +247, 248, 248, 249, 249, 250, 250, 251, 251, 252, 252, 253, 253, 254, 254, 255 +}; + +static INLINE +saidx_t +ss_isqrt(saidx_t x) { + saidx_t y, e; + + if(x >= (SS_BLOCKSIZE * SS_BLOCKSIZE)) { return SS_BLOCKSIZE; } + e = (x & 0xffff0000) ? + ((x & 0xff000000) ? + 24 + lg_table[(x >> 24) & 0xff] : + 16 + lg_table[(x >> 16) & 0xff]) : + ((x & 0x0000ff00) ? 
+ 8 + lg_table[(x >> 8) & 0xff] : + 0 + lg_table[(x >> 0) & 0xff]); + + if(e >= 16) { + y = sqq_table[x >> ((e - 6) - (e & 1))] << ((e >> 1) - 7); + if(e >= 24) { y = (y + 1 + x / y) >> 1; } + y = (y + 1 + x / y) >> 1; + } else if(e >= 8) { + y = (sqq_table[x >> ((e - 6) - (e & 1))] >> (7 - (e >> 1))) + 1; + } else { + return sqq_table[x] >> 4; + } + + return (x < (y * y)) ? y - 1 : y; +} + +#endif /* SS_BLOCKSIZE != 0 */ + + +/*---------------------------------------------------------------------------*/ + +/* Compares two suffixes. */ +static INLINE +saint_t +ss_compare(const sauchar_t *T, + const saidx_t *p1, const saidx_t *p2, + saidx_t depth) { + const sauchar_t *U1, *U2, *U1n, *U2n; + + for(U1 = T + depth + *p1, + U2 = T + depth + *p2, + U1n = T + *(p1 + 1) + 2, + U2n = T + *(p2 + 1) + 2; + (U1 < U1n) && (U2 < U2n) && (*U1 == *U2); + ++U1, ++U2) { + } + + return U1 < U1n ? + (U2 < U2n ? *U1 - *U2 : 1) : + (U2 < U2n ? -1 : 0); +} + + +/*---------------------------------------------------------------------------*/ + +#if (SS_BLOCKSIZE != 1) && (SS_INSERTIONSORT_THRESHOLD != 1) + +/* Insertionsort for small size groups */ +static +void +ss_insertionsort(const sauchar_t *T, const saidx_t *PA, + saidx_t *first, saidx_t *last, saidx_t depth) { + saidx_t *i, *j; + saidx_t t; + saint_t r; + + for(i = last - 2; first <= i; --i) { + for(t = *i, j = i + 1; 0 < (r = ss_compare(T, PA + t, PA + *j, depth));) { + do { *(j - 1) = *j; } while((++j < last) && (*j < 0)); + if(last <= j) { break; } + } + if(r == 0) { *j = ~*j; } + *(j - 1) = t; + } +} + +#endif /* (SS_BLOCKSIZE != 1) && (SS_INSERTIONSORT_THRESHOLD != 1) */ + + +/*---------------------------------------------------------------------------*/ + +#if (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) + +static INLINE +void +ss_fixdown(const sauchar_t *Td, const saidx_t *PA, + saidx_t *SA, saidx_t i, saidx_t size) { + saidx_t j, k; + saidx_t v; + saint_t c, d, e; + + for(v = SA[i], c = Td[PA[v]]; (j = 
2 * i + 1) < size; SA[i] = SA[k], i = k) { + d = Td[PA[SA[k = j++]]]; + if(d < (e = Td[PA[SA[j]]])) { k = j; d = e; } + if(d <= c) { break; } + } + SA[i] = v; +} + +/* Simple top-down heapsort. */ +static +void +ss_heapsort(const sauchar_t *Td, const saidx_t *PA, saidx_t *SA, saidx_t size) { + saidx_t i, m; + saidx_t t; + + m = size; + if((size % 2) == 0) { + m--; + if(Td[PA[SA[m / 2]]] < Td[PA[SA[m]]]) { SWAP(SA[m], SA[m / 2]); } + } + + for(i = m / 2 - 1; 0 <= i; --i) { ss_fixdown(Td, PA, SA, i, m); } + if((size % 2) == 0) { SWAP(SA[0], SA[m]); ss_fixdown(Td, PA, SA, 0, m); } + for(i = m - 1; 0 < i; --i) { + t = SA[0], SA[0] = SA[i]; + ss_fixdown(Td, PA, SA, 0, i); + SA[i] = t; + } +} + + +/*---------------------------------------------------------------------------*/ + +/* Returns the median of three elements. */ +static INLINE +saidx_t * +ss_median3(const sauchar_t *Td, const saidx_t *PA, + saidx_t *v1, saidx_t *v2, saidx_t *v3) { + saidx_t *t; + if(Td[PA[*v1]] > Td[PA[*v2]]) { SWAP(v1, v2); } + if(Td[PA[*v2]] > Td[PA[*v3]]) { + if(Td[PA[*v1]] > Td[PA[*v3]]) { return v1; } + else { return v3; } + } + return v2; +} + +/* Returns the median of five elements. */ +static INLINE +saidx_t * +ss_median5(const sauchar_t *Td, const saidx_t *PA, + saidx_t *v1, saidx_t *v2, saidx_t *v3, saidx_t *v4, saidx_t *v5) { + saidx_t *t; + if(Td[PA[*v2]] > Td[PA[*v3]]) { SWAP(v2, v3); } + if(Td[PA[*v4]] > Td[PA[*v5]]) { SWAP(v4, v5); } + if(Td[PA[*v2]] > Td[PA[*v4]]) { SWAP(v2, v4); SWAP(v3, v5); } + if(Td[PA[*v1]] > Td[PA[*v3]]) { SWAP(v1, v3); } + if(Td[PA[*v1]] > Td[PA[*v4]]) { SWAP(v1, v4); SWAP(v3, v5); } + if(Td[PA[*v3]] > Td[PA[*v4]]) { return v4; } + return v3; +} + +/* Returns the pivot element. 
*/ +static INLINE +saidx_t * +ss_pivot(const sauchar_t *Td, const saidx_t *PA, saidx_t *first, saidx_t *last) { + saidx_t *middle; + saidx_t t; + + t = last - first; + middle = first + t / 2; + + if(t <= 512) { + if(t <= 32) { + return ss_median3(Td, PA, first, middle, last - 1); + } else { + t >>= 2; + return ss_median5(Td, PA, first, first + t, middle, last - 1 - t, last - 1); + } + } + t >>= 3; + first = ss_median3(Td, PA, first, first + t, first + (t << 1)); + middle = ss_median3(Td, PA, middle - t, middle, middle + t); + last = ss_median3(Td, PA, last - 1 - (t << 1), last - 1 - t, last - 1); + return ss_median3(Td, PA, first, middle, last); +} + + +/*---------------------------------------------------------------------------*/ + +/* Binary partition for substrings. */ +static INLINE +saidx_t * +ss_partition(const saidx_t *PA, + saidx_t *first, saidx_t *last, saidx_t depth) { + saidx_t *a, *b; + saidx_t t; + for(a = first - 1, b = last;;) { + for(; (++a < b) && ((PA[*a] + depth) >= (PA[*a + 1] + 1));) { *a = ~*a; } + for(; (a < --b) && ((PA[*b] + depth) < (PA[*b + 1] + 1));) { } + if(b <= a) { break; } + t = ~*b; + *b = *a; + *a = t; + } + if(first < a) { *first = ~*first; } + return a; +} + +/* Multikey introsort for medium size groups. 
*/ +static +void +ss_mintrosort(const sauchar_t *T, const saidx_t *PA, + saidx_t *first, saidx_t *last, + saidx_t depth) { +#define STACK_SIZE SS_MISORT_STACKSIZE + struct { saidx_t *a, *b, c; saint_t d; } stack[STACK_SIZE]; + const sauchar_t *Td; + saidx_t *a, *b, *c, *d, *e, *f; + saidx_t s, t; + saint_t ssize; + saint_t limit; + saint_t v, x = 0; + + for(ssize = 0, limit = ss_ilg(last - first);;) { + + if((last - first) <= SS_INSERTIONSORT_THRESHOLD) { +#if 1 < SS_INSERTIONSORT_THRESHOLD + if(1 < (last - first)) { ss_insertionsort(T, PA, first, last, depth); } +#endif + STACK_POP(first, last, depth, limit); + continue; + } + + Td = T + depth; + if(limit-- == 0) { ss_heapsort(Td, PA, first, last - first); } + if(limit < 0) { + for(a = first + 1, v = Td[PA[*first]]; a < last; ++a) { + if((x = Td[PA[*a]]) != v) { + if(1 < (a - first)) { break; } + v = x; + first = a; + } + } + if(Td[PA[*first] - 1] < v) { + first = ss_partition(PA, first, a, depth); + } + if((a - first) <= (last - a)) { + if(1 < (a - first)) { + STACK_PUSH(a, last, depth, -1); + last = a, depth += 1, limit = ss_ilg(a - first); + } else { + first = a, limit = -1; + } + } else { + if(1 < (last - a)) { + STACK_PUSH(first, a, depth + 1, ss_ilg(a - first)); + first = a, limit = -1; + } else { + last = a, depth += 1, limit = ss_ilg(a - first); + } + } + continue; + } + + /* choose pivot */ + a = ss_pivot(Td, PA, first, last); + v = Td[PA[*a]]; + SWAP(*first, *a); + + /* partition */ + for(b = first; (++b < last) && ((x = Td[PA[*b]]) == v);) { } + if(((a = b) < last) && (x < v)) { + for(; (++b < last) && ((x = Td[PA[*b]]) <= v);) { + if(x == v) { SWAP(*b, *a); ++a; } + } + } + for(c = last; (b < --c) && ((x = Td[PA[*c]]) == v);) { } + if((b < (d = c)) && (x > v)) { + for(; (b < --c) && ((x = Td[PA[*c]]) >= v);) { + if(x == v) { SWAP(*c, *d); --d; } + } + } + for(; b < c;) { + SWAP(*b, *c); + for(; (++b < c) && ((x = Td[PA[*b]]) <= v);) { + if(x == v) { SWAP(*b, *a); ++a; } + } + for(; (b < --c) && ((x = 
Td[PA[*c]]) >= v);) { + if(x == v) { SWAP(*c, *d); --d; } + } + } + + if(a <= d) { + c = b - 1; + + if((s = a - first) > (t = b - a)) { s = t; } + for(e = first, f = b - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); } + if((s = d - c) > (t = last - d - 1)) { s = t; } + for(e = b, f = last - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); } + + a = first + (b - a), c = last - (d - c); + b = (v <= Td[PA[*a] - 1]) ? a : ss_partition(PA, a, c, depth); + + if((a - first) <= (last - c)) { + if((last - c) <= (c - b)) { + STACK_PUSH(b, c, depth + 1, ss_ilg(c - b)); + STACK_PUSH(c, last, depth, limit); + last = a; + } else if((a - first) <= (c - b)) { + STACK_PUSH(c, last, depth, limit); + STACK_PUSH(b, c, depth + 1, ss_ilg(c - b)); + last = a; + } else { + STACK_PUSH(c, last, depth, limit); + STACK_PUSH(first, a, depth, limit); + first = b, last = c, depth += 1, limit = ss_ilg(c - b); + } + } else { + if((a - first) <= (c - b)) { + STACK_PUSH(b, c, depth + 1, ss_ilg(c - b)); + STACK_PUSH(first, a, depth, limit); + first = c; + } else if((last - c) <= (c - b)) { + STACK_PUSH(first, a, depth, limit); + STACK_PUSH(b, c, depth + 1, ss_ilg(c - b)); + first = c; + } else { + STACK_PUSH(first, a, depth, limit); + STACK_PUSH(c, last, depth, limit); + first = b, last = c, depth += 1, limit = ss_ilg(c - b); + } + } + } else { + limit += 1; + if(Td[PA[*first] - 1] < v) { + first = ss_partition(PA, first, last, depth); + limit = ss_ilg(last - first); + } + depth += 1; + } + } +#undef STACK_SIZE +} + +#endif /* (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) */ + + +/*---------------------------------------------------------------------------*/ + +#if SS_BLOCKSIZE != 0 + +static INLINE +void +ss_blockswap(saidx_t *a, saidx_t *b, saidx_t n) { + saidx_t t; + for(; 0 < n; --n, ++a, ++b) { + t = *a, *a = *b, *b = t; + } +} + +static INLINE +void +ss_rotate(saidx_t *first, saidx_t *middle, saidx_t *last) { + saidx_t *a, *b, t; + saidx_t l, r; + l = middle - first, r = last - middle; 
+ for(; (0 < l) && (0 < r);) { + if(l == r) { ss_blockswap(first, middle, l); break; } + if(l < r) { + a = last - 1, b = middle - 1; + t = *a; + do { + *a-- = *b, *b-- = *a; + if(b < first) { + *a = t; + last = a; + if((r -= l + 1) <= l) { break; } + a -= 1, b = middle - 1; + t = *a; + } + } while(1); + } else { + a = first, b = middle; + t = *a; + do { + *a++ = *b, *b++ = *a; + if(last <= b) { + *a = t; + first = a + 1; + if((l -= r + 1) <= r) { break; } + a += 1, b = middle; + t = *a; + } + } while(1); + } + } +} + + +/*---------------------------------------------------------------------------*/ + +static +void +ss_inplacemerge(const sauchar_t *T, const saidx_t *PA, + saidx_t *first, saidx_t *middle, saidx_t *last, + saidx_t depth) { + const saidx_t *p; + saidx_t *a, *b; + saidx_t len, half; + saint_t q, r; + saint_t x; + + for(;;) { + if(*(last - 1) < 0) { x = 1; p = PA + ~*(last - 1); } + else { x = 0; p = PA + *(last - 1); } + for(a = first, len = middle - first, half = len >> 1, r = -1; + 0 < len; + len = half, half >>= 1) { + b = a + half; + q = ss_compare(T, PA + ((0 <= *b) ? *b : ~*b), p, depth); + if(q < 0) { + a = b + 1; + half -= (len & 1) ^ 1; + } else { + r = q; + } + } + if(a < middle) { + if(r == 0) { *a = ~*a; } + ss_rotate(a, middle, last); + last -= middle - a; + middle = a; + if(first == middle) { break; } + } + --last; + if(x != 0) { while(*--last < 0) { } } + if(middle == last) { break; } + } +} + + +/*---------------------------------------------------------------------------*/ + +/* Merge-forward with internal buffer. 
*/ +static +void +ss_mergeforward(const sauchar_t *T, const saidx_t *PA, + saidx_t *first, saidx_t *middle, saidx_t *last, + saidx_t *buf, saidx_t depth) { + saidx_t *a, *b, *c, *bufend; + saidx_t t; + saint_t r; + + bufend = buf + (middle - first) - 1; + ss_blockswap(buf, first, middle - first); + + for(t = *(a = first), b = buf, c = middle;;) { + r = ss_compare(T, PA + *b, PA + *c, depth); + if(r < 0) { + do { + *a++ = *b; + if(bufend <= b) { *bufend = t; return; } + *b++ = *a; + } while(*b < 0); + } else if(r > 0) { + do { + *a++ = *c, *c++ = *a; + if(last <= c) { + while(b < bufend) { *a++ = *b, *b++ = *a; } + *a = *b, *b = t; + return; + } + } while(*c < 0); + } else { + *c = ~*c; + do { + *a++ = *b; + if(bufend <= b) { *bufend = t; return; } + *b++ = *a; + } while(*b < 0); + + do { + *a++ = *c, *c++ = *a; + if(last <= c) { + while(b < bufend) { *a++ = *b, *b++ = *a; } + *a = *b, *b = t; + return; + } + } while(*c < 0); + } + } +} + +/* Merge-backward with internal buffer. */ +static +void +ss_mergebackward(const sauchar_t *T, const saidx_t *PA, + saidx_t *first, saidx_t *middle, saidx_t *last, + saidx_t *buf, saidx_t depth) { + const saidx_t *p1, *p2; + saidx_t *a, *b, *c, *bufend; + saidx_t t; + saint_t r; + saint_t x; + + bufend = buf + (last - middle) - 1; + ss_blockswap(buf, middle, last - middle); + + x = 0; + if(*bufend < 0) { p1 = PA + ~*bufend; x |= 1; } + else { p1 = PA + *bufend; } + if(*(middle - 1) < 0) { p2 = PA + ~*(middle - 1); x |= 2; } + else { p2 = PA + *(middle - 1); } + for(t = *(a = last - 1), b = bufend, c = middle - 1;;) { + r = ss_compare(T, p1, p2, depth); + if(0 < r) { + if(x & 1) { do { *a-- = *b, *b-- = *a; } while(*b < 0); x ^= 1; } + *a-- = *b; + if(b <= buf) { *buf = t; break; } + *b-- = *a; + if(*b < 0) { p1 = PA + ~*b; x |= 1; } + else { p1 = PA + *b; } + } else if(r < 0) { + if(x & 2) { do { *a-- = *c, *c-- = *a; } while(*c < 0); x ^= 2; } + *a-- = *c, *c-- = *a; + if(c < first) { + while(buf < b) { *a-- = *b, *b-- = *a; } + 
*a = *b, *b = t; + break; + } + if(*c < 0) { p2 = PA + ~*c; x |= 2; } + else { p2 = PA + *c; } + } else { + if(x & 1) { do { *a-- = *b, *b-- = *a; } while(*b < 0); x ^= 1; } + *a-- = ~*b; + if(b <= buf) { *buf = t; break; } + *b-- = *a; + if(x & 2) { do { *a-- = *c, *c-- = *a; } while(*c < 0); x ^= 2; } + *a-- = *c, *c-- = *a; + if(c < first) { + while(buf < b) { *a-- = *b, *b-- = *a; } + *a = *b, *b = t; + break; + } + if(*b < 0) { p1 = PA + ~*b; x |= 1; } + else { p1 = PA + *b; } + if(*c < 0) { p2 = PA + ~*c; x |= 2; } + else { p2 = PA + *c; } + } + } +} + +/* D&C based merge. */ +static +void +ss_swapmerge(const sauchar_t *T, const saidx_t *PA, + saidx_t *first, saidx_t *middle, saidx_t *last, + saidx_t *buf, saidx_t bufsize, saidx_t depth) { +#define STACK_SIZE SS_SMERGE_STACKSIZE +#define GETIDX(a) ((0 <= (a)) ? (a) : (~(a))) +#define MERGE_CHECK(a, b, c)\ + do {\ + if(((c) & 1) ||\ + (((c) & 2) && (ss_compare(T, PA + GETIDX(*((a) - 1)), PA + *(a), depth) == 0))) {\ + *(a) = ~*(a);\ + }\ + if(((c) & 4) && ((ss_compare(T, PA + GETIDX(*((b) - 1)), PA + *(b), depth) == 0))) {\ + *(b) = ~*(b);\ + }\ + } while(0) + struct { saidx_t *a, *b, *c; saint_t d; } stack[STACK_SIZE]; + saidx_t *l, *r, *lm, *rm; + saidx_t m, len, half; + saint_t ssize; + saint_t check, next; + + for(check = 0, ssize = 0;;) { + if((last - middle) <= bufsize) { + if((first < middle) && (middle < last)) { + ss_mergebackward(T, PA, first, middle, last, buf, depth); + } + MERGE_CHECK(first, last, check); + STACK_POP(first, middle, last, check); + continue; + } + + if((middle - first) <= bufsize) { + if(first < middle) { + ss_mergeforward(T, PA, first, middle, last, buf, depth); + } + MERGE_CHECK(first, last, check); + STACK_POP(first, middle, last, check); + continue; + } + + for(m = 0, len = MIN(middle - first, last - middle), half = len >> 1; + 0 < len; + len = half, half >>= 1) { + if(ss_compare(T, PA + GETIDX(*(middle + m + half)), + PA + GETIDX(*(middle - m - half - 1)), depth) < 0) { + m += 
half + 1; + half -= (len & 1) ^ 1; + } + } + + if(0 < m) { + lm = middle - m, rm = middle + m; + ss_blockswap(lm, middle, m); + l = r = middle, next = 0; + if(rm < last) { + if(*rm < 0) { + *rm = ~*rm; + if(first < lm) { for(; *--l < 0;) { } next |= 4; } + next |= 1; + } else if(first < lm) { + for(; *r < 0; ++r) { } + next |= 2; + } + } + + if((l - first) <= (last - r)) { + STACK_PUSH(r, rm, last, (next & 3) | (check & 4)); + middle = lm, last = l, check = (check & 3) | (next & 4); + } else { + if((next & 2) && (r == middle)) { next ^= 6; } + STACK_PUSH(first, lm, l, (check & 3) | (next & 4)); + first = r, middle = rm, check = (next & 3) | (check & 4); + } + } else { + if(ss_compare(T, PA + GETIDX(*(middle - 1)), PA + *middle, depth) == 0) { + *middle = ~*middle; + } + MERGE_CHECK(first, last, check); + STACK_POP(first, middle, last, check); + } + } +#undef STACK_SIZE +} + +#endif /* SS_BLOCKSIZE != 0 */ + + +/*---------------------------------------------------------------------------*/ + +/*- Function -*/ + +/* Substring sort */ +void +sssort(const sauchar_t *T, const saidx_t *PA, + saidx_t *first, saidx_t *last, + saidx_t *buf, saidx_t bufsize, + saidx_t depth, saidx_t n, saint_t lastsuffix) { + saidx_t *a; +#if SS_BLOCKSIZE != 0 + saidx_t *b, *middle, *curbuf; + saidx_t j, k, curbufsize, limit; +#endif + saidx_t i; + + if(lastsuffix != 0) { ++first; } + +#if SS_BLOCKSIZE == 0 + ss_mintrosort(T, PA, first, last, depth); +#else + if((bufsize < SS_BLOCKSIZE) && + (bufsize < (last - first)) && + (bufsize < (limit = ss_isqrt(last - first)))) { + if(SS_BLOCKSIZE < limit) { limit = SS_BLOCKSIZE; } + buf = middle = last - limit, bufsize = limit; + } else { + middle = last, limit = 0; + } + for(a = first, i = 0; SS_BLOCKSIZE < (middle - a); a += SS_BLOCKSIZE, ++i) { +#if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE + ss_mintrosort(T, PA, a, a + SS_BLOCKSIZE, depth); +#elif 1 < SS_BLOCKSIZE + ss_insertionsort(T, PA, a, a + SS_BLOCKSIZE, depth); +#endif + curbufsize = last 
- (a + SS_BLOCKSIZE); + curbuf = a + SS_BLOCKSIZE; + if(curbufsize <= bufsize) { curbufsize = bufsize, curbuf = buf; } + for(b = a, k = SS_BLOCKSIZE, j = i; j & 1; b -= k, k <<= 1, j >>= 1) { + ss_swapmerge(T, PA, b - k, b, b + k, curbuf, curbufsize, depth); + } + } +#if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE + ss_mintrosort(T, PA, a, middle, depth); +#elif 1 < SS_BLOCKSIZE + ss_insertionsort(T, PA, a, middle, depth); +#endif + for(k = SS_BLOCKSIZE; i != 0; k <<= 1, i >>= 1) { + if(i & 1) { + ss_swapmerge(T, PA, a - k, a, middle, buf, bufsize, depth); + a -= k; + } + } + if(limit != 0) { +#if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE + ss_mintrosort(T, PA, middle, last, depth); +#elif 1 < SS_BLOCKSIZE + ss_insertionsort(T, PA, middle, last, depth); +#endif + ss_inplacemerge(T, PA, first, middle, last, depth); + } +#endif + + if(lastsuffix != 0) { + /* Insert last type B* suffix. */ + saidx_t PAi[2]; PAi[0] = PA[*(first - 1)], PAi[1] = n - 2; + for(a = first, i = *(first - 1); + (a < last) && ((*a < 0) || (0 < ss_compare(T, &(PAi[0]), PA + *a, depth))); + ++a) { + *(a - 1) = *a; + } + *(a - 1) = i; + } +} diff --git a/tools/z64compress/src/enc/apultra/trsort.c b/tools/z64compress/src/enc/apultra/trsort.c new file mode 100644 index 000000000..6fe3e67ba --- /dev/null +++ b/tools/z64compress/src/enc/apultra/trsort.c @@ -0,0 +1,586 @@ +/* + * trsort.c for libdivsufsort + * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "divsufsort_private.h" + + +/*- Private Functions -*/ + +static const saint_t lg_table[256]= { + -1,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4, + 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, + 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, + 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7 +}; + +static INLINE +saint_t +tr_ilg(saidx_t n) { +#if defined(BUILD_DIVSUFSORT64) + return (n >> 32) ? + ((n >> 48) ? + ((n >> 56) ? + 56 + lg_table[(n >> 56) & 0xff] : + 48 + lg_table[(n >> 48) & 0xff]) : + ((n >> 40) ? 
+ 40 + lg_table[(n >> 40) & 0xff] : + 32 + lg_table[(n >> 32) & 0xff])) : + ((n & 0xffff0000) ? + ((n & 0xff000000) ? + 24 + lg_table[(n >> 24) & 0xff] : + 16 + lg_table[(n >> 16) & 0xff]) : + ((n & 0x0000ff00) ? + 8 + lg_table[(n >> 8) & 0xff] : + 0 + lg_table[(n >> 0) & 0xff])); +#else + return (n & 0xffff0000) ? + ((n & 0xff000000) ? + 24 + lg_table[(n >> 24) & 0xff] : + 16 + lg_table[(n >> 16) & 0xff]) : + ((n & 0x0000ff00) ? + 8 + lg_table[(n >> 8) & 0xff] : + 0 + lg_table[(n >> 0) & 0xff]); +#endif +} + + +/*---------------------------------------------------------------------------*/ + +/* Simple insertionsort for small size groups. */ +static +void +tr_insertionsort(const saidx_t *ISAd, saidx_t *first, saidx_t *last) { + saidx_t *a, *b; + saidx_t t, r; + + for(a = first + 1; a < last; ++a) { + for(t = *a, b = a - 1; 0 > (r = ISAd[t] - ISAd[*b]);) { + do { *(b + 1) = *b; } while((first <= --b) && (*b < 0)); + if(b < first) { break; } + } + if(r == 0) { *b = ~*b; } + *(b + 1) = t; + } +} + + +/*---------------------------------------------------------------------------*/ + +static INLINE +void +tr_fixdown(const saidx_t *ISAd, saidx_t *SA, saidx_t i, saidx_t size) { + saidx_t j, k; + saidx_t v; + saidx_t c, d, e; + + for(v = SA[i], c = ISAd[v]; (j = 2 * i + 1) < size; SA[i] = SA[k], i = k) { + d = ISAd[SA[k = j++]]; + if(d < (e = ISAd[SA[j]])) { k = j; d = e; } + if(d <= c) { break; } + } + SA[i] = v; +} + +/* Simple top-down heapsort. 
*/ +static +void +tr_heapsort(const saidx_t *ISAd, saidx_t *SA, saidx_t size) { + saidx_t i, m; + saidx_t t; + + m = size; + if((size % 2) == 0) { + m--; + if(ISAd[SA[m / 2]] < ISAd[SA[m]]) { SWAP(SA[m], SA[m / 2]); } + } + + for(i = m / 2 - 1; 0 <= i; --i) { tr_fixdown(ISAd, SA, i, m); } + if((size % 2) == 0) { SWAP(SA[0], SA[m]); tr_fixdown(ISAd, SA, 0, m); } + for(i = m - 1; 0 < i; --i) { + t = SA[0], SA[0] = SA[i]; + tr_fixdown(ISAd, SA, 0, i); + SA[i] = t; + } +} + + +/*---------------------------------------------------------------------------*/ + +/* Returns the median of three elements. */ +static INLINE +saidx_t * +tr_median3(const saidx_t *ISAd, saidx_t *v1, saidx_t *v2, saidx_t *v3) { + saidx_t *t; + if(ISAd[*v1] > ISAd[*v2]) { SWAP(v1, v2); } + if(ISAd[*v2] > ISAd[*v3]) { + if(ISAd[*v1] > ISAd[*v3]) { return v1; } + else { return v3; } + } + return v2; +} + +/* Returns the median of five elements. */ +static INLINE +saidx_t * +tr_median5(const saidx_t *ISAd, + saidx_t *v1, saidx_t *v2, saidx_t *v3, saidx_t *v4, saidx_t *v5) { + saidx_t *t; + if(ISAd[*v2] > ISAd[*v3]) { SWAP(v2, v3); } + if(ISAd[*v4] > ISAd[*v5]) { SWAP(v4, v5); } + if(ISAd[*v2] > ISAd[*v4]) { SWAP(v2, v4); SWAP(v3, v5); } + if(ISAd[*v1] > ISAd[*v3]) { SWAP(v1, v3); } + if(ISAd[*v1] > ISAd[*v4]) { SWAP(v1, v4); SWAP(v3, v5); } + if(ISAd[*v3] > ISAd[*v4]) { return v4; } + return v3; +} + +/* Returns the pivot element. 
*/ +static INLINE +saidx_t * +tr_pivot(const saidx_t *ISAd, saidx_t *first, saidx_t *last) { + saidx_t *middle; + saidx_t t; + + t = last - first; + middle = first + t / 2; + + if(t <= 512) { + if(t <= 32) { + return tr_median3(ISAd, first, middle, last - 1); + } else { + t >>= 2; + return tr_median5(ISAd, first, first + t, middle, last - 1 - t, last - 1); + } + } + t >>= 3; + first = tr_median3(ISAd, first, first + t, first + (t << 1)); + middle = tr_median3(ISAd, middle - t, middle, middle + t); + last = tr_median3(ISAd, last - 1 - (t << 1), last - 1 - t, last - 1); + return tr_median3(ISAd, first, middle, last); +} + + +/*---------------------------------------------------------------------------*/ + +typedef struct _trbudget_t trbudget_t; +struct _trbudget_t { + saidx_t chance; + saidx_t remain; + saidx_t incval; + saidx_t count; +}; + +static INLINE +void +trbudget_init(trbudget_t *budget, saidx_t chance, saidx_t incval) { + budget->chance = chance; + budget->remain = budget->incval = incval; +} + +static INLINE +saint_t +trbudget_check(trbudget_t *budget, saidx_t size) { + if(size <= budget->remain) { budget->remain -= size; return 1; } + if(budget->chance == 0) { budget->count += size; return 0; } + budget->remain += budget->incval - size; + budget->chance -= 1; + return 1; +} + + +/*---------------------------------------------------------------------------*/ + +static INLINE +void +tr_partition(const saidx_t *ISAd, + saidx_t *first, saidx_t *middle, saidx_t *last, + saidx_t **pa, saidx_t **pb, saidx_t v) { + saidx_t *a, *b, *c, *d, *e, *f; + saidx_t t, s; + saidx_t x = 0; + + for(b = middle - 1; (++b < last) && ((x = ISAd[*b]) == v);) { } + if(((a = b) < last) && (x < v)) { + for(; (++b < last) && ((x = ISAd[*b]) <= v);) { + if(x == v) { SWAP(*b, *a); ++a; } + } + } + for(c = last; (b < --c) && ((x = ISAd[*c]) == v);) { } + if((b < (d = c)) && (x > v)) { + for(; (b < --c) && ((x = ISAd[*c]) >= v);) { + if(x == v) { SWAP(*c, *d); --d; } + } + } + for(; b < 
c;) { + SWAP(*b, *c); + for(; (++b < c) && ((x = ISAd[*b]) <= v);) { + if(x == v) { SWAP(*b, *a); ++a; } + } + for(; (b < --c) && ((x = ISAd[*c]) >= v);) { + if(x == v) { SWAP(*c, *d); --d; } + } + } + + if(a <= d) { + c = b - 1; + if((s = a - first) > (t = b - a)) { s = t; } + for(e = first, f = b - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); } + if((s = d - c) > (t = last - d - 1)) { s = t; } + for(e = b, f = last - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); } + first += (b - a), last -= (d - c); + } + *pa = first, *pb = last; +} + +static +void +tr_copy(saidx_t *ISA, const saidx_t *SA, + saidx_t *first, saidx_t *a, saidx_t *b, saidx_t *last, + saidx_t depth) { + /* sort suffixes of middle partition + by using sorted order of suffixes of left and right partition. */ + saidx_t *c, *d, *e; + saidx_t s, v; + + v = b - SA - 1; + for(c = first, d = a - 1; c <= d; ++c) { + if((0 <= (s = *c - depth)) && (ISA[s] == v)) { + *++d = s; + ISA[s] = d - SA; + } + } + for(c = last - 1, e = d + 1, d = b; e < d; --c) { + if((0 <= (s = *c - depth)) && (ISA[s] == v)) { + *--d = s; + ISA[s] = d - SA; + } + } +} + +static +void +tr_partialcopy(saidx_t *ISA, const saidx_t *SA, + saidx_t *first, saidx_t *a, saidx_t *b, saidx_t *last, + saidx_t depth) { + saidx_t *c, *d, *e; + saidx_t s, v; + saidx_t rank, lastrank, newrank = -1; + + v = b - SA - 1; + lastrank = -1; + for(c = first, d = a - 1; c <= d; ++c) { + if((0 <= (s = *c - depth)) && (ISA[s] == v)) { + *++d = s; + rank = ISA[s + depth]; + if(lastrank != rank) { lastrank = rank; newrank = d - SA; } + ISA[s] = newrank; + } + } + + lastrank = -1; + for(e = d; first <= e; --e) { + rank = ISA[*e]; + if(lastrank != rank) { lastrank = rank; newrank = e - SA; } + if(newrank != rank) { ISA[*e] = newrank; } + } + + lastrank = -1; + for(c = last - 1, e = d + 1, d = b; e < d; --c) { + if((0 <= (s = *c - depth)) && (ISA[s] == v)) { + *--d = s; + rank = ISA[s + depth]; + if(lastrank != rank) { lastrank = rank; newrank = d - SA; } + ISA[s] = newrank; 
+ } + } +} + +static +void +tr_introsort(saidx_t *ISA, const saidx_t *ISAd, + saidx_t *SA, saidx_t *first, saidx_t *last, + trbudget_t *budget) { +#define STACK_SIZE TR_STACKSIZE + struct { const saidx_t *a; saidx_t *b, *c; saint_t d, e; }stack[STACK_SIZE]; + saidx_t *a, *b, *c; + saidx_t t; + saidx_t v, x = 0; + saidx_t incr = ISAd - ISA; + saint_t limit, next; + saint_t ssize, trlink = -1; + + for(ssize = 0, limit = tr_ilg(last - first);;) { + + if(limit < 0) { + if(limit == -1) { + /* tandem repeat partition */ + tr_partition(ISAd - incr, first, first, last, &a, &b, last - SA - 1); + + /* update ranks */ + if(a < last) { + for(c = first, v = a - SA - 1; c < a; ++c) { ISA[*c] = v; } + } + if(b < last) { + for(c = a, v = b - SA - 1; c < b; ++c) { ISA[*c] = v; } + } + + /* push */ + if(1 < (b - a)) { + STACK_PUSH5(NULL, a, b, 0, 0); + STACK_PUSH5(ISAd - incr, first, last, -2, trlink); + trlink = ssize - 2; + } + if((a - first) <= (last - b)) { + if(1 < (a - first)) { + STACK_PUSH5(ISAd, b, last, tr_ilg(last - b), trlink); + last = a, limit = tr_ilg(a - first); + } else if(1 < (last - b)) { + first = b, limit = tr_ilg(last - b); + } else { + STACK_POP5(ISAd, first, last, limit, trlink); + } + } else { + if(1 < (last - b)) { + STACK_PUSH5(ISAd, first, a, tr_ilg(a - first), trlink); + first = b, limit = tr_ilg(last - b); + } else if(1 < (a - first)) { + last = a, limit = tr_ilg(a - first); + } else { + STACK_POP5(ISAd, first, last, limit, trlink); + } + } + } else if(limit == -2) { + /* tandem repeat copy */ + a = stack[--ssize].b, b = stack[ssize].c; + if(stack[ssize].d == 0) { + tr_copy(ISA, SA, first, a, b, last, ISAd - ISA); + } else { + if(0 <= trlink) { stack[trlink].d = -1; } + tr_partialcopy(ISA, SA, first, a, b, last, ISAd - ISA); + } + STACK_POP5(ISAd, first, last, limit, trlink); + } else { + /* sorted partition */ + if(0 <= *first) { + a = first; + do { ISA[*a] = a - SA; } while((++a < last) && (0 <= *a)); + first = a; + } + if(first < last) { + a = first; 
do { *a = ~*a; } while(*++a < 0); + next = (ISA[*a] != ISAd[*a]) ? tr_ilg(a - first + 1) : -1; + if(++a < last) { for(b = first, v = a - SA - 1; b < a; ++b) { ISA[*b] = v; } } + + /* push */ + if(trbudget_check(budget, a - first)) { + if((a - first) <= (last - a)) { + STACK_PUSH5(ISAd, a, last, -3, trlink); + ISAd += incr, last = a, limit = next; + } else { + if(1 < (last - a)) { + STACK_PUSH5(ISAd + incr, first, a, next, trlink); + first = a, limit = -3; + } else { + ISAd += incr, last = a, limit = next; + } + } + } else { + if(0 <= trlink) { stack[trlink].d = -1; } + if(1 < (last - a)) { + first = a, limit = -3; + } else { + STACK_POP5(ISAd, first, last, limit, trlink); + } + } + } else { + STACK_POP5(ISAd, first, last, limit, trlink); + } + } + continue; + } + + if((last - first) <= TR_INSERTIONSORT_THRESHOLD) { + tr_insertionsort(ISAd, first, last); + limit = -3; + continue; + } + + if(limit-- == 0) { + tr_heapsort(ISAd, first, last - first); + for(a = last - 1; first < a; a = b) { + for(x = ISAd[*a], b = a - 1; (first <= b) && (ISAd[*b] == x); --b) { *b = ~*b; } + } + limit = -3; + continue; + } + + /* choose pivot */ + a = tr_pivot(ISAd, first, last); + SWAP(*first, *a); + v = ISAd[*first]; + + /* partition */ + tr_partition(ISAd, first, first + 1, last, &a, &b, v); + if((last - first) != (b - a)) { + next = (ISA[*a] != v) ? 
tr_ilg(b - a) : -1; + + /* update ranks */ + for(c = first, v = a - SA - 1; c < a; ++c) { ISA[*c] = v; } + if(b < last) { for(c = a, v = b - SA - 1; c < b; ++c) { ISA[*c] = v; } } + + /* push */ + if((1 < (b - a)) && (trbudget_check(budget, b - a))) { + if((a - first) <= (last - b)) { + if((last - b) <= (b - a)) { + if(1 < (a - first)) { + STACK_PUSH5(ISAd + incr, a, b, next, trlink); + STACK_PUSH5(ISAd, b, last, limit, trlink); + last = a; + } else if(1 < (last - b)) { + STACK_PUSH5(ISAd + incr, a, b, next, trlink); + first = b; + } else { + ISAd += incr, first = a, last = b, limit = next; + } + } else if((a - first) <= (b - a)) { + if(1 < (a - first)) { + STACK_PUSH5(ISAd, b, last, limit, trlink); + STACK_PUSH5(ISAd + incr, a, b, next, trlink); + last = a; + } else { + STACK_PUSH5(ISAd, b, last, limit, trlink); + ISAd += incr, first = a, last = b, limit = next; + } + } else { + STACK_PUSH5(ISAd, b, last, limit, trlink); + STACK_PUSH5(ISAd, first, a, limit, trlink); + ISAd += incr, first = a, last = b, limit = next; + } + } else { + if((a - first) <= (b - a)) { + if(1 < (last - b)) { + STACK_PUSH5(ISAd + incr, a, b, next, trlink); + STACK_PUSH5(ISAd, first, a, limit, trlink); + first = b; + } else if(1 < (a - first)) { + STACK_PUSH5(ISAd + incr, a, b, next, trlink); + last = a; + } else { + ISAd += incr, first = a, last = b, limit = next; + } + } else if((last - b) <= (b - a)) { + if(1 < (last - b)) { + STACK_PUSH5(ISAd, first, a, limit, trlink); + STACK_PUSH5(ISAd + incr, a, b, next, trlink); + first = b; + } else { + STACK_PUSH5(ISAd, first, a, limit, trlink); + ISAd += incr, first = a, last = b, limit = next; + } + } else { + STACK_PUSH5(ISAd, first, a, limit, trlink); + STACK_PUSH5(ISAd, b, last, limit, trlink); + ISAd += incr, first = a, last = b, limit = next; + } + } + } else { + if((1 < (b - a)) && (0 <= trlink)) { stack[trlink].d = -1; } + if((a - first) <= (last - b)) { + if(1 < (a - first)) { + STACK_PUSH5(ISAd, b, last, limit, trlink); + last = a; + } 
else if(1 < (last - b)) { + first = b; + } else { + STACK_POP5(ISAd, first, last, limit, trlink); + } + } else { + if(1 < (last - b)) { + STACK_PUSH5(ISAd, first, a, limit, trlink); + first = b; + } else if(1 < (a - first)) { + last = a; + } else { + STACK_POP5(ISAd, first, last, limit, trlink); + } + } + } + } else { + if(trbudget_check(budget, last - first)) { + limit = tr_ilg(last - first), ISAd += incr; + } else { + if(0 <= trlink) { stack[trlink].d = -1; } + STACK_POP5(ISAd, first, last, limit, trlink); + } + } + } +#undef STACK_SIZE +} + + + +/*---------------------------------------------------------------------------*/ + +/*- Function -*/ + +/* Tandem repeat sort */ +void +trsort(saidx_t *ISA, saidx_t *SA, saidx_t n, saidx_t depth) { + saidx_t *ISAd; + saidx_t *first, *last; + trbudget_t budget; + saidx_t t, skip, unsorted; + + trbudget_init(&budget, tr_ilg(n) * 2 / 3, n); +/* trbudget_init(&budget, tr_ilg(n) * 3 / 4, n); */ + for(ISAd = ISA + depth; -n < *SA; ISAd += ISAd - ISA) { + first = SA; + skip = 0; + unsorted = 0; + do { + if((t = *first) < 0) { first -= t; skip += t; } + else { + if(skip != 0) { *(first + skip) = skip; skip = 0; } + last = SA + ISA[t] + 1; + if(1 < (last - first)) { + budget.count = 0; + tr_introsort(ISA, ISAd, SA, first, last, &budget); + if(budget.count != 0) { unsorted += budget.count; } + else { skip = first - last; } + } else if((last - first) == 1) { + skip = -1; + } + first = last; + } + } while(first < (SA + n)); + if(skip != 0) { *(first + skip) = skip; } + if(unsorted == 0) { break; } + } +} diff --git a/tools/z64compress/src/enc/enc.h b/tools/z64compress/src/enc/enc.h new file mode 100644 index 000000000..60bae3b96 --- /dev/null +++ b/tools/z64compress/src/enc/enc.h @@ -0,0 +1,59 @@ +#ifndef Z64COMPRESS_ENC_H_INCLUDED +#define Z64COMPRESS_ENC_H_INCLUDED + +int yazenc( + void *src + , unsigned src_sz + , void *dst + , unsigned *dst_sz + , void *_ctx +); +void *yazCtx_new(void); +void yazCtx_free(void *_ctx); +int 
yazdec(void *_src, void *_dst, unsigned dstSz, unsigned *srcSz); + +int lzoenc( + void *src + , unsigned src_sz + , void *dst + , unsigned *dst_sz + , void *_ctx +); +void *lzoCtx_new(void); +void lzoCtx_free(void *_ctx); + +int uclenc( + void *src + , unsigned src_sz + , void *dst + , unsigned *dst_sz + , void *_ctx +); + +int zx7enc( + void *src + , unsigned src_sz + , void *dst + , unsigned *dst_sz + , void *_ctx +); + +int +zlibenc( + void *_src + , unsigned src_sz + , void *_dst + , unsigned *dst_sz + , void *_ctx +); + +int aplenc( + void *_src + , unsigned src_sz + , void *_dst + , unsigned *dst_sz + , void *_ctx +); + +#endif /* Z64COMPRESS_ENC_H_INCLUDED */ + diff --git a/tools/z64compress/src/enc/libdeflate/.cirrus.yml b/tools/z64compress/src/enc/libdeflate/.cirrus.yml new file mode 100644 index 000000000..a4f5cad51 --- /dev/null +++ b/tools/z64compress/src/enc/libdeflate/.cirrus.yml @@ -0,0 +1,10 @@ +task: + freebsd_instance: + matrix: + - image_family: freebsd-12-3 + - image_family: freebsd-13-0 + install_script: pkg install -y cmake + script: + - cmake -B build -DLIBDEFLATE_BUILD_TESTS=1 + - cmake --build build + - ctest --test-dir build diff --git a/tools/z64compress/src/enc/libdeflate/.github/workflows/ci.yml b/tools/z64compress/src/enc/libdeflate/.github/workflows/ci.yml new file mode 100644 index 000000000..6902e8e16 --- /dev/null +++ b/tools/z64compress/src/enc/libdeflate/.github/workflows/ci.yml @@ -0,0 +1,192 @@ +name: CI +on: [pull_request] + +jobs: + x86_64-build-and-test: + name: Build and test (x86_64, ${{ matrix.os }}, ${{ matrix.compiler }}) + strategy: + matrix: + os: [ubuntu-20.04, ubuntu-18.04] + compiler: [gcc, clang] + runs-on: ${{ matrix.os }} + env: + CC: ${{ matrix.compiler }} + steps: + - uses: actions/checkout@v2 + - name: Install dependencies + run: | + sudo apt-get update + sudo apt-get install -y clang llvm libz-dev valgrind + - run: scripts/run_tests.sh + + other-arch-build-and-test: + name: Build and test (${{ matrix.arch 
}}, Debian Bullseye, ${{ matrix.compiler }}) + strategy: + matrix: + arch: [armv6, armv7, aarch64, s390x, ppc64le] + compiler: [gcc, clang] + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - uses: uraimo/run-on-arch-action@v2.2.0 + with: + arch: ${{ matrix.arch }} + distro: bullseye + githubToken: ${{ github.token }} + install: | + apt-get update + apt-get install -y build-essential cmake clang llvm libz-dev + run: | + tests=(regular) + if [ ${{matrix.compiler}} = clang ]; then + tests+=(ubsan) + fi + CC=${{matrix.compiler}} scripts/run_tests.sh "${tests[@]}" + + macos-build-and-test: + name: Build and test (macOS) + runs-on: macos-latest + env: + CFLAGS: -Werror -DLIBDEFLATE_ENABLE_ASSERTIONS + steps: + - uses: actions/checkout@v2 + - run: cmake -B build -DLIBDEFLATE_BUILD_TESTS=1 + - run: cmake --build build --verbose + - run: ctest --test-dir build + + windows-msys2-build-and-test: + name: Build and test (Windows, MSYS2, ${{matrix.sys}}) + runs-on: windows-latest + strategy: + matrix: + include: + - { sys: mingw64, env: x86_64 } + - { sys: mingw32, env: i686 } + defaults: + run: + shell: msys2 {0} + env: + CFLAGS: -Werror -DLIBDEFLATE_ENABLE_ASSERTIONS + steps: + - uses: actions/checkout@v2 + - uses: msys2/setup-msys2@v2 + with: + msystem: ${{matrix.sys}} + update: true + install: > + make + mingw-w64-${{matrix.env}}-cc + mingw-w64-${{matrix.env}}-cmake + mingw-w64-${{matrix.env}}-ninja + mingw-w64-${{matrix.env}}-zlib + - run: cmake -B build -G Ninja -DLIBDEFLATE_BUILD_TESTS=1 + - run: cmake --build build --verbose + - run: ctest --test-dir build + + windows-visualstudio-build-and-test: + name: Build and test (Windows, Visual Studio ${{matrix.toolset}}, ${{matrix.platform.vs}}) + strategy: + matrix: + platform: [ {vs: x64, vcpkg: x64-windows}, + {vs: Win32, vcpkg: x86-windows} ] + toolset: [v143, ClangCL] + runs-on: windows-latest + steps: + - uses: actions/checkout@v2 + - uses: microsoft/setup-msbuild@v1.1 + - run: vcpkg install 
zlib:${{matrix.platform.vcpkg}} + - run: > + echo C:\vcpkg\packages\zlib_${{matrix.platform.vcpkg}}\bin + | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append + - run: > + cmake -B build -G "Visual Studio 17 2022" -T ${{matrix.toolset}} + -A ${{matrix.platform.vs}} -DLIBDEFLATE_BUILD_TESTS=1 + -DCMAKE_C_FLAGS="/W4 /WX /DLIBDEFLATE_ENABLE_ASSERTIONS /IC:\vcpkg\packages\zlib_${{matrix.platform.vcpkg}}\include" + -DZLIB_LIBRARY=C:\vcpkg\packages\zlib_${{matrix.platform.vcpkg}}\lib\zlib.lib + - run: cmake --build build --verbose --config Debug + - run: ctest --test-dir build -C Debug + + windows-visualstudio-build: + name: Build (Windows, Visual Studio ${{matrix.toolset}}, ${{matrix.platform}}) + strategy: + matrix: + platform: [ARM64, ARM] + toolset: [v143, ClangCL] + exclude: # Exclude unsupported combinations + - platform: ARM + toolset: ClangCL + runs-on: windows-latest + steps: + - uses: actions/checkout@v2 + - uses: microsoft/setup-msbuild@v1.1 + - run: > + cmake -B build -G "Visual Studio 17 2022" -T ${{matrix.toolset}} + -A ${{matrix.platform}} -DCMAKE_C_FLAGS="/W4 /WX" + - run: cmake --build build --verbose + + run-clang-static-analyzer: + name: Run clang static analyzer + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - name: Install dependencies + run: | + sudo apt-get update + sudo apt-get install -y clang-tools + - run: scan-build cmake -B build -DLIBDEFLATE_BUILD_TESTS=1 + - run: scan-build cmake --build build --verbose + + run-shellcheck: + name: Run shellcheck + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - name: Install dependencies + run: | + sudo apt-get update + sudo apt-get install -y shellcheck + - name: Run shellcheck + run: shellcheck scripts/*.sh + + cross-compile-for-windows: + name: Cross compile for Windows + runs-on: ubuntu-latest + env: + CFLAGS: -Werror -DLIBDEFLATE_ENABLE_ASSERTIONS + steps: + - uses: actions/checkout@v2 + - name: Install dependencies + run: | + sudo apt-get update + sudo 
apt-get install -y gcc-mingw-w64-i686 gcc-mingw-w64-x86-64 libz-mingw-w64-dev + # Unfortunately Ubuntu doesn't have {i686,x86_64}-w64-mingw32-cmake like + # some distros have, so we have to provide our own toolchain files here. + - name: 32-bit build + run: | + scripts/cmake-helper.sh -DLIBDEFLATE_BUILD_TESTS=1 \ + -DCMAKE_TOOLCHAIN_FILE=scripts/toolchain-i686-w64-mingw32.cmake + cmake --build build --verbose + - name: 64-bit build + run: | + scripts/cmake-helper.sh -DLIBDEFLATE_BUILD_TESTS=1 \ + -DCMAKE_TOOLCHAIN_FILE=scripts/toolchain-x86_64-w64-mingw32.cmake + cmake --build build --verbose + + cross-compile-for-android: + name: Cross compile for ${{matrix.abi}} Android on ${{matrix.os}} + strategy: + matrix: + os: [ubuntu-18.04, ubuntu-20.04, macos-latest] + abi: [armeabi-v7a, arm64-v8a, x86, x86_64] + runs-on: ${{matrix.os}} + env: + CFLAGS: -Werror -DLIBDEFLATE_ENABLE_ASSERTIONS + steps: + - uses: actions/checkout@v2 + - run: | + scripts/cmake-helper.sh \ + -DCMAKE_TOOLCHAIN_FILE="$ANDROID_NDK_LATEST_HOME"/build/cmake/android.toolchain.cmake \ + -DANDROID_ABI=${{matrix.abi}} \ + -DANDROID_PLATFORM=28 \ + -DLIBDEFLATE_BUILD_TESTS=1 + cmake --build build --verbose diff --git a/tools/z64compress/src/enc/libdeflate/.gitignore b/tools/z64compress/src/enc/libdeflate/.gitignore new file mode 100644 index 000000000..3a696efc5 --- /dev/null +++ b/tools/z64compress/src/enc/libdeflate/.gitignore @@ -0,0 +1,3 @@ +/build* +cscope* +tags diff --git a/tools/z64compress/src/enc/libdeflate/COPYING b/tools/z64compress/src/enc/libdeflate/COPYING new file mode 100644 index 000000000..1f1b81cd5 --- /dev/null +++ b/tools/z64compress/src/enc/libdeflate/COPYING @@ -0,0 +1,21 @@ +Copyright 2016 Eric Biggers + +Permission is hereby granted, free of charge, to any person +obtaining a copy of this software and associated documentation files +(the "Software"), to deal in the Software without restriction, +including without limitation the rights to use, copy, modify, merge, +publish, 
distribute, sublicense, and/or sell copies of the Software, +and to permit persons to whom the Software is furnished to do so, +subject to the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN +ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/tools/z64compress/src/enc/libdeflate/NEWS.md b/tools/z64compress/src/enc/libdeflate/NEWS.md new file mode 100644 index 000000000..497ae2199 --- /dev/null +++ b/tools/z64compress/src/enc/libdeflate/NEWS.md @@ -0,0 +1,389 @@ +# libdeflate release notes + +## Version 1.15 + +* libdeflate now uses CMake instead of a plain Makefile. + +* Improved MSVC support. Enabled most architecture-specific code with MSVC, + fixed building with clang in MSVC compatibility mode, and other improvements. + +* When libdeflate is built with MinGW, the static library and import library are + now named using the MinGW convention (`*.a` and `*.dll.a`) instead of the + Visual Studio convention. This affects the official Windows binaries. + +## Version 1.14 + +Significantly improved decompression performance on all platforms. 
Examples +include (measuring DEFLATE only): + +| Platform | Speedup over v1.13 | +|------------------------------------|--------------------| +| x86_64 (Intel Comet Lake), gcc | 1.287x | +| x86_64 (Intel Comet Lake), clang | 1.437x | +| x86_64 (Intel Ice Lake), gcc | 1.332x | +| x86_64 (Intel Ice Lake), clang | 1.296x | +| x86_64 (Intel Sandy Bridge), gcc | 1.162x | +| x86_64 (Intel Sandy Bridge), clang | 1.092x | +| x86_64 (AMD Zen 2), gcc | 1.263x | +| x86_64 (AMD Zen 2), clang | 1.259x | +| i386 (Intel Comet Lake), gcc | 1.570x | +| i386 (Intel Comet Lake), clang | 1.344x | +| arm64 (Apple M1), clang | 1.306x | +| arm64 (Cortex-A76), clang | 1.355x | +| arm64 (Cortex-A55), clang | 1.190x | +| arm32 (Cortex-A76), clang | 1.665x | +| arm32 (Cortex-A55), clang | 1.283x | + +Thanks to Dougall Johnson (https://dougallj.wordpress.com/) for ideas for many +of the improvements. + +## Version 1.13 + +* Changed the 32-bit Windows build of the library to use the default calling + convention (cdecl) instead of stdcall, reverting a change from libdeflate 1.4. + +* Fixed a couple macOS compatibility issues with the gzip program. + +## Version 1.12 + +This release focuses on improving the performance of the CRC-32 and Adler-32 +checksum algorithms on x86 and ARM (both 32-bit and 64-bit). + +* Build updates: + + * Fixed building libdeflate on Apple platforms. + + * For Visual Studio builds, Visual Studio 2015 or later is now required. + +* CRC-32 algorithm updates: + + * Improved CRC-32 performance on short inputs on x86 and ARM. + + * Improved CRC-32 performance on Apple Silicon Macs by using a 12-way pmull + implementation. Performance on large inputs on M1 is now about 67 GB/s, + compared to 8 GB/s before, or 31 GB/s with the Apple-provided zlib. + + * Improved CRC-32 performance on some other ARM CPUs by reworking the code so + that multiple crc32 instructions can be issued in parallel. 
+ + * Improved CRC-32 performance on some x86 CPUs by increasing the stride length + of the pclmul implementation. + +* Adler-32 algorithm updates: + + * Improved Adler-32 performance on some x86 CPUs by optimizing the AVX-2 + implementation. E.g., performance on Zen 1 improved from 19 to 30 GB/s, and + on Ice Lake from 35 to 41 GB/s (if the AVX-512 implementation is excluded). + + * Removed the AVX-512 implementation of Adler-32 to avoid CPU frequency + downclocking, and because the AVX-2 implementation was made faster. + + * Improved Adler-32 performance on some ARM CPUs by optimizing the NEON + implementation. E.g., Apple M1 improved from about 36 to 52 GB/s. + +## Version 1.11 + +* Library updates: + + * Improved compression performance slightly. + + * Detect arm64 CPU features on Apple platforms, which should improve + performance in some areas such as CRC-32 computation. + +* Program updates: + + * The included `gzip` and `gunzip` programs now support the `-q` option. + + * The included `gunzip` program now passes through non-gzip data when both + the `-f` and `-c` options are used. + +* Build updates: + + * Avoided a build error on arm32 with certain gcc versions, by disabling + building `crc32_arm()` as dynamically-dispatched code when needed. + + * Support building with the LLVM toolchain on Windows. + + * Disabled the use of the "stdcall" ABI in static library builds on Windows. + + * Use the correct `install_name` in macOS builds. + + * Support Haiku builds. + +## Version 1.10 + +* Added an additional check to the decompressor to make it quickly detect + certain bad inputs and not try to generate an unbounded amount of output. + + Note: this was only a problem when decompressing with an unknown output size, + which isn't the recommended use case of libdeflate. However, + `libdeflate-gunzip` has to do this, and it would run out of memory as it would + keep trying to allocate a larger output buffer. + +* Fixed a build error on Solaris. 
+ +* Cleaned up a few things in the compression code. + +## Version 1.9 + +* Made many improvements to the compression algorithms, and rebalanced the + compression levels: + + * Heuristics were implemented which significantly improve the compression + ratio on data where short matches aren't useful, such as DNA sequencing + data. This applies to all compression levels, but primarily to levels 1-9. + + * Level 1 was made much faster, though it often compresses slightly worse than + before (but still better than zlib). + + * Levels 8-9 were also made faster, though they often compress slightly worse + than before (but still better than zlib). On some data, levels 8-9 are much + faster and compress much better than before; this change addressed an issue + where levels 8-9 did poorly on certain files. The algorithm used by levels + 8-9 is now more similar to that of levels 6-7 than to that of levels 10-12. + + * Levels 2-3, 7, and 10-12 were strengthened slightly. + + * Levels 4-6 were also strengthened slightly, but some of this improvement was + traded off to speed them up slightly as well. + + * Levels 1-9 had their per-compressor memory usage greatly reduced. + + As always, compression ratios will vary depending on the input data, and + compression speeds will vary depending on the input data and target platform. + +* `make install` will now install a pkg-config file for libdeflate. + +* The Makefile now supports the `DISABLE_SHARED` parameter to disable building + the shared library. + +* Improved the Android build support in the Makefile. + +## Version 1.8 + +* Added `-t` (test) option to `libdeflate-gunzip`. + +* Unaligned access optimizations are now enabled on WebAssembly builds. + +* Fixed a build error when building with the Intel C Compiler (ICC). + +* Fixed a build error when building with uClibc. + +* libdeflate's CI system has switched from Travis CI to GitHub Actions. + +* Made some improvements to test scripts. 
+ +## Version 1.7 + +* Added support for compression level 0, "no compression". + +* Added an ARM CRC32 instruction accelerated implementation of CRC32. + +* Added support for linking the programs to the shared library version of + libdeflate rather than to the static library version. + +* Made the compression level affect the minimum input size at which compression + is attempted. + +* Fixed undefined behavior in x86 Adler32 implementation. (No miscompilations + were observed in practice.) + +* Fixed undefined behavior in x86 CPU feature code. (No miscompilations were + observed in practice.) + +* Fixed installing shared lib symlink on macOS. + +* Documented third-party bindings. + +* Made a lot of improvements to the testing scripts and the CI configuration + file. + +* Lots of other small improvements and cleanups. + +## Version 1.6 + +* Prevented gcc 10 from miscompiling libdeflate (workaround for + https://gcc.gnu.org/bugzilla/show_bug.cgi?id=94994). + +* Removed workaround for gcc 5 and earlier producing slow code on ARM32. If + this affects you, please upgrade your compiler. + +* New API function: `libdeflate_zlib_decompress_ex()`. It provides the actual + size of the stream that was decompressed, like the gzip and DEFLATE + equivalents. + +* `libdeflate_zlib_decompress()` now accepts trailing bytes after the end of the + stream, like the gzip and DEFLATE equivalents. + +* Added support for custom memory allocators. (New API function: + `libdeflate_set_memory_allocator()`) + +* Added support for building the library in freestanding mode. + +* Building libdeflate no longer requires `CPPFLAGS=-Icommon`. + +## Version 1.5 + +* Fixed up stdcall support on 32-bit Windows: the functions are now exported + using both suffixed and non-suffixed names, and fixed `libdeflate.h` to be + MSVC-compatible again. + +## Version 1.4 + +* The 32-bit Windows build of libdeflate now uses the "stdcall" calling + convention instead of "cdecl". 
If you're calling `libdeflate.dll` directly + from C or C++, you'll need to recompile your code. If you're calling it from + another language, or calling it indirectly using `LoadLibrary()`, you'll need + to update your code to use the stdcall calling convention. + +* The Makefile now supports building libdeflate as a shared + library (`.dylib`) on macOS. + +* Fixed a bug where support for certain optimizations and optional features + (file access hints and more precise timestamps) was incorrectly omitted when + libdeflate was compiled with `-Werror`. + +* Added `make check` target to the Makefile. + +* Added CI configuration files. + +## Version 1.3 + +* `make install` now supports customizing the directories into which binaries, + headers, and libraries are installed. + +* `make install` now installs into `/usr/local` by default. To change it, use + e.g. `make install PREFIX=/usr`. + +* `make install` now works on more platforms. + +* The Makefile now supports overriding the optimization flags. + +* The compression functions now correctly handle an output data buffer >= 4 GiB + in size, and `gzip` and `gunzip` now correctly handle multi-gigabyte files (if + enough memory is available). + +## Version 1.2 + +* Slight improvements to decompression speed. + +* Added an AVX-512BW implementation of Adler-32. + +* The Makefile now supports a user-specified installation `PREFIX`. + +* Fixed build error with some Visual Studio versions. + +## Version 1.1 + +* Fixed crash in CRC-32 code when the prebuilt libdeflate for 32-bit Windows was + called by a program built with Visual Studio. + +* Improved the worst-case decompression speed of malicious data. + +* Fixed build error when compiling for an ARM processor without hardware + floating point support. + +* Improved performance on the PowerPC64 architecture. + +* Added soname to `libdeflate.so`, to make packaging easier. + +* Added `make install` target to the Makefile. 
+ +* The Makefile now supports user-specified `CPPFLAGS`. + +* The Windows binary releases now include the import library for + `libdeflate.dll`. `libdeflate.lib` is now the import library, and + `libdeflatestatic.lib` is the static library. + +## Version 1.0 + +* Added support for multi-member gzip files. + +* Moved architecture-specific code into subdirectories. If you aren't using the + provided Makefile to build libdeflate, you now need to compile `lib/*.c` and + `lib/*/*.c` instead of just `lib/*.c`. + +* Added an ARM PMULL implementation of CRC-32, which speeds up gzip compression + and decompression on 32-bit and 64-bit ARM processors that have the + Cryptography Extensions. + +* Improved detection of CPU features, resulting in accelerated functions being + used in more cases. This includes: + + * Detect CPU features on 32-bit x86, not just 64-bit as was done previously. + + * Detect CPU features on ARM, both 32 and 64-bit. (Limited to Linux only + currently.) + +## Version 0.8 + +* Build fixes for certain platforms and compilers. + +* libdeflate now produces the same output on all CPU architectures. + +* Improved documentation for building libdeflate on Windows. + +## Version 0.7 + +* Fixed a very rare bug that caused data to be compressed incorrectly. The bug + affected compression levels 7 and below since libdeflate v0.2. Although there + have been no user reports of the bug, and I believe it would have been highly + unlikely to encounter on realistic data, it could occur on data specially + crafted to reproduce it. + +* Fixed a compilation error when building with clang 3.7. + +## Version 0.6 + +* Various improvements to the gzip program's behavior. + +* Faster CRC-32 on AVX-capable processors. + +* Other minor changes. + +## Version 0.5 + +* The CRC-32 checksum algorithm has been optimized with carryless multiplication + instructions for `x86_64` (PCLMUL). This speeds up gzip compression and + decompression. 
+ +* Build fixes for certain platforms and compilers. + +* Added more test programs and scripts. + +* libdeflate is now entirely MIT-licensed. + +## Version 0.4 + +* The Adler-32 checksum algorithm has been optimized with vector instructions + for `x86_64` (SSE2 and AVX2) and ARM (NEON). This speeds up zlib compression + and decompression. + +* To avoid naming collisions, functions and definitions in libdeflate's API have + been renamed to be prefixed with `libdeflate_` or `LIBDEFLATE_`. Programs + using the old API will need to be updated. + +* Various bug fixes and other improvements. + +## Version 0.3 + +* Some bug fixes and other minor changes. + +## Version 0.2 + +* Implemented a new block splitting algorithm which typically improves the + compression ratio slightly at all compression levels. + +* The compressor now outputs each block using the cheapest type (dynamic + Huffman, static Huffman, or uncompressed). + +* The gzip program has received an overhaul and now behaves more like the + standard version. + +* Build system updates, including: some build options were changed and some + build options were removed, and the default 'make' target now includes the + gzip program as well as the library. + +## Version 0.1 + +* Initial official release. diff --git a/tools/z64compress/src/enc/libdeflate/README.md b/tools/z64compress/src/enc/libdeflate/README.md new file mode 100644 index 000000000..f5bbd93c2 --- /dev/null +++ b/tools/z64compress/src/enc/libdeflate/README.md @@ -0,0 +1,204 @@ +# Overview + +libdeflate is a library for fast, whole-buffer DEFLATE-based compression and +decompression. + +The supported formats are: + +- DEFLATE (raw) +- zlib (a.k.a. DEFLATE with a zlib wrapper) +- gzip (a.k.a. DEFLATE with a gzip wrapper) + +libdeflate is heavily optimized. It is significantly faster than the zlib +library, both for compression and decompression, and especially on x86 +processors. 
In addition, libdeflate provides optional high compression modes +that provide a better compression ratio than zlib's "level 9". + +libdeflate itself is a library. The following command-line programs which use +this library are also included: + +* `libdeflate-gzip`, a program which can be a drop-in replacement for standard + `gzip` under some circumstances. Note that `libdeflate-gzip` has some + limitations; it is provided for convenience and is **not** meant to be the + main use case of libdeflate. It needs a lot of memory to process large files, + and it omits support for some infrequently-used options of GNU gzip. + +* `benchmark`, a test program that does round-trip compression and decompression + of the provided data, and measures the compression and decompression speed. + It can use libdeflate, zlib, or a combination of the two. + +* `checksum`, a test program that checksums the provided data with Adler-32 or + CRC-32, and optionally measures the speed. It can use libdeflate or zlib. + +For the release notes, see the [NEWS file](NEWS.md). + +## Table of Contents + +- [Building](#building) + - [Using CMake](#using-cmake) + - [Directly integrating the library sources](#directly-integrating-the-library-sources) +- [API](#api) +- [Bindings for other programming languages](#bindings-for-other-programming-languages) +- [DEFLATE vs. zlib vs. gzip](#deflate-vs-zlib-vs-gzip) +- [Compression levels](#compression-levels) +- [Motivation](#motivation) +- [License](#license) + +# Building + +## Using CMake + +libdeflate uses [CMake](https://cmake.org/). It can be built just like any +other CMake project, e.g. 
with: + + cmake -B build && cmake --build build + +By default the following targets are built: + +- The static library (normally called `libdeflate.a`) +- The shared library (normally called `libdeflate.so`) +- The `libdeflate-gzip` program, including its alias `libdeflate-gunzip` + +Besides the standard CMake build and installation options, there are some +libdeflate-specific build options. See `CMakeLists.txt` for the list of these +options. To set an option, add `-DOPTION=VALUE` to the `cmake` command. + +Prebuilt Windows binaries can be downloaded from +https://github.com/ebiggers/libdeflate/releases. + +## Directly integrating the library sources + +Although the official build system is CMake, care has been taken to keep the +library source files compilable directly, without a prerequisite configuration +step. Therefore, it is also fine to just add the library source files directly +to your application, without using CMake. + +You should compile both `lib/*.c` and `lib/*/*.c`. You don't need to worry +about excluding irrelevant architecture-specific code, as this is already +handled in the source files themselves using `#ifdef`s. + +It is strongly recommended to use either gcc or clang, and to use `-O2`. + +If you are doing a freestanding build with `-ffreestanding`, you must add +`-DFREESTANDING` as well (matching what the `CMakeLists.txt` does). + +# API + +libdeflate has a simple API that is not zlib-compatible. You can create +compressors and decompressors and use them to compress or decompress buffers. +See libdeflate.h for details. + +There is currently no support for streaming. This has been considered, but it +always significantly increases complexity and slows down fast paths. +Unfortunately, at this point it remains a future TODO. So: if your application +compresses data in "chunks", say, less than 1 MB in size, then libdeflate is a +great choice for you; that's what it's designed to do. 
This is perfect for +certain use cases such as transparent filesystem compression. But if your +application compresses large files as a single compressed stream, similarly to +the `gzip` program, then libdeflate isn't for you. + +Note that with chunk-based compression, you generally should have the +uncompressed size of each chunk stored outside of the compressed data itself. +This enables you to allocate an output buffer of the correct size without +guessing. However, libdeflate's decompression routines do optionally provide +the actual number of output bytes in case you need it. + +Windows developers: note that the calling convention of libdeflate.dll is +"cdecl". (libdeflate v1.4 through v1.12 used "stdcall" instead.) + +# Bindings for other programming languages + +The libdeflate project itself only provides a C library. If you need to use +libdeflate from a programming language other than C or C++, consider using the +following bindings: + +* C#: [LibDeflate.NET](https://github.com/jzebedee/LibDeflate.NET) +* Go: [go-libdeflate](https://github.com/4kills/go-libdeflate) +* Java: [libdeflate-java](https://github.com/astei/libdeflate-java) +* Julia: [LibDeflate.jl](https://github.com/jakobnissen/LibDeflate.jl) +* Perl: [Gzip::Libdeflate](https://github.com/benkasminbullock/gzip-libdeflate) +* Python: [deflate](https://github.com/dcwatson/deflate) +* Ruby: [libdeflate-ruby](https://github.com/kaorimatz/libdeflate-ruby) +* Rust: [libdeflater](https://github.com/adamkewley/libdeflater) + +Note: these are third-party projects which haven't necessarily been vetted by +the authors of libdeflate. Please direct all questions, bugs, and improvements +for these bindings to their authors. + +# DEFLATE vs. zlib vs. 
gzip + +The DEFLATE format ([rfc1951](https://www.ietf.org/rfc/rfc1951.txt)), the zlib +format ([rfc1950](https://www.ietf.org/rfc/rfc1950.txt)), and the gzip format +([rfc1952](https://www.ietf.org/rfc/rfc1952.txt)) are commonly confused with +each other as well as with the [zlib software library](http://zlib.net), which +actually supports all three formats. libdeflate (this library) also supports +all three formats. + +Briefly, DEFLATE is a raw compressed stream, whereas zlib and gzip are different +wrappers for this stream. Both zlib and gzip include checksums, but gzip can +include extra information such as the original filename. Generally, you should +choose a format as follows: + +- If you are compressing whole files with no subdivisions, similar to the `gzip` + program, you probably should use the gzip format. +- Otherwise, if you don't need the features of the gzip header and footer but do + still want a checksum for corruption detection, you probably should use the + zlib format. +- Otherwise, you probably should use raw DEFLATE. This is ideal if you don't + need checksums, e.g. because they're simply not needed for your use case or + because you already compute your own checksums that are stored separately from + the compressed stream. + +Note that gzip and zlib streams can be distinguished from each other based on +their starting bytes, but this is not necessarily true of raw DEFLATE streams. + +# Compression levels + +An often-underappreciated fact of compression formats such as DEFLATE is that +there are an enormous number of different ways that a given input could be +compressed. Different algorithms and different amounts of computation time will +result in different compression ratios, while remaining equally compatible with +the decompressor. + +For this reason, the commonly used zlib library provides nine compression +levels. Level 1 is the fastest but provides the worst compression; level 9 +provides the best compression but is the slowest. 
It defaults to level 6. +libdeflate uses this same design but is designed to improve on both zlib's +performance *and* compression ratio at every compression level. In addition, +libdeflate's levels go [up to 12](https://xkcd.com/670/) to make room for a +minimum-cost-path based algorithm (sometimes called "optimal parsing") that can +significantly improve on zlib's compression ratio. + +If you are using DEFLATE (or zlib, or gzip) in your application, you should test +different levels to see which works best for your application. + +# Motivation + +Despite DEFLATE's widespread use mainly through the zlib library, in the +compression community this format from the early 1990s is often considered +obsolete. And in a few significant ways, it is. + +So why implement DEFLATE at all, instead of focusing entirely on +bzip2/LZMA/xz/LZ4/LZX/ZSTD/Brotli/LZHAM/LZFSE/[insert cool new format here]? + +To do something better, you need to understand what came before. And it turns +out that most ideas from DEFLATE are still relevant. Many of the newer formats +share a structure similar to DEFLATE, with different tweaks. The effects of +trivial but very useful tweaks, such as increasing the sliding window size, are +often confused with the effects of nontrivial but less useful tweaks. And +actually, many of these formats are similar enough that common algorithms and +optimizations (e.g. those dealing with LZ77 matchfinding) can be reused. + +In addition, comparing compressors fairly is difficult because the performance +of a compressor depends heavily on optimizations which are not intrinsic to the +compression format itself. In this respect, the zlib library sometimes compares +poorly to certain newer code because zlib is not well optimized for modern +processors. libdeflate addresses this by providing an optimized DEFLATE +implementation which can be used for benchmarking purposes. And, of course, +real applications can use it as well. 
+ +# License + +libdeflate is [MIT-licensed](COPYING). + +I am not aware of any patents or patent applications relevant to libdeflate. diff --git a/tools/z64compress/src/enc/libdeflate/common_defs.h b/tools/z64compress/src/enc/libdeflate/common_defs.h new file mode 100644 index 000000000..debdc7d41 --- /dev/null +++ b/tools/z64compress/src/enc/libdeflate/common_defs.h @@ -0,0 +1,716 @@ +/* + * common_defs.h + * + * Copyright 2016 Eric Biggers + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef COMMON_DEFS_H +#define COMMON_DEFS_H + +#include +#include /* for size_t */ +#include +#ifdef _MSC_VER +# include /* for _BitScan*() and other intrinsics */ +# include /* for _byteswap_*() */ + /* Disable MSVC warnings that are expected. 
*/ + /* /W2 */ +# pragma warning(disable : 4146) /* unary minus on unsigned type */ + /* /W3 */ +# pragma warning(disable : 4018) /* signed/unsigned mismatch */ +# pragma warning(disable : 4244) /* possible loss of data */ +# pragma warning(disable : 4267) /* possible loss of precision */ +# pragma warning(disable : 4310) /* cast truncates constant value */ + /* /W4 */ +# pragma warning(disable : 4100) /* unreferenced formal parameter */ +# pragma warning(disable : 4127) /* conditional expression is constant */ +# pragma warning(disable : 4189) /* local variable initialized but not referenced */ +# pragma warning(disable : 4232) /* nonstandard extension used */ +# pragma warning(disable : 4245) /* conversion from 'int' to 'unsigned int' */ +# pragma warning(disable : 4295) /* array too small to include terminating null */ +#endif +#ifndef FREESTANDING +# include /* for memcpy() */ +#endif + +/* ========================================================================== */ +/* Target architecture */ +/* ========================================================================== */ + +/* If possible, define a compiler-independent ARCH_* macro. 
*/ +#undef ARCH_X86_64 +#undef ARCH_X86_32 +#undef ARCH_ARM64 +#undef ARCH_ARM32 +#ifdef _MSC_VER +# if defined(_M_X64) +# define ARCH_X86_64 +# elif defined(_M_IX86) +# define ARCH_X86_32 +# elif defined(_M_ARM64) +# define ARCH_ARM64 +# elif defined(_M_ARM) +# define ARCH_ARM32 +# endif +#else +# if defined(__x86_64__) +# define ARCH_X86_64 +# elif defined(__i386__) +# define ARCH_X86_32 +# elif defined(__aarch64__) +# define ARCH_ARM64 +# elif defined(__arm__) +# define ARCH_ARM32 +# endif +#endif + +/* ========================================================================== */ +/* Type definitions */ +/* ========================================================================== */ + +/* Fixed-width integer types */ +typedef uint8_t u8; +typedef uint16_t u16; +typedef uint32_t u32; +typedef uint64_t u64; +typedef int8_t s8; +typedef int16_t s16; +typedef int32_t s32; +typedef int64_t s64; + +/* ssize_t, if not available in */ +#ifdef _MSC_VER +# ifdef _WIN64 + typedef long long ssize_t; +# else + typedef long ssize_t; +# endif +#endif + +/* + * Word type of the target architecture. Use 'size_t' instead of + * 'unsigned long' to account for platforms such as Windows that use 32-bit + * 'unsigned long' on 64-bit architectures. + */ +typedef size_t machine_word_t; + +/* Number of bytes in a word */ +#define WORDBYTES ((int)sizeof(machine_word_t)) + +/* Number of bits in a word */ +#define WORDBITS (8 * WORDBYTES) + +/* ========================================================================== */ +/* Optional compiler features */ +/* ========================================================================== */ + +/* Compiler version checks. Only use when absolutely necessary. 
*/ +#if defined(__GNUC__) && !defined(__clang__) && !defined(__INTEL_COMPILER) +# define GCC_PREREQ(major, minor) \ + (__GNUC__ > (major) || \ + (__GNUC__ == (major) && __GNUC_MINOR__ >= (minor))) +#else +# define GCC_PREREQ(major, minor) 0 +#endif +#ifdef __clang__ +# ifdef __apple_build_version__ +# define CLANG_PREREQ(major, minor, apple_version) \ + (__apple_build_version__ >= (apple_version)) +# else +# define CLANG_PREREQ(major, minor, apple_version) \ + (__clang_major__ > (major) || \ + (__clang_major__ == (major) && __clang_minor__ >= (minor))) +# endif +#else +# define CLANG_PREREQ(major, minor, apple_version) 0 +#endif + +/* + * Macros to check for compiler support for attributes and builtins. clang + * implements these macros, but gcc doesn't, so generally any use of one of + * these macros must also be combined with a gcc version check. + */ +#ifndef __has_attribute +# define __has_attribute(attribute) 0 +#endif +#ifndef __has_builtin +# define __has_builtin(builtin) 0 +#endif + +/* inline - suggest that a function be inlined */ +#ifdef _MSC_VER +# define inline __inline +#endif /* else assume 'inline' is usable as-is */ + +/* forceinline - force a function to be inlined, if possible */ +#if defined(__GNUC__) || __has_attribute(always_inline) +# define forceinline inline __attribute__((always_inline)) +#elif defined(_MSC_VER) +# define forceinline __forceinline +#else +# define forceinline inline +#endif + +/* MAYBE_UNUSED - mark a function or variable as maybe unused */ +#if defined(__GNUC__) || __has_attribute(unused) +# define MAYBE_UNUSED __attribute__((unused)) +#else +# define MAYBE_UNUSED +#endif + +/* + * restrict - hint that writes only occur through the given pointer. + * + * Don't use MSVC's __restrict, since it has nonstandard behavior. + * Standard restrict is okay, if it is supported. 
+ */ +#if !defined(__STDC_VERSION__) || (__STDC_VERSION__ < 201112L) +# if defined(__GNUC__) || defined(__clang__) +# define restrict __restrict__ +# else +# define restrict +# endif +#endif /* else assume 'restrict' is usable as-is */ + +/* likely(expr) - hint that an expression is usually true */ +#if defined(__GNUC__) || __has_builtin(__builtin_expect) +# define likely(expr) __builtin_expect(!!(expr), 1) +#else +# define likely(expr) (expr) +#endif + +/* unlikely(expr) - hint that an expression is usually false */ +#if defined(__GNUC__) || __has_builtin(__builtin_expect) +# define unlikely(expr) __builtin_expect(!!(expr), 0) +#else +# define unlikely(expr) (expr) +#endif + +/* prefetchr(addr) - prefetch into L1 cache for read */ +#undef prefetchr +#if defined(__GNUC__) || __has_builtin(__builtin_prefetch) +# define prefetchr(addr) __builtin_prefetch((addr), 0) +#elif defined(_MSC_VER) +# if defined(ARCH_X86_32) || defined(ARCH_X86_64) +# define prefetchr(addr) _mm_prefetch((addr), _MM_HINT_T0) +# elif defined(ARCH_ARM64) +# define prefetchr(addr) __prefetch2((addr), 0x00 /* prfop=PLDL1KEEP */) +# elif defined(ARCH_ARM32) +# define prefetchr(addr) __prefetch(addr) +# endif +#endif +#ifndef prefetchr +# define prefetchr(addr) +#endif + +/* prefetchw(addr) - prefetch into L1 cache for write */ +#undef prefetchw +#if defined(__GNUC__) || __has_builtin(__builtin_prefetch) +# define prefetchw(addr) __builtin_prefetch((addr), 1) +#elif defined(_MSC_VER) +# if defined(ARCH_X86_32) || defined(ARCH_X86_64) +# define prefetchw(addr) _m_prefetchw(addr) +# elif defined(ARCH_ARM64) +# define prefetchw(addr) __prefetch2((addr), 0x10 /* prfop=PSTL1KEEP */) +# elif defined(ARCH_ARM32) +# define prefetchw(addr) __prefetchw(addr) +# endif +#endif +#ifndef prefetchw +# define prefetchw(addr) +#endif + +/* + * _aligned_attribute(n) - declare that the annotated variable, or variables of + * the annotated type, must be aligned on n-byte boundaries. 
+ */ +#undef _aligned_attribute +#if defined(__GNUC__) || __has_attribute(aligned) +# define _aligned_attribute(n) __attribute__((aligned(n))) +#elif defined(_MSC_VER) +# define _aligned_attribute(n) __declspec(align(n)) +#endif + +/* + * _target_attribute(attrs) - override the compilation target for a function. + * + * This accepts one or more comma-separated suffixes to the -m prefix jointly + * forming the name of a machine-dependent option. On gcc-like compilers, this + * enables codegen for the given targets, including arbitrary compiler-generated + * code as well as the corresponding intrinsics. On other compilers this macro + * expands to nothing, though MSVC allows intrinsics to be used anywhere anyway. + */ +#if GCC_PREREQ(4, 4) || __has_attribute(target) +# define _target_attribute(attrs) __attribute__((target(attrs))) +# define COMPILER_SUPPORTS_TARGET_FUNCTION_ATTRIBUTE 1 +#else +# define _target_attribute(attrs) +# define COMPILER_SUPPORTS_TARGET_FUNCTION_ATTRIBUTE 0 +#endif + +/* ========================================================================== */ +/* Miscellaneous macros */ +/* ========================================================================== */ + +#define ARRAY_LEN(A) (sizeof(A) / sizeof((A)[0])) +#define MIN(a, b) ((a) <= (b) ? (a) : (b)) +#define MAX(a, b) ((a) >= (b) ? (a) : (b)) +#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d)) +#define STATIC_ASSERT(expr) ((void)sizeof(char[1 - 2 * !(expr)])) +#define ALIGN(n, a) (((n) + (a) - 1) & ~((a) - 1)) +#define ROUND_UP(n, d) ((d) * DIV_ROUND_UP((n), (d))) + +/* ========================================================================== */ +/* Endianness handling */ +/* ========================================================================== */ + +/* + * CPU_IS_LITTLE_ENDIAN() - 1 if the CPU is little endian, or 0 if it is big + * endian. When possible this is a compile-time macro that can be used in + * preprocessor conditionals. 
As a fallback, a generic method is used that + * can't be used in preprocessor conditionals but should still be optimized out. + */ +#if defined(__BYTE_ORDER__) /* gcc v4.6+ and clang */ +# define CPU_IS_LITTLE_ENDIAN() (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) +#elif defined(_MSC_VER) +# define CPU_IS_LITTLE_ENDIAN() true +#else +static forceinline bool CPU_IS_LITTLE_ENDIAN(void) +{ + union { + u32 w; + u8 b; + } u; + + u.w = 1; + return u.b; +} +#endif + +/* bswap16(v) - swap the bytes of a 16-bit integer */ +static forceinline u16 bswap16(u16 v) +{ +#if GCC_PREREQ(4, 8) || __has_builtin(__builtin_bswap16) + return __builtin_bswap16(v); +#elif defined(_MSC_VER) + return _byteswap_ushort(v); +#else + return (v << 8) | (v >> 8); +#endif +} + +/* bswap32(v) - swap the bytes of a 32-bit integer */ +static forceinline u32 bswap32(u32 v) +{ +#if GCC_PREREQ(4, 3) || __has_builtin(__builtin_bswap32) + return __builtin_bswap32(v); +#elif defined(_MSC_VER) + return _byteswap_ulong(v); +#else + return ((v & 0x000000FF) << 24) | + ((v & 0x0000FF00) << 8) | + ((v & 0x00FF0000) >> 8) | + ((v & 0xFF000000) >> 24); +#endif +} + +/* bswap64(v) - swap the bytes of a 64-bit integer */ +static forceinline u64 bswap64(u64 v) +{ +#if GCC_PREREQ(4, 3) || __has_builtin(__builtin_bswap64) + return __builtin_bswap64(v); +#elif defined(_MSC_VER) + return _byteswap_uint64(v); +#else + return ((v & 0x00000000000000FF) << 56) | + ((v & 0x000000000000FF00) << 40) | + ((v & 0x0000000000FF0000) << 24) | + ((v & 0x00000000FF000000) << 8) | + ((v & 0x000000FF00000000) >> 8) | + ((v & 0x0000FF0000000000) >> 24) | + ((v & 0x00FF000000000000) >> 40) | + ((v & 0xFF00000000000000) >> 56); +#endif +} + +#define le16_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? (v) : bswap16(v)) +#define le32_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? (v) : bswap32(v)) +#define le64_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? (v) : bswap64(v)) +#define be16_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? 
bswap16(v) : (v)) +#define be32_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? bswap32(v) : (v)) +#define be64_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? bswap64(v) : (v)) + +/* ========================================================================== */ +/* Unaligned memory accesses */ +/* ========================================================================== */ + +/* + * UNALIGNED_ACCESS_IS_FAST() - 1 if unaligned memory accesses can be performed + * efficiently on the target platform, otherwise 0. + */ +#if (defined(__GNUC__) || defined(__clang__)) && \ + (defined(ARCH_X86_64) || defined(ARCH_X86_32) || \ + defined(__ARM_FEATURE_UNALIGNED) || defined(__powerpc64__) || \ + /* + * For all compilation purposes, WebAssembly behaves like any other CPU + * instruction set. Even though WebAssembly engine might be running on + * top of different actual CPU architectures, the WebAssembly spec + * itself permits unaligned access and it will be fast on most of those + * platforms, and simulated at the engine level on others, so it's + * worth treating it as a CPU architecture with fast unaligned access. + */ defined(__wasm__)) +# define UNALIGNED_ACCESS_IS_FAST 1 +#elif defined(_MSC_VER) +# define UNALIGNED_ACCESS_IS_FAST 1 +#else +# define UNALIGNED_ACCESS_IS_FAST 0 +#endif + +/* + * Implementing unaligned memory accesses using memcpy() is portable, and it + * usually gets optimized appropriately by modern compilers. I.e., each + * memcpy() of 1, 2, 4, or WORDBYTES bytes gets compiled to a load or store + * instruction, not to an actual function call. + * + * We no longer use the "packed struct" approach to unaligned accesses, as that + * is nonstandard, has unclear semantics, and doesn't receive enough testing + * (see https://gcc.gnu.org/bugzilla/show_bug.cgi?id=94994). + * + * arm32 with __ARM_FEATURE_UNALIGNED in gcc 5 and earlier is a known exception + * where memcpy() generates inefficient code + * (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=67366). 
However, we no longer + * consider that one case important enough to maintain different code for. + * If you run into it, please just use a newer version of gcc (or use clang). + */ + +#ifdef FREESTANDING +# define MEMCOPY __builtin_memcpy +#else +# define MEMCOPY memcpy +#endif + +/* Unaligned loads and stores without endianness conversion */ + +#define DEFINE_UNALIGNED_TYPE(type) \ +static forceinline type \ +load_##type##_unaligned(const void *p) \ +{ \ + type v; \ + \ + MEMCOPY(&v, p, sizeof(v)); \ + return v; \ +} \ + \ +static forceinline void \ +store_##type##_unaligned(type v, void *p) \ +{ \ + MEMCOPY(p, &v, sizeof(v)); \ +} + +DEFINE_UNALIGNED_TYPE(u16) +DEFINE_UNALIGNED_TYPE(u32) +DEFINE_UNALIGNED_TYPE(u64) +DEFINE_UNALIGNED_TYPE(machine_word_t) + +#undef MEMCOPY + +#define load_word_unaligned load_machine_word_t_unaligned +#define store_word_unaligned store_machine_word_t_unaligned + +/* Unaligned loads with endianness conversion */ + +static forceinline u16 +get_unaligned_le16(const u8 *p) +{ + if (UNALIGNED_ACCESS_IS_FAST) + return le16_bswap(load_u16_unaligned(p)); + else + return ((u16)p[1] << 8) | p[0]; +} + +static forceinline u16 +get_unaligned_be16(const u8 *p) +{ + if (UNALIGNED_ACCESS_IS_FAST) + return be16_bswap(load_u16_unaligned(p)); + else + return ((u16)p[0] << 8) | p[1]; +} + +static forceinline u32 +get_unaligned_le32(const u8 *p) +{ + if (UNALIGNED_ACCESS_IS_FAST) + return le32_bswap(load_u32_unaligned(p)); + else + return ((u32)p[3] << 24) | ((u32)p[2] << 16) | + ((u32)p[1] << 8) | p[0]; +} + +static forceinline u32 +get_unaligned_be32(const u8 *p) +{ + if (UNALIGNED_ACCESS_IS_FAST) + return be32_bswap(load_u32_unaligned(p)); + else + return ((u32)p[0] << 24) | ((u32)p[1] << 16) | + ((u32)p[2] << 8) | p[3]; +} + +static forceinline u64 +get_unaligned_le64(const u8 *p) +{ + if (UNALIGNED_ACCESS_IS_FAST) + return le64_bswap(load_u64_unaligned(p)); + else + return ((u64)p[7] << 56) | ((u64)p[6] << 48) | + ((u64)p[5] << 40) | ((u64)p[4] 
<< 32) | + ((u64)p[3] << 24) | ((u64)p[2] << 16) | + ((u64)p[1] << 8) | p[0]; +} + +static forceinline machine_word_t +get_unaligned_leword(const u8 *p) +{ + STATIC_ASSERT(WORDBITS == 32 || WORDBITS == 64); + if (WORDBITS == 32) + return get_unaligned_le32(p); + else + return get_unaligned_le64(p); +} + +/* Unaligned stores with endianness conversion */ + +static forceinline void +put_unaligned_le16(u16 v, u8 *p) +{ + if (UNALIGNED_ACCESS_IS_FAST) { + store_u16_unaligned(le16_bswap(v), p); + } else { + p[0] = (u8)(v >> 0); + p[1] = (u8)(v >> 8); + } +} + +static forceinline void +put_unaligned_be16(u16 v, u8 *p) +{ + if (UNALIGNED_ACCESS_IS_FAST) { + store_u16_unaligned(be16_bswap(v), p); + } else { + p[0] = (u8)(v >> 8); + p[1] = (u8)(v >> 0); + } +} + +static forceinline void +put_unaligned_le32(u32 v, u8 *p) +{ + if (UNALIGNED_ACCESS_IS_FAST) { + store_u32_unaligned(le32_bswap(v), p); + } else { + p[0] = (u8)(v >> 0); + p[1] = (u8)(v >> 8); + p[2] = (u8)(v >> 16); + p[3] = (u8)(v >> 24); + } +} + +static forceinline void +put_unaligned_be32(u32 v, u8 *p) +{ + if (UNALIGNED_ACCESS_IS_FAST) { + store_u32_unaligned(be32_bswap(v), p); + } else { + p[0] = (u8)(v >> 24); + p[1] = (u8)(v >> 16); + p[2] = (u8)(v >> 8); + p[3] = (u8)(v >> 0); + } +} + +static forceinline void +put_unaligned_le64(u64 v, u8 *p) +{ + if (UNALIGNED_ACCESS_IS_FAST) { + store_u64_unaligned(le64_bswap(v), p); + } else { + p[0] = (u8)(v >> 0); + p[1] = (u8)(v >> 8); + p[2] = (u8)(v >> 16); + p[3] = (u8)(v >> 24); + p[4] = (u8)(v >> 32); + p[5] = (u8)(v >> 40); + p[6] = (u8)(v >> 48); + p[7] = (u8)(v >> 56); + } +} + +static forceinline void +put_unaligned_leword(machine_word_t v, u8 *p) +{ + STATIC_ASSERT(WORDBITS == 32 || WORDBITS == 64); + if (WORDBITS == 32) + put_unaligned_le32(v, p); + else + put_unaligned_le64(v, p); +} + +/* ========================================================================== */ +/* Bit manipulation functions */ +/* 
========================================================================== */ + +/* + * Bit Scan Reverse (BSR) - find the 0-based index (relative to the least + * significant end) of the *most* significant 1 bit in the input value. The + * input value must be nonzero! + */ + +static forceinline unsigned +bsr32(u32 v) +{ +#if defined(__GNUC__) || __has_builtin(__builtin_clz) + return 31 - __builtin_clz(v); +#elif defined(_MSC_VER) + unsigned long i; + + _BitScanReverse(&i, v); + return i; +#else + unsigned i = 0; + + while ((v >>= 1) != 0) + i++; + return i; +#endif +} + +static forceinline unsigned +bsr64(u64 v) +{ +#if defined(__GNUC__) || __has_builtin(__builtin_clzll) + return 63 - __builtin_clzll(v); +#elif defined(_MSC_VER) && defined(_WIN64) + unsigned long i; + + _BitScanReverse64(&i, v); + return i; +#else + unsigned i = 0; + + while ((v >>= 1) != 0) + i++; + return i; +#endif +} + +static forceinline unsigned +bsrw(machine_word_t v) +{ + STATIC_ASSERT(WORDBITS == 32 || WORDBITS == 64); + if (WORDBITS == 32) + return bsr32(v); + else + return bsr64(v); +} + +/* + * Bit Scan Forward (BSF) - find the 0-based index (relative to the least + * significant end) of the *least* significant 1 bit in the input value. The + * input value must be nonzero! 
+ */ + +static forceinline unsigned +bsf32(u32 v) +{ +#if defined(__GNUC__) || __has_builtin(__builtin_ctz) + return __builtin_ctz(v); +#elif defined(_MSC_VER) + unsigned long i; + + _BitScanForward(&i, v); + return i; +#else + unsigned i = 0; + + for (; (v & 1) == 0; v >>= 1) + i++; + return i; +#endif +} + +static forceinline unsigned +bsf64(u64 v) +{ +#if defined(__GNUC__) || __has_builtin(__builtin_ctzll) + return __builtin_ctzll(v); +#elif defined(_MSC_VER) && defined(_WIN64) + unsigned long i; + + _BitScanForward64(&i, v); + return i; +#else + unsigned i = 0; + + for (; (v & 1) == 0; v >>= 1) + i++; + return i; +#endif +} + +static forceinline unsigned +bsfw(machine_word_t v) +{ + STATIC_ASSERT(WORDBITS == 32 || WORDBITS == 64); + if (WORDBITS == 32) + return bsf32(v); + else + return bsf64(v); +} + +/* + * rbit32(v): reverse the bits in a 32-bit integer. This doesn't have a + * fallback implementation; use '#ifdef rbit32' to check if this is available. + */ +#undef rbit32 +#if (defined(__GNUC__) || defined(__clang__)) && defined(ARCH_ARM32) && \ + (__ARM_ARCH >= 7 || (__ARM_ARCH == 6 && defined(__ARM_ARCH_6T2__))) +static forceinline u32 +rbit32(u32 v) +{ + __asm__("rbit %0, %1" : "=r" (v) : "r" (v)); + return v; +} +#define rbit32 rbit32 +#elif (defined(__GNUC__) || defined(__clang__)) && defined(ARCH_ARM64) +static forceinline u32 +rbit32(u32 v) +{ + __asm__("rbit %w0, %w1" : "=r" (v) : "r" (v)); + return v; +} +#define rbit32 rbit32 +#endif + +#endif /* COMMON_DEFS_H */ diff --git a/tools/z64compress/src/enc/libdeflate/lib/adler32.c b/tools/z64compress/src/enc/libdeflate/lib/adler32.c new file mode 100644 index 000000000..b743c6943 --- /dev/null +++ b/tools/z64compress/src/enc/libdeflate/lib/adler32.c @@ -0,0 +1,131 @@ +/* + * adler32.c - Adler-32 checksum algorithm + * + * Copyright 2016 Eric Biggers + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the 
"Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "lib_common.h" +#include "libdeflate.h" + +/* The Adler-32 divisor, or "base", value */ +#define DIVISOR 65521 + +/* + * MAX_CHUNK_LEN is the most bytes that can be processed without the possibility + * of s2 overflowing when it is represented as an unsigned 32-bit integer. This + * value was computed using the following Python script: + * + * divisor = 65521 + * count = 0 + * s1 = divisor - 1 + * s2 = divisor - 1 + * while True: + * s1 += 0xFF + * s2 += s1 + * if s2 > 0xFFFFFFFF: + * break + * count += 1 + * print(count) + * + * Note that to get the correct worst-case value, we must assume that every byte + * has value 0xFF and that s1 and s2 started with the highest possible values + * modulo the divisor. 
+ */ +#define MAX_CHUNK_LEN 5552 + +static u32 MAYBE_UNUSED +adler32_generic(u32 adler, const u8 *p, size_t len) +{ + u32 s1 = adler & 0xFFFF; + u32 s2 = adler >> 16; + const u8 * const end = p + len; + + while (p != end) { + size_t chunk_len = MIN(end - p, MAX_CHUNK_LEN); + const u8 *chunk_end = p + chunk_len; + size_t num_unrolled_iterations = chunk_len / 4; + + while (num_unrolled_iterations--) { + s1 += *p++; + s2 += s1; + s1 += *p++; + s2 += s1; + s1 += *p++; + s2 += s1; + s1 += *p++; + s2 += s1; + } + while (p != chunk_end) { + s1 += *p++; + s2 += s1; + } + s1 %= DIVISOR; + s2 %= DIVISOR; + } + + return (s2 << 16) | s1; +} + +/* Include architecture-specific implementation(s) if available. */ +#undef DEFAULT_IMPL +#undef arch_select_adler32_func +typedef u32 (*adler32_func_t)(u32 adler, const u8 *p, size_t len); +#if defined(ARCH_ARM32) || defined(ARCH_ARM64) +# include "arm/adler32_impl.h" +#elif defined(ARCH_X86_32) || defined(ARCH_X86_64) +# include "x86/adler32_impl.h" +#endif + +#ifndef DEFAULT_IMPL +# define DEFAULT_IMPL adler32_generic +#endif + +#ifdef arch_select_adler32_func +static u32 dispatch_adler32(u32 adler, const u8 *p, size_t len); + +static volatile adler32_func_t adler32_impl = dispatch_adler32; + +/* Choose the best implementation at runtime. */ +static u32 dispatch_adler32(u32 adler, const u8 *p, size_t len) +{ + adler32_func_t f = arch_select_adler32_func(); + + if (f == NULL) + f = DEFAULT_IMPL; + + adler32_impl = f; + return f(adler, p, len); +} +#else +/* The best implementation is statically known, so call it directly. */ +#define adler32_impl DEFAULT_IMPL +#endif + +LIBDEFLATEAPI u32 +libdeflate_adler32(u32 adler, const void *buffer, size_t len) +{ + if (buffer == NULL) /* Return initial value. 
*/ + return 1; + return adler32_impl(adler, buffer, len); +} diff --git a/tools/z64compress/src/enc/libdeflate/lib/adler32_vec_template.h b/tools/z64compress/src/enc/libdeflate/lib/adler32_vec_template.h new file mode 100644 index 000000000..98c086bbc --- /dev/null +++ b/tools/z64compress/src/enc/libdeflate/lib/adler32_vec_template.h @@ -0,0 +1,123 @@ +/* + * adler32_vec_template.h - template for vectorized Adler-32 implementations + * + * Copyright 2016 Eric Biggers + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +/* + * This file contains a template for vectorized Adler-32 implementations. + * + * The inner loop between reductions modulo 65521 of an unvectorized Adler-32 + * implementation looks something like this: + * + * do { + * s1 += *p; + * s2 += s1; + * } while (++p != chunk_end); + * + * For vectorized calculation of s1, we only need to sum the input bytes. 
They + * can be accumulated into multiple counters which are eventually summed + * together. + * + * For vectorized calculation of s2, the basic idea is that for each iteration + * that processes N bytes, we can perform the following vectorizable + * calculation: + * + * s2 += N*byte_1 + (N-1)*byte_2 + (N-2)*byte_3 + ... + 1*byte_N + * + * Or, equivalently, we can sum the byte_1...byte_N for each iteration into N + * separate counters, then do the multiplications by N...1 just once at the end + * rather than once per iteration. + * + * Also, we must account for how previous bytes will affect s2 by doing the + * following at beginning of each iteration: + * + * s2 += s1 * N + * + * Furthermore, like s1, "s2" can actually be multiple counters which are + * eventually summed together. + */ + +static u32 ATTRIBUTES MAYBE_UNUSED +FUNCNAME(u32 adler, const u8 *p, size_t len) +{ + const size_t max_chunk_len = + MIN(MAX_CHUNK_LEN, IMPL_MAX_CHUNK_LEN) - + (MIN(MAX_CHUNK_LEN, IMPL_MAX_CHUNK_LEN) % IMPL_SEGMENT_LEN); + u32 s1 = adler & 0xFFFF; + u32 s2 = adler >> 16; + const u8 * const end = p + len; + const u8 *vend; + + /* Process a byte at a time until the needed alignment is reached. */ + if (p != end && (uintptr_t)p % IMPL_ALIGNMENT) { + do { + s1 += *p++; + s2 += s1; + } while (p != end && (uintptr_t)p % IMPL_ALIGNMENT); + s1 %= DIVISOR; + s2 %= DIVISOR; + } + + /* + * Process "chunks" of bytes using vector instructions. Chunk lengths + * are limited to MAX_CHUNK_LEN, which guarantees that s1 and s2 never + * overflow before being reduced modulo DIVISOR. For vector processing, + * chunk lengths are also made evenly divisible by IMPL_SEGMENT_LEN and + * may be further limited to IMPL_MAX_CHUNK_LEN. 
+ */ + STATIC_ASSERT(IMPL_SEGMENT_LEN % IMPL_ALIGNMENT == 0); + vend = end - ((size_t)(end - p) % IMPL_SEGMENT_LEN); + while (p != vend) { + size_t chunk_len = MIN((size_t)(vend - p), max_chunk_len); + + s2 += s1 * chunk_len; + + FUNCNAME_CHUNK((const void *)p, (const void *)(p + chunk_len), + &s1, &s2); + + p += chunk_len; + s1 %= DIVISOR; + s2 %= DIVISOR; + } + + /* Process any remaining bytes. */ + if (p != end) { + do { + s1 += *p++; + s2 += s1; + } while (p != end); + s1 %= DIVISOR; + s2 %= DIVISOR; + } + + return (s2 << 16) | s1; +} + +#undef FUNCNAME +#undef FUNCNAME_CHUNK +#undef ATTRIBUTES +#undef IMPL_ALIGNMENT +#undef IMPL_SEGMENT_LEN +#undef IMPL_MAX_CHUNK_LEN diff --git a/tools/z64compress/src/enc/libdeflate/lib/arm/adler32_impl.h b/tools/z64compress/src/enc/libdeflate/lib/arm/adler32_impl.h new file mode 100644 index 000000000..4083b2ef3 --- /dev/null +++ b/tools/z64compress/src/enc/libdeflate/lib/arm/adler32_impl.h @@ -0,0 +1,272 @@ +/* + * arm/adler32_impl.h - ARM implementations of Adler-32 checksum algorithm + * + * Copyright 2016 Eric Biggers + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef LIB_ARM_ADLER32_IMPL_H +#define LIB_ARM_ADLER32_IMPL_H + +#include "cpu_features.h" + +/* Regular NEON implementation */ +#if HAVE_NEON_INTRIN && CPU_IS_LITTLE_ENDIAN() +# define adler32_neon adler32_neon +# define FUNCNAME adler32_neon +# define FUNCNAME_CHUNK adler32_neon_chunk +# define IMPL_ALIGNMENT 16 +# define IMPL_SEGMENT_LEN 64 +/* Prevent unsigned overflow of the 16-bit precision byte counters */ +# define IMPL_MAX_CHUNK_LEN (64 * (0xFFFF / 0xFF)) +# if HAVE_NEON_NATIVE +# define ATTRIBUTES +# else +# ifdef ARCH_ARM32 +# define ATTRIBUTES _target_attribute("fpu=neon") +# else +# define ATTRIBUTES _target_attribute("+simd") +# endif +# endif +# include +static forceinline ATTRIBUTES void +adler32_neon_chunk(const uint8x16_t *p, const uint8x16_t * const end, + u32 *s1, u32 *s2) +{ + static const u16 _aligned_attribute(16) mults[64] = { + 64, 63, 62, 61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 51, 50, 49, + 48, 47, 46, 45, 44, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34, 33, + 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, + 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, + }; + const uint16x8_t mults_a = vld1q_u16(&mults[0]); + const uint16x8_t mults_b = vld1q_u16(&mults[8]); + const uint16x8_t mults_c = vld1q_u16(&mults[16]); + const uint16x8_t mults_d = vld1q_u16(&mults[24]); + const uint16x8_t mults_e = vld1q_u16(&mults[32]); + const uint16x8_t mults_f = vld1q_u16(&mults[40]); + const uint16x8_t mults_g = vld1q_u16(&mults[48]); + const uint16x8_t mults_h = vld1q_u16(&mults[56]); + + uint32x4_t v_s1 = vdupq_n_u32(0); + uint32x4_t v_s2 = vdupq_n_u32(0); + /* + * v_byte_sums_* contain the sum of the bytes at index i across all + * 64-byte segments, for each 
index 0..63. + */ + uint16x8_t v_byte_sums_a = vdupq_n_u16(0); + uint16x8_t v_byte_sums_b = vdupq_n_u16(0); + uint16x8_t v_byte_sums_c = vdupq_n_u16(0); + uint16x8_t v_byte_sums_d = vdupq_n_u16(0); + uint16x8_t v_byte_sums_e = vdupq_n_u16(0); + uint16x8_t v_byte_sums_f = vdupq_n_u16(0); + uint16x8_t v_byte_sums_g = vdupq_n_u16(0); + uint16x8_t v_byte_sums_h = vdupq_n_u16(0); + + do { + /* Load the next 64 bytes. */ + const uint8x16_t bytes1 = *p++; + const uint8x16_t bytes2 = *p++; + const uint8x16_t bytes3 = *p++; + const uint8x16_t bytes4 = *p++; + uint16x8_t tmp; + + /* + * Accumulate the previous s1 counters into the s2 counters. + * The needed multiplication by 64 is delayed to later. + */ + v_s2 = vaddq_u32(v_s2, v_s1); + + /* + * Add the 64 bytes to their corresponding v_byte_sums counters, + * while also accumulating the sums of each adjacent set of 4 + * bytes into v_s1. + */ + tmp = vpaddlq_u8(bytes1); + v_byte_sums_a = vaddw_u8(v_byte_sums_a, vget_low_u8(bytes1)); + v_byte_sums_b = vaddw_u8(v_byte_sums_b, vget_high_u8(bytes1)); + tmp = vpadalq_u8(tmp, bytes2); + v_byte_sums_c = vaddw_u8(v_byte_sums_c, vget_low_u8(bytes2)); + v_byte_sums_d = vaddw_u8(v_byte_sums_d, vget_high_u8(bytes2)); + tmp = vpadalq_u8(tmp, bytes3); + v_byte_sums_e = vaddw_u8(v_byte_sums_e, vget_low_u8(bytes3)); + v_byte_sums_f = vaddw_u8(v_byte_sums_f, vget_high_u8(bytes3)); + tmp = vpadalq_u8(tmp, bytes4); + v_byte_sums_g = vaddw_u8(v_byte_sums_g, vget_low_u8(bytes4)); + v_byte_sums_h = vaddw_u8(v_byte_sums_h, vget_high_u8(bytes4)); + v_s1 = vpadalq_u16(v_s1, tmp); + + } while (p != end); + + /* s2 = 64*s2 + (64*bytesum0 + 63*bytesum1 + ... 
+ 1*bytesum63) */ +#ifdef ARCH_ARM32 +# define umlal2(a, b, c) vmlal_u16((a), vget_high_u16(b), vget_high_u16(c)) +#else +# define umlal2 vmlal_high_u16 +#endif + v_s2 = vqshlq_n_u32(v_s2, 6); + v_s2 = vmlal_u16(v_s2, vget_low_u16(v_byte_sums_a), vget_low_u16(mults_a)); + v_s2 = umlal2(v_s2, v_byte_sums_a, mults_a); + v_s2 = vmlal_u16(v_s2, vget_low_u16(v_byte_sums_b), vget_low_u16(mults_b)); + v_s2 = umlal2(v_s2, v_byte_sums_b, mults_b); + v_s2 = vmlal_u16(v_s2, vget_low_u16(v_byte_sums_c), vget_low_u16(mults_c)); + v_s2 = umlal2(v_s2, v_byte_sums_c, mults_c); + v_s2 = vmlal_u16(v_s2, vget_low_u16(v_byte_sums_d), vget_low_u16(mults_d)); + v_s2 = umlal2(v_s2, v_byte_sums_d, mults_d); + v_s2 = vmlal_u16(v_s2, vget_low_u16(v_byte_sums_e), vget_low_u16(mults_e)); + v_s2 = umlal2(v_s2, v_byte_sums_e, mults_e); + v_s2 = vmlal_u16(v_s2, vget_low_u16(v_byte_sums_f), vget_low_u16(mults_f)); + v_s2 = umlal2(v_s2, v_byte_sums_f, mults_f); + v_s2 = vmlal_u16(v_s2, vget_low_u16(v_byte_sums_g), vget_low_u16(mults_g)); + v_s2 = umlal2(v_s2, v_byte_sums_g, mults_g); + v_s2 = vmlal_u16(v_s2, vget_low_u16(v_byte_sums_h), vget_low_u16(mults_h)); + v_s2 = umlal2(v_s2, v_byte_sums_h, mults_h); +#undef umlal2 + + /* Horizontal sum to finish up */ +#ifdef ARCH_ARM32 + *s1 += vgetq_lane_u32(v_s1, 0) + vgetq_lane_u32(v_s1, 1) + + vgetq_lane_u32(v_s1, 2) + vgetq_lane_u32(v_s1, 3); + *s2 += vgetq_lane_u32(v_s2, 0) + vgetq_lane_u32(v_s2, 1) + + vgetq_lane_u32(v_s2, 2) + vgetq_lane_u32(v_s2, 3); +#else + *s1 += vaddvq_u32(v_s1); + *s2 += vaddvq_u32(v_s2); +#endif +} +# include "../adler32_vec_template.h" +#endif /* Regular NEON implementation */ + +/* NEON+dotprod implementation */ +#if HAVE_DOTPROD_INTRIN && CPU_IS_LITTLE_ENDIAN() +# define adler32_neon_dotprod adler32_neon_dotprod +# define FUNCNAME adler32_neon_dotprod +# define FUNCNAME_CHUNK adler32_neon_dotprod_chunk +# define IMPL_ALIGNMENT 16 +# define IMPL_SEGMENT_LEN 64 +# define IMPL_MAX_CHUNK_LEN MAX_CHUNK_LEN +# if 
HAVE_DOTPROD_NATIVE +# define ATTRIBUTES +# else +# ifdef __clang__ +# define ATTRIBUTES _target_attribute("dotprod") + /* + * With gcc, arch=armv8.2-a is needed for dotprod intrinsics, unless the + * default target is armv8.3-a or later in which case it must be omitted. + * armv8.3-a or later can be detected by checking for __ARM_FEATURE_JCVT. + */ +# elif defined(__ARM_FEATURE_JCVT) +# define ATTRIBUTES _target_attribute("+dotprod") +# else +# define ATTRIBUTES _target_attribute("arch=armv8.2-a+dotprod") +# endif +# endif +# include +static forceinline ATTRIBUTES void +adler32_neon_dotprod_chunk(const uint8x16_t *p, const uint8x16_t * const end, + u32 *s1, u32 *s2) +{ + static const u8 _aligned_attribute(16) mults[64] = { + 64, 63, 62, 61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 51, 50, 49, + 48, 47, 46, 45, 44, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34, 33, + 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, + 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, + }; + const uint8x16_t mults_a = vld1q_u8(&mults[0]); + const uint8x16_t mults_b = vld1q_u8(&mults[16]); + const uint8x16_t mults_c = vld1q_u8(&mults[32]); + const uint8x16_t mults_d = vld1q_u8(&mults[48]); + const uint8x16_t ones = vdupq_n_u8(1); + uint32x4_t v_s1_a = vdupq_n_u32(0); + uint32x4_t v_s1_b = vdupq_n_u32(0); + uint32x4_t v_s1_c = vdupq_n_u32(0); + uint32x4_t v_s1_d = vdupq_n_u32(0); + uint32x4_t v_s2_a = vdupq_n_u32(0); + uint32x4_t v_s2_b = vdupq_n_u32(0); + uint32x4_t v_s2_c = vdupq_n_u32(0); + uint32x4_t v_s2_d = vdupq_n_u32(0); + uint32x4_t v_s1_sums_a = vdupq_n_u32(0); + uint32x4_t v_s1_sums_b = vdupq_n_u32(0); + uint32x4_t v_s1_sums_c = vdupq_n_u32(0); + uint32x4_t v_s1_sums_d = vdupq_n_u32(0); + uint32x4_t v_s1; + uint32x4_t v_s2; + uint32x4_t v_s1_sums; + + do { + uint8x16_t bytes_a = *p++; + uint8x16_t bytes_b = *p++; + uint8x16_t bytes_c = *p++; + uint8x16_t bytes_d = *p++; + + v_s1_sums_a = vaddq_u32(v_s1_sums_a, v_s1_a); + v_s1_a = vdotq_u32(v_s1_a, bytes_a, ones); + 
v_s2_a = vdotq_u32(v_s2_a, bytes_a, mults_a); + + v_s1_sums_b = vaddq_u32(v_s1_sums_b, v_s1_b); + v_s1_b = vdotq_u32(v_s1_b, bytes_b, ones); + v_s2_b = vdotq_u32(v_s2_b, bytes_b, mults_b); + + v_s1_sums_c = vaddq_u32(v_s1_sums_c, v_s1_c); + v_s1_c = vdotq_u32(v_s1_c, bytes_c, ones); + v_s2_c = vdotq_u32(v_s2_c, bytes_c, mults_c); + + v_s1_sums_d = vaddq_u32(v_s1_sums_d, v_s1_d); + v_s1_d = vdotq_u32(v_s1_d, bytes_d, ones); + v_s2_d = vdotq_u32(v_s2_d, bytes_d, mults_d); + } while (p != end); + + v_s1 = vaddq_u32(vaddq_u32(v_s1_a, v_s1_b), vaddq_u32(v_s1_c, v_s1_d)); + v_s2 = vaddq_u32(vaddq_u32(v_s2_a, v_s2_b), vaddq_u32(v_s2_c, v_s2_d)); + v_s1_sums = vaddq_u32(vaddq_u32(v_s1_sums_a, v_s1_sums_b), + vaddq_u32(v_s1_sums_c, v_s1_sums_d)); + v_s2 = vaddq_u32(v_s2, vqshlq_n_u32(v_s1_sums, 6)); + + *s1 += vaddvq_u32(v_s1); + *s2 += vaddvq_u32(v_s2); +} +# include "../adler32_vec_template.h" +#endif /* NEON+dotprod implementation */ + +#if defined(adler32_neon_dotprod) && HAVE_DOTPROD_NATIVE +#define DEFAULT_IMPL adler32_neon_dotprod +#else +static inline adler32_func_t +arch_select_adler32_func(void) +{ + const u32 features MAYBE_UNUSED = get_arm_cpu_features(); + +#ifdef adler32_neon_dotprod + if (HAVE_NEON(features) && HAVE_DOTPROD(features)) + return adler32_neon_dotprod; +#endif +#ifdef adler32_neon + if (HAVE_NEON(features)) + return adler32_neon; +#endif + return NULL; +} +#define arch_select_adler32_func arch_select_adler32_func +#endif + +#endif /* LIB_ARM_ADLER32_IMPL_H */ diff --git a/tools/z64compress/src/enc/libdeflate/lib/arm/cpu_features.c b/tools/z64compress/src/enc/libdeflate/lib/arm/cpu_features.c new file mode 100644 index 000000000..ed710bc6f --- /dev/null +++ b/tools/z64compress/src/enc/libdeflate/lib/arm/cpu_features.c @@ -0,0 +1,211 @@ +/* + * arm/cpu_features.c - feature detection for ARM CPUs + * + * Copyright 2018 Eric Biggers + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and 
associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +/* + * ARM CPUs don't have a standard way for unprivileged programs to detect CPU + * features. But an OS-specific way can be used when available. + */ + +#ifdef __APPLE__ +#undef _ANSI_SOURCE +#define _DARWIN_C_SOURCE /* for sysctlbyname() */ +#endif + +#include "../cpu_features_common.h" /* must be included first */ +#include "cpu_features.h" + +#if HAVE_DYNAMIC_ARM_CPU_FEATURES + +#ifdef __linux__ +/* + * On Linux, arm32 and arm64 CPU features can be detected by reading the + * AT_HWCAP and AT_HWCAP2 values from /proc/self/auxv. + * + * Ideally we'd use the C library function getauxval(), but it's not guaranteed + * to be available: it was only added to glibc in 2.16, and in Android it was + * added to API level 18 for arm32 and level 21 for arm64. 
+ */ + +#include +#include +#include +#include + +#define AT_HWCAP 16 +#define AT_HWCAP2 26 + +static void scan_auxv(unsigned long *hwcap, unsigned long *hwcap2) +{ + int fd; + unsigned long auxbuf[32]; + int filled = 0; + int i; + + fd = open("/proc/self/auxv", O_RDONLY); + if (fd < 0) + return; + + for (;;) { + do { + int ret = read(fd, &((char *)auxbuf)[filled], + sizeof(auxbuf) - filled); + if (ret <= 0) { + if (ret < 0 && errno == EINTR) + continue; + goto out; + } + filled += ret; + } while (filled < 2 * sizeof(long)); + + i = 0; + do { + unsigned long type = auxbuf[i]; + unsigned long value = auxbuf[i + 1]; + + if (type == AT_HWCAP) + *hwcap = value; + else if (type == AT_HWCAP2) + *hwcap2 = value; + i += 2; + filled -= 2 * sizeof(long); + } while (filled >= 2 * sizeof(long)); + + memmove(auxbuf, &auxbuf[i], filled); + } +out: + close(fd); +} + +static u32 query_arm_cpu_features(void) +{ + u32 features = 0; + unsigned long hwcap = 0; + unsigned long hwcap2 = 0; + + scan_auxv(&hwcap, &hwcap2); + +#ifdef ARCH_ARM32 + STATIC_ASSERT(sizeof(long) == 4); + if (hwcap & (1 << 12)) /* HWCAP_NEON */ + features |= ARM_CPU_FEATURE_NEON; + if (hwcap2 & (1 << 1)) /* HWCAP2_PMULL */ + features |= ARM_CPU_FEATURE_PMULL; + if (hwcap2 & (1 << 4)) /* HWCAP2_CRC32 */ + features |= ARM_CPU_FEATURE_CRC32; +#else + STATIC_ASSERT(sizeof(long) == 8); + if (hwcap & (1 << 1)) /* HWCAP_ASIMD */ + features |= ARM_CPU_FEATURE_NEON; + if (hwcap & (1 << 4)) /* HWCAP_PMULL */ + features |= ARM_CPU_FEATURE_PMULL; + if (hwcap & (1 << 7)) /* HWCAP_CRC32 */ + features |= ARM_CPU_FEATURE_CRC32; + if (hwcap & (1 << 17)) /* HWCAP_SHA3 */ + features |= ARM_CPU_FEATURE_SHA3; + if (hwcap & (1 << 20)) /* HWCAP_ASIMDDP */ + features |= ARM_CPU_FEATURE_DOTPROD; +#endif + return features; +} + +#elif defined(__APPLE__) +/* On Apple platforms, arm64 CPU features can be detected via sysctlbyname(). 
*/ + +#include +#include + +static const struct { + const char *name; + u32 feature; +} feature_sysctls[] = { + { "hw.optional.neon", ARM_CPU_FEATURE_NEON }, + { "hw.optional.AdvSIMD", ARM_CPU_FEATURE_NEON }, + { "hw.optional.arm.FEAT_PMULL", ARM_CPU_FEATURE_PMULL }, + { "hw.optional.armv8_crc32", ARM_CPU_FEATURE_CRC32 }, + { "hw.optional.armv8_2_sha3", ARM_CPU_FEATURE_SHA3 }, + { "hw.optional.arm.FEAT_SHA3", ARM_CPU_FEATURE_SHA3 }, + { "hw.optional.arm.FEAT_DotProd", ARM_CPU_FEATURE_DOTPROD }, +}; + +static u32 query_arm_cpu_features(void) +{ + u32 features = 0; + size_t i; + + for (i = 0; i < ARRAY_LEN(feature_sysctls); i++) { + const char *name = feature_sysctls[i].name; + u32 val = 0; + size_t valsize = sizeof(val); + + if (sysctlbyname(name, &val, &valsize, NULL, 0) == 0 && + valsize == sizeof(val) && val == 1) + features |= feature_sysctls[i].feature; + } + return features; +} +#elif defined(_WIN32) + +#include + +static u32 query_arm_cpu_features(void) +{ + u32 features = ARM_CPU_FEATURE_NEON; + + if (IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE)) + features |= ARM_CPU_FEATURE_PMULL; + if (IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE)) + features |= ARM_CPU_FEATURE_CRC32; + + /* FIXME: detect SHA3 and DOTPROD support too. 
*/ + + return features; +} +#else +#error "unhandled case" +#endif + +static const struct cpu_feature arm_cpu_feature_table[] = { + {ARM_CPU_FEATURE_NEON, "neon"}, + {ARM_CPU_FEATURE_PMULL, "pmull"}, + {ARM_CPU_FEATURE_CRC32, "crc32"}, + {ARM_CPU_FEATURE_SHA3, "sha3"}, + {ARM_CPU_FEATURE_DOTPROD, "dotprod"}, +}; + +volatile u32 libdeflate_arm_cpu_features = 0; + +void libdeflate_init_arm_cpu_features(void) +{ + u32 features = query_arm_cpu_features(); + + disable_cpu_features_for_testing(&features, arm_cpu_feature_table, + ARRAY_LEN(arm_cpu_feature_table)); + + libdeflate_arm_cpu_features = features | ARM_CPU_FEATURES_KNOWN; +} + +#endif /* HAVE_DYNAMIC_ARM_CPU_FEATURES */ diff --git a/tools/z64compress/src/enc/libdeflate/lib/arm/cpu_features.h b/tools/z64compress/src/enc/libdeflate/lib/arm/cpu_features.h new file mode 100644 index 000000000..548d31ea8 --- /dev/null +++ b/tools/z64compress/src/enc/libdeflate/lib/arm/cpu_features.h @@ -0,0 +1,223 @@ +/* + * arm/cpu_features.h - feature detection for ARM CPUs + * + * Copyright 2018 Eric Biggers + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef LIB_ARM_CPU_FEATURES_H +#define LIB_ARM_CPU_FEATURES_H + +#include "../lib_common.h" + +#define HAVE_DYNAMIC_ARM_CPU_FEATURES 0 + +#if defined(ARCH_ARM32) || defined(ARCH_ARM64) + +#if !defined(FREESTANDING) && \ + (COMPILER_SUPPORTS_TARGET_FUNCTION_ATTRIBUTE || defined(_MSC_VER)) && \ + (defined(__linux__) || \ + (defined(__APPLE__) && defined(ARCH_ARM64)) || \ + (defined(_WIN32) && defined(ARCH_ARM64))) +# undef HAVE_DYNAMIC_ARM_CPU_FEATURES +# define HAVE_DYNAMIC_ARM_CPU_FEATURES 1 +#endif + +#define ARM_CPU_FEATURE_NEON 0x00000001 +#define ARM_CPU_FEATURE_PMULL 0x00000002 +#define ARM_CPU_FEATURE_CRC32 0x00000004 +#define ARM_CPU_FEATURE_SHA3 0x00000008 +#define ARM_CPU_FEATURE_DOTPROD 0x00000010 + +#define HAVE_NEON(features) (HAVE_NEON_NATIVE || ((features) & ARM_CPU_FEATURE_NEON)) +#define HAVE_PMULL(features) (HAVE_PMULL_NATIVE || ((features) & ARM_CPU_FEATURE_PMULL)) +#define HAVE_CRC32(features) (HAVE_CRC32_NATIVE || ((features) & ARM_CPU_FEATURE_CRC32)) +#define HAVE_SHA3(features) (HAVE_SHA3_NATIVE || ((features) & ARM_CPU_FEATURE_SHA3)) +#define HAVE_DOTPROD(features) (HAVE_DOTPROD_NATIVE || ((features) & ARM_CPU_FEATURE_DOTPROD)) + +#if HAVE_DYNAMIC_ARM_CPU_FEATURES +#define ARM_CPU_FEATURES_KNOWN 0x80000000 +extern volatile u32 libdeflate_arm_cpu_features; + +void libdeflate_init_arm_cpu_features(void); + +static inline u32 get_arm_cpu_features(void) +{ + if (libdeflate_arm_cpu_features == 0) + libdeflate_init_arm_cpu_features(); + return libdeflate_arm_cpu_features; +} +#else /* HAVE_DYNAMIC_ARM_CPU_FEATURES */ +static inline u32 get_arm_cpu_features(void) { return 0; } +#endif /* !HAVE_DYNAMIC_ARM_CPU_FEATURES */ + +/* NEON */ +#if defined(__ARM_NEON) || 
defined(ARCH_ARM64) +# define HAVE_NEON_NATIVE 1 +#else +# define HAVE_NEON_NATIVE 0 +#endif +/* + * With both gcc and clang, NEON intrinsics require that the main target has + * NEON enabled already. Exception: with gcc 6.1 and later (r230411 for arm32, + * r226563 for arm64), hardware floating point support is sufficient. + */ +#if HAVE_NEON_NATIVE || \ + (HAVE_DYNAMIC_ARM_CPU_FEATURES && GCC_PREREQ(6, 1) && defined(__ARM_FP)) +# define HAVE_NEON_INTRIN 1 +#else +# define HAVE_NEON_INTRIN 0 +#endif + +/* PMULL */ +#ifdef __ARM_FEATURE_CRYPTO +# define HAVE_PMULL_NATIVE 1 +#else +# define HAVE_PMULL_NATIVE 0 +#endif +#if HAVE_PMULL_NATIVE || \ + (HAVE_DYNAMIC_ARM_CPU_FEATURES && \ + (GCC_PREREQ(6, 1) || __has_builtin(__builtin_neon_vmull_p64) || \ + defined(_MSC_VER)) && \ + /* + * On arm32 with clang, the crypto intrinsics (which include pmull) + * are not defined, even when using -mfpu=crypto-neon-fp-armv8, + * because clang's puts their definitions behind + * __aarch64__. + */ \ + !(defined(ARCH_ARM32) && defined(__clang__))) +# define HAVE_PMULL_INTRIN CPU_IS_LITTLE_ENDIAN() /* untested on big endian */ + /* Work around MSVC's vmull_p64() taking poly64x1_t instead of poly64_t */ +# ifdef _MSC_VER +# define compat_vmull_p64(a, b) vmull_p64(vcreate_p64(a), vcreate_p64(b)) +# else +# define compat_vmull_p64(a, b) vmull_p64((a), (b)) +# endif +#else +# define HAVE_PMULL_INTRIN 0 +#endif + +/* CRC32 */ +#ifdef __ARM_FEATURE_CRC32 +# define HAVE_CRC32_NATIVE 1 +#else +# define HAVE_CRC32_NATIVE 0 +#endif +/* + * Support for ARM CRC32 intrinsics when CRC32 instructions are not enabled in + * the main target has been affected by two gcc bugs, which we must avoid by + * only allowing gcc versions that have the corresponding fixes. First, gcc + * commit 943766d37ae4 ("[arm] Fix use of CRC32 intrinsics with Armv8-a and + * hard-float"), i.e. gcc 8.4+, 9.3+, 10.1+, or 11+, is needed. 
Second, gcc + * commit c1cdabe3aab8 ("arm: reorder assembler architecture directives + * [PR101723]"), i.e. gcc 9.5+, 10.4+, 11.3+, or 12+, is needed when binutils is + * 2.34 or later, due to https://gcc.gnu.org/bugzilla/show_bug.cgi?id=104439. + * We use the second set of prerequisites, as they are stricter and we have no + * way to detect the binutils version directly from a C source file. + */ +#if HAVE_CRC32_NATIVE || \ + (HAVE_DYNAMIC_ARM_CPU_FEATURES && \ + (__has_builtin(__builtin_arm_crc32b) || \ + GCC_PREREQ(11, 3) || \ + (GCC_PREREQ(10, 4) && !GCC_PREREQ(11, 0)) || \ + (GCC_PREREQ(9, 5) && !GCC_PREREQ(10, 0)) || \ + defined(_MSC_VER))) +# define HAVE_CRC32_INTRIN 1 +#else +# define HAVE_CRC32_INTRIN 0 +#endif + +/* SHA3 (needed for the eor3 instruction) */ +#if defined(ARCH_ARM64) && !defined(_MSC_VER) +# ifdef __ARM_FEATURE_SHA3 +# define HAVE_SHA3_NATIVE 1 +# else +# define HAVE_SHA3_NATIVE 0 +# endif +# define HAVE_SHA3_TARGET (HAVE_DYNAMIC_ARM_CPU_FEATURES && \ + (GCC_PREREQ(8, 1) /* r256478 */ || \ + CLANG_PREREQ(7, 0, 10010463) /* r338010 */)) +# define HAVE_SHA3_INTRIN (HAVE_NEON_INTRIN && \ + (HAVE_SHA3_NATIVE || HAVE_SHA3_TARGET) && \ + (GCC_PREREQ(9, 1) /* r268049 */ || \ + __has_builtin(__builtin_neon_veor3q_v))) +#else +# define HAVE_SHA3_NATIVE 0 +# define HAVE_SHA3_TARGET 0 +# define HAVE_SHA3_INTRIN 0 +#endif + +/* dotprod */ +#ifdef ARCH_ARM64 +# ifdef __ARM_FEATURE_DOTPROD +# define HAVE_DOTPROD_NATIVE 1 +# else +# define HAVE_DOTPROD_NATIVE 0 +# endif +# if HAVE_DOTPROD_NATIVE || \ + (HAVE_DYNAMIC_ARM_CPU_FEATURES && \ + (GCC_PREREQ(8, 1) || __has_builtin(__builtin_neon_vdotq_v) || \ + defined(_MSC_VER))) +# define HAVE_DOTPROD_INTRIN 1 +# else +# define HAVE_DOTPROD_INTRIN 0 +# endif +#else +# define HAVE_DOTPROD_NATIVE 0 +# define HAVE_DOTPROD_INTRIN 0 +#endif + +/* + * Work around bugs in arm_acle.h and arm_neon.h where sometimes intrinsics are + * only defined when the corresponding __ARM_FEATURE_* macro is defined. 
The + * intrinsics actually work in target attribute functions too if they are + * defined, though, so work around this by temporarily defining the + * corresponding __ARM_FEATURE_* macros while including the headers. + */ +#if HAVE_CRC32_INTRIN && !HAVE_CRC32_NATIVE && \ + (defined(__clang__) || defined(ARCH_ARM32)) +# define __ARM_FEATURE_CRC32 1 +#endif +#if HAVE_SHA3_INTRIN && !HAVE_SHA3_NATIVE && defined(__clang__) +# define __ARM_FEATURE_SHA3 1 +#endif +#if HAVE_DOTPROD_INTRIN && !HAVE_DOTPROD_NATIVE && defined(__clang__) +# define __ARM_FEATURE_DOTPROD 1 +#endif +#if HAVE_CRC32_INTRIN && !HAVE_CRC32_NATIVE && \ + (defined(__clang__) || defined(ARCH_ARM32)) +# include +# undef __ARM_FEATURE_CRC32 +#endif +#if HAVE_SHA3_INTRIN && !HAVE_SHA3_NATIVE && defined(__clang__) +# include +# undef __ARM_FEATURE_SHA3 +#endif +#if HAVE_DOTPROD_INTRIN && !HAVE_DOTPROD_NATIVE && defined(__clang__) +# include +# undef __ARM_FEATURE_DOTPROD +#endif + +#endif /* ARCH_ARM32 || ARCH_ARM64 */ + +#endif /* LIB_ARM_CPU_FEATURES_H */ diff --git a/tools/z64compress/src/enc/libdeflate/lib/arm/crc32_impl.h b/tools/z64compress/src/enc/libdeflate/lib/arm/crc32_impl.h new file mode 100644 index 000000000..e426a63d6 --- /dev/null +++ b/tools/z64compress/src/enc/libdeflate/lib/arm/crc32_impl.h @@ -0,0 +1,665 @@ +/* + * arm/crc32_impl.h - ARM implementations of the gzip CRC-32 algorithm + * + * Copyright 2022 Eric Biggers + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or 
substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef LIB_ARM_CRC32_IMPL_H +#define LIB_ARM_CRC32_IMPL_H + +#include "cpu_features.h" + +/* + * crc32_arm_crc() - implementation using crc32 instructions (only) + * + * In general this implementation is straightforward. However, naive use of the + * crc32 instructions is serial: one of the two inputs to each crc32 instruction + * is the output of the previous one. To take advantage of CPUs that can + * execute multiple crc32 instructions in parallel, when possible we interleave + * the checksumming of several adjacent chunks, then combine their CRCs. + * + * However, without pmull, combining CRCs is fairly slow. So in this pmull-less + * version, we only use a large chunk length, and thus we only do chunked + * processing if there is a lot of data to checksum. This also means that a + * variable chunk length wouldn't help much, so we just support a fixed length. 
+ */ +#if HAVE_CRC32_INTRIN +# if HAVE_CRC32_NATIVE +# define ATTRIBUTES +# else +# ifdef ARCH_ARM32 +# ifdef __clang__ +# define ATTRIBUTES _target_attribute("armv8-a,crc") +# else +# define ATTRIBUTES _target_attribute("arch=armv8-a+crc") +# endif +# else +# ifdef __clang__ +# define ATTRIBUTES _target_attribute("crc") +# else +# define ATTRIBUTES _target_attribute("+crc") +# endif +# endif +# endif + +#ifndef _MSC_VER +# include +#endif + +/* + * Combine the CRCs for 4 adjacent chunks of length L = CRC32_FIXED_CHUNK_LEN + * bytes each by computing: + * + * [ crc0*x^(3*8*L) + crc1*x^(2*8*L) + crc2*x^(1*8*L) + crc3 ] mod G(x) + * + * This has been optimized in several ways: + * + * - The needed multipliers (x to some power, reduced mod G(x)) were + * precomputed. + * + * - The 3 multiplications are interleaved. + * + * - The reduction mod G(x) is delayed to the end and done using __crc32d. + * Note that the use of __crc32d introduces an extra factor of x^32. To + * cancel that out along with the extra factor of x^1 that gets introduced + * because of how the 63-bit products are aligned in their 64-bit integers, + * the multipliers are actually x^(j*8*L - 33) instead of x^(j*8*L). + */ +static forceinline ATTRIBUTES u32 +combine_crcs_slow(u32 crc0, u32 crc1, u32 crc2, u32 crc3) +{ + u64 res0 = 0, res1 = 0, res2 = 0; + int i; + + /* Multiply crc{0,1,2} by CRC32_FIXED_CHUNK_MULT_{3,2,1}. */ + for (i = 0; i < 32; i++) { + if (CRC32_FIXED_CHUNK_MULT_3 & (1U << i)) + res0 ^= (u64)crc0 << i; + if (CRC32_FIXED_CHUNK_MULT_2 & (1U << i)) + res1 ^= (u64)crc1 << i; + if (CRC32_FIXED_CHUNK_MULT_1 & (1U << i)) + res2 ^= (u64)crc2 << i; + } + /* Add the different parts and reduce mod G(x). */ + return __crc32d(0, res0 ^ res1 ^ res2) ^ crc3; +} + +#define crc32_arm_crc crc32_arm_crc +static u32 ATTRIBUTES MAYBE_UNUSED +crc32_arm_crc(u32 crc, const u8 *p, size_t len) +{ + if (len >= 64) { + const size_t align = -(uintptr_t)p & 7; + + /* Align p to the next 8-byte boundary. 
*/ + if (align) { + if (align & 1) + crc = __crc32b(crc, *p++); + if (align & 2) { + crc = __crc32h(crc, le16_bswap(*(u16 *)p)); + p += 2; + } + if (align & 4) { + crc = __crc32w(crc, le32_bswap(*(u32 *)p)); + p += 4; + } + len -= align; + } + /* + * Interleave the processing of multiple adjacent data chunks to + * take advantage of instruction-level parallelism. + * + * Some CPUs don't prefetch the data if it's being fetched in + * multiple interleaved streams, so do explicit prefetching. + */ + while (len >= CRC32_NUM_CHUNKS * CRC32_FIXED_CHUNK_LEN) { + const u64 *wp0 = (const u64 *)p; + const u64 * const wp0_end = + (const u64 *)(p + CRC32_FIXED_CHUNK_LEN); + u32 crc1 = 0, crc2 = 0, crc3 = 0; + + STATIC_ASSERT(CRC32_NUM_CHUNKS == 4); + STATIC_ASSERT(CRC32_FIXED_CHUNK_LEN % (4 * 8) == 0); + do { + prefetchr(&wp0[64 + 0*CRC32_FIXED_CHUNK_LEN/8]); + prefetchr(&wp0[64 + 1*CRC32_FIXED_CHUNK_LEN/8]); + prefetchr(&wp0[64 + 2*CRC32_FIXED_CHUNK_LEN/8]); + prefetchr(&wp0[64 + 3*CRC32_FIXED_CHUNK_LEN/8]); + crc = __crc32d(crc, le64_bswap(wp0[0*CRC32_FIXED_CHUNK_LEN/8])); + crc1 = __crc32d(crc1, le64_bswap(wp0[1*CRC32_FIXED_CHUNK_LEN/8])); + crc2 = __crc32d(crc2, le64_bswap(wp0[2*CRC32_FIXED_CHUNK_LEN/8])); + crc3 = __crc32d(crc3, le64_bswap(wp0[3*CRC32_FIXED_CHUNK_LEN/8])); + wp0++; + crc = __crc32d(crc, le64_bswap(wp0[0*CRC32_FIXED_CHUNK_LEN/8])); + crc1 = __crc32d(crc1, le64_bswap(wp0[1*CRC32_FIXED_CHUNK_LEN/8])); + crc2 = __crc32d(crc2, le64_bswap(wp0[2*CRC32_FIXED_CHUNK_LEN/8])); + crc3 = __crc32d(crc3, le64_bswap(wp0[3*CRC32_FIXED_CHUNK_LEN/8])); + wp0++; + crc = __crc32d(crc, le64_bswap(wp0[0*CRC32_FIXED_CHUNK_LEN/8])); + crc1 = __crc32d(crc1, le64_bswap(wp0[1*CRC32_FIXED_CHUNK_LEN/8])); + crc2 = __crc32d(crc2, le64_bswap(wp0[2*CRC32_FIXED_CHUNK_LEN/8])); + crc3 = __crc32d(crc3, le64_bswap(wp0[3*CRC32_FIXED_CHUNK_LEN/8])); + wp0++; + crc = __crc32d(crc, le64_bswap(wp0[0*CRC32_FIXED_CHUNK_LEN/8])); + crc1 = __crc32d(crc1, le64_bswap(wp0[1*CRC32_FIXED_CHUNK_LEN/8])); + 
crc2 = __crc32d(crc2, le64_bswap(wp0[2*CRC32_FIXED_CHUNK_LEN/8])); + crc3 = __crc32d(crc3, le64_bswap(wp0[3*CRC32_FIXED_CHUNK_LEN/8])); + wp0++; + } while (wp0 != wp0_end); + crc = combine_crcs_slow(crc, crc1, crc2, crc3); + p += CRC32_NUM_CHUNKS * CRC32_FIXED_CHUNK_LEN; + len -= CRC32_NUM_CHUNKS * CRC32_FIXED_CHUNK_LEN; + } + /* + * Due to the large fixed chunk length used above, there might + * still be a lot of data left. So use a 64-byte loop here, + * instead of a loop that is less unrolled. + */ + while (len >= 64) { + crc = __crc32d(crc, le64_bswap(*(u64 *)(p + 0))); + crc = __crc32d(crc, le64_bswap(*(u64 *)(p + 8))); + crc = __crc32d(crc, le64_bswap(*(u64 *)(p + 16))); + crc = __crc32d(crc, le64_bswap(*(u64 *)(p + 24))); + crc = __crc32d(crc, le64_bswap(*(u64 *)(p + 32))); + crc = __crc32d(crc, le64_bswap(*(u64 *)(p + 40))); + crc = __crc32d(crc, le64_bswap(*(u64 *)(p + 48))); + crc = __crc32d(crc, le64_bswap(*(u64 *)(p + 56))); + p += 64; + len -= 64; + } + } + if (len & 32) { + crc = __crc32d(crc, get_unaligned_le64(p + 0)); + crc = __crc32d(crc, get_unaligned_le64(p + 8)); + crc = __crc32d(crc, get_unaligned_le64(p + 16)); + crc = __crc32d(crc, get_unaligned_le64(p + 24)); + p += 32; + } + if (len & 16) { + crc = __crc32d(crc, get_unaligned_le64(p + 0)); + crc = __crc32d(crc, get_unaligned_le64(p + 8)); + p += 16; + } + if (len & 8) { + crc = __crc32d(crc, get_unaligned_le64(p)); + p += 8; + } + if (len & 4) { + crc = __crc32w(crc, get_unaligned_le32(p)); + p += 4; + } + if (len & 2) { + crc = __crc32h(crc, get_unaligned_le16(p)); + p += 2; + } + if (len & 1) + crc = __crc32b(crc, *p); + return crc; +} +#undef ATTRIBUTES +#endif /* crc32_arm_crc() */ + +/* + * crc32_arm_crc_pmullcombine() - implementation using crc32 instructions, plus + * pmull instructions for CRC combining + * + * This is similar to crc32_arm_crc(), but it enables the use of pmull + * (carryless multiplication) instructions for the steps where the CRCs of + * adjacent data chunks are 
combined. As this greatly speeds up CRC + * combination, this implementation also differs from crc32_arm_crc() in that it + * uses a variable chunk length which can get fairly small. The precomputed + * multipliers needed for the selected chunk length are loaded from a table. + * + * Note that pmull is used here only for combining the CRCs of separately + * checksummed chunks, not for folding the data itself. See crc32_arm_pmull*() + * for implementations that use pmull for folding the data itself. + */ +#if HAVE_CRC32_INTRIN && HAVE_PMULL_INTRIN +# if HAVE_CRC32_NATIVE && HAVE_PMULL_NATIVE +# define ATTRIBUTES +# else +# ifdef ARCH_ARM32 +# define ATTRIBUTES _target_attribute("arch=armv8-a+crc,fpu=crypto-neon-fp-armv8") +# else +# ifdef __clang__ +# define ATTRIBUTES _target_attribute("crc,crypto") +# else +# define ATTRIBUTES _target_attribute("+crc,+crypto") +# endif +# endif +# endif + +#ifndef _MSC_VER +# include +#endif +#include + +/* Do carryless multiplication of two 32-bit values. */ +static forceinline ATTRIBUTES u64 +clmul_u32(u32 a, u32 b) +{ + uint64x2_t res = vreinterpretq_u64_p128( + compat_vmull_p64((poly64_t)a, (poly64_t)b)); + + return vgetq_lane_u64(res, 0); +} + +/* + * Like combine_crcs_slow(), but uses vmull_p64 to do the multiplications more + * quickly, and supports a variable chunk length. The chunk length is + * 'i * CRC32_MIN_VARIABLE_CHUNK_LEN' + * where 1 <= i < ARRAY_LEN(crc32_mults_for_chunklen). 
+ */ +static forceinline ATTRIBUTES u32 +combine_crcs_fast(u32 crc0, u32 crc1, u32 crc2, u32 crc3, size_t i) +{ + u64 res0 = clmul_u32(crc0, crc32_mults_for_chunklen[i][0]); + u64 res1 = clmul_u32(crc1, crc32_mults_for_chunklen[i][1]); + u64 res2 = clmul_u32(crc2, crc32_mults_for_chunklen[i][2]); + + return __crc32d(0, res0 ^ res1 ^ res2) ^ crc3; +} + +#define crc32_arm_crc_pmullcombine crc32_arm_crc_pmullcombine +static u32 ATTRIBUTES MAYBE_UNUSED +crc32_arm_crc_pmullcombine(u32 crc, const u8 *p, size_t len) +{ + const size_t align = -(uintptr_t)p & 7; + + if (len >= align + CRC32_NUM_CHUNKS * CRC32_MIN_VARIABLE_CHUNK_LEN) { + /* Align p to the next 8-byte boundary. */ + if (align) { + if (align & 1) + crc = __crc32b(crc, *p++); + if (align & 2) { + crc = __crc32h(crc, le16_bswap(*(u16 *)p)); + p += 2; + } + if (align & 4) { + crc = __crc32w(crc, le32_bswap(*(u32 *)p)); + p += 4; + } + len -= align; + } + /* + * Handle CRC32_MAX_VARIABLE_CHUNK_LEN specially, so that better + * code is generated for it. 
+ */ + while (len >= CRC32_NUM_CHUNKS * CRC32_MAX_VARIABLE_CHUNK_LEN) { + const u64 *wp0 = (const u64 *)p; + const u64 * const wp0_end = + (const u64 *)(p + CRC32_MAX_VARIABLE_CHUNK_LEN); + u32 crc1 = 0, crc2 = 0, crc3 = 0; + + STATIC_ASSERT(CRC32_NUM_CHUNKS == 4); + STATIC_ASSERT(CRC32_MAX_VARIABLE_CHUNK_LEN % (4 * 8) == 0); + do { + prefetchr(&wp0[64 + 0*CRC32_MAX_VARIABLE_CHUNK_LEN/8]); + prefetchr(&wp0[64 + 1*CRC32_MAX_VARIABLE_CHUNK_LEN/8]); + prefetchr(&wp0[64 + 2*CRC32_MAX_VARIABLE_CHUNK_LEN/8]); + prefetchr(&wp0[64 + 3*CRC32_MAX_VARIABLE_CHUNK_LEN/8]); + crc = __crc32d(crc, le64_bswap(wp0[0*CRC32_MAX_VARIABLE_CHUNK_LEN/8])); + crc1 = __crc32d(crc1, le64_bswap(wp0[1*CRC32_MAX_VARIABLE_CHUNK_LEN/8])); + crc2 = __crc32d(crc2, le64_bswap(wp0[2*CRC32_MAX_VARIABLE_CHUNK_LEN/8])); + crc3 = __crc32d(crc3, le64_bswap(wp0[3*CRC32_MAX_VARIABLE_CHUNK_LEN/8])); + wp0++; + crc = __crc32d(crc, le64_bswap(wp0[0*CRC32_MAX_VARIABLE_CHUNK_LEN/8])); + crc1 = __crc32d(crc1, le64_bswap(wp0[1*CRC32_MAX_VARIABLE_CHUNK_LEN/8])); + crc2 = __crc32d(crc2, le64_bswap(wp0[2*CRC32_MAX_VARIABLE_CHUNK_LEN/8])); + crc3 = __crc32d(crc3, le64_bswap(wp0[3*CRC32_MAX_VARIABLE_CHUNK_LEN/8])); + wp0++; + crc = __crc32d(crc, le64_bswap(wp0[0*CRC32_MAX_VARIABLE_CHUNK_LEN/8])); + crc1 = __crc32d(crc1, le64_bswap(wp0[1*CRC32_MAX_VARIABLE_CHUNK_LEN/8])); + crc2 = __crc32d(crc2, le64_bswap(wp0[2*CRC32_MAX_VARIABLE_CHUNK_LEN/8])); + crc3 = __crc32d(crc3, le64_bswap(wp0[3*CRC32_MAX_VARIABLE_CHUNK_LEN/8])); + wp0++; + crc = __crc32d(crc, le64_bswap(wp0[0*CRC32_MAX_VARIABLE_CHUNK_LEN/8])); + crc1 = __crc32d(crc1, le64_bswap(wp0[1*CRC32_MAX_VARIABLE_CHUNK_LEN/8])); + crc2 = __crc32d(crc2, le64_bswap(wp0[2*CRC32_MAX_VARIABLE_CHUNK_LEN/8])); + crc3 = __crc32d(crc3, le64_bswap(wp0[3*CRC32_MAX_VARIABLE_CHUNK_LEN/8])); + wp0++; + } while (wp0 != wp0_end); + crc = combine_crcs_fast(crc, crc1, crc2, crc3, + ARRAY_LEN(crc32_mults_for_chunklen) - 1); + p += CRC32_NUM_CHUNKS * CRC32_MAX_VARIABLE_CHUNK_LEN; + len -= 
CRC32_NUM_CHUNKS * CRC32_MAX_VARIABLE_CHUNK_LEN; + } + /* Handle up to one variable-length chunk. */ + if (len >= CRC32_NUM_CHUNKS * CRC32_MIN_VARIABLE_CHUNK_LEN) { + const size_t i = len / (CRC32_NUM_CHUNKS * + CRC32_MIN_VARIABLE_CHUNK_LEN); + const size_t chunk_len = + i * CRC32_MIN_VARIABLE_CHUNK_LEN; + const u64 *wp0 = (const u64 *)(p + 0*chunk_len); + const u64 *wp1 = (const u64 *)(p + 1*chunk_len); + const u64 *wp2 = (const u64 *)(p + 2*chunk_len); + const u64 *wp3 = (const u64 *)(p + 3*chunk_len); + const u64 * const wp0_end = wp1; + u32 crc1 = 0, crc2 = 0, crc3 = 0; + + STATIC_ASSERT(CRC32_NUM_CHUNKS == 4); + STATIC_ASSERT(CRC32_MIN_VARIABLE_CHUNK_LEN % (4 * 8) == 0); + do { + prefetchr(wp0 + 64); + prefetchr(wp1 + 64); + prefetchr(wp2 + 64); + prefetchr(wp3 + 64); + crc = __crc32d(crc, le64_bswap(*wp0++)); + crc1 = __crc32d(crc1, le64_bswap(*wp1++)); + crc2 = __crc32d(crc2, le64_bswap(*wp2++)); + crc3 = __crc32d(crc3, le64_bswap(*wp3++)); + crc = __crc32d(crc, le64_bswap(*wp0++)); + crc1 = __crc32d(crc1, le64_bswap(*wp1++)); + crc2 = __crc32d(crc2, le64_bswap(*wp2++)); + crc3 = __crc32d(crc3, le64_bswap(*wp3++)); + crc = __crc32d(crc, le64_bswap(*wp0++)); + crc1 = __crc32d(crc1, le64_bswap(*wp1++)); + crc2 = __crc32d(crc2, le64_bswap(*wp2++)); + crc3 = __crc32d(crc3, le64_bswap(*wp3++)); + crc = __crc32d(crc, le64_bswap(*wp0++)); + crc1 = __crc32d(crc1, le64_bswap(*wp1++)); + crc2 = __crc32d(crc2, le64_bswap(*wp2++)); + crc3 = __crc32d(crc3, le64_bswap(*wp3++)); + } while (wp0 != wp0_end); + crc = combine_crcs_fast(crc, crc1, crc2, crc3, i); + p += CRC32_NUM_CHUNKS * chunk_len; + len -= CRC32_NUM_CHUNKS * chunk_len; + } + + while (len >= 32) { + crc = __crc32d(crc, le64_bswap(*(u64 *)(p + 0))); + crc = __crc32d(crc, le64_bswap(*(u64 *)(p + 8))); + crc = __crc32d(crc, le64_bswap(*(u64 *)(p + 16))); + crc = __crc32d(crc, le64_bswap(*(u64 *)(p + 24))); + p += 32; + len -= 32; + } + } else { + while (len >= 32) { + crc = __crc32d(crc, get_unaligned_le64(p + 
0)); + crc = __crc32d(crc, get_unaligned_le64(p + 8)); + crc = __crc32d(crc, get_unaligned_le64(p + 16)); + crc = __crc32d(crc, get_unaligned_le64(p + 24)); + p += 32; + len -= 32; + } + } + if (len & 16) { + crc = __crc32d(crc, get_unaligned_le64(p + 0)); + crc = __crc32d(crc, get_unaligned_le64(p + 8)); + p += 16; + } + if (len & 8) { + crc = __crc32d(crc, get_unaligned_le64(p)); + p += 8; + } + if (len & 4) { + crc = __crc32w(crc, get_unaligned_le32(p)); + p += 4; + } + if (len & 2) { + crc = __crc32h(crc, get_unaligned_le16(p)); + p += 2; + } + if (len & 1) + crc = __crc32b(crc, *p); + return crc; +} +#undef ATTRIBUTES +#endif /* crc32_arm_crc_pmullcombine() */ + +/* + * crc32_arm_pmullx4() - implementation using "folding" with pmull instructions + * + * This implementation is intended for CPUs that support pmull instructions but + * not crc32 instructions. + */ +#if HAVE_PMULL_INTRIN +# define crc32_arm_pmullx4 crc32_arm_pmullx4 +# define SUFFIX _pmullx4 +# if HAVE_PMULL_NATIVE +# define ATTRIBUTES +# else +# ifdef ARCH_ARM32 +# define ATTRIBUTES _target_attribute("fpu=crypto-neon-fp-armv8") +# else +# ifdef __clang__ +# define ATTRIBUTES _target_attribute("crypto") +# else +# define ATTRIBUTES _target_attribute("+crypto") +# endif +# endif +# endif +# define ENABLE_EOR3 0 +# include "crc32_pmull_helpers.h" + +static u32 ATTRIBUTES MAYBE_UNUSED +crc32_arm_pmullx4(u32 crc, const u8 *p, size_t len) +{ + static const u64 _aligned_attribute(16) mults[3][2] = { + CRC32_1VECS_MULTS, + CRC32_4VECS_MULTS, + CRC32_2VECS_MULTS, + }; + static const u64 _aligned_attribute(16) final_mults[3][2] = { + { CRC32_FINAL_MULT, 0 }, + { CRC32_BARRETT_CONSTANT_1, 0 }, + { CRC32_BARRETT_CONSTANT_2, 0 }, + }; + const uint8x16_t zeroes = vdupq_n_u8(0); + const uint8x16_t mask32 = vreinterpretq_u8_u64(vdupq_n_u64(0xFFFFFFFF)); + const poly64x2_t multipliers_1 = load_multipliers(mults[0]); + uint8x16_t v0, v1, v2, v3; + + if (len < 64 + 15) { + if (len < 16) + return crc32_slice1(crc, 
p, len); + v0 = veorq_u8(vld1q_u8(p), u32_to_bytevec(crc)); + p += 16; + len -= 16; + while (len >= 16) { + v0 = fold_vec(v0, vld1q_u8(p), multipliers_1); + p += 16; + len -= 16; + } + } else { + const poly64x2_t multipliers_4 = load_multipliers(mults[1]); + const poly64x2_t multipliers_2 = load_multipliers(mults[2]); + const size_t align = -(uintptr_t)p & 15; + const uint8x16_t *vp; + + v0 = veorq_u8(vld1q_u8(p), u32_to_bytevec(crc)); + p += 16; + /* Align p to the next 16-byte boundary. */ + if (align) { + v0 = fold_partial_vec(v0, p, align, multipliers_1); + p += align; + len -= align; + } + vp = (const uint8x16_t *)p; + v1 = *vp++; + v2 = *vp++; + v3 = *vp++; + while (len >= 64 + 64) { + v0 = fold_vec(v0, *vp++, multipliers_4); + v1 = fold_vec(v1, *vp++, multipliers_4); + v2 = fold_vec(v2, *vp++, multipliers_4); + v3 = fold_vec(v3, *vp++, multipliers_4); + len -= 64; + } + v0 = fold_vec(v0, v2, multipliers_2); + v1 = fold_vec(v1, v3, multipliers_2); + if (len & 32) { + v0 = fold_vec(v0, *vp++, multipliers_2); + v1 = fold_vec(v1, *vp++, multipliers_2); + } + v0 = fold_vec(v0, v1, multipliers_1); + if (len & 16) + v0 = fold_vec(v0, *vp++, multipliers_1); + p = (const u8 *)vp; + len &= 15; + } + + /* Handle any remaining partial block now before reducing to 32 bits. */ + if (len) + v0 = fold_partial_vec(v0, p, len, multipliers_1); + + /* + * Fold 128 => 96 bits. This also implicitly appends 32 zero bits, + * which is equivalent to multiplying by x^32. This is needed because + * the CRC is defined as M(x)*x^32 mod G(x), not just M(x) mod G(x). + */ + + v0 = veorq_u8(vextq_u8(v0, zeroes, 8), + clmul_high(vextq_u8(zeroes, v0, 8), multipliers_1)); + + /* Fold 96 => 64 bits. */ + v0 = veorq_u8(vextq_u8(v0, zeroes, 4), + clmul_low(vandq_u8(v0, mask32), + load_multipliers(final_mults[0]))); + + /* Reduce 64 => 32 bits using Barrett reduction. 
*/ + v1 = clmul_low(vandq_u8(v0, mask32), load_multipliers(final_mults[1])); + v1 = clmul_low(vandq_u8(v1, mask32), load_multipliers(final_mults[2])); + return vgetq_lane_u32(vreinterpretq_u32_u8(veorq_u8(v0, v1)), 1); +} +#undef SUFFIX +#undef ATTRIBUTES +#undef ENABLE_EOR3 +#endif /* crc32_arm_pmullx4() */ + +/* + * crc32_arm_pmullx12_crc() - large-stride implementation using "folding" with + * pmull instructions, where crc32 instructions are also available + * + * See crc32_pmull_wide.h for explanation. + */ +#if defined(ARCH_ARM64) && HAVE_PMULL_INTRIN && HAVE_CRC32_INTRIN +# define crc32_arm_pmullx12_crc crc32_arm_pmullx12_crc +# define SUFFIX _pmullx12_crc +# if HAVE_PMULL_NATIVE && HAVE_CRC32_NATIVE +# define ATTRIBUTES +# else +# ifdef __clang__ +# define ATTRIBUTES _target_attribute("crypto,crc") +# else +# define ATTRIBUTES _target_attribute("+crypto,+crc") +# endif +# endif +# define ENABLE_EOR3 0 +# include "crc32_pmull_wide.h" +#endif + +/* + * crc32_arm_pmullx12_crc_eor3() + * + * This like crc32_arm_pmullx12_crc(), but it adds the eor3 instruction (from + * the sha3 extension) for even better performance. + * + * Note: we require HAVE_SHA3_TARGET (or HAVE_SHA3_NATIVE) rather than + * HAVE_SHA3_INTRIN, as we have an inline asm fallback for eor3. + */ +#if defined(ARCH_ARM64) && HAVE_PMULL_INTRIN && HAVE_CRC32_INTRIN && \ + (HAVE_SHA3_TARGET || HAVE_SHA3_NATIVE) +# define crc32_arm_pmullx12_crc_eor3 crc32_arm_pmullx12_crc_eor3 +# define SUFFIX _pmullx12_crc_eor3 +# if HAVE_PMULL_NATIVE && HAVE_CRC32_NATIVE && HAVE_SHA3_NATIVE +# define ATTRIBUTES +# else +# ifdef __clang__ +# define ATTRIBUTES _target_attribute("crypto,crc,sha3") + /* + * With gcc, arch=armv8.2-a is needed for the sha3 intrinsics, unless the + * default target is armv8.3-a or later in which case it must be omitted. + * armv8.3-a or later can be detected by checking for __ARM_FEATURE_JCVT. 
+ */ +# elif defined(__ARM_FEATURE_JCVT) +# define ATTRIBUTES _target_attribute("+crypto,+crc,+sha3") +# else +# define ATTRIBUTES _target_attribute("arch=armv8.2-a+crypto+crc+sha3") +# endif +# endif +# define ENABLE_EOR3 1 +# include "crc32_pmull_wide.h" +#endif + +/* + * On the Apple M1 processor, crc32 instructions max out at about 25.5 GB/s in + * the best case of using a 3-way or greater interleaved chunked implementation, + * whereas a pmull-based implementation achieves 68 GB/s provided that the + * stride length is large enough (about 10+ vectors with eor3, or 12+ without). + * + * For now we assume that crc32 instructions are preferable in other cases. + */ +#define PREFER_PMULL_TO_CRC 0 +#ifdef __APPLE__ +# include +# if TARGET_OS_OSX +# undef PREFER_PMULL_TO_CRC +# define PREFER_PMULL_TO_CRC 1 +# endif +#endif + +/* + * If the best implementation is statically available, use it unconditionally. + * Otherwise choose the best implementation at runtime. + */ +#if PREFER_PMULL_TO_CRC && defined(crc32_arm_pmullx12_crc_eor3) && \ + HAVE_PMULL_NATIVE && HAVE_CRC32_NATIVE && HAVE_SHA3_NATIVE +# define DEFAULT_IMPL crc32_arm_pmullx12_crc_eor3 +#elif !PREFER_PMULL_TO_CRC && defined(crc32_arm_crc_pmullcombine) && \ + HAVE_CRC32_NATIVE && HAVE_PMULL_NATIVE +# define DEFAULT_IMPL crc32_arm_crc_pmullcombine +#else +static inline crc32_func_t +arch_select_crc32_func(void) +{ + const u32 features MAYBE_UNUSED = get_arm_cpu_features(); + +#if PREFER_PMULL_TO_CRC && defined(crc32_arm_pmullx12_crc_eor3) + if (HAVE_PMULL(features) && HAVE_CRC32(features) && HAVE_SHA3(features)) + return crc32_arm_pmullx12_crc_eor3; +#endif +#if PREFER_PMULL_TO_CRC && defined(crc32_arm_pmullx12_crc) + if (HAVE_PMULL(features) && HAVE_CRC32(features)) + return crc32_arm_pmullx12_crc; +#endif +#ifdef crc32_arm_crc_pmullcombine + if (HAVE_CRC32(features) && HAVE_PMULL(features)) + return crc32_arm_crc_pmullcombine; +#endif +#ifdef crc32_arm_crc + if (HAVE_CRC32(features)) + return 
crc32_arm_crc; +#endif +#ifdef crc32_arm_pmullx4 + if (HAVE_PMULL(features)) + return crc32_arm_pmullx4; +#endif + return NULL; +} +#define arch_select_crc32_func arch_select_crc32_func +#endif + +#endif /* LIB_ARM_CRC32_IMPL_H */ diff --git a/tools/z64compress/src/enc/libdeflate/lib/arm/crc32_pmull_helpers.h b/tools/z64compress/src/enc/libdeflate/lib/arm/crc32_pmull_helpers.h new file mode 100644 index 000000000..1cd1cc188 --- /dev/null +++ b/tools/z64compress/src/enc/libdeflate/lib/arm/crc32_pmull_helpers.h @@ -0,0 +1,184 @@ +/* + * arm/crc32_pmull_helpers.h - helper functions for CRC-32 folding with PMULL + * + * Copyright 2022 Eric Biggers + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +/* + * This file is a "template" for instantiating helper functions for CRC folding + * with pmull instructions. It accepts the following parameters: + * + * SUFFIX: + * Name suffix to append to all instantiated functions. 
+ * ATTRIBUTES: + * Target function attributes to use. + * ENABLE_EOR3: + * Use the eor3 instruction (from the sha3 extension). + */ + +#include + +/* Create a vector with 'a' in the first 4 bytes, and the rest zeroed out. */ +#undef u32_to_bytevec +static forceinline ATTRIBUTES uint8x16_t +ADD_SUFFIX(u32_to_bytevec)(u32 a) +{ + return vreinterpretq_u8_u32(vsetq_lane_u32(a, vdupq_n_u32(0), 0)); +} +#define u32_to_bytevec ADD_SUFFIX(u32_to_bytevec) + +/* Load two 64-bit values into a vector. */ +#undef load_multipliers +static forceinline ATTRIBUTES poly64x2_t +ADD_SUFFIX(load_multipliers)(const u64 p[2]) +{ + return vreinterpretq_p64_u64(vld1q_u64(p)); +} +#define load_multipliers ADD_SUFFIX(load_multipliers) + +/* Do carryless multiplication of the low halves of two vectors. */ +#undef clmul_low +static forceinline ATTRIBUTES uint8x16_t +ADD_SUFFIX(clmul_low)(uint8x16_t a, poly64x2_t b) +{ + return vreinterpretq_u8_p128( + compat_vmull_p64(vgetq_lane_p64(vreinterpretq_p64_u8(a), 0), + vgetq_lane_p64(b, 0))); +} +#define clmul_low ADD_SUFFIX(clmul_low) + +/* Do carryless multiplication of the high halves of two vectors. */ +#undef clmul_high +static forceinline ATTRIBUTES uint8x16_t +ADD_SUFFIX(clmul_high)(uint8x16_t a, poly64x2_t b) +{ +#if defined(__clang__) && defined(ARCH_ARM64) + /* + * Use inline asm to ensure that pmull2 is really used. This works + * around clang bug https://github.com/llvm/llvm-project/issues/52868. 
+ */ + uint8x16_t res; + + __asm__("pmull2 %0.1q, %1.2d, %2.2d" : "=w" (res) : "w" (a), "w" (b)); + return res; +#else + return vreinterpretq_u8_p128(vmull_high_p64(vreinterpretq_p64_u8(a), b)); +#endif +} +#define clmul_high ADD_SUFFIX(clmul_high) + +#undef eor3 +static forceinline ATTRIBUTES uint8x16_t +ADD_SUFFIX(eor3)(uint8x16_t a, uint8x16_t b, uint8x16_t c) +{ +#if ENABLE_EOR3 +#if HAVE_SHA3_INTRIN + return veor3q_u8(a, b, c); +#else + uint8x16_t res; + + __asm__("eor3 %0.16b, %1.16b, %2.16b, %3.16b" + : "=w" (res) : "w" (a), "w" (b), "w" (c)); + return res; +#endif +#else /* ENABLE_EOR3 */ + return veorq_u8(veorq_u8(a, b), c); +#endif /* !ENABLE_EOR3 */ +} +#define eor3 ADD_SUFFIX(eor3) + +#undef fold_vec +static forceinline ATTRIBUTES uint8x16_t +ADD_SUFFIX(fold_vec)(uint8x16_t src, uint8x16_t dst, poly64x2_t multipliers) +{ + uint8x16_t a = clmul_low(src, multipliers); + uint8x16_t b = clmul_high(src, multipliers); + + return eor3(a, b, dst); +} +#define fold_vec ADD_SUFFIX(fold_vec) + +#undef vtbl +static forceinline ATTRIBUTES uint8x16_t +ADD_SUFFIX(vtbl)(uint8x16_t table, uint8x16_t indices) +{ +#ifdef ARCH_ARM64 + return vqtbl1q_u8(table, indices); +#else + uint8x8x2_t tab2; + + tab2.val[0] = vget_low_u8(table); + tab2.val[1] = vget_high_u8(table); + + return vcombine_u8(vtbl2_u8(tab2, vget_low_u8(indices)), + vtbl2_u8(tab2, vget_high_u8(indices))); +#endif +} +#define vtbl ADD_SUFFIX(vtbl) + +/* + * Given v containing a 16-byte polynomial, and a pointer 'p' that points to the + * next '1 <= len <= 15' data bytes, rearrange the concatenation of v and the + * data into vectors x0 and x1 that contain 'len' bytes and 16 bytes, + * respectively. Then fold x0 into x1 and return the result. Assumes that + * 'p + len - 16' is in-bounds. 
+ */ +#undef fold_partial_vec +static forceinline ATTRIBUTES MAYBE_UNUSED uint8x16_t +ADD_SUFFIX(fold_partial_vec)(uint8x16_t v, const u8 *p, size_t len, + poly64x2_t multipliers_1) +{ + /* + * vtbl(v, shift_tab[len..len+15]) left shifts v by 16-len bytes. + * vtbl(v, shift_tab[len+16..len+31]) right shifts v by len bytes. + */ + static const u8 shift_tab[48] = { + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + }; + const uint8x16_t lshift = vld1q_u8(&shift_tab[len]); + const uint8x16_t rshift = vld1q_u8(&shift_tab[len + 16]); + uint8x16_t x0, x1, bsl_mask; + + /* x0 = v left-shifted by '16 - len' bytes */ + x0 = vtbl(v, lshift); + + /* Create a vector of '16 - len' 0x00 bytes, then 'len' 0xff bytes. */ + bsl_mask = vreinterpretq_u8_s8( + vshrq_n_s8(vreinterpretq_s8_u8(rshift), 7)); + + /* + * x1 = the last '16 - len' bytes from v (i.e. v right-shifted by 'len' + * bytes) followed by the remaining data. 
+ */ + x1 = vbslq_u8(bsl_mask /* 0 bits select from arg3, 1 bits from arg2 */, + vld1q_u8(p + len - 16), vtbl(v, rshift)); + + return fold_vec(x0, x1, multipliers_1); +} +#define fold_partial_vec ADD_SUFFIX(fold_partial_vec) diff --git a/tools/z64compress/src/enc/libdeflate/lib/arm/crc32_pmull_wide.h b/tools/z64compress/src/enc/libdeflate/lib/arm/crc32_pmull_wide.h new file mode 100644 index 000000000..a72e1d876 --- /dev/null +++ b/tools/z64compress/src/enc/libdeflate/lib/arm/crc32_pmull_wide.h @@ -0,0 +1,227 @@ +/* + * arm/crc32_pmull_wide.h - gzip CRC-32 with PMULL (extra-wide version) + * + * Copyright 2022 Eric Biggers + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +/* + * This file is a "template" for instantiating PMULL-based crc32_arm functions. + * The "parameters" are: + * + * SUFFIX: + * Name suffix to append to all instantiated functions. + * ATTRIBUTES: + * Target function attributes to use. 
+ * ENABLE_EOR3: + * Use the eor3 instruction (from the sha3 extension). + * + * This is the extra-wide version; it uses an unusually large stride length of + * 12, and it assumes that crc32 instructions are available too. It's intended + * for powerful CPUs that support both pmull and crc32 instructions, but where + * throughput of pmull and xor (given enough instructions issued in parallel) is + * significantly higher than that of crc32, thus making the crc32 instructions + * (counterintuitively) not actually the fastest way to compute the CRC-32. The + * Apple M1 processor is an example of such a CPU. + */ + +#ifndef _MSC_VER +# include +#endif +#include + +#include "crc32_pmull_helpers.h" + +static u32 ATTRIBUTES MAYBE_UNUSED +ADD_SUFFIX(crc32_arm)(u32 crc, const u8 *p, size_t len) +{ + uint8x16_t v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11; + + if (len < 3 * 192) { + static const u64 _aligned_attribute(16) mults[3][2] = { + CRC32_4VECS_MULTS, CRC32_2VECS_MULTS, CRC32_1VECS_MULTS, + }; + poly64x2_t multipliers_4, multipliers_2, multipliers_1; + + if (len < 64) + goto tail; + multipliers_4 = load_multipliers(mults[0]); + multipliers_2 = load_multipliers(mults[1]); + multipliers_1 = load_multipliers(mults[2]); + /* + * Short length; don't bother aligning the pointer, and fold + * 64 bytes (4 vectors) at a time, at most. 
+ */ + v0 = veorq_u8(vld1q_u8(p + 0), u32_to_bytevec(crc)); + v1 = vld1q_u8(p + 16); + v2 = vld1q_u8(p + 32); + v3 = vld1q_u8(p + 48); + p += 64; + len -= 64; + while (len >= 64) { + v0 = fold_vec(v0, vld1q_u8(p + 0), multipliers_4); + v1 = fold_vec(v1, vld1q_u8(p + 16), multipliers_4); + v2 = fold_vec(v2, vld1q_u8(p + 32), multipliers_4); + v3 = fold_vec(v3, vld1q_u8(p + 48), multipliers_4); + p += 64; + len -= 64; + } + v0 = fold_vec(v0, v2, multipliers_2); + v1 = fold_vec(v1, v3, multipliers_2); + if (len >= 32) { + v0 = fold_vec(v0, vld1q_u8(p + 0), multipliers_2); + v1 = fold_vec(v1, vld1q_u8(p + 16), multipliers_2); + p += 32; + len -= 32; + } + v0 = fold_vec(v0, v1, multipliers_1); + } else { + static const u64 _aligned_attribute(16) mults[4][2] = { + CRC32_12VECS_MULTS, CRC32_6VECS_MULTS, + CRC32_3VECS_MULTS, CRC32_1VECS_MULTS, + }; + const poly64x2_t multipliers_12 = load_multipliers(mults[0]); + const poly64x2_t multipliers_6 = load_multipliers(mults[1]); + const poly64x2_t multipliers_3 = load_multipliers(mults[2]); + const poly64x2_t multipliers_1 = load_multipliers(mults[3]); + const size_t align = -(uintptr_t)p & 15; + const uint8x16_t *vp; + + /* Align p to the next 16-byte boundary. */ + if (align) { + if (align & 1) + crc = __crc32b(crc, *p++); + if (align & 2) { + crc = __crc32h(crc, le16_bswap(*(u16 *)p)); + p += 2; + } + if (align & 4) { + crc = __crc32w(crc, le32_bswap(*(u32 *)p)); + p += 4; + } + if (align & 8) { + crc = __crc32d(crc, le64_bswap(*(u64 *)p)); + p += 8; + } + len -= align; + } + vp = (const uint8x16_t *)p; + v0 = veorq_u8(*vp++, u32_to_bytevec(crc)); + v1 = *vp++; + v2 = *vp++; + v3 = *vp++; + v4 = *vp++; + v5 = *vp++; + v6 = *vp++; + v7 = *vp++; + v8 = *vp++; + v9 = *vp++; + v10 = *vp++; + v11 = *vp++; + len -= 192; + /* Fold 192 bytes (12 vectors) at a time. 
*/ + do { + v0 = fold_vec(v0, *vp++, multipliers_12); + v1 = fold_vec(v1, *vp++, multipliers_12); + v2 = fold_vec(v2, *vp++, multipliers_12); + v3 = fold_vec(v3, *vp++, multipliers_12); + v4 = fold_vec(v4, *vp++, multipliers_12); + v5 = fold_vec(v5, *vp++, multipliers_12); + v6 = fold_vec(v6, *vp++, multipliers_12); + v7 = fold_vec(v7, *vp++, multipliers_12); + v8 = fold_vec(v8, *vp++, multipliers_12); + v9 = fold_vec(v9, *vp++, multipliers_12); + v10 = fold_vec(v10, *vp++, multipliers_12); + v11 = fold_vec(v11, *vp++, multipliers_12); + len -= 192; + } while (len >= 192); + + /* + * Fewer than 192 bytes left. Fold v0-v11 down to just v0, + * while processing up to 144 more bytes. + */ + v0 = fold_vec(v0, v6, multipliers_6); + v1 = fold_vec(v1, v7, multipliers_6); + v2 = fold_vec(v2, v8, multipliers_6); + v3 = fold_vec(v3, v9, multipliers_6); + v4 = fold_vec(v4, v10, multipliers_6); + v5 = fold_vec(v5, v11, multipliers_6); + if (len >= 96) { + v0 = fold_vec(v0, *vp++, multipliers_6); + v1 = fold_vec(v1, *vp++, multipliers_6); + v2 = fold_vec(v2, *vp++, multipliers_6); + v3 = fold_vec(v3, *vp++, multipliers_6); + v4 = fold_vec(v4, *vp++, multipliers_6); + v5 = fold_vec(v5, *vp++, multipliers_6); + len -= 96; + } + v0 = fold_vec(v0, v3, multipliers_3); + v1 = fold_vec(v1, v4, multipliers_3); + v2 = fold_vec(v2, v5, multipliers_3); + if (len >= 48) { + v0 = fold_vec(v0, *vp++, multipliers_3); + v1 = fold_vec(v1, *vp++, multipliers_3); + v2 = fold_vec(v2, *vp++, multipliers_3); + len -= 48; + } + v0 = fold_vec(v0, v1, multipliers_1); + v0 = fold_vec(v0, v2, multipliers_1); + p = (const u8 *)vp; + } + /* Reduce 128 to 32 bits using crc32 instructions. */ + crc = __crc32d(0, vgetq_lane_u64(vreinterpretq_u64_u8(v0), 0)); + crc = __crc32d(crc, vgetq_lane_u64(vreinterpretq_u64_u8(v0), 1)); +tail: + /* Finish up the remainder using crc32 instructions. 
*/ + if (len & 32) { + crc = __crc32d(crc, get_unaligned_le64(p + 0)); + crc = __crc32d(crc, get_unaligned_le64(p + 8)); + crc = __crc32d(crc, get_unaligned_le64(p + 16)); + crc = __crc32d(crc, get_unaligned_le64(p + 24)); + p += 32; + } + if (len & 16) { + crc = __crc32d(crc, get_unaligned_le64(p + 0)); + crc = __crc32d(crc, get_unaligned_le64(p + 8)); + p += 16; + } + if (len & 8) { + crc = __crc32d(crc, get_unaligned_le64(p)); + p += 8; + } + if (len & 4) { + crc = __crc32w(crc, get_unaligned_le32(p)); + p += 4; + } + if (len & 2) { + crc = __crc32h(crc, get_unaligned_le16(p)); + p += 2; + } + if (len & 1) + crc = __crc32b(crc, *p); + return crc; +} + +#undef SUFFIX +#undef ATTRIBUTES +#undef ENABLE_EOR3 diff --git a/tools/z64compress/src/enc/libdeflate/lib/arm/matchfinder_impl.h b/tools/z64compress/src/enc/libdeflate/lib/arm/matchfinder_impl.h new file mode 100644 index 000000000..b20f56a3b --- /dev/null +++ b/tools/z64compress/src/enc/libdeflate/lib/arm/matchfinder_impl.h @@ -0,0 +1,79 @@ +/* + * arm/matchfinder_impl.h - ARM implementations of matchfinder functions + * + * Copyright 2016 Eric Biggers + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef LIB_ARM_MATCHFINDER_IMPL_H +#define LIB_ARM_MATCHFINDER_IMPL_H + +#include "cpu_features.h" + +#if HAVE_NEON_NATIVE +# include +static forceinline void +matchfinder_init_neon(mf_pos_t *data, size_t size) +{ + int16x8_t *p = (int16x8_t *)data; + int16x8_t v = vdupq_n_s16(MATCHFINDER_INITVAL); + + STATIC_ASSERT(MATCHFINDER_MEM_ALIGNMENT % sizeof(*p) == 0); + STATIC_ASSERT(MATCHFINDER_SIZE_ALIGNMENT % (4 * sizeof(*p)) == 0); + STATIC_ASSERT(sizeof(mf_pos_t) == 2); + + do { + p[0] = v; + p[1] = v; + p[2] = v; + p[3] = v; + p += 4; + size -= 4 * sizeof(*p); + } while (size != 0); +} +#define matchfinder_init matchfinder_init_neon + +static forceinline void +matchfinder_rebase_neon(mf_pos_t *data, size_t size) +{ + int16x8_t *p = (int16x8_t *)data; + int16x8_t v = vdupq_n_s16((u16)-MATCHFINDER_WINDOW_SIZE); + + STATIC_ASSERT(MATCHFINDER_MEM_ALIGNMENT % sizeof(*p) == 0); + STATIC_ASSERT(MATCHFINDER_SIZE_ALIGNMENT % (4 * sizeof(*p)) == 0); + STATIC_ASSERT(sizeof(mf_pos_t) == 2); + + do { + p[0] = vqaddq_s16(p[0], v); + p[1] = vqaddq_s16(p[1], v); + p[2] = vqaddq_s16(p[2], v); + p[3] = vqaddq_s16(p[3], v); + p += 4; + size -= 4 * sizeof(*p); + } while (size != 0); +} +#define matchfinder_rebase matchfinder_rebase_neon + +#endif /* HAVE_NEON_NATIVE */ + +#endif /* LIB_ARM_MATCHFINDER_IMPL_H */ diff --git a/tools/z64compress/src/enc/libdeflate/lib/bt_matchfinder.h b/tools/z64compress/src/enc/libdeflate/lib/bt_matchfinder.h new file mode 100644 index 000000000..b247d4bcc --- /dev/null +++ b/tools/z64compress/src/enc/libdeflate/lib/bt_matchfinder.h @@ -0,0 +1,342 @@ +/* + * bt_matchfinder.h - Lempel-Ziv matchfinding with a hash table of binary trees + * + * Copyright 2016 Eric Biggers + * + * 
Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * ---------------------------------------------------------------------------- + * + * This is a Binary Trees (bt) based matchfinder. + * + * The main data structure is a hash table where each hash bucket contains a + * binary tree of sequences whose first 4 bytes share the same hash code. Each + * sequence is identified by its starting position in the input buffer. Each + * binary tree is always sorted such that each left child represents a sequence + * lexicographically lesser than its parent and each right child represents a + * sequence lexicographically greater than its parent. + * + * The algorithm processes the input buffer sequentially. At each byte + * position, the hash code of the first 4 bytes of the sequence beginning at + * that position (the sequence being matched against) is computed. This + * identifies the hash bucket to use for that position. 
Then, a new binary tree + * node is created to represent the current sequence. Then, in a single tree + * traversal, the hash bucket's binary tree is searched for matches and is + * re-rooted at the new node. + * + * Compared to the simpler algorithm that uses linked lists instead of binary + * trees (see hc_matchfinder.h), the binary tree version gains more information + * at each node visitation. Ideally, the binary tree version will examine only + * 'log(n)' nodes to find the same matches that the linked list version will + * find by examining 'n' nodes. In addition, the binary tree version can + * examine fewer bytes at each node by taking advantage of the common prefixes + * that result from the sort order, whereas the linked list version may have to + * examine up to the full length of the match at each node. + * + * However, it is not always best to use the binary tree version. It requires + * nearly twice as much memory as the linked list version, and it takes time to + * keep the binary trees sorted, even at positions where the compressor does not + * need matches. Generally, when doing fast compression on small buffers, + * binary trees are the wrong approach. They are best suited for thorough + * compression and/or large buffers. + * + * ---------------------------------------------------------------------------- + */ + +#ifndef LIB_BT_MATCHFINDER_H +#define LIB_BT_MATCHFINDER_H + +#include "matchfinder_common.h" + +#define BT_MATCHFINDER_HASH3_ORDER 16 +#define BT_MATCHFINDER_HASH3_WAYS 2 +#define BT_MATCHFINDER_HASH4_ORDER 16 + +#define BT_MATCHFINDER_TOTAL_HASH_SIZE \ + (((1UL << BT_MATCHFINDER_HASH3_ORDER) * BT_MATCHFINDER_HASH3_WAYS + \ + (1UL << BT_MATCHFINDER_HASH4_ORDER)) * sizeof(mf_pos_t)) + +/* Representation of a match found by the bt_matchfinder */ +struct lz_match { + + /* The number of bytes matched. */ + u16 length; + + /* The offset back from the current position that was matched. 
*/ + u16 offset; +}; + +struct MATCHFINDER_ALIGNED bt_matchfinder { + + /* The hash table for finding length 3 matches */ + mf_pos_t hash3_tab[1UL << BT_MATCHFINDER_HASH3_ORDER][BT_MATCHFINDER_HASH3_WAYS]; + + /* The hash table which contains the roots of the binary trees for + * finding length 4+ matches */ + mf_pos_t hash4_tab[1UL << BT_MATCHFINDER_HASH4_ORDER]; + + /* The child node references for the binary trees. The left and right + * children of the node for the sequence with position 'pos' are + * 'child_tab[pos * 2]' and 'child_tab[pos * 2 + 1]', respectively. */ + mf_pos_t child_tab[2UL * MATCHFINDER_WINDOW_SIZE]; +}; + +/* Prepare the matchfinder for a new input buffer. */ +static forceinline void +bt_matchfinder_init(struct bt_matchfinder *mf) +{ + STATIC_ASSERT(BT_MATCHFINDER_TOTAL_HASH_SIZE % + MATCHFINDER_SIZE_ALIGNMENT == 0); + + matchfinder_init((mf_pos_t *)mf, BT_MATCHFINDER_TOTAL_HASH_SIZE); +} + +static forceinline void +bt_matchfinder_slide_window(struct bt_matchfinder *mf) +{ + STATIC_ASSERT(sizeof(*mf) % MATCHFINDER_SIZE_ALIGNMENT == 0); + + matchfinder_rebase((mf_pos_t *)mf, sizeof(*mf)); +} + +static forceinline mf_pos_t * +bt_left_child(struct bt_matchfinder *mf, s32 node) +{ + return &mf->child_tab[2 * (node & (MATCHFINDER_WINDOW_SIZE - 1)) + 0]; +} + +static forceinline mf_pos_t * +bt_right_child(struct bt_matchfinder *mf, s32 node) +{ + return &mf->child_tab[2 * (node & (MATCHFINDER_WINDOW_SIZE - 1)) + 1]; +} + +/* The minimum permissible value of 'max_len' for bt_matchfinder_get_matches() + * and bt_matchfinder_skip_byte(). There must be sufficiently many bytes + * remaining to load a 32-bit integer from the *next* position. */ +#define BT_MATCHFINDER_REQUIRED_NBYTES 5 + +/* Advance the binary tree matchfinder by one byte, optionally recording + * matches. @record_matches should be a compile-time constant. 
*/ +static forceinline struct lz_match * +bt_matchfinder_advance_one_byte(struct bt_matchfinder * const mf, + const u8 * const in_base, + const ptrdiff_t cur_pos, + const u32 max_len, + const u32 nice_len, + const u32 max_search_depth, + u32 * const next_hashes, + struct lz_match *lz_matchptr, + const bool record_matches) +{ + const u8 *in_next = in_base + cur_pos; + u32 depth_remaining = max_search_depth; + const s32 cutoff = cur_pos - MATCHFINDER_WINDOW_SIZE; + u32 next_hashseq; + u32 hash3; + u32 hash4; + s32 cur_node; +#if BT_MATCHFINDER_HASH3_WAYS >= 2 + s32 cur_node_2; +#endif + const u8 *matchptr; + mf_pos_t *pending_lt_ptr, *pending_gt_ptr; + u32 best_lt_len, best_gt_len; + u32 len; + u32 best_len = 3; + + STATIC_ASSERT(BT_MATCHFINDER_HASH3_WAYS >= 1 && + BT_MATCHFINDER_HASH3_WAYS <= 2); + + next_hashseq = get_unaligned_le32(in_next + 1); + + hash3 = next_hashes[0]; + hash4 = next_hashes[1]; + + next_hashes[0] = lz_hash(next_hashseq & 0xFFFFFF, BT_MATCHFINDER_HASH3_ORDER); + next_hashes[1] = lz_hash(next_hashseq, BT_MATCHFINDER_HASH4_ORDER); + prefetchw(&mf->hash3_tab[next_hashes[0]]); + prefetchw(&mf->hash4_tab[next_hashes[1]]); + + cur_node = mf->hash3_tab[hash3][0]; + mf->hash3_tab[hash3][0] = cur_pos; +#if BT_MATCHFINDER_HASH3_WAYS >= 2 + cur_node_2 = mf->hash3_tab[hash3][1]; + mf->hash3_tab[hash3][1] = cur_node; +#endif + if (record_matches && cur_node > cutoff) { + u32 seq3 = load_u24_unaligned(in_next); + if (seq3 == load_u24_unaligned(&in_base[cur_node])) { + lz_matchptr->length = 3; + lz_matchptr->offset = in_next - &in_base[cur_node]; + lz_matchptr++; + } + #if BT_MATCHFINDER_HASH3_WAYS >= 2 + else if (cur_node_2 > cutoff && + seq3 == load_u24_unaligned(&in_base[cur_node_2])) + { + lz_matchptr->length = 3; + lz_matchptr->offset = in_next - &in_base[cur_node_2]; + lz_matchptr++; + } + #endif + } + + cur_node = mf->hash4_tab[hash4]; + mf->hash4_tab[hash4] = cur_pos; + + pending_lt_ptr = bt_left_child(mf, cur_pos); + pending_gt_ptr = 
bt_right_child(mf, cur_pos); + + if (cur_node <= cutoff) { + *pending_lt_ptr = MATCHFINDER_INITVAL; + *pending_gt_ptr = MATCHFINDER_INITVAL; + return lz_matchptr; + } + + best_lt_len = 0; + best_gt_len = 0; + len = 0; + + for (;;) { + matchptr = &in_base[cur_node]; + + if (matchptr[len] == in_next[len]) { + len = lz_extend(in_next, matchptr, len + 1, max_len); + if (!record_matches || len > best_len) { + if (record_matches) { + best_len = len; + lz_matchptr->length = len; + lz_matchptr->offset = in_next - matchptr; + lz_matchptr++; + } + if (len >= nice_len) { + *pending_lt_ptr = *bt_left_child(mf, cur_node); + *pending_gt_ptr = *bt_right_child(mf, cur_node); + return lz_matchptr; + } + } + } + + if (matchptr[len] < in_next[len]) { + *pending_lt_ptr = cur_node; + pending_lt_ptr = bt_right_child(mf, cur_node); + cur_node = *pending_lt_ptr; + best_lt_len = len; + if (best_gt_len < len) + len = best_gt_len; + } else { + *pending_gt_ptr = cur_node; + pending_gt_ptr = bt_left_child(mf, cur_node); + cur_node = *pending_gt_ptr; + best_gt_len = len; + if (best_lt_len < len) + len = best_lt_len; + } + + if (cur_node <= cutoff || !--depth_remaining) { + *pending_lt_ptr = MATCHFINDER_INITVAL; + *pending_gt_ptr = MATCHFINDER_INITVAL; + return lz_matchptr; + } + } +} + +/* + * Retrieve a list of matches with the current position. + * + * @mf + * The matchfinder structure. + * @in_base + * Pointer to the next byte in the input buffer to process _at the last + * time bt_matchfinder_init() or bt_matchfinder_slide_window() was called_. + * @cur_pos + * The current position in the input buffer relative to @in_base (the + * position of the sequence being matched against). + * @max_len + * The maximum permissible match length at this position. Must be >= + * BT_MATCHFINDER_REQUIRED_NBYTES. + * @nice_len + * Stop searching if a match of at least this length is found. + * Must be <= @max_len. + * @max_search_depth + * Limit on the number of potential matches to consider. Must be >= 1. 
+ * @next_hashes + * The precomputed hash codes for the sequence beginning at @in_next. + * These will be used and then updated with the precomputed hashcodes for + * the sequence beginning at @in_next + 1. + * @lz_matchptr + * An array in which this function will record the matches. The recorded + * matches will be sorted by strictly increasing length and (non-strictly) + * increasing offset. The maximum number of matches that may be found is + * 'nice_len - 2'. + * + * The return value is a pointer to the next available slot in the @lz_matchptr + * array. (If no matches were found, this will be the same as @lz_matchptr.) + */ +static forceinline struct lz_match * +bt_matchfinder_get_matches(struct bt_matchfinder *mf, + const u8 *in_base, + ptrdiff_t cur_pos, + u32 max_len, + u32 nice_len, + u32 max_search_depth, + u32 next_hashes[2], + struct lz_match *lz_matchptr) +{ + return bt_matchfinder_advance_one_byte(mf, + in_base, + cur_pos, + max_len, + nice_len, + max_search_depth, + next_hashes, + lz_matchptr, + true); +} + +/* + * Advance the matchfinder, but don't record any matches. + * + * This is very similar to bt_matchfinder_get_matches() because both functions + * must do hashing and tree re-rooting. 
+ */ +static forceinline void +bt_matchfinder_skip_byte(struct bt_matchfinder *mf, + const u8 *in_base, + ptrdiff_t cur_pos, + u32 nice_len, + u32 max_search_depth, + u32 next_hashes[2]) +{ + bt_matchfinder_advance_one_byte(mf, + in_base, + cur_pos, + nice_len, + nice_len, + max_search_depth, + next_hashes, + NULL, + false); +} + +#endif /* LIB_BT_MATCHFINDER_H */ diff --git a/tools/z64compress/src/enc/libdeflate/lib/cpu_features_common.h b/tools/z64compress/src/enc/libdeflate/lib/cpu_features_common.h new file mode 100644 index 000000000..bfcaa3637 --- /dev/null +++ b/tools/z64compress/src/enc/libdeflate/lib/cpu_features_common.h @@ -0,0 +1,91 @@ +/* + * cpu_features_common.h - code shared by all lib/$arch/cpu_features.c + * + * Copyright 2020 Eric Biggers + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef LIB_CPU_FEATURES_COMMON_H +#define LIB_CPU_FEATURES_COMMON_H + +#if defined(TEST_SUPPORT__DO_NOT_USE) && !defined(FREESTANDING) +# undef _ANSI_SOURCE /* for strdup() and strtok_r() */ +# ifndef __APPLE__ +# define _GNU_SOURCE 1 +# endif +# include +# include +# include +#endif + +#include "lib_common.h" + +struct cpu_feature { + u32 bit; + const char *name; +}; + +#if defined(TEST_SUPPORT__DO_NOT_USE) && !defined(FREESTANDING) +/* Disable any features that are listed in $LIBDEFLATE_DISABLE_CPU_FEATURES. */ +static inline void +disable_cpu_features_for_testing(u32 *features, + const struct cpu_feature *feature_table, + size_t feature_table_length) +{ + char *env_value, *strbuf, *p, *saveptr = NULL; + size_t i; + + env_value = getenv("LIBDEFLATE_DISABLE_CPU_FEATURES"); + if (!env_value) + return; + strbuf = strdup(env_value); + if (!strbuf) + abort(); + p = strtok_r(strbuf, ",", &saveptr); + while (p) { + for (i = 0; i < feature_table_length; i++) { + if (strcmp(p, feature_table[i].name) == 0) { + *features &= ~feature_table[i].bit; + break; + } + } + if (i == feature_table_length) { + fprintf(stderr, + "unrecognized feature in LIBDEFLATE_DISABLE_CPU_FEATURES: \"%s\"\n", + p); + abort(); + } + p = strtok_r(NULL, ",", &saveptr); + } + free(strbuf); +} +#else /* TEST_SUPPORT__DO_NOT_USE */ +static inline void +disable_cpu_features_for_testing(u32 *features, + const struct cpu_feature *feature_table, + size_t feature_table_length) +{ +} +#endif /* !TEST_SUPPORT__DO_NOT_USE */ + +#endif /* LIB_CPU_FEATURES_COMMON_H */ diff --git a/tools/z64compress/src/enc/libdeflate/lib/crc32.c b/tools/z64compress/src/enc/libdeflate/lib/crc32.c new file mode 100644 index 000000000..61c2cc763 --- /dev/null +++ b/tools/z64compress/src/enc/libdeflate/lib/crc32.c @@ -0,0 +1,263 @@ +/* + * crc32.c - CRC-32 checksum algorithm for the gzip format + * + * Copyright 2016 Eric Biggers + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this 
software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +/* + * High-level description of CRC + * ============================= + * + * Consider a bit sequence 'bits[1...len]'. Interpret 'bits' as the "message" + * polynomial M(x) with coefficients in GF(2) (the field of integers modulo 2), + * where the coefficient of 'x^i' is 'bits[len - i]'. Then, compute: + * + * R(x) = M(x)*x^n mod G(x) + * + * where G(x) is a selected "generator" polynomial of degree 'n'. The remainder + * R(x) is a polynomial of max degree 'n - 1'. The CRC of 'bits' is R(x) + * interpreted as a bitstring of length 'n'. + * + * CRC used in gzip + * ================ + * + * In the gzip format (RFC 1952): + * + * - The bitstring to checksum is formed from the bytes of the uncompressed + * data by concatenating the bits from the bytes in order, proceeding + * from the low-order bit to the high-order bit within each byte. 
+ * + * - The generator polynomial G(x) is: x^32 + x^26 + x^23 + x^22 + x^16 + + * x^12 + x^11 + x^10 + x^8 + x^7 + x^5 + x^4 + x^2 + x + 1. + * Consequently, the CRC length is 32 bits ("CRC-32"). + * + * - The highest order 32 coefficients of M(x)*x^n are inverted. + * + * - All 32 coefficients of R(x) are inverted. + * + * The two inversions cause added leading and trailing zero bits to affect the + * resulting CRC, whereas with a regular CRC such bits would have no effect on + * the CRC. + * + * Computation and optimizations + * ============================= + * + * We can compute R(x) through "long division", maintaining only 32 bits of + * state at any given time. Multiplication by 'x' can be implemented as + * right-shifting by 1 (assuming the polynomial<=>bitstring mapping where the + * highest order bit represents the coefficient of x^0), and both addition and + * subtraction can be implemented as bitwise exclusive OR (since we are working + * in GF(2)). Here is an unoptimized implementation: + * + * static u32 crc32_gzip(const u8 *p, size_t len) + * { + * u32 crc = 0; + * const u32 divisor = 0xEDB88320; + * + * for (size_t i = 0; i < len * 8 + 32; i++) { + * int bit; + * u32 multiple; + * + * if (i < len * 8) + * bit = (p[i / 8] >> (i % 8)) & 1; + * else + * bit = 0; // one of the 32 appended 0 bits + * + * if (i < 32) // the first 32 bits are inverted + * bit ^= 1; + * + * if (crc & 1) + * multiple = divisor; + * else + * multiple = 0; + * + * crc >>= 1; + * crc |= (u32)bit << 31; + * crc ^= multiple; + * } + * + * return ~crc; + * } + * + * In this implementation, the 32-bit integer 'crc' maintains the remainder of + * the currently processed portion of the message (with 32 zero bits appended) + * when divided by the generator polynomial. 'crc' is the representation of + * R(x), and 'divisor' is the representation of G(x) excluding the x^32 + * coefficient. For each bit to process, we multiply R(x) by 'x^1', then add + * 'x^0' if the new bit is a 1. 
If this causes R(x) to gain a nonzero x^32 + * term, then we subtract G(x) from R(x). + * + * We can speed this up by taking advantage of the fact that XOR is commutative + * and associative, so the order in which we combine the inputs into 'crc' is + * unimportant. And since each message bit we add doesn't affect the choice of + * 'multiple' until 32 bits later, we need not actually add each message bit + * until that point: + * + * static u32 crc32_gzip(const u8 *p, size_t len) + * { + * u32 crc = ~0; + * const u32 divisor = 0xEDB88320; + * + * for (size_t i = 0; i < len * 8; i++) { + * int bit; + * u32 multiple; + * + * bit = (p[i / 8] >> (i % 8)) & 1; + * crc ^= bit; + * if (crc & 1) + * multiple = divisor; + * else + * multiple = 0; + * crc >>= 1; + * crc ^= multiple; + * } + * + * return ~crc; + * } + * + * With the above implementation we get the effect of 32 appended 0 bits for + * free; they never affect the choice of a divisor, nor would they change the + * value of 'crc' if they were to be actually XOR'ed in. And by starting with a + * remainder of all 1 bits, we get the effect of complementing the first 32 + * message bits. + * + * The next optimization is to process the input in multi-bit units. Suppose + * that we insert the next 'n' message bits into the remainder. Then we get an + * intermediate remainder of length '32 + n' bits, and the CRC of the extra 'n' + * bits is the amount by which the low 32 bits of the remainder will change as a + * result of cancelling out those 'n' bits. Taking n=8 (one byte) and + * precomputing a table containing the CRC of each possible byte, we get + * crc32_slice1() defined below. + * + * As a further optimization, we could increase the multi-bit unit size to 16. + * However, that is inefficient because the table size explodes from 256 entries + * (1024 bytes) to 65536 entries (262144 bytes), which wastes memory and won't + * fit in L1 cache on typical processors. 
+ *
+ * However, we can actually process 4 bytes at a time using 4 different tables
+ * with 256 entries each.  Logically, we form a 64-bit intermediate remainder
+ * and cancel out the high 32 bits in 8-bit chunks.  Bits 32-39 are cancelled
+ * out by the CRC of those bits, whereas bits 40-47 are cancelled out by the
+ * CRC of those bits with 8 zero bits appended, and so on.
+ *
+ * In crc32_slice8(), this method is extended to 8 bytes at a time.  The
+ * intermediate remainder (which we never actually store explicitly) is 96 bits.
+ *
+ * On CPUs that support fast carryless multiplication, CRCs can be computed even
+ * more quickly via "folding".  See e.g. the x86 PCLMUL implementation.
+ */
+
+#include "lib_common.h"
+#include "libdeflate.h"
+#include "crc32_multipliers.h"
+#include "crc32_tables.h"
+
+/*
+ * This is the default implementation.  It uses the slice-by-8 method: 8 bytes
+ * are consumed per iteration of the main loop, using 8 independent 256-entry
+ * subtables of crc32_slice8_table (selected by the 0x000 ... 0x700 offsets).
+ *
+ * 'crc' is the CRC state accumulated so far (the caller handles the gzip
+ * pre/post inversions); 'p' and 'len' give the buffer to process.  Returns
+ * the updated CRC state.
+ */
+static u32 MAYBE_UNUSED
+crc32_slice8(u32 crc, const u8 *p, size_t len)
+{
+	const u8 * const end = p + len;
+	const u8 *end64;
+
+	/* Process bytes one at a time until 'p' is 8-byte aligned. */
+	for (; ((uintptr_t)p & 7) && p != end; p++)
+		crc = (crc >> 8) ^ crc32_slice8_table[(u8)crc ^ *p];
+
+	/* Main loop: two 32-bit words (8 bytes) per iteration. */
+	end64 = p + ((end - p) & ~7);
+	for (; p != end64; p += 8) {
+		u32 v1 = le32_bswap(*(const u32 *)(p + 0));
+		u32 v2 = le32_bswap(*(const u32 *)(p + 4));
+
+		crc = crc32_slice8_table[0x700 + (u8)((crc ^ v1) >> 0)] ^
+		      crc32_slice8_table[0x600 + (u8)((crc ^ v1) >> 8)] ^
+		      crc32_slice8_table[0x500 + (u8)((crc ^ v1) >> 16)] ^
+		      crc32_slice8_table[0x400 + (u8)((crc ^ v1) >> 24)] ^
+		      crc32_slice8_table[0x300 + (u8)(v2 >> 0)] ^
+		      crc32_slice8_table[0x200 + (u8)(v2 >> 8)] ^
+		      crc32_slice8_table[0x100 + (u8)(v2 >> 16)] ^
+		      crc32_slice8_table[0x000 + (u8)(v2 >> 24)];
+	}
+
+	/* Process any remaining (fewer than 8) bytes one at a time. */
+	for (; p != end; p++)
+		crc = (crc >> 8) ^ crc32_slice8_table[(u8)crc ^ *p];
+
+	return crc;
+}
+
+/*
+ * This is a more lightweight generic implementation, which can be used as a
+ * subroutine by architecture-specific implementations to process small amounts
+ * of unaligned data at the beginning
and/or end of the buffer.
+ */
+static forceinline u32 MAYBE_UNUSED
+crc32_slice1(u32 crc, const u8 *p, size_t len)
+{
+	size_t i;
+
+	/* One table lookup per input byte (slice-by-1). */
+	for (i = 0; i < len; i++)
+		crc = (crc >> 8) ^ crc32_slice1_table[(u8)crc ^ p[i]];
+	return crc;
+}
+
+/* Include architecture-specific implementation(s) if available. */
+#undef DEFAULT_IMPL
+#undef arch_select_crc32_func
+typedef u32 (*crc32_func_t)(u32 crc, const u8 *p, size_t len);
+#if defined(ARCH_ARM32) || defined(ARCH_ARM64)
+#  include "arm/crc32_impl.h"
+#elif defined(ARCH_X86_32) || defined(ARCH_X86_64)
+#  include "x86/crc32_impl.h"
+#endif
+
+/*
+ * If no architecture-specific header claimed DEFAULT_IMPL, fall back to the
+ * generic slice-by-8 implementation.
+ */
+#ifndef DEFAULT_IMPL
+#  define DEFAULT_IMPL crc32_slice8
+#endif
+
+#ifdef arch_select_crc32_func
+static u32 dispatch_crc32(u32 crc, const u8 *p, size_t len);
+
+/*
+ * Function pointer through which all CRC computation goes.  It starts out
+ * pointing at the dispatcher; the first call overwrites it with the selected
+ * implementation, so subsequent calls skip the runtime selection.
+ */
+static volatile crc32_func_t crc32_impl = dispatch_crc32;
+
+/* Choose the best implementation at runtime. */
+static u32 dispatch_crc32(u32 crc, const u8 *p, size_t len)
+{
+	crc32_func_t f = arch_select_crc32_func();
+
+	if (f == NULL)
+		f = DEFAULT_IMPL;
+
+	crc32_impl = f;
+	return f(crc, p, len);
+}
+#else
+/* The best implementation is statically known, so call it directly. */
+#define crc32_impl DEFAULT_IMPL
+#endif
+
+/*
+ * libdeflate_crc32() - update a gzip CRC-32 with a buffer of data
+ *
+ * 'crc' is the CRC value so far (pass 0 to start a new checksum); 'p'/'len'
+ * give the data to add, with p == NULL returning the initial value.  The
+ * '~' on the way in and out implements the two bit inversions that the gzip
+ * CRC-32 requires (see the explanation at the top of this file), so callers
+ * always see CRC values in the conventional inverted form.
+ */
+LIBDEFLATEAPI u32
+libdeflate_crc32(u32 crc, const void *p, size_t len)
+{
+	if (p == NULL)  /* Return initial value. */
+		return 0;
+	return ~crc32_impl(~crc, p, len);
+}
diff --git a/tools/z64compress/src/enc/libdeflate/lib/crc32_multipliers.h b/tools/z64compress/src/enc/libdeflate/lib/crc32_multipliers.h
new file mode 100644
index 000000000..580b775bd
--- /dev/null
+++ b/tools/z64compress/src/enc/libdeflate/lib/crc32_multipliers.h
@@ -0,0 +1,329 @@
+/*
+ * crc32_multipliers.h - constants for CRC-32 folding
+ *
+ * THIS FILE WAS GENERATED BY gen_crc32_multipliers.c.  DO NOT EDIT.
+ */ + +#define CRC32_1VECS_MULT_1 0xae689191 /* x^159 mod G(x) */ +#define CRC32_1VECS_MULT_2 0xccaa009e /* x^95 mod G(x) */ +#define CRC32_1VECS_MULTS { CRC32_1VECS_MULT_1, CRC32_1VECS_MULT_2 } + +#define CRC32_2VECS_MULT_1 0xf1da05aa /* x^287 mod G(x) */ +#define CRC32_2VECS_MULT_2 0x81256527 /* x^223 mod G(x) */ +#define CRC32_2VECS_MULTS { CRC32_2VECS_MULT_1, CRC32_2VECS_MULT_2 } + +#define CRC32_3VECS_MULT_1 0x3db1ecdc /* x^415 mod G(x) */ +#define CRC32_3VECS_MULT_2 0xaf449247 /* x^351 mod G(x) */ +#define CRC32_3VECS_MULTS { CRC32_3VECS_MULT_1, CRC32_3VECS_MULT_2 } + +#define CRC32_4VECS_MULT_1 0x8f352d95 /* x^543 mod G(x) */ +#define CRC32_4VECS_MULT_2 0x1d9513d7 /* x^479 mod G(x) */ +#define CRC32_4VECS_MULTS { CRC32_4VECS_MULT_1, CRC32_4VECS_MULT_2 } + +#define CRC32_5VECS_MULT_1 0x1c279815 /* x^671 mod G(x) */ +#define CRC32_5VECS_MULT_2 0xae0b5394 /* x^607 mod G(x) */ +#define CRC32_5VECS_MULTS { CRC32_5VECS_MULT_1, CRC32_5VECS_MULT_2 } + +#define CRC32_6VECS_MULT_1 0xdf068dc2 /* x^799 mod G(x) */ +#define CRC32_6VECS_MULT_2 0x57c54819 /* x^735 mod G(x) */ +#define CRC32_6VECS_MULTS { CRC32_6VECS_MULT_1, CRC32_6VECS_MULT_2 } + +#define CRC32_7VECS_MULT_1 0x31f8303f /* x^927 mod G(x) */ +#define CRC32_7VECS_MULT_2 0x0cbec0ed /* x^863 mod G(x) */ +#define CRC32_7VECS_MULTS { CRC32_7VECS_MULT_1, CRC32_7VECS_MULT_2 } + +#define CRC32_8VECS_MULT_1 0x33fff533 /* x^1055 mod G(x) */ +#define CRC32_8VECS_MULT_2 0x910eeec1 /* x^991 mod G(x) */ +#define CRC32_8VECS_MULTS { CRC32_8VECS_MULT_1, CRC32_8VECS_MULT_2 } + +#define CRC32_9VECS_MULT_1 0x26b70c3d /* x^1183 mod G(x) */ +#define CRC32_9VECS_MULT_2 0x3f41287a /* x^1119 mod G(x) */ +#define CRC32_9VECS_MULTS { CRC32_9VECS_MULT_1, CRC32_9VECS_MULT_2 } + +#define CRC32_10VECS_MULT_1 0xe3543be0 /* x^1311 mod G(x) */ +#define CRC32_10VECS_MULT_2 0x9026d5b1 /* x^1247 mod G(x) */ +#define CRC32_10VECS_MULTS { CRC32_10VECS_MULT_1, CRC32_10VECS_MULT_2 } + +#define CRC32_11VECS_MULT_1 0x5a1bb05d /* x^1439 mod G(x) */ 
+#define CRC32_11VECS_MULT_2 0xd1df2327 /* x^1375 mod G(x) */ +#define CRC32_11VECS_MULTS { CRC32_11VECS_MULT_1, CRC32_11VECS_MULT_2 } + +#define CRC32_12VECS_MULT_1 0x596c8d81 /* x^1567 mod G(x) */ +#define CRC32_12VECS_MULT_2 0xf5e48c85 /* x^1503 mod G(x) */ +#define CRC32_12VECS_MULTS { CRC32_12VECS_MULT_1, CRC32_12VECS_MULT_2 } + +#define CRC32_FINAL_MULT 0xb8bc6765 /* x^63 mod G(x) */ +#define CRC32_BARRETT_CONSTANT_1 0x00000001f7011641ULL /* floor(x^64 / G(x)) */ +#define CRC32_BARRETT_CONSTANT_2 0x00000001db710641ULL /* G(x) */ +#define CRC32_BARRETT_CONSTANTS { CRC32_BARRETT_CONSTANT_1, CRC32_BARRETT_CONSTANT_2 } + +#define CRC32_NUM_CHUNKS 4 +#define CRC32_MIN_VARIABLE_CHUNK_LEN 128UL +#define CRC32_MAX_VARIABLE_CHUNK_LEN 16384UL + +/* Multipliers for implementations that use a variable chunk length */ +static const u32 crc32_mults_for_chunklen[][CRC32_NUM_CHUNKS - 1] MAYBE_UNUSED = { + { 0 /* unused row */ }, + /* chunk_len=128 */ + { 0xd31343ea /* x^3039 mod G(x) */, 0xe95c1271 /* x^2015 mod G(x) */, 0x910eeec1 /* x^991 mod G(x) */, }, + /* chunk_len=256 */ + { 0x1d6708a0 /* x^6111 mod G(x) */, 0x0c30f51d /* x^4063 mod G(x) */, 0xe95c1271 /* x^2015 mod G(x) */, }, + /* chunk_len=384 */ + { 0xdb3839f3 /* x^9183 mod G(x) */, 0x1d6708a0 /* x^6111 mod G(x) */, 0xd31343ea /* x^3039 mod G(x) */, }, + /* chunk_len=512 */ + { 0x1753ab84 /* x^12255 mod G(x) */, 0xbbf2f6d6 /* x^8159 mod G(x) */, 0x0c30f51d /* x^4063 mod G(x) */, }, + /* chunk_len=640 */ + { 0x3796455c /* x^15327 mod G(x) */, 0xb8e0e4a8 /* x^10207 mod G(x) */, 0xc352f6de /* x^5087 mod G(x) */, }, + /* chunk_len=768 */ + { 0x3954de39 /* x^18399 mod G(x) */, 0x1753ab84 /* x^12255 mod G(x) */, 0x1d6708a0 /* x^6111 mod G(x) */, }, + /* chunk_len=896 */ + { 0x632d78c5 /* x^21471 mod G(x) */, 0x3fc33de4 /* x^14303 mod G(x) */, 0x9a1b53c8 /* x^7135 mod G(x) */, }, + /* chunk_len=1024 */ + { 0xa0decef3 /* x^24543 mod G(x) */, 0x7b4aa8b7 /* x^16351 mod G(x) */, 0xbbf2f6d6 /* x^8159 mod G(x) */, }, + /* 
chunk_len=1152 */ + { 0xe9c09bb0 /* x^27615 mod G(x) */, 0x3954de39 /* x^18399 mod G(x) */, 0xdb3839f3 /* x^9183 mod G(x) */, }, + /* chunk_len=1280 */ + { 0xd51917a4 /* x^30687 mod G(x) */, 0xcae68461 /* x^20447 mod G(x) */, 0xb8e0e4a8 /* x^10207 mod G(x) */, }, + /* chunk_len=1408 */ + { 0x154a8a62 /* x^33759 mod G(x) */, 0x41e7589c /* x^22495 mod G(x) */, 0x3e9a43cd /* x^11231 mod G(x) */, }, + /* chunk_len=1536 */ + { 0xf196555d /* x^36831 mod G(x) */, 0xa0decef3 /* x^24543 mod G(x) */, 0x1753ab84 /* x^12255 mod G(x) */, }, + /* chunk_len=1664 */ + { 0x8eec2999 /* x^39903 mod G(x) */, 0xefb0a128 /* x^26591 mod G(x) */, 0x6044fbb0 /* x^13279 mod G(x) */, }, + /* chunk_len=1792 */ + { 0x27892abf /* x^42975 mod G(x) */, 0x48d72bb1 /* x^28639 mod G(x) */, 0x3fc33de4 /* x^14303 mod G(x) */, }, + /* chunk_len=1920 */ + { 0x77bc2419 /* x^46047 mod G(x) */, 0xd51917a4 /* x^30687 mod G(x) */, 0x3796455c /* x^15327 mod G(x) */, }, + /* chunk_len=2048 */ + { 0xcea114a5 /* x^49119 mod G(x) */, 0x68c0a2c5 /* x^32735 mod G(x) */, 0x7b4aa8b7 /* x^16351 mod G(x) */, }, + /* chunk_len=2176 */ + { 0xa1077e85 /* x^52191 mod G(x) */, 0x188cc628 /* x^34783 mod G(x) */, 0x0c21f835 /* x^17375 mod G(x) */, }, + /* chunk_len=2304 */ + { 0xc5ed75e1 /* x^55263 mod G(x) */, 0xf196555d /* x^36831 mod G(x) */, 0x3954de39 /* x^18399 mod G(x) */, }, + /* chunk_len=2432 */ + { 0xca4fba3f /* x^58335 mod G(x) */, 0x0acfa26f /* x^38879 mod G(x) */, 0x6cb21510 /* x^19423 mod G(x) */, }, + /* chunk_len=2560 */ + { 0xcf5bcdc4 /* x^61407 mod G(x) */, 0x4fae7fc0 /* x^40927 mod G(x) */, 0xcae68461 /* x^20447 mod G(x) */, }, + /* chunk_len=2688 */ + { 0xf36b9d16 /* x^64479 mod G(x) */, 0x27892abf /* x^42975 mod G(x) */, 0x632d78c5 /* x^21471 mod G(x) */, }, + /* chunk_len=2816 */ + { 0xf76fd988 /* x^67551 mod G(x) */, 0xed5c39b1 /* x^45023 mod G(x) */, 0x41e7589c /* x^22495 mod G(x) */, }, + /* chunk_len=2944 */ + { 0x6c45d92e /* x^70623 mod G(x) */, 0xff809fcd /* x^47071 mod G(x) */, 0x0c46baec /* 
x^23519 mod G(x) */, }, + /* chunk_len=3072 */ + { 0x6116b82b /* x^73695 mod G(x) */, 0xcea114a5 /* x^49119 mod G(x) */, 0xa0decef3 /* x^24543 mod G(x) */, }, + /* chunk_len=3200 */ + { 0x4d9899bb /* x^76767 mod G(x) */, 0x9f9d8d9c /* x^51167 mod G(x) */, 0x53deb236 /* x^25567 mod G(x) */, }, + /* chunk_len=3328 */ + { 0x3e7c93b9 /* x^79839 mod G(x) */, 0x6666b805 /* x^53215 mod G(x) */, 0xefb0a128 /* x^26591 mod G(x) */, }, + /* chunk_len=3456 */ + { 0x388b20ac /* x^82911 mod G(x) */, 0xc5ed75e1 /* x^55263 mod G(x) */, 0xe9c09bb0 /* x^27615 mod G(x) */, }, + /* chunk_len=3584 */ + { 0x0956d953 /* x^85983 mod G(x) */, 0x97fbdb14 /* x^57311 mod G(x) */, 0x48d72bb1 /* x^28639 mod G(x) */, }, + /* chunk_len=3712 */ + { 0x55cb4dfe /* x^89055 mod G(x) */, 0x1b37c832 /* x^59359 mod G(x) */, 0xc07331b3 /* x^29663 mod G(x) */, }, + /* chunk_len=3840 */ + { 0x52222fea /* x^92127 mod G(x) */, 0xcf5bcdc4 /* x^61407 mod G(x) */, 0xd51917a4 /* x^30687 mod G(x) */, }, + /* chunk_len=3968 */ + { 0x0603989b /* x^95199 mod G(x) */, 0xb03c8112 /* x^63455 mod G(x) */, 0x5e04b9a5 /* x^31711 mod G(x) */, }, + /* chunk_len=4096 */ + { 0x4470c029 /* x^98271 mod G(x) */, 0x2339d155 /* x^65503 mod G(x) */, 0x68c0a2c5 /* x^32735 mod G(x) */, }, + /* chunk_len=4224 */ + { 0xb6f35093 /* x^101343 mod G(x) */, 0xf76fd988 /* x^67551 mod G(x) */, 0x154a8a62 /* x^33759 mod G(x) */, }, + /* chunk_len=4352 */ + { 0xc46805ba /* x^104415 mod G(x) */, 0x416f9449 /* x^69599 mod G(x) */, 0x188cc628 /* x^34783 mod G(x) */, }, + /* chunk_len=4480 */ + { 0xc3876592 /* x^107487 mod G(x) */, 0x4b809189 /* x^71647 mod G(x) */, 0xc35cf6e7 /* x^35807 mod G(x) */, }, + /* chunk_len=4608 */ + { 0x5b0c98b9 /* x^110559 mod G(x) */, 0x6116b82b /* x^73695 mod G(x) */, 0xf196555d /* x^36831 mod G(x) */, }, + /* chunk_len=4736 */ + { 0x30d13e5f /* x^113631 mod G(x) */, 0x4c5a315a /* x^75743 mod G(x) */, 0x8c224466 /* x^37855 mod G(x) */, }, + /* chunk_len=4864 */ + { 0x54afca53 /* x^116703 mod G(x) */, 0xbccfa2c1 /* 
x^77791 mod G(x) */, 0x0acfa26f /* x^38879 mod G(x) */, }, + /* chunk_len=4992 */ + { 0x93102436 /* x^119775 mod G(x) */, 0x3e7c93b9 /* x^79839 mod G(x) */, 0x8eec2999 /* x^39903 mod G(x) */, }, + /* chunk_len=5120 */ + { 0xbd2655a8 /* x^122847 mod G(x) */, 0x3e116c9d /* x^81887 mod G(x) */, 0x4fae7fc0 /* x^40927 mod G(x) */, }, + /* chunk_len=5248 */ + { 0x70cd7f26 /* x^125919 mod G(x) */, 0x408e57f2 /* x^83935 mod G(x) */, 0x1691be45 /* x^41951 mod G(x) */, }, + /* chunk_len=5376 */ + { 0x2d546c53 /* x^128991 mod G(x) */, 0x0956d953 /* x^85983 mod G(x) */, 0x27892abf /* x^42975 mod G(x) */, }, + /* chunk_len=5504 */ + { 0xb53410a8 /* x^132063 mod G(x) */, 0x42ebf0ad /* x^88031 mod G(x) */, 0x161f3c12 /* x^43999 mod G(x) */, }, + /* chunk_len=5632 */ + { 0x67a93f75 /* x^135135 mod G(x) */, 0xcf3233e4 /* x^90079 mod G(x) */, 0xed5c39b1 /* x^45023 mod G(x) */, }, + /* chunk_len=5760 */ + { 0x9830ac33 /* x^138207 mod G(x) */, 0x52222fea /* x^92127 mod G(x) */, 0x77bc2419 /* x^46047 mod G(x) */, }, + /* chunk_len=5888 */ + { 0xb0b6fc3e /* x^141279 mod G(x) */, 0x2fde73f8 /* x^94175 mod G(x) */, 0xff809fcd /* x^47071 mod G(x) */, }, + /* chunk_len=6016 */ + { 0x84170f16 /* x^144351 mod G(x) */, 0xced90d99 /* x^96223 mod G(x) */, 0x30de0f98 /* x^48095 mod G(x) */, }, + /* chunk_len=6144 */ + { 0xd7017a0c /* x^147423 mod G(x) */, 0x4470c029 /* x^98271 mod G(x) */, 0xcea114a5 /* x^49119 mod G(x) */, }, + /* chunk_len=6272 */ + { 0xadb25de6 /* x^150495 mod G(x) */, 0x84f40beb /* x^100319 mod G(x) */, 0x2b7e0e1b /* x^50143 mod G(x) */, }, + /* chunk_len=6400 */ + { 0x8282fddc /* x^153567 mod G(x) */, 0xec855937 /* x^102367 mod G(x) */, 0x9f9d8d9c /* x^51167 mod G(x) */, }, + /* chunk_len=6528 */ + { 0x46362bee /* x^156639 mod G(x) */, 0xc46805ba /* x^104415 mod G(x) */, 0xa1077e85 /* x^52191 mod G(x) */, }, + /* chunk_len=6656 */ + { 0xb9077a01 /* x^159711 mod G(x) */, 0xdf7a24ac /* x^106463 mod G(x) */, 0x6666b805 /* x^53215 mod G(x) */, }, + /* chunk_len=6784 */ + { 
0xf51d9bc6 /* x^162783 mod G(x) */, 0x2b52dc39 /* x^108511 mod G(x) */, 0x7e774cf6 /* x^54239 mod G(x) */, }, + /* chunk_len=6912 */ + { 0x4ca19a29 /* x^165855 mod G(x) */, 0x5b0c98b9 /* x^110559 mod G(x) */, 0xc5ed75e1 /* x^55263 mod G(x) */, }, + /* chunk_len=7040 */ + { 0xdc0fc3fc /* x^168927 mod G(x) */, 0xb939fcdf /* x^112607 mod G(x) */, 0x3678fed2 /* x^56287 mod G(x) */, }, + /* chunk_len=7168 */ + { 0x63c3d167 /* x^171999 mod G(x) */, 0x70f9947d /* x^114655 mod G(x) */, 0x97fbdb14 /* x^57311 mod G(x) */, }, + /* chunk_len=7296 */ + { 0x5851d254 /* x^175071 mod G(x) */, 0x54afca53 /* x^116703 mod G(x) */, 0xca4fba3f /* x^58335 mod G(x) */, }, + /* chunk_len=7424 */ + { 0xfeacf2a1 /* x^178143 mod G(x) */, 0x7a3c0a6a /* x^118751 mod G(x) */, 0x1b37c832 /* x^59359 mod G(x) */, }, + /* chunk_len=7552 */ + { 0x93b7edc8 /* x^181215 mod G(x) */, 0x1fea4d2a /* x^120799 mod G(x) */, 0x58fa96ee /* x^60383 mod G(x) */, }, + /* chunk_len=7680 */ + { 0x5539e44a /* x^184287 mod G(x) */, 0xbd2655a8 /* x^122847 mod G(x) */, 0xcf5bcdc4 /* x^61407 mod G(x) */, }, + /* chunk_len=7808 */ + { 0xde32a3d2 /* x^187359 mod G(x) */, 0x4ff61aa1 /* x^124895 mod G(x) */, 0x6a6a3694 /* x^62431 mod G(x) */, }, + /* chunk_len=7936 */ + { 0xf0baeeb6 /* x^190431 mod G(x) */, 0x7ae2f6f4 /* x^126943 mod G(x) */, 0xb03c8112 /* x^63455 mod G(x) */, }, + /* chunk_len=8064 */ + { 0xbe15887f /* x^193503 mod G(x) */, 0x2d546c53 /* x^128991 mod G(x) */, 0xf36b9d16 /* x^64479 mod G(x) */, }, + /* chunk_len=8192 */ + { 0x64f34a05 /* x^196575 mod G(x) */, 0xe0ee5efe /* x^131039 mod G(x) */, 0x2339d155 /* x^65503 mod G(x) */, }, + /* chunk_len=8320 */ + { 0x1b6d1aea /* x^199647 mod G(x) */, 0xfeafb67c /* x^133087 mod G(x) */, 0x4fb001a8 /* x^66527 mod G(x) */, }, + /* chunk_len=8448 */ + { 0x82adb0b8 /* x^202719 mod G(x) */, 0x67a93f75 /* x^135135 mod G(x) */, 0xf76fd988 /* x^67551 mod G(x) */, }, + /* chunk_len=8576 */ + { 0x694587c7 /* x^205791 mod G(x) */, 0x3b34408b /* x^137183 mod G(x) */, 
0xeccb2978 /* x^68575 mod G(x) */, }, + /* chunk_len=8704 */ + { 0xd2fc57c3 /* x^208863 mod G(x) */, 0x07fcf8c6 /* x^139231 mod G(x) */, 0x416f9449 /* x^69599 mod G(x) */, }, + /* chunk_len=8832 */ + { 0x9dd6837c /* x^211935 mod G(x) */, 0xb0b6fc3e /* x^141279 mod G(x) */, 0x6c45d92e /* x^70623 mod G(x) */, }, + /* chunk_len=8960 */ + { 0x3a9d1f97 /* x^215007 mod G(x) */, 0xefd033b2 /* x^143327 mod G(x) */, 0x4b809189 /* x^71647 mod G(x) */, }, + /* chunk_len=9088 */ + { 0x1eee1d2a /* x^218079 mod G(x) */, 0xf2a6e46e /* x^145375 mod G(x) */, 0x55b4c814 /* x^72671 mod G(x) */, }, + /* chunk_len=9216 */ + { 0xb57c7728 /* x^221151 mod G(x) */, 0xd7017a0c /* x^147423 mod G(x) */, 0x6116b82b /* x^73695 mod G(x) */, }, + /* chunk_len=9344 */ + { 0xf2fc5d61 /* x^224223 mod G(x) */, 0x242aac86 /* x^149471 mod G(x) */, 0x05245cf0 /* x^74719 mod G(x) */, }, + /* chunk_len=9472 */ + { 0x26387824 /* x^227295 mod G(x) */, 0xc15c4ca5 /* x^151519 mod G(x) */, 0x4c5a315a /* x^75743 mod G(x) */, }, + /* chunk_len=9600 */ + { 0x8c151e77 /* x^230367 mod G(x) */, 0x8282fddc /* x^153567 mod G(x) */, 0x4d9899bb /* x^76767 mod G(x) */, }, + /* chunk_len=9728 */ + { 0x8ea1f680 /* x^233439 mod G(x) */, 0xf5ff6cdd /* x^155615 mod G(x) */, 0xbccfa2c1 /* x^77791 mod G(x) */, }, + /* chunk_len=9856 */ + { 0xe8cf3d2a /* x^236511 mod G(x) */, 0x338b1fb1 /* x^157663 mod G(x) */, 0xeda61f70 /* x^78815 mod G(x) */, }, + /* chunk_len=9984 */ + { 0x21f15b59 /* x^239583 mod G(x) */, 0xb9077a01 /* x^159711 mod G(x) */, 0x3e7c93b9 /* x^79839 mod G(x) */, }, + /* chunk_len=10112 */ + { 0x6f68d64a /* x^242655 mod G(x) */, 0x901b0161 /* x^161759 mod G(x) */, 0xb9fd3537 /* x^80863 mod G(x) */, }, + /* chunk_len=10240 */ + { 0x71b74d95 /* x^245727 mod G(x) */, 0xf5ddd5ad /* x^163807 mod G(x) */, 0x3e116c9d /* x^81887 mod G(x) */, }, + /* chunk_len=10368 */ + { 0x4c2e7261 /* x^248799 mod G(x) */, 0x4ca19a29 /* x^165855 mod G(x) */, 0x388b20ac /* x^82911 mod G(x) */, }, + /* chunk_len=10496 */ + { 0x8a2d38e8 
/* x^251871 mod G(x) */, 0xd27ee0a1 /* x^167903 mod G(x) */, 0x408e57f2 /* x^83935 mod G(x) */, }, + /* chunk_len=10624 */ + { 0x7e58ca17 /* x^254943 mod G(x) */, 0x69dfedd2 /* x^169951 mod G(x) */, 0x3a76805e /* x^84959 mod G(x) */, }, + /* chunk_len=10752 */ + { 0xf997967f /* x^258015 mod G(x) */, 0x63c3d167 /* x^171999 mod G(x) */, 0x0956d953 /* x^85983 mod G(x) */, }, + /* chunk_len=10880 */ + { 0x48215963 /* x^261087 mod G(x) */, 0x71e1dfe0 /* x^174047 mod G(x) */, 0x42a6d410 /* x^87007 mod G(x) */, }, + /* chunk_len=11008 */ + { 0xa704b94c /* x^264159 mod G(x) */, 0x679f198a /* x^176095 mod G(x) */, 0x42ebf0ad /* x^88031 mod G(x) */, }, + /* chunk_len=11136 */ + { 0x1d699056 /* x^267231 mod G(x) */, 0xfeacf2a1 /* x^178143 mod G(x) */, 0x55cb4dfe /* x^89055 mod G(x) */, }, + /* chunk_len=11264 */ + { 0x6800bcc5 /* x^270303 mod G(x) */, 0x16024f15 /* x^180191 mod G(x) */, 0xcf3233e4 /* x^90079 mod G(x) */, }, + /* chunk_len=11392 */ + { 0x2d48e4ca /* x^273375 mod G(x) */, 0xbe61582f /* x^182239 mod G(x) */, 0x46026283 /* x^91103 mod G(x) */, }, + /* chunk_len=11520 */ + { 0x4c4c2b55 /* x^276447 mod G(x) */, 0x5539e44a /* x^184287 mod G(x) */, 0x52222fea /* x^92127 mod G(x) */, }, + /* chunk_len=11648 */ + { 0xd8ce94cb /* x^279519 mod G(x) */, 0xbc613c26 /* x^186335 mod G(x) */, 0x33776b4b /* x^93151 mod G(x) */, }, + /* chunk_len=11776 */ + { 0xd0b5a02b /* x^282591 mod G(x) */, 0x490d3cc6 /* x^188383 mod G(x) */, 0x2fde73f8 /* x^94175 mod G(x) */, }, + /* chunk_len=11904 */ + { 0xa223f7ec /* x^285663 mod G(x) */, 0xf0baeeb6 /* x^190431 mod G(x) */, 0x0603989b /* x^95199 mod G(x) */, }, + /* chunk_len=12032 */ + { 0x58de337a /* x^288735 mod G(x) */, 0x3bf3d597 /* x^192479 mod G(x) */, 0xced90d99 /* x^96223 mod G(x) */, }, + /* chunk_len=12160 */ + { 0x37f5d8f4 /* x^291807 mod G(x) */, 0x4d5b699b /* x^194527 mod G(x) */, 0xd7262e5f /* x^97247 mod G(x) */, }, + /* chunk_len=12288 */ + { 0xfa8a435d /* x^294879 mod G(x) */, 0x64f34a05 /* x^196575 mod G(x) */, 
0x4470c029 /* x^98271 mod G(x) */, }, + /* chunk_len=12416 */ + { 0x238709fe /* x^297951 mod G(x) */, 0x52e7458f /* x^198623 mod G(x) */, 0x9a174cd3 /* x^99295 mod G(x) */, }, + /* chunk_len=12544 */ + { 0x9e1ba6f5 /* x^301023 mod G(x) */, 0xef0272f7 /* x^200671 mod G(x) */, 0x84f40beb /* x^100319 mod G(x) */, }, + /* chunk_len=12672 */ + { 0xcd8b57fa /* x^304095 mod G(x) */, 0x82adb0b8 /* x^202719 mod G(x) */, 0xb6f35093 /* x^101343 mod G(x) */, }, + /* chunk_len=12800 */ + { 0x0aed142f /* x^307167 mod G(x) */, 0xb1650290 /* x^204767 mod G(x) */, 0xec855937 /* x^102367 mod G(x) */, }, + /* chunk_len=12928 */ + { 0xd1f064db /* x^310239 mod G(x) */, 0x6e7340d3 /* x^206815 mod G(x) */, 0x5c28cb52 /* x^103391 mod G(x) */, }, + /* chunk_len=13056 */ + { 0x464ac895 /* x^313311 mod G(x) */, 0xd2fc57c3 /* x^208863 mod G(x) */, 0xc46805ba /* x^104415 mod G(x) */, }, + /* chunk_len=13184 */ + { 0xa0e6beea /* x^316383 mod G(x) */, 0xcfeec3d0 /* x^210911 mod G(x) */, 0x0225d214 /* x^105439 mod G(x) */, }, + /* chunk_len=13312 */ + { 0x78703ce0 /* x^319455 mod G(x) */, 0xc60f6075 /* x^212959 mod G(x) */, 0xdf7a24ac /* x^106463 mod G(x) */, }, + /* chunk_len=13440 */ + { 0xfea48165 /* x^322527 mod G(x) */, 0x3a9d1f97 /* x^215007 mod G(x) */, 0xc3876592 /* x^107487 mod G(x) */, }, + /* chunk_len=13568 */ + { 0xdb89b8db /* x^325599 mod G(x) */, 0xa6172211 /* x^217055 mod G(x) */, 0x2b52dc39 /* x^108511 mod G(x) */, }, + /* chunk_len=13696 */ + { 0x7ca03731 /* x^328671 mod G(x) */, 0x1db42849 /* x^219103 mod G(x) */, 0xc5df246e /* x^109535 mod G(x) */, }, + /* chunk_len=13824 */ + { 0x8801d0aa /* x^331743 mod G(x) */, 0xb57c7728 /* x^221151 mod G(x) */, 0x5b0c98b9 /* x^110559 mod G(x) */, }, + /* chunk_len=13952 */ + { 0xf89cd7f0 /* x^334815 mod G(x) */, 0xcc396a0b /* x^223199 mod G(x) */, 0xdb799c51 /* x^111583 mod G(x) */, }, + /* chunk_len=14080 */ + { 0x1611a808 /* x^337887 mod G(x) */, 0xaeae6105 /* x^225247 mod G(x) */, 0xb939fcdf /* x^112607 mod G(x) */, }, + /* 
chunk_len=14208 */ + { 0xe3cdb888 /* x^340959 mod G(x) */, 0x26387824 /* x^227295 mod G(x) */, 0x30d13e5f /* x^113631 mod G(x) */, }, + /* chunk_len=14336 */ + { 0x552a4cf6 /* x^344031 mod G(x) */, 0xee2d04bb /* x^229343 mod G(x) */, 0x70f9947d /* x^114655 mod G(x) */, }, + /* chunk_len=14464 */ + { 0x85e248e9 /* x^347103 mod G(x) */, 0x0a79663f /* x^231391 mod G(x) */, 0x53339cf7 /* x^115679 mod G(x) */, }, + /* chunk_len=14592 */ + { 0x1c61c3e9 /* x^350175 mod G(x) */, 0x8ea1f680 /* x^233439 mod G(x) */, 0x54afca53 /* x^116703 mod G(x) */, }, + /* chunk_len=14720 */ + { 0xb14cfc2b /* x^353247 mod G(x) */, 0x2e073302 /* x^235487 mod G(x) */, 0x10897992 /* x^117727 mod G(x) */, }, + /* chunk_len=14848 */ + { 0x6ec444cc /* x^356319 mod G(x) */, 0x9e819f13 /* x^237535 mod G(x) */, 0x7a3c0a6a /* x^118751 mod G(x) */, }, + /* chunk_len=14976 */ + { 0xe2fa5f80 /* x^359391 mod G(x) */, 0x21f15b59 /* x^239583 mod G(x) */, 0x93102436 /* x^119775 mod G(x) */, }, + /* chunk_len=15104 */ + { 0x6d33f4c6 /* x^362463 mod G(x) */, 0x31a27455 /* x^241631 mod G(x) */, 0x1fea4d2a /* x^120799 mod G(x) */, }, + /* chunk_len=15232 */ + { 0xb6dec609 /* x^365535 mod G(x) */, 0x4d437056 /* x^243679 mod G(x) */, 0x42eb1e2a /* x^121823 mod G(x) */, }, + /* chunk_len=15360 */ + { 0x1846c518 /* x^368607 mod G(x) */, 0x71b74d95 /* x^245727 mod G(x) */, 0xbd2655a8 /* x^122847 mod G(x) */, }, + /* chunk_len=15488 */ + { 0x9f947f8a /* x^371679 mod G(x) */, 0x2b501619 /* x^247775 mod G(x) */, 0xa4924b0e /* x^123871 mod G(x) */, }, + /* chunk_len=15616 */ + { 0xb7442f4d /* x^374751 mod G(x) */, 0xba30a5d8 /* x^249823 mod G(x) */, 0x4ff61aa1 /* x^124895 mod G(x) */, }, + /* chunk_len=15744 */ + { 0xe2c93242 /* x^377823 mod G(x) */, 0x8a2d38e8 /* x^251871 mod G(x) */, 0x70cd7f26 /* x^125919 mod G(x) */, }, + /* chunk_len=15872 */ + { 0xcd6863df /* x^380895 mod G(x) */, 0x78fd88dc /* x^253919 mod G(x) */, 0x7ae2f6f4 /* x^126943 mod G(x) */, }, + /* chunk_len=16000 */ + { 0xd512001d /* x^383967 mod 
G(x) */, 0xe6612dff /* x^255967 mod G(x) */, 0x5c4d0ca9 /* x^127967 mod G(x) */, }, + /* chunk_len=16128 */ + { 0x4e8d6b6c /* x^387039 mod G(x) */, 0xf997967f /* x^258015 mod G(x) */, 0x2d546c53 /* x^128991 mod G(x) */, }, + /* chunk_len=16256 */ + { 0xfa653ba1 /* x^390111 mod G(x) */, 0xc99014d4 /* x^260063 mod G(x) */, 0xa0c9fd27 /* x^130015 mod G(x) */, }, + /* chunk_len=16384 */ + { 0x49893408 /* x^393183 mod G(x) */, 0x29c2448b /* x^262111 mod G(x) */, 0xe0ee5efe /* x^131039 mod G(x) */, }, +}; + +/* Multipliers for implementations that use a large fixed chunk length */ +#define CRC32_FIXED_CHUNK_LEN 32768UL +#define CRC32_FIXED_CHUNK_MULT_1 0x29c2448b /* x^262111 mod G(x) */ +#define CRC32_FIXED_CHUNK_MULT_2 0x4b912f53 /* x^524255 mod G(x) */ +#define CRC32_FIXED_CHUNK_MULT_3 0x454c93be /* x^786399 mod G(x) */ diff --git a/tools/z64compress/src/enc/libdeflate/lib/crc32_tables.h b/tools/z64compress/src/enc/libdeflate/lib/crc32_tables.h new file mode 100644 index 000000000..86228c72a --- /dev/null +++ b/tools/z64compress/src/enc/libdeflate/lib/crc32_tables.h @@ -0,0 +1,587 @@ +/* + * crc32_tables.h - data tables for CRC-32 computation + * + * THIS FILE WAS GENERATED BY gen_crc32_tables.c. DO NOT EDIT. 
+ */ + +static const u32 crc32_slice1_table[] MAYBE_UNUSED = { + 0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, + 0x076dc419, 0x706af48f, 0xe963a535, 0x9e6495a3, + 0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988, + 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91, + 0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de, + 0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7, + 0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, + 0x14015c4f, 0x63066cd9, 0xfa0f3d63, 0x8d080df5, + 0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172, + 0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b, + 0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940, + 0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59, + 0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116, + 0x21b4f4b5, 0x56b3c423, 0xcfba9599, 0xb8bda50f, + 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924, + 0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d, + 0x76dc4190, 0x01db7106, 0x98d220bc, 0xefd5102a, + 0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433, + 0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818, + 0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01, + 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e, + 0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457, + 0x65b0d9c6, 0x12b7e950, 0x8bbeb8ea, 0xfcb9887c, + 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65, + 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2, + 0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb, + 0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0, + 0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9, + 0x5005713c, 0x270241aa, 0xbe0b1010, 0xc90c2086, + 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f, + 0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, + 0x59b33d17, 0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad, + 0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a, + 0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683, + 0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8, + 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1, + 0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe, + 0xf762575d, 0x806567cb, 0x196c3671, 0x6e6b06e7, + 0xfed41b76, 0x89d32be0, 
0x10da7a5a, 0x67dd4acc, + 0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5, + 0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252, + 0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b, + 0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60, + 0xdf60efc3, 0xa867df55, 0x316e8eef, 0x4669be79, + 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236, + 0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, + 0xc5ba3bbe, 0xb2bd0b28, 0x2bb45a92, 0x5cb36a04, + 0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d, + 0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a, + 0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713, + 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38, + 0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, + 0x86d3d2d4, 0xf1d4e242, 0x68ddb3f8, 0x1fda836e, + 0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777, + 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c, + 0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45, + 0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2, + 0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db, + 0xaed16a4a, 0xd9d65adc, 0x40df0b66, 0x37d83bf0, + 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9, + 0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, + 0xbad03605, 0xcdd70693, 0x54de5729, 0x23d967bf, + 0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94, + 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d, +}; + +static const u32 crc32_slice8_table[] MAYBE_UNUSED = { + 0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, + 0x076dc419, 0x706af48f, 0xe963a535, 0x9e6495a3, + 0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988, + 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91, + 0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de, + 0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7, + 0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, + 0x14015c4f, 0x63066cd9, 0xfa0f3d63, 0x8d080df5, + 0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172, + 0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b, + 0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940, + 0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59, + 0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116, + 0x21b4f4b5, 
0x56b3c423, 0xcfba9599, 0xb8bda50f, + 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924, + 0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d, + 0x76dc4190, 0x01db7106, 0x98d220bc, 0xefd5102a, + 0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433, + 0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818, + 0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01, + 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e, + 0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457, + 0x65b0d9c6, 0x12b7e950, 0x8bbeb8ea, 0xfcb9887c, + 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65, + 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2, + 0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb, + 0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0, + 0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9, + 0x5005713c, 0x270241aa, 0xbe0b1010, 0xc90c2086, + 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f, + 0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, + 0x59b33d17, 0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad, + 0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a, + 0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683, + 0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8, + 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1, + 0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe, + 0xf762575d, 0x806567cb, 0x196c3671, 0x6e6b06e7, + 0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc, + 0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5, + 0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252, + 0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b, + 0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60, + 0xdf60efc3, 0xa867df55, 0x316e8eef, 0x4669be79, + 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236, + 0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, + 0xc5ba3bbe, 0xb2bd0b28, 0x2bb45a92, 0x5cb36a04, + 0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d, + 0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a, + 0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713, + 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38, + 0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, + 0x86d3d2d4, 0xf1d4e242, 0x68ddb3f8, 0x1fda836e, + 0x81be16cd, 
0xf6b9265b, 0x6fb077e1, 0x18b74777, + 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c, + 0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45, + 0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2, + 0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db, + 0xaed16a4a, 0xd9d65adc, 0x40df0b66, 0x37d83bf0, + 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9, + 0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, + 0xbad03605, 0xcdd70693, 0x54de5729, 0x23d967bf, + 0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94, + 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d, + 0x00000000, 0x191b3141, 0x32366282, 0x2b2d53c3, + 0x646cc504, 0x7d77f445, 0x565aa786, 0x4f4196c7, + 0xc8d98a08, 0xd1c2bb49, 0xfaefe88a, 0xe3f4d9cb, + 0xacb54f0c, 0xb5ae7e4d, 0x9e832d8e, 0x87981ccf, + 0x4ac21251, 0x53d92310, 0x78f470d3, 0x61ef4192, + 0x2eaed755, 0x37b5e614, 0x1c98b5d7, 0x05838496, + 0x821b9859, 0x9b00a918, 0xb02dfadb, 0xa936cb9a, + 0xe6775d5d, 0xff6c6c1c, 0xd4413fdf, 0xcd5a0e9e, + 0x958424a2, 0x8c9f15e3, 0xa7b24620, 0xbea97761, + 0xf1e8e1a6, 0xe8f3d0e7, 0xc3de8324, 0xdac5b265, + 0x5d5daeaa, 0x44469feb, 0x6f6bcc28, 0x7670fd69, + 0x39316bae, 0x202a5aef, 0x0b07092c, 0x121c386d, + 0xdf4636f3, 0xc65d07b2, 0xed705471, 0xf46b6530, + 0xbb2af3f7, 0xa231c2b6, 0x891c9175, 0x9007a034, + 0x179fbcfb, 0x0e848dba, 0x25a9de79, 0x3cb2ef38, + 0x73f379ff, 0x6ae848be, 0x41c51b7d, 0x58de2a3c, + 0xf0794f05, 0xe9627e44, 0xc24f2d87, 0xdb541cc6, + 0x94158a01, 0x8d0ebb40, 0xa623e883, 0xbf38d9c2, + 0x38a0c50d, 0x21bbf44c, 0x0a96a78f, 0x138d96ce, + 0x5ccc0009, 0x45d73148, 0x6efa628b, 0x77e153ca, + 0xbabb5d54, 0xa3a06c15, 0x888d3fd6, 0x91960e97, + 0xded79850, 0xc7cca911, 0xece1fad2, 0xf5facb93, + 0x7262d75c, 0x6b79e61d, 0x4054b5de, 0x594f849f, + 0x160e1258, 0x0f152319, 0x243870da, 0x3d23419b, + 0x65fd6ba7, 0x7ce65ae6, 0x57cb0925, 0x4ed03864, + 0x0191aea3, 0x188a9fe2, 0x33a7cc21, 0x2abcfd60, + 0xad24e1af, 0xb43fd0ee, 0x9f12832d, 0x8609b26c, + 0xc94824ab, 0xd05315ea, 0xfb7e4629, 0xe2657768, + 0x2f3f79f6, 0x362448b7, 0x1d091b74, 0x04122a35, + 0x4b53bcf2, 
0x52488db3, 0x7965de70, 0x607eef31, + 0xe7e6f3fe, 0xfefdc2bf, 0xd5d0917c, 0xcccba03d, + 0x838a36fa, 0x9a9107bb, 0xb1bc5478, 0xa8a76539, + 0x3b83984b, 0x2298a90a, 0x09b5fac9, 0x10aecb88, + 0x5fef5d4f, 0x46f46c0e, 0x6dd93fcd, 0x74c20e8c, + 0xf35a1243, 0xea412302, 0xc16c70c1, 0xd8774180, + 0x9736d747, 0x8e2de606, 0xa500b5c5, 0xbc1b8484, + 0x71418a1a, 0x685abb5b, 0x4377e898, 0x5a6cd9d9, + 0x152d4f1e, 0x0c367e5f, 0x271b2d9c, 0x3e001cdd, + 0xb9980012, 0xa0833153, 0x8bae6290, 0x92b553d1, + 0xddf4c516, 0xc4eff457, 0xefc2a794, 0xf6d996d5, + 0xae07bce9, 0xb71c8da8, 0x9c31de6b, 0x852aef2a, + 0xca6b79ed, 0xd37048ac, 0xf85d1b6f, 0xe1462a2e, + 0x66de36e1, 0x7fc507a0, 0x54e85463, 0x4df36522, + 0x02b2f3e5, 0x1ba9c2a4, 0x30849167, 0x299fa026, + 0xe4c5aeb8, 0xfdde9ff9, 0xd6f3cc3a, 0xcfe8fd7b, + 0x80a96bbc, 0x99b25afd, 0xb29f093e, 0xab84387f, + 0x2c1c24b0, 0x350715f1, 0x1e2a4632, 0x07317773, + 0x4870e1b4, 0x516bd0f5, 0x7a468336, 0x635db277, + 0xcbfad74e, 0xd2e1e60f, 0xf9ccb5cc, 0xe0d7848d, + 0xaf96124a, 0xb68d230b, 0x9da070c8, 0x84bb4189, + 0x03235d46, 0x1a386c07, 0x31153fc4, 0x280e0e85, + 0x674f9842, 0x7e54a903, 0x5579fac0, 0x4c62cb81, + 0x8138c51f, 0x9823f45e, 0xb30ea79d, 0xaa1596dc, + 0xe554001b, 0xfc4f315a, 0xd7626299, 0xce7953d8, + 0x49e14f17, 0x50fa7e56, 0x7bd72d95, 0x62cc1cd4, + 0x2d8d8a13, 0x3496bb52, 0x1fbbe891, 0x06a0d9d0, + 0x5e7ef3ec, 0x4765c2ad, 0x6c48916e, 0x7553a02f, + 0x3a1236e8, 0x230907a9, 0x0824546a, 0x113f652b, + 0x96a779e4, 0x8fbc48a5, 0xa4911b66, 0xbd8a2a27, + 0xf2cbbce0, 0xebd08da1, 0xc0fdde62, 0xd9e6ef23, + 0x14bce1bd, 0x0da7d0fc, 0x268a833f, 0x3f91b27e, + 0x70d024b9, 0x69cb15f8, 0x42e6463b, 0x5bfd777a, + 0xdc656bb5, 0xc57e5af4, 0xee530937, 0xf7483876, + 0xb809aeb1, 0xa1129ff0, 0x8a3fcc33, 0x9324fd72, + 0x00000000, 0x01c26a37, 0x0384d46e, 0x0246be59, + 0x0709a8dc, 0x06cbc2eb, 0x048d7cb2, 0x054f1685, + 0x0e1351b8, 0x0fd13b8f, 0x0d9785d6, 0x0c55efe1, + 0x091af964, 0x08d89353, 0x0a9e2d0a, 0x0b5c473d, + 0x1c26a370, 0x1de4c947, 0x1fa2771e, 0x1e601d29, + 0x1b2f0bac, 
0x1aed619b, 0x18abdfc2, 0x1969b5f5, + 0x1235f2c8, 0x13f798ff, 0x11b126a6, 0x10734c91, + 0x153c5a14, 0x14fe3023, 0x16b88e7a, 0x177ae44d, + 0x384d46e0, 0x398f2cd7, 0x3bc9928e, 0x3a0bf8b9, + 0x3f44ee3c, 0x3e86840b, 0x3cc03a52, 0x3d025065, + 0x365e1758, 0x379c7d6f, 0x35dac336, 0x3418a901, + 0x3157bf84, 0x3095d5b3, 0x32d36bea, 0x331101dd, + 0x246be590, 0x25a98fa7, 0x27ef31fe, 0x262d5bc9, + 0x23624d4c, 0x22a0277b, 0x20e69922, 0x2124f315, + 0x2a78b428, 0x2bbade1f, 0x29fc6046, 0x283e0a71, + 0x2d711cf4, 0x2cb376c3, 0x2ef5c89a, 0x2f37a2ad, + 0x709a8dc0, 0x7158e7f7, 0x731e59ae, 0x72dc3399, + 0x7793251c, 0x76514f2b, 0x7417f172, 0x75d59b45, + 0x7e89dc78, 0x7f4bb64f, 0x7d0d0816, 0x7ccf6221, + 0x798074a4, 0x78421e93, 0x7a04a0ca, 0x7bc6cafd, + 0x6cbc2eb0, 0x6d7e4487, 0x6f38fade, 0x6efa90e9, + 0x6bb5866c, 0x6a77ec5b, 0x68315202, 0x69f33835, + 0x62af7f08, 0x636d153f, 0x612bab66, 0x60e9c151, + 0x65a6d7d4, 0x6464bde3, 0x662203ba, 0x67e0698d, + 0x48d7cb20, 0x4915a117, 0x4b531f4e, 0x4a917579, + 0x4fde63fc, 0x4e1c09cb, 0x4c5ab792, 0x4d98dda5, + 0x46c49a98, 0x4706f0af, 0x45404ef6, 0x448224c1, + 0x41cd3244, 0x400f5873, 0x4249e62a, 0x438b8c1d, + 0x54f16850, 0x55330267, 0x5775bc3e, 0x56b7d609, + 0x53f8c08c, 0x523aaabb, 0x507c14e2, 0x51be7ed5, + 0x5ae239e8, 0x5b2053df, 0x5966ed86, 0x58a487b1, + 0x5deb9134, 0x5c29fb03, 0x5e6f455a, 0x5fad2f6d, + 0xe1351b80, 0xe0f771b7, 0xe2b1cfee, 0xe373a5d9, + 0xe63cb35c, 0xe7fed96b, 0xe5b86732, 0xe47a0d05, + 0xef264a38, 0xeee4200f, 0xeca29e56, 0xed60f461, + 0xe82fe2e4, 0xe9ed88d3, 0xebab368a, 0xea695cbd, + 0xfd13b8f0, 0xfcd1d2c7, 0xfe976c9e, 0xff5506a9, + 0xfa1a102c, 0xfbd87a1b, 0xf99ec442, 0xf85cae75, + 0xf300e948, 0xf2c2837f, 0xf0843d26, 0xf1465711, + 0xf4094194, 0xf5cb2ba3, 0xf78d95fa, 0xf64fffcd, + 0xd9785d60, 0xd8ba3757, 0xdafc890e, 0xdb3ee339, + 0xde71f5bc, 0xdfb39f8b, 0xddf521d2, 0xdc374be5, + 0xd76b0cd8, 0xd6a966ef, 0xd4efd8b6, 0xd52db281, + 0xd062a404, 0xd1a0ce33, 0xd3e6706a, 0xd2241a5d, + 0xc55efe10, 0xc49c9427, 0xc6da2a7e, 0xc7184049, + 0xc25756cc, 
0xc3953cfb, 0xc1d382a2, 0xc011e895, + 0xcb4dafa8, 0xca8fc59f, 0xc8c97bc6, 0xc90b11f1, + 0xcc440774, 0xcd866d43, 0xcfc0d31a, 0xce02b92d, + 0x91af9640, 0x906dfc77, 0x922b422e, 0x93e92819, + 0x96a63e9c, 0x976454ab, 0x9522eaf2, 0x94e080c5, + 0x9fbcc7f8, 0x9e7eadcf, 0x9c381396, 0x9dfa79a1, + 0x98b56f24, 0x99770513, 0x9b31bb4a, 0x9af3d17d, + 0x8d893530, 0x8c4b5f07, 0x8e0de15e, 0x8fcf8b69, + 0x8a809dec, 0x8b42f7db, 0x89044982, 0x88c623b5, + 0x839a6488, 0x82580ebf, 0x801eb0e6, 0x81dcdad1, + 0x8493cc54, 0x8551a663, 0x8717183a, 0x86d5720d, + 0xa9e2d0a0, 0xa820ba97, 0xaa6604ce, 0xaba46ef9, + 0xaeeb787c, 0xaf29124b, 0xad6fac12, 0xacadc625, + 0xa7f18118, 0xa633eb2f, 0xa4755576, 0xa5b73f41, + 0xa0f829c4, 0xa13a43f3, 0xa37cfdaa, 0xa2be979d, + 0xb5c473d0, 0xb40619e7, 0xb640a7be, 0xb782cd89, + 0xb2cddb0c, 0xb30fb13b, 0xb1490f62, 0xb08b6555, + 0xbbd72268, 0xba15485f, 0xb853f606, 0xb9919c31, + 0xbcde8ab4, 0xbd1ce083, 0xbf5a5eda, 0xbe9834ed, + 0x00000000, 0xb8bc6765, 0xaa09c88b, 0x12b5afee, + 0x8f629757, 0x37def032, 0x256b5fdc, 0x9dd738b9, + 0xc5b428ef, 0x7d084f8a, 0x6fbde064, 0xd7018701, + 0x4ad6bfb8, 0xf26ad8dd, 0xe0df7733, 0x58631056, + 0x5019579f, 0xe8a530fa, 0xfa109f14, 0x42acf871, + 0xdf7bc0c8, 0x67c7a7ad, 0x75720843, 0xcdce6f26, + 0x95ad7f70, 0x2d111815, 0x3fa4b7fb, 0x8718d09e, + 0x1acfe827, 0xa2738f42, 0xb0c620ac, 0x087a47c9, + 0xa032af3e, 0x188ec85b, 0x0a3b67b5, 0xb28700d0, + 0x2f503869, 0x97ec5f0c, 0x8559f0e2, 0x3de59787, + 0x658687d1, 0xdd3ae0b4, 0xcf8f4f5a, 0x7733283f, + 0xeae41086, 0x525877e3, 0x40edd80d, 0xf851bf68, + 0xf02bf8a1, 0x48979fc4, 0x5a22302a, 0xe29e574f, + 0x7f496ff6, 0xc7f50893, 0xd540a77d, 0x6dfcc018, + 0x359fd04e, 0x8d23b72b, 0x9f9618c5, 0x272a7fa0, + 0xbafd4719, 0x0241207c, 0x10f48f92, 0xa848e8f7, + 0x9b14583d, 0x23a83f58, 0x311d90b6, 0x89a1f7d3, + 0x1476cf6a, 0xaccaa80f, 0xbe7f07e1, 0x06c36084, + 0x5ea070d2, 0xe61c17b7, 0xf4a9b859, 0x4c15df3c, + 0xd1c2e785, 0x697e80e0, 0x7bcb2f0e, 0xc377486b, + 0xcb0d0fa2, 0x73b168c7, 0x6104c729, 0xd9b8a04c, + 0x446f98f5, 
0xfcd3ff90, 0xee66507e, 0x56da371b, + 0x0eb9274d, 0xb6054028, 0xa4b0efc6, 0x1c0c88a3, + 0x81dbb01a, 0x3967d77f, 0x2bd27891, 0x936e1ff4, + 0x3b26f703, 0x839a9066, 0x912f3f88, 0x299358ed, + 0xb4446054, 0x0cf80731, 0x1e4da8df, 0xa6f1cfba, + 0xfe92dfec, 0x462eb889, 0x549b1767, 0xec277002, + 0x71f048bb, 0xc94c2fde, 0xdbf98030, 0x6345e755, + 0x6b3fa09c, 0xd383c7f9, 0xc1366817, 0x798a0f72, + 0xe45d37cb, 0x5ce150ae, 0x4e54ff40, 0xf6e89825, + 0xae8b8873, 0x1637ef16, 0x048240f8, 0xbc3e279d, + 0x21e91f24, 0x99557841, 0x8be0d7af, 0x335cb0ca, + 0xed59b63b, 0x55e5d15e, 0x47507eb0, 0xffec19d5, + 0x623b216c, 0xda874609, 0xc832e9e7, 0x708e8e82, + 0x28ed9ed4, 0x9051f9b1, 0x82e4565f, 0x3a58313a, + 0xa78f0983, 0x1f336ee6, 0x0d86c108, 0xb53aa66d, + 0xbd40e1a4, 0x05fc86c1, 0x1749292f, 0xaff54e4a, + 0x322276f3, 0x8a9e1196, 0x982bbe78, 0x2097d91d, + 0x78f4c94b, 0xc048ae2e, 0xd2fd01c0, 0x6a4166a5, + 0xf7965e1c, 0x4f2a3979, 0x5d9f9697, 0xe523f1f2, + 0x4d6b1905, 0xf5d77e60, 0xe762d18e, 0x5fdeb6eb, + 0xc2098e52, 0x7ab5e937, 0x680046d9, 0xd0bc21bc, + 0x88df31ea, 0x3063568f, 0x22d6f961, 0x9a6a9e04, + 0x07bda6bd, 0xbf01c1d8, 0xadb46e36, 0x15080953, + 0x1d724e9a, 0xa5ce29ff, 0xb77b8611, 0x0fc7e174, + 0x9210d9cd, 0x2aacbea8, 0x38191146, 0x80a57623, + 0xd8c66675, 0x607a0110, 0x72cfaefe, 0xca73c99b, + 0x57a4f122, 0xef189647, 0xfdad39a9, 0x45115ecc, + 0x764dee06, 0xcef18963, 0xdc44268d, 0x64f841e8, + 0xf92f7951, 0x41931e34, 0x5326b1da, 0xeb9ad6bf, + 0xb3f9c6e9, 0x0b45a18c, 0x19f00e62, 0xa14c6907, + 0x3c9b51be, 0x842736db, 0x96929935, 0x2e2efe50, + 0x2654b999, 0x9ee8defc, 0x8c5d7112, 0x34e11677, + 0xa9362ece, 0x118a49ab, 0x033fe645, 0xbb838120, + 0xe3e09176, 0x5b5cf613, 0x49e959fd, 0xf1553e98, + 0x6c820621, 0xd43e6144, 0xc68bceaa, 0x7e37a9cf, + 0xd67f4138, 0x6ec3265d, 0x7c7689b3, 0xc4caeed6, + 0x591dd66f, 0xe1a1b10a, 0xf3141ee4, 0x4ba87981, + 0x13cb69d7, 0xab770eb2, 0xb9c2a15c, 0x017ec639, + 0x9ca9fe80, 0x241599e5, 0x36a0360b, 0x8e1c516e, + 0x866616a7, 0x3eda71c2, 0x2c6fde2c, 0x94d3b949, + 0x090481f0, 
0xb1b8e695, 0xa30d497b, 0x1bb12e1e, + 0x43d23e48, 0xfb6e592d, 0xe9dbf6c3, 0x516791a6, + 0xccb0a91f, 0x740cce7a, 0x66b96194, 0xde0506f1, + 0x00000000, 0x3d6029b0, 0x7ac05360, 0x47a07ad0, + 0xf580a6c0, 0xc8e08f70, 0x8f40f5a0, 0xb220dc10, + 0x30704bc1, 0x0d106271, 0x4ab018a1, 0x77d03111, + 0xc5f0ed01, 0xf890c4b1, 0xbf30be61, 0x825097d1, + 0x60e09782, 0x5d80be32, 0x1a20c4e2, 0x2740ed52, + 0x95603142, 0xa80018f2, 0xefa06222, 0xd2c04b92, + 0x5090dc43, 0x6df0f5f3, 0x2a508f23, 0x1730a693, + 0xa5107a83, 0x98705333, 0xdfd029e3, 0xe2b00053, + 0xc1c12f04, 0xfca106b4, 0xbb017c64, 0x866155d4, + 0x344189c4, 0x0921a074, 0x4e81daa4, 0x73e1f314, + 0xf1b164c5, 0xccd14d75, 0x8b7137a5, 0xb6111e15, + 0x0431c205, 0x3951ebb5, 0x7ef19165, 0x4391b8d5, + 0xa121b886, 0x9c419136, 0xdbe1ebe6, 0xe681c256, + 0x54a11e46, 0x69c137f6, 0x2e614d26, 0x13016496, + 0x9151f347, 0xac31daf7, 0xeb91a027, 0xd6f18997, + 0x64d15587, 0x59b17c37, 0x1e1106e7, 0x23712f57, + 0x58f35849, 0x659371f9, 0x22330b29, 0x1f532299, + 0xad73fe89, 0x9013d739, 0xd7b3ade9, 0xead38459, + 0x68831388, 0x55e33a38, 0x124340e8, 0x2f236958, + 0x9d03b548, 0xa0639cf8, 0xe7c3e628, 0xdaa3cf98, + 0x3813cfcb, 0x0573e67b, 0x42d39cab, 0x7fb3b51b, + 0xcd93690b, 0xf0f340bb, 0xb7533a6b, 0x8a3313db, + 0x0863840a, 0x3503adba, 0x72a3d76a, 0x4fc3feda, + 0xfde322ca, 0xc0830b7a, 0x872371aa, 0xba43581a, + 0x9932774d, 0xa4525efd, 0xe3f2242d, 0xde920d9d, + 0x6cb2d18d, 0x51d2f83d, 0x167282ed, 0x2b12ab5d, + 0xa9423c8c, 0x9422153c, 0xd3826fec, 0xeee2465c, + 0x5cc29a4c, 0x61a2b3fc, 0x2602c92c, 0x1b62e09c, + 0xf9d2e0cf, 0xc4b2c97f, 0x8312b3af, 0xbe729a1f, + 0x0c52460f, 0x31326fbf, 0x7692156f, 0x4bf23cdf, + 0xc9a2ab0e, 0xf4c282be, 0xb362f86e, 0x8e02d1de, + 0x3c220dce, 0x0142247e, 0x46e25eae, 0x7b82771e, + 0xb1e6b092, 0x8c869922, 0xcb26e3f2, 0xf646ca42, + 0x44661652, 0x79063fe2, 0x3ea64532, 0x03c66c82, + 0x8196fb53, 0xbcf6d2e3, 0xfb56a833, 0xc6368183, + 0x74165d93, 0x49767423, 0x0ed60ef3, 0x33b62743, + 0xd1062710, 0xec660ea0, 0xabc67470, 0x96a65dc0, + 0x248681d0, 
0x19e6a860, 0x5e46d2b0, 0x6326fb00, + 0xe1766cd1, 0xdc164561, 0x9bb63fb1, 0xa6d61601, + 0x14f6ca11, 0x2996e3a1, 0x6e369971, 0x5356b0c1, + 0x70279f96, 0x4d47b626, 0x0ae7ccf6, 0x3787e546, + 0x85a73956, 0xb8c710e6, 0xff676a36, 0xc2074386, + 0x4057d457, 0x7d37fde7, 0x3a978737, 0x07f7ae87, + 0xb5d77297, 0x88b75b27, 0xcf1721f7, 0xf2770847, + 0x10c70814, 0x2da721a4, 0x6a075b74, 0x576772c4, + 0xe547aed4, 0xd8278764, 0x9f87fdb4, 0xa2e7d404, + 0x20b743d5, 0x1dd76a65, 0x5a7710b5, 0x67173905, + 0xd537e515, 0xe857cca5, 0xaff7b675, 0x92979fc5, + 0xe915e8db, 0xd475c16b, 0x93d5bbbb, 0xaeb5920b, + 0x1c954e1b, 0x21f567ab, 0x66551d7b, 0x5b3534cb, + 0xd965a31a, 0xe4058aaa, 0xa3a5f07a, 0x9ec5d9ca, + 0x2ce505da, 0x11852c6a, 0x562556ba, 0x6b457f0a, + 0x89f57f59, 0xb49556e9, 0xf3352c39, 0xce550589, + 0x7c75d999, 0x4115f029, 0x06b58af9, 0x3bd5a349, + 0xb9853498, 0x84e51d28, 0xc34567f8, 0xfe254e48, + 0x4c059258, 0x7165bbe8, 0x36c5c138, 0x0ba5e888, + 0x28d4c7df, 0x15b4ee6f, 0x521494bf, 0x6f74bd0f, + 0xdd54611f, 0xe03448af, 0xa794327f, 0x9af41bcf, + 0x18a48c1e, 0x25c4a5ae, 0x6264df7e, 0x5f04f6ce, + 0xed242ade, 0xd044036e, 0x97e479be, 0xaa84500e, + 0x4834505d, 0x755479ed, 0x32f4033d, 0x0f942a8d, + 0xbdb4f69d, 0x80d4df2d, 0xc774a5fd, 0xfa148c4d, + 0x78441b9c, 0x4524322c, 0x028448fc, 0x3fe4614c, + 0x8dc4bd5c, 0xb0a494ec, 0xf704ee3c, 0xca64c78c, + 0x00000000, 0xcb5cd3a5, 0x4dc8a10b, 0x869472ae, + 0x9b914216, 0x50cd91b3, 0xd659e31d, 0x1d0530b8, + 0xec53826d, 0x270f51c8, 0xa19b2366, 0x6ac7f0c3, + 0x77c2c07b, 0xbc9e13de, 0x3a0a6170, 0xf156b2d5, + 0x03d6029b, 0xc88ad13e, 0x4e1ea390, 0x85427035, + 0x9847408d, 0x531b9328, 0xd58fe186, 0x1ed33223, + 0xef8580f6, 0x24d95353, 0xa24d21fd, 0x6911f258, + 0x7414c2e0, 0xbf481145, 0x39dc63eb, 0xf280b04e, + 0x07ac0536, 0xccf0d693, 0x4a64a43d, 0x81387798, + 0x9c3d4720, 0x57619485, 0xd1f5e62b, 0x1aa9358e, + 0xebff875b, 0x20a354fe, 0xa6372650, 0x6d6bf5f5, + 0x706ec54d, 0xbb3216e8, 0x3da66446, 0xf6fab7e3, + 0x047a07ad, 0xcf26d408, 0x49b2a6a6, 0x82ee7503, + 0x9feb45bb, 
0x54b7961e, 0xd223e4b0, 0x197f3715, + 0xe82985c0, 0x23755665, 0xa5e124cb, 0x6ebdf76e, + 0x73b8c7d6, 0xb8e41473, 0x3e7066dd, 0xf52cb578, + 0x0f580a6c, 0xc404d9c9, 0x4290ab67, 0x89cc78c2, + 0x94c9487a, 0x5f959bdf, 0xd901e971, 0x125d3ad4, + 0xe30b8801, 0x28575ba4, 0xaec3290a, 0x659ffaaf, + 0x789aca17, 0xb3c619b2, 0x35526b1c, 0xfe0eb8b9, + 0x0c8e08f7, 0xc7d2db52, 0x4146a9fc, 0x8a1a7a59, + 0x971f4ae1, 0x5c439944, 0xdad7ebea, 0x118b384f, + 0xe0dd8a9a, 0x2b81593f, 0xad152b91, 0x6649f834, + 0x7b4cc88c, 0xb0101b29, 0x36846987, 0xfdd8ba22, + 0x08f40f5a, 0xc3a8dcff, 0x453cae51, 0x8e607df4, + 0x93654d4c, 0x58399ee9, 0xdeadec47, 0x15f13fe2, + 0xe4a78d37, 0x2ffb5e92, 0xa96f2c3c, 0x6233ff99, + 0x7f36cf21, 0xb46a1c84, 0x32fe6e2a, 0xf9a2bd8f, + 0x0b220dc1, 0xc07ede64, 0x46eaacca, 0x8db67f6f, + 0x90b34fd7, 0x5bef9c72, 0xdd7beedc, 0x16273d79, + 0xe7718fac, 0x2c2d5c09, 0xaab92ea7, 0x61e5fd02, + 0x7ce0cdba, 0xb7bc1e1f, 0x31286cb1, 0xfa74bf14, + 0x1eb014d8, 0xd5ecc77d, 0x5378b5d3, 0x98246676, + 0x852156ce, 0x4e7d856b, 0xc8e9f7c5, 0x03b52460, + 0xf2e396b5, 0x39bf4510, 0xbf2b37be, 0x7477e41b, + 0x6972d4a3, 0xa22e0706, 0x24ba75a8, 0xefe6a60d, + 0x1d661643, 0xd63ac5e6, 0x50aeb748, 0x9bf264ed, + 0x86f75455, 0x4dab87f0, 0xcb3ff55e, 0x006326fb, + 0xf135942e, 0x3a69478b, 0xbcfd3525, 0x77a1e680, + 0x6aa4d638, 0xa1f8059d, 0x276c7733, 0xec30a496, + 0x191c11ee, 0xd240c24b, 0x54d4b0e5, 0x9f886340, + 0x828d53f8, 0x49d1805d, 0xcf45f2f3, 0x04192156, + 0xf54f9383, 0x3e134026, 0xb8873288, 0x73dbe12d, + 0x6eded195, 0xa5820230, 0x2316709e, 0xe84aa33b, + 0x1aca1375, 0xd196c0d0, 0x5702b27e, 0x9c5e61db, + 0x815b5163, 0x4a0782c6, 0xcc93f068, 0x07cf23cd, + 0xf6999118, 0x3dc542bd, 0xbb513013, 0x700de3b6, + 0x6d08d30e, 0xa65400ab, 0x20c07205, 0xeb9ca1a0, + 0x11e81eb4, 0xdab4cd11, 0x5c20bfbf, 0x977c6c1a, + 0x8a795ca2, 0x41258f07, 0xc7b1fda9, 0x0ced2e0c, + 0xfdbb9cd9, 0x36e74f7c, 0xb0733dd2, 0x7b2fee77, + 0x662adecf, 0xad760d6a, 0x2be27fc4, 0xe0beac61, + 0x123e1c2f, 0xd962cf8a, 0x5ff6bd24, 0x94aa6e81, + 0x89af5e39, 
0x42f38d9c, 0xc467ff32, 0x0f3b2c97, + 0xfe6d9e42, 0x35314de7, 0xb3a53f49, 0x78f9ecec, + 0x65fcdc54, 0xaea00ff1, 0x28347d5f, 0xe368aefa, + 0x16441b82, 0xdd18c827, 0x5b8cba89, 0x90d0692c, + 0x8dd55994, 0x46898a31, 0xc01df89f, 0x0b412b3a, + 0xfa1799ef, 0x314b4a4a, 0xb7df38e4, 0x7c83eb41, + 0x6186dbf9, 0xaada085c, 0x2c4e7af2, 0xe712a957, + 0x15921919, 0xdececabc, 0x585ab812, 0x93066bb7, + 0x8e035b0f, 0x455f88aa, 0xc3cbfa04, 0x089729a1, + 0xf9c19b74, 0x329d48d1, 0xb4093a7f, 0x7f55e9da, + 0x6250d962, 0xa90c0ac7, 0x2f987869, 0xe4c4abcc, + 0x00000000, 0xa6770bb4, 0x979f1129, 0x31e81a9d, + 0xf44f2413, 0x52382fa7, 0x63d0353a, 0xc5a73e8e, + 0x33ef4e67, 0x959845d3, 0xa4705f4e, 0x020754fa, + 0xc7a06a74, 0x61d761c0, 0x503f7b5d, 0xf64870e9, + 0x67de9cce, 0xc1a9977a, 0xf0418de7, 0x56368653, + 0x9391b8dd, 0x35e6b369, 0x040ea9f4, 0xa279a240, + 0x5431d2a9, 0xf246d91d, 0xc3aec380, 0x65d9c834, + 0xa07ef6ba, 0x0609fd0e, 0x37e1e793, 0x9196ec27, + 0xcfbd399c, 0x69ca3228, 0x582228b5, 0xfe552301, + 0x3bf21d8f, 0x9d85163b, 0xac6d0ca6, 0x0a1a0712, + 0xfc5277fb, 0x5a257c4f, 0x6bcd66d2, 0xcdba6d66, + 0x081d53e8, 0xae6a585c, 0x9f8242c1, 0x39f54975, + 0xa863a552, 0x0e14aee6, 0x3ffcb47b, 0x998bbfcf, + 0x5c2c8141, 0xfa5b8af5, 0xcbb39068, 0x6dc49bdc, + 0x9b8ceb35, 0x3dfbe081, 0x0c13fa1c, 0xaa64f1a8, + 0x6fc3cf26, 0xc9b4c492, 0xf85cde0f, 0x5e2bd5bb, + 0x440b7579, 0xe27c7ecd, 0xd3946450, 0x75e36fe4, + 0xb044516a, 0x16335ade, 0x27db4043, 0x81ac4bf7, + 0x77e43b1e, 0xd19330aa, 0xe07b2a37, 0x460c2183, + 0x83ab1f0d, 0x25dc14b9, 0x14340e24, 0xb2430590, + 0x23d5e9b7, 0x85a2e203, 0xb44af89e, 0x123df32a, + 0xd79acda4, 0x71edc610, 0x4005dc8d, 0xe672d739, + 0x103aa7d0, 0xb64dac64, 0x87a5b6f9, 0x21d2bd4d, + 0xe47583c3, 0x42028877, 0x73ea92ea, 0xd59d995e, + 0x8bb64ce5, 0x2dc14751, 0x1c295dcc, 0xba5e5678, + 0x7ff968f6, 0xd98e6342, 0xe86679df, 0x4e11726b, + 0xb8590282, 0x1e2e0936, 0x2fc613ab, 0x89b1181f, + 0x4c162691, 0xea612d25, 0xdb8937b8, 0x7dfe3c0c, + 0xec68d02b, 0x4a1fdb9f, 0x7bf7c102, 0xdd80cab6, + 0x1827f438, 
0xbe50ff8c, 0x8fb8e511, 0x29cfeea5, + 0xdf879e4c, 0x79f095f8, 0x48188f65, 0xee6f84d1, + 0x2bc8ba5f, 0x8dbfb1eb, 0xbc57ab76, 0x1a20a0c2, + 0x8816eaf2, 0x2e61e146, 0x1f89fbdb, 0xb9fef06f, + 0x7c59cee1, 0xda2ec555, 0xebc6dfc8, 0x4db1d47c, + 0xbbf9a495, 0x1d8eaf21, 0x2c66b5bc, 0x8a11be08, + 0x4fb68086, 0xe9c18b32, 0xd82991af, 0x7e5e9a1b, + 0xefc8763c, 0x49bf7d88, 0x78576715, 0xde206ca1, + 0x1b87522f, 0xbdf0599b, 0x8c184306, 0x2a6f48b2, + 0xdc27385b, 0x7a5033ef, 0x4bb82972, 0xedcf22c6, + 0x28681c48, 0x8e1f17fc, 0xbff70d61, 0x198006d5, + 0x47abd36e, 0xe1dcd8da, 0xd034c247, 0x7643c9f3, + 0xb3e4f77d, 0x1593fcc9, 0x247be654, 0x820cede0, + 0x74449d09, 0xd23396bd, 0xe3db8c20, 0x45ac8794, + 0x800bb91a, 0x267cb2ae, 0x1794a833, 0xb1e3a387, + 0x20754fa0, 0x86024414, 0xb7ea5e89, 0x119d553d, + 0xd43a6bb3, 0x724d6007, 0x43a57a9a, 0xe5d2712e, + 0x139a01c7, 0xb5ed0a73, 0x840510ee, 0x22721b5a, + 0xe7d525d4, 0x41a22e60, 0x704a34fd, 0xd63d3f49, + 0xcc1d9f8b, 0x6a6a943f, 0x5b828ea2, 0xfdf58516, + 0x3852bb98, 0x9e25b02c, 0xafcdaab1, 0x09baa105, + 0xfff2d1ec, 0x5985da58, 0x686dc0c5, 0xce1acb71, + 0x0bbdf5ff, 0xadcafe4b, 0x9c22e4d6, 0x3a55ef62, + 0xabc30345, 0x0db408f1, 0x3c5c126c, 0x9a2b19d8, + 0x5f8c2756, 0xf9fb2ce2, 0xc813367f, 0x6e643dcb, + 0x982c4d22, 0x3e5b4696, 0x0fb35c0b, 0xa9c457bf, + 0x6c636931, 0xca146285, 0xfbfc7818, 0x5d8b73ac, + 0x03a0a617, 0xa5d7ada3, 0x943fb73e, 0x3248bc8a, + 0xf7ef8204, 0x519889b0, 0x6070932d, 0xc6079899, + 0x304fe870, 0x9638e3c4, 0xa7d0f959, 0x01a7f2ed, + 0xc400cc63, 0x6277c7d7, 0x539fdd4a, 0xf5e8d6fe, + 0x647e3ad9, 0xc209316d, 0xf3e12bf0, 0x55962044, + 0x90311eca, 0x3646157e, 0x07ae0fe3, 0xa1d90457, + 0x579174be, 0xf1e67f0a, 0xc00e6597, 0x66796e23, + 0xa3de50ad, 0x05a95b19, 0x34414184, 0x92364a30, + 0x00000000, 0xccaa009e, 0x4225077d, 0x8e8f07e3, + 0x844a0efa, 0x48e00e64, 0xc66f0987, 0x0ac50919, + 0xd3e51bb5, 0x1f4f1b2b, 0x91c01cc8, 0x5d6a1c56, + 0x57af154f, 0x9b0515d1, 0x158a1232, 0xd92012ac, + 0x7cbb312b, 0xb01131b5, 0x3e9e3656, 0xf23436c8, + 0xf8f13fd1, 
0x345b3f4f, 0xbad438ac, 0x767e3832, + 0xaf5e2a9e, 0x63f42a00, 0xed7b2de3, 0x21d12d7d, + 0x2b142464, 0xe7be24fa, 0x69312319, 0xa59b2387, + 0xf9766256, 0x35dc62c8, 0xbb53652b, 0x77f965b5, + 0x7d3c6cac, 0xb1966c32, 0x3f196bd1, 0xf3b36b4f, + 0x2a9379e3, 0xe639797d, 0x68b67e9e, 0xa41c7e00, + 0xaed97719, 0x62737787, 0xecfc7064, 0x205670fa, + 0x85cd537d, 0x496753e3, 0xc7e85400, 0x0b42549e, + 0x01875d87, 0xcd2d5d19, 0x43a25afa, 0x8f085a64, + 0x562848c8, 0x9a824856, 0x140d4fb5, 0xd8a74f2b, + 0xd2624632, 0x1ec846ac, 0x9047414f, 0x5ced41d1, + 0x299dc2ed, 0xe537c273, 0x6bb8c590, 0xa712c50e, + 0xadd7cc17, 0x617dcc89, 0xeff2cb6a, 0x2358cbf4, + 0xfa78d958, 0x36d2d9c6, 0xb85dde25, 0x74f7debb, + 0x7e32d7a2, 0xb298d73c, 0x3c17d0df, 0xf0bdd041, + 0x5526f3c6, 0x998cf358, 0x1703f4bb, 0xdba9f425, + 0xd16cfd3c, 0x1dc6fda2, 0x9349fa41, 0x5fe3fadf, + 0x86c3e873, 0x4a69e8ed, 0xc4e6ef0e, 0x084cef90, + 0x0289e689, 0xce23e617, 0x40ace1f4, 0x8c06e16a, + 0xd0eba0bb, 0x1c41a025, 0x92cea7c6, 0x5e64a758, + 0x54a1ae41, 0x980baedf, 0x1684a93c, 0xda2ea9a2, + 0x030ebb0e, 0xcfa4bb90, 0x412bbc73, 0x8d81bced, + 0x8744b5f4, 0x4beeb56a, 0xc561b289, 0x09cbb217, + 0xac509190, 0x60fa910e, 0xee7596ed, 0x22df9673, + 0x281a9f6a, 0xe4b09ff4, 0x6a3f9817, 0xa6959889, + 0x7fb58a25, 0xb31f8abb, 0x3d908d58, 0xf13a8dc6, + 0xfbff84df, 0x37558441, 0xb9da83a2, 0x7570833c, + 0x533b85da, 0x9f918544, 0x111e82a7, 0xddb48239, + 0xd7718b20, 0x1bdb8bbe, 0x95548c5d, 0x59fe8cc3, + 0x80de9e6f, 0x4c749ef1, 0xc2fb9912, 0x0e51998c, + 0x04949095, 0xc83e900b, 0x46b197e8, 0x8a1b9776, + 0x2f80b4f1, 0xe32ab46f, 0x6da5b38c, 0xa10fb312, + 0xabcaba0b, 0x6760ba95, 0xe9efbd76, 0x2545bde8, + 0xfc65af44, 0x30cfafda, 0xbe40a839, 0x72eaa8a7, + 0x782fa1be, 0xb485a120, 0x3a0aa6c3, 0xf6a0a65d, + 0xaa4de78c, 0x66e7e712, 0xe868e0f1, 0x24c2e06f, + 0x2e07e976, 0xe2ade9e8, 0x6c22ee0b, 0xa088ee95, + 0x79a8fc39, 0xb502fca7, 0x3b8dfb44, 0xf727fbda, + 0xfde2f2c3, 0x3148f25d, 0xbfc7f5be, 0x736df520, + 0xd6f6d6a7, 0x1a5cd639, 0x94d3d1da, 0x5879d144, + 0x52bcd85d, 
0x9e16d8c3, 0x1099df20, 0xdc33dfbe, + 0x0513cd12, 0xc9b9cd8c, 0x4736ca6f, 0x8b9ccaf1, + 0x8159c3e8, 0x4df3c376, 0xc37cc495, 0x0fd6c40b, + 0x7aa64737, 0xb60c47a9, 0x3883404a, 0xf42940d4, + 0xfeec49cd, 0x32464953, 0xbcc94eb0, 0x70634e2e, + 0xa9435c82, 0x65e95c1c, 0xeb665bff, 0x27cc5b61, + 0x2d095278, 0xe1a352e6, 0x6f2c5505, 0xa386559b, + 0x061d761c, 0xcab77682, 0x44387161, 0x889271ff, + 0x825778e6, 0x4efd7878, 0xc0727f9b, 0x0cd87f05, + 0xd5f86da9, 0x19526d37, 0x97dd6ad4, 0x5b776a4a, + 0x51b26353, 0x9d1863cd, 0x1397642e, 0xdf3d64b0, + 0x83d02561, 0x4f7a25ff, 0xc1f5221c, 0x0d5f2282, + 0x079a2b9b, 0xcb302b05, 0x45bf2ce6, 0x89152c78, + 0x50353ed4, 0x9c9f3e4a, 0x121039a9, 0xdeba3937, + 0xd47f302e, 0x18d530b0, 0x965a3753, 0x5af037cd, + 0xff6b144a, 0x33c114d4, 0xbd4e1337, 0x71e413a9, + 0x7b211ab0, 0xb78b1a2e, 0x39041dcd, 0xf5ae1d53, + 0x2c8e0fff, 0xe0240f61, 0x6eab0882, 0xa201081c, + 0xa8c40105, 0x646e019b, 0xeae10678, 0x264b06e6, +}; diff --git a/tools/z64compress/src/enc/libdeflate/lib/decompress_template.h b/tools/z64compress/src/enc/libdeflate/lib/decompress_template.h new file mode 100644 index 000000000..2d9dfa82b --- /dev/null +++ b/tools/z64compress/src/enc/libdeflate/lib/decompress_template.h @@ -0,0 +1,774 @@ +/* + * decompress_template.h + * + * Copyright 2016 Eric Biggers + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +/* + * This is the actual DEFLATE decompression routine, lifted out of + * deflate_decompress.c so that it can be compiled multiple times with different + * target instruction sets. + */ + +#ifndef ATTRIBUTES +# define ATTRIBUTES +#endif +#ifndef EXTRACT_VARBITS +# define EXTRACT_VARBITS(word, count) ((word) & BITMASK(count)) +#endif +#ifndef EXTRACT_VARBITS8 +# define EXTRACT_VARBITS8(word, count) ((word) & BITMASK((u8)(count))) +#endif + +static enum libdeflate_result ATTRIBUTES MAYBE_UNUSED +FUNCNAME(struct libdeflate_decompressor * restrict d, + const void * restrict in, size_t in_nbytes, + void * restrict out, size_t out_nbytes_avail, + size_t *actual_in_nbytes_ret, size_t *actual_out_nbytes_ret) +{ + u8 *out_next = out; + u8 * const out_end = out_next + out_nbytes_avail; + u8 * const out_fastloop_end = + out_end - MIN(out_nbytes_avail, FASTLOOP_MAX_BYTES_WRITTEN); + + /* Input bitstream state; see deflate_decompress.c for documentation */ + const u8 *in_next = in; + const u8 * const in_end = in_next + in_nbytes; + const u8 * const in_fastloop_end = + in_end - MIN(in_nbytes, FASTLOOP_MAX_BYTES_READ); + bitbuf_t bitbuf = 0; + bitbuf_t saved_bitbuf; + u32 bitsleft = 0; + size_t overread_count = 0; + + bool is_final_block; + unsigned block_type; + unsigned num_litlen_syms; + unsigned num_offset_syms; + bitbuf_t litlen_tablemask; + u32 entry; + +next_block: + /* Starting to read the next block */ + ; + + STATIC_ASSERT(CAN_CONSUME(1 + 2 + 5 + 5 + 4 + 3)); + 
REFILL_BITS(); + + /* BFINAL: 1 bit */ + is_final_block = bitbuf & BITMASK(1); + + /* BTYPE: 2 bits */ + block_type = (bitbuf >> 1) & BITMASK(2); + + if (block_type == DEFLATE_BLOCKTYPE_DYNAMIC_HUFFMAN) { + + /* Dynamic Huffman block */ + + /* The order in which precode lengths are stored */ + static const u8 deflate_precode_lens_permutation[DEFLATE_NUM_PRECODE_SYMS] = { + 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15 + }; + + unsigned num_explicit_precode_lens; + unsigned i; + + /* Read the codeword length counts. */ + + STATIC_ASSERT(DEFLATE_NUM_LITLEN_SYMS == 257 + BITMASK(5)); + num_litlen_syms = 257 + ((bitbuf >> 3) & BITMASK(5)); + + STATIC_ASSERT(DEFLATE_NUM_OFFSET_SYMS == 1 + BITMASK(5)); + num_offset_syms = 1 + ((bitbuf >> 8) & BITMASK(5)); + + STATIC_ASSERT(DEFLATE_NUM_PRECODE_SYMS == 4 + BITMASK(4)); + num_explicit_precode_lens = 4 + ((bitbuf >> 13) & BITMASK(4)); + + d->static_codes_loaded = false; + + /* + * Read the precode codeword lengths. + * + * A 64-bit bitbuffer is just one bit too small to hold the + * maximum number of precode lens, so to minimize branches we + * merge one len with the previous fields. 
+ */ + STATIC_ASSERT(DEFLATE_MAX_PRE_CODEWORD_LEN == (1 << 3) - 1); + if (CAN_CONSUME(3 * (DEFLATE_NUM_PRECODE_SYMS - 1))) { + d->u.precode_lens[deflate_precode_lens_permutation[0]] = + (bitbuf >> 17) & BITMASK(3); + bitbuf >>= 20; + bitsleft -= 20; + REFILL_BITS(); + i = 1; + do { + d->u.precode_lens[deflate_precode_lens_permutation[i]] = + bitbuf & BITMASK(3); + bitbuf >>= 3; + bitsleft -= 3; + } while (++i < num_explicit_precode_lens); + } else { + bitbuf >>= 17; + bitsleft -= 17; + i = 0; + do { + if ((u8)bitsleft < 3) + REFILL_BITS(); + d->u.precode_lens[deflate_precode_lens_permutation[i]] = + bitbuf & BITMASK(3); + bitbuf >>= 3; + bitsleft -= 3; + } while (++i < num_explicit_precode_lens); + } + for (; i < DEFLATE_NUM_PRECODE_SYMS; i++) + d->u.precode_lens[deflate_precode_lens_permutation[i]] = 0; + + /* Build the decode table for the precode. */ + SAFETY_CHECK(build_precode_decode_table(d)); + + /* Decode the litlen and offset codeword lengths. */ + i = 0; + do { + unsigned presym; + u8 rep_val; + unsigned rep_count; + + if ((u8)bitsleft < DEFLATE_MAX_PRE_CODEWORD_LEN + 7) + REFILL_BITS(); + + /* + * The code below assumes that the precode decode table + * doesn't have any subtables. + */ + STATIC_ASSERT(PRECODE_TABLEBITS == DEFLATE_MAX_PRE_CODEWORD_LEN); + + /* Decode the next precode symbol. */ + entry = d->u.l.precode_decode_table[ + bitbuf & BITMASK(DEFLATE_MAX_PRE_CODEWORD_LEN)]; + bitbuf >>= (u8)entry; + bitsleft -= entry; /* optimization: subtract full entry */ + presym = entry >> 16; + + if (presym < 16) { + /* Explicit codeword length */ + d->u.l.lens[i++] = presym; + continue; + } + + /* Run-length encoded codeword lengths */ + + /* + * Note: we don't need verify that the repeat count + * doesn't overflow the number of elements, since we've + * sized the lens array to have enough extra space to + * allow for the worst-case overrun (138 zeroes when + * only 1 length was remaining). 
+ * + * In the case of the small repeat counts (presyms 16 + * and 17), it is fastest to always write the maximum + * number of entries. That gets rid of branches that + * would otherwise be required. + * + * It is not just because of the numerical order that + * our checks go in the order 'presym < 16', 'presym == + * 16', and 'presym == 17'. For typical data this is + * ordered from most frequent to least frequent case. + */ + STATIC_ASSERT(DEFLATE_MAX_LENS_OVERRUN == 138 - 1); + + if (presym == 16) { + /* Repeat the previous length 3 - 6 times. */ + SAFETY_CHECK(i != 0); + rep_val = d->u.l.lens[i - 1]; + STATIC_ASSERT(3 + BITMASK(2) == 6); + rep_count = 3 + (bitbuf & BITMASK(2)); + bitbuf >>= 2; + bitsleft -= 2; + d->u.l.lens[i + 0] = rep_val; + d->u.l.lens[i + 1] = rep_val; + d->u.l.lens[i + 2] = rep_val; + d->u.l.lens[i + 3] = rep_val; + d->u.l.lens[i + 4] = rep_val; + d->u.l.lens[i + 5] = rep_val; + i += rep_count; + } else if (presym == 17) { + /* Repeat zero 3 - 10 times. */ + STATIC_ASSERT(3 + BITMASK(3) == 10); + rep_count = 3 + (bitbuf & BITMASK(3)); + bitbuf >>= 3; + bitsleft -= 3; + d->u.l.lens[i + 0] = 0; + d->u.l.lens[i + 1] = 0; + d->u.l.lens[i + 2] = 0; + d->u.l.lens[i + 3] = 0; + d->u.l.lens[i + 4] = 0; + d->u.l.lens[i + 5] = 0; + d->u.l.lens[i + 6] = 0; + d->u.l.lens[i + 7] = 0; + d->u.l.lens[i + 8] = 0; + d->u.l.lens[i + 9] = 0; + i += rep_count; + } else { + /* Repeat zero 11 - 138 times. */ + STATIC_ASSERT(11 + BITMASK(7) == 138); + rep_count = 11 + (bitbuf & BITMASK(7)); + bitbuf >>= 7; + bitsleft -= 7; + memset(&d->u.l.lens[i], 0, + rep_count * sizeof(d->u.l.lens[i])); + i += rep_count; + } + } while (i < num_litlen_syms + num_offset_syms); + + } else if (block_type == DEFLATE_BLOCKTYPE_UNCOMPRESSED) { + u16 len, nlen; + + /* + * Uncompressed block: copy 'len' bytes literally from the input + * buffer to the output buffer. + */ + + bitsleft -= 3; /* for BTYPE and BFINAL */ + + /* + * Align the bitstream to the next byte boundary. 
This means + * the next byte boundary as if we were reading a byte at a + * time. Therefore, we have to rewind 'in_next' by any bytes + * that have been refilled but not actually consumed yet (not + * counting overread bytes, which don't increment 'in_next'). + */ + bitsleft = (u8)bitsleft; + SAFETY_CHECK(overread_count <= (bitsleft >> 3)); + in_next -= (bitsleft >> 3) - overread_count; + overread_count = 0; + bitbuf = 0; + bitsleft = 0; + + SAFETY_CHECK(in_end - in_next >= 4); + len = get_unaligned_le16(in_next); + nlen = get_unaligned_le16(in_next + 2); + in_next += 4; + + SAFETY_CHECK(len == (u16)~nlen); + if (unlikely(len > out_end - out_next)) + return LIBDEFLATE_INSUFFICIENT_SPACE; + SAFETY_CHECK(len <= in_end - in_next); + + memcpy(out_next, in_next, len); + in_next += len; + out_next += len; + + goto block_done; + + } else { + unsigned i; + + SAFETY_CHECK(block_type == DEFLATE_BLOCKTYPE_STATIC_HUFFMAN); + + /* + * Static Huffman block: build the decode tables for the static + * codes. Skip doing so if the tables are already set up from + * an earlier static block; this speeds up decompression of + * degenerate input of many empty or very short static blocks. + * + * Afterwards, the remainder is the same as decompressing a + * dynamic Huffman block. 
+ */ + + bitbuf >>= 3; /* for BTYPE and BFINAL */ + bitsleft -= 3; + + if (d->static_codes_loaded) + goto have_decode_tables; + + d->static_codes_loaded = true; + + STATIC_ASSERT(DEFLATE_NUM_LITLEN_SYMS == 288); + STATIC_ASSERT(DEFLATE_NUM_OFFSET_SYMS == 32); + + for (i = 0; i < 144; i++) + d->u.l.lens[i] = 8; + for (; i < 256; i++) + d->u.l.lens[i] = 9; + for (; i < 280; i++) + d->u.l.lens[i] = 7; + for (; i < 288; i++) + d->u.l.lens[i] = 8; + + for (; i < 288 + 32; i++) + d->u.l.lens[i] = 5; + + num_litlen_syms = 288; + num_offset_syms = 32; + } + + /* Decompressing a Huffman block (either dynamic or static) */ + + SAFETY_CHECK(build_offset_decode_table(d, num_litlen_syms, num_offset_syms)); + SAFETY_CHECK(build_litlen_decode_table(d, num_litlen_syms, num_offset_syms)); +have_decode_tables: + litlen_tablemask = BITMASK(d->litlen_tablebits); + + /* + * This is the "fastloop" for decoding literals and matches. It does + * bounds checks on in_next and out_next in the loop conditions so that + * additional bounds checks aren't needed inside the loop body. + * + * To reduce latency, the bitbuffer is refilled and the next litlen + * decode table entry is preloaded before each loop iteration. + */ + if (in_next >= in_fastloop_end || out_next >= out_fastloop_end) + goto generic_loop; + REFILL_BITS_IN_FASTLOOP(); + entry = d->u.litlen_decode_table[bitbuf & litlen_tablemask]; + do { + u32 length, offset, lit; + const u8 *src; + u8 *dst; + + /* + * Consume the bits for the litlen decode table entry. Save the + * original bitbuf for later, in case the extra match length + * bits need to be extracted from it. + */ + saved_bitbuf = bitbuf; + bitbuf >>= (u8)entry; + bitsleft -= entry; /* optimization: subtract full entry */ + + /* + * Begin by checking for a "fast" literal, i.e. a literal that + * doesn't need a subtable. 
+ */ + if (entry & HUFFDEC_LITERAL) { + /* + * On 64-bit platforms, we decode up to 2 extra fast + * literals in addition to the primary item, as this + * increases performance and still leaves enough bits + * remaining for what follows. We could actually do 3, + * assuming LITLEN_TABLEBITS=11, but that actually + * decreases performance slightly (perhaps by messing + * with the branch prediction of the conditional refill + * that happens later while decoding the match offset). + * + * Note: the definitions of FASTLOOP_MAX_BYTES_WRITTEN + * and FASTLOOP_MAX_BYTES_READ need to be updated if the + * number of extra literals decoded here is changed. + */ + if (/* enough bits for 2 fast literals + length + offset preload? */ + CAN_CONSUME_AND_THEN_PRELOAD(2 * LITLEN_TABLEBITS + + LENGTH_MAXBITS, + OFFSET_TABLEBITS) && + /* enough bits for 2 fast literals + slow literal + litlen preload? */ + CAN_CONSUME_AND_THEN_PRELOAD(2 * LITLEN_TABLEBITS + + DEFLATE_MAX_LITLEN_CODEWORD_LEN, + LITLEN_TABLEBITS)) { + /* 1st extra fast literal */ + lit = entry >> 16; + entry = d->u.litlen_decode_table[bitbuf & litlen_tablemask]; + saved_bitbuf = bitbuf; + bitbuf >>= (u8)entry; + bitsleft -= entry; + *out_next++ = lit; + if (entry & HUFFDEC_LITERAL) { + /* 2nd extra fast literal */ + lit = entry >> 16; + entry = d->u.litlen_decode_table[bitbuf & litlen_tablemask]; + saved_bitbuf = bitbuf; + bitbuf >>= (u8)entry; + bitsleft -= entry; + *out_next++ = lit; + if (entry & HUFFDEC_LITERAL) { + /* + * Another fast literal, but + * this one is in lieu of the + * primary item, so it doesn't + * count as one of the extras. + */ + lit = entry >> 16; + entry = d->u.litlen_decode_table[bitbuf & litlen_tablemask]; + REFILL_BITS_IN_FASTLOOP(); + *out_next++ = lit; + continue; + } + } + } else { + /* + * Decode a literal. While doing so, preload + * the next litlen decode table entry and refill + * the bitbuffer. 
To reduce latency, we've + * arranged for there to be enough "preloadable" + * bits remaining to do the table preload + * independently of the refill. + */ + STATIC_ASSERT(CAN_CONSUME_AND_THEN_PRELOAD( + LITLEN_TABLEBITS, LITLEN_TABLEBITS)); + lit = entry >> 16; + entry = d->u.litlen_decode_table[bitbuf & litlen_tablemask]; + REFILL_BITS_IN_FASTLOOP(); + *out_next++ = lit; + continue; + } + } + + /* + * It's not a literal entry, so it can be a length entry, a + * subtable pointer entry, or an end-of-block entry. Detect the + * two unlikely cases by testing the HUFFDEC_EXCEPTIONAL flag. + */ + if (unlikely(entry & HUFFDEC_EXCEPTIONAL)) { + /* Subtable pointer or end-of-block entry */ + + if (unlikely(entry & HUFFDEC_END_OF_BLOCK)) + goto block_done; + + /* + * A subtable is required. Load and consume the + * subtable entry. The subtable entry can be of any + * type: literal, length, or end-of-block. + */ + entry = d->u.litlen_decode_table[(entry >> 16) + + EXTRACT_VARBITS(bitbuf, (entry >> 8) & 0x3F)]; + saved_bitbuf = bitbuf; + bitbuf >>= (u8)entry; + bitsleft -= entry; + + /* + * 32-bit platforms that use the byte-at-a-time refill + * method have to do a refill here for there to always + * be enough bits to decode a literal that requires a + * subtable, then preload the next litlen decode table + * entry; or to decode a match length that requires a + * subtable, then preload the offset decode table entry. + */ + if (!CAN_CONSUME_AND_THEN_PRELOAD(DEFLATE_MAX_LITLEN_CODEWORD_LEN, + LITLEN_TABLEBITS) || + !CAN_CONSUME_AND_THEN_PRELOAD(LENGTH_MAXBITS, + OFFSET_TABLEBITS)) + REFILL_BITS_IN_FASTLOOP(); + if (entry & HUFFDEC_LITERAL) { + /* Decode a literal that required a subtable. */ + lit = entry >> 16; + entry = d->u.litlen_decode_table[bitbuf & litlen_tablemask]; + REFILL_BITS_IN_FASTLOOP(); + *out_next++ = lit; + continue; + } + if (unlikely(entry & HUFFDEC_END_OF_BLOCK)) + goto block_done; + /* Else, it's a length that required a subtable. 
*/ + } + + /* + * Decode the match length: the length base value associated + * with the litlen symbol (which we extract from the decode + * table entry), plus the extra length bits. We don't need to + * consume the extra length bits here, as they were included in + * the bits consumed by the entry earlier. We also don't need + * to check for too-long matches here, as this is inside the + * fastloop where it's already been verified that the output + * buffer has enough space remaining to copy a max-length match. + */ + length = entry >> 16; + length += EXTRACT_VARBITS8(saved_bitbuf, entry) >> (u8)(entry >> 8); + + /* + * Decode the match offset. There are enough "preloadable" bits + * remaining to preload the offset decode table entry, but a + * refill might be needed before consuming it. + */ + STATIC_ASSERT(CAN_CONSUME_AND_THEN_PRELOAD(LENGTH_MAXFASTBITS, + OFFSET_TABLEBITS)); + entry = d->offset_decode_table[bitbuf & BITMASK(OFFSET_TABLEBITS)]; + if (CAN_CONSUME_AND_THEN_PRELOAD(OFFSET_MAXBITS, + LITLEN_TABLEBITS)) { + /* + * Decoding a match offset on a 64-bit platform. We may + * need to refill once, but then we can decode the whole + * offset and preload the next litlen table entry. 
+ */ + if (unlikely(entry & HUFFDEC_EXCEPTIONAL)) { + /* Offset codeword requires a subtable */ + if (unlikely((u8)bitsleft < OFFSET_MAXBITS + + LITLEN_TABLEBITS - PRELOAD_SLACK)) + REFILL_BITS_IN_FASTLOOP(); + bitbuf >>= OFFSET_TABLEBITS; + bitsleft -= OFFSET_TABLEBITS; + entry = d->offset_decode_table[(entry >> 16) + + EXTRACT_VARBITS(bitbuf, (entry >> 8) & 0x3F)]; + } else if (unlikely((u8)bitsleft < OFFSET_MAXFASTBITS + + LITLEN_TABLEBITS - PRELOAD_SLACK)) + REFILL_BITS_IN_FASTLOOP(); + } else { + /* Decoding a match offset on a 32-bit platform */ + REFILL_BITS_IN_FASTLOOP(); + if (unlikely(entry & HUFFDEC_EXCEPTIONAL)) { + /* Offset codeword requires a subtable */ + bitbuf >>= OFFSET_TABLEBITS; + bitsleft -= OFFSET_TABLEBITS; + entry = d->offset_decode_table[(entry >> 16) + + EXTRACT_VARBITS(bitbuf, (entry >> 8) & 0x3F)]; + REFILL_BITS_IN_FASTLOOP(); + /* No further refill needed before extra bits */ + STATIC_ASSERT(CAN_CONSUME( + OFFSET_MAXBITS - OFFSET_TABLEBITS)); + } else { + /* No refill needed before extra bits */ + STATIC_ASSERT(CAN_CONSUME(OFFSET_MAXFASTBITS)); + } + } + saved_bitbuf = bitbuf; + bitbuf >>= (u8)entry; + bitsleft -= entry; /* optimization: subtract full entry */ + offset = entry >> 16; + offset += EXTRACT_VARBITS8(saved_bitbuf, entry) >> (u8)(entry >> 8); + + /* Validate the match offset; needed even in the fastloop. */ + SAFETY_CHECK(offset <= out_next - (const u8 *)out); + src = out_next - offset; + dst = out_next; + out_next += length; + + /* + * Before starting to issue the instructions to copy the match, + * refill the bitbuffer and preload the litlen decode table + * entry for the next loop iteration. This can increase + * performance by allowing the latency of the match copy to + * overlap with these other operations. To further reduce + * latency, we've arranged for there to be enough bits remaining + * to do the table preload independently of the refill, except + * on 32-bit platforms using the byte-at-a-time refill method. 
+ */ + if (!CAN_CONSUME_AND_THEN_PRELOAD( + MAX(OFFSET_MAXBITS - OFFSET_TABLEBITS, + OFFSET_MAXFASTBITS), + LITLEN_TABLEBITS) && + unlikely((u8)bitsleft < LITLEN_TABLEBITS - PRELOAD_SLACK)) + REFILL_BITS_IN_FASTLOOP(); + entry = d->u.litlen_decode_table[bitbuf & litlen_tablemask]; + REFILL_BITS_IN_FASTLOOP(); + + /* + * Copy the match. On most CPUs the fastest method is a + * word-at-a-time copy, unconditionally copying about 5 words + * since this is enough for most matches without being too much. + * + * The normal word-at-a-time copy works for offset >= WORDBYTES, + * which is most cases. The case of offset == 1 is also common + * and is worth optimizing for, since it is just RLE encoding of + * the previous byte, which is the result of compressing long + * runs of the same byte. + * + * Writing past the match 'length' is allowed here, since it's + * been ensured there is enough output space left for a slight + * overrun. FASTLOOP_MAX_BYTES_WRITTEN needs to be updated if + * the maximum possible overrun here is changed. 
+ */ + if (UNALIGNED_ACCESS_IS_FAST && offset >= WORDBYTES) { + store_word_unaligned(load_word_unaligned(src), dst); + src += WORDBYTES; + dst += WORDBYTES; + store_word_unaligned(load_word_unaligned(src), dst); + src += WORDBYTES; + dst += WORDBYTES; + store_word_unaligned(load_word_unaligned(src), dst); + src += WORDBYTES; + dst += WORDBYTES; + store_word_unaligned(load_word_unaligned(src), dst); + src += WORDBYTES; + dst += WORDBYTES; + store_word_unaligned(load_word_unaligned(src), dst); + src += WORDBYTES; + dst += WORDBYTES; + while (dst < out_next) { + store_word_unaligned(load_word_unaligned(src), dst); + src += WORDBYTES; + dst += WORDBYTES; + store_word_unaligned(load_word_unaligned(src), dst); + src += WORDBYTES; + dst += WORDBYTES; + store_word_unaligned(load_word_unaligned(src), dst); + src += WORDBYTES; + dst += WORDBYTES; + store_word_unaligned(load_word_unaligned(src), dst); + src += WORDBYTES; + dst += WORDBYTES; + store_word_unaligned(load_word_unaligned(src), dst); + src += WORDBYTES; + dst += WORDBYTES; + } + } else if (UNALIGNED_ACCESS_IS_FAST && offset == 1) { + machine_word_t v; + + /* + * This part tends to get auto-vectorized, so keep it + * copying a multiple of 16 bytes at a time. 
+ */ + v = (machine_word_t)0x0101010101010101 * src[0]; + store_word_unaligned(v, dst); + dst += WORDBYTES; + store_word_unaligned(v, dst); + dst += WORDBYTES; + store_word_unaligned(v, dst); + dst += WORDBYTES; + store_word_unaligned(v, dst); + dst += WORDBYTES; + while (dst < out_next) { + store_word_unaligned(v, dst); + dst += WORDBYTES; + store_word_unaligned(v, dst); + dst += WORDBYTES; + store_word_unaligned(v, dst); + dst += WORDBYTES; + store_word_unaligned(v, dst); + dst += WORDBYTES; + } + } else if (UNALIGNED_ACCESS_IS_FAST) { + store_word_unaligned(load_word_unaligned(src), dst); + src += offset; + dst += offset; + store_word_unaligned(load_word_unaligned(src), dst); + src += offset; + dst += offset; + do { + store_word_unaligned(load_word_unaligned(src), dst); + src += offset; + dst += offset; + store_word_unaligned(load_word_unaligned(src), dst); + src += offset; + dst += offset; + } while (dst < out_next); + } else { + *dst++ = *src++; + *dst++ = *src++; + do { + *dst++ = *src++; + } while (dst < out_next); + } + } while (in_next < in_fastloop_end && out_next < out_fastloop_end); + + /* + * This is the generic loop for decoding literals and matches. This + * handles cases where in_next and out_next are close to the end of + * their respective buffers. Usually this loop isn't performance- + * critical, as most time is spent in the fastloop above instead. We + * therefore omit some optimizations here in favor of smaller code. 
+ */ +generic_loop: + for (;;) { + u32 length, offset; + const u8 *src; + u8 *dst; + + REFILL_BITS(); + entry = d->u.litlen_decode_table[bitbuf & litlen_tablemask]; + saved_bitbuf = bitbuf; + bitbuf >>= (u8)entry; + bitsleft -= entry; + if (unlikely(entry & HUFFDEC_SUBTABLE_POINTER)) { + entry = d->u.litlen_decode_table[(entry >> 16) + + EXTRACT_VARBITS(bitbuf, (entry >> 8) & 0x3F)]; + saved_bitbuf = bitbuf; + bitbuf >>= (u8)entry; + bitsleft -= entry; + } + length = entry >> 16; + if (entry & HUFFDEC_LITERAL) { + if (unlikely(out_next == out_end)) + return LIBDEFLATE_INSUFFICIENT_SPACE; + *out_next++ = length; + continue; + } + if (unlikely(entry & HUFFDEC_END_OF_BLOCK)) + goto block_done; + length += EXTRACT_VARBITS8(saved_bitbuf, entry) >> (u8)(entry >> 8); + if (unlikely(length > out_end - out_next)) + return LIBDEFLATE_INSUFFICIENT_SPACE; + + if (!CAN_CONSUME(LENGTH_MAXBITS + OFFSET_MAXBITS)) + REFILL_BITS(); + entry = d->offset_decode_table[bitbuf & BITMASK(OFFSET_TABLEBITS)]; + if (unlikely(entry & HUFFDEC_EXCEPTIONAL)) { + bitbuf >>= OFFSET_TABLEBITS; + bitsleft -= OFFSET_TABLEBITS; + entry = d->offset_decode_table[(entry >> 16) + + EXTRACT_VARBITS(bitbuf, (entry >> 8) & 0x3F)]; + if (!CAN_CONSUME(OFFSET_MAXBITS)) + REFILL_BITS(); + } + offset = entry >> 16; + offset += EXTRACT_VARBITS8(bitbuf, entry) >> (u8)(entry >> 8); + bitbuf >>= (u8)entry; + bitsleft -= entry; + + SAFETY_CHECK(offset <= out_next - (const u8 *)out); + src = out_next - offset; + dst = out_next; + out_next += length; + + STATIC_ASSERT(DEFLATE_MIN_MATCH_LEN == 3); + *dst++ = *src++; + *dst++ = *src++; + do { + *dst++ = *src++; + } while (dst < out_next); + } + +block_done: + /* Finished decoding a block */ + + if (!is_final_block) + goto next_block; + + /* That was the last block. */ + + bitsleft = (u8)bitsleft; + + /* + * If any of the implicit appended zero bytes were consumed (not just + * refilled) before hitting end of stream, then the data is bad. 
+ */ + SAFETY_CHECK(overread_count <= (bitsleft >> 3)); + + /* Optionally return the actual number of bytes consumed. */ + if (actual_in_nbytes_ret) { + /* Don't count bytes that were refilled but not consumed. */ + in_next -= (bitsleft >> 3) - overread_count; + + *actual_in_nbytes_ret = in_next - (u8 *)in; + } + + /* Optionally return the actual number of bytes written. */ + if (actual_out_nbytes_ret) { + *actual_out_nbytes_ret = out_next - (u8 *)out; + } else { + if (out_next != out_end) + return LIBDEFLATE_SHORT_OUTPUT; + } + return LIBDEFLATE_SUCCESS; +} + +#undef FUNCNAME +#undef ATTRIBUTES +#undef EXTRACT_VARBITS +#undef EXTRACT_VARBITS8 diff --git a/tools/z64compress/src/enc/libdeflate/lib/deflate_compress.c b/tools/z64compress/src/enc/libdeflate/lib/deflate_compress.c new file mode 100644 index 000000000..7c92d9823 --- /dev/null +++ b/tools/z64compress/src/enc/libdeflate/lib/deflate_compress.c @@ -0,0 +1,3877 @@ +/* + * deflate_compress.c - a compressor for DEFLATE + * + * Copyright 2016 Eric Biggers + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "deflate_compress.h" +#include "deflate_constants.h" + +#include "libdeflate.h" + +/******************************************************************************/ + +/* + * The following parameters can be changed at build time to customize the + * compression algorithms slightly: + * + * (Note, not all customizable parameters are here. Some others can be found in + * libdeflate_alloc_compressor() and in *_matchfinder.h.) + */ + +/* + * If this parameter is defined to 1, then the near-optimal parsing algorithm + * will be included, and compression levels 10-12 will use it. This algorithm + * usually produces a compression ratio significantly better than the other + * algorithms. However, it is slow. If this parameter is defined to 0, then + * levels 10-12 will be the same as level 9 and will use the lazy2 algorithm. + */ +#define SUPPORT_NEAR_OPTIMAL_PARSING 1 + +/* + * This is the minimum block length that the compressor will use, in + * uncompressed bytes. This should be a value below which using shorter blocks + * is unlikely to be worthwhile, due to the per-block overhead. This value does + * not apply to the final block, which may be shorter than this (if the input is + * shorter, it will have to be), or to the final uncompressed block in a series + * of uncompressed blocks that cover more than UINT16_MAX bytes. + * + * This value is also approximately the amount by which what would otherwise be + * the second-to-last block is allowed to grow past the soft maximum length in + * order to avoid having to use a very short final block. + * + * Defining a fixed minimum block length is needed in order to guarantee a + * reasonable upper bound on the compressed size. 
It's also needed because our + * block splitting algorithm doesn't work well on very short blocks. + */ +#define MIN_BLOCK_LENGTH 5000 + +/* + * For the greedy, lazy, lazy2, and near-optimal compressors: This is the soft + * maximum block length, in uncompressed bytes. The compressor will try to end + * blocks at this length, but it may go slightly past it if there is a match + * that straddles this limit or if the input data ends soon after this limit. + * This parameter doesn't apply to uncompressed blocks, which the DEFLATE format + * limits to 65535 bytes. + * + * This should be a value above which it is very likely that splitting the block + * would produce a better compression ratio. For the near-optimal compressor, + * increasing/decreasing this parameter will increase/decrease per-compressor + * memory usage linearly. + */ +#define SOFT_MAX_BLOCK_LENGTH 300000 + +/* + * For the greedy, lazy, and lazy2 compressors: this is the length of the + * sequence store, which is an array where the compressor temporarily stores + * matches that it's going to use in the current block. This value is the + * maximum number of matches that can be used in a block. If the sequence store + * fills up, then the compressor will be forced to end the block early. This + * value should be large enough so that this rarely happens, due to the block + * being ended normally before then. Increasing/decreasing this value will + * increase/decrease per-compressor memory usage linearly. + */ +#define SEQ_STORE_LENGTH 50000 + +/* + * For deflate_compress_fastest(): This is the soft maximum block length. + * deflate_compress_fastest() doesn't use the regular block splitting algorithm; + * it only ends blocks when they reach FAST_SOFT_MAX_BLOCK_LENGTH bytes or + * FAST_SEQ_STORE_LENGTH matches. Therefore, this value should be lower than + * the regular SOFT_MAX_BLOCK_LENGTH. 
+ */ +#define FAST_SOFT_MAX_BLOCK_LENGTH 65535 + +/* + * For deflate_compress_fastest(): this is the length of the sequence store. + * This is like SEQ_STORE_LENGTH, but this should be a lower value. + */ +#define FAST_SEQ_STORE_LENGTH 8192 + +/* + * These are the maximum codeword lengths, in bits, the compressor will use for + * each Huffman code. The DEFLATE format defines limits for these. However, + * further limiting litlen codewords to 14 bits is beneficial, since it has + * negligible effect on compression ratio but allows some optimizations when + * outputting bits. (It allows 4 literals to be written at once rather than 3.) + */ +#define MAX_LITLEN_CODEWORD_LEN 14 +#define MAX_OFFSET_CODEWORD_LEN DEFLATE_MAX_OFFSET_CODEWORD_LEN +#define MAX_PRE_CODEWORD_LEN DEFLATE_MAX_PRE_CODEWORD_LEN + +#if SUPPORT_NEAR_OPTIMAL_PARSING + +/* Parameters specific to the near-optimal parsing algorithm */ + +/* + * BIT_COST is a scaling factor that allows the near-optimal compressor to + * consider fractional bit costs when deciding which literal/match sequence to + * use. This is useful when the true symbol costs are unknown. For example, if + * the compressor thinks that a symbol has 6.5 bits of entropy, it can set its + * cost to 6.5 bits rather than have to use 6 or 7 bits. Although in the end + * each symbol will use a whole number of bits due to the Huffman coding, + * considering fractional bits can be helpful due to the limited information. + * + * BIT_COST should be a power of 2. A value of 8 or 16 works well. A higher + * value isn't very useful since the calculations are approximate anyway. + * + * BIT_COST doesn't apply to deflate_flush_block(), which considers whole bits. + */ +#define BIT_COST 16 + +/* + * The NOSTAT_BITS value for a given alphabet is the number of bits assumed to + * be needed to output a symbol that was unused in the previous optimization + * pass. Assigning a default cost allows the symbol to be used in the next + * optimization pass. 
However, the cost should be relatively high because the + * symbol probably won't be used very many times (if at all). + */ +#define LITERAL_NOSTAT_BITS 13 +#define LENGTH_NOSTAT_BITS 13 +#define OFFSET_NOSTAT_BITS 10 + +/* + * This is (slightly less than) the maximum number of matches that the + * near-optimal compressor will cache per block. This behaves similarly to + * SEQ_STORE_LENGTH for the other compressors. + */ +#define MATCH_CACHE_LENGTH (SOFT_MAX_BLOCK_LENGTH * 5) + +#endif /* SUPPORT_NEAR_OPTIMAL_PARSING */ + +/******************************************************************************/ + +/* Include the needed matchfinders. */ +#define MATCHFINDER_WINDOW_ORDER DEFLATE_WINDOW_ORDER +#include "hc_matchfinder.h" +#include "ht_matchfinder.h" +#if SUPPORT_NEAR_OPTIMAL_PARSING +# include "bt_matchfinder.h" +/* + * This is the maximum number of matches the binary trees matchfinder can find + * at a single position. Since the matchfinder never finds more than one match + * for the same length, presuming one of each possible length is sufficient for + * an upper bound. (This says nothing about whether it is worthwhile to + * consider so many matches; this is just defining the worst case.) + */ +#define MAX_MATCHES_PER_POS \ + (DEFLATE_MAX_MATCH_LEN - DEFLATE_MIN_MATCH_LEN + 1) +#endif + +/* + * The largest block length we will ever use is when the final block is of + * length SOFT_MAX_BLOCK_LENGTH + MIN_BLOCK_LENGTH - 1, or when any block is of + * length SOFT_MAX_BLOCK_LENGTH + 1 + DEFLATE_MAX_MATCH_LEN. The latter case + * occurs when the lazy2 compressor chooses two literals and a maximum-length + * match, starting at SOFT_MAX_BLOCK_LENGTH - 1. 
+ */ +#define MAX_BLOCK_LENGTH \ + MAX(SOFT_MAX_BLOCK_LENGTH + MIN_BLOCK_LENGTH - 1, \ + SOFT_MAX_BLOCK_LENGTH + 1 + DEFLATE_MAX_MATCH_LEN) + +static forceinline void +check_buildtime_parameters(void) +{ + /* + * Verify that MIN_BLOCK_LENGTH is being honored, as + * libdeflate_deflate_compress_bound() depends on it. + */ + STATIC_ASSERT(SOFT_MAX_BLOCK_LENGTH >= MIN_BLOCK_LENGTH); + STATIC_ASSERT(FAST_SOFT_MAX_BLOCK_LENGTH >= MIN_BLOCK_LENGTH); + STATIC_ASSERT(SEQ_STORE_LENGTH * DEFLATE_MIN_MATCH_LEN >= + MIN_BLOCK_LENGTH); + STATIC_ASSERT(FAST_SEQ_STORE_LENGTH * HT_MATCHFINDER_MIN_MATCH_LEN >= + MIN_BLOCK_LENGTH); +#if SUPPORT_NEAR_OPTIMAL_PARSING + STATIC_ASSERT(MIN_BLOCK_LENGTH * MAX_MATCHES_PER_POS <= + MATCH_CACHE_LENGTH); +#endif + + /* The definition of MAX_BLOCK_LENGTH assumes this. */ + STATIC_ASSERT(FAST_SOFT_MAX_BLOCK_LENGTH <= SOFT_MAX_BLOCK_LENGTH); + + /* Verify that the sequence stores aren't uselessly large. */ + STATIC_ASSERT(SEQ_STORE_LENGTH * DEFLATE_MIN_MATCH_LEN <= + SOFT_MAX_BLOCK_LENGTH + MIN_BLOCK_LENGTH); + STATIC_ASSERT(FAST_SEQ_STORE_LENGTH * HT_MATCHFINDER_MIN_MATCH_LEN <= + FAST_SOFT_MAX_BLOCK_LENGTH + MIN_BLOCK_LENGTH); + + /* Verify that the maximum codeword lengths are valid. 
*/ + STATIC_ASSERT( + MAX_LITLEN_CODEWORD_LEN <= DEFLATE_MAX_LITLEN_CODEWORD_LEN); + STATIC_ASSERT( + MAX_OFFSET_CODEWORD_LEN <= DEFLATE_MAX_OFFSET_CODEWORD_LEN); + STATIC_ASSERT( + MAX_PRE_CODEWORD_LEN <= DEFLATE_MAX_PRE_CODEWORD_LEN); + STATIC_ASSERT( + (1U << MAX_LITLEN_CODEWORD_LEN) >= DEFLATE_NUM_LITLEN_SYMS); + STATIC_ASSERT( + (1U << MAX_OFFSET_CODEWORD_LEN) >= DEFLATE_NUM_OFFSET_SYMS); + STATIC_ASSERT( + (1U << MAX_PRE_CODEWORD_LEN) >= DEFLATE_NUM_PRECODE_SYMS); +} + +/******************************************************************************/ + +/* Table: length slot => length slot base value */ +static const unsigned deflate_length_slot_base[] = { + 3, 4, 5, 6, 7, 8, 9, 10, + 11, 13, 15, 17, 19, 23, 27, 31, + 35, 43, 51, 59, 67, 83, 99, 115, + 131, 163, 195, 227, 258, +}; + +/* Table: length slot => number of extra length bits */ +static const u8 deflate_extra_length_bits[] = { + 0, 0, 0, 0, 0, 0, 0, 0, + 1, 1, 1, 1, 2, 2, 2, 2, + 3, 3, 3, 3, 4, 4, 4, 4, + 5, 5, 5, 5, 0, +}; + +/* Table: offset slot => offset slot base value */ +static const unsigned deflate_offset_slot_base[] = { + 1, 2, 3, 4, 5, 7, 9, 13, + 17, 25, 33, 49, 65, 97, 129, 193, + 257, 385, 513, 769, 1025, 1537, 2049, 3073, + 4097, 6145, 8193, 12289, 16385, 24577, +}; + +/* Table: offset slot => number of extra offset bits */ +static const u8 deflate_extra_offset_bits[] = { + 0, 0, 0, 0, 1, 1, 2, 2, + 3, 3, 4, 4, 5, 5, 6, 6, + 7, 7, 8, 8, 9, 9, 10, 10, + 11, 11, 12, 12, 13, 13, +}; + +/* Table: length => length slot */ +static const u8 deflate_length_slot[DEFLATE_MAX_MATCH_LEN + 1] = { + 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 12, + 12, 13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 15, 15, 16, 16, 16, 16, 16, + 16, 16, 16, 17, 17, 17, 17, 17, 17, 17, 17, 18, 18, 18, 18, 18, 18, 18, + 18, 19, 19, 19, 19, 19, 19, 19, 19, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 22, 22, 22, 22, 
22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, + 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, + 27, 27, 28, +}; + +/* + * A condensed table which maps offset => offset slot as follows: + * + * offset <= 256: deflate_offset_slot[offset] + * offset > 256: deflate_offset_slot[256 + ((offset - 1) >> 7)] + * + * This table was generated by scripts/gen_offset_slot_map.py. + */ +static const u8 deflate_offset_slot[512] = { + 0, 0, 1, 2, 3, 4, 4, 5, 5, 6, 6, 6, 6, 7, 7, 7, + 7, 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, + 9, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 0, 16, 17, 18, 
18, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21, + 22, 22, 22, 22, 22, 22, 22, 22, 23, 23, 23, 23, 23, 23, 23, 23, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, + 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, + 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, + 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, + 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, + 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, + 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, + 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, + 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, + 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, + 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, +}; + +/* The order in which precode codeword lengths are stored */ +static const u8 deflate_precode_lens_permutation[DEFLATE_NUM_PRECODE_SYMS] = { + 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15 +}; + +/* Table: precode symbol => number of extra bits */ +static const u8 deflate_extra_precode_bits[DEFLATE_NUM_PRECODE_SYMS] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 3, 7 +}; + +/* Codewords for the DEFLATE Huffman codes */ +struct deflate_codewords { + u32 litlen[DEFLATE_NUM_LITLEN_SYMS]; + u32 offset[DEFLATE_NUM_OFFSET_SYMS]; +}; + +/* + * Codeword lengths (in bits) for the DEFLATE Huffman codes. + * A zero length means the corresponding symbol had zero frequency. 
+ */ +struct deflate_lens { + u8 litlen[DEFLATE_NUM_LITLEN_SYMS]; + u8 offset[DEFLATE_NUM_OFFSET_SYMS]; +}; + +/* Codewords and lengths for the DEFLATE Huffman codes */ +struct deflate_codes { + struct deflate_codewords codewords; + struct deflate_lens lens; +}; + +/* Symbol frequency counters for the DEFLATE Huffman codes */ +struct deflate_freqs { + u32 litlen[DEFLATE_NUM_LITLEN_SYMS]; + u32 offset[DEFLATE_NUM_OFFSET_SYMS]; +}; + +/* + * Represents a run of literals followed by a match or end-of-block. This + * struct is needed to temporarily store items chosen by the parser, since items + * cannot be written until all items for the block have been chosen and the + * block's Huffman codes have been computed. + */ +struct deflate_sequence { + + /* + * Bits 0..22: the number of literals in this run. This may be 0 and + * can be at most MAX_BLOCK_LENGTH. The literals are not stored + * explicitly in this structure; instead, they are read directly from + * the uncompressed data. + * + * Bits 23..31: the length of the match which follows the literals, or 0 + * if this literal run was the last in the block, so there is no match + * which follows it. + */ +#define SEQ_LENGTH_SHIFT 23 +#define SEQ_LITRUNLEN_MASK (((u32)1 << SEQ_LENGTH_SHIFT) - 1) + u32 litrunlen_and_length; + + /* + * If 'length' doesn't indicate end-of-block, then this is the offset of + * the match which follows the literals. + */ + u16 offset; + + /* + * If 'length' doesn't indicate end-of-block, then this is the offset + * slot of the match which follows the literals. 
+ */ + u16 offset_slot; +}; + +#if SUPPORT_NEAR_OPTIMAL_PARSING + +/* Costs for the near-optimal parsing algorithm */ +struct deflate_costs { + + /* The cost to output each possible literal */ + u32 literal[DEFLATE_NUM_LITERALS]; + + /* The cost to output each possible match length */ + u32 length[DEFLATE_MAX_MATCH_LEN + 1]; + + /* The cost to output a match offset of each possible offset slot */ + u32 offset_slot[DEFLATE_NUM_OFFSET_SYMS]; +}; + +/* + * This structure represents a byte position in the input data and a node in the + * graph of possible match/literal choices for the current block. + * + * Logically, each incoming edge to this node is labeled with a literal or a + * match that can be taken to reach this position from an earlier position; and + * each outgoing edge from this node is labeled with a literal or a match that + * can be taken to advance from this position to a later position. + * + * But these "edges" are actually stored elsewhere (in 'match_cache'). Here we + * associate with each node just two pieces of information: + * + * 'cost_to_end' is the minimum cost to reach the end of the block from + * this position. + * + * 'item' represents the literal or match that must be chosen from here to + * reach the end of the block with the minimum cost. Equivalently, this + * can be interpreted as the label of the outgoing edge on the minimum-cost + * path to the "end of block" node from this node. + */ +struct deflate_optimum_node { + + u32 cost_to_end; + + /* + * Notes on the match/literal representation used here: + * + * The low bits of 'item' are the length: 1 if this is a literal, + * or the match length if this is a match. + * + * The high bits of 'item' are the actual literal byte if this is a + * literal, or the match offset if this is a match. + */ +#define OPTIMUM_OFFSET_SHIFT 9 +#define OPTIMUM_LEN_MASK (((u32)1 << OPTIMUM_OFFSET_SHIFT) - 1) + u32 item; + +}; + +#endif /* SUPPORT_NEAR_OPTIMAL_PARSING */ + +/* Block split statistics. 
See "Block splitting algorithm" below. */ +#define NUM_LITERAL_OBSERVATION_TYPES 8 +#define NUM_MATCH_OBSERVATION_TYPES 2 +#define NUM_OBSERVATION_TYPES (NUM_LITERAL_OBSERVATION_TYPES + \ + NUM_MATCH_OBSERVATION_TYPES) +#define NUM_OBSERVATIONS_PER_BLOCK_CHECK 512 +struct block_split_stats { + u32 new_observations[NUM_OBSERVATION_TYPES]; + u32 observations[NUM_OBSERVATION_TYPES]; + u32 num_new_observations; + u32 num_observations; +}; + +struct deflate_output_bitstream; + +/* The main DEFLATE compressor structure */ +struct libdeflate_compressor { + + /* Pointer to the compress() implementation chosen at allocation time */ + void (*impl)(struct libdeflate_compressor *restrict c, const u8 *in, + size_t in_nbytes, struct deflate_output_bitstream *os); + + /* The compression level with which this compressor was created */ + unsigned compression_level; + + /* Anything of this size or less we won't bother trying to compress. */ + size_t max_passthrough_size; + + /* + * The maximum search depth: consider at most this many potential + * matches at each position + */ + unsigned max_search_depth; + + /* + * The "nice" match length: if a match of this length is found, choose + * it immediately without further consideration + */ + unsigned nice_match_length; + + /* Frequency counters for the current block */ + struct deflate_freqs freqs; + + /* Block split statistics for the current block */ + struct block_split_stats split_stats; + + /* Dynamic Huffman codes for the current block */ + struct deflate_codes codes; + + /* The static Huffman codes defined by the DEFLATE format */ + struct deflate_codes static_codes; + + /* Temporary space for block flushing */ + union { + /* Information about the precode */ + struct { + u32 freqs[DEFLATE_NUM_PRECODE_SYMS]; + u32 codewords[DEFLATE_NUM_PRECODE_SYMS]; + u8 lens[DEFLATE_NUM_PRECODE_SYMS]; + unsigned items[DEFLATE_NUM_LITLEN_SYMS + + DEFLATE_NUM_OFFSET_SYMS]; + unsigned num_litlen_syms; + unsigned num_offset_syms; + unsigned 
num_explicit_lens; + unsigned num_items; + } precode; + /* + * The "full" length codewords. Used only after the information + * in 'precode' is no longer needed. + */ + struct { + u32 codewords[DEFLATE_MAX_MATCH_LEN + 1]; + u8 lens[DEFLATE_MAX_MATCH_LEN + 1]; + } length; + } o; + + union { + /* Data for greedy or lazy parsing */ + struct { + /* Hash chains matchfinder */ + struct hc_matchfinder hc_mf; + + /* Matches and literals chosen for the current block */ + struct deflate_sequence sequences[SEQ_STORE_LENGTH + 1]; + + } g; /* (g)reedy */ + + /* Data for fastest parsing */ + struct { + /* Hash table matchfinder */ + struct ht_matchfinder ht_mf; + + /* Matches and literals chosen for the current block */ + struct deflate_sequence sequences[ + FAST_SEQ_STORE_LENGTH + 1]; + + } f; /* (f)astest */ + + #if SUPPORT_NEAR_OPTIMAL_PARSING + /* Data for near-optimal parsing */ + struct { + + /* Binary tree matchfinder */ + struct bt_matchfinder bt_mf; + + /* + * Cached matches for the current block. This array + * contains the matches that were found at each position + * in the block. Specifically, for each position, there + * is a list of matches found at that position, if any, + * sorted by strictly increasing length. In addition, + * following the matches for each position, there is a + * special 'struct lz_match' whose 'length' member + * contains the number of matches found at that + * position, and whose 'offset' member contains the + * literal at that position. + * + * Note: in rare cases, there will be a very high number + * of matches in the block and this array will overflow. + * If this happens, we force the end of the current + * block. MATCH_CACHE_LENGTH is the length at which we + * actually check for overflow. 
The extra slots beyond + * this are enough to absorb the worst case overflow, + * which occurs if starting at + * &match_cache[MATCH_CACHE_LENGTH - 1], we write + * MAX_MATCHES_PER_POS matches and a match count header, + * then skip searching for matches at + * 'DEFLATE_MAX_MATCH_LEN - 1' positions and write the + * match count header for each. + */ + struct lz_match match_cache[MATCH_CACHE_LENGTH + + MAX_MATCHES_PER_POS + + DEFLATE_MAX_MATCH_LEN - 1]; + + /* + * Array of nodes, one per position, for running the + * minimum-cost path algorithm. + * + * This array must be large enough to accommodate the + * worst-case number of nodes, which is MAX_BLOCK_LENGTH + * plus 1 for the end-of-block node. + */ + struct deflate_optimum_node optimum_nodes[ + MAX_BLOCK_LENGTH + 1]; + + /* The current cost model being used */ + struct deflate_costs costs; + + /* + * A table that maps match offset to offset slot. This + * differs from deflate_offset_slot[] in that this is a + * full map, not a condensed one. The full map is more + * appropriate for the near-optimal parser, since the + * near-optimal parser does more offset => offset_slot + * translations, it doesn't intersperse them with + * matchfinding (so cache evictions are less of a + * concern), and it uses more memory anyway. + */ + u8 offset_slot_full[DEFLATE_MAX_MATCH_OFFSET + 1]; + + /* Literal/match statistics saved from previous block */ + u32 prev_observations[NUM_OBSERVATION_TYPES]; + u32 prev_num_observations; + + /* + * Approximate match length frequencies based on a + * greedy parse, gathered during matchfinding. This is + * used for setting the initial symbol costs. 
+ */ + u32 new_match_len_freqs[DEFLATE_MAX_MATCH_LEN + 1]; + u32 match_len_freqs[DEFLATE_MAX_MATCH_LEN + 1]; + + unsigned num_optim_passes; + } n; /* (n)ear-optimal */ + #endif /* SUPPORT_NEAR_OPTIMAL_PARSING */ + + } p; /* (p)arser */ +}; + +/* + * The type for the bitbuffer variable, which temporarily holds bits that are + * being packed into bytes and written to the output buffer. For best + * performance, this should have size equal to a machine word. + */ +typedef machine_word_t bitbuf_t; + +/* + * The capacity of the bitbuffer, in bits. This is 1 less than the real size, + * in order to avoid undefined behavior when doing bitbuf >>= bitcount & ~7. + */ +#define BITBUF_NBITS (8 * sizeof(bitbuf_t) - 1) + +/* + * Can the specified number of bits always be added to 'bitbuf' after any + * pending bytes have been flushed? There can be up to 7 bits remaining after a + * flush, so the count must not exceed BITBUF_NBITS after adding 'n' more bits. + */ +#define CAN_BUFFER(n) (7 + (n) <= BITBUF_NBITS) + +/* + * Structure to keep track of the current state of sending bits to the + * compressed output buffer + */ +struct deflate_output_bitstream { + + /* Bits that haven't yet been written to the output buffer */ + bitbuf_t bitbuf; + + /* + * Number of bits currently held in @bitbuf. This can be between 0 and + * BITBUF_NBITS in general, or between 0 and 7 after a flush. + */ + unsigned bitcount; + + /* + * Pointer to the position in the output buffer at which the next byte + * should be written + */ + u8 *next; + + /* + * Pointer to near the end of the output buffer. 'next' will never + * exceed this. There are OUTPUT_END_PADDING bytes reserved after this + * to allow branchlessly writing a whole word at this location. + */ + u8 *end; +}; + +/* + * OUTPUT_END_PADDING is the size, in bytes, of the extra space that must be + * present following os->end, in order to not overrun the buffer when generating + * output. 
When UNALIGNED_ACCESS_IS_FAST, we need at least sizeof(bitbuf_t) + * bytes for put_unaligned_leword(). Otherwise we need only 1 byte. However, + * to make the compression algorithm produce the same result on all CPU + * architectures (which is sometimes desirable), we have to unconditionally use + * the maximum for any CPU, which is sizeof(bitbuf_t) == 8. + */ +#define OUTPUT_END_PADDING 8 + +/* + * Add some bits to the bitbuffer variable of the output bitstream. The caller + * must ensure that 'bitcount + n <= BITBUF_NBITS', by calling FLUSH_BITS() + * frequently enough. + */ +#define ADD_BITS(bits, n) \ +do { \ + bitbuf |= (bitbuf_t)(bits) << bitcount; \ + bitcount += (n); \ + ASSERT(bitcount <= BITBUF_NBITS); \ +} while (0) + +/* Flush bits from the bitbuffer variable to the output buffer. */ +#define FLUSH_BITS() \ +do { \ + if (UNALIGNED_ACCESS_IS_FAST) { \ + /* Flush a whole word (branchlessly). */ \ + put_unaligned_leword(bitbuf, out_next); \ + bitbuf >>= bitcount & ~7; \ + out_next += MIN(out_end - out_next, bitcount >> 3); \ + bitcount &= 7; \ + } else { \ + /* Flush a byte at a time. */ \ + while (bitcount >= 8) { \ + *out_next = bitbuf; \ + if (out_next != out_end) \ + out_next++; \ + bitcount -= 8; \ + bitbuf >>= 8; \ + } \ + } \ +} while (0) + +/* + * Given the binary tree node A[subtree_idx] whose children already satisfy the + * maxheap property, swap the node with its greater child until it is greater + * than or equal to both of its children, so that the maxheap property is + * satisfied in the subtree rooted at A[subtree_idx]. 'A' uses 1-based indices. 
+ */
+static void
+heapify_subtree(u32 A[], unsigned length, unsigned subtree_idx)
+{
+	unsigned parent_idx;
+	unsigned child_idx;
+	u32 v;
+
+	v = A[subtree_idx];
+	parent_idx = subtree_idx;
+	while ((child_idx = parent_idx * 2) <= length) {
+		/* Descend toward the greater of the two children. */
+		if (child_idx < length && A[child_idx + 1] > A[child_idx])
+			child_idx++;
+		if (v >= A[child_idx])
+			break;
+		A[parent_idx] = A[child_idx];
+		parent_idx = child_idx;
+	}
+	A[parent_idx] = v;
+}
+
+/*
+ * Rearrange the array 'A' so that it satisfies the maxheap property.
+ * 'A' uses 1-based indices, so the children of A[i] are A[i*2] and A[i*2 + 1].
+ */
+static void
+heapify_array(u32 A[], unsigned length)
+{
+	unsigned subtree_idx;
+
+	for (subtree_idx = length / 2; subtree_idx >= 1; subtree_idx--)
+		heapify_subtree(A, length, subtree_idx);
+}
+
+/*
+ * Sort the array 'A', which contains 'length' unsigned 32-bit integers.
+ *
+ * Note: name this function heap_sort() instead of heapsort() to avoid colliding
+ * with heapsort() from stdlib.h on BSD-derived systems --- though this isn't
+ * necessary when compiling with -D_ANSI_SOURCE, which is the better solution.
+ */
+static void
+heap_sort(u32 A[], unsigned length)
+{
+	A--; /* Use 1-based indices */
+
+	heapify_array(A, length);
+
+	while (length >= 2) {
+		/* Swap the current maximum (A[1]) to the end of the unsorted
+		 * region, shrink that region, then restore the heap. */
+		u32 tmp = A[length];
+
+		A[length] = A[1];
+		A[1] = tmp;
+		length--;
+		heapify_subtree(A, length, 1);
+	}
+}
+
+/*
+ * Each entry of the symbol sort array packs a frequency (high NUM_FREQ_BITS
+ * bits) and a symbol value (low NUM_SYMBOL_BITS bits) into a single u32;
+ * see sort_symbols() below.
+ */
+#define NUM_SYMBOL_BITS 10
+#define NUM_FREQ_BITS	(32 - NUM_SYMBOL_BITS)
+#define SYMBOL_MASK	((1 << NUM_SYMBOL_BITS) - 1)
+#define FREQ_MASK	(~SYMBOL_MASK)
+
+/* About 1 count-sort counter per symbol is fastest; see sort_symbols(). */
+#define GET_NUM_COUNTERS(num_syms)	(num_syms)
+
+/*
+ * Sort the symbols primarily by frequency and secondarily by symbol value.
+ * Discard symbols with zero frequency and fill in an array with the remaining
+ * symbols, along with their frequencies.  The low NUM_SYMBOL_BITS bits of each
+ * array entry will contain the symbol value, and the remaining bits will
+ * contain the frequency.
+ * + * @num_syms + * Number of symbols in the alphabet, at most 1 << NUM_SYMBOL_BITS. + * + * @freqs[num_syms] + * Frequency of each symbol, summing to at most (1 << NUM_FREQ_BITS) - 1. + * + * @lens[num_syms] + * An array that eventually will hold the length of each codeword. This + * function only fills in the codeword lengths for symbols that have zero + * frequency, which are not well defined per se but will be set to 0. + * + * @symout[num_syms] + * The output array, described above. + * + * Returns the number of entries in 'symout' that were filled. This is the + * number of symbols that have nonzero frequency. + */ +static unsigned +sort_symbols(unsigned num_syms, const u32 freqs[], u8 lens[], u32 symout[]) +{ + unsigned sym; + unsigned i; + unsigned num_used_syms; + unsigned num_counters; + unsigned counters[GET_NUM_COUNTERS(DEFLATE_MAX_NUM_SYMS)]; + + /* + * We use heapsort, but with an added optimization. Since often most + * symbol frequencies are low, we first do a count sort using a limited + * number of counters. High frequencies are counted in the last + * counter, and only they will be sorted with heapsort. + * + * Note: with more symbols, it is generally beneficial to have more + * counters. About 1 counter per symbol seems fastest. + */ + + num_counters = GET_NUM_COUNTERS(num_syms); + + memset(counters, 0, num_counters * sizeof(counters[0])); + + /* Count the frequencies. */ + for (sym = 0; sym < num_syms; sym++) + counters[MIN(freqs[sym], num_counters - 1)]++; + + /* + * Make the counters cumulative, ignoring the zero-th, which counted + * symbols with zero frequency. As a side effect, this calculates the + * number of symbols with nonzero frequency. + */ + num_used_syms = 0; + for (i = 1; i < num_counters; i++) { + unsigned count = counters[i]; + + counters[i] = num_used_syms; + num_used_syms += count; + } + + /* + * Sort nonzero-frequency symbols using the counters. At the same time, + * set the codeword lengths of zero-frequency symbols to 0. 
+ */ + for (sym = 0; sym < num_syms; sym++) { + u32 freq = freqs[sym]; + + if (freq != 0) { + symout[counters[MIN(freq, num_counters - 1)]++] = + sym | (freq << NUM_SYMBOL_BITS); + } else { + lens[sym] = 0; + } + } + + /* Sort the symbols counted in the last counter. */ + heap_sort(symout + counters[num_counters - 2], + counters[num_counters - 1] - counters[num_counters - 2]); + + return num_used_syms; +} + +/* + * Build a Huffman tree. + * + * This is an optimized implementation that + * (a) takes advantage of the frequencies being already sorted; + * (b) only generates non-leaf nodes, since the non-leaf nodes of a Huffman + * tree are sufficient to generate a canonical code; + * (c) Only stores parent pointers, not child pointers; + * (d) Produces the nodes in the same memory used for input frequency + * information. + * + * Array 'A', which contains 'sym_count' entries, is used for both input and + * output. For this function, 'sym_count' must be at least 2. + * + * For input, the array must contain the frequencies of the symbols, sorted in + * increasing order. Specifically, each entry must contain a frequency left + * shifted by NUM_SYMBOL_BITS bits. Any data in the low NUM_SYMBOL_BITS bits of + * the entries will be ignored by this function. Although these bits will, in + * fact, contain the symbols that correspond to the frequencies, this function + * is concerned with frequencies only and keeps the symbols as-is. + * + * For output, this function will produce the non-leaf nodes of the Huffman + * tree. These nodes will be stored in the first (sym_count - 1) entries of the + * array. Entry A[sym_count - 2] will represent the root node. Each other node + * will contain the zero-based index of its parent node in 'A', left shifted by + * NUM_SYMBOL_BITS bits. The low NUM_SYMBOL_BITS bits of each entry in A will + * be kept as-is. 
Again, note that although these low bits will, in fact, + * contain a symbol value, this symbol will have *no relationship* with the + * Huffman tree node that happens to occupy the same slot. This is because this + * implementation only generates the non-leaf nodes of the tree. + */ +static void +build_tree(u32 A[], unsigned sym_count) +{ + const unsigned last_idx = sym_count - 1; + + /* Index of the next lowest frequency leaf that still needs a parent */ + unsigned i = 0; + + /* + * Index of the next lowest frequency non-leaf that still needs a + * parent, or 'e' if there is currently no such node + */ + unsigned b = 0; + + /* Index of the next spot for a non-leaf (will overwrite a leaf) */ + unsigned e = 0; + + do { + u32 new_freq; + + /* + * Select the next two lowest frequency nodes among the leaves + * A[i] and non-leaves A[b], and create a new node A[e] to be + * their parent. Set the new node's frequency to the sum of the + * frequencies of its two children. + * + * Usually the next two lowest frequency nodes are of the same + * type (leaf or non-leaf), so check those cases first. + */ + if (i + 1 <= last_idx && + (b == e || (A[i + 1] & FREQ_MASK) <= (A[b] & FREQ_MASK))) { + /* Two leaves */ + new_freq = (A[i] & FREQ_MASK) + (A[i + 1] & FREQ_MASK); + i += 2; + } else if (b + 2 <= e && + (i > last_idx || + (A[b + 1] & FREQ_MASK) < (A[i] & FREQ_MASK))) { + /* Two non-leaves */ + new_freq = (A[b] & FREQ_MASK) + (A[b + 1] & FREQ_MASK); + A[b] = (e << NUM_SYMBOL_BITS) | (A[b] & SYMBOL_MASK); + A[b + 1] = (e << NUM_SYMBOL_BITS) | + (A[b + 1] & SYMBOL_MASK); + b += 2; + } else { + /* One leaf and one non-leaf */ + new_freq = (A[i] & FREQ_MASK) + (A[b] & FREQ_MASK); + A[b] = (e << NUM_SYMBOL_BITS) | (A[b] & SYMBOL_MASK); + i++; + b++; + } + A[e] = new_freq | (A[e] & SYMBOL_MASK); + /* + * A binary tree with 'n' leaves has 'n - 1' non-leaves, so the + * tree is complete once we've created 'n - 1' non-leaves. 
+ */ + } while (++e < last_idx); +} + +/* + * Given the stripped-down Huffman tree constructed by build_tree(), determine + * the number of codewords that should be assigned each possible length, taking + * into account the length-limited constraint. + * + * @A + * The array produced by build_tree(), containing parent index information + * for the non-leaf nodes of the Huffman tree. Each entry in this array is + * a node; a node's parent always has a greater index than that node + * itself. This function will overwrite the parent index information in + * this array, so essentially it will destroy the tree. However, the data + * in the low NUM_SYMBOL_BITS of each entry will be preserved. + * + * @root_idx + * The 0-based index of the root node in 'A', and consequently one less + * than the number of tree node entries in 'A'. (Or, really 2 less than + * the actual length of 'A'.) + * + * @len_counts + * An array of length ('max_codeword_len' + 1) in which the number of + * codewords having each length <= max_codeword_len will be returned. + * + * @max_codeword_len + * The maximum permissible codeword length. + */ +static void +compute_length_counts(u32 A[], unsigned root_idx, unsigned len_counts[], + unsigned max_codeword_len) +{ + unsigned len; + int node; + + /* + * The key observations are: + * + * (1) We can traverse the non-leaf nodes of the tree, always visiting a + * parent before its children, by simply iterating through the array + * in reverse order. Consequently, we can compute the depth of each + * node in one pass, overwriting the parent indices with depths. + * + * (2) We can initially assume that in the real Huffman tree, both + * children of the root are leaves. This corresponds to two + * codewords of length 1. Then, whenever we visit a (non-leaf) node + * during the traversal, we modify this assumption to account for + * the current node *not* being a leaf, but rather its two children + * being leaves. 
This causes the loss of one codeword for the + * current depth and the addition of two codewords for the current + * depth plus one. + * + * (3) We can handle the length-limited constraint fairly easily by + * simply using the largest length available when a depth exceeds + * max_codeword_len. + */ + + for (len = 0; len <= max_codeword_len; len++) + len_counts[len] = 0; + len_counts[1] = 2; + + /* Set the root node's depth to 0. */ + A[root_idx] &= SYMBOL_MASK; + + for (node = root_idx - 1; node >= 0; node--) { + + /* Calculate the depth of this node. */ + + unsigned parent = A[node] >> NUM_SYMBOL_BITS; + unsigned parent_depth = A[parent] >> NUM_SYMBOL_BITS; + unsigned depth = parent_depth + 1; + + /* + * Set the depth of this node so that it is available when its + * children (if any) are processed. + */ + A[node] = (A[node] & SYMBOL_MASK) | (depth << NUM_SYMBOL_BITS); + + /* + * If needed, decrease the length to meet the length-limited + * constraint. This is not the optimal method for generating + * length-limited Huffman codes! But it should be good enough. + */ + if (depth >= max_codeword_len) { + depth = max_codeword_len; + do { + depth--; + } while (len_counts[depth] == 0); + } + + /* + * Account for the fact that we have a non-leaf node at the + * current depth. + */ + len_counts[depth]--; + len_counts[depth + 1] += 2; + } +} + +/* + * DEFLATE uses bit-reversed codewords, so we must bit-reverse the codewords + * after generating them. All codewords have length <= 16 bits. If the CPU has + * a bit-reversal instruction, then that is the fastest method. Otherwise the + * fastest method is to reverse the bits in each of the two bytes using a table. + * The table method is slightly faster than using bitwise operations to flip + * adjacent 1, 2, 4, and then 8-bit fields, even if 2 to 4 codewords are packed + * into a machine word and processed together using that method. 
+ */ + +#ifdef rbit32 +static forceinline u32 reverse_codeword(u32 codeword, u8 len) +{ + return rbit32(codeword) >> ((32 - len) & 31); +} +#else +/* Generated by scripts/gen_bitreverse_tab.py */ +static const u8 bitreverse_tab[256] = { + 0x00, 0x80, 0x40, 0xc0, 0x20, 0xa0, 0x60, 0xe0, + 0x10, 0x90, 0x50, 0xd0, 0x30, 0xb0, 0x70, 0xf0, + 0x08, 0x88, 0x48, 0xc8, 0x28, 0xa8, 0x68, 0xe8, + 0x18, 0x98, 0x58, 0xd8, 0x38, 0xb8, 0x78, 0xf8, + 0x04, 0x84, 0x44, 0xc4, 0x24, 0xa4, 0x64, 0xe4, + 0x14, 0x94, 0x54, 0xd4, 0x34, 0xb4, 0x74, 0xf4, + 0x0c, 0x8c, 0x4c, 0xcc, 0x2c, 0xac, 0x6c, 0xec, + 0x1c, 0x9c, 0x5c, 0xdc, 0x3c, 0xbc, 0x7c, 0xfc, + 0x02, 0x82, 0x42, 0xc2, 0x22, 0xa2, 0x62, 0xe2, + 0x12, 0x92, 0x52, 0xd2, 0x32, 0xb2, 0x72, 0xf2, + 0x0a, 0x8a, 0x4a, 0xca, 0x2a, 0xaa, 0x6a, 0xea, + 0x1a, 0x9a, 0x5a, 0xda, 0x3a, 0xba, 0x7a, 0xfa, + 0x06, 0x86, 0x46, 0xc6, 0x26, 0xa6, 0x66, 0xe6, + 0x16, 0x96, 0x56, 0xd6, 0x36, 0xb6, 0x76, 0xf6, + 0x0e, 0x8e, 0x4e, 0xce, 0x2e, 0xae, 0x6e, 0xee, + 0x1e, 0x9e, 0x5e, 0xde, 0x3e, 0xbe, 0x7e, 0xfe, + 0x01, 0x81, 0x41, 0xc1, 0x21, 0xa1, 0x61, 0xe1, + 0x11, 0x91, 0x51, 0xd1, 0x31, 0xb1, 0x71, 0xf1, + 0x09, 0x89, 0x49, 0xc9, 0x29, 0xa9, 0x69, 0xe9, + 0x19, 0x99, 0x59, 0xd9, 0x39, 0xb9, 0x79, 0xf9, + 0x05, 0x85, 0x45, 0xc5, 0x25, 0xa5, 0x65, 0xe5, + 0x15, 0x95, 0x55, 0xd5, 0x35, 0xb5, 0x75, 0xf5, + 0x0d, 0x8d, 0x4d, 0xcd, 0x2d, 0xad, 0x6d, 0xed, + 0x1d, 0x9d, 0x5d, 0xdd, 0x3d, 0xbd, 0x7d, 0xfd, + 0x03, 0x83, 0x43, 0xc3, 0x23, 0xa3, 0x63, 0xe3, + 0x13, 0x93, 0x53, 0xd3, 0x33, 0xb3, 0x73, 0xf3, + 0x0b, 0x8b, 0x4b, 0xcb, 0x2b, 0xab, 0x6b, 0xeb, + 0x1b, 0x9b, 0x5b, 0xdb, 0x3b, 0xbb, 0x7b, 0xfb, + 0x07, 0x87, 0x47, 0xc7, 0x27, 0xa7, 0x67, 0xe7, + 0x17, 0x97, 0x57, 0xd7, 0x37, 0xb7, 0x77, 0xf7, + 0x0f, 0x8f, 0x4f, 0xcf, 0x2f, 0xaf, 0x6f, 0xef, + 0x1f, 0x9f, 0x5f, 0xdf, 0x3f, 0xbf, 0x7f, 0xff, +}; + +static forceinline u32 reverse_codeword(u32 codeword, u8 len) +{ + STATIC_ASSERT(DEFLATE_MAX_CODEWORD_LEN <= 16); + codeword = 
((u32)bitreverse_tab[codeword & 0xff] << 8) | + bitreverse_tab[codeword >> 8]; + return codeword >> (16 - len); +} +#endif /* !rbit32 */ + +/* + * Generate the codewords for a canonical Huffman code. + * + * @A + * The output array for codewords. In addition, initially this + * array must contain the symbols, sorted primarily by frequency and + * secondarily by symbol value, in the low NUM_SYMBOL_BITS bits of + * each entry. + * + * @len + * Output array for codeword lengths. + * + * @len_counts + * An array that provides the number of codewords that will have + * each possible length <= max_codeword_len. + * + * @max_codeword_len + * Maximum length, in bits, of each codeword. + * + * @num_syms + * Number of symbols in the alphabet, including symbols with zero + * frequency. This is the length of the 'A' and 'len' arrays. + */ +static void +gen_codewords(u32 A[], u8 lens[], const unsigned len_counts[], + unsigned max_codeword_len, unsigned num_syms) +{ + u32 next_codewords[DEFLATE_MAX_CODEWORD_LEN + 1]; + unsigned i; + unsigned len; + unsigned sym; + + /* + * Given the number of codewords that will have each length, assign + * codeword lengths to symbols. We do this by assigning the lengths in + * decreasing order to the symbols sorted primarily by increasing + * frequency and secondarily by increasing symbol value. + */ + for (i = 0, len = max_codeword_len; len >= 1; len--) { + unsigned count = len_counts[len]; + + while (count--) + lens[A[i++] & SYMBOL_MASK] = len; + } + + /* + * Generate the codewords themselves. We initialize the + * 'next_codewords' array to provide the lexicographically first + * codeword of each length, then assign codewords in symbol order. This + * produces a canonical code. 
+ */ + next_codewords[0] = 0; + next_codewords[1] = 0; + for (len = 2; len <= max_codeword_len; len++) + next_codewords[len] = + (next_codewords[len - 1] + len_counts[len - 1]) << 1; + + for (sym = 0; sym < num_syms; sym++) { + /* DEFLATE requires bit-reversed codewords. */ + A[sym] = reverse_codeword(next_codewords[lens[sym]]++, + lens[sym]); + } +} + +/* + * --------------------------------------------------------------------- + * deflate_make_huffman_code() + * --------------------------------------------------------------------- + * + * Given an alphabet and the frequency of each symbol in it, construct a + * length-limited canonical Huffman code. + * + * @num_syms + * The number of symbols in the alphabet. The symbols are the integers in + * the range [0, num_syms - 1]. This parameter must be at least 2 and + * must not exceed (1 << NUM_SYMBOL_BITS). + * + * @max_codeword_len + * The maximum permissible codeword length. + * + * @freqs + * An array of length @num_syms that gives the frequency of each symbol. + * It is valid for some, none, or all of the frequencies to be 0. The sum + * of frequencies must not exceed (1 << NUM_FREQ_BITS) - 1. + * + * @lens + * An array of @num_syms entries in which this function will return the + * length, in bits, of the codeword assigned to each symbol. Symbols with + * 0 frequency will not have codewords per se, but their entries in this + * array will be set to 0. No lengths greater than @max_codeword_len will + * be assigned. + * + * @codewords + * An array of @num_syms entries in which this function will return the + * codeword for each symbol, right-justified and padded on the left with + * zeroes. Codewords for symbols with 0 frequency will be undefined. + * + * --------------------------------------------------------------------- + * + * This function builds a length-limited canonical Huffman code. 
+ *
+ * A length-limited Huffman code contains no codewords longer than some
+ * specified length, and has exactly (with some algorithms) or approximately
+ * (with the algorithm used here) the minimum weighted path length from the
+ * root, given this constraint.
+ *
+ * A canonical Huffman code satisfies the properties that a longer codeword
+ * never lexicographically precedes a shorter codeword, and the lexicographic
+ * ordering of codewords of the same length is the same as the lexicographic
+ * ordering of the corresponding symbols. A canonical Huffman code, or more
+ * generally a canonical prefix code, can be reconstructed from only a list
+ * containing the codeword length of each symbol.
+ *
+ * The classic algorithm to generate a Huffman code creates a node for each
+ * symbol, then inserts these nodes into a min-heap keyed by symbol frequency.
+ * Then, repeatedly, the two lowest-frequency nodes are removed from the
+ * min-heap and added as the children of a new node having frequency equal to
+ * the sum of its two children, which is then inserted into the min-heap. When
+ * only a single node remains in the min-heap, it is the root of the Huffman
+ * tree. The codeword for each symbol is determined by the path needed to reach
+ * the corresponding node from the root. Descending to the left child appends a
+ * 0 bit, whereas descending to the right child appends a 1 bit.
+ *
+ * The classic algorithm is relatively easy to understand, but it is subject to
+ * a number of inefficiencies. In practice, it is fastest to first sort the
+ * symbols by frequency. (This itself can be subject to an optimization based
+ * on the fact that most frequencies tend to be low.) At the same time, we sort
+ * secondarily by symbol value, which aids the process of generating a canonical
+ * code. Then, during tree construction, no heap is necessary because both the
+ * leaf nodes and the unparented non-leaf nodes can be easily maintained in
+ * sorted order. Consequently, there can never be more than two possibilities
+ * for the next-lowest-frequency node.
+ *
+ * In addition, because we're generating a canonical code, we actually don't
+ * need the leaf nodes of the tree at all, only the non-leaf nodes. This is
+ * because for canonical code generation we don't need to know where the symbols
+ * are in the tree. Rather, we only need to know how many leaf nodes have each
+ * depth (codeword length). And this information can, in fact, be quickly
+ * generated from the tree of non-leaves only.
+ *
+ * Furthermore, we can build this stripped-down Huffman tree directly in the
+ * array in which the codewords are to be generated, provided that these array
+ * slots are large enough to hold a symbol and frequency value.
+ *
+ * Still furthermore, we don't even need to maintain explicit child pointers.
+ * We only need the parent pointers, and even those can be overwritten in-place
+ * with depth information as part of the process of extracting codeword lengths
+ * from the tree. So in summary, we do NOT need a big structure like:
+ *
+ *	struct huffman_tree_node {
+ *		unsigned int symbol;
+ *		unsigned int frequency;
+ *		unsigned int depth;
+ *		struct huffman_tree_node *left_child;
+ *		struct huffman_tree_node *right_child;
+ *	};
+ *
+ *
+ * ... which often gets used in "naive" implementations of Huffman code
+ * generation.
+ *
+ * Many of these optimizations are based on the implementation in 7-Zip (source
+ * file: C/HuffEnc.c), which was placed in the public domain by Igor Pavlov.
+ */
+/*
+ * Build a length-limited canonical Huffman code for a 'num_syms'-symbol
+ * alphabet whose per-symbol frequencies are given in @freqs. The codeword
+ * length of each symbol is written to @lens and its codeword to @codewords;
+ * no codeword is longer than @max_codeword_len bits, and symbols with zero
+ * frequency get length 0 (no codeword).
+ */
+static void
+deflate_make_huffman_code(unsigned num_syms, unsigned max_codeword_len,
+			  const u32 freqs[], u8 lens[], u32 codewords[])
+{
+	u32 *A = codewords;
+	unsigned num_used_syms;
+
+	/* Symbols and frequencies must fit in their packed bitfields. */
+	STATIC_ASSERT(DEFLATE_MAX_NUM_SYMS <= 1 << NUM_SYMBOL_BITS);
+	STATIC_ASSERT(MAX_BLOCK_LENGTH <= ((u32)1 << NUM_FREQ_BITS) - 1);
+
+	/*
+	 * We begin by sorting the symbols primarily by frequency and
+	 * secondarily by symbol value. As an optimization, the array used for
+	 * this purpose ('A') shares storage with the space in which we will
+	 * eventually return the codewords.
+	 */
+	num_used_syms = sort_symbols(num_syms, freqs, lens, A);
+
+	/*
+	 * 'num_used_syms' is the number of symbols with nonzero frequency.
+	 * This may be less than @num_syms. 'num_used_syms' is also the number
+	 * of entries in 'A' that are valid. Each entry consists of a distinct
+	 * symbol and a nonzero frequency packed into a 32-bit integer.
+	 */
+
+	/*
+	 * Handle special cases where only 0 or 1 symbols were used (had nonzero
+	 * frequency).
+	 */
+
+	if (unlikely(num_used_syms == 0)) {
+		/*
+		 * Code is empty. sort_symbols() already set all lengths to 0,
+		 * so there is nothing more to do.
+		 */
+		return;
+	}
+
+	if (unlikely(num_used_syms == 1)) {
+		/*
+		 * Only one symbol was used, so we only need one codeword. But
+		 * two codewords are needed to form the smallest complete
+		 * Huffman code, which uses codewords 0 and 1. Therefore, we
+		 * choose another symbol to which to assign a codeword. We use
+		 * 0 (if the used symbol is not 0) or 1 (if the used symbol is
+		 * 0). In either case, the lesser-valued symbol must be
+		 * assigned codeword 0 so that the resulting code is canonical.
+		 */
+
+		unsigned sym = A[0] & SYMBOL_MASK;
+		unsigned nonzero_idx = sym ? sym : 1;
+
+		codewords[0] = 0;
+		lens[0] = 1;
+		codewords[nonzero_idx] = 1;
+		lens[nonzero_idx] = 1;
+		return;
+	}
+
+	/*
+	 * Build a stripped-down version of the Huffman tree, sharing the array
+	 * 'A' with the symbol values. Then extract length counts from the tree
+	 * and use them to generate the final codewords.
+	 */
+
+	build_tree(A, num_used_syms);
+
+	{
+		unsigned len_counts[DEFLATE_MAX_CODEWORD_LEN + 1];
+
+		/*
+		 * NOTE(review): 'num_used_syms - 2' presumably indexes the
+		 * root of the non-leaf node array built by build_tree() —
+		 * confirm against build_tree()'s layout.
+		 */
+		compute_length_counts(A, num_used_syms - 2,
+				      len_counts, max_codeword_len);
+
+		gen_codewords(A, lens, len_counts, max_codeword_len, num_syms);
+	}
+}
+
+/*
+ * Clear the Huffman symbol frequency counters. 
This must be called when
+ * starting a new DEFLATE block.
+ */
+static void
+deflate_reset_symbol_frequencies(struct libdeflate_compressor *c)
+{
+	memset(&c->freqs, 0, sizeof(c->freqs));
+}
+
+/*
+ * Build the literal/length and offset Huffman codes for a DEFLATE block.
+ *
+ * This takes as input the frequency tables for each alphabet and produces as
+ * output a set of tables that map symbols to codewords and codeword lengths.
+ */
+static void
+deflate_make_huffman_codes(const struct deflate_freqs *freqs,
+			   struct deflate_codes *codes)
+{
+	deflate_make_huffman_code(DEFLATE_NUM_LITLEN_SYMS,
+				  MAX_LITLEN_CODEWORD_LEN,
+				  freqs->litlen,
+				  codes->lens.litlen,
+				  codes->codewords.litlen);
+
+	deflate_make_huffman_code(DEFLATE_NUM_OFFSET_SYMS,
+				  MAX_OFFSET_CODEWORD_LEN,
+				  freqs->offset,
+				  codes->lens.offset,
+				  codes->codewords.offset);
+}
+
+/* Initialize c->static_codes. */
+static void
+deflate_init_static_codes(struct libdeflate_compressor *c)
+{
+	unsigned i;
+
+	/*
+	 * Seed the frequency tables with weights of the form 1 << (9 - len),
+	 * which cause the Huffman code builder to assign each symbol the
+	 * codeword length 'len' that the DEFLATE format prescribes for the
+	 * static codes: 8 for literals 0-143, 9 for 144-255, 7 for symbols
+	 * 256-279, 8 for 280-287, and 5 for all offset symbols.
+	 */
+	for (i = 0; i < 144; i++)
+		c->freqs.litlen[i] = 1 << (9 - 8);
+	for (; i < 256; i++)
+		c->freqs.litlen[i] = 1 << (9 - 9);
+	for (; i < 280; i++)
+		c->freqs.litlen[i] = 1 << (9 - 7);
+	for (; i < 288; i++)
+		c->freqs.litlen[i] = 1 << (9 - 8);
+
+	for (i = 0; i < 32; i++)
+		c->freqs.offset[i] = 1 << (5 - 5);
+
+	deflate_make_huffman_codes(&c->freqs, &c->static_codes);
+}
+
+/* Return the offset slot for the given match offset, using the small map. 
*/
+static forceinline unsigned
+deflate_get_offset_slot(unsigned offset)
+{
+#if 1
+	/*
+	 * Two-level lookup: small offsets (<= 256) are mapped directly;
+	 * larger offsets are mapped through a second, coarser (128-entry
+	 * granularity) part of the table.
+	 */
+	if (offset <= 256)
+		return deflate_offset_slot[offset];
+	else
+		return deflate_offset_slot[256 + ((offset - 1) >> 7)];
+#else /* Branchless version */
+	u32 i1 = offset;
+	u32 i2 = 256 + ((offset - 1) >> 7);
+	u32 is_small = (s32)(offset - 257) >> 31;
+
+	return deflate_offset_slot[(i1 & is_small) ^ (i2 & ~is_small)];
+#endif
+}
+
+/*
+ * Compute the precode "items" that encode the given array of codeword
+ * lengths. Each item packs a precode symbol into bits 0-4 and that symbol's
+ * extra bits (the RLE repeat count) into bits 5 and up. Returns the number
+ * of items written to @precode_items; also fills @precode_freqs with the
+ * frequency of each precode symbol.
+ */
+static unsigned
+deflate_compute_precode_items(const u8 lens[], const unsigned num_lens,
+			      u32 precode_freqs[], unsigned precode_items[])
+{
+	unsigned *itemptr;
+	unsigned run_start;
+	unsigned run_end;
+	unsigned extra_bits;
+	u8 len;
+
+	memset(precode_freqs, 0,
+	       DEFLATE_NUM_PRECODE_SYMS * sizeof(precode_freqs[0]));
+
+	itemptr = precode_items;
+	run_start = 0;
+	do {
+		/* Find the next run of codeword lengths. */
+
+		/* len = the length being repeated */
+		len = lens[run_start];
+
+		/* Extend the run. */
+		run_end = run_start;
+		do {
+			run_end++;
+		} while (run_end != num_lens && len == lens[run_end]);
+
+		if (len == 0) {
+			/* Run of zeroes. */
+
+			/* Symbol 18: RLE 11 to 138 zeroes at a time. */
+			while ((run_end - run_start) >= 11) {
+				extra_bits = MIN((run_end - run_start) - 11,
+						 0x7F);
+				precode_freqs[18]++;
+				*itemptr++ = 18 | (extra_bits << 5);
+				run_start += 11 + extra_bits;
+			}
+
+			/* Symbol 17: RLE 3 to 10 zeroes at a time. */
+			if ((run_end - run_start) >= 3) {
+				extra_bits = MIN((run_end - run_start) - 3,
+						 0x7);
+				precode_freqs[17]++;
+				*itemptr++ = 17 | (extra_bits << 5);
+				run_start += 3 + extra_bits;
+			}
+		} else {
+
+			/* A run of nonzero lengths. */
+
+			/* Symbol 16: RLE 3 to 6 of the previous length. */
+			if ((run_end - run_start) >= 4) {
+				/*
+				 * Symbol 16 repeats the *previous* length, so
+				 * the first length of the run must be output
+				 * literally before any repeats.
+				 */
+				precode_freqs[len]++;
+				*itemptr++ = len;
+				run_start++;
+				do {
+					extra_bits = MIN((run_end - run_start) -
+							 3, 0x3);
+					precode_freqs[16]++;
+					*itemptr++ = 16 | (extra_bits << 5);
+					run_start += 3 + extra_bits;
+				} while ((run_end - run_start) >= 3);
+			}
+		}
+
+		/* Output any remaining lengths without RLE. */
+		while (run_start != run_end) {
+			precode_freqs[len]++;
+			*itemptr++ = len;
+			run_start++;
+		}
+	} while (run_start != num_lens);
+
+	return itemptr - precode_items;
+}
+
+/*
+ * Huffman codeword lengths for dynamic Huffman blocks are compressed using a
+ * separate Huffman code, the "precode", which contains a symbol for each
+ * possible codeword length in the larger code as well as several special
+ * symbols to represent repeated codeword lengths (a form of run-length
+ * encoding). The precode is itself constructed in canonical form, and its
+ * codeword lengths are represented literally in 19 3-bit fields that
+ * immediately precede the compressed codeword lengths of the larger code.
+ */
+
+/* Precompute the information needed to output dynamic Huffman codes. */
+static void
+deflate_precompute_huffman_header(struct libdeflate_compressor *c)
+{
+	/* Compute how many litlen and offset symbols are needed. */
+
+	/* At least 257 litlen symbols (HLIT >= 0) must always be output. */
+	for (c->o.precode.num_litlen_syms = DEFLATE_NUM_LITLEN_SYMS;
+	     c->o.precode.num_litlen_syms > 257;
+	     c->o.precode.num_litlen_syms--)
+		if (c->codes.lens.litlen[c->o.precode.num_litlen_syms - 1] != 0)
+			break;
+
+	for (c->o.precode.num_offset_syms = DEFLATE_NUM_OFFSET_SYMS;
+	     c->o.precode.num_offset_syms > 1;
+	     c->o.precode.num_offset_syms--)
+		if (c->codes.lens.offset[c->o.precode.num_offset_syms - 1] != 0)
+			break;
+
+	/*
+	 * If we're not using the full set of literal/length codeword lengths,
+	 * then temporarily move the offset codeword lengths over so that the
+	 * literal/length and offset codeword lengths are contiguous.
+	 */
+	STATIC_ASSERT(offsetof(struct deflate_lens, offset) ==
+		      DEFLATE_NUM_LITLEN_SYMS);
+	if (c->o.precode.num_litlen_syms != DEFLATE_NUM_LITLEN_SYMS) {
+		memmove((u8 *)&c->codes.lens + c->o.precode.num_litlen_syms,
+			(u8 *)&c->codes.lens + DEFLATE_NUM_LITLEN_SYMS,
+			c->o.precode.num_offset_syms);
+	}
+
+	/*
+	 * Compute the "items" (RLE / literal tokens and extra bits) with which
+	 * the codeword lengths in the larger code will be output.
+	 */
+	c->o.precode.num_items =
+		deflate_compute_precode_items((u8 *)&c->codes.lens,
+					      c->o.precode.num_litlen_syms +
+					      c->o.precode.num_offset_syms,
+					      c->o.precode.freqs,
+					      c->o.precode.items);
+
+	/* Build the precode. */
+	deflate_make_huffman_code(DEFLATE_NUM_PRECODE_SYMS,
+				  MAX_PRE_CODEWORD_LEN,
+				  c->o.precode.freqs, c->o.precode.lens,
+				  c->o.precode.codewords);
+
+	/* Count how many precode lengths we actually need to output. */
+	/* At least 4 precode lengths (HCLEN >= 0) must always be output. */
+	for (c->o.precode.num_explicit_lens = DEFLATE_NUM_PRECODE_SYMS;
+	     c->o.precode.num_explicit_lens > 4;
+	     c->o.precode.num_explicit_lens--)
+		if (c->o.precode.lens[deflate_precode_lens_permutation[
+				c->o.precode.num_explicit_lens - 1]] != 0)
+			break;
+
+	/* Restore the offset codeword lengths if needed. */
+	if (c->o.precode.num_litlen_syms != DEFLATE_NUM_LITLEN_SYMS) {
+		memmove((u8 *)&c->codes.lens + DEFLATE_NUM_LITLEN_SYMS,
+			(u8 *)&c->codes.lens + c->o.precode.num_litlen_syms,
+			c->o.precode.num_offset_syms);
+	}
+}
+
+/*
+ * To make it faster to output matches, compute the "full" match length
+ * codewords, i.e. the concatenation of the litlen codeword and the extra bits
+ * for each possible match length.
+ */
+static void
+deflate_compute_full_len_codewords(struct libdeflate_compressor *c,
+				   const struct deflate_codes *codes)
+{
+	unsigned len;
+
+	/* Codeword plus extra bits must fit in the 32-bit table entry. */
+	STATIC_ASSERT(MAX_LITLEN_CODEWORD_LEN +
+		      DEFLATE_MAX_EXTRA_LENGTH_BITS <= 32);
+
+	for (len = DEFLATE_MIN_MATCH_LEN; len <= DEFLATE_MAX_MATCH_LEN; len++) {
+		unsigned slot = deflate_length_slot[len];
+		unsigned litlen_sym = DEFLATE_FIRST_LEN_SYM + slot;
+		u32 extra_bits = len - deflate_length_slot_base[slot];
+
+		/* Extra bits are appended above the litlen codeword. */
+		c->o.length.codewords[len] =
+			codes->codewords.litlen[litlen_sym] |
+			(extra_bits << codes->lens.litlen[litlen_sym]);
+		c->o.length.lens[len] = codes->lens.litlen[litlen_sym] +
+					deflate_extra_length_bits[slot];
+	}
+}
+
+/*
+ * Write a match to the output buffer. Relies on the full-length codewords
+ * having been precomputed by deflate_compute_full_len_codewords().
+ */
+#define WRITE_MATCH(c_, codes_, length_, offset_, offset_slot_)	\
+do {									\
+	const struct libdeflate_compressor *c__ = (c_);			\
+	const struct deflate_codes *codes__ = (codes_);			\
+	unsigned length__ = (length_);					\
+	unsigned offset__ = (offset_);					\
+	unsigned offset_slot__ = (offset_slot_);			\
+									\
+	/* Litlen symbol and extra length bits */			\
+	STATIC_ASSERT(CAN_BUFFER(MAX_LITLEN_CODEWORD_LEN +		\
+				 DEFLATE_MAX_EXTRA_LENGTH_BITS));	\
+	ADD_BITS(c__->o.length.codewords[length__],			\
+		 c__->o.length.lens[length__]);				\
+									\
+	if (!CAN_BUFFER(MAX_LITLEN_CODEWORD_LEN +			\
+			DEFLATE_MAX_EXTRA_LENGTH_BITS +			\
+			MAX_OFFSET_CODEWORD_LEN +			\
+			DEFLATE_MAX_EXTRA_OFFSET_BITS))			\
+		FLUSH_BITS();						\
+									\
+	/* Offset symbol */						\
+	ADD_BITS(codes__->codewords.offset[offset_slot__],		\
+		 codes__->lens.offset[offset_slot__]);			\
+									\
+	if (!CAN_BUFFER(MAX_OFFSET_CODEWORD_LEN +			\
+			DEFLATE_MAX_EXTRA_OFFSET_BITS))			\
+		FLUSH_BITS();						\
+									\
+	/* Extra offset bits */						\
+	ADD_BITS(offset__ - deflate_offset_slot_base[offset_slot__],	\
+		 deflate_extra_offset_bits[offset_slot__]);		\
+									\
+	FLUSH_BITS();							\
+} while (0)
+
+/*
+ * Choose the best type of block to use (dynamic Huffman, static Huffman, or
+ * uncompressed), then output it. 
*/
+static void
+deflate_flush_block(struct libdeflate_compressor *c,
+		    struct deflate_output_bitstream *os,
+		    const u8 *block_begin, u32 block_length,
+		    const struct deflate_sequence *sequences,
+		    bool is_final_block)
+{
+	/*
+	 * It is hard to get compilers to understand that writes to 'os->next'
+	 * don't alias 'os'. That hurts performance significantly, as
+	 * everything in 'os' would keep getting re-loaded. ('restrict'
+	 * *should* do the trick, but it's unreliable.) Therefore, we keep all
+	 * the output bitstream state in local variables, and output bits using
+	 * macros. This is similar to what the decompressor does.
+	 */
+	const u8 *in_next = block_begin;
+	const u8 * const in_end = block_begin + block_length;
+	bitbuf_t bitbuf = os->bitbuf;
+	unsigned bitcount = os->bitcount;
+	u8 *out_next = os->next;
+	u8 * const out_end = os->end;
+	/* The cost for each block type, in bits */
+	u32 dynamic_cost = 0;
+	u32 static_cost = 0;
+	u32 uncompressed_cost = 0;
+	u32 best_cost;
+	struct deflate_codes *codes;
+	unsigned sym;
+
+	ASSERT(block_length >= MIN_BLOCK_LENGTH || is_final_block);
+	ASSERT(block_length <= MAX_BLOCK_LENGTH);
+	ASSERT(bitcount <= 7);
+	ASSERT((bitbuf & ~(((bitbuf_t)1 << bitcount) - 1)) == 0);
+	ASSERT(out_next <= out_end);
+
+	if (sequences != NULL /* !near_optimal */ ||
+	    !SUPPORT_NEAR_OPTIMAL_PARSING) {
+		/* Tally the end-of-block symbol. */
+		c->freqs.litlen[DEFLATE_END_OF_BLOCK]++;
+
+		/* Build dynamic Huffman codes. */
+		deflate_make_huffman_codes(&c->freqs, &c->codes);
+	} /* Else, this was already done. */
+
+	/* Precompute the precode items and build the precode. */
+	deflate_precompute_huffman_header(c);
+
+	/* Account for the cost of encoding dynamic Huffman codes. */
+	/* 5 + 5 + 4 bits of HLIT/HDIST/HCLEN, plus 3 bits per explicit len. */
+	dynamic_cost += 5 + 5 + 4 + (3 * c->o.precode.num_explicit_lens);
+	for (sym = 0; sym < DEFLATE_NUM_PRECODE_SYMS; sym++) {
+		u32 extra = deflate_extra_precode_bits[sym];
+
+		dynamic_cost += c->o.precode.freqs[sym] *
+				(extra + c->o.precode.lens[sym]);
+	}
+
+	/* Account for the cost of encoding literals. */
+	for (sym = 0; sym < 144; sym++) {
+		dynamic_cost += c->freqs.litlen[sym] *
+				c->codes.lens.litlen[sym];
+		static_cost += c->freqs.litlen[sym] * 8;
+	}
+	for (; sym < 256; sym++) {
+		dynamic_cost += c->freqs.litlen[sym] *
+				c->codes.lens.litlen[sym];
+		static_cost += c->freqs.litlen[sym] * 9;
+	}
+
+	/* Account for the cost of encoding the end-of-block symbol. */
+	dynamic_cost += c->codes.lens.litlen[DEFLATE_END_OF_BLOCK];
+	static_cost += 7;
+
+	/* Account for the cost of encoding lengths. */
+	for (sym = DEFLATE_FIRST_LEN_SYM;
+	     sym < DEFLATE_FIRST_LEN_SYM + ARRAY_LEN(deflate_extra_length_bits);
+	     sym++) {
+		u32 extra = deflate_extra_length_bits[
+					sym - DEFLATE_FIRST_LEN_SYM];
+
+		dynamic_cost += c->freqs.litlen[sym] *
+				(extra + c->codes.lens.litlen[sym]);
+		static_cost += c->freqs.litlen[sym] *
+				(extra + c->static_codes.lens.litlen[sym]);
+	}
+
+	/* Account for the cost of encoding offsets. */
+	for (sym = 0; sym < ARRAY_LEN(deflate_extra_offset_bits); sym++) {
+		u32 extra = deflate_extra_offset_bits[sym];
+
+		dynamic_cost += c->freqs.offset[sym] *
+				(extra + c->codes.lens.offset[sym]);
+		static_cost += c->freqs.offset[sym] * (extra + 5);
+	}
+
+	/* Compute the cost of using uncompressed blocks. */
+	/*
+	 * Padding to byte-align after the 3-bit block header, plus 32 bits of
+	 * LEN/NLEN for the first sub-block, plus 40 more bits for each
+	 * additional 64 KiB sub-block, plus 8 bits per input byte.
+	 */
+	uncompressed_cost += (-(bitcount + 3) & 7) + 32 +
+			     (40 * (DIV_ROUND_UP(block_length,
+						 UINT16_MAX) - 1)) +
+			     (8 * block_length);
+
+	/* Choose and output the cheapest type of block. */
+	best_cost = MIN(static_cost, uncompressed_cost);
+	if (dynamic_cost < best_cost) {
+		const unsigned num_explicit_lens = c->o.precode.num_explicit_lens;
+		const unsigned num_precode_items = c->o.precode.num_items;
+		unsigned precode_sym, precode_item;
+		unsigned i;
+
+		/* Dynamic Huffman block */
+
+		best_cost = dynamic_cost;
+		codes = &c->codes;
+		STATIC_ASSERT(CAN_BUFFER(1 + 2 + 5 + 5 + 4 + 3));
+		ADD_BITS(is_final_block, 1);
+		ADD_BITS(DEFLATE_BLOCKTYPE_DYNAMIC_HUFFMAN, 2);
+		ADD_BITS(c->o.precode.num_litlen_syms - 257, 5);
+		ADD_BITS(c->o.precode.num_offset_syms - 1, 5);
+		ADD_BITS(num_explicit_lens - 4, 4);
+
+		/* Output the lengths of the codewords in the precode. */
+		if (CAN_BUFFER(3 * (DEFLATE_NUM_PRECODE_SYMS - 1))) {
+			/*
+			 * A 64-bit bitbuffer is just one bit too small to hold
+			 * the maximum number of precode lens, so to minimize
+			 * flushes we merge one len with the previous fields.
+			 */
+			precode_sym = deflate_precode_lens_permutation[0];
+			ADD_BITS(c->o.precode.lens[precode_sym], 3);
+			FLUSH_BITS();
+			i = 1; /* num_explicit_lens >= 4 */
+			do {
+				precode_sym =
+					deflate_precode_lens_permutation[i];
+				ADD_BITS(c->o.precode.lens[precode_sym], 3);
+			} while (++i < num_explicit_lens);
+			FLUSH_BITS();
+		} else {
+			FLUSH_BITS();
+			i = 0;
+			do {
+				precode_sym =
+					deflate_precode_lens_permutation[i];
+				ADD_BITS(c->o.precode.lens[precode_sym], 3);
+				FLUSH_BITS();
+			} while (++i < num_explicit_lens);
+		}
+
+		/*
+		 * Output the lengths of the codewords in the litlen and offset
+		 * codes, encoded by the precode.
+		 */
+		i = 0;
+		do {
+			/* Symbol in bits 0-4, extra (repeat) bits above. */
+			precode_item = c->o.precode.items[i];
+			precode_sym = precode_item & 0x1F;
+			STATIC_ASSERT(CAN_BUFFER(MAX_PRE_CODEWORD_LEN + 7));
+			ADD_BITS(c->o.precode.codewords[precode_sym],
+				 c->o.precode.lens[precode_sym]);
+			ADD_BITS(precode_item >> 5,
+				 deflate_extra_precode_bits[precode_sym]);
+			FLUSH_BITS();
+		} while (++i < num_precode_items);
+	} else if (static_cost < uncompressed_cost) {
+		/* Static Huffman block */
+		codes = &c->static_codes;
+		ADD_BITS(is_final_block, 1);
+		ADD_BITS(DEFLATE_BLOCKTYPE_STATIC_HUFFMAN, 2);
+		FLUSH_BITS();
+	} else {
+		/*
+		 * Uncompressed block(s). DEFLATE limits the length of
+		 * uncompressed blocks to UINT16_MAX bytes, so if the length of
+		 * the "block" we're flushing is over UINT16_MAX, we actually
+		 * output multiple blocks.
+		 */
+		do {
+			u8 bfinal = 0;
+			size_t len = UINT16_MAX;
+
+			if (in_end - in_next <= UINT16_MAX) {
+				bfinal = is_final_block;
+				len = in_end - in_next;
+			}
+			if (out_end - out_next <
+			    (bitcount + 3 + 7) / 8 + 4 + len) {
+				/* Not enough output space remaining. */
+				out_next = out_end;
+				goto out;
+			}
+			/*
+			 * Output BFINAL (1 bit) and BTYPE (2 bits), then align
+			 * to a byte boundary.
+			 */
+			STATIC_ASSERT(DEFLATE_BLOCKTYPE_UNCOMPRESSED == 0);
+			*out_next++ = (bfinal << bitcount) | bitbuf;
+			if (bitcount > 5)
+				*out_next++ = 0;
+			bitbuf = 0;
+			bitcount = 0;
+			/* Output LEN and NLEN, then the data itself. */
+			put_unaligned_le16(len, out_next);
+			out_next += 2;
+			put_unaligned_le16(~len, out_next);
+			out_next += 2;
+			memcpy(out_next, in_next, len);
+			out_next += len;
+			in_next += len;
+		} while (in_next != in_end);
+		/* Done outputting uncompressed block(s) */
+		goto out;
+	}
+
+	/* Output the literals and matches for a dynamic or static block. */
+	ASSERT(bitcount <= 7);
+	deflate_compute_full_len_codewords(c, codes);
+#if SUPPORT_NEAR_OPTIMAL_PARSING
+	if (sequences == NULL) {
+		/* Output the literals and matches from the minimum-cost path */
+		struct deflate_optimum_node *cur_node =
+			&c->p.n.optimum_nodes[0];
+		struct deflate_optimum_node * const end_node =
+			&c->p.n.optimum_nodes[block_length];
+		do {
+			unsigned length = cur_node->item & OPTIMUM_LEN_MASK;
+			unsigned offset = cur_node->item >>
+					  OPTIMUM_OFFSET_SHIFT;
+			if (length == 1) {
+				/* Literal */
+				ADD_BITS(codes->codewords.litlen[offset],
+					 codes->lens.litlen[offset]);
+				FLUSH_BITS();
+			} else {
+				/* Match */
+				WRITE_MATCH(c, codes, length, offset,
+					    c->p.n.offset_slot_full[offset]);
+			}
+			cur_node += length;
+		} while (cur_node != end_node);
+	} else
+#endif /* SUPPORT_NEAR_OPTIMAL_PARSING */
+	{
+		/* Output the literals and matches from the sequences list. */
+		const struct deflate_sequence *seq;
+
+		for (seq = sequences; ; seq++) {
+			u32 litrunlen = seq->litrunlen_and_length &
+					SEQ_LITRUNLEN_MASK;
+			unsigned length = seq->litrunlen_and_length >>
+					  SEQ_LENGTH_SHIFT;
+			unsigned lit;
+
+			/* Output a run of literals. */
+			/* Unrolled 4 at a time when the bitbuffer allows. */
+			if (CAN_BUFFER(4 * MAX_LITLEN_CODEWORD_LEN)) {
+				for (; litrunlen >= 4; litrunlen -= 4) {
+					lit = *in_next++;
+					ADD_BITS(codes->codewords.litlen[lit],
+						 codes->lens.litlen[lit]);
+					lit = *in_next++;
+					ADD_BITS(codes->codewords.litlen[lit],
+						 codes->lens.litlen[lit]);
+					lit = *in_next++;
+					ADD_BITS(codes->codewords.litlen[lit],
+						 codes->lens.litlen[lit]);
+					lit = *in_next++;
+					ADD_BITS(codes->codewords.litlen[lit],
+						 codes->lens.litlen[lit]);
+					FLUSH_BITS();
+				}
+				if (litrunlen-- != 0) {
+					lit = *in_next++;
+					ADD_BITS(codes->codewords.litlen[lit],
+						 codes->lens.litlen[lit]);
+					if (litrunlen-- != 0) {
+						lit = *in_next++;
+						ADD_BITS(codes->codewords.litlen[lit],
+							 codes->lens.litlen[lit]);
+						if (litrunlen-- != 0) {
+							lit = *in_next++;
+							ADD_BITS(codes->codewords.litlen[lit],
+								 codes->lens.litlen[lit]);
+						}
+					}
+					FLUSH_BITS();
+				}
+			} else {
+				while (litrunlen--) {
+					lit = *in_next++;
+					ADD_BITS(codes->codewords.litlen[lit],
+						 codes->lens.litlen[lit]);
+					FLUSH_BITS();
+				}
+			}
+
+			if (length == 0) { /* Last sequence? */
+				ASSERT(in_next == in_end);
+				break;
+			}
+
+			/* Output a match. */
+			WRITE_MATCH(c, codes, length, seq->offset,
+				    seq->offset_slot);
+			in_next += length;
+		}
+	}
+
+	/* Output the end-of-block symbol. */
+	ASSERT(bitcount <= 7);
+	ADD_BITS(codes->codewords.litlen[DEFLATE_END_OF_BLOCK],
+		 codes->lens.litlen[DEFLATE_END_OF_BLOCK]);
+	FLUSH_BITS();
+out:
+	ASSERT(bitcount <= 7);
+	/*
+	 * Assert that the block cost was computed correctly, as
+	 * libdeflate_deflate_compress_bound() relies on this via the assumption
+	 * that uncompressed blocks will always be used when cheaper.
+	 */
+	ASSERT(8 * (out_next - os->next) + bitcount - os->bitcount ==
+	       3 + best_cost || out_next == out_end);
+
+	os->bitbuf = bitbuf;
+	os->bitcount = bitcount;
+	os->next = out_next;
+}
+
+/******************************************************************************/
+
+/*
+ * Block splitting algorithm. The problem is to decide when it is worthwhile to
+ * start a new block with new Huffman codes. There is a theoretically optimal
+ * solution: recursively consider every possible block split, considering the
+ * exact cost of each block, and choose the minimum cost approach. But this is
+ * far too slow. Instead, as an approximation, we can count symbols and after
+ * every N symbols, compare the expected distribution of symbols based on the
+ * previous data with the actual distribution. If they differ "by enough", then
+ * start a new block.
+ *
+ * As an optimization and heuristic, we don't distinguish between every symbol
+ * but rather we combine many symbols into a single "observation type". For
+ * literals we only look at the high bits and low bits, and for matches we only
+ * look at whether the match is long or not. The assumption is that for typical
+ * "real" data, places that are good block boundaries will tend to be noticeable
+ * based only on changes in these aggregate probabilities, without looking for
+ * subtle differences in individual symbols. For example, a change from ASCII
+ * bytes to non-ASCII bytes, or from few matches (generally less compressible)
+ * to many matches (generally more compressible), would be easily noticed based
+ * on the aggregates.
+ *
+ * For determining whether the probability distributions are "different enough"
+ * to start a new block, the simple heuristic of splitting when the sum of
+ * absolute differences exceeds a constant seems to be good enough. We also add
+ * a number proportional to the block length so that the algorithm is more
+ * likely to end long blocks than short blocks. This reflects the general
+ * expectation that it will become increasingly beneficial to start a new block
+ * as the current block grows longer.
+ *
+ * Finally, for an approximation, it is not strictly necessary that the exact
+ * symbols being used are considered. 
With "near-optimal parsing", for example,
+ * the actual symbols that will be used are unknown until after the block
+ * boundary is chosen and the block has been optimized. Since the final choices
+ * cannot be used, we can use preliminary "greedy" choices instead.
+ */
+
+/* Initialize the block split statistics when starting a new block. */
+static void
+init_block_split_stats(struct block_split_stats *stats)
+{
+	int i;
+
+	for (i = 0; i < NUM_OBSERVATION_TYPES; i++) {
+		stats->new_observations[i] = 0;
+		stats->observations[i] = 0;
+	}
+	stats->num_new_observations = 0;
+	stats->num_observations = 0;
+}
+
+/*
+ * Literal observation. Heuristic: use the top 2 bits and low 1 bits of the
+ * literal, for 8 possible literal observation types.
+ */
+static forceinline void
+observe_literal(struct block_split_stats *stats, u8 lit)
+{
+	/* Index = (top 2 bits of lit, in bit positions 1-2) | (low bit). */
+	stats->new_observations[((lit >> 5) & 0x6) | (lit & 1)]++;
+	stats->num_new_observations++;
+}
+
+/*
+ * Match observation. Heuristic: use one observation type for "short match" and
+ * one observation type for "long match".
+ */
+static forceinline void
+observe_match(struct block_split_stats *stats, unsigned length)
+{
+	/* Matches of length >= 9 count as "long". */
+	stats->new_observations[NUM_LITERAL_OBSERVATION_TYPES +
+				(length >= 9)]++;
+	stats->num_new_observations++;
+}
+
+/* Fold the pending ("new") observation counts into the running totals. */
+static void
+merge_new_observations(struct block_split_stats *stats)
+{
+	int i;
+
+	for (i = 0; i < NUM_OBSERVATION_TYPES; i++) {
+		stats->observations[i] += stats->new_observations[i];
+		stats->new_observations[i] = 0;
+	}
+	stats->num_observations += stats->num_new_observations;
+	stats->num_new_observations = 0;
+}
+
+static bool
+do_end_block_check(struct block_split_stats *stats, u32 block_length)
+{
+	if (stats->num_observations > 0) {
+		/*
+		 * Compute the sum of absolute differences of probabilities. To
+		 * avoid needing to use floating point arithmetic or do slow
+		 * divisions, we do all arithmetic with the probabilities
+		 * multiplied by num_observations * num_new_observations. 
E.g.,
+		 * for the "old" observations the probabilities would be
+		 * (double)observations[i] / num_observations, but since we
+		 * multiply by both num_observations and num_new_observations we
+		 * really do observations[i] * num_new_observations.
+		 */
+		u32 total_delta = 0;
+		u32 num_items;
+		u32 cutoff;
+		int i;
+
+		for (i = 0; i < NUM_OBSERVATION_TYPES; i++) {
+			u32 expected = stats->observations[i] *
+				       stats->num_new_observations;
+			u32 actual = stats->new_observations[i] *
+				     stats->num_observations;
+			u32 delta = (actual > expected) ? actual - expected :
+							  expected - actual;
+
+			total_delta += delta;
+		}
+
+		num_items = stats->num_observations +
+			    stats->num_new_observations;
+		/*
+		 * Heuristic: the cutoff is when the sum of absolute differences
+		 * of probabilities becomes at least 200/512. As above, the
+		 * probability is multiplied by both num_new_observations and
+		 * num_observations. Be careful to avoid integer overflow.
+		 */
+		cutoff = stats->num_new_observations * 200 / 512 *
+			 stats->num_observations;
+		/*
+		 * Very short blocks have a lot of overhead for the Huffman
+		 * codes, so only use them if it clearly seems worthwhile.
+		 * (This is an additional penalty, which adds to the smaller
+		 * penalty below which scales more slowly.)
+		 */
+		if (block_length < 10000 && num_items < 8192)
+			cutoff += (u64)cutoff * (8192 - num_items) / 8192;
+
+		/* Ready to end the block? */
+		/* The block-length term makes long blocks easier to end. */
+		if (total_delta +
+		    (block_length / 4096) * stats->num_observations >= cutoff)
+			return true;
+	}
+	merge_new_observations(stats);
+	return false;
+}
+
+static forceinline bool
+ready_to_check_block(const struct block_split_stats *stats,
+		     const u8 *in_block_begin, const u8 *in_next,
+		     const u8 *in_end)
+{
+	return stats->num_new_observations >= NUM_OBSERVATIONS_PER_BLOCK_CHECK
+	       && in_next - in_block_begin >= MIN_BLOCK_LENGTH
+	       && in_end - in_next >= MIN_BLOCK_LENGTH;
+}
+
+static forceinline bool
+should_end_block(struct block_split_stats *stats,
+		 const u8 *in_block_begin, const u8 *in_next, const u8 *in_end)
+{
+	/* Ready to try to end the block (again)? */
+	if (!ready_to_check_block(stats, in_block_begin, in_next, in_end))
+		return false;
+
+	return do_end_block_check(stats, in_next - in_block_begin);
+}
+
+/******************************************************************************/
+
+/* Reset per-block state and start the first (empty) sequence. */
+static void
+deflate_begin_sequences(struct libdeflate_compressor *c,
+			struct deflate_sequence *first_seq)
+{
+	deflate_reset_symbol_frequencies(c);
+	first_seq->litrunlen_and_length = 0;
+}
+
+/* Tally a literal and extend the current sequence's literal run. */
+static forceinline void
+deflate_choose_literal(struct libdeflate_compressor *c, unsigned literal,
+		       bool gather_split_stats, struct deflate_sequence *seq)
+{
+	c->freqs.litlen[literal]++;
+
+	if (gather_split_stats)
+		observe_literal(&c->split_stats, literal);
+
+	STATIC_ASSERT(MAX_BLOCK_LENGTH <= SEQ_LITRUNLEN_MASK);
+	seq->litrunlen_and_length++;
+}
+
+/*
+ * Tally a match, finalize the current sequence with it, and advance *seq_p to
+ * a fresh (empty) sequence.
+ */
+static forceinline void
+deflate_choose_match(struct libdeflate_compressor *c,
+		     unsigned length, unsigned offset, bool gather_split_stats,
+		     struct deflate_sequence **seq_p)
+{
+	struct deflate_sequence *seq = *seq_p;
+	unsigned length_slot = deflate_length_slot[length];
+	unsigned offset_slot = deflate_get_offset_slot(offset);
+
+	c->freqs.litlen[DEFLATE_FIRST_LEN_SYM + length_slot]++;
+	c->freqs.offset[offset_slot]++;
+	if (gather_split_stats)
+		observe_match(&c->split_stats, length);
+
+	/* The match length lives in the high bits, above the literal run. */
+	seq->litrunlen_and_length |= (u32)length << SEQ_LENGTH_SHIFT;
+	seq->offset = offset;
+	seq->offset_slot = offset_slot;
+
+	seq++;
+	seq->litrunlen_and_length = 0;
+	*seq_p = seq;
+}
+
+/*
+ * Decrease the maximum and nice match lengths if we're approaching the end of
+ * the input buffer.
+ */
+static forceinline void
+adjust_max_and_nice_len(unsigned *max_len, unsigned *nice_len, size_t remaining)
+{
+	if (unlikely(remaining < DEFLATE_MAX_MATCH_LEN)) {
+		*max_len = remaining;
+		*nice_len = MIN(*nice_len, *max_len);
+	}
+}
+
+/*
+ * Choose the minimum match length for the greedy and lazy parsers.
+ *
+ * By default the minimum match length is 3, which is the smallest length the
+ * DEFLATE format allows. However, with greedy and lazy parsing, some data
+ * (e.g. DNA sequencing data) benefits greatly from a longer minimum length.
+ * Typically, this is because literals are very cheap. In general, the
+ * near-optimal parser handles this case naturally, but the greedy and lazy
+ * parsers need a heuristic to decide when to use short matches.
+ *
+ * The heuristic we use is to make the minimum match length depend on the number
+ * of different literals that exist in the data. If there are many different
+ * literals, then literals will probably be expensive, so short matches will
+ * probably be worthwhile. Conversely, if not many literals are used, then
+ * probably literals will be cheap and short matches won't be worthwhile.
+ */
+static unsigned
+choose_min_match_len(unsigned num_used_literals, unsigned max_search_depth)
+{
+	/* map from num_used_literals to min_len */
+	static const u8 min_lens[] = {
+		9, 9, 9, 9, 9, 9, 8, 8, 7, 7, 6, 6, 6, 6, 6, 6,
+		5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+		5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4, 4, 4,
+		4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+		4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+		/* The rest is implicitly 3. 
*/
+	};
+	unsigned min_len;
+
+	STATIC_ASSERT(DEFLATE_MIN_MATCH_LEN <= 3);
+	STATIC_ASSERT(ARRAY_LEN(min_lens) <= DEFLATE_NUM_LITERALS + 1);
+
+	if (num_used_literals >= ARRAY_LEN(min_lens))
+		return 3;
+	min_len = min_lens[num_used_literals];
+	/*
+	 * With a low max_search_depth, it may be too hard to find long matches.
+	 */
+	if (max_search_depth < 16) {
+		if (max_search_depth < 5)
+			min_len = MIN(min_len, 4);
+		else if (max_search_depth < 10)
+			min_len = MIN(min_len, 5);
+		else
+			min_len = MIN(min_len, 7);
+	}
+	return min_len;
+}
+
+/* Estimate the minimum match length from a sample of the input data. */
+static unsigned
+calculate_min_match_len(const u8 *data, size_t data_len,
+			unsigned max_search_depth)
+{
+	u8 used[256] = { 0 };
+	unsigned num_used_literals = 0;
+	size_t i;
+
+	/*
+	 * For an initial approximation, scan the first 4 KiB of data. The
+	 * caller may use recalculate_min_match_len() to update min_len later.
+	 */
+	data_len = MIN(data_len, 4096);
+	for (i = 0; i < data_len; i++)
+		used[data[i]] = 1;
+	for (i = 0; i < 256; i++)
+		num_used_literals += used[i];
+	return choose_min_match_len(num_used_literals, max_search_depth);
+}
+
+/*
+ * Recalculate the minimum match length for a block, now that we know the
+ * distribution of literals that are actually being used (freqs->litlen).
+ */
+static unsigned
+recalculate_min_match_len(const struct deflate_freqs *freqs,
+			  unsigned max_search_depth)
+{
+	u32 literal_freq = 0;
+	u32 cutoff;
+	unsigned num_used_literals = 0;
+	int i;
+
+	for (i = 0; i < DEFLATE_NUM_LITERALS; i++)
+		literal_freq += freqs->litlen[i];
+
+	cutoff = literal_freq >> 10; /* Ignore literals used very rarely. */
+
+	for (i = 0; i < DEFLATE_NUM_LITERALS; i++) {
+		if (freqs->litlen[i] > cutoff)
+			num_used_literals++;
+	}
+	return choose_min_match_len(num_used_literals, max_search_depth);
+}
+
+/*
+ * Return the latest position at which the current block may end, keeping at
+ * least MIN_BLOCK_LENGTH bytes for a possible following block.
+ */
+static forceinline const u8 *
+choose_max_block_end(const u8 *in_block_begin, const u8 *in_end,
+		     size_t soft_max_len)
+{
+	if (in_end - in_block_begin < soft_max_len + MIN_BLOCK_LENGTH)
+		return in_end;
+	return in_block_begin + soft_max_len;
+}
+
+/*
+ * This is the level 0 "compressor". It always outputs uncompressed blocks.
+ */
+static size_t
+deflate_compress_none(const u8 *in, size_t in_nbytes,
+		      u8 *out, size_t out_nbytes_avail)
+{
+	const u8 *in_next = in;
+	const u8 * const in_end = in + in_nbytes;
+	u8 *out_next = out;
+	u8 * const out_end = out + out_nbytes_avail;
+
+	/*
+	 * If the input is zero-length, we still must output a block in order
+	 * for the output to be a valid DEFLATE stream. Handle this case
+	 * specially to avoid potentially passing NULL to memcpy() below.
+	 */
+	if (unlikely(in_nbytes == 0)) {
+		/* 5 bytes = 1 header byte + 2 LEN bytes + 2 NLEN bytes. */
+		if (out_nbytes_avail < 5)
+			return 0;
+		/* BFINAL and BTYPE */
+		*out_next++ = 1 | (DEFLATE_BLOCKTYPE_UNCOMPRESSED << 1);
+		/* LEN and NLEN */
+		put_unaligned_le32(0xFFFF0000, out_next);
+		return 5;
+	}
+
+	do {
+		u8 bfinal = 0;
+		size_t len = UINT16_MAX;
+
+		if (in_end - in_next <= UINT16_MAX) {
+			bfinal = 1;
+			len = in_end - in_next;
+		}
+		if (out_end - out_next < 5 + len)
+			return 0;
+		/*
+		 * Output BFINAL and BTYPE. The stream is already byte-aligned
+		 * here, so this step always requires outputting exactly 1 byte.
+		 */
+		*out_next++ = bfinal | (DEFLATE_BLOCKTYPE_UNCOMPRESSED << 1);
+
+		/* Output LEN and NLEN, then the data itself. */
+		put_unaligned_le16(len, out_next);
+		out_next += 2;
+		put_unaligned_le16(~len, out_next);
+		out_next += 2;
+		memcpy(out_next, in_next, len);
+		out_next += len;
+		in_next += len;
+	} while (in_next != in_end);
+
+	return out_next - out;
+}
+
+/*
+ * This is a faster variant of deflate_compress_greedy(). 
It uses the + * ht_matchfinder rather than the hc_matchfinder. It also skips the block + * splitting algorithm and just uses fixed length blocks. c->max_search_depth + * has no effect with this algorithm, as it is hardcoded in ht_matchfinder.h. + */ +static void +deflate_compress_fastest(struct libdeflate_compressor * restrict c, + const u8 *in, size_t in_nbytes, + struct deflate_output_bitstream *os) +{ + const u8 *in_next = in; + const u8 *in_end = in_next + in_nbytes; + const u8 *in_cur_base = in_next; + unsigned max_len = DEFLATE_MAX_MATCH_LEN; + unsigned nice_len = MIN(c->nice_match_length, max_len); + u32 next_hash = 0; + + ht_matchfinder_init(&c->p.f.ht_mf); + + do { + /* Starting a new DEFLATE block */ + + const u8 * const in_block_begin = in_next; + const u8 * const in_max_block_end = choose_max_block_end( + in_next, in_end, FAST_SOFT_MAX_BLOCK_LENGTH); + struct deflate_sequence *seq = c->p.f.sequences; + + deflate_begin_sequences(c, seq); + + do { + u32 length; + u32 offset; + size_t remaining = in_end - in_next; + + if (unlikely(remaining < DEFLATE_MAX_MATCH_LEN)) { + max_len = remaining; + if (max_len < HT_MATCHFINDER_REQUIRED_NBYTES) { + do { + deflate_choose_literal(c, + *in_next++, false, seq); + } while (--max_len); + break; + } + nice_len = MIN(nice_len, max_len); + } + length = ht_matchfinder_longest_match(&c->p.f.ht_mf, + &in_cur_base, + in_next, + max_len, + nice_len, + &next_hash, + &offset); + if (length) { + /* Match found */ + deflate_choose_match(c, length, offset, false, + &seq); + ht_matchfinder_skip_bytes(&c->p.f.ht_mf, + &in_cur_base, + in_next + 1, + in_end, + length - 1, + &next_hash); + in_next += length; + } else { + /* No match found */ + deflate_choose_literal(c, *in_next++, false, + seq); + } + + /* Check if it's time to output another block. 
*/ + } while (in_next < in_max_block_end && + seq < &c->p.f.sequences[FAST_SEQ_STORE_LENGTH]); + + deflate_flush_block(c, os, in_block_begin, + in_next - in_block_begin, + c->p.f.sequences, in_next == in_end); + } while (in_next != in_end); +} + +/* + * This is the "greedy" DEFLATE compressor. It always chooses the longest match. + */ +static void +deflate_compress_greedy(struct libdeflate_compressor * restrict c, + const u8 *in, size_t in_nbytes, + struct deflate_output_bitstream *os) +{ + const u8 *in_next = in; + const u8 *in_end = in_next + in_nbytes; + const u8 *in_cur_base = in_next; + unsigned max_len = DEFLATE_MAX_MATCH_LEN; + unsigned nice_len = MIN(c->nice_match_length, max_len); + u32 next_hashes[2] = {0, 0}; + + hc_matchfinder_init(&c->p.g.hc_mf); + + do { + /* Starting a new DEFLATE block */ + + const u8 * const in_block_begin = in_next; + const u8 * const in_max_block_end = choose_max_block_end( + in_next, in_end, SOFT_MAX_BLOCK_LENGTH); + struct deflate_sequence *seq = c->p.g.sequences; + unsigned min_len; + + init_block_split_stats(&c->split_stats); + deflate_begin_sequences(c, seq); + min_len = calculate_min_match_len(in_next, + in_max_block_end - in_next, + c->max_search_depth); + do { + u32 length; + u32 offset; + + adjust_max_and_nice_len(&max_len, &nice_len, + in_end - in_next); + length = hc_matchfinder_longest_match( + &c->p.g.hc_mf, + &in_cur_base, + in_next, + min_len - 1, + max_len, + nice_len, + c->max_search_depth, + next_hashes, + &offset); + + if (length >= min_len && + (length > DEFLATE_MIN_MATCH_LEN || + offset <= 4096)) { + /* Match found */ + deflate_choose_match(c, length, offset, true, + &seq); + hc_matchfinder_skip_bytes(&c->p.g.hc_mf, + &in_cur_base, + in_next + 1, + in_end, + length - 1, + next_hashes); + in_next += length; + } else { + /* No match found */ + deflate_choose_literal(c, *in_next++, true, + seq); + } + + /* Check if it's time to output another block. 
*/ + } while (in_next < in_max_block_end && + seq < &c->p.g.sequences[SEQ_STORE_LENGTH] && + !should_end_block(&c->split_stats, + in_block_begin, in_next, in_end)); + + deflate_flush_block(c, os, in_block_begin, + in_next - in_block_begin, + c->p.g.sequences, in_next == in_end); + } while (in_next != in_end); +} + +static forceinline void +deflate_compress_lazy_generic(struct libdeflate_compressor * restrict c, + const u8 *in, size_t in_nbytes, + struct deflate_output_bitstream *os, bool lazy2) +{ + const u8 *in_next = in; + const u8 *in_end = in_next + in_nbytes; + const u8 *in_cur_base = in_next; + unsigned max_len = DEFLATE_MAX_MATCH_LEN; + unsigned nice_len = MIN(c->nice_match_length, max_len); + u32 next_hashes[2] = {0, 0}; + + hc_matchfinder_init(&c->p.g.hc_mf); + + do { + /* Starting a new DEFLATE block */ + + const u8 * const in_block_begin = in_next; + const u8 * const in_max_block_end = choose_max_block_end( + in_next, in_end, SOFT_MAX_BLOCK_LENGTH); + const u8 *next_recalc_min_len = + in_next + MIN(in_end - in_next, 10000); + struct deflate_sequence *seq = c->p.g.sequences; + unsigned min_len; + + init_block_split_stats(&c->split_stats); + deflate_begin_sequences(c, seq); + min_len = calculate_min_match_len(in_next, + in_max_block_end - in_next, + c->max_search_depth); + do { + unsigned cur_len; + unsigned cur_offset; + unsigned next_len; + unsigned next_offset; + + /* + * Recalculate the minimum match length if it hasn't + * been done recently. + */ + if (in_next >= next_recalc_min_len) { + min_len = recalculate_min_match_len( + &c->freqs, + c->max_search_depth); + next_recalc_min_len += + MIN(in_end - next_recalc_min_len, + in_next - in_block_begin); + } + + /* Find the longest match at the current position. 
*/ + adjust_max_and_nice_len(&max_len, &nice_len, + in_end - in_next); + cur_len = hc_matchfinder_longest_match( + &c->p.g.hc_mf, + &in_cur_base, + in_next, + min_len - 1, + max_len, + nice_len, + c->max_search_depth, + next_hashes, + &cur_offset); + if (cur_len < min_len || + (cur_len == DEFLATE_MIN_MATCH_LEN && + cur_offset > 8192)) { + /* No match found. Choose a literal. */ + deflate_choose_literal(c, *in_next++, true, + seq); + continue; + } + in_next++; + +have_cur_match: + /* + * We have a match at the current position. + * If it's very long, choose it immediately. + */ + if (cur_len >= nice_len) { + deflate_choose_match(c, cur_len, cur_offset, + true, &seq); + hc_matchfinder_skip_bytes(&c->p.g.hc_mf, + &in_cur_base, + in_next, + in_end, + cur_len - 1, + next_hashes); + in_next += cur_len - 1; + continue; + } + + /* + * Try to find a better match at the next position. + * + * Note: since we already have a match at the *current* + * position, we use only half the 'max_search_depth' + * when checking the *next* position. This is a useful + * trade-off because it's more worthwhile to use a + * greater search depth on the initial match. + * + * Note: it's possible to structure the code such that + * there's only one call to longest_match(), which + * handles both the "find the initial match" and "try to + * find a better match" cases. However, it is faster to + * have two call sites, with longest_match() inlined at + * each. + */ + adjust_max_and_nice_len(&max_len, &nice_len, + in_end - in_next); + next_len = hc_matchfinder_longest_match( + &c->p.g.hc_mf, + &in_cur_base, + in_next++, + cur_len - 1, + max_len, + nice_len, + c->max_search_depth >> 1, + next_hashes, + &next_offset); + if (next_len >= cur_len && + 4 * (int)(next_len - cur_len) + + ((int)bsr32(cur_offset) - + (int)bsr32(next_offset)) > 2) { + /* + * Found a better match at the next position. + * Output a literal. Then the next match + * becomes the current match. 
+ */ + deflate_choose_literal(c, *(in_next - 2), true, + seq); + cur_len = next_len; + cur_offset = next_offset; + goto have_cur_match; + } + + if (lazy2) { + /* In lazy2 mode, look ahead another position */ + adjust_max_and_nice_len(&max_len, &nice_len, + in_end - in_next); + next_len = hc_matchfinder_longest_match( + &c->p.g.hc_mf, + &in_cur_base, + in_next++, + cur_len - 1, + max_len, + nice_len, + c->max_search_depth >> 2, + next_hashes, + &next_offset); + if (next_len >= cur_len && + 4 * (int)(next_len - cur_len) + + ((int)bsr32(cur_offset) - + (int)bsr32(next_offset)) > 6) { + /* + * There's a much better match two + * positions ahead, so use two literals. + */ + deflate_choose_literal( + c, *(in_next - 3), true, seq); + deflate_choose_literal( + c, *(in_next - 2), true, seq); + cur_len = next_len; + cur_offset = next_offset; + goto have_cur_match; + } + /* + * No better match at either of the next 2 + * positions. Output the current match. + */ + deflate_choose_match(c, cur_len, cur_offset, + true, &seq); + if (cur_len > 3) { + hc_matchfinder_skip_bytes(&c->p.g.hc_mf, + &in_cur_base, + in_next, + in_end, + cur_len - 3, + next_hashes); + in_next += cur_len - 3; + } + } else { /* !lazy2 */ + /* + * No better match at the next position. Output + * the current match. + */ + deflate_choose_match(c, cur_len, cur_offset, + true, &seq); + hc_matchfinder_skip_bytes(&c->p.g.hc_mf, + &in_cur_base, + in_next, + in_end, + cur_len - 2, + next_hashes); + in_next += cur_len - 2; + } + /* Check if it's time to output another block. */ + } while (in_next < in_max_block_end && + seq < &c->p.g.sequences[SEQ_STORE_LENGTH] && + !should_end_block(&c->split_stats, + in_block_begin, in_next, in_end)); + + deflate_flush_block(c, os, in_block_begin, + in_next - in_block_begin, + c->p.g.sequences, in_next == in_end); + } while (in_next != in_end); +} + +/* + * This is the "lazy" DEFLATE compressor. 
Before choosing a match, it checks to + * see if there's a better match at the next position. If yes, it outputs a + * literal and continues to the next position. If no, it outputs the match. + */ +static void +deflate_compress_lazy(struct libdeflate_compressor * restrict c, + const u8 *in, size_t in_nbytes, + struct deflate_output_bitstream *os) +{ + deflate_compress_lazy_generic(c, in, in_nbytes, os, false); +} + +/* + * The lazy2 compressor. This is similar to the regular lazy one, but it looks + * for a better match at the next 2 positions rather than the next 1. This + * makes it take slightly more time, but compress some inputs slightly more. + */ +static void +deflate_compress_lazy2(struct libdeflate_compressor * restrict c, + const u8 *in, size_t in_nbytes, + struct deflate_output_bitstream *os) +{ + deflate_compress_lazy_generic(c, in, in_nbytes, os, true); +} + +#if SUPPORT_NEAR_OPTIMAL_PARSING + +/* + * Follow the minimum-cost path in the graph of possible match/literal choices + * for the current block and compute the frequencies of the Huffman symbols that + * would be needed to output those matches and literals. + */ +static void +deflate_tally_item_list(struct libdeflate_compressor *c, u32 block_length) +{ + struct deflate_optimum_node *cur_node = &c->p.n.optimum_nodes[0]; + struct deflate_optimum_node *end_node = + &c->p.n.optimum_nodes[block_length]; + + do { + unsigned length = cur_node->item & OPTIMUM_LEN_MASK; + unsigned offset = cur_node->item >> OPTIMUM_OFFSET_SHIFT; + + if (length == 1) { + /* Literal */ + c->freqs.litlen[offset]++; + } else { + /* Match */ + c->freqs.litlen[DEFLATE_FIRST_LEN_SYM + + deflate_length_slot[length]]++; + c->freqs.offset[c->p.n.offset_slot_full[offset]]++; + } + cur_node += length; + } while (cur_node != end_node); + + /* Tally the end-of-block symbol. */ + c->freqs.litlen[DEFLATE_END_OF_BLOCK]++; +} + +/* Set the current cost model from the codeword lengths specified in @lens. 
*/ +static void +deflate_set_costs_from_codes(struct libdeflate_compressor *c, + const struct deflate_lens *lens) +{ + unsigned i; + + /* Literals */ + for (i = 0; i < DEFLATE_NUM_LITERALS; i++) { + u32 bits = (lens->litlen[i] ? + lens->litlen[i] : LITERAL_NOSTAT_BITS); + + c->p.n.costs.literal[i] = bits * BIT_COST; + } + + /* Lengths */ + for (i = DEFLATE_MIN_MATCH_LEN; i <= DEFLATE_MAX_MATCH_LEN; i++) { + unsigned length_slot = deflate_length_slot[i]; + unsigned litlen_sym = DEFLATE_FIRST_LEN_SYM + length_slot; + u32 bits = (lens->litlen[litlen_sym] ? + lens->litlen[litlen_sym] : LENGTH_NOSTAT_BITS); + + bits += deflate_extra_length_bits[length_slot]; + c->p.n.costs.length[i] = bits * BIT_COST; + } + + /* Offset slots */ + for (i = 0; i < ARRAY_LEN(deflate_offset_slot_base); i++) { + u32 bits = (lens->offset[i] ? + lens->offset[i] : OFFSET_NOSTAT_BITS); + + bits += deflate_extra_offset_bits[i]; + c->p.n.costs.offset_slot[i] = bits * BIT_COST; + } +} + +/* + * This lookup table gives the default cost of a literal symbol and of a length + * symbol, depending on the characteristics of the input data. It was generated + * by scripts/gen_default_litlen_costs.py. + * + * This table is indexed first by the estimated match probability: + * + * i=0: data doesn't contain many matches [match_prob=0.25] + * i=1: neutral [match_prob=0.50] + * i=2: data contains lots of matches [match_prob=0.75] + * + * This lookup produces a subtable which maps the number of distinct used + * literals to the default cost of a literal symbol, i.e.: + * + * int(-log2((1 - match_prob) / num_used_literals) * BIT_COST) + * + * ... for num_used_literals in [1, 256] (and 0, which is copied from 1). This + * accounts for literals usually getting cheaper as the number of distinct + * literals decreases, and as the proportion of literals to matches increases. 
+ * + * The lookup also produces the cost of a length symbol, which is: + * + * int(-log2(match_prob/NUM_LEN_SLOTS) * BIT_COST) + * + * Note: we don't currently assign different costs to different literal symbols, + * or to different length symbols, as this is hard to do in a useful way. + */ +static const struct { + u8 used_lits_to_lit_cost[257]; + u8 len_sym_cost; +} default_litlen_costs[] = { + { /* match_prob = 0.25 */ + .used_lits_to_lit_cost = { + 6, 6, 22, 32, 38, 43, 48, 51, + 54, 57, 59, 61, 64, 65, 67, 69, + 70, 72, 73, 74, 75, 76, 77, 79, + 80, 80, 81, 82, 83, 84, 85, 85, + 86, 87, 88, 88, 89, 89, 90, 91, + 91, 92, 92, 93, 93, 94, 95, 95, + 96, 96, 96, 97, 97, 98, 98, 99, + 99, 99, 100, 100, 101, 101, 101, 102, + 102, 102, 103, 103, 104, 104, 104, 105, + 105, 105, 105, 106, 106, 106, 107, 107, + 107, 108, 108, 108, 108, 109, 109, 109, + 109, 110, 110, 110, 111, 111, 111, 111, + 112, 112, 112, 112, 112, 113, 113, 113, + 113, 114, 114, 114, 114, 114, 115, 115, + 115, 115, 115, 116, 116, 116, 116, 116, + 117, 117, 117, 117, 117, 118, 118, 118, + 118, 118, 118, 119, 119, 119, 119, 119, + 120, 120, 120, 120, 120, 120, 121, 121, + 121, 121, 121, 121, 121, 122, 122, 122, + 122, 122, 122, 123, 123, 123, 123, 123, + 123, 123, 124, 124, 124, 124, 124, 124, + 124, 125, 125, 125, 125, 125, 125, 125, + 125, 126, 126, 126, 126, 126, 126, 126, + 127, 127, 127, 127, 127, 127, 127, 127, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 129, 129, 129, 129, 129, 129, 129, + 129, 129, 130, 130, 130, 130, 130, 130, + 130, 130, 130, 131, 131, 131, 131, 131, + 131, 131, 131, 131, 131, 132, 132, 132, + 132, 132, 132, 132, 132, 132, 132, 133, + 133, 133, 133, 133, 133, 133, 133, 133, + 133, 134, 134, 134, 134, 134, 134, 134, + 134, + }, + .len_sym_cost = 109, + }, { /* match_prob = 0.5 */ + .used_lits_to_lit_cost = { + 16, 16, 32, 41, 48, 53, 57, 60, + 64, 66, 69, 71, 73, 75, 76, 78, + 80, 81, 82, 83, 85, 86, 87, 88, + 89, 90, 91, 92, 92, 93, 94, 95, + 96, 96, 97, 98, 98, 99, 99, 
100, + 101, 101, 102, 102, 103, 103, 104, 104, + 105, 105, 106, 106, 107, 107, 108, 108, + 108, 109, 109, 110, 110, 110, 111, 111, + 112, 112, 112, 113, 113, 113, 114, 114, + 114, 115, 115, 115, 115, 116, 116, 116, + 117, 117, 117, 118, 118, 118, 118, 119, + 119, 119, 119, 120, 120, 120, 120, 121, + 121, 121, 121, 122, 122, 122, 122, 122, + 123, 123, 123, 123, 124, 124, 124, 124, + 124, 125, 125, 125, 125, 125, 126, 126, + 126, 126, 126, 127, 127, 127, 127, 127, + 128, 128, 128, 128, 128, 128, 129, 129, + 129, 129, 129, 129, 130, 130, 130, 130, + 130, 130, 131, 131, 131, 131, 131, 131, + 131, 132, 132, 132, 132, 132, 132, 133, + 133, 133, 133, 133, 133, 133, 134, 134, + 134, 134, 134, 134, 134, 134, 135, 135, + 135, 135, 135, 135, 135, 135, 136, 136, + 136, 136, 136, 136, 136, 136, 137, 137, + 137, 137, 137, 137, 137, 137, 138, 138, + 138, 138, 138, 138, 138, 138, 138, 139, + 139, 139, 139, 139, 139, 139, 139, 139, + 140, 140, 140, 140, 140, 140, 140, 140, + 140, 141, 141, 141, 141, 141, 141, 141, + 141, 141, 141, 142, 142, 142, 142, 142, + 142, 142, 142, 142, 142, 142, 143, 143, + 143, 143, 143, 143, 143, 143, 143, 143, + 144, + }, + .len_sym_cost = 93, + }, { /* match_prob = 0.75 */ + .used_lits_to_lit_cost = { + 32, 32, 48, 57, 64, 69, 73, 76, + 80, 82, 85, 87, 89, 91, 92, 94, + 96, 97, 98, 99, 101, 102, 103, 104, + 105, 106, 107, 108, 108, 109, 110, 111, + 112, 112, 113, 114, 114, 115, 115, 116, + 117, 117, 118, 118, 119, 119, 120, 120, + 121, 121, 122, 122, 123, 123, 124, 124, + 124, 125, 125, 126, 126, 126, 127, 127, + 128, 128, 128, 129, 129, 129, 130, 130, + 130, 131, 131, 131, 131, 132, 132, 132, + 133, 133, 133, 134, 134, 134, 134, 135, + 135, 135, 135, 136, 136, 136, 136, 137, + 137, 137, 137, 138, 138, 138, 138, 138, + 139, 139, 139, 139, 140, 140, 140, 140, + 140, 141, 141, 141, 141, 141, 142, 142, + 142, 142, 142, 143, 143, 143, 143, 143, + 144, 144, 144, 144, 144, 144, 145, 145, + 145, 145, 145, 145, 146, 146, 146, 146, + 146, 146, 147, 147, 147, 
147, 147, 147, + 147, 148, 148, 148, 148, 148, 148, 149, + 149, 149, 149, 149, 149, 149, 150, 150, + 150, 150, 150, 150, 150, 150, 151, 151, + 151, 151, 151, 151, 151, 151, 152, 152, + 152, 152, 152, 152, 152, 152, 153, 153, + 153, 153, 153, 153, 153, 153, 154, 154, + 154, 154, 154, 154, 154, 154, 154, 155, + 155, 155, 155, 155, 155, 155, 155, 155, + 156, 156, 156, 156, 156, 156, 156, 156, + 156, 157, 157, 157, 157, 157, 157, 157, + 157, 157, 157, 158, 158, 158, 158, 158, + 158, 158, 158, 158, 158, 158, 159, 159, + 159, 159, 159, 159, 159, 159, 159, 159, + 160, + }, + .len_sym_cost = 84, + }, +}; + +/* + * Choose the default costs for literal and length symbols. These symbols are + * both part of the litlen alphabet. + */ +static void +deflate_choose_default_litlen_costs(struct libdeflate_compressor *c, + const u8 *block_begin, u32 block_length, + u32 *lit_cost, u32 *len_sym_cost) +{ + unsigned num_used_literals = 0; + u32 literal_freq = block_length; + u32 match_freq = 0; + u32 cutoff; + u32 i; + + /* Calculate the number of distinct literals that exist in the data. */ + memset(c->freqs.litlen, 0, + DEFLATE_NUM_LITERALS * sizeof(c->freqs.litlen[0])); + cutoff = literal_freq >> 11; /* Ignore literals used very rarely. */ + for (i = 0; i < block_length; i++) + c->freqs.litlen[block_begin[i]]++; + for (i = 0; i < DEFLATE_NUM_LITERALS; i++) { + if (c->freqs.litlen[i] > cutoff) + num_used_literals++; + } + if (num_used_literals == 0) + num_used_literals = 1; + + /* + * Estimate the relative frequency of literals and matches in the + * optimal parsing solution. We don't know the optimal solution, so + * this can only be a very rough estimate. Therefore, we basically use + * the match frequency from a greedy parse. We also apply the min_len + * heuristic used by the greedy and lazy parsers, to avoid counting too + * many matches when literals are cheaper than short matches. 
+ */ + match_freq = 0; + i = choose_min_match_len(num_used_literals, c->max_search_depth); + for (; i < ARRAY_LEN(c->p.n.match_len_freqs); i++) { + match_freq += c->p.n.match_len_freqs[i]; + literal_freq -= i * c->p.n.match_len_freqs[i]; + } + if ((s32)literal_freq < 0) /* shouldn't happen */ + literal_freq = 0; + + if (match_freq > literal_freq) + i = 2; /* many matches */ + else if (match_freq * 4 > literal_freq) + i = 1; /* neutral */ + else + i = 0; /* few matches */ + + STATIC_ASSERT(BIT_COST == 16); + *lit_cost = default_litlen_costs[i].used_lits_to_lit_cost[ + num_used_literals]; + *len_sym_cost = default_litlen_costs[i].len_sym_cost; +} + +static forceinline u32 +deflate_default_length_cost(unsigned len, u32 len_sym_cost) +{ + unsigned slot = deflate_length_slot[len]; + u32 num_extra_bits = deflate_extra_length_bits[slot]; + + return len_sym_cost + (num_extra_bits * BIT_COST); +} + +static forceinline u32 +deflate_default_offset_slot_cost(unsigned slot) +{ + u32 num_extra_bits = deflate_extra_offset_bits[slot]; + /* + * Assume that all offset symbols are equally probable. + * The resulting cost is 'int(-log2(1/30) * BIT_COST)', + * where 30 is the number of potentially-used offset symbols. + */ + u32 offset_sym_cost = 4*BIT_COST + (907*BIT_COST)/1000; + + return offset_sym_cost + (num_extra_bits * BIT_COST); +} + +/* Set default symbol costs for the first block's first optimization pass. 
*/ +static void +deflate_set_default_costs(struct libdeflate_compressor *c, + u32 lit_cost, u32 len_sym_cost) +{ + unsigned i; + + /* Literals */ + for (i = 0; i < DEFLATE_NUM_LITERALS; i++) + c->p.n.costs.literal[i] = lit_cost; + + /* Lengths */ + for (i = DEFLATE_MIN_MATCH_LEN; i <= DEFLATE_MAX_MATCH_LEN; i++) + c->p.n.costs.length[i] = + deflate_default_length_cost(i, len_sym_cost); + + /* Offset slots */ + for (i = 0; i < ARRAY_LEN(deflate_offset_slot_base); i++) + c->p.n.costs.offset_slot[i] = + deflate_default_offset_slot_cost(i); +} + +static forceinline void +deflate_adjust_cost(u32 *cost_p, u32 default_cost, int change_amount) +{ + if (change_amount == 0) + /* Block is very similar to previous; prefer previous costs. */ + *cost_p = (default_cost + 3 * *cost_p) / 4; + else if (change_amount == 1) + *cost_p = (default_cost + *cost_p) / 2; + else if (change_amount == 2) + *cost_p = (5 * default_cost + 3 * *cost_p) / 8; + else + /* Block differs greatly from previous; prefer default costs. */ + *cost_p = (3 * default_cost + *cost_p) / 4; +} + +static forceinline void +deflate_adjust_costs_impl(struct libdeflate_compressor *c, + u32 lit_cost, u32 len_sym_cost, int change_amount) +{ + unsigned i; + + /* Literals */ + for (i = 0; i < DEFLATE_NUM_LITERALS; i++) + deflate_adjust_cost(&c->p.n.costs.literal[i], lit_cost, + change_amount); + + /* Lengths */ + for (i = DEFLATE_MIN_MATCH_LEN; i <= DEFLATE_MAX_MATCH_LEN; i++) + deflate_adjust_cost(&c->p.n.costs.length[i], + deflate_default_length_cost(i, + len_sym_cost), + change_amount); + + /* Offset slots */ + for (i = 0; i < ARRAY_LEN(deflate_offset_slot_base); i++) + deflate_adjust_cost(&c->p.n.costs.offset_slot[i], + deflate_default_offset_slot_cost(i), + change_amount); +} + +/* + * Adjust the costs when beginning a new block. + * + * Since the current costs have been optimized for the data, it's undesirable to + * throw them away and start over with the default costs. 
At the same time, we + * don't want to bias the parse by assuming that the next block will be similar + * to the current block. As a compromise, make the costs closer to the + * defaults, but don't simply set them to the defaults. + */ +static void +deflate_adjust_costs(struct libdeflate_compressor *c, + u32 lit_cost, u32 len_sym_cost) +{ + u64 total_delta = 0; + u64 cutoff; + int i; + + /* + * Decide how different the current block is from the previous block, + * using the block splitting statistics from the current and previous + * blocks. The more different the current block is, the more we prefer + * the default costs rather than the previous block's costs. + * + * The algorithm here is similar to the end-of-block check one, but here + * we compare two entire blocks rather than a partial block with a small + * extra part, and therefore we need 64-bit numbers in some places. + */ + for (i = 0; i < NUM_OBSERVATION_TYPES; i++) { + u64 prev = (u64)c->p.n.prev_observations[i] * + c->split_stats.num_observations; + u64 cur = (u64)c->split_stats.observations[i] * + c->p.n.prev_num_observations; + + total_delta += prev > cur ? prev - cur : cur - prev; + } + cutoff = ((u64)c->p.n.prev_num_observations * + c->split_stats.num_observations * 200) / 512; + + if (4 * total_delta > 9 * cutoff) + deflate_adjust_costs_impl(c, lit_cost, len_sym_cost, 3); + else if (2 * total_delta > 3 * cutoff) + deflate_adjust_costs_impl(c, lit_cost, len_sym_cost, 2); + else if (2 * total_delta > cutoff) + deflate_adjust_costs_impl(c, lit_cost, len_sym_cost, 1); + else + deflate_adjust_costs_impl(c, lit_cost, len_sym_cost, 0); +} + +/* + * Find the minimum-cost path through the graph of possible match/literal + * choices for this block. + * + * We find the minimum cost path from 'c->p.n.optimum_nodes[0]', which + * represents the node at the beginning of the block, to + * 'c->p.n.optimum_nodes[block_length]', which represents the node at the end of + * the block. 
Edge costs are evaluated using the cost model 'c->p.n.costs'. + * + * The algorithm works backwards, starting at the end node and proceeding + * backwards one node at a time. At each node, the minimum cost to reach the + * end node is computed and the match/literal choice that begins that path is + * saved. + */ +static void +deflate_find_min_cost_path(struct libdeflate_compressor *c, + const u32 block_length, + const struct lz_match *cache_ptr) +{ + struct deflate_optimum_node *end_node = + &c->p.n.optimum_nodes[block_length]; + struct deflate_optimum_node *cur_node = end_node; + + cur_node->cost_to_end = 0; + do { + unsigned num_matches; + unsigned literal; + u32 best_cost_to_end; + + cur_node--; + cache_ptr--; + + num_matches = cache_ptr->length; + literal = cache_ptr->offset; + + /* It's always possible to choose a literal. */ + best_cost_to_end = c->p.n.costs.literal[literal] + + (cur_node + 1)->cost_to_end; + cur_node->item = ((u32)literal << OPTIMUM_OFFSET_SHIFT) | 1; + + /* Also consider matches if there are any. */ + if (num_matches) { + const struct lz_match *match; + unsigned len; + unsigned offset; + unsigned offset_slot; + u32 offset_cost; + u32 cost_to_end; + + /* + * Consider each length from the minimum + * (DEFLATE_MIN_MATCH_LEN) to the length of the longest + * match found at this position. For each length, we + * consider only the smallest offset for which that + * length is available. Although this is not guaranteed + * to be optimal due to the possibility of a larger + * offset costing less than a smaller offset to code, + * this is a very useful heuristic. 
+ */ + match = cache_ptr - num_matches; + len = DEFLATE_MIN_MATCH_LEN; + do { + offset = match->offset; + offset_slot = c->p.n.offset_slot_full[offset]; + offset_cost = + c->p.n.costs.offset_slot[offset_slot]; + do { + cost_to_end = offset_cost + + c->p.n.costs.length[len] + + (cur_node + len)->cost_to_end; + if (cost_to_end < best_cost_to_end) { + best_cost_to_end = cost_to_end; + cur_node->item = len | + ((u32)offset << + OPTIMUM_OFFSET_SHIFT); + } + } while (++len <= match->length); + } while (++match != cache_ptr); + cache_ptr -= num_matches; + } + cur_node->cost_to_end = best_cost_to_end; + } while (cur_node != &c->p.n.optimum_nodes[0]); +} + +/* + * Choose the literal/match sequence to use for the current block. The basic + * algorithm finds a minimum-cost path through the block's graph of + * literal/match choices, given a cost model. However, the cost of each symbol + * is unknown until the Huffman codes have been built, but at the same time the + * Huffman codes depend on the frequencies of chosen symbols. Consequently, + * multiple passes must be used to try to approximate an optimal solution. The + * first pass uses default costs, mixed with the costs from the previous block + * if any. Later passes use the Huffman codeword lengths from the previous pass + * as the costs. + */ +static void +deflate_optimize_block(struct libdeflate_compressor *c, + const u8 *block_begin, u32 block_length, + const struct lz_match *cache_ptr, bool is_first_block, + bool is_final_block) +{ + unsigned num_passes_remaining = c->p.n.num_optim_passes; + u32 lit_cost, len_sym_cost; + u32 i; + + /* + * Force the block to really end at the desired length, even if some + * matches extend beyond it. + */ + for (i = block_length; + i <= MIN(block_length - 1 + DEFLATE_MAX_MATCH_LEN, + ARRAY_LEN(c->p.n.optimum_nodes) - 1); i++) + c->p.n.optimum_nodes[i].cost_to_end = 0x80000000; + + /* Set the initial costs. 
*/ + deflate_choose_default_litlen_costs(c, block_begin, block_length, + &lit_cost, &len_sym_cost); + if (is_first_block) + deflate_set_default_costs(c, lit_cost, len_sym_cost); + else + deflate_adjust_costs(c, lit_cost, len_sym_cost); + + do { + /* Find the minimum cost path for this pass. */ + deflate_find_min_cost_path(c, block_length, cache_ptr); + + /* Compute frequencies of the chosen symbols. */ + deflate_reset_symbol_frequencies(c); + deflate_tally_item_list(c, block_length); + + /* Make the Huffman codes. */ + deflate_make_huffman_codes(&c->freqs, &c->codes); + + /* + * Update the costs. After the last optimization pass, the + * final costs won't be needed for this block, but they will be + * used in determining the initial costs for the next block. + */ + if (--num_passes_remaining || !is_final_block) + deflate_set_costs_from_codes(c, &c->codes.lens); + } while (num_passes_remaining); +} + +static void +deflate_near_optimal_init_stats(struct libdeflate_compressor *c) +{ + init_block_split_stats(&c->split_stats); + memset(c->p.n.new_match_len_freqs, 0, + sizeof(c->p.n.new_match_len_freqs)); + memset(c->p.n.match_len_freqs, 0, sizeof(c->p.n.match_len_freqs)); +} + +static void +deflate_near_optimal_merge_stats(struct libdeflate_compressor *c) +{ + unsigned i; + + merge_new_observations(&c->split_stats); + for (i = 0; i < ARRAY_LEN(c->p.n.match_len_freqs); i++) { + c->p.n.match_len_freqs[i] += c->p.n.new_match_len_freqs[i]; + c->p.n.new_match_len_freqs[i] = 0; + } +} + +/* + * Save some literal/match statistics from the previous block so that + * deflate_adjust_costs() will be able to decide how much the current block + * differs from the previous one. 
+ */ +static void +deflate_near_optimal_save_stats(struct libdeflate_compressor *c) +{ + int i; + + for (i = 0; i < NUM_OBSERVATION_TYPES; i++) + c->p.n.prev_observations[i] = c->split_stats.observations[i]; + c->p.n.prev_num_observations = c->split_stats.num_observations; +} + +static void +deflate_near_optimal_clear_old_stats(struct libdeflate_compressor *c) +{ + int i; + + for (i = 0; i < NUM_OBSERVATION_TYPES; i++) + c->split_stats.observations[i] = 0; + c->split_stats.num_observations = 0; + memset(c->p.n.match_len_freqs, 0, sizeof(c->p.n.match_len_freqs)); +} + +/* + * This is the "near-optimal" DEFLATE compressor. It computes the optimal + * representation of each DEFLATE block using a minimum-cost path search over + * the graph of possible match/literal choices for that block, assuming a + * certain cost for each Huffman symbol. + * + * For several reasons, the end result is not guaranteed to be optimal: + * + * - Nonoptimal choice of blocks + * - Heuristic limitations on which matches are actually considered + * - Symbol costs are unknown until the symbols have already been chosen + * (so iterative optimization must be used) + */ +static void +deflate_compress_near_optimal(struct libdeflate_compressor * restrict c, + const u8 *in, size_t in_nbytes, + struct deflate_output_bitstream *os) +{ + const u8 *in_next = in; + const u8 *in_block_begin = in_next; + const u8 *in_end = in_next + in_nbytes; + const u8 *in_cur_base = in_next; + const u8 *in_next_slide = + in_next + MIN(in_end - in_next, MATCHFINDER_WINDOW_SIZE); + unsigned max_len = DEFLATE_MAX_MATCH_LEN; + unsigned nice_len = MIN(c->nice_match_length, max_len); + struct lz_match *cache_ptr = c->p.n.match_cache; + u32 next_hashes[2] = {0, 0}; + + bt_matchfinder_init(&c->p.n.bt_mf); + deflate_near_optimal_init_stats(c); + + do { + /* Starting a new DEFLATE block */ + const u8 * const in_max_block_end = choose_max_block_end( + in_block_begin, in_end, SOFT_MAX_BLOCK_LENGTH); + const u8 *prev_end_block_check 
= NULL; + bool change_detected = false; + const u8 *next_observation = in_next; + unsigned min_len; + + /* + * Use the minimum match length heuristic to improve the + * literal/match statistics gathered during matchfinding. + * However, the actual near-optimal parse won't respect min_len, + * as it can accurately assess the costs of different matches. + */ + min_len = calculate_min_match_len( + in_block_begin, + in_max_block_end - in_block_begin, + c->max_search_depth); + + /* + * Find matches until we decide to end the block. We end the + * block if any of the following is true: + * + * (1) Maximum block length has been reached + * (2) Match catch may overflow. + * (3) Block split heuristic says to split now. + */ + for (;;) { + struct lz_match *matches; + unsigned best_len; + size_t remaining = in_end - in_next; + + /* Slide the window forward if needed. */ + if (in_next == in_next_slide) { + bt_matchfinder_slide_window(&c->p.n.bt_mf); + in_cur_base = in_next; + in_next_slide = in_next + + MIN(remaining, MATCHFINDER_WINDOW_SIZE); + } + + /* + * Find matches with the current position using the + * binary tree matchfinder and save them in match_cache. + * + * Note: the binary tree matchfinder is more suited for + * optimal parsing than the hash chain matchfinder. The + * reasons for this include: + * + * - The binary tree matchfinder can find more matches + * in the same number of steps. + * - One of the major advantages of hash chains is that + * skipping positions (not searching for matches at + * them) is faster; however, with optimal parsing we + * search for matches at almost all positions, so this + * advantage of hash chains is negated. 
+ */ + matches = cache_ptr; + best_len = 0; + adjust_max_and_nice_len(&max_len, &nice_len, remaining); + if (likely(max_len >= BT_MATCHFINDER_REQUIRED_NBYTES)) { + cache_ptr = bt_matchfinder_get_matches( + &c->p.n.bt_mf, + in_cur_base, + in_next - in_cur_base, + max_len, + nice_len, + c->max_search_depth, + next_hashes, + matches); + if (cache_ptr > matches) + best_len = cache_ptr[-1].length; + } + if (in_next >= next_observation) { + if (best_len >= min_len) { + observe_match(&c->split_stats, + best_len); + next_observation = in_next + best_len; + c->p.n.new_match_len_freqs[best_len]++; + } else { + observe_literal(&c->split_stats, + *in_next); + next_observation = in_next + 1; + } + } + + cache_ptr->length = cache_ptr - matches; + cache_ptr->offset = *in_next; + in_next++; + cache_ptr++; + + /* + * If there was a very long match found, don't cache any + * matches for the bytes covered by that match. This + * avoids degenerate behavior when compressing highly + * redundant data, where the number of matches can be + * very large. + * + * This heuristic doesn't actually hurt the compression + * ratio very much. If there's a long match, then the + * data must be highly compressible, so it doesn't + * matter much what we do. + */ + if (best_len >= DEFLATE_MIN_MATCH_LEN && + best_len >= nice_len) { + --best_len; + do { + remaining = in_end - in_next; + if (in_next == in_next_slide) { + bt_matchfinder_slide_window( + &c->p.n.bt_mf); + in_cur_base = in_next; + in_next_slide = in_next + + MIN(remaining, + MATCHFINDER_WINDOW_SIZE); + } + adjust_max_and_nice_len(&max_len, + &nice_len, + remaining); + if (max_len >= + BT_MATCHFINDER_REQUIRED_NBYTES) { + bt_matchfinder_skip_byte( + &c->p.n.bt_mf, + in_cur_base, + in_next - in_cur_base, + nice_len, + c->max_search_depth, + next_hashes); + } + cache_ptr->length = 0; + cache_ptr->offset = *in_next; + in_next++; + cache_ptr++; + } while (--best_len); + } + /* Maximum block length or end of input reached? 
*/ + if (in_next >= in_max_block_end) + break; + /* Match cache overflowed? */ + if (cache_ptr >= + &c->p.n.match_cache[MATCH_CACHE_LENGTH]) + break; + /* Not ready to try to end the block (again)? */ + if (!ready_to_check_block(&c->split_stats, + in_block_begin, in_next, + in_end)) + continue; + /* Check if it would be worthwhile to end the block. */ + if (do_end_block_check(&c->split_stats, + in_next - in_block_begin)) { + change_detected = true; + break; + } + /* Ending the block doesn't seem worthwhile here. */ + deflate_near_optimal_merge_stats(c); + prev_end_block_check = in_next; + } + /* + * All the matches for this block have been cached. Now choose + * the precise end of the block and the sequence of items to + * output to represent it, then flush the block. + */ + if (change_detected && prev_end_block_check != NULL) { + /* + * The block is being ended because a recent chunk of + * data differs from the rest of the block. We could + * end the block at 'in_next' like the greedy and lazy + * compressors do, but that's not ideal since it would + * include the differing chunk in the block. The + * near-optimal compressor has time to do a better job. + * Therefore, we rewind to just before the chunk, and + * output a block that only goes up to there. + * + * We then set things up to correctly start the next + * block, considering that some work has already been + * done on it (some matches found and stats gathered). + */ + struct lz_match *orig_cache_ptr = cache_ptr; + const u8 *in_block_end = prev_end_block_check; + u32 block_length = in_block_end - in_block_begin; + bool is_first = (in_block_begin == in); + bool is_final = false; + u32 num_bytes_to_rewind = in_next - in_block_end; + size_t cache_len_rewound; + + /* Rewind the match cache. 
*/ + do { + cache_ptr--; + cache_ptr -= cache_ptr->length; + } while (--num_bytes_to_rewind); + cache_len_rewound = orig_cache_ptr - cache_ptr; + + deflate_optimize_block(c, in_block_begin, block_length, + cache_ptr, is_first, is_final); + deflate_flush_block(c, os, in_block_begin, block_length, + NULL, is_final); + memmove(c->p.n.match_cache, cache_ptr, + cache_len_rewound * sizeof(*cache_ptr)); + cache_ptr = &c->p.n.match_cache[cache_len_rewound]; + deflate_near_optimal_save_stats(c); + /* + * Clear the stats for the just-flushed block, leaving + * just the stats for the beginning of the next block. + */ + deflate_near_optimal_clear_old_stats(c); + in_block_begin = in_block_end; + } else { + /* + * The block is being ended for a reason other than a + * differing data chunk being detected. Don't rewind at + * all; just end the block at the current position. + */ + u32 block_length = in_next - in_block_begin; + bool is_first = (in_block_begin == in); + bool is_final = (in_next == in_end); + + deflate_near_optimal_merge_stats(c); + deflate_optimize_block(c, in_block_begin, block_length, + cache_ptr, is_first, is_final); + deflate_flush_block(c, os, in_block_begin, block_length, + NULL, is_final); + cache_ptr = &c->p.n.match_cache[0]; + deflate_near_optimal_save_stats(c); + deflate_near_optimal_init_stats(c); + in_block_begin = in_next; + } + } while (in_next != in_end); +} + +/* Initialize c->p.n.offset_slot_full. 
*/ +static void +deflate_init_offset_slot_full(struct libdeflate_compressor *c) +{ + unsigned offset_slot; + unsigned offset; + unsigned offset_end; + + for (offset_slot = 0; offset_slot < ARRAY_LEN(deflate_offset_slot_base); + offset_slot++) { + offset = deflate_offset_slot_base[offset_slot]; + offset_end = offset + + (1 << deflate_extra_offset_bits[offset_slot]); + do { + c->p.n.offset_slot_full[offset] = offset_slot; + } while (++offset != offset_end); + } +} + +#endif /* SUPPORT_NEAR_OPTIMAL_PARSING */ + +LIBDEFLATEAPI struct libdeflate_compressor * +libdeflate_alloc_compressor(int compression_level) +{ + struct libdeflate_compressor *c; + size_t size = offsetof(struct libdeflate_compressor, p); + + check_buildtime_parameters(); + + if (compression_level < 0 || compression_level > 12) + return NULL; + +#if SUPPORT_NEAR_OPTIMAL_PARSING + if (compression_level >= 10) + size += sizeof(c->p.n); + else +#endif + { + if (compression_level >= 2) + size += sizeof(c->p.g); + else if (compression_level == 1) + size += sizeof(c->p.f); + } + + c = libdeflate_aligned_malloc(MATCHFINDER_MEM_ALIGNMENT, size); + if (!c) + return NULL; + + c->compression_level = compression_level; + + /* + * The higher the compression level, the more we should bother trying to + * compress very small inputs. + */ + c->max_passthrough_size = 55 - (compression_level * 4); + + switch (compression_level) { + case 0: + c->max_passthrough_size = SIZE_MAX; + c->impl = NULL; /* not used */ + break; + case 1: + c->impl = deflate_compress_fastest; + /* max_search_depth is unused. 
*/ + c->nice_match_length = 32; + break; + case 2: + c->impl = deflate_compress_greedy; + c->max_search_depth = 6; + c->nice_match_length = 10; + break; + case 3: + c->impl = deflate_compress_greedy; + c->max_search_depth = 12; + c->nice_match_length = 14; + break; + case 4: + c->impl = deflate_compress_greedy; + c->max_search_depth = 16; + c->nice_match_length = 30; + break; + case 5: + c->impl = deflate_compress_lazy; + c->max_search_depth = 16; + c->nice_match_length = 30; + break; + case 6: + c->impl = deflate_compress_lazy; + c->max_search_depth = 35; + c->nice_match_length = 65; + break; + case 7: + c->impl = deflate_compress_lazy; + c->max_search_depth = 100; + c->nice_match_length = 130; + break; + case 8: + c->impl = deflate_compress_lazy2; + c->max_search_depth = 300; + c->nice_match_length = DEFLATE_MAX_MATCH_LEN; + break; + case 9: +#if !SUPPORT_NEAR_OPTIMAL_PARSING + default: +#endif + c->impl = deflate_compress_lazy2; + c->max_search_depth = 600; + c->nice_match_length = DEFLATE_MAX_MATCH_LEN; + break; +#if SUPPORT_NEAR_OPTIMAL_PARSING + case 10: + c->impl = deflate_compress_near_optimal; + c->max_search_depth = 35; + c->nice_match_length = 75; + c->p.n.num_optim_passes = 2; + deflate_init_offset_slot_full(c); + break; + case 11: + c->impl = deflate_compress_near_optimal; + c->max_search_depth = 70; + c->nice_match_length = 150; + c->p.n.num_optim_passes = 3; + deflate_init_offset_slot_full(c); + break; + case 12: + default: + c->impl = deflate_compress_near_optimal; + c->max_search_depth = 150; + c->nice_match_length = DEFLATE_MAX_MATCH_LEN; + c->p.n.num_optim_passes = 4; + deflate_init_offset_slot_full(c); + break; +#endif /* SUPPORT_NEAR_OPTIMAL_PARSING */ + } + + deflate_init_static_codes(c); + + return c; +} + +LIBDEFLATEAPI size_t +libdeflate_deflate_compress(struct libdeflate_compressor *c, + const void *in, size_t in_nbytes, + void *out, size_t out_nbytes_avail) +{ + struct deflate_output_bitstream os; + + /* + * For extremely short inputs, or 
for compression level 0, just output + * uncompressed blocks. + */ + if (unlikely(in_nbytes <= c->max_passthrough_size)) + return deflate_compress_none(in, in_nbytes, + out, out_nbytes_avail); + + /* + * Initialize the output bitstream structure. + * + * The end is set to OUTPUT_END_PADDING below the true end, so that + * FLUSH_BITS() can be more efficient. + */ + if (unlikely(out_nbytes_avail <= OUTPUT_END_PADDING)) + return 0; + os.bitbuf = 0; + os.bitcount = 0; + os.next = out; + os.end = os.next + out_nbytes_avail - OUTPUT_END_PADDING; + (*c->impl)(c, in, in_nbytes, &os); + /* + * If 'os.next' reached 'os.end', then either there was not enough space + * in the output buffer, or the compressed size would have been within + * OUTPUT_END_PADDING of the true end. For performance reasons we don't + * distinguish between these cases; we just make sure to return some + * extra space from libdeflate_deflate_compress_bound(). + */ + if (os.next >= os.end) + return 0; + ASSERT(os.bitcount <= 7); + if (os.bitcount) + *os.next++ = os.bitbuf; + return os.next - (u8 *)out; +} + +LIBDEFLATEAPI void +libdeflate_free_compressor(struct libdeflate_compressor *c) +{ + libdeflate_aligned_free(c); +} + +unsigned int +libdeflate_get_compression_level(struct libdeflate_compressor *c) +{ + return c->compression_level; +} + +LIBDEFLATEAPI size_t +libdeflate_deflate_compress_bound(struct libdeflate_compressor *c, + size_t in_nbytes) +{ + size_t bound = 0; + size_t max_blocks; + + /* + * Since the compressor never uses a compressed block when an + * uncompressed block is cheaper, the worst case can be no worse than + * the case where only uncompressed blocks are used. + * + * This is true even though up to 7 bits are "wasted" to byte-align the + * bitstream when a compressed block is followed by an uncompressed + * block. 
This is because a compressed block wouldn't have been used if + * it wasn't cheaper than an uncompressed block, and uncompressed blocks + * always end on a byte boundary. So the alignment bits will, at worst, + * go up to the place where the uncompressed block would have ended. + */ + + /* + * The minimum length that is passed to deflate_flush_block() is + * MIN_BLOCK_LENGTH bytes, except for the final block if needed. + * + * If deflate_flush_block() decides to use an uncompressed block, it + * actually will (in general) output a series of uncompressed blocks in + * order to stay within the UINT16_MAX limit of DEFLATE. But this can + * be disregarded here as long as '2 * MIN_BLOCK_LENGTH <= UINT16_MAX', + * as in that case this behavior can't result in more blocks than the + * case where deflate_flush_block() is called with min-length inputs. + * + * So the number of uncompressed blocks needed would be bounded by + * DIV_ROUND_UP(in_nbytes, MIN_BLOCK_LENGTH). However, empty inputs + * need 1 (empty) block, which gives the final expression below. + */ + STATIC_ASSERT(2 * MIN_BLOCK_LENGTH <= UINT16_MAX); + max_blocks = MAX(DIV_ROUND_UP(in_nbytes, MIN_BLOCK_LENGTH), 1); + + /* + * Each uncompressed block has 5 bytes of overhead, for the BFINAL, + * BTYPE, LEN, and NLEN fields. (For the reason explained earlier, the + * alignment bits at the very start of the block can be disregarded; + * they would otherwise increase the overhead to 6 bytes per block.) + */ + bound += 5 * max_blocks; + + /* Account for the data itself, stored uncompressed. */ + bound += in_nbytes; + + /* + * Add 1 + OUTPUT_END_PADDING because for performance reasons, the + * compressor doesn't distinguish between cases where there wasn't + * enough space and cases where the compressed size would have been + * 'out_nbytes_avail - OUTPUT_END_PADDING' or greater. Adding + * 1 + OUTPUT_END_PADDING to the bound ensures the needed wiggle room. 
+ */ + bound += 1 + OUTPUT_END_PADDING; + + return bound; +} diff --git a/tools/z64compress/src/enc/libdeflate/lib/deflate_compress.h b/tools/z64compress/src/enc/libdeflate/lib/deflate_compress.h new file mode 100644 index 000000000..9451d548b --- /dev/null +++ b/tools/z64compress/src/enc/libdeflate/lib/deflate_compress.h @@ -0,0 +1,15 @@ +#ifndef LIB_DEFLATE_COMPRESS_H +#define LIB_DEFLATE_COMPRESS_H + +#include "lib_common.h" + +/* + * DEFLATE compression is private to deflate_compress.c, but we do need to be + * able to query the compression level for zlib and gzip header generation. + */ + +struct libdeflate_compressor; + +unsigned int libdeflate_get_compression_level(struct libdeflate_compressor *c); + +#endif /* LIB_DEFLATE_COMPRESS_H */ diff --git a/tools/z64compress/src/enc/libdeflate/lib/deflate_constants.h b/tools/z64compress/src/enc/libdeflate/lib/deflate_constants.h new file mode 100644 index 000000000..95c9e0a50 --- /dev/null +++ b/tools/z64compress/src/enc/libdeflate/lib/deflate_constants.h @@ -0,0 +1,56 @@ +/* + * deflate_constants.h - constants for the DEFLATE compression format + */ + +#ifndef LIB_DEFLATE_CONSTANTS_H +#define LIB_DEFLATE_CONSTANTS_H + +/* Valid block types */ +#define DEFLATE_BLOCKTYPE_UNCOMPRESSED 0 +#define DEFLATE_BLOCKTYPE_STATIC_HUFFMAN 1 +#define DEFLATE_BLOCKTYPE_DYNAMIC_HUFFMAN 2 + +/* Minimum and maximum supported match lengths (in bytes) */ +#define DEFLATE_MIN_MATCH_LEN 3 +#define DEFLATE_MAX_MATCH_LEN 258 + +/* Maximum supported match offset (in bytes) */ +#define DEFLATE_MAX_MATCH_OFFSET 32768 + +/* log2 of DEFLATE_MAX_MATCH_OFFSET */ +#define DEFLATE_WINDOW_ORDER 15 + +/* Number of symbols in each Huffman code. Note: for the literal/length + * and offset codes, these are actually the maximum values; a given block + * might use fewer symbols. 
*/ +#define DEFLATE_NUM_PRECODE_SYMS 19 +#define DEFLATE_NUM_LITLEN_SYMS 288 +#define DEFLATE_NUM_OFFSET_SYMS 32 + +/* The maximum number of symbols across all codes */ +#define DEFLATE_MAX_NUM_SYMS 288 + +/* Division of symbols in the literal/length code */ +#define DEFLATE_NUM_LITERALS 256 +#define DEFLATE_END_OF_BLOCK 256 +#define DEFLATE_FIRST_LEN_SYM 257 + +/* Maximum codeword length, in bits, within each Huffman code */ +#define DEFLATE_MAX_PRE_CODEWORD_LEN 7 +#define DEFLATE_MAX_LITLEN_CODEWORD_LEN 15 +#define DEFLATE_MAX_OFFSET_CODEWORD_LEN 15 + +/* The maximum codeword length across all codes */ +#define DEFLATE_MAX_CODEWORD_LEN 15 + +/* Maximum possible overrun when decoding codeword lengths */ +#define DEFLATE_MAX_LENS_OVERRUN 137 + +/* + * Maximum number of extra bits that may be required to represent a match + * length or offset. + */ +#define DEFLATE_MAX_EXTRA_LENGTH_BITS 5 +#define DEFLATE_MAX_EXTRA_OFFSET_BITS 13 + +#endif /* LIB_DEFLATE_CONSTANTS_H */ diff --git a/tools/z64compress/src/enc/libdeflate/lib/deflate_decompress.c b/tools/z64compress/src/enc/libdeflate/lib/deflate_decompress.c new file mode 100644 index 000000000..7d22fc443 --- /dev/null +++ b/tools/z64compress/src/enc/libdeflate/lib/deflate_decompress.c @@ -0,0 +1,1176 @@ +/* + * deflate_decompress.c - a decompressor for DEFLATE + * + * Copyright 2016 Eric Biggers + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * --------------------------------------------------------------------------- + * + * This is a highly optimized DEFLATE decompressor. It is much faster than + * vanilla zlib, typically well over twice as fast, though results vary by CPU. + * + * Why this is faster than vanilla zlib: + * + * - Word accesses rather than byte accesses when reading input + * - Word accesses rather than byte accesses when copying matches + * - Faster Huffman decoding combined with various DEFLATE-specific tricks + * - Larger bitbuffer variable that doesn't need to be refilled as often + * - Other optimizations to remove unnecessary branches + * - Only full-buffer decompression is supported, so the code doesn't need to + *   support stopping and resuming decompression. + * - On x86_64, a version of the decompression routine is compiled with BMI2 + *   instructions enabled and is used automatically at runtime when supported. + */ + +#include <limits.h> + +#include "lib_common.h" +#include "deflate_constants.h" + +#include "libdeflate.h" + +/* + * If the expression passed to SAFETY_CHECK() evaluates to false, then the + * decompression routine immediately returns LIBDEFLATE_BAD_DATA, indicating the + * compressed data is invalid. + * + * Theoretically, these checks could be disabled for specialized applications + * where all input to the decompressor will be trusted. + */ +#if 0 +# pragma message("UNSAFE DECOMPRESSION IS ENABLED. 
THIS MUST ONLY BE USED IF THE DECOMPRESSOR INPUT WILL ALWAYS BE TRUSTED!") +# define SAFETY_CHECK(expr) (void)(expr) +#else +# define SAFETY_CHECK(expr) if (unlikely(!(expr))) return LIBDEFLATE_BAD_DATA +#endif + +/***************************************************************************** + * Input bitstream * + *****************************************************************************/ + +/* + * The state of the "input bitstream" consists of the following variables: + * + * - in_next: a pointer to the next unread byte in the input buffer + * + * - in_end: a pointer to just past the end of the input buffer + * + * - bitbuf: a word-sized variable containing bits that have been read from + * the input buffer or from the implicit appended zero bytes + * + * - bitsleft: the number of bits in 'bitbuf' available to be consumed. + * After REFILL_BITS_BRANCHLESS(), 'bitbuf' can actually + * contain more bits than this. However, only the bits counted + * by 'bitsleft' can actually be consumed; the rest can only be + * used for preloading. + * + * As a micro-optimization, we allow bits 8 and higher of + * 'bitsleft' to contain garbage. When consuming the bits + * associated with a decode table entry, this allows us to do + * 'bitsleft -= entry' instead of 'bitsleft -= (u8)entry'. + * On some CPUs, this helps reduce instruction dependencies. + * This does have the disadvantage that 'bitsleft' sometimes + * needs to be cast to 'u8', such as when it's used as a shift + * amount in REFILL_BITS_BRANCHLESS(). But that one happens + * for free since most CPUs ignore high bits in shift amounts. + * + * - overread_count: the total number of implicit appended zero bytes that + * have been loaded into the bitbuffer, including any + * counted by 'bitsleft' and any already consumed + */ + +/* + * The type for the bitbuffer variable ('bitbuf' described above). For best + * performance, this should have size equal to a machine word. 
+ * + * 64-bit platforms have a significant advantage: they get a bigger bitbuffer + * which they don't have to refill as often. + */ +typedef machine_word_t bitbuf_t; +#define BITBUF_NBITS (8 * (int)sizeof(bitbuf_t)) + +/* BITMASK(n) returns a bitmask of length 'n'. */ +#define BITMASK(n) (((bitbuf_t)1 << (n)) - 1) + +/* + * MAX_BITSLEFT is the maximum number of consumable bits, i.e. the maximum value + * of '(u8)bitsleft'. This is the size of the bitbuffer variable, minus 1 if + * the branchless refill method is being used (see REFILL_BITS_BRANCHLESS()). + */ +#define MAX_BITSLEFT \ + (UNALIGNED_ACCESS_IS_FAST ? BITBUF_NBITS - 1 : BITBUF_NBITS) + +/* + * CONSUMABLE_NBITS is the minimum number of bits that are guaranteed to be + * consumable (counted in 'bitsleft') immediately after refilling the bitbuffer. + * Since only whole bytes can be added to 'bitsleft', the worst case is + * 'MAX_BITSLEFT - 7': the smallest amount where another byte doesn't fit. + */ +#define CONSUMABLE_NBITS (MAX_BITSLEFT - 7) + +/* + * FASTLOOP_PRELOADABLE_NBITS is the minimum number of bits that are guaranteed + * to be preloadable immediately after REFILL_BITS_IN_FASTLOOP(). (It is *not* + * guaranteed after REFILL_BITS(), since REFILL_BITS() falls back to a + * byte-at-a-time refill method near the end of input.) This may exceed the + * number of consumable bits (counted by 'bitsleft'). Any bits not counted in + * 'bitsleft' can only be used for precomputation and cannot be consumed. + */ +#define FASTLOOP_PRELOADABLE_NBITS \ + (UNALIGNED_ACCESS_IS_FAST ? BITBUF_NBITS : CONSUMABLE_NBITS) + +/* + * PRELOAD_SLACK is the minimum number of bits that are guaranteed to be + * preloadable but not consumable, following REFILL_BITS_IN_FASTLOOP() and any + * subsequent consumptions. This is 1 bit if the branchless refill method is + * being used, and 0 bits otherwise. 
+ */ +#define PRELOAD_SLACK MAX(0, FASTLOOP_PRELOADABLE_NBITS - MAX_BITSLEFT) + +/* + * CAN_CONSUME(n) is true if it's guaranteed that if the bitbuffer has just been + * refilled, then it's always possible to consume 'n' bits from it. 'n' should + * be a compile-time constant, to enable compile-time evaluation. + */ +#define CAN_CONSUME(n) (CONSUMABLE_NBITS >= (n)) + +/* + * CAN_CONSUME_AND_THEN_PRELOAD(consume_nbits, preload_nbits) is true if it's + * guaranteed that after REFILL_BITS_IN_FASTLOOP(), it's always possible to + * consume 'consume_nbits' bits, then preload 'preload_nbits' bits. The + * arguments should be compile-time constants to enable compile-time evaluation. + */ +#define CAN_CONSUME_AND_THEN_PRELOAD(consume_nbits, preload_nbits) \ + (CONSUMABLE_NBITS >= (consume_nbits) && \ + FASTLOOP_PRELOADABLE_NBITS >= (consume_nbits) + (preload_nbits)) + +/* + * REFILL_BITS_BRANCHLESS() branchlessly refills the bitbuffer variable by + * reading the next word from the input buffer and updating 'in_next' and + * 'bitsleft' based on how many bits were refilled -- counting whole bytes only. + * This is much faster than reading a byte at a time, at least if the CPU is + * little endian and supports fast unaligned memory accesses. + * + * The simplest way of branchlessly updating 'bitsleft' would be: + * + * bitsleft += (MAX_BITSLEFT - bitsleft) & ~7; + * + * To make it faster, we define MAX_BITSLEFT to be 'WORDBITS - 1' rather than + * WORDBITS, so that in binary it looks like 111111 or 11111. Then, we update + * 'bitsleft' by just setting the bits above the low 3 bits: + * + * bitsleft |= MAX_BITSLEFT & ~7; + * + * That compiles down to a single instruction like 'or $0x38, %rbp'. Using + * 'MAX_BITSLEFT == WORDBITS - 1' also has the advantage that refills can be + * done when 'bitsleft == MAX_BITSLEFT' without invoking undefined behavior. 
+ * + * The simplest way of branchlessly updating 'in_next' would be: + * + * in_next += (MAX_BITSLEFT - bitsleft) >> 3; + * + * With 'MAX_BITSLEFT == WORDBITS - 1' we could use an XOR instead, though this + * isn't really better: + * + * in_next += (MAX_BITSLEFT ^ bitsleft) >> 3; + * + * An alternative which can be marginally better is the following: + * + * in_next += sizeof(bitbuf_t) - 1; + * in_next -= (bitsleft >> 3) & 0x7; + * + * It seems this would increase the number of CPU instructions from 3 (sub, shr, + * add) to 4 (add, shr, and, sub). However, if the CPU has a bitfield + * extraction instruction (e.g. arm's ubfx), it stays at 3, and is potentially + * more efficient because the length of the longest dependency chain decreases + * from 3 to 2. This alternative also has the advantage that it ignores the + * high bits in 'bitsleft', so it is compatible with the micro-optimization we + * use where we let the high bits of 'bitsleft' contain garbage. + */ +#define REFILL_BITS_BRANCHLESS() \ +do { \ + bitbuf |= get_unaligned_leword(in_next) << (u8)bitsleft; \ + in_next += sizeof(bitbuf_t) - 1; \ + in_next -= (bitsleft >> 3) & 0x7; \ + bitsleft |= MAX_BITSLEFT & ~7; \ +} while (0) + +/* + * REFILL_BITS() loads bits from the input buffer until the bitbuffer variable + * contains at least CONSUMABLE_NBITS consumable bits. + * + * This checks for the end of input, and it doesn't guarantee + * FASTLOOP_PRELOADABLE_NBITS, so it can't be used in the fastloop. + * + * If we would overread the input buffer, we just don't read anything, leaving + * the bits zeroed but marking them filled. This simplifies the decompressor + * because it removes the need to always be able to distinguish between real + * overreads and overreads caused only by the decompressor's own lookahead. + * + * We do still keep track of the number of bytes that have been overread, for + * two reasons. 
First, it allows us to determine the exact number of bytes that + * were consumed once the stream ends or an uncompressed block is reached. + * Second, it allows us to stop early if the overread amount gets so large (more + * than sizeof bitbuf) that it can only be caused by a real overread. (The + * second part is arguably unneeded, since libdeflate is buffer-based; given + * infinite zeroes, it will eventually either completely fill the output buffer + * or return an error. However, we do it to be slightly more friendly to the + * not-recommended use case of decompressing with an unknown output size.) + */ +#define REFILL_BITS() \ +do { \ + if (UNALIGNED_ACCESS_IS_FAST && \ + likely(in_end - in_next >= sizeof(bitbuf_t))) { \ + REFILL_BITS_BRANCHLESS(); \ + } else { \ + while ((u8)bitsleft < CONSUMABLE_NBITS) { \ + if (likely(in_next != in_end)) { \ + bitbuf |= (bitbuf_t)*in_next++ << \ + (u8)bitsleft; \ + } else { \ + overread_count++; \ + SAFETY_CHECK(overread_count <= \ + sizeof(bitbuf_t)); \ + } \ + bitsleft += 8; \ + } \ + } \ +} while (0) + +/* + * REFILL_BITS_IN_FASTLOOP() is like REFILL_BITS(), but it doesn't check for the + * end of the input. It can only be used in the fastloop. + */ +#define REFILL_BITS_IN_FASTLOOP() \ +do { \ + STATIC_ASSERT(UNALIGNED_ACCESS_IS_FAST || \ + FASTLOOP_PRELOADABLE_NBITS == CONSUMABLE_NBITS); \ + if (UNALIGNED_ACCESS_IS_FAST) { \ + REFILL_BITS_BRANCHLESS(); \ + } else { \ + while ((u8)bitsleft < CONSUMABLE_NBITS) { \ + bitbuf |= (bitbuf_t)*in_next++ << (u8)bitsleft; \ + bitsleft += 8; \ + } \ + } \ +} while (0) + +/* + * This is the worst-case maximum number of output bytes that are written to + * during each iteration of the fastloop. The worst case is 2 literals, then a + * match of length DEFLATE_MAX_MATCH_LEN. Additionally, some slack space must + * be included for the intentional overrun in the match copy implementation. 
+ */ +#define FASTLOOP_MAX_BYTES_WRITTEN \ + (2 + DEFLATE_MAX_MATCH_LEN + (5 * WORDBYTES) - 1) + +/* + * This is the worst-case maximum number of input bytes that are read during + * each iteration of the fastloop. To get this value, we first compute the + * greatest number of bits that can be refilled during a loop iteration. The + * refill at the beginning can add at most MAX_BITSLEFT, and the amount that can + * be refilled later is no more than the maximum amount that can be consumed by + * 2 literals that don't need a subtable, then a match. We convert this value + * to bytes, rounding up; this gives the maximum number of bytes that 'in_next' + * can be advanced. Finally, we add sizeof(bitbuf_t) to account for + * REFILL_BITS_BRANCHLESS() reading a word past 'in_next'. + */ +#define FASTLOOP_MAX_BYTES_READ \ + (DIV_ROUND_UP(MAX_BITSLEFT + (2 * LITLEN_TABLEBITS) + \ + LENGTH_MAXBITS + OFFSET_MAXBITS, 8) + \ + sizeof(bitbuf_t)) + +/***************************************************************************** + * Huffman decoding * + *****************************************************************************/ + +/* + * The fastest way to decode Huffman-encoded data is basically to use a decode + * table that maps the next TABLEBITS bits of data to their symbol. Each entry + * decode_table[i] maps to the symbol whose codeword is a prefix of 'i'. A + * symbol with codeword length 'n' has '2**(TABLEBITS-n)' entries in the table. + * + * Ideally, TABLEBITS and the maximum codeword length would be the same; some + * compression formats are designed with this goal in mind. Unfortunately, in + * DEFLATE, the maximum litlen and offset codeword lengths are 15 bits, which is + * too large for a practical TABLEBITS. It's not *that* much larger, though, so + * the workaround is to use a single level of subtables. 
In the main table, + * entries for prefixes of codewords longer than TABLEBITS contain a "pointer" + * to the appropriate subtable along with the number of bits it is indexed with. + * + * The most efficient way to allocate subtables is to allocate them dynamically + * after the main table. The worst-case number of table entries needed, + * including subtables, is precomputable; see the ENOUGH constants below. + * + * A useful optimization is to store the codeword lengths in the decode table so + * that they don't have to be looked up by indexing a separate table that maps + * symbols to their codeword lengths. We basically do this; however, for the + * litlen and offset codes we also implement some DEFLATE-specific optimizations + * that build in the consideration of the "extra bits" and the + * literal/length/end-of-block division. For the exact decode table entry + * format we use, see the definitions of the *_decode_results[] arrays below. + */ + + +/* + * These are the TABLEBITS values we use for each of the DEFLATE Huffman codes, + * along with their corresponding ENOUGH values. + * + * For the precode, we use PRECODE_TABLEBITS == 7 since this is the maximum + * precode codeword length. This avoids ever needing subtables. + * + * For the litlen and offset codes, we cannot realistically avoid ever needing + * subtables, since litlen and offset codewords can be up to 15 bits. A higher + * TABLEBITS reduces the number of lookups that need a subtable, which increases + * performance; however, it increases memory usage and makes building the table + * take longer, which decreases performance. We choose values that work well in + * practice, making subtables rarely needed without making the tables too large. + * + * Our choice of OFFSET_TABLEBITS == 8 is a bit low; without any special + * considerations, 9 would fit the trade-off curve better. 
However, there is a + * performance benefit to using exactly 8 bits when it is a compile-time + * constant, as many CPUs can take the low byte more easily than the low 9 bits. + * + * zlib treats its equivalents of TABLEBITS as maximum values; whenever it + * builds a table, it caps the actual table_bits to the longest codeword. This + * makes sense in theory, as there's no need for the table to be any larger than + * needed to support the longest codeword. However, having the table bits be a + * compile-time constant is beneficial to the performance of the decode loop, so + * there is a trade-off. libdeflate currently uses the dynamic table_bits + * strategy for the litlen table only, due to its larger maximum size. + * PRECODE_TABLEBITS and OFFSET_TABLEBITS are smaller, so going dynamic there + * isn't as useful, and OFFSET_TABLEBITS=8 is useful as mentioned above. + * + * Each TABLEBITS value has a corresponding ENOUGH value that gives the + * worst-case maximum number of decode table entries, including the main table + * and all subtables. The ENOUGH value depends on three parameters: + * + * (1) the maximum number of symbols in the code (DEFLATE_NUM_*_SYMS) + * (2) the maximum number of main table bits (*_TABLEBITS) + * (3) the maximum allowed codeword length (DEFLATE_MAX_*_CODEWORD_LEN) + * + * The ENOUGH values were computed using the utility program 'enough' from zlib. + */ +#define PRECODE_TABLEBITS 7 +#define PRECODE_ENOUGH 128 /* enough 19 7 7 */ +#define LITLEN_TABLEBITS 11 +#define LITLEN_ENOUGH 2342 /* enough 288 11 15 */ +#define OFFSET_TABLEBITS 8 +#define OFFSET_ENOUGH 402 /* enough 32 8 15 */ + +/* + * make_decode_table_entry() creates a decode table entry for the given symbol + * by combining the static part 'decode_results[sym]' with the dynamic part + * 'len', which is the remaining codeword length (the codeword length for main + * table entries, or the codeword length minus TABLEBITS for subtable entries). 
+ * + * In all cases, we add 'len' to each of the two low-order bytes to create the + * appropriately-formatted decode table entry. See the definitions of the + * *_decode_results[] arrays below, where the entry format is described. + */ +static forceinline u32 +make_decode_table_entry(const u32 decode_results[], u32 sym, u32 len) +{ + return decode_results[sym] + (len << 8) + len; +} + +/* + * Here is the format of our precode decode table entries. Bits not explicitly + * described contain zeroes: + * + * Bit 20-16: presym + * Bit 10-8: codeword length [not used] + * Bit 2-0: codeword length + * + * The precode decode table never has subtables, since we use + * PRECODE_TABLEBITS == DEFLATE_MAX_PRE_CODEWORD_LEN. + * + * precode_decode_results[] contains the static part of the entry for each + * symbol. make_decode_table_entry() produces the final entries. + */ +static const u32 precode_decode_results[] = { +#define ENTRY(presym) ((u32)presym << 16) + ENTRY(0) , ENTRY(1) , ENTRY(2) , ENTRY(3) , + ENTRY(4) , ENTRY(5) , ENTRY(6) , ENTRY(7) , + ENTRY(8) , ENTRY(9) , ENTRY(10) , ENTRY(11) , + ENTRY(12) , ENTRY(13) , ENTRY(14) , ENTRY(15) , + ENTRY(16) , ENTRY(17) , ENTRY(18) , +#undef ENTRY +}; + +/* Litlen and offset decode table entry flags */ + +/* Indicates a literal entry in the litlen decode table */ +#define HUFFDEC_LITERAL 0x80000000 + +/* Indicates that HUFFDEC_SUBTABLE_POINTER or HUFFDEC_END_OF_BLOCK is set */ +#define HUFFDEC_EXCEPTIONAL 0x00008000 + +/* Indicates a subtable pointer entry in the litlen or offset decode table */ +#define HUFFDEC_SUBTABLE_POINTER 0x00004000 + +/* Indicates an end-of-block entry in the litlen decode table */ +#define HUFFDEC_END_OF_BLOCK 0x00002000 + +/* Maximum number of bits that can be consumed by decoding a match length */ +#define LENGTH_MAXBITS (DEFLATE_MAX_LITLEN_CODEWORD_LEN + \ + DEFLATE_MAX_EXTRA_LENGTH_BITS) +#define LENGTH_MAXFASTBITS (LITLEN_TABLEBITS /* no subtable needed */ + \ + DEFLATE_MAX_EXTRA_LENGTH_BITS) + 
+/* + * Here is the format of our litlen decode table entries. Bits not explicitly + * described contain zeroes: + * + * Literals: + * Bit 31: 1 (HUFFDEC_LITERAL) + * Bit 23-16: literal value + * Bit 15: 0 (!HUFFDEC_EXCEPTIONAL) + * Bit 14: 0 (!HUFFDEC_SUBTABLE_POINTER) + * Bit 13: 0 (!HUFFDEC_END_OF_BLOCK) + * Bit 11-8: remaining codeword length [not used] + * Bit 3-0: remaining codeword length + * Lengths: + * Bit 31: 0 (!HUFFDEC_LITERAL) + * Bit 24-16: length base value + * Bit 15: 0 (!HUFFDEC_EXCEPTIONAL) + * Bit 14: 0 (!HUFFDEC_SUBTABLE_POINTER) + * Bit 13: 0 (!HUFFDEC_END_OF_BLOCK) + * Bit 11-8: remaining codeword length + * Bit 4-0: remaining codeword length + number of extra bits + * End of block: + * Bit 31: 0 (!HUFFDEC_LITERAL) + * Bit 15: 1 (HUFFDEC_EXCEPTIONAL) + * Bit 14: 0 (!HUFFDEC_SUBTABLE_POINTER) + * Bit 13: 1 (HUFFDEC_END_OF_BLOCK) + * Bit 11-8: remaining codeword length [not used] + * Bit 3-0: remaining codeword length + * Subtable pointer: + * Bit 31: 0 (!HUFFDEC_LITERAL) + * Bit 30-16: index of start of subtable + * Bit 15: 1 (HUFFDEC_EXCEPTIONAL) + * Bit 14: 1 (HUFFDEC_SUBTABLE_POINTER) + * Bit 13: 0 (!HUFFDEC_END_OF_BLOCK) + * Bit 11-8: number of subtable bits + * Bit 3-0: number of main table bits + * + * This format has several desirable properties: + * + * - The codeword length, length slot base, and number of extra length bits + * are all built in. This eliminates the need to separately look up this + * information by indexing separate arrays by symbol or length slot. + * + * - The HUFFDEC_* flags enable easily distinguishing between the different + * types of entries. The HUFFDEC_LITERAL flag enables a fast path for + * literals; the high bit is used for this, as some CPUs can test the + * high bit more easily than other bits. The HUFFDEC_EXCEPTIONAL flag + * makes it possible to detect the two unlikely cases (subtable pointer + * and end of block) in a single bit flag test. 
+ * + * - The low byte is the number of bits that need to be removed from the + * bitstream; this makes this value easily accessible, and it enables the + * micro-optimization of doing 'bitsleft -= entry' instead of + * 'bitsleft -= (u8)entry'. It also includes the number of extra bits, + * so they don't need to be removed separately. + * + * - The flags in bits 15-13 are arranged to be 0 when the + * "remaining codeword length" in bits 11-8 is needed, making this value + * fairly easily accessible as well via a shift and downcast. + * + * - Similarly, bits 13-12 are 0 when the "subtable bits" in bits 11-8 are + * needed, making it possible to extract this value with '& 0x3F' rather + * than '& 0xF'. This value is only used as a shift amount, so this can + * save an 'and' instruction as the masking by 0x3F happens implicitly. + * + * litlen_decode_results[] contains the static part of the entry for each + * symbol. make_decode_table_entry() produces the final entries. + */ +static const u32 litlen_decode_results[] = { + + /* Literals */ +#define ENTRY(literal) (HUFFDEC_LITERAL | ((u32)literal << 16)) + ENTRY(0) , ENTRY(1) , ENTRY(2) , ENTRY(3) , + ENTRY(4) , ENTRY(5) , ENTRY(6) , ENTRY(7) , + ENTRY(8) , ENTRY(9) , ENTRY(10) , ENTRY(11) , + ENTRY(12) , ENTRY(13) , ENTRY(14) , ENTRY(15) , + ENTRY(16) , ENTRY(17) , ENTRY(18) , ENTRY(19) , + ENTRY(20) , ENTRY(21) , ENTRY(22) , ENTRY(23) , + ENTRY(24) , ENTRY(25) , ENTRY(26) , ENTRY(27) , + ENTRY(28) , ENTRY(29) , ENTRY(30) , ENTRY(31) , + ENTRY(32) , ENTRY(33) , ENTRY(34) , ENTRY(35) , + ENTRY(36) , ENTRY(37) , ENTRY(38) , ENTRY(39) , + ENTRY(40) , ENTRY(41) , ENTRY(42) , ENTRY(43) , + ENTRY(44) , ENTRY(45) , ENTRY(46) , ENTRY(47) , + ENTRY(48) , ENTRY(49) , ENTRY(50) , ENTRY(51) , + ENTRY(52) , ENTRY(53) , ENTRY(54) , ENTRY(55) , + ENTRY(56) , ENTRY(57) , ENTRY(58) , ENTRY(59) , + ENTRY(60) , ENTRY(61) , ENTRY(62) , ENTRY(63) , + ENTRY(64) , ENTRY(65) , ENTRY(66) , ENTRY(67) , + ENTRY(68) , ENTRY(69) , ENTRY(70) , 
ENTRY(71) , + ENTRY(72) , ENTRY(73) , ENTRY(74) , ENTRY(75) , + ENTRY(76) , ENTRY(77) , ENTRY(78) , ENTRY(79) , + ENTRY(80) , ENTRY(81) , ENTRY(82) , ENTRY(83) , + ENTRY(84) , ENTRY(85) , ENTRY(86) , ENTRY(87) , + ENTRY(88) , ENTRY(89) , ENTRY(90) , ENTRY(91) , + ENTRY(92) , ENTRY(93) , ENTRY(94) , ENTRY(95) , + ENTRY(96) , ENTRY(97) , ENTRY(98) , ENTRY(99) , + ENTRY(100) , ENTRY(101) , ENTRY(102) , ENTRY(103) , + ENTRY(104) , ENTRY(105) , ENTRY(106) , ENTRY(107) , + ENTRY(108) , ENTRY(109) , ENTRY(110) , ENTRY(111) , + ENTRY(112) , ENTRY(113) , ENTRY(114) , ENTRY(115) , + ENTRY(116) , ENTRY(117) , ENTRY(118) , ENTRY(119) , + ENTRY(120) , ENTRY(121) , ENTRY(122) , ENTRY(123) , + ENTRY(124) , ENTRY(125) , ENTRY(126) , ENTRY(127) , + ENTRY(128) , ENTRY(129) , ENTRY(130) , ENTRY(131) , + ENTRY(132) , ENTRY(133) , ENTRY(134) , ENTRY(135) , + ENTRY(136) , ENTRY(137) , ENTRY(138) , ENTRY(139) , + ENTRY(140) , ENTRY(141) , ENTRY(142) , ENTRY(143) , + ENTRY(144) , ENTRY(145) , ENTRY(146) , ENTRY(147) , + ENTRY(148) , ENTRY(149) , ENTRY(150) , ENTRY(151) , + ENTRY(152) , ENTRY(153) , ENTRY(154) , ENTRY(155) , + ENTRY(156) , ENTRY(157) , ENTRY(158) , ENTRY(159) , + ENTRY(160) , ENTRY(161) , ENTRY(162) , ENTRY(163) , + ENTRY(164) , ENTRY(165) , ENTRY(166) , ENTRY(167) , + ENTRY(168) , ENTRY(169) , ENTRY(170) , ENTRY(171) , + ENTRY(172) , ENTRY(173) , ENTRY(174) , ENTRY(175) , + ENTRY(176) , ENTRY(177) , ENTRY(178) , ENTRY(179) , + ENTRY(180) , ENTRY(181) , ENTRY(182) , ENTRY(183) , + ENTRY(184) , ENTRY(185) , ENTRY(186) , ENTRY(187) , + ENTRY(188) , ENTRY(189) , ENTRY(190) , ENTRY(191) , + ENTRY(192) , ENTRY(193) , ENTRY(194) , ENTRY(195) , + ENTRY(196) , ENTRY(197) , ENTRY(198) , ENTRY(199) , + ENTRY(200) , ENTRY(201) , ENTRY(202) , ENTRY(203) , + ENTRY(204) , ENTRY(205) , ENTRY(206) , ENTRY(207) , + ENTRY(208) , ENTRY(209) , ENTRY(210) , ENTRY(211) , + ENTRY(212) , ENTRY(213) , ENTRY(214) , ENTRY(215) , + ENTRY(216) , ENTRY(217) , ENTRY(218) , ENTRY(219) , + ENTRY(220) , 
ENTRY(221) , ENTRY(222) , ENTRY(223) , + ENTRY(224) , ENTRY(225) , ENTRY(226) , ENTRY(227) , + ENTRY(228) , ENTRY(229) , ENTRY(230) , ENTRY(231) , + ENTRY(232) , ENTRY(233) , ENTRY(234) , ENTRY(235) , + ENTRY(236) , ENTRY(237) , ENTRY(238) , ENTRY(239) , + ENTRY(240) , ENTRY(241) , ENTRY(242) , ENTRY(243) , + ENTRY(244) , ENTRY(245) , ENTRY(246) , ENTRY(247) , + ENTRY(248) , ENTRY(249) , ENTRY(250) , ENTRY(251) , + ENTRY(252) , ENTRY(253) , ENTRY(254) , ENTRY(255) , +#undef ENTRY + + /* End of block */ + HUFFDEC_EXCEPTIONAL | HUFFDEC_END_OF_BLOCK, + + /* Lengths */ +#define ENTRY(length_base, num_extra_bits) \ + (((u32)(length_base) << 16) | (num_extra_bits)) + ENTRY(3 , 0) , ENTRY(4 , 0) , ENTRY(5 , 0) , ENTRY(6 , 0), + ENTRY(7 , 0) , ENTRY(8 , 0) , ENTRY(9 , 0) , ENTRY(10 , 0), + ENTRY(11 , 1) , ENTRY(13 , 1) , ENTRY(15 , 1) , ENTRY(17 , 1), + ENTRY(19 , 2) , ENTRY(23 , 2) , ENTRY(27 , 2) , ENTRY(31 , 2), + ENTRY(35 , 3) , ENTRY(43 , 3) , ENTRY(51 , 3) , ENTRY(59 , 3), + ENTRY(67 , 4) , ENTRY(83 , 4) , ENTRY(99 , 4) , ENTRY(115, 4), + ENTRY(131, 5) , ENTRY(163, 5) , ENTRY(195, 5) , ENTRY(227, 5), + ENTRY(258, 0) , ENTRY(258, 0) , ENTRY(258, 0) , +#undef ENTRY +}; + +/* Maximum number of bits that can be consumed by decoding a match offset */ +#define OFFSET_MAXBITS (DEFLATE_MAX_OFFSET_CODEWORD_LEN + \ + DEFLATE_MAX_EXTRA_OFFSET_BITS) +#define OFFSET_MAXFASTBITS (OFFSET_TABLEBITS /* no subtable needed */ + \ + DEFLATE_MAX_EXTRA_OFFSET_BITS) + +/* + * Here is the format of our offset decode table entries. 
Bits not explicitly + * described contain zeroes: + * + * Offsets: + * Bit 31-16: offset base value + * Bit 15: 0 (!HUFFDEC_EXCEPTIONAL) + * Bit 14: 0 (!HUFFDEC_SUBTABLE_POINTER) + * Bit 11-8: remaining codeword length + * Bit 4-0: remaining codeword length + number of extra bits + * Subtable pointer: + * Bit 31-16: index of start of subtable + * Bit 15: 1 (HUFFDEC_EXCEPTIONAL) + * Bit 14: 1 (HUFFDEC_SUBTABLE_POINTER) + * Bit 11-8: number of subtable bits + * Bit 3-0: number of main table bits + * + * These work the same way as the length entries and subtable pointer entries in + * the litlen decode table; see litlen_decode_results[] above. + */ +static const u32 offset_decode_results[] = { +#define ENTRY(offset_base, num_extra_bits) \ + (((u32)(offset_base) << 16) | (num_extra_bits)) + ENTRY(1 , 0) , ENTRY(2 , 0) , ENTRY(3 , 0) , ENTRY(4 , 0) , + ENTRY(5 , 1) , ENTRY(7 , 1) , ENTRY(9 , 2) , ENTRY(13 , 2) , + ENTRY(17 , 3) , ENTRY(25 , 3) , ENTRY(33 , 4) , ENTRY(49 , 4) , + ENTRY(65 , 5) , ENTRY(97 , 5) , ENTRY(129 , 6) , ENTRY(193 , 6) , + ENTRY(257 , 7) , ENTRY(385 , 7) , ENTRY(513 , 8) , ENTRY(769 , 8) , + ENTRY(1025 , 9) , ENTRY(1537 , 9) , ENTRY(2049 , 10) , ENTRY(3073 , 10) , + ENTRY(4097 , 11) , ENTRY(6145 , 11) , ENTRY(8193 , 12) , ENTRY(12289 , 12) , + ENTRY(16385 , 13) , ENTRY(24577 , 13) , ENTRY(24577 , 13) , ENTRY(24577 , 13) , +#undef ENTRY +}; + +/* + * The main DEFLATE decompressor structure. Since libdeflate only supports + * full-buffer decompression, this structure doesn't store the entire + * decompression state, most of which is in stack variables. Instead, this + * struct just contains the decode tables and some temporary arrays used for + * building them, as these are too large to comfortably allocate on the stack. 
+ * + * Storing the decode tables in the decompressor struct also allows the decode + * tables for the static codes to be reused whenever two static Huffman blocks + * are decoded without an intervening dynamic block, even across streams. + */ +struct libdeflate_decompressor { + + /* + * The arrays aren't all needed at the same time. 'precode_lens' and + * 'precode_decode_table' are unneeded after 'lens' has been filled. + * Furthermore, 'lens' need not be retained after building the litlen + * and offset decode tables. In fact, 'lens' can be in union with + * 'litlen_decode_table' provided that 'offset_decode_table' is separate + * and is built first. + */ + + union { + u8 precode_lens[DEFLATE_NUM_PRECODE_SYMS]; + + struct { + u8 lens[DEFLATE_NUM_LITLEN_SYMS + + DEFLATE_NUM_OFFSET_SYMS + + DEFLATE_MAX_LENS_OVERRUN]; + + u32 precode_decode_table[PRECODE_ENOUGH]; + } l; + + u32 litlen_decode_table[LITLEN_ENOUGH]; + } u; + + u32 offset_decode_table[OFFSET_ENOUGH]; + + /* used only during build_decode_table() */ + u16 sorted_syms[DEFLATE_MAX_NUM_SYMS]; + + bool static_codes_loaded; + unsigned litlen_tablebits; +}; + +/* + * Build a table for fast decoding of symbols from a Huffman code. As input, + * this function takes the codeword length of each symbol which may be used in + * the code. As output, it produces a decode table for the canonical Huffman + * code described by the codeword lengths. The decode table is built with the + * assumption that it will be indexed with "bit-reversed" codewords, where the + * low-order bit is the first bit of the codeword. This format is used for all + * Huffman codes in DEFLATE. + * + * @decode_table + * The array in which the decode table will be generated. This array must + * have sufficient length; see the definition of the ENOUGH numbers. + * @lens + * An array which provides, for each symbol, the length of the + * corresponding codeword in bits, or 0 if the symbol is unused. 
This may + * alias @decode_table, since nothing is written to @decode_table until all + * @lens have been consumed. All codeword lengths are assumed to be <= + * @max_codeword_len but are otherwise considered untrusted. If they do + * not form a valid Huffman code, then the decode table is not built and + * %false is returned. + * @num_syms + * The number of symbols in the code, including all unused symbols. + * @decode_results + * An array which gives the incomplete decode result for each symbol. The + * needed values in this array will be combined with codeword lengths to + * make the final decode table entries using make_decode_table_entry(). + * @table_bits + * The log base-2 of the number of main table entries to use. + * If @table_bits_ret != NULL, then @table_bits is treated as a maximum + * value and it will be decreased if a smaller table would be sufficient. + * @max_codeword_len + * The maximum allowed codeword length for this Huffman code. + * Must be <= DEFLATE_MAX_CODEWORD_LEN. + * @sorted_syms + * A temporary array of length @num_syms. + * @table_bits_ret + * If non-NULL, then the dynamic table_bits is enabled, and the actual + * table_bits value will be returned here. + * + * Returns %true if successful; %false if the codeword lengths do not form a + * valid Huffman code. 
+ */ +static bool +build_decode_table(u32 decode_table[], + const u8 lens[], + const unsigned num_syms, + const u32 decode_results[], + unsigned table_bits, + unsigned max_codeword_len, + u16 *sorted_syms, + unsigned *table_bits_ret) +{ + unsigned len_counts[DEFLATE_MAX_CODEWORD_LEN + 1]; + unsigned offsets[DEFLATE_MAX_CODEWORD_LEN + 1]; + unsigned sym; /* current symbol */ + unsigned codeword; /* current codeword, bit-reversed */ + unsigned len; /* current codeword length in bits */ + unsigned count; /* num codewords remaining with this length */ + u32 codespace_used; /* codespace used out of '2^max_codeword_len' */ + unsigned cur_table_end; /* end index of current table */ + unsigned subtable_prefix; /* codeword prefix of current subtable */ + unsigned subtable_start; /* start index of current subtable */ + unsigned subtable_bits; /* log2 of current subtable length */ + + /* Count how many codewords have each length, including 0. */ + for (len = 0; len <= max_codeword_len; len++) + len_counts[len] = 0; + for (sym = 0; sym < num_syms; sym++) + len_counts[lens[sym]]++; + + /* + * Determine the actual maximum codeword length that was used, and + * decrease table_bits to it if allowed. + */ + while (max_codeword_len > 1 && len_counts[max_codeword_len] == 0) + max_codeword_len--; + if (table_bits_ret != NULL) { + table_bits = MIN(table_bits, max_codeword_len); + *table_bits_ret = table_bits; + } + + /* + * Sort the symbols primarily by increasing codeword length and + * secondarily by increasing symbol value; or equivalently by their + * codewords in lexicographic order, since a canonical code is assumed. + * + * For efficiency, also compute 'codespace_used' in the same pass over + * 'len_counts[]' used to build 'offsets[]' for sorting. + */ + + /* Ensure that 'codespace_used' cannot overflow. 
*/ + STATIC_ASSERT(sizeof(codespace_used) == 4); + STATIC_ASSERT(UINT32_MAX / (1U << (DEFLATE_MAX_CODEWORD_LEN - 1)) >= + DEFLATE_MAX_NUM_SYMS); + + offsets[0] = 0; + offsets[1] = len_counts[0]; + codespace_used = 0; + for (len = 1; len < max_codeword_len; len++) { + offsets[len + 1] = offsets[len] + len_counts[len]; + codespace_used = (codespace_used << 1) + len_counts[len]; + } + codespace_used = (codespace_used << 1) + len_counts[len]; + + for (sym = 0; sym < num_syms; sym++) + sorted_syms[offsets[lens[sym]]++] = sym; + + sorted_syms += offsets[0]; /* Skip unused symbols */ + + /* lens[] is done being used, so we can write to decode_table[] now. */ + + /* + * Check whether the lengths form a complete code (exactly fills the + * codespace), an incomplete code (doesn't fill the codespace), or an + * overfull code (overflows the codespace). A codeword of length 'n' + * uses proportion '1/(2^n)' of the codespace. An overfull code is + * nonsensical, so is considered invalid. An incomplete code is + * considered valid only in two specific cases; see below. + */ + + /* overfull code? */ + if (unlikely(codespace_used > (1U << max_codeword_len))) + return false; + + /* incomplete code? */ + if (unlikely(codespace_used < (1U << max_codeword_len))) { + u32 entry; + unsigned i; + + if (codespace_used == 0) { + /* + * An empty code is allowed. This can happen for the + * offset code in DEFLATE, since a dynamic Huffman block + * need not contain any matches. + */ + + /* sym=0, len=1 (arbitrary) */ + entry = make_decode_table_entry(decode_results, 0, 1); + } else { + /* + * Allow codes with a single used symbol, with codeword + * length 1. The DEFLATE RFC is unclear regarding this + * case. What zlib's decompressor does is permit this + * for the litlen and offset codes and assume the + * codeword is '0' rather than '1'. We do the same + * except we allow this for precodes too, since there's + * no convincing reason to treat the codes differently. 
+ * We also assign both codewords '0' and '1' to the + * symbol to avoid having to handle '1' specially. + */ + if (codespace_used != (1U << (max_codeword_len - 1)) || + len_counts[1] != 1) + return false; + entry = make_decode_table_entry(decode_results, + *sorted_syms, 1); + } + /* + * Note: the decode table still must be fully initialized, in + * case the stream is malformed and contains bits from the part + * of the codespace the incomplete code doesn't use. + */ + for (i = 0; i < (1U << table_bits); i++) + decode_table[i] = entry; + return true; + } + + /* + * The lengths form a complete code. Now, enumerate the codewords in + * lexicographic order and fill the decode table entries for each one. + * + * First, process all codewords with len <= table_bits. Each one gets + * '2^(table_bits-len)' direct entries in the table. + * + * Since DEFLATE uses bit-reversed codewords, these entries aren't + * consecutive but rather are spaced '2^len' entries apart. This makes + * filling them naively somewhat awkward and inefficient, since strided + * stores are less cache-friendly and preclude the use of word or + * vector-at-a-time stores to fill multiple entries per instruction. + * + * To optimize this, we incrementally double the table size. When + * processing codewords with length 'len', the table is treated as + * having only '2^len' entries, so each codeword uses just one entry. + * Then, each time 'len' is incremented, the table size is doubled and + * the first half is copied to the second half. This significantly + * improves performance over naively doing strided stores. + * + * Note that some entries copied for each table doubling may not have + * been initialized yet, but it doesn't matter since they're guaranteed + * to be initialized later (because the Huffman code is complete). 
+ */ + codeword = 0; + len = 1; + while ((count = len_counts[len]) == 0) + len++; + cur_table_end = 1U << len; + while (len <= table_bits) { + /* Process all 'count' codewords with length 'len' bits. */ + do { + unsigned bit; + + /* Fill the first entry for the current codeword. */ + decode_table[codeword] = + make_decode_table_entry(decode_results, + *sorted_syms++, len); + + if (codeword == cur_table_end - 1) { + /* Last codeword (all 1's) */ + for (; len < table_bits; len++) { + memcpy(&decode_table[cur_table_end], + decode_table, + cur_table_end * + sizeof(decode_table[0])); + cur_table_end <<= 1; + } + return true; + } + /* + * To advance to the lexicographically next codeword in + * the canonical code, the codeword must be incremented, + * then 0's must be appended to the codeword as needed + * to match the next codeword's length. + * + * Since the codeword is bit-reversed, appending 0's is + * a no-op. However, incrementing it is nontrivial. To + * do so efficiently, use the 'bsr' instruction to find + * the last (highest order) 0 bit in the codeword, set + * it, and clear any later (higher order) 1 bits. But + * 'bsr' actually finds the highest order 1 bit, so to + * use it first flip all bits in the codeword by XOR'ing + * it with (1U << len) - 1 == cur_table_end - 1. + */ + bit = 1U << bsr32(codeword ^ (cur_table_end - 1)); + codeword &= bit - 1; + codeword |= bit; + } while (--count); + + /* Advance to the next codeword length. */ + do { + if (++len <= table_bits) { + memcpy(&decode_table[cur_table_end], + decode_table, + cur_table_end * sizeof(decode_table[0])); + cur_table_end <<= 1; + } + } while ((count = len_counts[len]) == 0); + } + + /* Process codewords with len > table_bits. These require subtables. 
*/ + cur_table_end = 1U << table_bits; + subtable_prefix = -1; + subtable_start = 0; + for (;;) { + u32 entry; + unsigned i; + unsigned stride; + unsigned bit; + + /* + * Start a new subtable if the first 'table_bits' bits of the + * codeword don't match the prefix of the current subtable. + */ + if ((codeword & ((1U << table_bits) - 1)) != subtable_prefix) { + subtable_prefix = (codeword & ((1U << table_bits) - 1)); + subtable_start = cur_table_end; + /* + * Calculate the subtable length. If the codeword has + * length 'table_bits + n', then the subtable needs + * '2^n' entries. But it may need more; if fewer than + * '2^n' codewords of length 'table_bits + n' remain, + * then the length will need to be incremented to bring + * in longer codewords until the subtable can be + * completely filled. Note that because the Huffman + * code is complete, it will always be possible to fill + * the subtable eventually. + */ + subtable_bits = len - table_bits; + codespace_used = count; + while (codespace_used < (1U << subtable_bits)) { + subtable_bits++; + codespace_used = (codespace_used << 1) + + len_counts[table_bits + subtable_bits]; + } + cur_table_end = subtable_start + (1U << subtable_bits); + + /* + * Create the entry that points from the main table to + * the subtable. + */ + decode_table[subtable_prefix] = + ((u32)subtable_start << 16) | + HUFFDEC_EXCEPTIONAL | + HUFFDEC_SUBTABLE_POINTER | + (subtable_bits << 8) | table_bits; + } + + /* Fill the subtable entries for the current codeword. */ + entry = make_decode_table_entry(decode_results, *sorted_syms++, + len - table_bits); + i = subtable_start + (codeword >> table_bits); + stride = 1U << (len - table_bits); + do { + decode_table[i] = entry; + i += stride; + } while (i < cur_table_end); + + /* Advance to the next codeword. */ + if (codeword == (1U << len) - 1) /* last codeword (all 1's)? 
*/ + return true; + bit = 1U << bsr32(codeword ^ ((1U << len) - 1)); + codeword &= bit - 1; + codeword |= bit; + count--; + while (count == 0) + count = len_counts[++len]; + } +} + +/* Build the decode table for the precode. */ +static bool +build_precode_decode_table(struct libdeflate_decompressor *d) +{ + /* When you change TABLEBITS, you must change ENOUGH, and vice versa! */ + STATIC_ASSERT(PRECODE_TABLEBITS == 7 && PRECODE_ENOUGH == 128); + + STATIC_ASSERT(ARRAY_LEN(precode_decode_results) == + DEFLATE_NUM_PRECODE_SYMS); + + return build_decode_table(d->u.l.precode_decode_table, + d->u.precode_lens, + DEFLATE_NUM_PRECODE_SYMS, + precode_decode_results, + PRECODE_TABLEBITS, + DEFLATE_MAX_PRE_CODEWORD_LEN, + d->sorted_syms, + NULL); +} + +/* Build the decode table for the literal/length code. */ +static bool +build_litlen_decode_table(struct libdeflate_decompressor *d, + unsigned num_litlen_syms, unsigned num_offset_syms) +{ + /* When you change TABLEBITS, you must change ENOUGH, and vice versa! */ + STATIC_ASSERT(LITLEN_TABLEBITS == 11 && LITLEN_ENOUGH == 2342); + + STATIC_ASSERT(ARRAY_LEN(litlen_decode_results) == + DEFLATE_NUM_LITLEN_SYMS); + + return build_decode_table(d->u.litlen_decode_table, + d->u.l.lens, + num_litlen_syms, + litlen_decode_results, + LITLEN_TABLEBITS, + DEFLATE_MAX_LITLEN_CODEWORD_LEN, + d->sorted_syms, + &d->litlen_tablebits); +} + +/* Build the decode table for the offset code. */ +static bool +build_offset_decode_table(struct libdeflate_decompressor *d, + unsigned num_litlen_syms, unsigned num_offset_syms) +{ + /* When you change TABLEBITS, you must change ENOUGH, and vice versa! 
*/ + STATIC_ASSERT(OFFSET_TABLEBITS == 8 && OFFSET_ENOUGH == 402); + + STATIC_ASSERT(ARRAY_LEN(offset_decode_results) == + DEFLATE_NUM_OFFSET_SYMS); + + return build_decode_table(d->offset_decode_table, + d->u.l.lens + num_litlen_syms, + num_offset_syms, + offset_decode_results, + OFFSET_TABLEBITS, + DEFLATE_MAX_OFFSET_CODEWORD_LEN, + d->sorted_syms, + NULL); +} + +/***************************************************************************** + * Main decompression routine + *****************************************************************************/ + +typedef enum libdeflate_result (*decompress_func_t) + (struct libdeflate_decompressor * restrict d, + const void * restrict in, size_t in_nbytes, + void * restrict out, size_t out_nbytes_avail, + size_t *actual_in_nbytes_ret, size_t *actual_out_nbytes_ret); + +#define FUNCNAME deflate_decompress_default +#undef ATTRIBUTES +#undef EXTRACT_VARBITS +#undef EXTRACT_VARBITS8 +#include "decompress_template.h" + +/* Include architecture-specific implementation(s) if available. */ +#undef DEFAULT_IMPL +#undef arch_select_decompress_func +#if defined(ARCH_X86_32) || defined(ARCH_X86_64) +# include "x86/decompress_impl.h" +#endif + +#ifndef DEFAULT_IMPL +# define DEFAULT_IMPL deflate_decompress_default +#endif + +#ifdef arch_select_decompress_func +static enum libdeflate_result +dispatch_decomp(struct libdeflate_decompressor *d, + const void *in, size_t in_nbytes, + void *out, size_t out_nbytes_avail, + size_t *actual_in_nbytes_ret, size_t *actual_out_nbytes_ret); + +static volatile decompress_func_t decompress_impl = dispatch_decomp; + +/* Choose the best implementation at runtime. 
*/ +static enum libdeflate_result +dispatch_decomp(struct libdeflate_decompressor *d, + const void *in, size_t in_nbytes, + void *out, size_t out_nbytes_avail, + size_t *actual_in_nbytes_ret, size_t *actual_out_nbytes_ret) +{ + decompress_func_t f = arch_select_decompress_func(); + + if (f == NULL) + f = DEFAULT_IMPL; + + decompress_impl = f; + return f(d, in, in_nbytes, out, out_nbytes_avail, + actual_in_nbytes_ret, actual_out_nbytes_ret); +} +#else +/* The best implementation is statically known, so call it directly. */ +# define decompress_impl DEFAULT_IMPL +#endif + +/* + * This is the main DEFLATE decompression routine. See libdeflate.h for the + * documentation. + * + * Note that the real code is in decompress_template.h. The part here just + * handles calling the appropriate implementation depending on the CPU features + * at runtime. + */ +LIBDEFLATEAPI enum libdeflate_result +libdeflate_deflate_decompress_ex(struct libdeflate_decompressor *d, + const void *in, size_t in_nbytes, + void *out, size_t out_nbytes_avail, + size_t *actual_in_nbytes_ret, + size_t *actual_out_nbytes_ret) +{ + return decompress_impl(d, in, in_nbytes, out, out_nbytes_avail, + actual_in_nbytes_ret, actual_out_nbytes_ret); +} + +LIBDEFLATEAPI enum libdeflate_result +libdeflate_deflate_decompress(struct libdeflate_decompressor *d, + const void *in, size_t in_nbytes, + void *out, size_t out_nbytes_avail, + size_t *actual_out_nbytes_ret) +{ + return libdeflate_deflate_decompress_ex(d, in, in_nbytes, + out, out_nbytes_avail, + NULL, actual_out_nbytes_ret); +} + +LIBDEFLATEAPI struct libdeflate_decompressor * +libdeflate_alloc_decompressor(void) +{ + /* + * Note that only certain parts of the decompressor actually must be + * initialized here: + * + * - 'static_codes_loaded' must be initialized to false. 
+ * + * - The first half of the main portion of each decode table must be + * initialized to any value, to avoid reading from uninitialized + * memory during table expansion in build_decode_table(). (Although, + * this is really just to avoid warnings with dynamic tools like + * valgrind, since build_decode_table() is guaranteed to initialize + * all entries eventually anyway.) + * + * But for simplicity, we currently just zero the whole decompressor. + */ + struct libdeflate_decompressor *d = libdeflate_malloc(sizeof(*d)); + + if (d == NULL) + return NULL; + memset(d, 0, sizeof(*d)); + return d; +} + +LIBDEFLATEAPI void +libdeflate_free_decompressor(struct libdeflate_decompressor *d) +{ + libdeflate_free(d); +} diff --git a/tools/z64compress/src/enc/libdeflate/lib/gzip_compress.c b/tools/z64compress/src/enc/libdeflate/lib/gzip_compress.c new file mode 100644 index 000000000..e343e5068 --- /dev/null +++ b/tools/z64compress/src/enc/libdeflate/lib/gzip_compress.c @@ -0,0 +1,92 @@ +/* + * gzip_compress.c - compress with a gzip wrapper + * + * Copyright 2016 Eric Biggers + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "deflate_compress.h" +#include "gzip_constants.h" + +#include "libdeflate.h" + +LIBDEFLATEAPI size_t +libdeflate_gzip_compress(struct libdeflate_compressor *c, + const void *in, size_t in_nbytes, + void *out, size_t out_nbytes_avail) +{ + u8 *out_next = out; + unsigned compression_level; + u8 xfl; + size_t deflate_size; + + if (out_nbytes_avail <= GZIP_MIN_OVERHEAD) + return 0; + + /* ID1 */ + *out_next++ = GZIP_ID1; + /* ID2 */ + *out_next++ = GZIP_ID2; + /* CM */ + *out_next++ = GZIP_CM_DEFLATE; + /* FLG */ + *out_next++ = 0; + /* MTIME */ + put_unaligned_le32(GZIP_MTIME_UNAVAILABLE, out_next); + out_next += 4; + /* XFL */ + xfl = 0; + compression_level = libdeflate_get_compression_level(c); + if (compression_level < 2) + xfl |= GZIP_XFL_FASTEST_COMPRESSION; + else if (compression_level >= 8) + xfl |= GZIP_XFL_SLOWEST_COMPRESSION; + *out_next++ = xfl; + /* OS */ + *out_next++ = GZIP_OS_UNKNOWN; + + /* Compressed data */ + deflate_size = libdeflate_deflate_compress(c, in, in_nbytes, out_next, + out_nbytes_avail - GZIP_MIN_OVERHEAD); + if (deflate_size == 0) + return 0; + out_next += deflate_size; + + /* CRC32 */ + put_unaligned_le32(libdeflate_crc32(0, in, in_nbytes), out_next); + out_next += 4; + + /* ISIZE */ + put_unaligned_le32((u32)in_nbytes, out_next); + out_next += 4; + + return out_next - (u8 *)out; +} + +LIBDEFLATEAPI size_t +libdeflate_gzip_compress_bound(struct libdeflate_compressor *c, + size_t in_nbytes) +{ + return GZIP_MIN_OVERHEAD + + libdeflate_deflate_compress_bound(c, in_nbytes); +} diff --git a/tools/z64compress/src/enc/libdeflate/lib/gzip_constants.h b/tools/z64compress/src/enc/libdeflate/lib/gzip_constants.h new file mode 100644 index
000000000..35e4728d8 --- /dev/null +++ b/tools/z64compress/src/enc/libdeflate/lib/gzip_constants.h @@ -0,0 +1,45 @@ +/* + * gzip_constants.h - constants for the gzip wrapper format + */ + +#ifndef LIB_GZIP_CONSTANTS_H +#define LIB_GZIP_CONSTANTS_H + +#define GZIP_MIN_HEADER_SIZE 10 +#define GZIP_FOOTER_SIZE 8 +#define GZIP_MIN_OVERHEAD (GZIP_MIN_HEADER_SIZE + GZIP_FOOTER_SIZE) + +#define GZIP_ID1 0x1F +#define GZIP_ID2 0x8B + +#define GZIP_CM_DEFLATE 8 + +#define GZIP_FTEXT 0x01 +#define GZIP_FHCRC 0x02 +#define GZIP_FEXTRA 0x04 +#define GZIP_FNAME 0x08 +#define GZIP_FCOMMENT 0x10 +#define GZIP_FRESERVED 0xE0 + +#define GZIP_MTIME_UNAVAILABLE 0 + +#define GZIP_XFL_SLOWEST_COMPRESSION 0x02 +#define GZIP_XFL_FASTEST_COMPRESSION 0x04 + +#define GZIP_OS_FAT 0 +#define GZIP_OS_AMIGA 1 +#define GZIP_OS_VMS 2 +#define GZIP_OS_UNIX 3 +#define GZIP_OS_VM_CMS 4 +#define GZIP_OS_ATARI_TOS 5 +#define GZIP_OS_HPFS 6 +#define GZIP_OS_MACINTOSH 7 +#define GZIP_OS_Z_SYSTEM 8 +#define GZIP_OS_CP_M 9 +#define GZIP_OS_TOPS_20 10 +#define GZIP_OS_NTFS 11 +#define GZIP_OS_QDOS 12 +#define GZIP_OS_RISCOS 13 +#define GZIP_OS_UNKNOWN 255 + +#endif /* LIB_GZIP_CONSTANTS_H */ diff --git a/tools/z64compress/src/enc/libdeflate/lib/gzip_decompress.c b/tools/z64compress/src/enc/libdeflate/lib/gzip_decompress.c new file mode 100644 index 000000000..9518e7047 --- /dev/null +++ b/tools/z64compress/src/enc/libdeflate/lib/gzip_decompress.c @@ -0,0 +1,146 @@ +/* + * gzip_decompress.c - decompress with a gzip wrapper + * + * Copyright 2016 Eric Biggers + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * 
conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "lib_common.h" +#include "gzip_constants.h" + +#include "libdeflate.h" + +LIBDEFLATEAPI enum libdeflate_result +libdeflate_gzip_decompress_ex(struct libdeflate_decompressor *d, + const void *in, size_t in_nbytes, + void *out, size_t out_nbytes_avail, + size_t *actual_in_nbytes_ret, + size_t *actual_out_nbytes_ret) +{ + const u8 *in_next = in; + const u8 * const in_end = in_next + in_nbytes; + u8 flg; + size_t actual_in_nbytes; + size_t actual_out_nbytes; + enum libdeflate_result result; + + if (in_nbytes < GZIP_MIN_OVERHEAD) + return LIBDEFLATE_BAD_DATA; + + /* ID1 */ + if (*in_next++ != GZIP_ID1) + return LIBDEFLATE_BAD_DATA; + /* ID2 */ + if (*in_next++ != GZIP_ID2) + return LIBDEFLATE_BAD_DATA; + /* CM */ + if (*in_next++ != GZIP_CM_DEFLATE) + return LIBDEFLATE_BAD_DATA; + flg = *in_next++; + /* MTIME */ + in_next += 4; + /* XFL */ + in_next += 1; + /* OS */ + in_next += 1; + + if (flg & GZIP_FRESERVED) + return LIBDEFLATE_BAD_DATA; + + /* Extra field */ + if (flg & GZIP_FEXTRA) { + u16 xlen = get_unaligned_le16(in_next); + in_next += 2; + + if (in_end - in_next < (u32)xlen + GZIP_FOOTER_SIZE) + return LIBDEFLATE_BAD_DATA; + + in_next += xlen; + } + + /* Original file name (zero terminated) */ + if (flg & GZIP_FNAME) { + while (*in_next++ != 0 && in_next != in_end) + ; + if (in_end - in_next < 
GZIP_FOOTER_SIZE) + return LIBDEFLATE_BAD_DATA; + } + + /* File comment (zero terminated) */ + if (flg & GZIP_FCOMMENT) { + while (*in_next++ != 0 && in_next != in_end) + ; + if (in_end - in_next < GZIP_FOOTER_SIZE) + return LIBDEFLATE_BAD_DATA; + } + + /* CRC16 for gzip header */ + if (flg & GZIP_FHCRC) { + in_next += 2; + if (in_end - in_next < GZIP_FOOTER_SIZE) + return LIBDEFLATE_BAD_DATA; + } + + /* Compressed data */ + result = libdeflate_deflate_decompress_ex(d, in_next, + in_end - GZIP_FOOTER_SIZE - in_next, + out, out_nbytes_avail, + &actual_in_nbytes, + actual_out_nbytes_ret); + if (result != LIBDEFLATE_SUCCESS) + return result; + + if (actual_out_nbytes_ret) + actual_out_nbytes = *actual_out_nbytes_ret; + else + actual_out_nbytes = out_nbytes_avail; + + in_next += actual_in_nbytes; + + /* CRC32 */ + if (libdeflate_crc32(0, out, actual_out_nbytes) != + get_unaligned_le32(in_next)) + return LIBDEFLATE_BAD_DATA; + in_next += 4; + + /* ISIZE */ + if ((u32)actual_out_nbytes != get_unaligned_le32(in_next)) + return LIBDEFLATE_BAD_DATA; + in_next += 4; + + if (actual_in_nbytes_ret) + *actual_in_nbytes_ret = in_next - (u8 *)in; + + return LIBDEFLATE_SUCCESS; +} + +LIBDEFLATEAPI enum libdeflate_result +libdeflate_gzip_decompress(struct libdeflate_decompressor *d, + const void *in, size_t in_nbytes, + void *out, size_t out_nbytes_avail, + size_t *actual_out_nbytes_ret) +{ + return libdeflate_gzip_decompress_ex(d, in, in_nbytes, + out, out_nbytes_avail, + NULL, actual_out_nbytes_ret); +} diff --git a/tools/z64compress/src/enc/libdeflate/lib/hc_matchfinder.h b/tools/z64compress/src/enc/libdeflate/lib/hc_matchfinder.h new file mode 100644 index 000000000..a0cddfca1 --- /dev/null +++ b/tools/z64compress/src/enc/libdeflate/lib/hc_matchfinder.h @@ -0,0 +1,401 @@ +/* + * hc_matchfinder.h - Lempel-Ziv matchfinding with a hash table of linked lists + * + * Copyright 2016 Eric Biggers + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy 
of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * --------------------------------------------------------------------------- + * + * Algorithm + * + * This is a Hash Chains (hc) based matchfinder. + * + * The main data structure is a hash table where each hash bucket contains a + * linked list (or "chain") of sequences whose first 4 bytes share the same hash + * code. Each sequence is identified by its starting position in the input + * buffer. + * + * The algorithm processes the input buffer sequentially. At each byte + * position, the hash code of the first 4 bytes of the sequence beginning at + * that position (the sequence being matched against) is computed. This + * identifies the hash bucket to use for that position. Then, this hash + * bucket's linked list is searched for matches. Then, a new linked list node + * is created to represent the current sequence and is prepended to the list. 
+ * + * This algorithm has several useful properties: + * + * - It only finds true Lempel-Ziv matches; i.e., those where the matching + * sequence occurs prior to the sequence being matched against. + * + * - The sequences in each linked list are always sorted by decreasing starting + * position. Therefore, the closest (smallest offset) matches are found + * first, which in many compression formats tend to be the cheapest to encode. + * + * - Although fast running time is not guaranteed due to the possibility of the + * lists getting very long, the worst degenerate behavior can be easily + * prevented by capping the number of nodes searched at each position. + * + * - If the compressor decides not to search for matches at a certain position, + * then that position can be quickly inserted without searching the list. + * + * - The algorithm is adaptable to sliding windows: just store the positions + * relative to a "base" value that is updated from time to time, and stop + * searching each list when the sequences get too far away. + * + * ---------------------------------------------------------------------------- + * + * Optimizations + * + * The main hash table and chains handle length 4+ matches. Length 3 matches + * are handled by a separate hash table with no chains. This works well for + * typical "greedy" or "lazy"-style compressors, where length 3 matches are + * often only helpful if they have small offsets. Instead of searching a full + * chain for length 3+ matches, the algorithm just checks for one close length 3 + * match, then focuses on finding length 4+ matches. + * + * The longest_match() and skip_bytes() functions are inlined into the + * compressors that use them. This isn't just about saving the overhead of a + * function call. These functions are intended to be called from the inner + * loops of compressors, where giving the compiler more control over register + * allocation is very helpful. 
There is also significant benefit to be gained + * from allowing the CPU to predict branches independently at each call site. + * For example, "lazy"-style compressors can be written with two calls to + * longest_match(), each of which starts with a different 'best_len' and + * therefore has significantly different performance characteristics. + * + * Although any hash function can be used, a multiplicative hash is fast and + * works well. + * + * On some processors, it is significantly faster to extend matches by whole + * words (32 or 64 bits) instead of by individual bytes. For this to be the + * case, the processor must implement unaligned memory accesses efficiently and + * must have either a fast "find first set bit" instruction or a fast "find last + * set bit" instruction, depending on the processor's endianness. + * + * The code uses one loop for finding the first match and one loop for finding a + * longer match. Each of these loops is tuned for its respective task and in + * combination are faster than a single generalized loop that handles both + * tasks. + * + * The code also uses a tight inner loop that only compares the last and first + * bytes of a potential match. It is only when these bytes match that a full + * match extension is attempted. 
+ * + * ---------------------------------------------------------------------------- + */ + +#ifndef LIB_HC_MATCHFINDER_H +#define LIB_HC_MATCHFINDER_H + +#include "matchfinder_common.h" + +#define HC_MATCHFINDER_HASH3_ORDER 15 +#define HC_MATCHFINDER_HASH4_ORDER 16 + +#define HC_MATCHFINDER_TOTAL_HASH_SIZE \ + (((1UL << HC_MATCHFINDER_HASH3_ORDER) + \ + (1UL << HC_MATCHFINDER_HASH4_ORDER)) * sizeof(mf_pos_t)) + +struct MATCHFINDER_ALIGNED hc_matchfinder { + + /* The hash table for finding length 3 matches */ + mf_pos_t hash3_tab[1UL << HC_MATCHFINDER_HASH3_ORDER]; + + /* The hash table which contains the first nodes of the linked lists for + * finding length 4+ matches */ + mf_pos_t hash4_tab[1UL << HC_MATCHFINDER_HASH4_ORDER]; + + /* The "next node" references for the linked lists. The "next node" of + * the node for the sequence with position 'pos' is 'next_tab[pos]'. */ + mf_pos_t next_tab[MATCHFINDER_WINDOW_SIZE]; +}; + +/* Prepare the matchfinder for a new input buffer. */ +static forceinline void +hc_matchfinder_init(struct hc_matchfinder *mf) +{ + STATIC_ASSERT(HC_MATCHFINDER_TOTAL_HASH_SIZE % + MATCHFINDER_SIZE_ALIGNMENT == 0); + + matchfinder_init((mf_pos_t *)mf, HC_MATCHFINDER_TOTAL_HASH_SIZE); +} + +static forceinline void +hc_matchfinder_slide_window(struct hc_matchfinder *mf) +{ + STATIC_ASSERT(sizeof(*mf) % MATCHFINDER_SIZE_ALIGNMENT == 0); + + matchfinder_rebase((mf_pos_t *)mf, sizeof(*mf)); +} + +/* + * Find the longest match longer than 'best_len' bytes. + * + * @mf + * The matchfinder structure. + * @in_base_p + * Location of a pointer which points to the place in the input data the + * matchfinder currently stores positions relative to. This may be updated + * by this function. + * @in_next + * Pointer to the next position in the input buffer, i.e. the sequence + * being matched against. + * @best_len + * Require a match longer than this length. + * @max_len + * The maximum permissible match length at this position. 
+ * @nice_len + * Stop searching if a match of at least this length is found. + * Must be <= @max_len. + * @max_search_depth + * Limit on the number of potential matches to consider. Must be >= 1. + * @next_hashes + * The precomputed hash codes for the sequence beginning at @in_next. + * These will be used and then updated with the precomputed hashcodes for + * the sequence beginning at @in_next + 1. + * @offset_ret + * If a match is found, its offset is returned in this location. + * + * Return the length of the match found, or 'best_len' if no match longer than + * 'best_len' was found. + */ +static forceinline u32 +hc_matchfinder_longest_match(struct hc_matchfinder * const mf, + const u8 ** const in_base_p, + const u8 * const in_next, + u32 best_len, + const u32 max_len, + const u32 nice_len, + const u32 max_search_depth, + u32 * const next_hashes, + u32 * const offset_ret) +{ + u32 depth_remaining = max_search_depth; + const u8 *best_matchptr = in_next; + mf_pos_t cur_node3, cur_node4; + u32 hash3, hash4; + u32 next_hashseq; + u32 seq4; + const u8 *matchptr; + u32 len; + u32 cur_pos = in_next - *in_base_p; + const u8 *in_base; + mf_pos_t cutoff; + + if (cur_pos == MATCHFINDER_WINDOW_SIZE) { + hc_matchfinder_slide_window(mf); + *in_base_p += MATCHFINDER_WINDOW_SIZE; + cur_pos = 0; + } + + in_base = *in_base_p; + cutoff = cur_pos - MATCHFINDER_WINDOW_SIZE; + + if (unlikely(max_len < 5)) /* can we read 4 bytes from 'in_next + 1'? */ + goto out; + + /* Get the precomputed hash codes. */ + hash3 = next_hashes[0]; + hash4 = next_hashes[1]; + + /* From the hash buckets, get the first node of each linked list. */ + cur_node3 = mf->hash3_tab[hash3]; + cur_node4 = mf->hash4_tab[hash4]; + + /* Update for length 3 matches. This replaces the singleton node in the + * 'hash3' bucket with the node for the current sequence. */ + mf->hash3_tab[hash3] = cur_pos; + + /* Update for length 4 matches. 
This prepends the node for the current + * sequence to the linked list in the 'hash4' bucket. */ + mf->hash4_tab[hash4] = cur_pos; + mf->next_tab[cur_pos] = cur_node4; + + /* Compute the next hash codes. */ + next_hashseq = get_unaligned_le32(in_next + 1); + next_hashes[0] = lz_hash(next_hashseq & 0xFFFFFF, HC_MATCHFINDER_HASH3_ORDER); + next_hashes[1] = lz_hash(next_hashseq, HC_MATCHFINDER_HASH4_ORDER); + prefetchw(&mf->hash3_tab[next_hashes[0]]); + prefetchw(&mf->hash4_tab[next_hashes[1]]); + + if (best_len < 4) { /* No match of length >= 4 found yet? */ + + /* Check for a length 3 match if needed. */ + + if (cur_node3 <= cutoff) + goto out; + + seq4 = load_u32_unaligned(in_next); + + if (best_len < 3) { + matchptr = &in_base[cur_node3]; + if (load_u24_unaligned(matchptr) == loaded_u32_to_u24(seq4)) { + best_len = 3; + best_matchptr = matchptr; + } + } + + /* Check for a length 4 match. */ + + if (cur_node4 <= cutoff) + goto out; + + for (;;) { + /* No length 4 match found yet. Check the first 4 bytes. */ + matchptr = &in_base[cur_node4]; + + if (load_u32_unaligned(matchptr) == seq4) + break; + + /* The first 4 bytes did not match. Keep trying. */ + cur_node4 = mf->next_tab[cur_node4 & (MATCHFINDER_WINDOW_SIZE - 1)]; + if (cur_node4 <= cutoff || !--depth_remaining) + goto out; + } + + /* Found a match of length >= 4. Extend it to its full length. */ + best_matchptr = matchptr; + best_len = lz_extend(in_next, best_matchptr, 4, max_len); + if (best_len >= nice_len) + goto out; + cur_node4 = mf->next_tab[cur_node4 & (MATCHFINDER_WINDOW_SIZE - 1)]; + if (cur_node4 <= cutoff || !--depth_remaining) + goto out; + } else { + if (cur_node4 <= cutoff || best_len >= nice_len) + goto out; + } + + /* Check for matches of length >= 5. */ + + for (;;) { + for (;;) { + matchptr = &in_base[cur_node4]; + + /* Already found a length 4 match. Try for a longer + * match; start by checking either the last 4 bytes and + * the first 4 bytes, or the last byte. 
(The last byte, + * the one which would extend the match length by 1, is + * the most important.) */ + #if UNALIGNED_ACCESS_IS_FAST + if ((load_u32_unaligned(matchptr + best_len - 3) == + load_u32_unaligned(in_next + best_len - 3)) && + (load_u32_unaligned(matchptr) == + load_u32_unaligned(in_next))) + #else + if (matchptr[best_len] == in_next[best_len]) + #endif + break; + + /* Continue to the next node in the list. */ + cur_node4 = mf->next_tab[cur_node4 & (MATCHFINDER_WINDOW_SIZE - 1)]; + if (cur_node4 <= cutoff || !--depth_remaining) + goto out; + } + + #if UNALIGNED_ACCESS_IS_FAST + len = 4; + #else + len = 0; + #endif + len = lz_extend(in_next, matchptr, len, max_len); + if (len > best_len) { + /* This is the new longest match. */ + best_len = len; + best_matchptr = matchptr; + if (best_len >= nice_len) + goto out; + } + + /* Continue to the next node in the list. */ + cur_node4 = mf->next_tab[cur_node4 & (MATCHFINDER_WINDOW_SIZE - 1)]; + if (cur_node4 <= cutoff || !--depth_remaining) + goto out; + } +out: + *offset_ret = in_next - best_matchptr; + return best_len; +} + +/* + * Advance the matchfinder, but don't search for matches. + * + * @mf + * The matchfinder structure. + * @in_base_p + * Location of a pointer which points to the place in the input data the + * matchfinder currently stores positions relative to. This may be updated + * by this function. + * @in_next + * Pointer to the next position in the input buffer. + * @in_end + * Pointer to the end of the input buffer. + * @count + * The number of bytes to advance. Must be > 0. + * @next_hashes + * The precomputed hash codes for the sequence beginning at @in_next. + * These will be used and then updated with the precomputed hashcodes for + * the sequence beginning at @in_next + @count. 
+ */ +static forceinline void +hc_matchfinder_skip_bytes(struct hc_matchfinder * const mf, + const u8 ** const in_base_p, + const u8 *in_next, + const u8 * const in_end, + const u32 count, + u32 * const next_hashes) +{ + u32 cur_pos; + u32 hash3, hash4; + u32 next_hashseq; + u32 remaining = count; + + if (unlikely(count + 5 > in_end - in_next)) + return; + + cur_pos = in_next - *in_base_p; + hash3 = next_hashes[0]; + hash4 = next_hashes[1]; + do { + if (cur_pos == MATCHFINDER_WINDOW_SIZE) { + hc_matchfinder_slide_window(mf); + *in_base_p += MATCHFINDER_WINDOW_SIZE; + cur_pos = 0; + } + mf->hash3_tab[hash3] = cur_pos; + mf->next_tab[cur_pos] = mf->hash4_tab[hash4]; + mf->hash4_tab[hash4] = cur_pos; + + next_hashseq = get_unaligned_le32(++in_next); + hash3 = lz_hash(next_hashseq & 0xFFFFFF, HC_MATCHFINDER_HASH3_ORDER); + hash4 = lz_hash(next_hashseq, HC_MATCHFINDER_HASH4_ORDER); + cur_pos++; + } while (--remaining); + + prefetchw(&mf->hash3_tab[hash3]); + prefetchw(&mf->hash4_tab[hash4]); + next_hashes[0] = hash3; + next_hashes[1] = hash4; +} + +#endif /* LIB_HC_MATCHFINDER_H */ diff --git a/tools/z64compress/src/enc/libdeflate/lib/ht_matchfinder.h b/tools/z64compress/src/enc/libdeflate/lib/ht_matchfinder.h new file mode 100644 index 000000000..6e5a187c1 --- /dev/null +++ b/tools/z64compress/src/enc/libdeflate/lib/ht_matchfinder.h @@ -0,0 +1,234 @@ +/* + * ht_matchfinder.h - Lempel-Ziv matchfinding with a hash table + * + * Copyright 2022 Eric Biggers + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission 
notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * --------------------------------------------------------------------------- + * + * This is a Hash Table (ht) matchfinder. + * + * This is a variant of the Hash Chains (hc) matchfinder that is optimized for + * very fast compression. The ht_matchfinder stores the hash chains inline in + * the hash table, whereas the hc_matchfinder stores them in a separate array. + * Storing the hash chains inline is the faster method when max_search_depth + * (the maximum chain length) is very small. It is not appropriate when + * max_search_depth is larger, as then it uses too much memory. + * + * Due to its focus on speed, the ht_matchfinder doesn't support length 3 + * matches. It also doesn't allow max_search_depth to vary at runtime; it is + * fixed at build time as HT_MATCHFINDER_BUCKET_SIZE. + * + * See hc_matchfinder.h for more information. 
+ */ + +#ifndef LIB_HT_MATCHFINDER_H +#define LIB_HT_MATCHFINDER_H + +#include "matchfinder_common.h" + +#define HT_MATCHFINDER_HASH_ORDER 15 +#define HT_MATCHFINDER_BUCKET_SIZE 2 + +#define HT_MATCHFINDER_MIN_MATCH_LEN 4 +/* Minimum value of max_len for ht_matchfinder_longest_match() */ +#define HT_MATCHFINDER_REQUIRED_NBYTES 5 + +struct MATCHFINDER_ALIGNED ht_matchfinder { + mf_pos_t hash_tab[1UL << HT_MATCHFINDER_HASH_ORDER] + [HT_MATCHFINDER_BUCKET_SIZE]; +}; + +static forceinline void +ht_matchfinder_init(struct ht_matchfinder *mf) +{ + STATIC_ASSERT(sizeof(*mf) % MATCHFINDER_SIZE_ALIGNMENT == 0); + + matchfinder_init((mf_pos_t *)mf, sizeof(*mf)); +} + +static forceinline void +ht_matchfinder_slide_window(struct ht_matchfinder *mf) +{ + matchfinder_rebase((mf_pos_t *)mf, sizeof(*mf)); +} + +/* Note: max_len must be >= HT_MATCHFINDER_REQUIRED_NBYTES */ +static forceinline u32 +ht_matchfinder_longest_match(struct ht_matchfinder * const mf, + const u8 ** const in_base_p, + const u8 * const in_next, + const u32 max_len, + const u32 nice_len, + u32 * const next_hash, + u32 * const offset_ret) +{ + u32 best_len = 0; + const u8 *best_matchptr = in_next; + u32 cur_pos = in_next - *in_base_p; + const u8 *in_base; + mf_pos_t cutoff; + u32 hash; + u32 seq; + mf_pos_t cur_node; + const u8 *matchptr; +#if HT_MATCHFINDER_BUCKET_SIZE > 1 + mf_pos_t to_insert; + u32 len; +#endif +#if HT_MATCHFINDER_BUCKET_SIZE > 2 + int i; +#endif + + /* This is assumed throughout this function. 
*/ + STATIC_ASSERT(HT_MATCHFINDER_MIN_MATCH_LEN == 4); + + if (cur_pos == MATCHFINDER_WINDOW_SIZE) { + ht_matchfinder_slide_window(mf); + *in_base_p += MATCHFINDER_WINDOW_SIZE; + cur_pos = 0; + } + in_base = *in_base_p; + cutoff = cur_pos - MATCHFINDER_WINDOW_SIZE; + + hash = *next_hash; + STATIC_ASSERT(HT_MATCHFINDER_REQUIRED_NBYTES == 5); + *next_hash = lz_hash(get_unaligned_le32(in_next + 1), + HT_MATCHFINDER_HASH_ORDER); + seq = load_u32_unaligned(in_next); + prefetchw(&mf->hash_tab[*next_hash]); +#if HT_MATCHFINDER_BUCKET_SIZE == 1 + /* Hand-unrolled version for BUCKET_SIZE == 1 */ + cur_node = mf->hash_tab[hash][0]; + mf->hash_tab[hash][0] = cur_pos; + if (cur_node <= cutoff) + goto out; + matchptr = &in_base[cur_node]; + if (load_u32_unaligned(matchptr) == seq) { + best_len = lz_extend(in_next, matchptr, 4, max_len); + best_matchptr = matchptr; + } +#elif HT_MATCHFINDER_BUCKET_SIZE == 2 + /* + * Hand-unrolled version for BUCKET_SIZE == 2. The logic here also + * differs slightly in that it copies the first entry to the second even + * if nice_len is reached on the first, as this can be slightly faster. 
+ */ + cur_node = mf->hash_tab[hash][0]; + mf->hash_tab[hash][0] = cur_pos; + if (cur_node <= cutoff) + goto out; + matchptr = &in_base[cur_node]; + + to_insert = cur_node; + cur_node = mf->hash_tab[hash][1]; + mf->hash_tab[hash][1] = to_insert; + + if (load_u32_unaligned(matchptr) == seq) { + best_len = lz_extend(in_next, matchptr, 4, max_len); + best_matchptr = matchptr; + if (cur_node <= cutoff || best_len >= nice_len) + goto out; + matchptr = &in_base[cur_node]; + if (load_u32_unaligned(matchptr) == seq && + load_u32_unaligned(matchptr + best_len - 3) == + load_u32_unaligned(in_next + best_len - 3)) { + len = lz_extend(in_next, matchptr, 4, max_len); + if (len > best_len) { + best_len = len; + best_matchptr = matchptr; + } + } + } else { + if (cur_node <= cutoff) + goto out; + matchptr = &in_base[cur_node]; + if (load_u32_unaligned(matchptr) == seq) { + best_len = lz_extend(in_next, matchptr, 4, max_len); + best_matchptr = matchptr; + } + } +#else + /* Generic version for HT_MATCHFINDER_BUCKET_SIZE > 2 */ + to_insert = cur_pos; + for (i = 0; i < HT_MATCHFINDER_BUCKET_SIZE; i++) { + cur_node = mf->hash_tab[hash][i]; + mf->hash_tab[hash][i] = to_insert; + if (cur_node <= cutoff) + goto out; + matchptr = &in_base[cur_node]; + if (load_u32_unaligned(matchptr) == seq) { + len = lz_extend(in_next, matchptr, 4, max_len); + if (len > best_len) { + best_len = len; + best_matchptr = matchptr; + if (best_len >= nice_len) + goto out; + } + } + to_insert = cur_node; + } +#endif +out: + *offset_ret = in_next - best_matchptr; + return best_len; +} + +static forceinline void +ht_matchfinder_skip_bytes(struct ht_matchfinder * const mf, + const u8 ** const in_base_p, + const u8 *in_next, + const u8 * const in_end, + const u32 count, + u32 * const next_hash) +{ + s32 cur_pos = in_next - *in_base_p; + u32 hash; + u32 remaining = count; + int i; + + if (unlikely(count + HT_MATCHFINDER_REQUIRED_NBYTES > in_end - in_next)) + return; + + if (cur_pos + count - 1 >= 
MATCHFINDER_WINDOW_SIZE) { + ht_matchfinder_slide_window(mf); + *in_base_p += MATCHFINDER_WINDOW_SIZE; + cur_pos -= MATCHFINDER_WINDOW_SIZE; + } + + hash = *next_hash; + do { + for (i = HT_MATCHFINDER_BUCKET_SIZE - 1; i > 0; i--) + mf->hash_tab[hash][i] = mf->hash_tab[hash][i - 1]; + mf->hash_tab[hash][0] = cur_pos; + + hash = lz_hash(get_unaligned_le32(++in_next), + HT_MATCHFINDER_HASH_ORDER); + cur_pos++; + } while (--remaining); + + prefetchw(&mf->hash_tab[hash]); + *next_hash = hash; +} + +#endif /* LIB_HT_MATCHFINDER_H */ diff --git a/tools/z64compress/src/enc/libdeflate/lib/lib_common.h b/tools/z64compress/src/enc/libdeflate/lib/lib_common.h new file mode 100644 index 000000000..6aad0feec --- /dev/null +++ b/tools/z64compress/src/enc/libdeflate/lib/lib_common.h @@ -0,0 +1,94 @@ +/* + * lib_common.h - internal header included by all library code + */ + +#ifndef LIB_LIB_COMMON_H +#define LIB_LIB_COMMON_H + +#include "../common_defs.h" + +#ifdef LIBDEFLATE_H + /* + * When building the library, LIBDEFLATEAPI needs to be defined properly before + * including libdeflate.h. + */ +# error "lib_common.h must always be included before libdeflate.h" +#endif + +#if defined(LIBDEFLATE_DLL) && (defined(_WIN32) || defined(__CYGWIN__)) +# define LIBDEFLATE_EXPORT_SYM __declspec(dllexport) +#elif defined(__GNUC__) +# define LIBDEFLATE_EXPORT_SYM __attribute__((visibility("default"))) +#else +# define LIBDEFLATE_EXPORT_SYM +#endif + +/* + * On i386, gcc assumes that the stack is 16-byte aligned at function entry. + * However, some compilers (e.g. MSVC) and programming languages (e.g. Delphi) + * only guarantee 4-byte alignment when calling functions. This is mainly an + * issue on Windows, but it has been seen on Linux too. Work around this ABI + * incompatibility by realigning the stack pointer when entering libdeflate. + * This prevents crashes in SSE/AVX code. 
+ */ +#if defined(__GNUC__) && defined(__i386__) +# define LIBDEFLATE_ALIGN_STACK __attribute__((force_align_arg_pointer)) +#else +# define LIBDEFLATE_ALIGN_STACK +#endif + +#define LIBDEFLATEAPI LIBDEFLATE_EXPORT_SYM LIBDEFLATE_ALIGN_STACK + +void *libdeflate_malloc(size_t size); +void libdeflate_free(void *ptr); + +void *libdeflate_aligned_malloc(size_t alignment, size_t size); +void libdeflate_aligned_free(void *ptr); + +#ifdef FREESTANDING +/* + * With -ffreestanding, may be missing, and we must provide + * implementations of memset(), memcpy(), memmove(), and memcmp(). + * See https://gcc.gnu.org/onlinedocs/gcc/Standards.html + * + * Also, -ffreestanding disables interpreting calls to these functions as + * built-ins. E.g., calling memcpy(&v, p, WORDBYTES) will make a function call, + * not be optimized to a single load instruction. For performance reasons we + * don't want that. So, declare these functions as macros that expand to the + * corresponding built-ins. This approach is recommended in the gcc man page. + * We still need the actual function definitions in case gcc calls them. + */ +void *memset(void *s, int c, size_t n); +#define memset(s, c, n) __builtin_memset((s), (c), (n)) + +void *memcpy(void *dest, const void *src, size_t n); +#define memcpy(dest, src, n) __builtin_memcpy((dest), (src), (n)) + +void *memmove(void *dest, const void *src, size_t n); +#define memmove(dest, src, n) __builtin_memmove((dest), (src), (n)) + +int memcmp(const void *s1, const void *s2, size_t n); +#define memcmp(s1, s2, n) __builtin_memcmp((s1), (s2), (n)) + +#undef LIBDEFLATE_ENABLE_ASSERTIONS +#else +#include +#endif + +/* + * Runtime assertion support. Don't enable this in production builds; it may + * hurt performance significantly. 
+ */ +#ifdef LIBDEFLATE_ENABLE_ASSERTIONS +void libdeflate_assertion_failed(const char *expr, const char *file, int line); +#define ASSERT(expr) { if (unlikely(!(expr))) \ + libdeflate_assertion_failed(#expr, __FILE__, __LINE__); } +#else +#define ASSERT(expr) (void)(expr) +#endif + +#define CONCAT_IMPL(a, b) a##b +#define CONCAT(a, b) CONCAT_IMPL(a, b) +#define ADD_SUFFIX(name) CONCAT(name, SUFFIX) + +#endif /* LIB_LIB_COMMON_H */ diff --git a/tools/z64compress/src/enc/libdeflate/lib/matchfinder_common.h b/tools/z64compress/src/enc/libdeflate/lib/matchfinder_common.h new file mode 100644 index 000000000..48a243e1d --- /dev/null +++ b/tools/z64compress/src/enc/libdeflate/lib/matchfinder_common.h @@ -0,0 +1,199 @@ +/* + * matchfinder_common.h - common code for Lempel-Ziv matchfinding + */ + +#ifndef LIB_MATCHFINDER_COMMON_H +#define LIB_MATCHFINDER_COMMON_H + +#include "lib_common.h" + +#ifndef MATCHFINDER_WINDOW_ORDER +# error "MATCHFINDER_WINDOW_ORDER must be defined!" +#endif + +/* + * Given a 32-bit value that was loaded with the platform's native endianness, + * return a 32-bit value whose high-order 8 bits are 0 and whose low-order 24 + * bits contain the first 3 bytes, arranged in octets in a platform-dependent + * order, at the memory location from which the input 32-bit value was loaded. + */ +static forceinline u32 +loaded_u32_to_u24(u32 v) +{ + if (CPU_IS_LITTLE_ENDIAN()) + return v & 0xFFFFFF; + else + return v >> 8; +} + +/* + * Load the next 3 bytes from @p into the 24 low-order bits of a 32-bit value. + * The order in which the 3 bytes will be arranged as octets in the 24 bits is + * platform-dependent. At least 4 bytes (not 3) must be available at @p. 
+ */ +static forceinline u32 +load_u24_unaligned(const u8 *p) +{ +#if UNALIGNED_ACCESS_IS_FAST + return loaded_u32_to_u24(load_u32_unaligned(p)); +#else + if (CPU_IS_LITTLE_ENDIAN()) + return ((u32)p[0] << 0) | ((u32)p[1] << 8) | ((u32)p[2] << 16); + else + return ((u32)p[2] << 0) | ((u32)p[1] << 8) | ((u32)p[0] << 16); +#endif +} + +#define MATCHFINDER_WINDOW_SIZE (1UL << MATCHFINDER_WINDOW_ORDER) + +typedef s16 mf_pos_t; + +#define MATCHFINDER_INITVAL ((mf_pos_t)-MATCHFINDER_WINDOW_SIZE) + +/* + * Required alignment of the matchfinder buffer pointer and size. The values + * here come from the AVX-2 implementation, which is the worst case. + */ +#define MATCHFINDER_MEM_ALIGNMENT 32 +#define MATCHFINDER_SIZE_ALIGNMENT 128 + +#undef matchfinder_init +#undef matchfinder_rebase +#ifdef _aligned_attribute +# define MATCHFINDER_ALIGNED _aligned_attribute(MATCHFINDER_MEM_ALIGNMENT) +# if defined(ARCH_ARM32) || defined(ARCH_ARM64) +# include "arm/matchfinder_impl.h" +# elif defined(ARCH_X86_32) || defined(ARCH_X86_64) +# include "x86/matchfinder_impl.h" +# endif +#else +# define MATCHFINDER_ALIGNED +#endif + +/* + * Initialize the hash table portion of the matchfinder. + * + * Essentially, this is an optimized memset(). + * + * 'data' must be aligned to a MATCHFINDER_MEM_ALIGNMENT boundary, and + * 'size' must be a multiple of MATCHFINDER_SIZE_ALIGNMENT. + */ +#ifndef matchfinder_init +static forceinline void +matchfinder_init(mf_pos_t *data, size_t size) +{ + size_t num_entries = size / sizeof(*data); + size_t i; + + for (i = 0; i < num_entries; i++) + data[i] = MATCHFINDER_INITVAL; +} +#endif + +/* + * Slide the matchfinder by MATCHFINDER_WINDOW_SIZE bytes. + * + * This must be called just after each MATCHFINDER_WINDOW_SIZE bytes have been + * run through the matchfinder. 
+ * + * This subtracts MATCHFINDER_WINDOW_SIZE bytes from each entry in the given + * array, making the entries be relative to the current position rather than the + * position MATCHFINDER_WINDOW_SIZE bytes prior. To avoid integer underflows, + * entries that would become less than -MATCHFINDER_WINDOW_SIZE stay at + * -MATCHFINDER_WINDOW_SIZE, keeping them permanently out of bounds. + * + * The given array must contain all matchfinder data that is position-relative: + * the hash table(s) as well as any hash chain or binary tree links. Its + * address must be aligned to a MATCHFINDER_MEM_ALIGNMENT boundary, and its size + * must be a multiple of MATCHFINDER_SIZE_ALIGNMENT. + */ +#ifndef matchfinder_rebase +static forceinline void +matchfinder_rebase(mf_pos_t *data, size_t size) +{ + size_t num_entries = size / sizeof(*data); + size_t i; + + if (MATCHFINDER_WINDOW_SIZE == 32768) { + /* + * Branchless version for 32768-byte windows. Clear all bits if + * the value was already negative, then set the sign bit. This + * is equivalent to subtracting 32768 with signed saturation. + */ + for (i = 0; i < num_entries; i++) + data[i] = 0x8000 | (data[i] & ~(data[i] >> 15)); + } else { + for (i = 0; i < num_entries; i++) { + if (data[i] >= 0) + data[i] -= (mf_pos_t)-MATCHFINDER_WINDOW_SIZE; + else + data[i] = (mf_pos_t)-MATCHFINDER_WINDOW_SIZE; + } + } +} +#endif + +/* + * The hash function: given a sequence prefix held in the low-order bits of a + * 32-bit value, multiply by a carefully-chosen large constant. Discard any + * bits of the product that don't fit in a 32-bit value, but take the + * next-highest @num_bits bits of the product as the hash value, as those have + * the most randomness. + */ +static forceinline u32 +lz_hash(u32 seq, unsigned num_bits) +{ + return (u32)(seq * 0x1E35A7BD) >> (32 - num_bits); +} + +/* + * Return the number of bytes at @matchptr that match the bytes at @strptr, up + * to a maximum of @max_len. Initially, @start_len bytes are matched. 
+ */ +static forceinline unsigned +lz_extend(const u8 * const strptr, const u8 * const matchptr, + const unsigned start_len, const unsigned max_len) +{ + unsigned len = start_len; + machine_word_t v_word; + + if (UNALIGNED_ACCESS_IS_FAST) { + + if (likely(max_len - len >= 4 * WORDBYTES)) { + + #define COMPARE_WORD_STEP \ + v_word = load_word_unaligned(&matchptr[len]) ^ \ + load_word_unaligned(&strptr[len]); \ + if (v_word != 0) \ + goto word_differs; \ + len += WORDBYTES; \ + + COMPARE_WORD_STEP + COMPARE_WORD_STEP + COMPARE_WORD_STEP + COMPARE_WORD_STEP + #undef COMPARE_WORD_STEP + } + + while (len + WORDBYTES <= max_len) { + v_word = load_word_unaligned(&matchptr[len]) ^ + load_word_unaligned(&strptr[len]); + if (v_word != 0) + goto word_differs; + len += WORDBYTES; + } + } + + while (len < max_len && matchptr[len] == strptr[len]) + len++; + return len; + +word_differs: + if (CPU_IS_LITTLE_ENDIAN()) + len += (bsfw(v_word) >> 3); + else + len += (WORDBITS - 1 - bsrw(v_word)) >> 3; + return len; +} + +#endif /* LIB_MATCHFINDER_COMMON_H */ diff --git a/tools/z64compress/src/enc/libdeflate/lib/utils.c b/tools/z64compress/src/enc/libdeflate/lib/utils.c new file mode 100644 index 000000000..c8e5121e5 --- /dev/null +++ b/tools/z64compress/src/enc/libdeflate/lib/utils.c @@ -0,0 +1,153 @@ +/* + * utils.c - utility functions for libdeflate + * + * Copyright 2016 Eric Biggers + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "lib_common.h" + +#include "libdeflate.h" + +#ifdef FREESTANDING +# define malloc NULL +# define free NULL +#else +# include +#endif + +static void *(*libdeflate_malloc_func)(size_t) = malloc; +static void (*libdeflate_free_func)(void *) = free; + +void * +libdeflate_malloc(size_t size) +{ + return (*libdeflate_malloc_func)(size); +} + +void +libdeflate_free(void *ptr) +{ + (*libdeflate_free_func)(ptr); +} + +void * +libdeflate_aligned_malloc(size_t alignment, size_t size) +{ + void *ptr = libdeflate_malloc(sizeof(void *) + alignment - 1 + size); + if (ptr) { + void *orig_ptr = ptr; + ptr = (void *)ALIGN((uintptr_t)ptr + sizeof(void *), alignment); + ((void **)ptr)[-1] = orig_ptr; + } + return ptr; +} + +void +libdeflate_aligned_free(void *ptr) +{ + if (ptr) + libdeflate_free(((void **)ptr)[-1]); +} + +LIBDEFLATEAPI void +libdeflate_set_memory_allocator(void *(*malloc_func)(size_t), + void (*free_func)(void *)) +{ + libdeflate_malloc_func = malloc_func; + libdeflate_free_func = free_func; +} + +/* + * Implementations of libc functions for freestanding library builds. + * Normal library builds don't use these. Not optimized yet; usually the + * compiler expands these functions and doesn't actually call them anyway. 
+ */ +#ifdef FREESTANDING +#undef memset +void * __attribute__((weak)) +memset(void *s, int c, size_t n) +{ + u8 *p = s; + size_t i; + + for (i = 0; i < n; i++) + p[i] = c; + return s; +} + +#undef memcpy +void * __attribute__((weak)) +memcpy(void *dest, const void *src, size_t n) +{ + u8 *d = dest; + const u8 *s = src; + size_t i; + + for (i = 0; i < n; i++) + d[i] = s[i]; + return dest; +} + +#undef memmove +void * __attribute__((weak)) +memmove(void *dest, const void *src, size_t n) +{ + u8 *d = dest; + const u8 *s = src; + size_t i; + + if (d <= s) + return memcpy(d, s, n); + + for (i = n; i > 0; i--) + d[i - 1] = s[i - 1]; + return dest; +} + +#undef memcmp +int __attribute__((weak)) +memcmp(const void *s1, const void *s2, size_t n) +{ + const u8 *p1 = s1; + const u8 *p2 = s2; + size_t i; + + for (i = 0; i < n; i++) { + if (p1[i] != p2[i]) + return (int)p1[i] - (int)p2[i]; + } + return 0; +} +#endif /* FREESTANDING */ + +#ifdef LIBDEFLATE_ENABLE_ASSERTIONS +#include +#include +void +libdeflate_assertion_failed(const char *expr, const char *file, int line) +{ + fprintf(stderr, "Assertion failed: %s at %s:%d\n", expr, file, line); + abort(); +} +#endif /* LIBDEFLATE_ENABLE_ASSERTIONS */ diff --git a/tools/z64compress/src/enc/libdeflate/lib/x86/adler32_impl.h b/tools/z64compress/src/enc/libdeflate/lib/x86/adler32_impl.h new file mode 100644 index 000000000..6285dc80a --- /dev/null +++ b/tools/z64compress/src/enc/libdeflate/lib/x86/adler32_impl.h @@ -0,0 +1,287 @@ +/* + * x86/adler32_impl.h - x86 implementations of Adler-32 checksum algorithm + * + * Copyright 2016 Eric Biggers + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + 
* Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef LIB_X86_ADLER32_IMPL_H +#define LIB_X86_ADLER32_IMPL_H + +#include "cpu_features.h" + +/* + * The following macros horizontally sum the s1 counters and add them to the + * real s1, and likewise for s2. They do this via a series of reductions, each + * of which halves the vector length, until just one counter remains. + * + * The s1 reductions don't depend on the s2 reductions and vice versa, so for + * efficiency they are interleaved. Also, every other s1 counter is 0 due to + * the 'psadbw' instruction (_mm_sad_epu8) summing groups of 8 bytes rather than + * 4; hence, one of the s1 reductions is skipped when going from 128 => 32 bits. 
+ */ + +#define ADLER32_FINISH_VEC_CHUNK_128(s1, s2, v_s1, v_s2) \ +{ \ + __m128i /* __v4su */ s1_last = (v_s1), s2_last = (v_s2); \ + \ + /* 128 => 32 bits */ \ + s2_last = _mm_add_epi32(s2_last, _mm_shuffle_epi32(s2_last, 0x31)); \ + s1_last = _mm_add_epi32(s1_last, _mm_shuffle_epi32(s1_last, 0x02)); \ + s2_last = _mm_add_epi32(s2_last, _mm_shuffle_epi32(s2_last, 0x02)); \ + \ + *(s1) += (u32)_mm_cvtsi128_si32(s1_last); \ + *(s2) += (u32)_mm_cvtsi128_si32(s2_last); \ +} + +#define ADLER32_FINISH_VEC_CHUNK_256(s1, s2, v_s1, v_s2) \ +{ \ + __m128i /* __v4su */ s1_128bit, s2_128bit; \ + \ + /* 256 => 128 bits */ \ + s1_128bit = _mm_add_epi32(_mm256_extracti128_si256((v_s1), 0), \ + _mm256_extracti128_si256((v_s1), 1)); \ + s2_128bit = _mm_add_epi32(_mm256_extracti128_si256((v_s2), 0), \ + _mm256_extracti128_si256((v_s2), 1)); \ + \ + ADLER32_FINISH_VEC_CHUNK_128((s1), (s2), s1_128bit, s2_128bit); \ +} + +/* + * This is a very silly partial workaround for gcc bug + * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=107892. The bug causes gcc to + * generate extra move instructions in some loops containing vector intrinsics. + * + * An alternate workaround would be to use gcc native vector operations instead + * of vector intrinsics. But that would result in MSVC needing its own code. + */ +#if GCC_PREREQ(1, 0) +# define GCC_UPDATE_VARS(a, b, c, d, e, f) \ + __asm__("" : "+x" (a), "+x" (b), "+x" (c), "+x" (d), "+x" (e), "+x" (f)) +#else +# define GCC_UPDATE_VARS(a, b, c, d, e, f) \ + (void)a, (void)b, (void)c, (void)d, (void)e, (void)f +#endif + +/* SSE2 implementation */ +#if HAVE_SSE2_INTRIN +# define adler32_sse2 adler32_sse2 +# define FUNCNAME adler32_sse2 +# define FUNCNAME_CHUNK adler32_sse2_chunk +# define IMPL_ALIGNMENT 16 +# define IMPL_SEGMENT_LEN 32 +/* + * The 16-bit precision byte counters must not be allowed to undergo *signed* + * overflow, otherwise the signed multiplications at the end (_mm_madd_epi16) + * would behave incorrectly. 
+ */ +# define IMPL_MAX_CHUNK_LEN (32 * (0x7FFF / 0xFF)) +# if HAVE_SSE2_NATIVE +# define ATTRIBUTES +# else +# define ATTRIBUTES _target_attribute("sse2") +# endif +# include +static forceinline ATTRIBUTES void +adler32_sse2_chunk(const __m128i *p, const __m128i *const end, u32 *s1, u32 *s2) +{ + const __m128i zeroes = _mm_setzero_si128(); + const __m128i /* __v8hu */ mults_a = + _mm_setr_epi16(32, 31, 30, 29, 28, 27, 26, 25); + const __m128i /* __v8hu */ mults_b = + _mm_setr_epi16(24, 23, 22, 21, 20, 19, 18, 17); + const __m128i /* __v8hu */ mults_c = + _mm_setr_epi16(16, 15, 14, 13, 12, 11, 10, 9); + const __m128i /* __v8hu */ mults_d = + _mm_setr_epi16(8, 7, 6, 5, 4, 3, 2, 1); + + /* s1 counters: 32-bit, sum of bytes */ + __m128i /* __v4su */ v_s1 = zeroes; + + /* s2 counters: 32-bit, sum of s1 values */ + __m128i /* __v4su */ v_s2 = zeroes; + + /* + * Thirty-two 16-bit counters for byte sums. Each accumulates the bytes + * that eventually need to be multiplied by a number 32...1 for addition + * into s2. + */ + __m128i /* __v8hu */ v_byte_sums_a = zeroes; + __m128i /* __v8hu */ v_byte_sums_b = zeroes; + __m128i /* __v8hu */ v_byte_sums_c = zeroes; + __m128i /* __v8hu */ v_byte_sums_d = zeroes; + + do { + /* Load the next 32 bytes. */ + const __m128i bytes1 = *p++; + const __m128i bytes2 = *p++; + + /* + * Accumulate the previous s1 counters into the s2 counters. + * Logically, this really should be v_s2 += v_s1 * 32, but we + * can do the multiplication (or left shift) later. + */ + v_s2 = _mm_add_epi32(v_s2, v_s1); + + /* + * s1 update: use "Packed Sum of Absolute Differences" to add + * the bytes horizontally with 8 bytes per sum. Then add the + * sums to the s1 counters. + */ + v_s1 = _mm_add_epi32(v_s1, _mm_sad_epu8(bytes1, zeroes)); + v_s1 = _mm_add_epi32(v_s1, _mm_sad_epu8(bytes2, zeroes)); + + /* + * Also accumulate the bytes into 32 separate counters that have + * 16-bit precision. 
+ */ + v_byte_sums_a = _mm_add_epi16( + v_byte_sums_a, _mm_unpacklo_epi8(bytes1, zeroes)); + v_byte_sums_b = _mm_add_epi16( + v_byte_sums_b, _mm_unpackhi_epi8(bytes1, zeroes)); + v_byte_sums_c = _mm_add_epi16( + v_byte_sums_c, _mm_unpacklo_epi8(bytes2, zeroes)); + v_byte_sums_d = _mm_add_epi16( + v_byte_sums_d, _mm_unpackhi_epi8(bytes2, zeroes)); + + GCC_UPDATE_VARS(v_s1, v_s2, v_byte_sums_a, v_byte_sums_b, + v_byte_sums_c, v_byte_sums_d); + } while (p != end); + + /* Finish calculating the s2 counters. */ + v_s2 = _mm_slli_epi32(v_s2, 5); + v_s2 = _mm_add_epi32(v_s2, _mm_madd_epi16(v_byte_sums_a, mults_a)); + v_s2 = _mm_add_epi32(v_s2, _mm_madd_epi16(v_byte_sums_b, mults_b)); + v_s2 = _mm_add_epi32(v_s2, _mm_madd_epi16(v_byte_sums_c, mults_c)); + v_s2 = _mm_add_epi32(v_s2, _mm_madd_epi16(v_byte_sums_d, mults_d)); + + /* Add the counters to the real s1 and s2. */ + ADLER32_FINISH_VEC_CHUNK_128(s1, s2, v_s1, v_s2); +} +# include "../adler32_vec_template.h" +#endif /* HAVE_SSE2_INTRIN */ + +/* + * AVX2 implementation. Basically the same as the SSE2 one, but with the vector + * width doubled. + */ +#if HAVE_AVX2_INTRIN +# define adler32_avx2 adler32_avx2 +# define FUNCNAME adler32_avx2 +# define FUNCNAME_CHUNK adler32_avx2_chunk +# define IMPL_ALIGNMENT 32 +# define IMPL_SEGMENT_LEN 64 +# define IMPL_MAX_CHUNK_LEN (64 * (0x7FFF / 0xFF)) +# if HAVE_AVX2_NATIVE +# define ATTRIBUTES +# else +# define ATTRIBUTES _target_attribute("avx2") +# endif +# include + /* + * With clang in MSVC compatibility mode, immintrin.h incorrectly skips + * including some sub-headers. + */ +# if defined(__clang__) && defined(_MSC_VER) +# include +# include +# endif +static forceinline ATTRIBUTES void +adler32_avx2_chunk(const __m256i *p, const __m256i *const end, u32 *s1, u32 *s2) +{ + const __m256i zeroes = _mm256_setzero_si256(); + /* + * Note, the multipliers have to be in this order because + * _mm256_unpack{lo,hi}_epi8 work on each 128-bit lane separately. 
+ */ + const __m256i /* __v16hu */ mults_a = + _mm256_setr_epi16(64, 63, 62, 61, 60, 59, 58, 57, + 48, 47, 46, 45, 44, 43, 42, 41); + const __m256i /* __v16hu */ mults_b = + _mm256_setr_epi16(56, 55, 54, 53, 52, 51, 50, 49, + 40, 39, 38, 37, 36, 35, 34, 33); + const __m256i /* __v16hu */ mults_c = + _mm256_setr_epi16(32, 31, 30, 29, 28, 27, 26, 25, + 16, 15, 14, 13, 12, 11, 10, 9); + const __m256i /* __v16hu */ mults_d = + _mm256_setr_epi16(24, 23, 22, 21, 20, 19, 18, 17, + 8, 7, 6, 5, 4, 3, 2, 1); + __m256i /* __v8su */ v_s1 = zeroes; + __m256i /* __v8su */ v_s2 = zeroes; + __m256i /* __v16hu */ v_byte_sums_a = zeroes; + __m256i /* __v16hu */ v_byte_sums_b = zeroes; + __m256i /* __v16hu */ v_byte_sums_c = zeroes; + __m256i /* __v16hu */ v_byte_sums_d = zeroes; + + do { + const __m256i bytes1 = *p++; + const __m256i bytes2 = *p++; + + v_s2 = _mm256_add_epi32(v_s2, v_s1); + v_s1 = _mm256_add_epi32(v_s1, _mm256_sad_epu8(bytes1, zeroes)); + v_s1 = _mm256_add_epi32(v_s1, _mm256_sad_epu8(bytes2, zeroes)); + v_byte_sums_a = _mm256_add_epi16( + v_byte_sums_a, _mm256_unpacklo_epi8(bytes1, zeroes)); + v_byte_sums_b = _mm256_add_epi16( + v_byte_sums_b, _mm256_unpackhi_epi8(bytes1, zeroes)); + v_byte_sums_c = _mm256_add_epi16( + v_byte_sums_c, _mm256_unpacklo_epi8(bytes2, zeroes)); + v_byte_sums_d = _mm256_add_epi16( + v_byte_sums_d, _mm256_unpackhi_epi8(bytes2, zeroes)); + + GCC_UPDATE_VARS(v_s1, v_s2, v_byte_sums_a, v_byte_sums_b, + v_byte_sums_c, v_byte_sums_d); + } while (p != end); + + v_s2 = _mm256_slli_epi32(v_s2, 6); + v_s2 = _mm256_add_epi32(v_s2, _mm256_madd_epi16(v_byte_sums_a, mults_a)); + v_s2 = _mm256_add_epi32(v_s2, _mm256_madd_epi16(v_byte_sums_b, mults_b)); + v_s2 = _mm256_add_epi32(v_s2, _mm256_madd_epi16(v_byte_sums_c, mults_c)); + v_s2 = _mm256_add_epi32(v_s2, _mm256_madd_epi16(v_byte_sums_d, mults_d)); + ADLER32_FINISH_VEC_CHUNK_256(s1, s2, v_s1, v_s2); +} +# include "../adler32_vec_template.h" +#endif /* HAVE_AVX2_INTRIN */ + +#if defined(adler32_avx2) 
&& HAVE_AVX2_NATIVE +#define DEFAULT_IMPL adler32_avx2 +#else +static inline adler32_func_t +arch_select_adler32_func(void) +{ + const u32 features MAYBE_UNUSED = get_x86_cpu_features(); + +#ifdef adler32_avx2 + if (HAVE_AVX2(features)) + return adler32_avx2; +#endif +#ifdef adler32_sse2 + if (HAVE_SSE2(features)) + return adler32_sse2; +#endif + return NULL; +} +#define arch_select_adler32_func arch_select_adler32_func +#endif + +#endif /* LIB_X86_ADLER32_IMPL_H */ diff --git a/tools/z64compress/src/enc/libdeflate/lib/x86/cpu_features.c b/tools/z64compress/src/enc/libdeflate/lib/x86/cpu_features.c new file mode 100644 index 000000000..958777ebd --- /dev/null +++ b/tools/z64compress/src/enc/libdeflate/lib/x86/cpu_features.c @@ -0,0 +1,151 @@ +/* + * x86/cpu_features.c - feature detection for x86 CPUs + * + * Copyright 2016 Eric Biggers + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#include "../cpu_features_common.h" /* must be included first */ +#include "cpu_features.h" + +#if HAVE_DYNAMIC_X86_CPU_FEATURES + +/* With old GCC versions we have to manually save and restore the x86_32 PIC + * register (ebx). See: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=47602 */ +#if defined(ARCH_X86_32) && defined(__PIC__) +# define EBX_CONSTRAINT "=&r" +#else +# define EBX_CONSTRAINT "=b" +#endif + +/* Execute the CPUID instruction. */ +static inline void +cpuid(u32 leaf, u32 subleaf, u32 *a, u32 *b, u32 *c, u32 *d) +{ +#ifdef _MSC_VER + int result[4]; + + __cpuidex(result, leaf, subleaf); + *a = result[0]; + *b = result[1]; + *c = result[2]; + *d = result[3]; +#else + __asm__(".ifnc %%ebx, %1; mov %%ebx, %1; .endif\n" + "cpuid \n" + ".ifnc %%ebx, %1; xchg %%ebx, %1; .endif\n" + : "=a" (*a), EBX_CONSTRAINT (*b), "=c" (*c), "=d" (*d) + : "a" (leaf), "c" (subleaf)); +#endif +} + +/* Read an extended control register. */ +static inline u64 +read_xcr(u32 index) +{ +#ifdef _MSC_VER + return _xgetbv(index); +#else + u32 edx, eax; + + /* Execute the "xgetbv" instruction. Old versions of binutils do not + * recognize this instruction, so list the raw bytes instead. */ + __asm__ (".byte 0x0f, 0x01, 0xd0" : "=d" (edx), "=a" (eax) : "c" (index)); + + return ((u64)edx << 32) | eax; +#endif +} + +#undef BIT +#define BIT(nr) (1UL << (nr)) + +#define XCR0_BIT_SSE BIT(1) +#define XCR0_BIT_AVX BIT(2) + +#define IS_SET(reg, nr) ((reg) & BIT(nr)) +#define IS_ALL_SET(reg, mask) (((reg) & (mask)) == (mask)) + +static const struct cpu_feature x86_cpu_feature_table[] = { + {X86_CPU_FEATURE_SSE2, "sse2"}, + {X86_CPU_FEATURE_PCLMUL, "pclmul"}, + {X86_CPU_FEATURE_AVX, "avx"}, + {X86_CPU_FEATURE_AVX2, "avx2"}, + {X86_CPU_FEATURE_BMI2, "bmi2"}, +}; + +volatile u32 libdeflate_x86_cpu_features = 0; + +/* Initialize libdeflate_x86_cpu_features. 
*/ +void libdeflate_init_x86_cpu_features(void) +{ + u32 features = 0; + u32 dummy1, dummy2, dummy3, dummy4; + u32 max_function; + u32 features_1, features_2, features_3, features_4; + bool os_avx_support = false; + + /* Get maximum supported function */ + cpuid(0, 0, &max_function, &dummy2, &dummy3, &dummy4); + if (max_function < 1) + goto out; + + /* Standard feature flags */ + cpuid(1, 0, &dummy1, &dummy2, &features_2, &features_1); + + if (IS_SET(features_1, 26)) + features |= X86_CPU_FEATURE_SSE2; + + if (IS_SET(features_2, 1)) + features |= X86_CPU_FEATURE_PCLMUL; + + if (IS_SET(features_2, 27)) { /* OSXSAVE set? */ + u64 xcr0 = read_xcr(0); + + os_avx_support = IS_ALL_SET(xcr0, + XCR0_BIT_SSE | + XCR0_BIT_AVX); + } + + if (os_avx_support && IS_SET(features_2, 28)) + features |= X86_CPU_FEATURE_AVX; + + if (max_function < 7) + goto out; + + /* Extended feature flags */ + cpuid(7, 0, &dummy1, &features_3, &features_4, &dummy4); + + if (os_avx_support && IS_SET(features_3, 5)) + features |= X86_CPU_FEATURE_AVX2; + + if (IS_SET(features_3, 8)) + features |= X86_CPU_FEATURE_BMI2; + +out: + disable_cpu_features_for_testing(&features, x86_cpu_feature_table, + ARRAY_LEN(x86_cpu_feature_table)); + + libdeflate_x86_cpu_features = features | X86_CPU_FEATURES_KNOWN; +} + +#endif /* HAVE_DYNAMIC_X86_CPU_FEATURES */ diff --git a/tools/z64compress/src/enc/libdeflate/lib/x86/cpu_features.h b/tools/z64compress/src/enc/libdeflate/lib/x86/cpu_features.h new file mode 100644 index 000000000..561bd567f --- /dev/null +++ b/tools/z64compress/src/enc/libdeflate/lib/x86/cpu_features.h @@ -0,0 +1,155 @@ +/* + * x86/cpu_features.h - feature detection for x86 CPUs + * + * Copyright 2016 Eric Biggers + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, 
publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef LIB_X86_CPU_FEATURES_H +#define LIB_X86_CPU_FEATURES_H + +#include "../lib_common.h" + +#define HAVE_DYNAMIC_X86_CPU_FEATURES 0 + +#if defined(ARCH_X86_32) || defined(ARCH_X86_64) + +#if COMPILER_SUPPORTS_TARGET_FUNCTION_ATTRIBUTE || defined(_MSC_VER) +# undef HAVE_DYNAMIC_X86_CPU_FEATURES +# define HAVE_DYNAMIC_X86_CPU_FEATURES 1 +#endif + +#define X86_CPU_FEATURE_SSE2 0x00000001 +#define X86_CPU_FEATURE_PCLMUL 0x00000002 +#define X86_CPU_FEATURE_AVX 0x00000004 +#define X86_CPU_FEATURE_AVX2 0x00000008 +#define X86_CPU_FEATURE_BMI2 0x00000010 + +#define HAVE_SSE2(features) (HAVE_SSE2_NATIVE || ((features) & X86_CPU_FEATURE_SSE2)) +#define HAVE_PCLMUL(features) (HAVE_PCLMUL_NATIVE || ((features) & X86_CPU_FEATURE_PCLMUL)) +#define HAVE_AVX(features) (HAVE_AVX_NATIVE || ((features) & X86_CPU_FEATURE_AVX)) +#define HAVE_AVX2(features) (HAVE_AVX2_NATIVE || ((features) & X86_CPU_FEATURE_AVX2)) +#define HAVE_BMI2(features) (HAVE_BMI2_NATIVE || ((features) & X86_CPU_FEATURE_BMI2)) + +#if HAVE_DYNAMIC_X86_CPU_FEATURES +#define X86_CPU_FEATURES_KNOWN 0x80000000 +extern volatile u32 libdeflate_x86_cpu_features; + +void 
libdeflate_init_x86_cpu_features(void); + +static inline u32 get_x86_cpu_features(void) +{ + if (libdeflate_x86_cpu_features == 0) + libdeflate_init_x86_cpu_features(); + return libdeflate_x86_cpu_features; +} +#else /* HAVE_DYNAMIC_X86_CPU_FEATURES */ +static inline u32 get_x86_cpu_features(void) { return 0; } +#endif /* !HAVE_DYNAMIC_X86_CPU_FEATURES */ + +/* + * Prior to gcc 4.9 (r200349) and clang 3.8 (r239883), x86 intrinsics not + * available in the main target couldn't be used in 'target' attribute + * functions. Unfortunately clang has no feature test macro for this, so we + * have to check its version. + */ +#if HAVE_DYNAMIC_X86_CPU_FEATURES && \ + (GCC_PREREQ(4, 9) || CLANG_PREREQ(3, 8, 7030000) || defined(_MSC_VER)) +# define HAVE_TARGET_INTRINSICS 1 +#else +# define HAVE_TARGET_INTRINSICS 0 +#endif + +/* SSE2 */ +#if defined(__SSE2__) || \ + (defined(_MSC_VER) && \ + (defined(ARCH_X86_64) || (defined(_M_IX86_FP) && _M_IX86_FP >= 2))) +# define HAVE_SSE2_NATIVE 1 +#else +# define HAVE_SSE2_NATIVE 0 +#endif +#define HAVE_SSE2_INTRIN (HAVE_SSE2_NATIVE || HAVE_TARGET_INTRINSICS) + +/* PCLMUL */ +#if defined(__PCLMUL__) || (defined(_MSC_VER) && defined(__AVX2__)) +# define HAVE_PCLMUL_NATIVE 1 +#else +# define HAVE_PCLMUL_NATIVE 0 +#endif +#if HAVE_PCLMUL_NATIVE || (HAVE_TARGET_INTRINSICS && \ + (GCC_PREREQ(4, 4) || \ + __has_builtin(__builtin_ia32_pclmulqdq128) || \ + defined(_MSC_VER))) +# define HAVE_PCLMUL_INTRIN 1 +#else +# define HAVE_PCLMUL_INTRIN 0 +#endif + +/* AVX */ +#ifdef __AVX__ +# define HAVE_AVX_NATIVE 1 +#else +# define HAVE_AVX_NATIVE 0 +#endif +#if HAVE_AVX_NATIVE || (HAVE_TARGET_INTRINSICS && \ + (GCC_PREREQ(4, 6) || \ + __has_builtin(__builtin_ia32_maxps256) || \ + defined(_MSC_VER))) +# define HAVE_AVX_INTRIN 1 +#else +# define HAVE_AVX_INTRIN 0 +#endif + +/* AVX2 */ +#ifdef __AVX2__ +# define HAVE_AVX2_NATIVE 1 +#else +# define HAVE_AVX2_NATIVE 0 +#endif +#if HAVE_AVX2_NATIVE || (HAVE_TARGET_INTRINSICS && \ + (GCC_PREREQ(4, 7) || \ + 
__has_builtin(__builtin_ia32_psadbw256) || \ + defined(_MSC_VER))) +# define HAVE_AVX2_INTRIN 1 +#else +# define HAVE_AVX2_INTRIN 0 +#endif + +/* BMI2 */ +#if defined(__BMI2__) || (defined(_MSC_VER) && defined(__AVX2__)) +# define HAVE_BMI2_NATIVE 1 +#else +# define HAVE_BMI2_NATIVE 0 +#endif +#if HAVE_BMI2_NATIVE || (HAVE_TARGET_INTRINSICS && \ + (GCC_PREREQ(4, 7) || \ + __has_builtin(__builtin_ia32_pdep_di) || \ + defined(_MSC_VER))) +# define HAVE_BMI2_INTRIN 1 +#else +# define HAVE_BMI2_INTRIN 0 +#endif + +#endif /* ARCH_X86_32 || ARCH_X86_64 */ + +#endif /* LIB_X86_CPU_FEATURES_H */ diff --git a/tools/z64compress/src/enc/libdeflate/lib/x86/crc32_impl.h b/tools/z64compress/src/enc/libdeflate/lib/x86/crc32_impl.h new file mode 100644 index 000000000..79cc7944e --- /dev/null +++ b/tools/z64compress/src/enc/libdeflate/lib/x86/crc32_impl.h @@ -0,0 +1,96 @@ +/* + * x86/crc32_impl.h - x86 implementations of the gzip CRC-32 algorithm + * + * Copyright 2016 Eric Biggers + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef LIB_X86_CRC32_IMPL_H +#define LIB_X86_CRC32_IMPL_H + +#include "cpu_features.h" + +/* PCLMUL implementation */ +#if HAVE_PCLMUL_INTRIN +# define crc32_x86_pclmul crc32_x86_pclmul +# define SUFFIX _pclmul +# if HAVE_PCLMUL_NATIVE +# define ATTRIBUTES +# else +# define ATTRIBUTES _target_attribute("pclmul") +# endif +# define FOLD_PARTIAL_VECS 0 +# include "crc32_pclmul_template.h" +#endif + +/* + * PCLMUL/AVX implementation. This implementation has two benefits over the + * regular PCLMUL one. First, simply compiling against the AVX target can + * improve performance significantly (e.g. 10100 MB/s to 16700 MB/s on Skylake) + * without actually using any AVX intrinsics, probably due to the availability + * of non-destructive VEX-encoded instructions. Second, AVX support implies + * SSSE3 and SSE4.1 support, and we can use SSSE3 and SSE4.1 intrinsics for + * efficient handling of partial blocks. (We *could* compile a variant with + * PCLMUL+SSSE3+SSE4.1 w/o AVX, but for simplicity we don't currently bother.) + * + * FIXME: with MSVC, this isn't actually compiled with AVX code generation + * enabled yet. That would require that this be moved to its own .c file. + */ +#if HAVE_PCLMUL_INTRIN && HAVE_AVX_INTRIN +# define crc32_x86_pclmul_avx crc32_x86_pclmul_avx +# define SUFFIX _pclmul_avx +# if HAVE_PCLMUL_NATIVE && HAVE_AVX_NATIVE +# define ATTRIBUTES +# else +# define ATTRIBUTES _target_attribute("pclmul,avx") +# endif +# define FOLD_PARTIAL_VECS 1 +# include "crc32_pclmul_template.h" +#endif + +/* + * If the best implementation is statically available, use it unconditionally. + * Otherwise choose the best implementation at runtime. 
+ */ +#if defined(crc32_x86_pclmul_avx) && HAVE_PCLMUL_NATIVE && HAVE_AVX_NATIVE +#define DEFAULT_IMPL crc32_x86_pclmul_avx +#else +static inline crc32_func_t +arch_select_crc32_func(void) +{ + const u32 features MAYBE_UNUSED = get_x86_cpu_features(); + +#ifdef crc32_x86_pclmul_avx + if (HAVE_PCLMUL(features) && HAVE_AVX(features)) + return crc32_x86_pclmul_avx; +#endif +#ifdef crc32_x86_pclmul + if (HAVE_PCLMUL(features)) + return crc32_x86_pclmul; +#endif + return NULL; +} +#define arch_select_crc32_func arch_select_crc32_func +#endif + +#endif /* LIB_X86_CRC32_IMPL_H */ diff --git a/tools/z64compress/src/enc/libdeflate/lib/x86/crc32_pclmul_template.h b/tools/z64compress/src/enc/libdeflate/lib/x86/crc32_pclmul_template.h new file mode 100644 index 000000000..1d5782375 --- /dev/null +++ b/tools/z64compress/src/enc/libdeflate/lib/x86/crc32_pclmul_template.h @@ -0,0 +1,354 @@ +/* + * x86/crc32_pclmul_template.h - gzip CRC-32 with PCLMULQDQ instructions + * + * Copyright 2016 Eric Biggers + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +/* + * This file is a "template" for instantiating PCLMULQDQ-based crc32_x86 + * functions. The "parameters" are: + * + * SUFFIX: + * Name suffix to append to all instantiated functions. + * ATTRIBUTES: + * Target function attributes to use. + * FOLD_PARTIAL_VECS: + * Use vector instructions to handle any partial blocks at the beginning + * and end, instead of falling back to scalar instructions for those parts. + * Requires SSSE3 and SSE4.1 intrinsics. + * + * The overall algorithm used is CRC folding with carryless multiplication + * instructions. Note that the x86 crc32 instruction cannot be used, as it is + * for a different polynomial, not the gzip one. For an explanation of CRC + * folding with carryless multiplication instructions, see + * scripts/gen_crc32_multipliers.c and the following paper: + * + * "Fast CRC Computation for Generic Polynomials Using PCLMULQDQ Instruction" + * https://www.intel.com/content/dam/www/public/us/en/documents/white-papers/fast-crc-computation-generic-polynomials-pclmulqdq-paper.pdf + */ + +#include +/* + * With clang in MSVC compatibility mode, immintrin.h incorrectly skips + * including some sub-headers. 
+ */ +#if defined(__clang__) && defined(_MSC_VER) +# include +# include +# include +#endif + +#undef fold_vec +static forceinline ATTRIBUTES __m128i +ADD_SUFFIX(fold_vec)(__m128i src, __m128i dst, __m128i /* __v2di */ multipliers) +{ + /* + * The immediate constant for PCLMULQDQ specifies which 64-bit halves of + * the 128-bit vectors to multiply: + * + * 0x00 means low halves (higher degree polynomial terms for us) + * 0x11 means high halves (lower degree polynomial terms for us) + */ + dst = _mm_xor_si128(dst, _mm_clmulepi64_si128(src, multipliers, 0x00)); + dst = _mm_xor_si128(dst, _mm_clmulepi64_si128(src, multipliers, 0x11)); + return dst; +} +#define fold_vec ADD_SUFFIX(fold_vec) + +#if FOLD_PARTIAL_VECS +/* + * Given v containing a 16-byte polynomial, and a pointer 'p' that points to the + * next '1 <= len <= 15' data bytes, rearrange the concatenation of v and the + * data into vectors x0 and x1 that contain 'len' bytes and 16 bytes, + * respectively. Then fold x0 into x1 and return the result. Assumes that + * 'p + len - 16' is in-bounds. + */ +#undef fold_partial_vec +static forceinline ATTRIBUTES __m128i +ADD_SUFFIX(fold_partial_vec)(__m128i v, const u8 *p, size_t len, + __m128i /* __v2du */ multipliers_1) +{ + /* + * pshufb(v, shift_tab[len..len+15]) left shifts v by 16-len bytes. + * pshufb(v, shift_tab[len+16..len+31]) right shifts v by len bytes. 
+ */ + static const u8 shift_tab[48] = { + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + }; + __m128i lshift = _mm_loadu_si128((const void *)&shift_tab[len]); + __m128i rshift = _mm_loadu_si128((const void *)&shift_tab[len + 16]); + __m128i x0, x1; + + /* x0 = v left-shifted by '16 - len' bytes */ + x0 = _mm_shuffle_epi8(v, lshift); + + /* + * x1 = the last '16 - len' bytes from v (i.e. v right-shifted by 'len' + * bytes) followed by the remaining data. + */ + x1 = _mm_blendv_epi8(_mm_shuffle_epi8(v, rshift), + _mm_loadu_si128((const void *)(p + len - 16)), + /* msb 0/1 of each byte selects byte from arg1/2 */ + rshift); + + return fold_vec(x0, x1, multipliers_1); +} +#define fold_partial_vec ADD_SUFFIX(fold_partial_vec) +#endif /* FOLD_PARTIAL_VECS */ + +static u32 ATTRIBUTES MAYBE_UNUSED +ADD_SUFFIX(crc32_x86)(u32 crc, const u8 *p, size_t len) +{ + const __m128i /* __v2du */ multipliers_8 = + _mm_set_epi64x(CRC32_8VECS_MULT_2, CRC32_8VECS_MULT_1); + const __m128i /* __v2du */ multipliers_4 = + _mm_set_epi64x(CRC32_4VECS_MULT_2, CRC32_4VECS_MULT_1); + const __m128i /* __v2du */ multipliers_2 = + _mm_set_epi64x(CRC32_2VECS_MULT_2, CRC32_2VECS_MULT_1); + const __m128i /* __v2du */ multipliers_1 = + _mm_set_epi64x(CRC32_1VECS_MULT_2, CRC32_1VECS_MULT_1); + const __m128i /* __v2du */ final_multiplier = + _mm_set_epi64x(0, CRC32_FINAL_MULT); + const __m128i mask32 = _mm_set_epi32(0, 0, 0, 0xFFFFFFFF); + const __m128i /* __v2du */ barrett_reduction_constants = + _mm_set_epi64x(CRC32_BARRETT_CONSTANT_2, + CRC32_BARRETT_CONSTANT_1); + __m128i v0, v1, v2, v3, v4, v5, v6, v7; + + /* + * There are two overall code paths. 
The first path supports all + * lengths, but is intended for short lengths; it uses unaligned loads + * and does at most 4-way folds. The second path only supports longer + * lengths, aligns the pointer in order to do aligned loads, and does up + * to 8-way folds. The length check below decides which path to take. + */ + if (len < 1024) { + if (len < 16) + return crc32_slice1(crc, p, len); + + v0 = _mm_xor_si128(_mm_loadu_si128((const void *)p), + _mm_cvtsi32_si128(crc)); + p += 16; + + if (len >= 64) { + v1 = _mm_loadu_si128((const void *)(p + 0)); + v2 = _mm_loadu_si128((const void *)(p + 16)); + v3 = _mm_loadu_si128((const void *)(p + 32)); + p += 48; + while (len >= 64 + 64) { + v0 = fold_vec(v0, _mm_loadu_si128((const void *)(p + 0)), + multipliers_4); + v1 = fold_vec(v1, _mm_loadu_si128((const void *)(p + 16)), + multipliers_4); + v2 = fold_vec(v2, _mm_loadu_si128((const void *)(p + 32)), + multipliers_4); + v3 = fold_vec(v3, _mm_loadu_si128((const void *)(p + 48)), + multipliers_4); + p += 64; + len -= 64; + } + v0 = fold_vec(v0, v2, multipliers_2); + v1 = fold_vec(v1, v3, multipliers_2); + if (len & 32) { + v0 = fold_vec(v0, _mm_loadu_si128((const void *)(p + 0)), + multipliers_2); + v1 = fold_vec(v1, _mm_loadu_si128((const void *)(p + 16)), + multipliers_2); + p += 32; + } + v0 = fold_vec(v0, v1, multipliers_1); + if (len & 16) { + v0 = fold_vec(v0, _mm_loadu_si128((const void *)p), + multipliers_1); + p += 16; + } + } else { + if (len >= 32) { + v0 = fold_vec(v0, _mm_loadu_si128((const void *)p), + multipliers_1); + p += 16; + if (len >= 48) { + v0 = fold_vec(v0, _mm_loadu_si128((const void *)p), + multipliers_1); + p += 16; + } + } + } + } else { + const size_t align = -(uintptr_t)p & 15; + const __m128i *vp; + + #if FOLD_PARTIAL_VECS + v0 = _mm_xor_si128(_mm_loadu_si128((const void *)p), + _mm_cvtsi32_si128(crc)); + p += 16; + /* Align p to the next 16-byte boundary. 
*/ + if (align) { + v0 = fold_partial_vec(v0, p, align, multipliers_1); + p += align; + len -= align; + } + vp = (const __m128i *)p; + #else + /* Align p to the next 16-byte boundary. */ + if (align) { + crc = crc32_slice1(crc, p, align); + p += align; + len -= align; + } + vp = (const __m128i *)p; + v0 = _mm_xor_si128(*vp++, _mm_cvtsi32_si128(crc)); + #endif + v1 = *vp++; + v2 = *vp++; + v3 = *vp++; + v4 = *vp++; + v5 = *vp++; + v6 = *vp++; + v7 = *vp++; + do { + v0 = fold_vec(v0, *vp++, multipliers_8); + v1 = fold_vec(v1, *vp++, multipliers_8); + v2 = fold_vec(v2, *vp++, multipliers_8); + v3 = fold_vec(v3, *vp++, multipliers_8); + v4 = fold_vec(v4, *vp++, multipliers_8); + v5 = fold_vec(v5, *vp++, multipliers_8); + v6 = fold_vec(v6, *vp++, multipliers_8); + v7 = fold_vec(v7, *vp++, multipliers_8); + len -= 128; + } while (len >= 128 + 128); + + v0 = fold_vec(v0, v4, multipliers_4); + v1 = fold_vec(v1, v5, multipliers_4); + v2 = fold_vec(v2, v6, multipliers_4); + v3 = fold_vec(v3, v7, multipliers_4); + if (len & 64) { + v0 = fold_vec(v0, *vp++, multipliers_4); + v1 = fold_vec(v1, *vp++, multipliers_4); + v2 = fold_vec(v2, *vp++, multipliers_4); + v3 = fold_vec(v3, *vp++, multipliers_4); + } + + v0 = fold_vec(v0, v2, multipliers_2); + v1 = fold_vec(v1, v3, multipliers_2); + if (len & 32) { + v0 = fold_vec(v0, *vp++, multipliers_2); + v1 = fold_vec(v1, *vp++, multipliers_2); + } + + v0 = fold_vec(v0, v1, multipliers_1); + if (len & 16) + v0 = fold_vec(v0, *vp++, multipliers_1); + + p = (const u8 *)vp; + } + len &= 15; + + /* + * If fold_partial_vec() is available, handle any remaining partial + * block now before reducing to 32 bits. + */ +#if FOLD_PARTIAL_VECS + if (len) + v0 = fold_partial_vec(v0, p, len, multipliers_1); +#endif + + /* + * Fold 128 => 96 bits. This also implicitly appends 32 zero bits, + * which is equivalent to multiplying by x^32. This is needed because + * the CRC is defined as M(x)*x^32 mod G(x), not just M(x) mod G(x). 
+ */ + v0 = _mm_xor_si128(_mm_srli_si128(v0, 8), + _mm_clmulepi64_si128(v0, multipliers_1, 0x10)); + + /* Fold 96 => 64 bits. */ + v0 = _mm_xor_si128(_mm_srli_si128(v0, 4), + _mm_clmulepi64_si128(_mm_and_si128(v0, mask32), + final_multiplier, 0x00)); + + /* + * Reduce 64 => 32 bits using Barrett reduction. + * + * Let M(x) = A(x)*x^32 + B(x) be the remaining message. The goal is to + * compute R(x) = M(x) mod G(x). Since degree(B(x)) < degree(G(x)): + * + * R(x) = (A(x)*x^32 + B(x)) mod G(x) + * = (A(x)*x^32) mod G(x) + B(x) + * + * Then, by the Division Algorithm there exists a unique q(x) such that: + * + * A(x)*x^32 mod G(x) = A(x)*x^32 - q(x)*G(x) + * + * Since the left-hand side is of maximum degree 31, the right-hand side + * must be too. This implies that we can apply 'mod x^32' to the + * right-hand side without changing its value: + * + * (A(x)*x^32 - q(x)*G(x)) mod x^32 = q(x)*G(x) mod x^32 + * + * Note that '+' is equivalent to '-' in polynomials over GF(2). + * + * We also know that: + * + * / A(x)*x^32 \ + * q(x) = floor ( --------- ) + * \ G(x) / + * + * To compute this efficiently, we can multiply the top and bottom by + * x^32 and move the division by G(x) to the top: + * + * / A(x) * floor(x^64 / G(x)) \ + * q(x) = floor ( ------------------------- ) + * \ x^32 / + * + * Note that floor(x^64 / G(x)) is a constant. + * + * So finally we have: + * + * / A(x) * floor(x^64 / G(x)) \ + * R(x) = B(x) + G(x)*floor ( ------------------------- ) + * \ x^32 / + */ + v1 = _mm_clmulepi64_si128(_mm_and_si128(v0, mask32), + barrett_reduction_constants, 0x00); + v1 = _mm_clmulepi64_si128(_mm_and_si128(v1, mask32), + barrett_reduction_constants, 0x10); + v0 = _mm_xor_si128(v0, v1); +#if FOLD_PARTIAL_VECS + crc = _mm_extract_epi32(v0, 1); +#else + crc = _mm_cvtsi128_si32(_mm_shuffle_epi32(v0, 0x01)); + /* Process up to 15 bytes left over at the end. 
*/ + crc = crc32_slice1(crc, p, len); +#endif + return crc; +} + +#undef SUFFIX +#undef ATTRIBUTES +#undef FOLD_PARTIAL_VECS diff --git a/tools/z64compress/src/enc/libdeflate/lib/x86/decompress_impl.h b/tools/z64compress/src/enc/libdeflate/lib/x86/decompress_impl.h new file mode 100644 index 000000000..3e2ec37e7 --- /dev/null +++ b/tools/z64compress/src/enc/libdeflate/lib/x86/decompress_impl.h @@ -0,0 +1,54 @@ +#ifndef LIB_X86_DECOMPRESS_IMPL_H +#define LIB_X86_DECOMPRESS_IMPL_H + +#include "cpu_features.h" + +/* + * BMI2 optimized version + * + * FIXME: with MSVC, this isn't actually compiled with BMI2 code generation + * enabled yet. That would require that this be moved to its own .c file. + */ +#if HAVE_BMI2_INTRIN +# define deflate_decompress_bmi2 deflate_decompress_bmi2 +# define FUNCNAME deflate_decompress_bmi2 +# if !HAVE_BMI2_NATIVE +# define ATTRIBUTES _target_attribute("bmi2") +# endif + /* + * Even with __attribute__((target("bmi2"))), gcc doesn't reliably use the + * bzhi instruction for 'word & BITMASK(count)'. So use the bzhi intrinsic + * explicitly. EXTRACT_VARBITS() is equivalent to 'word & BITMASK(count)'; + * EXTRACT_VARBITS8() is equivalent to 'word & BITMASK((u8)count)'. + * Nevertheless, their implementation using the bzhi intrinsic is identical, + * as the bzhi instruction truncates the count to 8 bits implicitly. 
+ */ +# ifndef __clang__ +# include +# ifdef ARCH_X86_64 +# define EXTRACT_VARBITS(word, count) _bzhi_u64((word), (count)) +# define EXTRACT_VARBITS8(word, count) _bzhi_u64((word), (count)) +# else +# define EXTRACT_VARBITS(word, count) _bzhi_u32((word), (count)) +# define EXTRACT_VARBITS8(word, count) _bzhi_u32((word), (count)) +# endif +# endif +# include "../decompress_template.h" +#endif /* HAVE_BMI2_INTRIN */ + +#if defined(deflate_decompress_bmi2) && HAVE_BMI2_NATIVE +#define DEFAULT_IMPL deflate_decompress_bmi2 +#else +static inline decompress_func_t +arch_select_decompress_func(void) +{ +#ifdef deflate_decompress_bmi2 + if (HAVE_BMI2(get_x86_cpu_features())) + return deflate_decompress_bmi2; +#endif + return NULL; +} +#define arch_select_decompress_func arch_select_decompress_func +#endif + +#endif /* LIB_X86_DECOMPRESS_IMPL_H */ diff --git a/tools/z64compress/src/enc/libdeflate/lib/x86/matchfinder_impl.h b/tools/z64compress/src/enc/libdeflate/lib/x86/matchfinder_impl.h new file mode 100644 index 000000000..8433b9b10 --- /dev/null +++ b/tools/z64compress/src/enc/libdeflate/lib/x86/matchfinder_impl.h @@ -0,0 +1,124 @@ +/* + * x86/matchfinder_impl.h - x86 implementations of matchfinder functions + * + * Copyright 2016 Eric Biggers + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef LIB_X86_MATCHFINDER_IMPL_H +#define LIB_X86_MATCHFINDER_IMPL_H + +#include "cpu_features.h" + +#if HAVE_AVX2_NATIVE +# include +static forceinline void +matchfinder_init_avx2(mf_pos_t *data, size_t size) +{ + __m256i *p = (__m256i *)data; + __m256i v = _mm256_set1_epi16(MATCHFINDER_INITVAL); + + STATIC_ASSERT(MATCHFINDER_MEM_ALIGNMENT % sizeof(*p) == 0); + STATIC_ASSERT(MATCHFINDER_SIZE_ALIGNMENT % (4 * sizeof(*p)) == 0); + STATIC_ASSERT(sizeof(mf_pos_t) == 2); + + do { + p[0] = v; + p[1] = v; + p[2] = v; + p[3] = v; + p += 4; + size -= 4 * sizeof(*p); + } while (size != 0); +} +#define matchfinder_init matchfinder_init_avx2 + +static forceinline void +matchfinder_rebase_avx2(mf_pos_t *data, size_t size) +{ + __m256i *p = (__m256i *)data; + __m256i v = _mm256_set1_epi16((u16)-MATCHFINDER_WINDOW_SIZE); + + STATIC_ASSERT(MATCHFINDER_MEM_ALIGNMENT % sizeof(*p) == 0); + STATIC_ASSERT(MATCHFINDER_SIZE_ALIGNMENT % (4 * sizeof(*p)) == 0); + STATIC_ASSERT(sizeof(mf_pos_t) == 2); + + do { + /* PADDSW: Add Packed Signed Integers With Signed Saturation */ + p[0] = _mm256_adds_epi16(p[0], v); + p[1] = _mm256_adds_epi16(p[1], v); + p[2] = _mm256_adds_epi16(p[2], v); + p[3] = _mm256_adds_epi16(p[3], v); + p += 4; + size -= 4 * sizeof(*p); + } while (size != 0); +} +#define matchfinder_rebase matchfinder_rebase_avx2 + +#elif HAVE_SSE2_NATIVE +# include +static forceinline void +matchfinder_init_sse2(mf_pos_t *data, size_t size) +{ + __m128i *p = (__m128i *)data; + __m128i v = 
_mm_set1_epi16(MATCHFINDER_INITVAL); + + STATIC_ASSERT(MATCHFINDER_MEM_ALIGNMENT % sizeof(*p) == 0); + STATIC_ASSERT(MATCHFINDER_SIZE_ALIGNMENT % (4 * sizeof(*p)) == 0); + STATIC_ASSERT(sizeof(mf_pos_t) == 2); + + do { + p[0] = v; + p[1] = v; + p[2] = v; + p[3] = v; + p += 4; + size -= 4 * sizeof(*p); + } while (size != 0); +} +#define matchfinder_init matchfinder_init_sse2 + +static forceinline void +matchfinder_rebase_sse2(mf_pos_t *data, size_t size) +{ + __m128i *p = (__m128i *)data; + __m128i v = _mm_set1_epi16((u16)-MATCHFINDER_WINDOW_SIZE); + + STATIC_ASSERT(MATCHFINDER_MEM_ALIGNMENT % sizeof(*p) == 0); + STATIC_ASSERT(MATCHFINDER_SIZE_ALIGNMENT % (4 * sizeof(*p)) == 0); + STATIC_ASSERT(sizeof(mf_pos_t) == 2); + + do { + /* PADDSW: Add Packed Signed Integers With Signed Saturation */ + p[0] = _mm_adds_epi16(p[0], v); + p[1] = _mm_adds_epi16(p[1], v); + p[2] = _mm_adds_epi16(p[2], v); + p[3] = _mm_adds_epi16(p[3], v); + p += 4; + size -= 4 * sizeof(*p); + } while (size != 0); +} +#define matchfinder_rebase matchfinder_rebase_sse2 +#endif /* HAVE_SSE2_NATIVE */ + +#endif /* LIB_X86_MATCHFINDER_IMPL_H */ diff --git a/tools/z64compress/src/enc/libdeflate/lib/zlib_compress.c b/tools/z64compress/src/enc/libdeflate/lib/zlib_compress.c new file mode 100644 index 000000000..4f9cc6f08 --- /dev/null +++ b/tools/z64compress/src/enc/libdeflate/lib/zlib_compress.c @@ -0,0 +1,84 @@ +/* + * zlib_compress.c - compress with a zlib wrapper + * + * Copyright 2016 Eric Biggers + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and 
this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "deflate_compress.h" +#include "zlib_constants.h" + +#include "libdeflate.h" + +LIBDEFLATEAPI size_t +libdeflate_zlib_compress(struct libdeflate_compressor *c, + const void *in, size_t in_nbytes, + void *out, size_t out_nbytes_avail) +{ + u8 *out_next = out; + u16 hdr; + unsigned compression_level; + unsigned level_hint; + size_t deflate_size; + + if (out_nbytes_avail <= ZLIB_MIN_OVERHEAD) + return 0; + + /* 2 byte header: CMF and FLG */ + hdr = (ZLIB_CM_DEFLATE << 8) | (ZLIB_CINFO_32K_WINDOW << 12); + compression_level = libdeflate_get_compression_level(c); + if (compression_level < 2) + level_hint = ZLIB_FASTEST_COMPRESSION; + else if (compression_level < 6) + level_hint = ZLIB_FAST_COMPRESSION; + else if (compression_level < 8) + level_hint = ZLIB_DEFAULT_COMPRESSION; + else + level_hint = ZLIB_SLOWEST_COMPRESSION; + hdr |= level_hint << 6; + hdr |= 31 - (hdr % 31); + + put_unaligned_be16(hdr, out_next); + out_next += 2; + + /* Compressed data */ + deflate_size = libdeflate_deflate_compress(c, in, in_nbytes, out_next, + out_nbytes_avail - ZLIB_MIN_OVERHEAD); + if (deflate_size == 0) + return 0; + out_next += deflate_size; + + /* ADLER32 */ + put_unaligned_be32(libdeflate_adler32(1, in, in_nbytes), out_next); + out_next += 4; + + return out_next - (u8 *)out; +} + +LIBDEFLATEAPI size_t +libdeflate_zlib_compress_bound(struct libdeflate_compressor *c, + size_t 
in_nbytes) +{ + return ZLIB_MIN_OVERHEAD + + libdeflate_deflate_compress_bound(c, in_nbytes); +} diff --git a/tools/z64compress/src/enc/libdeflate/lib/zlib_constants.h b/tools/z64compress/src/enc/libdeflate/lib/zlib_constants.h new file mode 100644 index 000000000..f304310c7 --- /dev/null +++ b/tools/z64compress/src/enc/libdeflate/lib/zlib_constants.h @@ -0,0 +1,21 @@ +/* + * zlib_constants.h - constants for the zlib wrapper format + */ + +#ifndef LIB_ZLIB_CONSTANTS_H +#define LIB_ZLIB_CONSTANTS_H + +#define ZLIB_MIN_HEADER_SIZE 2 +#define ZLIB_FOOTER_SIZE 4 +#define ZLIB_MIN_OVERHEAD (ZLIB_MIN_HEADER_SIZE + ZLIB_FOOTER_SIZE) + +#define ZLIB_CM_DEFLATE 8 + +#define ZLIB_CINFO_32K_WINDOW 7 + +#define ZLIB_FASTEST_COMPRESSION 0 +#define ZLIB_FAST_COMPRESSION 1 +#define ZLIB_DEFAULT_COMPRESSION 2 +#define ZLIB_SLOWEST_COMPRESSION 3 + +#endif /* LIB_ZLIB_CONSTANTS_H */ diff --git a/tools/z64compress/src/enc/libdeflate/lib/zlib_decompress.c b/tools/z64compress/src/enc/libdeflate/lib/zlib_decompress.c new file mode 100644 index 000000000..b7b3b1f95 --- /dev/null +++ b/tools/z64compress/src/enc/libdeflate/lib/zlib_decompress.c @@ -0,0 +1,106 @@ +/* + * zlib_decompress.c - decompress with a zlib wrapper + * + * Copyright 2016 Eric Biggers + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "lib_common.h" +#include "zlib_constants.h" + +#include "libdeflate.h" + +LIBDEFLATEAPI enum libdeflate_result +libdeflate_zlib_decompress_ex(struct libdeflate_decompressor *d, + const void *in, size_t in_nbytes, + void *out, size_t out_nbytes_avail, + size_t *actual_in_nbytes_ret, + size_t *actual_out_nbytes_ret) +{ + const u8 *in_next = in; + const u8 * const in_end = in_next + in_nbytes; + u16 hdr; + size_t actual_in_nbytes; + size_t actual_out_nbytes; + enum libdeflate_result result; + + if (in_nbytes < ZLIB_MIN_OVERHEAD) + return LIBDEFLATE_BAD_DATA; + + /* 2 byte header: CMF and FLG */ + hdr = get_unaligned_be16(in_next); + in_next += 2; + + /* FCHECK */ + if ((hdr % 31) != 0) + return LIBDEFLATE_BAD_DATA; + + /* CM */ + if (((hdr >> 8) & 0xF) != ZLIB_CM_DEFLATE) + return LIBDEFLATE_BAD_DATA; + + /* CINFO */ + if ((hdr >> 12) > ZLIB_CINFO_32K_WINDOW) + return LIBDEFLATE_BAD_DATA; + + /* FDICT */ + if ((hdr >> 5) & 1) + return LIBDEFLATE_BAD_DATA; + + /* Compressed data */ + result = libdeflate_deflate_decompress_ex(d, in_next, + in_end - ZLIB_FOOTER_SIZE - in_next, + out, out_nbytes_avail, + &actual_in_nbytes, actual_out_nbytes_ret); + if (result != LIBDEFLATE_SUCCESS) + return result; + + if (actual_out_nbytes_ret) + actual_out_nbytes = *actual_out_nbytes_ret; + else + actual_out_nbytes = out_nbytes_avail; + + in_next += actual_in_nbytes; + + /* ADLER32 */ + if (libdeflate_adler32(1, out, actual_out_nbytes) != + get_unaligned_be32(in_next)) + return 
LIBDEFLATE_BAD_DATA; + in_next += 4; + + if (actual_in_nbytes_ret) + *actual_in_nbytes_ret = in_next - (u8 *)in; + + return LIBDEFLATE_SUCCESS; +} + +LIBDEFLATEAPI enum libdeflate_result +libdeflate_zlib_decompress(struct libdeflate_decompressor *d, + const void *in, size_t in_nbytes, + void *out, size_t out_nbytes_avail, + size_t *actual_out_nbytes_ret) +{ + return libdeflate_zlib_decompress_ex(d, in, in_nbytes, + out, out_nbytes_avail, + NULL, actual_out_nbytes_ret); +} diff --git a/tools/z64compress/src/enc/libdeflate/libdeflate-config.cmake.in b/tools/z64compress/src/enc/libdeflate/libdeflate-config.cmake.in new file mode 100644 index 000000000..747799df9 --- /dev/null +++ b/tools/z64compress/src/enc/libdeflate/libdeflate-config.cmake.in @@ -0,0 +1,3 @@ +@PACKAGE_INIT@ + +include("${CMAKE_CURRENT_LIST_DIR}/libdeflate-targets.cmake") diff --git a/tools/z64compress/src/enc/libdeflate/libdeflate.h b/tools/z64compress/src/enc/libdeflate/libdeflate.h new file mode 100644 index 000000000..f26087597 --- /dev/null +++ b/tools/z64compress/src/enc/libdeflate/libdeflate.h @@ -0,0 +1,368 @@ +/* + * libdeflate.h - public header for libdeflate + */ + +#ifndef LIBDEFLATE_H +#define LIBDEFLATE_H + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +#define LIBDEFLATE_VERSION_MAJOR 1 +#define LIBDEFLATE_VERSION_MINOR 15 +#define LIBDEFLATE_VERSION_STRING "1.15" + +/* + * Users of libdeflate.dll on Windows can define LIBDEFLATE_DLL to cause + * __declspec(dllimport) to be used. This should be done when it's easy to do. + * Otherwise it's fine to skip it, since it is a very minor performance + * optimization that is irrelevant for most use cases of libdeflate. 
+ */ +#ifndef LIBDEFLATEAPI +# if defined(LIBDEFLATE_DLL) && (defined(_WIN32) || defined(__CYGWIN__)) +# define LIBDEFLATEAPI __declspec(dllimport) +# else +# define LIBDEFLATEAPI +# endif +#endif + +/* ========================================================================== */ +/* Compression */ +/* ========================================================================== */ + +struct libdeflate_compressor; + +/* + * libdeflate_alloc_compressor() allocates a new compressor that supports + * DEFLATE, zlib, and gzip compression. 'compression_level' is the compression + * level on a zlib-like scale but with a higher maximum value (1 = fastest, 6 = + * medium/default, 9 = slow, 12 = slowest). Level 0 is also supported and means + * "no compression", specifically "create a valid stream, but only emit + * uncompressed blocks" (this will expand the data slightly). + * + * The return value is a pointer to the new compressor, or NULL if out of memory + * or if the compression level is invalid (i.e. outside the range [0, 12]). + * + * Note: for compression, the sliding window size is defined at compilation time + * to 32768, the largest size permissible in the DEFLATE format. It cannot be + * changed at runtime. + * + * A single compressor is not safe to use by multiple threads concurrently. + * However, different threads may use different compressors concurrently. + */ +LIBDEFLATEAPI struct libdeflate_compressor * +libdeflate_alloc_compressor(int compression_level); + +/* + * libdeflate_deflate_compress() performs raw DEFLATE compression on a buffer of + * data. It attempts to compress 'in_nbytes' bytes of data located at 'in' and + * write the result to 'out', which has space for 'out_nbytes_avail' bytes. The + * return value is the compressed size in bytes, or 0 if the data could not be + * compressed to 'out_nbytes_avail' bytes or fewer (but see note below). 
+ * + * If compression is successful, then the output data is guaranteed to be a + * valid DEFLATE stream that decompresses to the input data. No other + * guarantees are made about the output data. Notably, different versions of + * libdeflate can produce different compressed data for the same uncompressed + * data, even at the same compression level. Do ***NOT*** do things like + * writing tests that compare compressed data to a golden output, as this can + * break when libdeflate is updated. (This property isn't specific to + * libdeflate; the same is true for zlib and other compression libraries too.) + * + * Note: due to a performance optimization, libdeflate_deflate_compress() + * currently needs a small amount of slack space at the end of the output + * buffer. As a result, it can't actually report compressed sizes very close to + * 'out_nbytes_avail'. This doesn't matter in real-world use cases, and + * libdeflate_deflate_compress_bound() already includes the slack space. + * However, it does mean that testing code that redundantly compresses data + * using an exact-sized output buffer won't work as might be expected: + * + * out_nbytes = libdeflate_deflate_compress(c, in, in_nbytes, out, + * libdeflate_deflate_compress_bound(in_nbytes)); + * // The following assertion will fail. + * assert(libdeflate_deflate_compress(c, in, in_nbytes, out, out_nbytes) != 0); + * + * To avoid this, either don't write tests like the above, or make sure to + * include at least 9 bytes of slack space in 'out_nbytes_avail'. 
+ */ +LIBDEFLATEAPI size_t +libdeflate_deflate_compress(struct libdeflate_compressor *compressor, + const void *in, size_t in_nbytes, + void *out, size_t out_nbytes_avail); + +/* + * libdeflate_deflate_compress_bound() returns a worst-case upper bound on the + * number of bytes of compressed data that may be produced by compressing any + * buffer of length less than or equal to 'in_nbytes' using + * libdeflate_deflate_compress() with the specified compressor. This bound will + * necessarily be a number greater than or equal to 'in_nbytes'. It may be an + * overestimate of the true upper bound. The return value is guaranteed to be + * the same for all invocations with the same compressor and same 'in_nbytes'. + * + * As a special case, 'compressor' may be NULL. This causes the bound to be + * taken across *any* libdeflate_compressor that could ever be allocated with + * this build of the library, with any options. + * + * Note that this function is not necessary in many applications. With + * block-based compression, it is usually preferable to separately store the + * uncompressed size of each block and to store any blocks that did not compress + * to less than their original size uncompressed. In that scenario, there is no + * need to know the worst-case compressed size, since the maximum number of + * bytes of compressed data that may be used would always be one less than the + * input length. You can just pass a buffer of that size to + * libdeflate_deflate_compress() and store the data uncompressed if + * libdeflate_deflate_compress() returns 0, indicating that the compressed data + * did not fit into the provided output buffer. + */ +LIBDEFLATEAPI size_t +libdeflate_deflate_compress_bound(struct libdeflate_compressor *compressor, + size_t in_nbytes); + +/* + * Like libdeflate_deflate_compress(), but uses the zlib wrapper format instead + * of raw DEFLATE. 
+ */ +LIBDEFLATEAPI size_t +libdeflate_zlib_compress(struct libdeflate_compressor *compressor, + const void *in, size_t in_nbytes, + void *out, size_t out_nbytes_avail); + +/* + * Like libdeflate_deflate_compress_bound(), but assumes the data will be + * compressed with libdeflate_zlib_compress() rather than with + * libdeflate_deflate_compress(). + */ +LIBDEFLATEAPI size_t +libdeflate_zlib_compress_bound(struct libdeflate_compressor *compressor, + size_t in_nbytes); + +/* + * Like libdeflate_deflate_compress(), but uses the gzip wrapper format instead + * of raw DEFLATE. + */ +LIBDEFLATEAPI size_t +libdeflate_gzip_compress(struct libdeflate_compressor *compressor, + const void *in, size_t in_nbytes, + void *out, size_t out_nbytes_avail); + +/* + * Like libdeflate_deflate_compress_bound(), but assumes the data will be + * compressed with libdeflate_gzip_compress() rather than with + * libdeflate_deflate_compress(). + */ +LIBDEFLATEAPI size_t +libdeflate_gzip_compress_bound(struct libdeflate_compressor *compressor, + size_t in_nbytes); + +/* + * libdeflate_free_compressor() frees a compressor that was allocated with + * libdeflate_alloc_compressor(). If a NULL pointer is passed in, no action is + * taken. + */ +LIBDEFLATEAPI void +libdeflate_free_compressor(struct libdeflate_compressor *compressor); + +/* ========================================================================== */ +/* Decompression */ +/* ========================================================================== */ + +struct libdeflate_decompressor; + +/* + * libdeflate_alloc_decompressor() allocates a new decompressor that can be used + * for DEFLATE, zlib, and gzip decompression. The return value is a pointer to + * the new decompressor, or NULL if out of memory. + * + * This function takes no parameters, and the returned decompressor is valid for + * decompressing data that was compressed at any compression level and with any + * sliding window size. 
+ * + * A single decompressor is not safe to use by multiple threads concurrently. + * However, different threads may use different decompressors concurrently. + */ +LIBDEFLATEAPI struct libdeflate_decompressor * +libdeflate_alloc_decompressor(void); + +/* + * Result of a call to libdeflate_deflate_decompress(), + * libdeflate_zlib_decompress(), or libdeflate_gzip_decompress(). + */ +enum libdeflate_result { + /* Decompression was successful. */ + LIBDEFLATE_SUCCESS = 0, + + /* Decompression failed because the compressed data was invalid, + * corrupt, or otherwise unsupported. */ + LIBDEFLATE_BAD_DATA = 1, + + /* A NULL 'actual_out_nbytes_ret' was provided, but the data would have + * decompressed to fewer than 'out_nbytes_avail' bytes. */ + LIBDEFLATE_SHORT_OUTPUT = 2, + + /* The data would have decompressed to more than 'out_nbytes_avail' + * bytes. */ + LIBDEFLATE_INSUFFICIENT_SPACE = 3, +}; + +/* + * libdeflate_deflate_decompress() decompresses a DEFLATE stream from the buffer + * 'in' with compressed size up to 'in_nbytes' bytes. The uncompressed data is + * written to 'out', a buffer with size 'out_nbytes_avail' bytes. If + * decompression succeeds, then 0 (LIBDEFLATE_SUCCESS) is returned. Otherwise, + * a nonzero result code such as LIBDEFLATE_BAD_DATA is returned, and the + * contents of the output buffer are undefined. + * + * Decompression stops at the end of the DEFLATE stream (as indicated by the + * BFINAL flag), even if it is actually shorter than 'in_nbytes' bytes. + * + * libdeflate_deflate_decompress() can be used in cases where the actual + * uncompressed size is known (recommended) or unknown (not recommended): + * + * - If the actual uncompressed size is known, then pass the actual + * uncompressed size as 'out_nbytes_avail' and pass NULL for + * 'actual_out_nbytes_ret'. This makes libdeflate_deflate_decompress() fail + * with LIBDEFLATE_SHORT_OUTPUT if the data decompressed to fewer than the + * specified number of bytes. 
+ * + * - If the actual uncompressed size is unknown, then provide a non-NULL + * 'actual_out_nbytes_ret' and provide a buffer with some size + * 'out_nbytes_avail' that you think is large enough to hold all the + * uncompressed data. In this case, if the data decompresses to less than + * or equal to 'out_nbytes_avail' bytes, then + * libdeflate_deflate_decompress() will write the actual uncompressed size + * to *actual_out_nbytes_ret and return 0 (LIBDEFLATE_SUCCESS). Otherwise, + * it will return LIBDEFLATE_INSUFFICIENT_SPACE if the provided buffer was + * not large enough but no other problems were encountered, or another + * nonzero result code if decompression failed for another reason. + */ +LIBDEFLATEAPI enum libdeflate_result +libdeflate_deflate_decompress(struct libdeflate_decompressor *decompressor, + const void *in, size_t in_nbytes, + void *out, size_t out_nbytes_avail, + size_t *actual_out_nbytes_ret); + +/* + * Like libdeflate_deflate_decompress(), but adds the 'actual_in_nbytes_ret' + * argument. If decompression succeeds and 'actual_in_nbytes_ret' is not NULL, + * then the actual compressed size of the DEFLATE stream (aligned to the next + * byte boundary) is written to *actual_in_nbytes_ret. + */ +LIBDEFLATEAPI enum libdeflate_result +libdeflate_deflate_decompress_ex(struct libdeflate_decompressor *decompressor, + const void *in, size_t in_nbytes, + void *out, size_t out_nbytes_avail, + size_t *actual_in_nbytes_ret, + size_t *actual_out_nbytes_ret); + +/* + * Like libdeflate_deflate_decompress(), but assumes the zlib wrapper format + * instead of raw DEFLATE. + * + * Decompression will stop at the end of the zlib stream, even if it is shorter + * than 'in_nbytes'. If you need to know exactly where the zlib stream ended, + * use libdeflate_zlib_decompress_ex(). 
+ */ +LIBDEFLATEAPI enum libdeflate_result +libdeflate_zlib_decompress(struct libdeflate_decompressor *decompressor, + const void *in, size_t in_nbytes, + void *out, size_t out_nbytes_avail, + size_t *actual_out_nbytes_ret); + +/* + * Like libdeflate_zlib_decompress(), but adds the 'actual_in_nbytes_ret' + * argument. If 'actual_in_nbytes_ret' is not NULL and the decompression + * succeeds (indicating that the first zlib-compressed stream in the input + * buffer was decompressed), then the actual number of input bytes consumed is + * written to *actual_in_nbytes_ret. + */ +LIBDEFLATEAPI enum libdeflate_result +libdeflate_zlib_decompress_ex(struct libdeflate_decompressor *decompressor, + const void *in, size_t in_nbytes, + void *out, size_t out_nbytes_avail, + size_t *actual_in_nbytes_ret, + size_t *actual_out_nbytes_ret); + +/* + * Like libdeflate_deflate_decompress(), but assumes the gzip wrapper format + * instead of raw DEFLATE. + * + * If multiple gzip-compressed members are concatenated, then only the first + * will be decompressed. Use libdeflate_gzip_decompress_ex() if you need + * multi-member support. + */ +LIBDEFLATEAPI enum libdeflate_result +libdeflate_gzip_decompress(struct libdeflate_decompressor *decompressor, + const void *in, size_t in_nbytes, + void *out, size_t out_nbytes_avail, + size_t *actual_out_nbytes_ret); + +/* + * Like libdeflate_gzip_decompress(), but adds the 'actual_in_nbytes_ret' + * argument. If 'actual_in_nbytes_ret' is not NULL and the decompression + * succeeds (indicating that the first gzip-compressed member in the input + * buffer was decompressed), then the actual number of input bytes consumed is + * written to *actual_in_nbytes_ret. 
+ */ +LIBDEFLATEAPI enum libdeflate_result +libdeflate_gzip_decompress_ex(struct libdeflate_decompressor *decompressor, + const void *in, size_t in_nbytes, + void *out, size_t out_nbytes_avail, + size_t *actual_in_nbytes_ret, + size_t *actual_out_nbytes_ret); + +/* + * libdeflate_free_decompressor() frees a decompressor that was allocated with + * libdeflate_alloc_decompressor(). If a NULL pointer is passed in, no action + * is taken. + */ +LIBDEFLATEAPI void +libdeflate_free_decompressor(struct libdeflate_decompressor *decompressor); + +/* ========================================================================== */ +/* Checksums */ +/* ========================================================================== */ + +/* + * libdeflate_adler32() updates a running Adler-32 checksum with 'len' bytes of + * data and returns the updated checksum. When starting a new checksum, the + * required initial value for 'adler' is 1. This value is also returned when + * 'buffer' is specified as NULL. + */ +LIBDEFLATEAPI uint32_t +libdeflate_adler32(uint32_t adler, const void *buffer, size_t len); + + +/* + * libdeflate_crc32() updates a running CRC-32 checksum with 'len' bytes of data + * and returns the updated checksum. When starting a new checksum, the required + * initial value for 'crc' is 0. This value is also returned when 'buffer' is + * specified as NULL. + */ +LIBDEFLATEAPI uint32_t +libdeflate_crc32(uint32_t crc, const void *buffer, size_t len); + +/* ========================================================================== */ +/* Custom memory allocator */ +/* ========================================================================== */ + +/* + * Install a custom memory allocator which libdeflate will use for all memory + * allocations. 'malloc_func' is a function that must behave like malloc(), and + * 'free_func' is a function that must behave like free(). 
+ * + * There must not be any libdeflate_compressor or libdeflate_decompressor + * structures in existence when calling this function. + */ +LIBDEFLATEAPI void +libdeflate_set_memory_allocator(void *(*malloc_func)(size_t), + void (*free_func)(void *)); + +#ifdef __cplusplus +} +#endif + +#endif /* LIBDEFLATE_H */ diff --git a/tools/z64compress/src/enc/libdeflate/libdeflate.pc.in b/tools/z64compress/src/enc/libdeflate/libdeflate.pc.in new file mode 100644 index 000000000..b8ced3c69 --- /dev/null +++ b/tools/z64compress/src/enc/libdeflate/libdeflate.pc.in @@ -0,0 +1,18 @@ +prefix=@CMAKE_INSTALL_PREFIX@ +exec_prefix=${prefix} +includedir=@CMAKE_PKGCONFIG_INCLUDEDIR@ +libdir=@CMAKE_PKGCONFIG_LIBDIR@ + +Name: libdeflate +Description: Fast implementation of DEFLATE, zlib, and gzip +Version: @PROJECT_VERSION@ +Libs: -L${libdir} -ldeflate +Cflags: -I${includedir} + +# Note: this library's public header allows LIBDEFLATE_DLL to be defined when +# linking to the DLL on Windows, to make __declspec(dllimport) be used. +# However, the only way to define a shared-library-only flag in a pkgconfig file +# is to use the weird workaround of unconditionally defining it in Cflags, then +# undefining it in Cflags.private. Just don't bother with this, since +# __declspec(dllimport) is optional anyway. It is a very minor performance +# optimization that is irrelevant for most use cases of libdeflate. 
diff --git a/tools/z64compress/src/enc/libdeflate/programs/benchmark.c b/tools/z64compress/src/enc/libdeflate/programs/benchmark.c new file mode 100644 index 000000000..52af8dafc --- /dev/null +++ b/tools/z64compress/src/enc/libdeflate/programs/benchmark.c @@ -0,0 +1,696 @@ +/* + * benchmark.c - a compression testing and benchmark program + * + * Copyright 2016 Eric Biggers + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#include "test_util.h" + +static const tchar *const optstring = T("0::1::2::3::4::5::6::7::8::9::C:D:eghs:VYZz"); + +enum format { + DEFLATE_FORMAT, + ZLIB_FORMAT, + GZIP_FORMAT, +}; + +struct compressor { + int level; + enum format format; + const struct engine *engine; + void *private; +}; + +struct decompressor { + enum format format; + const struct engine *engine; + void *private; +}; + +struct engine { + const tchar *name; + + bool (*init_compressor)(struct compressor *); + size_t (*compress_bound)(struct compressor *, size_t); + size_t (*compress)(struct compressor *, const void *, size_t, + void *, size_t); + void (*destroy_compressor)(struct compressor *); + + bool (*init_decompressor)(struct decompressor *); + bool (*decompress)(struct decompressor *, const void *, size_t, + void *, size_t); + void (*destroy_decompressor)(struct decompressor *); +}; + +/******************************************************************************/ + +static bool +libdeflate_engine_init_compressor(struct compressor *c) +{ + c->private = alloc_compressor(c->level); + return c->private != NULL; +} + +static size_t +libdeflate_engine_compress_bound(struct compressor *c, size_t in_nbytes) +{ + switch (c->format) { + case ZLIB_FORMAT: + return libdeflate_zlib_compress_bound(c->private, in_nbytes); + case GZIP_FORMAT: + return libdeflate_gzip_compress_bound(c->private, in_nbytes); + default: + return libdeflate_deflate_compress_bound(c->private, in_nbytes); + } +} + +static size_t +libdeflate_engine_compress(struct compressor *c, const void *in, + size_t in_nbytes, void *out, size_t out_nbytes_avail) +{ + switch (c->format) { + case ZLIB_FORMAT: + return libdeflate_zlib_compress(c->private, in, in_nbytes, + out, out_nbytes_avail); + case GZIP_FORMAT: + return libdeflate_gzip_compress(c->private, in, in_nbytes, + out, out_nbytes_avail); + default: + return libdeflate_deflate_compress(c->private, in, in_nbytes, + out, out_nbytes_avail); + } +} + +static void 
+libdeflate_engine_destroy_compressor(struct compressor *c) +{ + libdeflate_free_compressor(c->private); +} + +static bool +libdeflate_engine_init_decompressor(struct decompressor *d) +{ + d->private = alloc_decompressor(); + return d->private != NULL; +} + +static bool +libdeflate_engine_decompress(struct decompressor *d, const void *in, + size_t in_nbytes, void *out, size_t out_nbytes) +{ + switch (d->format) { + case ZLIB_FORMAT: + return !libdeflate_zlib_decompress(d->private, in, in_nbytes, + out, out_nbytes, NULL); + case GZIP_FORMAT: + return !libdeflate_gzip_decompress(d->private, in, in_nbytes, + out, out_nbytes, NULL); + default: + return !libdeflate_deflate_decompress(d->private, in, in_nbytes, + out, out_nbytes, NULL); + } +} + +static void +libdeflate_engine_destroy_decompressor(struct decompressor *d) +{ + libdeflate_free_decompressor(d->private); +} + +static const struct engine libdeflate_engine = { + .name = T("libdeflate"), + + .init_compressor = libdeflate_engine_init_compressor, + .compress_bound = libdeflate_engine_compress_bound, + .compress = libdeflate_engine_compress, + .destroy_compressor = libdeflate_engine_destroy_compressor, + + .init_decompressor = libdeflate_engine_init_decompressor, + .decompress = libdeflate_engine_decompress, + .destroy_decompressor = libdeflate_engine_destroy_decompressor, +}; + +/******************************************************************************/ + +static int +get_libz_window_bits(enum format format) +{ + const int windowBits = 15; + switch (format) { + case ZLIB_FORMAT: + return windowBits; + case GZIP_FORMAT: + return windowBits + 16; + default: + return -windowBits; + } +} + +static bool +libz_engine_init_compressor(struct compressor *c) +{ + z_stream *z; + + if (c->level > 9) { + msg("libz only supports up to compression level 9"); + return false; + } + + z = xmalloc(sizeof(*z)); + if (z == NULL) + return false; + + z->next_in = NULL; + z->avail_in = 0; + z->zalloc = NULL; + z->zfree = NULL; + 
z->opaque = NULL; + if (deflateInit2(z, c->level, Z_DEFLATED, + get_libz_window_bits(c->format), + 8, Z_DEFAULT_STRATEGY) != Z_OK) + { + msg("unable to initialize deflater"); + free(z); + return false; + } + + c->private = z; + return true; +} + +static size_t +libz_engine_compress_bound(struct compressor *c, size_t in_nbytes) +{ + return deflateBound(c->private, in_nbytes); +} + +static size_t +libz_engine_compress(struct compressor *c, const void *in, size_t in_nbytes, + void *out, size_t out_nbytes_avail) +{ + z_stream *z = c->private; + + deflateReset(z); + + z->next_in = (void *)in; + z->avail_in = in_nbytes; + z->next_out = out; + z->avail_out = out_nbytes_avail; + + if (deflate(z, Z_FINISH) != Z_STREAM_END) + return 0; + + return out_nbytes_avail - z->avail_out; +} + +static void +libz_engine_destroy_compressor(struct compressor *c) +{ + z_stream *z = c->private; + + deflateEnd(z); + free(z); +} + +static bool +libz_engine_init_decompressor(struct decompressor *d) +{ + z_stream *z; + + z = xmalloc(sizeof(*z)); + if (z == NULL) + return false; + + z->next_in = NULL; + z->avail_in = 0; + z->zalloc = NULL; + z->zfree = NULL; + z->opaque = NULL; + if (inflateInit2(z, get_libz_window_bits(d->format)) != Z_OK) { + msg("unable to initialize inflater"); + free(z); + return false; + } + + d->private = z; + return true; +} + +static bool +libz_engine_decompress(struct decompressor *d, const void *in, size_t in_nbytes, + void *out, size_t out_nbytes) +{ + z_stream *z = d->private; + + inflateReset(z); + + z->next_in = (void *)in; + z->avail_in = in_nbytes; + z->next_out = out; + z->avail_out = out_nbytes; + + return inflate(z, Z_FINISH) == Z_STREAM_END && z->avail_out == 0; +} + +static void +libz_engine_destroy_decompressor(struct decompressor *d) +{ + z_stream *z = d->private; + + inflateEnd(z); + free(z); +} + +static const struct engine libz_engine = { + .name = T("libz"), + + .init_compressor = libz_engine_init_compressor, + .compress_bound = 
libz_engine_compress_bound, + .compress = libz_engine_compress, + .destroy_compressor = libz_engine_destroy_compressor, + + .init_decompressor = libz_engine_init_decompressor, + .decompress = libz_engine_decompress, + .destroy_decompressor = libz_engine_destroy_decompressor, +}; + +/******************************************************************************/ + +static const struct engine * const all_engines[] = { + &libdeflate_engine, + &libz_engine, +}; + +#define DEFAULT_ENGINE libdeflate_engine + +static const struct engine * +name_to_engine(const tchar *name) +{ + size_t i; + + for (i = 0; i < ARRAY_LEN(all_engines); i++) + if (tstrcmp(all_engines[i]->name, name) == 0) + return all_engines[i]; + return NULL; +} + +/******************************************************************************/ + +static bool +compressor_init(struct compressor *c, int level, enum format format, + const struct engine *engine) +{ + c->level = level; + c->format = format; + c->engine = engine; + return engine->init_compressor(c); +} + +static size_t +compress_bound(struct compressor *c, size_t in_nbytes) +{ + return c->engine->compress_bound(c, in_nbytes); +} + +static size_t +do_compress(struct compressor *c, const void *in, size_t in_nbytes, + void *out, size_t out_nbytes_avail) +{ + return c->engine->compress(c, in, in_nbytes, out, out_nbytes_avail); +} + +static void +compressor_destroy(struct compressor *c) +{ + if (c->engine != NULL) + c->engine->destroy_compressor(c); +} + +static bool +decompressor_init(struct decompressor *d, enum format format, + const struct engine *engine) +{ + d->format = format; + d->engine = engine; + return engine->init_decompressor(d); +} + +static bool +do_decompress(struct decompressor *d, const void *in, size_t in_nbytes, + void *out, size_t out_nbytes) +{ + return d->engine->decompress(d, in, in_nbytes, out, out_nbytes); +} + +static void +decompressor_destroy(struct decompressor *d) +{ + if (d->engine != NULL) + 
d->engine->destroy_decompressor(d); +} + +/******************************************************************************/ + +static void +show_available_engines(FILE *fp) +{ + size_t i; + + fprintf(fp, "Available ENGINEs are: "); + for (i = 0; i < ARRAY_LEN(all_engines); i++) { + fprintf(fp, "%"TS, all_engines[i]->name); + if (i < ARRAY_LEN(all_engines) - 1) + fprintf(fp, ", "); + } + fprintf(fp, ". Default is %"TS"\n", DEFAULT_ENGINE.name); +} + +static void +show_usage(FILE *fp) +{ + fprintf(fp, +"Usage: %"TS" [-LVL] [-C ENGINE] [-D ENGINE] [-ghVz] [-s SIZE] [FILE]...\n" +"Benchmark DEFLATE compression and decompression on the specified FILEs.\n" +"\n" +"Options:\n" +" -0 no compression\n" +" -1 fastest (worst) compression\n" +" -6 medium compression (default)\n" +" -12 slowest (best) compression\n" +" -C ENGINE compression engine\n" +" -D ENGINE decompression engine\n" +" -e allow chunks to be expanded (implied by -0)\n" +" -g use gzip format instead of raw DEFLATE\n" +" -h print this help\n" +" -s SIZE chunk size\n" +" -V show version and legal information\n" +" -z use zlib format instead of raw DEFLATE\n" +"\n", prog_invocation_name); + + show_available_engines(fp); +} + +static void +show_version(void) +{ + printf( +"libdeflate compression benchmark program v" LIBDEFLATE_VERSION_STRING "\n" +"Copyright 2016 Eric Biggers\n" +"\n" +"This program is free software which may be modified and/or redistributed\n" +"under the terms of the MIT license. There is NO WARRANTY, to the extent\n" +"permitted by law. 
See the COPYING file for details.\n" + ); +} + + +/******************************************************************************/ + +static int +do_benchmark(struct file_stream *in, void *original_buf, void *compressed_buf, + void *decompressed_buf, u32 chunk_size, + bool allow_expansion, size_t compressed_buf_size, + struct compressor *compressor, + struct decompressor *decompressor) +{ + u64 total_uncompressed_size = 0; + u64 total_compressed_size = 0; + u64 total_compress_time = 0; + u64 total_decompress_time = 0; + ssize_t ret; + + while ((ret = xread(in, original_buf, chunk_size)) > 0) { + u32 original_size = ret; + size_t out_nbytes_avail; + u32 compressed_size; + u64 start_time; + bool ok; + + total_uncompressed_size += original_size; + + if (allow_expansion) { + out_nbytes_avail = compress_bound(compressor, + original_size); + if (out_nbytes_avail > compressed_buf_size) { + msg("%"TS": bug in compress_bound()", in->name); + return -1; + } + } else { + out_nbytes_avail = original_size - 1; + } + + /* Compress the chunk of data. */ + start_time = timer_ticks(); + compressed_size = do_compress(compressor, + original_buf, + original_size, + compressed_buf, + out_nbytes_avail); + total_compress_time += timer_ticks() - start_time; + + if (compressed_size) { + /* Successfully compressed the chunk of data. */ + + /* Decompress the data we just compressed and compare + * the result with the original. 
*/ + start_time = timer_ticks(); + ok = do_decompress(decompressor, + compressed_buf, compressed_size, + decompressed_buf, original_size); + total_decompress_time += timer_ticks() - start_time; + + if (!ok) { + msg("%"TS": failed to decompress data", + in->name); + return -1; + } + + if (memcmp(original_buf, decompressed_buf, + original_size) != 0) + { + msg("%"TS": data did not decompress to " + "original", in->name); + return -1; + } + + total_compressed_size += compressed_size; + } else { + /* + * The chunk would have compressed to more than + * out_nbytes_avail bytes. + */ + if (allow_expansion) { + msg("%"TS": bug in compress_bound()", in->name); + return -1; + } + total_compressed_size += original_size; + } + } + + if (ret < 0) + return ret; + + if (total_uncompressed_size == 0) { + printf("\tFile was empty.\n"); + return 0; + } + + if (total_compress_time == 0) + total_compress_time = 1; + if (total_decompress_time == 0) + total_decompress_time = 1; + + printf("\tCompressed %"PRIu64 " => %"PRIu64" bytes (%u.%03u%%)\n", + total_uncompressed_size, total_compressed_size, + (unsigned int)(total_compressed_size * 100 / + total_uncompressed_size), + (unsigned int)(total_compressed_size * 100000 / + total_uncompressed_size % 1000)); + printf("\tCompression time: %"PRIu64" ms (%"PRIu64" MB/s)\n", + timer_ticks_to_ms(total_compress_time), + timer_MB_per_s(total_uncompressed_size, total_compress_time)); + printf("\tDecompression time: %"PRIu64" ms (%"PRIu64" MB/s)\n", + timer_ticks_to_ms(total_decompress_time), + timer_MB_per_s(total_uncompressed_size, total_decompress_time)); + + return 0; +} + +int +tmain(int argc, tchar *argv[]) +{ + u32 chunk_size = 1048576; + int level = 6; + enum format format = DEFLATE_FORMAT; + const struct engine *compress_engine = &DEFAULT_ENGINE; + const struct engine *decompress_engine = &DEFAULT_ENGINE; + bool allow_expansion = false; + struct compressor compressor = { 0 }; + struct decompressor decompressor = { 0 }; + size_t 
compressed_buf_size; + void *original_buf = NULL; + void *compressed_buf = NULL; + void *decompressed_buf = NULL; + tchar *default_file_list[] = { NULL }; + int opt_char; + int i; + int ret; + + begin_program(argv); + + while ((opt_char = tgetopt(argc, argv, optstring)) != -1) { + switch (opt_char) { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + level = parse_compression_level(opt_char, toptarg); + if (level < 0) + return 1; + break; + case 'C': + compress_engine = name_to_engine(toptarg); + if (compress_engine == NULL) { + msg("invalid compression engine: \"%"TS"\"", toptarg); + show_available_engines(stderr); + return 1; + } + break; + case 'D': + decompress_engine = name_to_engine(toptarg); + if (decompress_engine == NULL) { + msg("invalid decompression engine: \"%"TS"\"", toptarg); + show_available_engines(stderr); + return 1; + } + break; + case 'e': + allow_expansion = true; + break; + case 'g': + format = GZIP_FORMAT; + break; + case 'h': + show_usage(stdout); + return 0; + case 's': + chunk_size = tstrtoul(toptarg, NULL, 10); + if (chunk_size == 0) { + msg("invalid chunk size: \"%"TS"\"", toptarg); + return 1; + } + break; + case 'V': + show_version(); + return 0; + case 'Y': /* deprecated, use '-C libz' instead */ + compress_engine = &libz_engine; + break; + case 'Z': /* deprecated, use '-D libz' instead */ + decompress_engine = &libz_engine; + break; + case 'z': + format = ZLIB_FORMAT; + break; + default: + show_usage(stderr); + return 1; + } + } + + argc -= toptind; + argv += toptind; + + if (level == 0) + allow_expansion = true; + + ret = -1; + if (!compressor_init(&compressor, level, format, compress_engine)) + goto out; + if (!decompressor_init(&decompressor, format, decompress_engine)) + goto out; + + if (allow_expansion) + compressed_buf_size = compress_bound(&compressor, chunk_size); + else + compressed_buf_size = chunk_size - 1; + + original_buf = xmalloc(chunk_size); + 
compressed_buf = xmalloc(compressed_buf_size); + decompressed_buf = xmalloc(chunk_size); + + ret = -1; + if (original_buf == NULL || compressed_buf == NULL || + decompressed_buf == NULL) + goto out; + + if (argc == 0) { + argv = default_file_list; + argc = ARRAY_LEN(default_file_list); + } else { + for (i = 0; i < argc; i++) + if (argv[i][0] == '-' && argv[i][1] == '\0') + argv[i] = NULL; + } + + printf("Benchmarking %s compression:\n", + format == DEFLATE_FORMAT ? "DEFLATE" : + format == ZLIB_FORMAT ? "zlib" : "gzip"); + printf("\tCompression level: %d\n", level); + printf("\tChunk size: %"PRIu32"\n", chunk_size); + printf("\tCompression engine: %"TS"\n", compress_engine->name); + printf("\tDecompression engine: %"TS"\n", decompress_engine->name); + + for (i = 0; i < argc; i++) { + struct file_stream in; + + ret = xopen_for_read(argv[i], true, &in); + if (ret != 0) + goto out; + + printf("Processing %"TS"...\n", in.name); + + ret = do_benchmark(&in, original_buf, compressed_buf, + decompressed_buf, chunk_size, + allow_expansion, compressed_buf_size, + &compressor, &decompressor); + xclose(&in); + if (ret != 0) + goto out; + } + ret = 0; +out: + free(decompressed_buf); + free(compressed_buf); + free(original_buf); + decompressor_destroy(&decompressor); + compressor_destroy(&compressor); + return -ret; +} diff --git a/tools/z64compress/src/enc/libdeflate/programs/checksum.c b/tools/z64compress/src/enc/libdeflate/programs/checksum.c new file mode 100644 index 000000000..68cd43c91 --- /dev/null +++ b/tools/z64compress/src/enc/libdeflate/programs/checksum.c @@ -0,0 +1,218 @@ +/* + * checksum.c - Adler-32 and CRC-32 checksumming program + * + * Copyright 2016 Eric Biggers + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, 
distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "test_util.h" + +static const tchar *const optstring = T("Ahm:s:tZ"); + +static void +show_usage(FILE *fp) +{ + fprintf(fp, +"Usage: %"TS" [-A] [-h] [-m ALIGN] [-s SIZE] [-t] [-Z] [FILE]...\n" +"Calculate Adler-32 or CRC-32 checksums of the specified FILEs.\n" +"\n" +"Options:\n" +" -A use Adler-32 (default is CRC-32)\n" +" -h print this help\n" +" -m ALIGN misalign the buffer by ALIGN bytes\n" +" -s SIZE chunk size in bytes\n" +" -t show checksum speed, excluding I/O\n" +" -Z use zlib implementation instead of libdeflate\n", + prog_invocation_name); +} + +typedef u32 (*cksum_fn_t)(u32, const void *, size_t); + +static u32 +adler32_libdeflate(u32 adler, const void *buf, size_t len) +{ + return libdeflate_adler32(adler, buf, len); +} + +static u32 +crc32_libdeflate(u32 crc, const void *buf, size_t len) +{ + return libdeflate_crc32(crc, buf, len); +} + +static u32 +adler32_zlib(u32 adler, const void *buf, size_t len) +{ + return adler32(adler, buf, len); +} + +static u32 +crc32_zlib(u32 crc, const void *buf, size_t len) +{ + return crc32(crc, buf, len); +} + +static int +checksum_stream(struct file_stream *in, cksum_fn_t cksum, u32 *sum, + void *buf, 
size_t bufsize, u64 *size_ret, u64 *elapsed_ret) +{ + u64 size = 0; + u64 elapsed = 0; + + for (;;) { + ssize_t ret; + u64 start_time; + + ret = xread(in, buf, bufsize); + if (ret < 0) + return ret; + if (ret == 0) + break; + + size += ret; + start_time = timer_ticks(); + *sum = cksum(*sum, buf, ret); + elapsed += timer_ticks() - start_time; + } + + if (elapsed == 0) + elapsed = 1; + *size_ret = size; + *elapsed_ret = elapsed; + return 0; +} + +int +tmain(int argc, tchar *argv[]) +{ + bool use_adler32 = false; + bool use_zlib_impl = false; + bool do_timing = false; + void *orig_buf = NULL; + void *buf; + size_t misalignment = 0; + size_t bufsize = 131072; + tchar *default_file_list[] = { NULL }; + cksum_fn_t cksum; + int opt_char; + int i; + int ret; + + begin_program(argv); + + while ((opt_char = tgetopt(argc, argv, optstring)) != -1) { + switch (opt_char) { + case 'A': + use_adler32 = true; + break; + case 'h': + show_usage(stdout); + return 0; + case 'm': + misalignment = tstrtoul(toptarg, NULL, 10); + if (misalignment >= 4096) { + msg("invalid misalignment: \"%"TS"\"", toptarg); + return 1; + } + break; + case 's': + bufsize = tstrtoul(toptarg, NULL, 10); + if (bufsize == 0 || bufsize > SIZE_MAX / 2) { + msg("invalid chunk size: \"%"TS"\"", toptarg); + return 1; + } + break; + case 't': + do_timing = true; + break; + case 'Z': + use_zlib_impl = true; + break; + default: + show_usage(stderr); + return 1; + } + } + + argc -= toptind; + argv += toptind; + + if (use_adler32) { + if (use_zlib_impl) + cksum = adler32_zlib; + else + cksum = adler32_libdeflate; + } else { + if (use_zlib_impl) + cksum = crc32_zlib; + else + cksum = crc32_libdeflate; + } + + orig_buf = xmalloc(bufsize + 4096 + misalignment); + if (orig_buf == NULL) + return 1; + buf = (u8 *)orig_buf + (-(uintptr_t)orig_buf % 4096) + misalignment; + + if (argc == 0) { + argv = default_file_list; + argc = ARRAY_LEN(default_file_list); + } else { + for (i = 0; i < argc; i++) + if (argv[i][0] == '-' && 
argv[i][1] == '\0') + argv[i] = NULL; + } + + for (i = 0; i < argc; i++) { + struct file_stream in; + u32 sum = cksum(0, NULL, 0); + u64 size = 0; + u64 elapsed = 0; + + ret = xopen_for_read(argv[i], true, &in); + if (ret != 0) + goto out; + + ret = checksum_stream(&in, cksum, &sum, buf, bufsize, + &size, &elapsed); + if (ret == 0) { + if (do_timing) { + printf("%08"PRIx32"\t%"TS"\t" + "%"PRIu64" ms\t%"PRIu64" MB/s\n", + sum, in.name, timer_ticks_to_ms(elapsed), + timer_MB_per_s(size, elapsed)); + } else { + printf("%08"PRIx32"\t%"TS"\t\n", sum, in.name); + } + } + + xclose(&in); + + if (ret != 0) + goto out; + } + ret = 0; +out: + free(orig_buf); + return -ret; +} diff --git a/tools/z64compress/src/enc/libdeflate/programs/config.h.in b/tools/z64compress/src/enc/libdeflate/programs/config.h.in new file mode 100644 index 000000000..588aa8dca --- /dev/null +++ b/tools/z64compress/src/enc/libdeflate/programs/config.h.in @@ -0,0 +1,22 @@ +#ifndef CONFIG_H +#define CONFIG_H + +/* Is the clock_gettime() function available? */ +#cmakedefine HAVE_CLOCK_GETTIME + +/* Is the futimens() function available? */ +#cmakedefine HAVE_FUTIMENS + +/* Is the futimes() function available? */ +#cmakedefine HAVE_FUTIMES + +/* Is the posix_fadvise() function available? */ +#cmakedefine HAVE_POSIX_FADVISE + +/* Is the posix_madvise() function available? */ +#cmakedefine HAVE_POSIX_MADVISE + +/* Does stat() provide nanosecond-precision timestamps? 
*/ +#cmakedefine HAVE_STAT_NANOSECOND_PRECISION + +#endif /* CONFIG_H */ diff --git a/tools/z64compress/src/enc/libdeflate/programs/gzip.c b/tools/z64compress/src/enc/libdeflate/programs/gzip.c new file mode 100644 index 000000000..c13474af5 --- /dev/null +++ b/tools/z64compress/src/enc/libdeflate/programs/gzip.c @@ -0,0 +1,701 @@ +/* + * gzip.c - a file compression and decompression program + * + * Copyright 2016 Eric Biggers + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#ifdef __sun +# define __EXTENSIONS__ /* for futimens() */ +#endif + +#include "prog_util.h" + +#include <errno.h> +#include <sys/types.h> +#include <sys/stat.h> +#ifdef _WIN32 +# include <sys/utime.h> +#else +# include <sys/time.h> +# include <unistd.h> +# include <utime.h> +#endif + +#define GZIP_MIN_HEADER_SIZE 10 +#define GZIP_FOOTER_SIZE 8 +#define GZIP_MIN_OVERHEAD (GZIP_MIN_HEADER_SIZE + GZIP_FOOTER_SIZE) +#define GZIP_ID1 0x1F +#define GZIP_ID2 0x8B + +struct options { + bool to_stdout; + bool decompress; + bool force; + bool keep; + bool test; + int compression_level; + const tchar *suffix; +}; + +static const tchar *const optstring = T("1::2::3::4::5::6::7::8::9::cdfhknqS:tV"); + +static void +show_usage(FILE *fp) +{ + fprintf(fp, +"Usage: %"TS" [-LEVEL] [-cdfhkqtV] [-S SUF] FILE...\n" +"Compress or decompress the specified FILEs.\n" +"\n" +"Options:\n" +" -1 fastest (worst) compression\n" +" -6 medium compression (default)\n" +" -12 slowest (best) compression\n" +" -c write to standard output\n" +" -d decompress\n" +" -f overwrite existing output files; (de)compress hard-linked files;\n" +" allow reading/writing compressed data from/to terminal;\n" +" with gunzip -c, pass through non-gzipped data\n" +" -h print this help\n" +" -k don't delete input files\n" +" -q suppress warnings\n" +" -S SUF use suffix SUF instead of .gz\n" +" -t test file integrity\n" +" -V show version and legal information\n", + prog_invocation_name); +} + +static void +show_version(void) +{ + printf( +"gzip compression program v" LIBDEFLATE_VERSION_STRING "\n" +"Copyright 2016 Eric Biggers\n" +"\n" +"This program is free software which may be modified and/or redistributed\n" +"under the terms of the MIT license. There is NO WARRANTY, to the extent\n" +"permitted by law. See the COPYING file for details.\n" + ); +} + +/* Was the program invoked in decompression mode? 
*/ +static bool +is_gunzip(void) +{ + if (tstrxcmp(prog_invocation_name, T("gunzip")) == 0) + return true; + if (tstrxcmp(prog_invocation_name, T("libdeflate-gunzip")) == 0) + return true; +#ifdef _WIN32 + if (tstrxcmp(prog_invocation_name, T("gunzip.exe")) == 0) + return true; + if (tstrxcmp(prog_invocation_name, T("libdeflate-gunzip.exe")) == 0) + return true; +#endif + return false; +} + +static const tchar * +get_suffix(const tchar *path, const tchar *suffix) +{ + size_t path_len = tstrlen(path); + size_t suffix_len = tstrlen(suffix); + const tchar *p; + + if (path_len <= suffix_len) + return NULL; + p = &path[path_len - suffix_len]; + if (tstrxcmp(p, suffix) == 0) + return p; + return NULL; +} + +static bool +has_suffix(const tchar *path, const tchar *suffix) +{ + return get_suffix(path, suffix) != NULL; +} + +static tchar * +append_suffix(const tchar *path, const tchar *suffix) +{ + size_t path_len = tstrlen(path); + size_t suffix_len = tstrlen(suffix); + tchar *suffixed_path; + + suffixed_path = xmalloc((path_len + suffix_len + 1) * sizeof(tchar)); + if (suffixed_path == NULL) + return NULL; + tmemcpy(suffixed_path, path, path_len); + tmemcpy(&suffixed_path[path_len], suffix, suffix_len + 1); + return suffixed_path; +} + +static int +do_compress(struct libdeflate_compressor *compressor, + struct file_stream *in, struct file_stream *out) +{ + const void *uncompressed_data = in->mmap_mem; + size_t uncompressed_size = in->mmap_size; + void *compressed_data; + size_t actual_compressed_size; + size_t max_compressed_size; + int ret; + + max_compressed_size = libdeflate_gzip_compress_bound(compressor, + uncompressed_size); + compressed_data = xmalloc(max_compressed_size); + if (compressed_data == NULL) { + msg("%"TS": file is probably too large to be processed by this " + "program", in->name); + ret = -1; + goto out; + } + + actual_compressed_size = libdeflate_gzip_compress(compressor, + uncompressed_data, + uncompressed_size, + compressed_data, + 
max_compressed_size); + if (actual_compressed_size == 0) { + msg("Bug in libdeflate_gzip_compress_bound()!"); + ret = -1; + goto out; + } + + ret = full_write(out, compressed_data, actual_compressed_size); +out: + free(compressed_data); + return ret; +} + +static int +do_decompress(struct libdeflate_decompressor *decompressor, + struct file_stream *in, struct file_stream *out, + const struct options *options) +{ + const u8 *compressed_data = in->mmap_mem; + size_t compressed_size = in->mmap_size; + void *uncompressed_data = NULL; + size_t uncompressed_size; + size_t max_uncompressed_size; + size_t actual_in_nbytes; + size_t actual_out_nbytes; + enum libdeflate_result result; + int ret = 0; + + if (compressed_size < GZIP_MIN_OVERHEAD || + compressed_data[0] != GZIP_ID1 || + compressed_data[1] != GZIP_ID2) { + if (options->force && options->to_stdout) + return full_write(out, compressed_data, compressed_size); + msg("%"TS": not in gzip format", in->name); + return -1; + } + + /* + * Use the ISIZE field as a hint for the decompressed data size. It may + * need to be increased later, however, because the file may contain + * multiple gzip members and the particular ISIZE we happen to use may + * not be the largest; or the real size may be >= 4 GiB, causing ISIZE + * to overflow. In any case, make sure to allocate at least one byte. + */ + uncompressed_size = + get_unaligned_le32(&compressed_data[compressed_size - 4]); + if (uncompressed_size == 0) + uncompressed_size = 1; + + /* + * DEFLATE cannot expand data more than 1032x, so there's no need to + * ever allocate a buffer more than 1032 times larger than the + * compressed data. This is a fail-safe, albeit not a very good one, if + * ISIZE becomes corrupted on a small file. (The 1032x number comes + * from each 2 bits generating a 258-byte match. This is a hard upper + * bound; the real upper bound is slightly smaller due to overhead.) 
+ */ + if (compressed_size <= SIZE_MAX / 1032) + max_uncompressed_size = compressed_size * 1032; + else + max_uncompressed_size = SIZE_MAX; + + do { + if (uncompressed_data == NULL) { + uncompressed_size = MIN(uncompressed_size, + max_uncompressed_size); + uncompressed_data = xmalloc(uncompressed_size); + if (uncompressed_data == NULL) { + msg("%"TS": file is probably too large to be " + "processed by this program", in->name); + ret = -1; + goto out; + } + } + + result = libdeflate_gzip_decompress_ex(decompressor, + compressed_data, + compressed_size, + uncompressed_data, + uncompressed_size, + &actual_in_nbytes, + &actual_out_nbytes); + + if (result == LIBDEFLATE_INSUFFICIENT_SPACE) { + if (uncompressed_size >= max_uncompressed_size) { + msg("Bug in libdeflate_gzip_decompress_ex(): data expanded too much!"); + ret = -1; + goto out; + } + if (uncompressed_size * 2 <= uncompressed_size) { + msg("%"TS": file corrupt or too large to be " + "processed by this program", in->name); + ret = -1; + goto out; + } + uncompressed_size *= 2; + free(uncompressed_data); + uncompressed_data = NULL; + continue; + } + + if (result != LIBDEFLATE_SUCCESS) { + msg("%"TS": file corrupt or not in gzip format", + in->name); + ret = -1; + goto out; + } + + if (actual_in_nbytes == 0 || + actual_in_nbytes > compressed_size || + actual_out_nbytes > uncompressed_size) { + msg("Bug in libdeflate_gzip_decompress_ex(): impossible actual_nbytes value!"); + ret = -1; + goto out; + } + + if (!options->test) { + ret = full_write(out, uncompressed_data, actual_out_nbytes); + if (ret != 0) + goto out; + } + + compressed_data += actual_in_nbytes; + compressed_size -= actual_in_nbytes; + + } while (compressed_size != 0); +out: + free(uncompressed_data); + return ret; +} + +static int +stat_file(struct file_stream *in, stat_t *stbuf, bool allow_hard_links) +{ + if (tfstat(in->fd, stbuf) != 0) { + msg("%"TS": unable to stat file", in->name); + return -1; + } + + if (!S_ISREG(stbuf->st_mode) && 
!in->is_standard_stream) { + warn("%"TS" is %s -- skipping", + in->name, S_ISDIR(stbuf->st_mode) ? "a directory" : + "not a regular file"); + return -2; + } + + if (stbuf->st_nlink > 1 && !allow_hard_links) { + warn("%"TS" has multiple hard links -- skipping (use -f to process anyway)", + in->name); + return -2; + } + + return 0; +} + +static void +restore_mode(struct file_stream *out, const stat_t *stbuf) +{ +#ifndef _WIN32 + if (fchmod(out->fd, stbuf->st_mode) != 0) + msg_errno("%"TS": unable to preserve mode", out->name); +#endif +} + +static void +restore_owner_and_group(struct file_stream *out, const stat_t *stbuf) +{ +#ifndef _WIN32 + if (fchown(out->fd, stbuf->st_uid, stbuf->st_gid) != 0) { + msg_errno("%"TS": unable to preserve owner and group", + out->name); + } +#endif +} + +static void +restore_timestamps(struct file_stream *out, const tchar *newpath, + const stat_t *stbuf) +{ + int ret; +#ifdef __APPLE__ + struct timespec times[2] = { + { stbuf->st_atime, stbuf->st_atimensec }, + { stbuf->st_mtime, stbuf->st_mtimensec }, + }; + ret = futimens(out->fd, times); +#elif defined(HAVE_FUTIMENS) && defined(HAVE_STAT_NANOSECOND_PRECISION) + struct timespec times[2] = { + stbuf->st_atim, stbuf->st_mtim, + }; + ret = futimens(out->fd, times); +#elif defined(HAVE_FUTIMES) && defined(HAVE_STAT_NANOSECOND_PRECISION) + struct timeval times[2] = { + { stbuf->st_atim.tv_sec, stbuf->st_atim.tv_nsec / 1000, }, + { stbuf->st_mtim.tv_sec, stbuf->st_mtim.tv_nsec / 1000, }, + }; + ret = futimes(out->fd, times); +#else + struct tutimbuf times = { + stbuf->st_atime, stbuf->st_mtime, + }; + ret = tutime(newpath, &times); +#endif + if (ret != 0) + msg_errno("%"TS": unable to preserve timestamps", out->name); +} + +static void +restore_metadata(struct file_stream *out, const tchar *newpath, + const stat_t *stbuf) +{ + restore_mode(out, stbuf); + restore_owner_and_group(out, stbuf); + restore_timestamps(out, newpath, stbuf); +} + +static int +decompress_file(struct 
libdeflate_decompressor *decompressor, const tchar *path, + const struct options *options) +{ + tchar *oldpath = (tchar *)path; + tchar *newpath = NULL; + struct file_stream in; + struct file_stream out; + stat_t stbuf; + int ret; + int ret2; + + if (path != NULL) { + const tchar *suffix = get_suffix(path, options->suffix); + if (suffix == NULL) { + /* + * Input file is unsuffixed. If the file doesn't exist, + * then try it suffixed. Otherwise, if we're not + * writing to stdout, skip the file with warning status. + * Otherwise, go ahead and try to open the file anyway + * (which will very likely fail). + */ + if (tstat(path, &stbuf) != 0 && errno == ENOENT) { + oldpath = append_suffix(path, options->suffix); + if (oldpath == NULL) + return -1; + if (!options->to_stdout) + newpath = (tchar *)path; + } else if (!options->to_stdout) { + warn("\"%"TS"\" does not end with the %"TS" suffix -- skipping", + path, options->suffix); + return -2; + } + } else if (!options->to_stdout) { + /* + * Input file is suffixed, and we're not writing to + * stdout. Strip the suffix to get the path to the + * output file. + */ + newpath = xmalloc((suffix - oldpath + 1) * + sizeof(tchar)); + if (newpath == NULL) + return -1; + tmemcpy(newpath, oldpath, suffix - oldpath); + newpath[suffix - oldpath] = '\0'; + } + } + + ret = xopen_for_read(oldpath, options->force || options->to_stdout, + &in); + if (ret != 0) + goto out_free_paths; + + if (!options->force && isatty(in.fd)) { + msg("Refusing to read compressed data from terminal. 
" + "Use -f to override.\nFor help, use -h."); + ret = -1; + goto out_close_in; + } + + ret = stat_file(&in, &stbuf, options->force || options->keep || + oldpath == NULL || newpath == NULL); + if (ret != 0) + goto out_close_in; + + ret = xopen_for_write(newpath, options->force, &out); + if (ret != 0) + goto out_close_in; + + /* TODO: need a streaming-friendly solution */ + ret = map_file_contents(&in, stbuf.st_size); + if (ret != 0) + goto out_close_out; + + ret = do_decompress(decompressor, &in, &out, options); + if (ret != 0) + goto out_close_out; + + if (oldpath != NULL && newpath != NULL) + restore_metadata(&out, newpath, &stbuf); + ret = 0; +out_close_out: + ret2 = xclose(&out); + if (ret == 0) + ret = ret2; + if (ret != 0 && newpath != NULL) + tunlink(newpath); +out_close_in: + xclose(&in); + if (ret == 0 && oldpath != NULL && newpath != NULL && !options->keep) + tunlink(oldpath); +out_free_paths: + if (newpath != path) + free(newpath); + if (oldpath != path) + free(oldpath); + return ret; +} + +static int +compress_file(struct libdeflate_compressor *compressor, const tchar *path, + const struct options *options) +{ + tchar *newpath = NULL; + struct file_stream in; + struct file_stream out; + stat_t stbuf; + int ret; + int ret2; + + if (path != NULL && !options->to_stdout) { + if (!options->force && has_suffix(path, options->suffix)) { + msg("%"TS": already has %"TS" suffix -- skipping", + path, options->suffix); + return 0; + } + newpath = append_suffix(path, options->suffix); + if (newpath == NULL) + return -1; + } + + ret = xopen_for_read(path, options->force || options->to_stdout, &in); + if (ret != 0) + goto out_free_newpath; + + ret = stat_file(&in, &stbuf, options->force || options->keep || + path == NULL || newpath == NULL); + if (ret != 0) + goto out_close_in; + + ret = xopen_for_write(newpath, options->force, &out); + if (ret != 0) + goto out_close_in; + + if (!options->force && isatty(out.fd)) { + msg("Refusing to write compressed data to terminal. 
" + "Use -f to override.\nFor help, use -h."); + ret = -1; + goto out_close_out; + } + + /* TODO: need a streaming-friendly solution */ + ret = map_file_contents(&in, stbuf.st_size); + if (ret != 0) + goto out_close_out; + + ret = do_compress(compressor, &in, &out); + if (ret != 0) + goto out_close_out; + + if (path != NULL && newpath != NULL) + restore_metadata(&out, newpath, &stbuf); + ret = 0; +out_close_out: + ret2 = xclose(&out); + if (ret == 0) + ret = ret2; + if (ret != 0 && newpath != NULL) + tunlink(newpath); +out_close_in: + xclose(&in); + if (ret == 0 && path != NULL && newpath != NULL && !options->keep) + tunlink(path); +out_free_newpath: + free(newpath); + return ret; +} + +int +tmain(int argc, tchar *argv[]) +{ + tchar *default_file_list[] = { NULL }; + struct options options; + int opt_char; + int i; + int ret; + + begin_program(argv); + + options.to_stdout = false; + options.decompress = is_gunzip(); + options.force = false; + options.keep = false; + options.test = false; + options.compression_level = 6; + options.suffix = T(".gz"); + + while ((opt_char = tgetopt(argc, argv, optstring)) != -1) { + switch (opt_char) { + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + options.compression_level = + parse_compression_level(opt_char, toptarg); + if (options.compression_level < 0) + return 1; + break; + case 'c': + options.to_stdout = true; + break; + case 'd': + options.decompress = true; + break; + case 'f': + options.force = true; + break; + case 'h': + show_usage(stdout); + return 0; + case 'k': + options.keep = true; + break; + case 'n': + /* + * -n means don't save or restore the original filename + * in the gzip header. Currently this implementation + * already behaves this way by default, so accept the + * option as a no-op. 
+ */ + break; + case 'q': + suppress_warnings = true; + break; + case 'S': + options.suffix = toptarg; + if (options.suffix[0] == T('\0')) { + msg("invalid suffix"); + return 1; + } + break; + case 't': + options.test = true; + options.decompress = true; + options.to_stdout = true; + /* + * -t behaves just like the more commonly used -c + * option, except that -t doesn't actually write + * anything. For ease of implementation, just pretend + * that -c was specified too. + */ + break; + case 'V': + show_version(); + return 0; + default: + show_usage(stderr); + return 1; + } + } + + argv += toptind; + argc -= toptind; + + if (argc == 0) { + argv = default_file_list; + argc = ARRAY_LEN(default_file_list); + } else { + for (i = 0; i < argc; i++) + if (argv[i][0] == '-' && argv[i][1] == '\0') + argv[i] = NULL; + } + + ret = 0; + if (options.decompress) { + struct libdeflate_decompressor *d; + + d = alloc_decompressor(); + if (d == NULL) + return 1; + + for (i = 0; i < argc; i++) + ret |= -decompress_file(d, argv[i], &options); + + libdeflate_free_decompressor(d); + } else { + struct libdeflate_compressor *c; + + c = alloc_compressor(options.compression_level); + if (c == NULL) + return 1; + + for (i = 0; i < argc; i++) + ret |= -compress_file(c, argv[i], &options); + + libdeflate_free_compressor(c); + } + + switch (ret) { + case 0: + /* No warnings or errors */ + return 0; + case 2: + /* At least one warning, but no errors */ + if (suppress_warnings) + return 0; + return 2; + default: + /* At least one error */ + return 1; + } +} diff --git a/tools/z64compress/src/enc/libdeflate/programs/prog_util.c b/tools/z64compress/src/enc/libdeflate/programs/prog_util.c new file mode 100644 index 000000000..a4bf1c47d --- /dev/null +++ b/tools/z64compress/src/enc/libdeflate/programs/prog_util.c @@ -0,0 +1,522 @@ +/* + * prog_util.c - utility functions for programs + * + * Copyright 2016 Eric Biggers + * + * Permission is hereby granted, free of charge, to any person + * obtaining a 
copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#ifdef __APPLE__ +/* for O_NOFOLLOW */ +# undef _POSIX_C_SOURCE +# define _DARWIN_C_SOURCE +#endif + +#include "prog_util.h" + +#include <errno.h> +#include <fcntl.h> +#include <stdlib.h> +#ifdef _WIN32 +# include <windows.h> +#else +# include <sys/mman.h> +# include <unistd.h> +#endif + +#ifndef O_BINARY +# define O_BINARY 0 +#endif +#ifndef O_SEQUENTIAL +# define O_SEQUENTIAL 0 +#endif +#ifndef O_NOFOLLOW +# define O_NOFOLLOW 0 +#endif +#ifndef O_NONBLOCK +# define O_NONBLOCK 0 +#endif +#ifndef O_NOCTTY +# define O_NOCTTY 0 +#endif + +/* The invocation name of the program (filename component only) */ +const tchar *prog_invocation_name; + +/* Whether to suppress warning messages or not */ +bool suppress_warnings; + +static void +do_msg(const char *format, bool with_errno, va_list va) +{ + int saved_errno = errno; + + fprintf(stderr, "%"TS": ", prog_invocation_name); + vfprintf(stderr, format, va); + if (with_errno) + fprintf(stderr, ": %s\n", strerror(saved_errno)); + else + fprintf(stderr, "\n"); + + errno = saved_errno; +} + +/* Print a message to standard error */ +void +msg(const char *format, ...) +{ + va_list va; + + va_start(va, format); + do_msg(format, false, va); + va_end(va); +} + +/* Print a message to standard error, including a description of errno */ +void +msg_errno(const char *format, ...) +{ + va_list va; + + va_start(va, format); + do_msg(format, true, va); + va_end(va); +} + + +/* Same as msg(), but do nothing if 'suppress_warnings' has been set. */ +void +warn(const char *format, ...) +{ + if (!suppress_warnings) { + va_list va; + + va_start(va, format); + do_msg(format, false, va); + va_end(va); + } +} + +/* malloc() wrapper */ +void * +xmalloc(size_t size) +{ + void *p = malloc(size); + if (p == NULL && size == 0) + p = malloc(1); + if (p == NULL) + msg("Out of memory"); + return p; +} + +/* + * Retrieve a pointer to the filename component of the specified path. + * + * Note: this does not modify the path. 
Therefore, it is not guaranteed to work + * properly for directories, since a path to a directory might have trailing + * slashes. + */ +static const tchar * +get_filename(const tchar *path) +{ + const tchar *slash = tstrrchr(path, '/'); +#ifdef _WIN32 + const tchar *backslash = tstrrchr(path, '\\'); + if (backslash != NULL && (slash == NULL || backslash > slash)) + slash = backslash; +#endif + if (slash != NULL) + return slash + 1; + return path; +} + +void +begin_program(tchar *argv[]) +{ + prog_invocation_name = get_filename(argv[0]); + +#ifdef FREESTANDING + /* This allows testing freestanding library builds. */ + libdeflate_set_memory_allocator(malloc, free); +#endif +} + +/* Create a copy of 'path' surrounded by double quotes */ +static tchar * +quote_path(const tchar *path) +{ + size_t len = tstrlen(path); + tchar *result; + + result = xmalloc((1 + len + 1 + 1) * sizeof(tchar)); + if (result == NULL) + return NULL; + result[0] = '"'; + tmemcpy(&result[1], path, len); + result[1 + len] = '"'; + result[1 + len + 1] = '\0'; + return result; +} + +/* Open a file for reading, or set up standard input for reading */ +int +xopen_for_read(const tchar *path, bool symlink_ok, struct file_stream *strm) +{ + strm->mmap_token = NULL; + strm->mmap_mem = NULL; + + if (path == NULL) { + strm->is_standard_stream = true; + strm->name = T("standard input"); + strm->fd = STDIN_FILENO; + #ifdef _WIN32 + _setmode(strm->fd, O_BINARY); + #endif + return 0; + } + + strm->is_standard_stream = false; + + strm->name = quote_path(path); + if (strm->name == NULL) + return -1; + + strm->fd = topen(path, O_RDONLY | O_BINARY | O_NONBLOCK | O_NOCTTY | + (symlink_ok ? 
0 : O_NOFOLLOW) | O_SEQUENTIAL); + if (strm->fd < 0) { + msg_errno("Can't open %"TS" for reading", strm->name); + free(strm->name); + return -1; + } + +#if defined(HAVE_POSIX_FADVISE) && (O_SEQUENTIAL == 0) + (void)posix_fadvise(strm->fd, 0, 0, POSIX_FADV_SEQUENTIAL); +#endif + + return 0; +} + +/* Open a file for writing, or set up standard output for writing */ +int +xopen_for_write(const tchar *path, bool overwrite, struct file_stream *strm) +{ + int ret = -1; + + strm->mmap_token = NULL; + strm->mmap_mem = NULL; + + if (path == NULL) { + strm->is_standard_stream = true; + strm->name = T("standard output"); + strm->fd = STDOUT_FILENO; + #ifdef _WIN32 + _setmode(strm->fd, O_BINARY); + #endif + return 0; + } + + strm->is_standard_stream = false; + + strm->name = quote_path(path); + if (strm->name == NULL) + goto err; +retry: + strm->fd = topen(path, O_WRONLY | O_BINARY | O_NOFOLLOW | + O_CREAT | O_EXCL, 0644); + if (strm->fd < 0) { + if (errno != EEXIST) { + msg_errno("Can't open %"TS" for writing", strm->name); + goto err; + } + if (!overwrite) { + if (!isatty(STDERR_FILENO) || !isatty(STDIN_FILENO)) { + warn("%"TS" already exists; use -f to overwrite", + strm->name); + ret = -2; /* warning only */ + goto err; + } + fprintf(stderr, "%"TS": %"TS" already exists; " + "overwrite? 
(y/n) ", + prog_invocation_name, strm->name); + if (getchar() != 'y') { + msg("Not overwriting."); + goto err; + } + } + if (tunlink(path) != 0) { + msg_errno("Unable to delete %"TS, strm->name); + goto err; + } + goto retry; + } + + return 0; + +err: + free(strm->name); + return ret; +} + +/* Read the full contents of a file into memory */ +static int +read_full_contents(struct file_stream *strm) +{ + size_t filled = 0; + size_t capacity = 4096; + char *buf; + int ret; + + buf = xmalloc(capacity); + if (buf == NULL) + return -1; + do { + if (filled == capacity) { + char *newbuf; + + if (capacity == SIZE_MAX) + goto oom; + capacity += MIN(SIZE_MAX - capacity, capacity); + newbuf = realloc(buf, capacity); + if (newbuf == NULL) + goto oom; + buf = newbuf; + } + ret = xread(strm, &buf[filled], capacity - filled); + if (ret < 0) + goto err; + filled += ret; + } while (ret != 0); + + strm->mmap_mem = buf; + strm->mmap_size = filled; + return 0; + +err: + free(buf); + return ret; +oom: + msg("Out of memory! 
%"TS" is too large to be processed by " + "this program as currently implemented.", strm->name); + ret = -1; + goto err; +} + +/* Map the contents of a file into memory */ +int +map_file_contents(struct file_stream *strm, u64 size) +{ + if (size == 0) /* mmap isn't supported on empty files */ + return read_full_contents(strm); + + if (size > SIZE_MAX) { + msg("%"TS" is too large to be processed by this program", + strm->name); + return -1; + } +#ifdef _WIN32 + strm->mmap_token = CreateFileMapping( + (HANDLE)(intptr_t)_get_osfhandle(strm->fd), + NULL, PAGE_READONLY, 0, 0, NULL); + if (strm->mmap_token == NULL) { + DWORD err = GetLastError(); + if (err == ERROR_BAD_EXE_FORMAT) /* mmap unsupported */ + return read_full_contents(strm); + msg("Unable create file mapping for %"TS": Windows error %u", + strm->name, (unsigned int)err); + return -1; + } + + strm->mmap_mem = MapViewOfFile((HANDLE)strm->mmap_token, + FILE_MAP_READ, 0, 0, size); + if (strm->mmap_mem == NULL) { + msg("Unable to map %"TS" into memory: Windows error %u", + strm->name, (unsigned int)GetLastError()); + CloseHandle((HANDLE)strm->mmap_token); + return -1; + } +#else /* _WIN32 */ + strm->mmap_mem = mmap(NULL, size, PROT_READ, MAP_SHARED, strm->fd, 0); + if (strm->mmap_mem == MAP_FAILED) { + strm->mmap_mem = NULL; + if (errno == ENODEV /* standard */ || + errno == EINVAL /* macOS */) { + /* mmap isn't supported on this file */ + return read_full_contents(strm); + } + if (errno == ENOMEM) { + msg("%"TS" is too large to be processed by this " + "program", strm->name); + } else { + msg_errno("Unable to map %"TS" into memory", + strm->name); + } + return -1; + } + +#ifdef HAVE_POSIX_MADVISE + (void)posix_madvise(strm->mmap_mem, size, POSIX_MADV_SEQUENTIAL); +#endif + strm->mmap_token = strm; /* anything that's not NULL */ + +#endif /* !_WIN32 */ + strm->mmap_size = size; + return 0; +} + +/* + * Read from a file, returning the full count to indicate all bytes were read, a + * short count (possibly 0) to 
indicate EOF, or -1 to indicate error. + */ +ssize_t +xread(struct file_stream *strm, void *buf, size_t count) +{ + char *p = buf; + size_t orig_count = count; + + while (count != 0) { + ssize_t res = read(strm->fd, p, MIN(count, INT_MAX)); + if (res == 0) + break; + if (res < 0) { + if (errno == EAGAIN || errno == EINTR) + continue; + msg_errno("Error reading from %"TS, strm->name); + return -1; + } + p += res; + count -= res; + } + return orig_count - count; +} + +/* Write to a file, returning 0 if all bytes were written or -1 on error */ +int +full_write(struct file_stream *strm, const void *buf, size_t count) +{ + const char *p = buf; + + while (count != 0) { + ssize_t res = write(strm->fd, p, MIN(count, INT_MAX)); + if (res <= 0) { + msg_errno("Error writing to %"TS, strm->name); + return -1; + } + p += res; + count -= res; + } + return 0; +} + +/* Close a file, returning 0 on success or -1 on error */ +int +xclose(struct file_stream *strm) +{ + int ret = 0; + + if (!strm->is_standard_stream) { + if (close(strm->fd) != 0) { + msg_errno("Error closing %"TS, strm->name); + ret = -1; + } + free(strm->name); + } + + if (strm->mmap_token != NULL) { +#ifdef _WIN32 + UnmapViewOfFile(strm->mmap_mem); + CloseHandle((HANDLE)strm->mmap_token); +#else + munmap(strm->mmap_mem, strm->mmap_size); +#endif + strm->mmap_token = NULL; + } else { + free(strm->mmap_mem); + } + strm->mmap_mem = NULL; + strm->fd = -1; + strm->name = NULL; + return ret; +} + +/* + * Parse the compression level given on the command line, returning the + * compression level on success or -1 on error + */ +int +parse_compression_level(tchar opt_char, const tchar *arg) +{ + int level; + + if (arg == NULL) + arg = T(""); + + if (opt_char < '0' || opt_char > '9') + goto invalid; + level = opt_char - '0'; + + if (arg[0] != '\0') { + if (arg[0] < '0' || arg[0] > '9') + goto invalid; + if (arg[1] != '\0') /* Levels are at most 2 digits */ + goto invalid; + if (level == 0) /* Don't allow arguments like "-01" 
*/ + goto invalid; + level = (level * 10) + (arg[0] - '0'); + } + + if (level < 0 || level > 12) + goto invalid; + + return level; + +invalid: + msg("Invalid compression level: \"%"TC"%"TS"\". " + "Must be an integer in the range [0, 12].", opt_char, arg); + return -1; +} + +/* Allocate a new DEFLATE compressor */ +struct libdeflate_compressor * +alloc_compressor(int level) +{ + struct libdeflate_compressor *c; + + c = libdeflate_alloc_compressor(level); + if (c == NULL) { + msg_errno("Unable to allocate compressor with " + "compression level %d", level); + } + return c; +} + +/* Allocate a new DEFLATE decompressor */ +struct libdeflate_decompressor * +alloc_decompressor(void) +{ + struct libdeflate_decompressor *d; + + d = libdeflate_alloc_decompressor(); + if (d == NULL) + msg_errno("Unable to allocate decompressor"); + + return d; +} diff --git a/tools/z64compress/src/enc/libdeflate/programs/prog_util.h b/tools/z64compress/src/enc/libdeflate/programs/prog_util.h new file mode 100644 index 000000000..08f538399 --- /dev/null +++ b/tools/z64compress/src/enc/libdeflate/programs/prog_util.h @@ -0,0 +1,177 @@ +/* + * prog_util.h - utility functions for programs + * + * Copyright 2016 Eric Biggers + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef PROGRAMS_PROG_UTIL_H +#define PROGRAMS_PROG_UTIL_H + +/* + * To keep the code similar on all platforms, sometimes we intentionally use the + * "deprecated" non-underscore-prefixed variants of functions in msvcrt. + */ +#if defined(_WIN32) && !defined(_CRT_NONSTDC_NO_DEPRECATE) +# define _CRT_NONSTDC_NO_DEPRECATE 1 +#endif +/* + * Similarly, to match other platforms we intentionally use the "non-secure" + * variants, which aren't actually any less secure when used properly. + */ +#if defined(_WIN32) && !defined(_CRT_SECURE_NO_WARNINGS) +# define _CRT_SECURE_NO_WARNINGS 1 +#endif + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include "libdeflate.h" + +#include +#include +#include +#include +#include +#ifndef _WIN32 +# include +#endif + +#include "../common_defs.h" + +#if defined(__GNUC__) || __has_attribute(format) +# define _printf(str_idx, args_idx) \ + __attribute__((format(printf, str_idx, args_idx))) +#else +# define _printf(str_idx, args_idx) +#endif + +#ifdef _WIN32 + +/* + * Definitions for Windows builds. Mainly, 'tchar' is defined to be the 2-byte + * 'wchar_t' type instead of 'char'. This is the only "easy" way I know of to + * get full Unicode support on Windows... 
+ */ + +#include +#include +int wmain(int argc, wchar_t **argv); +# define tmain wmain +# define tchar wchar_t +# define _T(text) L##text +# define T(text) _T(text) +# define TS "ls" +# define TC "lc" +# define tmemcpy wmemcpy +# define topen _wopen +# define tstrchr wcschr +# define tstrcmp wcscmp +# define tstrlen wcslen +# define tstrrchr wcsrchr +# define tstrtoul wcstoul +# define tstrxcmp wcsicmp +# define tunlink _wunlink +# define tutimbuf __utimbuf64 +# define tutime _wutime64 +# define tstat _wstat64 +# define tfstat _fstat64 +# define stat_t struct _stat64 +# ifdef _MSC_VER +# define STDIN_FILENO 0 +# define STDOUT_FILENO 1 +# define STDERR_FILENO 2 +# define S_ISREG(m) (((m) & S_IFMT) == S_IFREG) +# define S_ISDIR(m) (((m) & S_IFMT) == S_IFDIR) +# endif + +#else /* _WIN32 */ + +/* Standard definitions for everyone else */ + +# define tmain main +# define tchar char +# define T(text) text +# define TS "s" +# define TC "c" +# define tmemcpy memcpy +# define topen open +# define tstrchr strchr +# define tstrcmp strcmp +# define tstrlen strlen +# define tstrrchr strrchr +# define tstrtoul strtoul +# define tstrxcmp strcmp +# define tunlink unlink +# define tutimbuf utimbuf +# define tutime utime +# define tstat stat +# define tfstat fstat +# define stat_t struct stat + +#endif /* !_WIN32 */ + +extern const tchar *prog_invocation_name; +extern bool suppress_warnings; + +void _printf(1, 2) msg(const char *fmt, ...); +void _printf(1, 2) msg_errno(const char *fmt, ...); +void _printf(1, 2) warn(const char *fmt, ...); + +void *xmalloc(size_t size); + +void begin_program(tchar *argv[]); + +struct file_stream { + int fd; + tchar *name; + bool is_standard_stream; + void *mmap_token; + void *mmap_mem; + size_t mmap_size; +}; + +int xopen_for_read(const tchar *path, bool symlink_ok, + struct file_stream *strm); +int xopen_for_write(const tchar *path, bool force, struct file_stream *strm); +int map_file_contents(struct file_stream *strm, u64 size); + +ssize_t 
xread(struct file_stream *strm, void *buf, size_t count); +int full_write(struct file_stream *strm, const void *buf, size_t count); + +int xclose(struct file_stream *strm); + +int parse_compression_level(tchar opt_char, const tchar *arg); + +struct libdeflate_compressor *alloc_compressor(int level); +struct libdeflate_decompressor *alloc_decompressor(void); + +/* tgetopt.c */ + +extern tchar *toptarg; +extern int toptind, topterr, toptopt; + +int tgetopt(int argc, tchar *argv[], const tchar *optstring); + +#endif /* PROGRAMS_PROG_UTIL_H */ diff --git a/tools/z64compress/src/enc/libdeflate/programs/test_checksums.c b/tools/z64compress/src/enc/libdeflate/programs/test_checksums.c new file mode 100644 index 000000000..e66e62443 --- /dev/null +++ b/tools/z64compress/src/enc/libdeflate/programs/test_checksums.c @@ -0,0 +1,200 @@ +/* + * test_checksums.c + * + * Verify that libdeflate's Adler-32 and CRC-32 functions produce the same + * results as their zlib equivalents. + */ + +#include +#include + +#include "test_util.h" + +static unsigned int rng_seed; + +typedef u32 (*cksum_fn_t)(u32, const void *, size_t); + +static u32 +adler32_libdeflate(u32 adler, const void *buf, size_t len) +{ + return libdeflate_adler32(adler, buf, len); +} + +static u32 +crc32_libdeflate(u32 crc, const void *buf, size_t len) +{ + return libdeflate_crc32(crc, buf, len); +} + +static u32 +adler32_zlib(u32 adler, const void *buf, size_t len) +{ + return adler32(adler, buf, len); +} + +static u32 +crc32_zlib(u32 crc, const void *buf, size_t len) +{ + return crc32(crc, buf, len); +} + +static u32 +select_initial_crc(void) +{ + if (rand() & 1) + return 0; + return ((u32)rand() << 16) | rand(); +} + +static u32 +select_initial_adler(void) +{ + u32 lo, hi; + + if (rand() & 1) + return 1; + + lo = (rand() % 4 == 0 ? 65520 : rand() % 65521); + hi = (rand() % 4 == 0 ? 
65520 : rand() % 65521);
+	return (hi << 16) | lo;
+}
+
+/* Check that 'cksum' returns 'expected' for the empty/NULL-buffer cases,
+ * regardless of the (ignored) length and starting value. */
+static void
+test_initial_values(cksum_fn_t cksum, u32 expected)
+{
+	ASSERT(cksum(0, NULL, 0) == expected);
+	if (cksum != adler32_zlib) /* zlib's adler32(NULL, len>0) is broken */
+		ASSERT(cksum(0, NULL, 1) == expected);
+	ASSERT(cksum(0, NULL, 1234) == expected);
+	ASSERT(cksum(1234, NULL, 0) == expected);
+	ASSERT(cksum(1234, NULL, 1234) == expected);
+}
+
+/* Split the buffer at a random point and verify that checksumming the two
+ * pieces sequentially yields the one-shot result 'expected'. */
+static void
+test_multipart(const u8 *buffer, size_t size, const char *name,
+	       cksum_fn_t cksum, u32 v, u32 expected)
+{
+	size_t division = rand() % (size + 1);
+	v = cksum(v, buffer, division);
+	v = cksum(v, buffer + division, size - division);
+	if (v != expected) {
+		fprintf(stderr, "%s checksum failed multipart test\n", name);
+		ASSERT(0);
+	}
+}
+
+/* Verify that cksum1 and cksum2 (libdeflate and zlib implementations of the
+ * same checksum) agree on 'buffer'; dump the input on mismatch. */
+static void
+test_checksums(const void *buffer, size_t size, const char *name,
+	       cksum_fn_t cksum1, cksum_fn_t cksum2, u32 initial_value)
+{
+	u32 v1 = cksum1(initial_value, buffer, size);
+	u32 v2 = cksum2(initial_value, buffer, size);
+
+	if (v1 != v2) {
+		fprintf(stderr, "%s checksum mismatch\n", name);
+		fprintf(stderr, "initial_value=0x%08"PRIx32", buffer=%p, "
+			"size=%zu, buffer=", initial_value, buffer, size);
+		/* Print at most the first 256 bytes of the failing input. */
+		for (size_t i = 0; i < MIN(size, 256); i++)
+			fprintf(stderr, "%02x", ((const u8 *)buffer)[i]);
+		if (size > 256)
+			fprintf(stderr, "...");
+		fprintf(stderr, "\n");
+		ASSERT(0);
+	}
+
+	/* Occasionally also exercise the multipart (split) code path.
+	 * v1 == v2 here, so v1 is a valid expected value for both. */
+	if ((rand() & 15) == 0) {
+		test_multipart(buffer, size, name, cksum1, initial_value, v1);
+		test_multipart(buffer, size, name, cksum2, initial_value, v1);
+	}
+}
+
+static void
+test_crc32(const void *buffer, size_t size, u32 initial_value)
+{
+	test_checksums(buffer, size, "CRC-32",
+		       crc32_libdeflate, crc32_zlib, initial_value);
+}
+
+static void
+test_adler32(const void *buffer, size_t size, u32 initial_value)
+{
+	test_checksums(buffer, size, "Adler-32",
+		       adler32_libdeflate, adler32_zlib, initial_value);
+}
+
+/* Run 'num_iter' rounds of random-buffer tests with sizes/offsets < 'limit',
+ * testing arbitrary placement plus guard-page-adjacent placements. */
+static void test_random_buffers(u8 *buf_start, u8 *buf_end, size_t limit,
+				u32 num_iter)
+{
+	for (u32 i = 0; i <
num_iter; i++) {
+		size_t start = rand() % limit;
+		size_t len = rand() % (limit - start);
+		u32 a0 = select_initial_adler();
+		u32 c0 = select_initial_crc();
+
+		/* Fill [start, start+len) with fresh random bytes. */
+		for (size_t j = start; j < start + len; j++)
+			buf_start[j] = rand();
+
+		/* Test with chosen size and alignment */
+		test_adler32(&buf_start[start], len, a0);
+		test_crc32(&buf_start[start], len, c0);
+
+		/* Test with chosen size, with guard page before input buffer */
+		memmove(buf_start, &buf_start[start], len);
+		test_adler32(buf_start, len, a0);
+		test_crc32(buf_start, len, c0);
+
+		/* Test with chosen size, with guard page after input buffer */
+		memmove(buf_end - len, buf_start, len);
+		test_adler32(buf_end - len, len, a0);
+		test_crc32(buf_end - len, len, c0);
+	}
+}
+
+int
+tmain(int argc, tchar *argv[])
+{
+	u8 *buf_start, *buf_end;
+
+	begin_program(argv);
+
+	/* 256 KiB buffer bracketed by guard pages, so an out-of-bounds
+	 * access by a checksum implementation faults immediately. */
+	alloc_guarded_buffer(262144, &buf_start, &buf_end);
+
+	rng_seed = time(NULL);
+	srand(rng_seed);
+
+	/* Empty-input / initial-value behavior of all four implementations */
+	test_initial_values(adler32_libdeflate, 1);
+	test_initial_values(adler32_zlib, 1);
+	test_initial_values(crc32_libdeflate, 0);
+	test_initial_values(crc32_zlib, 0);
+
+	/* Test different buffer sizes and alignments */
+	test_random_buffers(buf_start, buf_end, 256, 5000);
+	test_random_buffers(buf_start, buf_end, 1024, 500);
+	test_random_buffers(buf_start, buf_end, 32768, 50);
+	test_random_buffers(buf_start, buf_end, 262144, 25);
+
+	/*
+	 * Test Adler-32 overflow cases. For example, given all 0xFF bytes and
+	 * the highest possible initial (s1, s2) of (65520, 65520), then s2 if
+	 * stored as a 32-bit unsigned integer will overflow if > 5552 bytes are
+	 * processed. Implementations must make sure to reduce s2 modulo 65521
+	 * before that point. Also, some implementations make use of 16-bit
+	 * counters which can overflow earlier.
+	 */
+	memset(buf_start, 0xFF, 32768);
+	for (u32 i = 0; i < 20; i++) {
+		u32 initial_value;
+
+		if (i == 0)
+			initial_value = ((u32)65520 << 16) | 65520;
+		else
+			initial_value = select_initial_adler();
+
+		/* 5553 bytes: one past the 5552-byte s2 overflow threshold. */
+		test_adler32(buf_start, 5553, initial_value);
+		test_adler32(buf_start, rand() % 32769, initial_value);
+		buf_start[rand() % 32768] = 0xFE;
+	}
+
+	free_guarded_buffer(buf_start, buf_end);
+	return 0;
+}
diff --git a/tools/z64compress/src/enc/libdeflate/programs/test_custom_malloc.c b/tools/z64compress/src/enc/libdeflate/programs/test_custom_malloc.c
new file mode 100644
index 000000000..2bbb7f098
--- /dev/null
+++ b/tools/z64compress/src/enc/libdeflate/programs/test_custom_malloc.c
@@ -0,0 +1,85 @@
+/*
+ * test_custom_malloc.c
+ *
+ * Test libdeflate_set_memory_allocator().
+ * Also test injecting allocation failures.
+ */
+
+#include "test_util.h"
+
+/* Call counters used to verify that libdeflate routes every heap operation
+ * through the installed custom allocator. */
+static int malloc_count = 0;
+static int free_count = 0;
+
+static void *do_malloc(size_t size)
+{
+	malloc_count++;
+	return malloc(size);
+}
+
+/* Allocator that always fails, for injecting out-of-memory conditions. */
+static void *do_fail_malloc(size_t size)
+{
+	malloc_count++;
+	return NULL;
+}
+
+static void do_free(void *ptr)
+{
+	free_count++;
+	free(ptr);
+}
+
+int
+tmain(int argc, tchar *argv[])
+{
+	int level;
+	struct libdeflate_compressor *c;
+	struct libdeflate_decompressor *d;
+
+	begin_program(argv);
+
+	/* Test that the custom allocator is actually used when requested.
*/
+
+	libdeflate_set_memory_allocator(do_malloc, do_free);
+	ASSERT(malloc_count == 0);
+	ASSERT(free_count == 0);
+
+	/* Each compressor alloc/free must map to exactly one malloc/free
+	 * call through the custom allocator, at every compression level. */
+	for (level = 0; level <= 12; level++) {
+		malloc_count = free_count = 0;
+		c = libdeflate_alloc_compressor(level);
+		ASSERT(c != NULL);
+		ASSERT(malloc_count == 1);
+		ASSERT(free_count == 0);
+		libdeflate_free_compressor(c);
+		ASSERT(malloc_count == 1);
+		ASSERT(free_count == 1);
+	}
+
+	/* Same accounting for the decompressor. */
+	malloc_count = free_count = 0;
+	d = libdeflate_alloc_decompressor();
+	ASSERT(d != NULL);
+	ASSERT(malloc_count == 1);
+	ASSERT(free_count == 0);
+	libdeflate_free_decompressor(d);
+	ASSERT(malloc_count == 1);
+	ASSERT(free_count == 1);
+
+	/* As long as we're here, also test injecting allocation failures. */
+
+	libdeflate_set_memory_allocator(do_fail_malloc, do_free);
+
+	/* Allocation failure must surface as NULL with no free() calls. */
+	for (level = 0; level <= 12; level++) {
+		malloc_count = free_count = 0;
+		c = libdeflate_alloc_compressor(level);
+		ASSERT(c == NULL);
+		ASSERT(malloc_count == 1);
+		ASSERT(free_count == 0);
+	}
+
+	malloc_count = free_count = 0;
+	d = libdeflate_alloc_decompressor();
+	ASSERT(d == NULL);
+	ASSERT(malloc_count == 1);
+	ASSERT(free_count == 0);
+
+	return 0;
+}
diff --git a/tools/z64compress/src/enc/libdeflate/programs/test_incomplete_codes.c b/tools/z64compress/src/enc/libdeflate/programs/test_incomplete_codes.c
new file mode 100644
index 000000000..4e441bccb
--- /dev/null
+++ b/tools/z64compress/src/enc/libdeflate/programs/test_incomplete_codes.c
@@ -0,0 +1,385 @@
+/*
+ * test_incomplete_codes.c
+ *
+ * Test that the decompressor accepts incomplete Huffman codes in certain
+ * specific cases.
+ */
+
+#include "test_util.h"
+
+/* Decompress 'in' with libdeflate and require exact success: result code,
+ * produced byte count, and output contents must all match expectations. */
+static void
+verify_decompression_libdeflate(const u8 *in, size_t in_nbytes,
+				u8 *out, size_t out_nbytes_avail,
+				const u8 *expected_out,
+				size_t expected_out_nbytes)
+{
+	struct libdeflate_decompressor *d;
+	enum libdeflate_result res;
+	size_t actual_out_nbytes;
+
+	d = libdeflate_alloc_decompressor();
+	ASSERT(d != NULL);
+
+	res = libdeflate_deflate_decompress(d, in, in_nbytes,
+					    out, out_nbytes_avail,
+					    &actual_out_nbytes);
+	ASSERT(res == LIBDEFLATE_SUCCESS);
+	ASSERT(actual_out_nbytes == expected_out_nbytes);
+	ASSERT(memcmp(out, expected_out, actual_out_nbytes) == 0);
+
+	libdeflate_free_decompressor(d);
+}
+
+/* Same check, but through zlib's inflate as an independent reference
+ * decoder (windowBits = -15 selects raw DEFLATE, no zlib wrapper). */
+static void
+verify_decompression_zlib(const u8 *in, size_t in_nbytes,
+			  u8 *out, size_t out_nbytes_avail,
+			  const u8 *expected_out, size_t expected_out_nbytes)
+{
+	z_stream z;
+	int res;
+	size_t actual_out_nbytes;
+
+	memset(&z, 0, sizeof(z));
+	res = inflateInit2(&z, -15);
+	ASSERT(res == Z_OK);
+
+	z.next_in = (void *)in;
+	z.avail_in = in_nbytes;
+	z.next_out = (void *)out;
+	z.avail_out = out_nbytes_avail;
+	res = inflate(&z, Z_FINISH);
+	ASSERT(res == Z_STREAM_END);
+	actual_out_nbytes = out_nbytes_avail - z.avail_out;
+	ASSERT(actual_out_nbytes == expected_out_nbytes);
+	ASSERT(memcmp(out, expected_out, actual_out_nbytes) == 0);
+
+	inflateEnd(&z);
+}
+
+/* Run both decoders over the same stream and verify both accept it and
+ * produce the expected output. */
+static void
+verify_decompression(const u8 *in, size_t in_nbytes,
+		     u8 *out, size_t out_nbytes_avail,
+		     const u8 *expected_out, size_t expected_out_nbytes)
+{
+	verify_decompression_libdeflate(in, in_nbytes, out, out_nbytes_avail,
+					expected_out, expected_out_nbytes);
+	verify_decompression_zlib(in, in_nbytes, out, out_nbytes_avail,
+				  expected_out, expected_out_nbytes);
+
+}
+
+/* Test that an empty offset code is accepted.
*/ +static void +test_empty_offset_code(void) +{ + static const u8 expected_out[] = { 'A', 'B', 'A', 'A' }; + u8 in[128]; + u8 out[128]; + struct output_bitstream os = { .next = in, .end = in + sizeof(in) }; + int i; + + /* + * Generate a DEFLATE stream containing a "dynamic Huffman" block + * containing literals, but no offsets; and having an empty offset code + * (all codeword lengths set to 0). + * + * Litlen code: + * litlensym_A freq=3 len=1 codeword= 0 + * litlensym_B freq=1 len=2 codeword=01 + * litlensym_256 (end-of-block) freq=1 len=2 codeword=11 + * Offset code: + * (empty) + * + * Litlen and offset codeword lengths: + * [0..'A'-1] = 0 presym_18 + * ['A'] = 1 presym_1 + * ['B'] = 2 presym_2 + * ['B'+1..255] = 0 presym_18 presym_18 + * [256] = 2 presym_2 + * [257] = 0 presym_0 + * + * Precode: + * presym_0 freq=1 len=3 codeword=011 + * presym_1 freq=1 len=3 codeword=111 + * presym_2 freq=2 len=2 codeword= 01 + * presym_18 freq=3 len=1 codeword= 0 + */ + + ASSERT(put_bits(&os, 1, 1)); /* BFINAL: 1 */ + ASSERT(put_bits(&os, 2, 2)); /* BTYPE: DYNAMIC_HUFFMAN */ + ASSERT(put_bits(&os, 0, 5)); /* num_litlen_syms: 0 + 257 */ + ASSERT(put_bits(&os, 0, 5)); /* num_offset_syms: 0 + 1 */ + ASSERT(put_bits(&os, 14, 4)); /* num_explicit_precode_lens: 14 + 4 */ + + /* + * Precode codeword lengths: order is + * [16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15] + */ + for (i = 0; i < 2; i++) /* presym_{16,17}: len=0 */ + ASSERT(put_bits(&os, 0, 3)); + ASSERT(put_bits(&os, 1, 3)); /* presym_18: len=1 */ + ASSERT(put_bits(&os, 3, 3)); /* presym_0: len=3 */ + for (i = 0; i < 11; i++) /* presym_{8,...,13}: len=0 */ + ASSERT(put_bits(&os, 0, 3)); + ASSERT(put_bits(&os, 2, 3)); /* presym_2: len=2 */ + ASSERT(put_bits(&os, 0, 3)); /* presym_14: len=0 */ + ASSERT(put_bits(&os, 3, 3)); /* presym_1: len=3 */ + + /* Litlen and offset codeword lengths */ + ASSERT(put_bits(&os, 0x0, 1) && + put_bits(&os, 54, 7)); /* presym_18, 65 zeroes */ + ASSERT(put_bits(&os, 0x7, 
3)); /* presym_1 */ + ASSERT(put_bits(&os, 0x1, 2)); /* presym_2 */ + ASSERT(put_bits(&os, 0x0, 1) && + put_bits(&os, 89, 7)); /* presym_18, 100 zeroes */ + ASSERT(put_bits(&os, 0x0, 1) && + put_bits(&os, 78, 7)); /* presym_18, 89 zeroes */ + ASSERT(put_bits(&os, 0x1, 2)); /* presym_2 */ + ASSERT(put_bits(&os, 0x3, 3)); /* presym_0 */ + + /* Litlen symbols */ + ASSERT(put_bits(&os, 0x0, 1)); /* litlensym_A */ + ASSERT(put_bits(&os, 0x1, 2)); /* litlensym_B */ + ASSERT(put_bits(&os, 0x0, 1)); /* litlensym_A */ + ASSERT(put_bits(&os, 0x0, 1)); /* litlensym_A */ + ASSERT(put_bits(&os, 0x3, 2)); /* litlensym_256 (end-of-block) */ + + ASSERT(flush_bits(&os)); + + verify_decompression(in, os.next - in, out, sizeof(out), + expected_out, sizeof(expected_out)); +} + +/* Test that a litrunlen code containing only one symbol is accepted. */ +static void +test_singleton_litrunlen_code(void) +{ + u8 in[128]; + u8 out[128]; + struct output_bitstream os = { .next = in, .end = in + sizeof(in) }; + int i; + + /* + * Litlen code: + * litlensym_256 (end-of-block) freq=1 len=1 codeword=0 + * Offset code: + * (empty) + * + * Litlen and offset codeword lengths: + * [0..256] = 0 presym_18 presym_18 + * [256] = 1 presym_1 + * [257] = 0 presym_0 + * + * Precode: + * presym_0 freq=1 len=2 codeword=01 + * presym_1 freq=1 len=2 codeword=11 + * presym_18 freq=2 len=1 codeword= 0 + */ + + ASSERT(put_bits(&os, 1, 1)); /* BFINAL: 1 */ + ASSERT(put_bits(&os, 2, 2)); /* BTYPE: DYNAMIC_HUFFMAN */ + ASSERT(put_bits(&os, 0, 5)); /* num_litlen_syms: 0 + 257 */ + ASSERT(put_bits(&os, 0, 5)); /* num_offset_syms: 0 + 1 */ + ASSERT(put_bits(&os, 14, 4)); /* num_explicit_precode_lens: 14 + 4 */ + + /* + * Precode codeword lengths: order is + * [16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15] + */ + for (i = 0; i < 2; i++) /* presym_{16,17}: len=0 */ + ASSERT(put_bits(&os, 0, 3)); + ASSERT(put_bits(&os, 1, 3)); /* presym_18: len=1 */ + ASSERT(put_bits(&os, 2, 3)); /* presym_0: len=2 */ + 
for (i = 0; i < 13; i++) /* presym_{8,...,14}: len=0 */ + ASSERT(put_bits(&os, 0, 3)); + ASSERT(put_bits(&os, 2, 3)); /* presym_1: len=2 */ + + /* Litlen and offset codeword lengths */ + for (i = 0; i < 2; i++) { + ASSERT(put_bits(&os, 0, 1) && /* presym_18, 128 zeroes */ + put_bits(&os, 117, 7)); + } + ASSERT(put_bits(&os, 0x3, 2)); /* presym_1 */ + ASSERT(put_bits(&os, 0x1, 2)); /* presym_0 */ + + /* Litlen symbols */ + ASSERT(put_bits(&os, 0x0, 1)); /* litlensym_256 (end-of-block) */ + + ASSERT(flush_bits(&os)); + + verify_decompression(in, os.next - in, out, sizeof(out), in, 0); +} + +/* Test that an offset code containing only one symbol is accepted. */ +static void +test_singleton_offset_code(void) +{ + static const u8 expected_out[] = { 255, 255, 255, 255 }; + u8 in[128]; + u8 out[128]; + struct output_bitstream os = { .next = in, .end = in + sizeof(in) }; + int i; + + ASSERT(put_bits(&os, 1, 1)); /* BFINAL: 1 */ + ASSERT(put_bits(&os, 2, 2)); /* BTYPE: DYNAMIC_HUFFMAN */ + + /* + * Litlen code: + * litlensym_255 freq=1 len=1 codeword= 0 + * litlensym_256 (end-of-block) freq=1 len=2 codeword=01 + * litlensym_257 (len 3) freq=1 len=2 codeword=11 + * Offset code: + * offsetsym_0 (offset 0) freq=1 len=1 codeword=0 + * + * Litlen and offset codeword lengths: + * [0..254] = 0 presym_{18,18} + * [255] = 1 presym_1 + * [256] = 1 presym_2 + * [257] = 1 presym_2 + * [258] = 1 presym_1 + * + * Precode: + * presym_1 freq=2 len=2 codeword=01 + * presym_2 freq=2 len=2 codeword=11 + * presym_18 freq=2 len=1 codeword= 0 + */ + + ASSERT(put_bits(&os, 1, 5)); /* num_litlen_syms: 1 + 257 */ + ASSERT(put_bits(&os, 0, 5)); /* num_offset_syms: 0 + 1 */ + ASSERT(put_bits(&os, 14, 4)); /* num_explicit_precode_lens: 14 + 4 */ + /* + * Precode codeword lengths: order is + * [16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15] + */ + for (i = 0; i < 2; i++) /* presym_{16,17}: len=0 */ + ASSERT(put_bits(&os, 0, 3)); + ASSERT(put_bits(&os, 1, 3)); /* presym_18: len=1 */ + 
for (i = 0; i < 12; i++) /* presym_{0,...,13}: len=0 */ + ASSERT(put_bits(&os, 0, 3)); + ASSERT(put_bits(&os, 2, 3)); /* presym_2: len=2 */ + ASSERT(put_bits(&os, 0, 3)); /* presym_14: len=0 */ + ASSERT(put_bits(&os, 2, 3)); /* presym_1: len=2 */ + + /* Litlen and offset codeword lengths */ + ASSERT(put_bits(&os, 0x0, 1) && /* presym_18, 128 zeroes */ + put_bits(&os, 117, 7)); + ASSERT(put_bits(&os, 0x0, 1) && /* presym_18, 127 zeroes */ + put_bits(&os, 116, 7)); + ASSERT(put_bits(&os, 0x1, 2)); /* presym_1 */ + ASSERT(put_bits(&os, 0x3, 2)); /* presym_2 */ + ASSERT(put_bits(&os, 0x3, 2)); /* presym_2 */ + ASSERT(put_bits(&os, 0x1, 2)); /* presym_1 */ + + /* Literal */ + ASSERT(put_bits(&os, 0x0, 1)); /* litlensym_255 */ + + /* Match */ + ASSERT(put_bits(&os, 0x3, 2)); /* litlensym_257 */ + ASSERT(put_bits(&os, 0x0, 1)); /* offsetsym_0 */ + + /* End of block */ + ASSERT(put_bits(&os, 0x1, 2)); /* litlensym_256 */ + + ASSERT(flush_bits(&os)); + + verify_decompression(in, os.next - in, out, sizeof(out), + expected_out, sizeof(expected_out)); +} + +/* Test that an offset code containing only one symbol is accepted, even if that + * symbol is not symbol 0. The codeword should be '0' in either case. 
*/ +static void +test_singleton_offset_code_notsymzero(void) +{ + static const u8 expected_out[] = { 254, 255, 254, 255, 254 }; + u8 in[128]; + u8 out[128]; + struct output_bitstream os = { .next = in, .end = in + sizeof(in) }; + int i; + + ASSERT(put_bits(&os, 1, 1)); /* BFINAL: 1 */ + ASSERT(put_bits(&os, 2, 2)); /* BTYPE: DYNAMIC_HUFFMAN */ + + /* + * Litlen code: + * litlensym_254 len=2 codeword=00 + * litlensym_255 len=2 codeword=10 + * litlensym_256 (end-of-block) len=2 codeword=01 + * litlensym_257 (len 3) len=2 codeword=11 + * Offset code: + * offsetsym_1 (offset 2) len=1 codeword=0 + * + * Litlen and offset codeword lengths: + * [0..253] = 0 presym_{18,18} + * [254] = 2 presym_2 + * [255] = 2 presym_2 + * [256] = 2 presym_2 + * [257] = 2 presym_2 + * [258] = 0 presym_0 + * [259] = 1 presym_1 + * + * Precode: + * presym_0 len=2 codeword=00 + * presym_1 len=2 codeword=10 + * presym_2 len=2 codeword=01 + * presym_18 len=2 codeword=11 + */ + + ASSERT(put_bits(&os, 1, 5)); /* num_litlen_syms: 1 + 257 */ + ASSERT(put_bits(&os, 1, 5)); /* num_offset_syms: 1 + 1 */ + ASSERT(put_bits(&os, 14, 4)); /* num_explicit_precode_lens: 14 + 4 */ + /* + * Precode codeword lengths: order is + * [16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15] + */ + for (i = 0; i < 2; i++) /* presym_{16,17}: len=0 */ + ASSERT(put_bits(&os, 0, 3)); + ASSERT(put_bits(&os, 2, 3)); /* presym_18: len=2 */ + ASSERT(put_bits(&os, 2, 3)); /* presym_0: len=2 */ + for (i = 0; i < 11; i++) /* presym_{8,...,13}: len=0 */ + ASSERT(put_bits(&os, 0, 3)); + ASSERT(put_bits(&os, 2, 3)); /* presym_2: len=2 */ + ASSERT(put_bits(&os, 0, 3)); /* presym_14: len=0 */ + ASSERT(put_bits(&os, 2, 3)); /* presym_1: len=2 */ + + /* Litlen and offset codeword lengths */ + ASSERT(put_bits(&os, 0x3, 2) && /* presym_18, 128 zeroes */ + put_bits(&os, 117, 7)); + ASSERT(put_bits(&os, 0x3, 2) && /* presym_18, 126 zeroes */ + put_bits(&os, 115, 7)); + ASSERT(put_bits(&os, 0x1, 2)); /* presym_2 */ + 
ASSERT(put_bits(&os, 0x1, 2)); /* presym_2 */ + ASSERT(put_bits(&os, 0x1, 2)); /* presym_2 */ + ASSERT(put_bits(&os, 0x1, 2)); /* presym_2 */ + ASSERT(put_bits(&os, 0x0, 2)); /* presym_0 */ + ASSERT(put_bits(&os, 0x2, 2)); /* presym_1 */ + + /* Literals */ + ASSERT(put_bits(&os, 0x0, 2)); /* litlensym_254 */ + ASSERT(put_bits(&os, 0x2, 2)); /* litlensym_255 */ + + /* Match */ + ASSERT(put_bits(&os, 0x3, 2)); /* litlensym_257 */ + ASSERT(put_bits(&os, 0x0, 1)); /* offsetsym_1 */ + + /* End of block */ + ASSERT(put_bits(&os, 0x1, 2)); /* litlensym_256 */ + + ASSERT(flush_bits(&os)); + + verify_decompression(in, os.next - in, out, sizeof(out), + expected_out, sizeof(expected_out)); +} + +int +tmain(int argc, tchar *argv[]) +{ + begin_program(argv); + + test_empty_offset_code(); + test_singleton_litrunlen_code(); + test_singleton_offset_code(); + test_singleton_offset_code_notsymzero(); + + return 0; +} diff --git a/tools/z64compress/src/enc/libdeflate/programs/test_litrunlen_overflow.c b/tools/z64compress/src/enc/libdeflate/programs/test_litrunlen_overflow.c new file mode 100644 index 000000000..cdec8c802 --- /dev/null +++ b/tools/z64compress/src/enc/libdeflate/programs/test_litrunlen_overflow.c @@ -0,0 +1,72 @@ +/* + * test_litrunlen_overflow.c + * + * Regression test for commit f2f0df727444 ("deflate_compress: fix corruption + * with long literal run"). Try to compress a file longer than 65535 bytes + * where no 2-byte sequence (3 would be sufficient) is repeated <= 32768 bytes + * apart, and the distribution of bytes remains constant throughout, and yet not + * all bytes are used so the data is still slightly compressible. There will be + * no matches in this data, but the compressor should still output a compressed + * block, and this block should contain more than 65535 consecutive literals, + * which triggered the bug. 
+ *
+ * Note: on random data, this situation is extremely unlikely if the compressor
+ * uses all matches it finds, since random data will on average have a 3-byte
+ * match every (256**3)/32768 = 512 bytes.
+ */
+
+#include "test_util.h"
+
+int
+tmain(int argc, tchar *argv[])
+{
+	/* 2*250*251 = 125500 bytes: comfortably more than the 65535 literals
+	 * in one block needed to trigger the original bug. */
+	const int data_size = 2 * 250 * 251;
+	u8 *orig_data, *compressed_data, *decompressed_data;
+	int i, stride, multiple, j = 0;
+	struct libdeflate_decompressor *d;
+	static const int levels[] = { 3, 6, 12 };
+
+	begin_program(argv);
+
+	orig_data = xmalloc(data_size);
+	compressed_data = xmalloc(data_size);
+	decompressed_data = xmalloc(data_size);
+
+	/* Fill with (stride * multiple) mod 251 so bytes stay uniformly
+	 * distributed yet no short sequence repeats nearby (no matches). */
+	for (i = 0; i < 2; i++) {
+		for (stride = 1; stride < 251; stride++) {
+			for (multiple = 0; multiple < 251; multiple++)
+				orig_data[j++] = (stride * multiple) % 251;
+		}
+	}
+	ASSERT(j == data_size);
+
+	d = libdeflate_alloc_decompressor();
+	ASSERT(d != NULL);
+
+	for (i = 0; i < ARRAY_LEN(levels); i++) {
+		struct libdeflate_compressor *c;
+		size_t csize;
+		enum libdeflate_result res;
+
+		c = libdeflate_alloc_compressor(levels[i]);
+		ASSERT(c != NULL);
+
+		/* Must still compress (data is slightly compressible). */
+		csize = libdeflate_deflate_compress(c, orig_data, data_size,
+						    compressed_data, data_size);
+		ASSERT(csize > 0 && csize < data_size);
+
+		/* Round-trip: decompression must reproduce the original. */
+		res = libdeflate_deflate_decompress(d, compressed_data, csize,
+						    decompressed_data,
+						    data_size, NULL);
+		ASSERT(res == LIBDEFLATE_SUCCESS);
+		ASSERT(memcmp(orig_data, decompressed_data, data_size) == 0);
+
+		libdeflate_free_compressor(c);
+	}
+
+	libdeflate_free_decompressor(d);
+	free(orig_data);
+	free(compressed_data);
+	free(decompressed_data);
+	return 0;
+}
diff --git a/tools/z64compress/src/enc/libdeflate/programs/test_overread.c b/tools/z64compress/src/enc/libdeflate/programs/test_overread.c
new file mode 100644
index 000000000..2a6003218
--- /dev/null
+++ b/tools/z64compress/src/enc/libdeflate/programs/test_overread.c
@@ -0,0 +1,95 @@
+/*
+ * test_overread.c
+ *
+ * Test that the decompressor doesn't produce an unbounded amount of output if
* it runs out of input, even when implicit zeroes appended to the input would + * continue producing output (as is the case when the input ends during a + * DYNAMIC_HUFFMAN block where a literal has an all-zeroes codeword). + * + * This is a regression test for commit 3f21ec9d6121 ("deflate_decompress: error + * out if overread count gets too large"). + */ + +#include "test_util.h" + +static void +generate_test_input(struct output_bitstream *os) +{ + int i; + + put_bits(os, 0, 1); /* BFINAL: 0 */ + put_bits(os, 2, 2); /* BTYPE: DYNAMIC_HUFFMAN */ + + /* + * Write the Huffman codes. + * + * Litlen code: + * litlensym_0 (0) len=1 codeword=0 + * litlensym_256 (end-of-block) len=1 codeword=1 + * Offset code: + * offsetsym_0 (unused) len=1 codeword=0 + * + * Litlen and offset codeword lengths: + * [0] = 1 presym_1 + * [1..255] = 0 presym_{18,18} + * [256] = 1 presym_1 + * [257] = 1 presym_1 + * + * Precode: + * presym_1 len=1 codeword=0 + * presym_18 len=1 codeword=1 + */ + put_bits(os, 0, 5); /* num_litlen_syms: 0 + 257 */ + put_bits(os, 0, 5); /* num_offset_syms: 0 + 1 */ + put_bits(os, 14, 4); /* num_explicit_precode_lens: 14 + 4 */ + /* + * Precode codeword lengths: order is + * [16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15] + */ + put_bits(os, 0, 3); /* presym_16: len=0 */ + put_bits(os, 0, 3); /* presym_17: len=0 */ + put_bits(os, 1, 3); /* presym_18: len=1 */ + for (i = 0; i < 14; i++) /* presym_{0,...,14}: len=0 */ + put_bits(os, 0, 3); + put_bits(os, 1, 3); /* presym_1: len=1 */ + + /* Litlen and offset codeword lengths */ + put_bits(os, 0, 1); /* presym_1 */ + put_bits(os, 1, 1); /* presym_18 ... */ + put_bits(os, 117, 7); /* ... 11 + 117 zeroes */ + put_bits(os, 1, 1); /* presym_18 ... */ + put_bits(os, 116, 7); /* ... 11 + 116 zeroes */ + put_bits(os, 0, 1); /* presym_1 */ + put_bits(os, 0, 1); /* presym_1 */ + + /* Implicit zeroes would generate endless literals from here. 
*/ + + ASSERT(flush_bits(os)); +} + +int +tmain(int argc, tchar *argv[]) +{ + u8 cdata[16]; + u8 udata[256]; + struct output_bitstream os = + { .next = cdata, .end = cdata + sizeof(cdata) }; + struct libdeflate_decompressor *d; + enum libdeflate_result res; + size_t actual_out_nbytes; + + begin_program(argv); + + generate_test_input(&os); + d = libdeflate_alloc_decompressor(); + ASSERT(d != NULL); + + res = libdeflate_deflate_decompress(d, cdata, os.next - cdata, + udata, sizeof(udata), + &actual_out_nbytes); + /* Before the fix, the result was LIBDEFLATE_INSUFFICIENT_SPACE here. */ + ASSERT(res == LIBDEFLATE_BAD_DATA); + + libdeflate_free_decompressor(d); + return 0; +} diff --git a/tools/z64compress/src/enc/libdeflate/programs/test_slow_decompression.c b/tools/z64compress/src/enc/libdeflate/programs/test_slow_decompression.c new file mode 100644 index 000000000..d5ac26245 --- /dev/null +++ b/tools/z64compress/src/enc/libdeflate/programs/test_slow_decompression.c @@ -0,0 +1,472 @@ +/* + * test_slow_decompression.c + * + * Test how quickly libdeflate decompresses degenerate/malicious compressed data + * streams that start new Huffman blocks extremely frequently. + */ + +#include "test_util.h" + +/* + * Generate a DEFLATE stream containing all empty "static Huffman" blocks. + * + * libdeflate used to decompress this very slowly (~1000x slower than typical + * data), but now it's much faster (only ~2x slower than typical data) because + * now it skips rebuilding the decode tables for the static Huffman codes when + * they're already loaded into the decompressor. 
+ */ +static void +generate_empty_static_huffman_blocks(u8 *p, size_t len) +{ + struct output_bitstream os = { .next = p, .end = p + len }; + + while (put_bits(&os, 0, 1) && /* BFINAL: 0 */ + put_bits(&os, 1, 2) && /* BTYPE: STATIC_HUFFMAN */ + put_bits(&os, 0, 7)) /* litlensym_256 (end-of-block) */ + ; +} + +static bool +generate_empty_dynamic_huffman_block(struct output_bitstream *os) +{ + int i; + + if (!put_bits(os, 0, 1)) /* BFINAL: 0 */ + return false; + if (!put_bits(os, 2, 2)) /* BTYPE: DYNAMIC_HUFFMAN */ + return false; + + /* + * Write a minimal Huffman code, then the end-of-block symbol. + * + * Litlen code: + * litlensym_256 (end-of-block) freq=1 len=1 codeword=0 + * Offset code: + * offsetsym_0 (unused) freq=0 len=1 codeword=0 + * + * Litlen and offset codeword lengths: + * [0..255] = 0 presym_{18,18} + * [256] = 1 presym_1 + * [257] = 1 presym_1 + * + * Precode: + * presym_1 freq=2 len=1 codeword=0 + * presym_18 freq=2 len=1 codeword=1 + */ + + if (!put_bits(os, 0, 5)) /* num_litlen_syms: 0 + 257 */ + return false; + if (!put_bits(os, 0, 5)) /* num_offset_syms: 0 + 1 */ + return false; + if (!put_bits(os, 14, 4)) /* num_explicit_precode_lens: 14 + 4 */ + return false; + /* + * Precode codeword lengths: order is + * [16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15] + */ + for (i = 0; i < 2; i++) { /* presym_{16,17}: len=0 */ + if (!put_bits(os, 0, 3)) + return false; + } + if (!put_bits(os, 1, 3)) /* presym_18: len=1 */ + return false; + for (i = 0; i < 14; i++) { /* presym_{0,...,14}: len=0 */ + if (!put_bits(os, 0, 3)) + return false; + } + if (!put_bits(os, 1, 3)) /* presym_1: len=1 */ + return false; + + /* Litlen and offset codeword lengths */ + for (i = 0; i < 2; i++) { + if (!put_bits(os, 1, 1) || /* presym_18, 128 zeroes */ + !put_bits(os, 117, 7)) + return false; + } + if (!put_bits(os, 0, 1)) /* presym_1 */ + return false; + if (!put_bits(os, 0, 1)) /* presym_1 */ + return false; + /* Done writing the Huffman codes */ + + 
return put_bits(os, 0, 1); /* litlensym_256 (end-of-block) */ +} + +/* + * Generate a DEFLATE stream containing all empty "dynamic Huffman" blocks. + * + * This is the worst known case currently, being ~100x slower to decompress than + * typical data. + */ +static void +generate_empty_dynamic_huffman_blocks(u8 *p, size_t len) +{ + struct output_bitstream os = { .next = p, .end = p + len }; + + while (generate_empty_dynamic_huffman_block(&os)) + ; +} + +#define NUM_ITERATIONS 100 + +static u64 +do_test_libdeflate(const char *input_type, const u8 *in, size_t in_nbytes, + u8 *out, size_t out_nbytes_avail) +{ + struct libdeflate_decompressor *d; + enum libdeflate_result res; + u64 t; + int i; + + d = libdeflate_alloc_decompressor(); + ASSERT(d != NULL); + + t = timer_ticks(); + for (i = 0; i < NUM_ITERATIONS; i++) { + res = libdeflate_deflate_decompress(d, in, in_nbytes, out, + out_nbytes_avail, NULL); + ASSERT(res == LIBDEFLATE_BAD_DATA || + res == LIBDEFLATE_INSUFFICIENT_SPACE); + } + t = timer_ticks() - t; + + printf("[%s, libdeflate]: %"PRIu64" KB/s\n", input_type, + timer_KB_per_s((u64)in_nbytes * NUM_ITERATIONS, t)); + + libdeflate_free_decompressor(d); + return t; +} + +static u64 +do_test_zlib(const char *input_type, const u8 *in, size_t in_nbytes, + u8 *out, size_t out_nbytes_avail) +{ + z_stream z; + int res; + u64 t; + int i; + + memset(&z, 0, sizeof(z)); + res = inflateInit2(&z, -15); + ASSERT(res == Z_OK); + + t = timer_ticks(); + for (i = 0; i < NUM_ITERATIONS; i++) { + inflateReset(&z); + z.next_in = (void *)in; + z.avail_in = in_nbytes; + z.next_out = out; + z.avail_out = out_nbytes_avail; + res = inflate(&z, Z_FINISH); + ASSERT(res == Z_BUF_ERROR || res == Z_DATA_ERROR); + } + t = timer_ticks() - t; + + printf("[%s, zlib ]: %"PRIu64" KB/s\n", input_type, + timer_KB_per_s((u64)in_nbytes * NUM_ITERATIONS, t)); + + inflateEnd(&z); + return t; +} + +/* + * Test case from https://github.com/ebiggers/libdeflate/issues/33 + * with the gzip header and footer 
removed to leave just the DEFLATE stream + */ +static const u8 orig_repro[3962] = + "\xea\x04\x48\x00\x20\x80\x28\x00\x00\x11\x00\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x6a\x6a\x6a\x6a\x6a\x6a\x6a\x6a\x6a\x6a\x6a\x6a" + "\x6a\x6a\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x92\x63\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x92\x63\x00\xea\x04\x48\x00\x20" + "\x80\x28\x00\x00\x11\x00\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x00\xea\x04\x48\x00\x20\x80\x28" + "\x00\x00\x11\x00\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x92\x63\x00\xea\x04\x48\x00\x20\x80\x28\x00\x00\x11" + "\x00\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x00\xea\x04\x48\x00\x20\x80\x28\x00\x00\x11\x00\x1b" + "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x00\xea\x04\x48" + "\x00\x20\x80\x28\x00\x00\x11\x00\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x92\x63\x00\xea\x04\x48\x00\x20\x80" + "\x28\x00\x00\x11\x00\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x00\xea\x04\x48\x00\x20\x80\x28\x00" + "\x00\x11\x00\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x92\x63\x00\xea" + "\x04\x48\x00\x20\x80\x28\x00\x00\x11\x00\x1b\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + 
"\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x92\x63\x00\xea" + "\x04\x48\x00\x20\x80\x28\x00\x00\x11\x00\x1b\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x92\x63\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x92\x63\x00\xea\x04\x48" + "\x00\x20\x80\x28\x00\x00\x11\x00\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x00\xea\x04\x48\x00\x20" + "\x80\x28\x00\x00\x11\x00\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x92\x63\x00\xea\x04\x48\x00\x20\x80\x28\x00" + "\x00\x11\x00\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x00\xea\x04\x48\x00\x20\x80\x28\x00\x00\x11" + "\x00\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x63" + "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x92" + "\x63\x00\xea\x04\x48\x00\x20\x80\x28\x00\x00\x11\x00\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x00" + "\xea\x04\x48\x00\x20\x80\x28\x00\x00\x11\x00\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x92\x63\x00\xea\x04\x48" + "\x00\x20\x80\x28\x00\x00\x11\x00\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x00\xea\x04\x48\x00\x20" 
+ "\x80\x28\x00\x00\x11\x00\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x92\x63\x00\xea\x04\x48\x00\x20\x80\x28\x00" + "\x00\x11\x00\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x92\x63\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x92\x63\x00\xea" + "\x04\x48\x00\x20\x80\x28\x00\x00\x11\x00\x1b\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x00\xea\x04\x48" + "\x00\x20\x80\x28\x00\x00\x11\x00\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x00\xea\x04\x48\x00\x20\x80\x28\x00\x00\x11" + "\x00\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + "\x92\x63\x00\xea\x04\x48\x00\x20\x80\x28\x00\x00\x11\x00\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + "\x00\xea\x04\x48\x00\x20\x80\x28\x00\x00\x11\x00\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x92\x63\x00\xea\x04\x48\x00\x20\x80\x28\x00" + "\x00\x11\x00\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + 
"\x1b\x1b\x92\x63\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x92\x63\x00\xea\x04\x48\x00\x20\x80\x28\x00\x00\x11" + "\x00\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x00\xea\x04\x48\x00\x20\x80\x28\x00\x00\x11\x00\x1b" + "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x92\x63" + "\x00\xea\x04\x48\x00\x20\x80\x28\x00\x00\x11\x00\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x00\xea" + "\x04\x48\x00\x20\x80\x28\x00\x00\x11\x00\x1b\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + "\x92\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x6a\x6a\x6a\x6a\x6a\x6a\x6a\x6a\x6a\x6a\x6a\x6a\x6a" + "\x6a\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x1b\x92\x63\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x92\x63\x00\xea\x04\x48\x00\x20\x80" + "\x28\x00\x00\x11\x00\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x00\xea\x04\x48\x00\x20\x80\x28\x00" + "\x00\x11\x00\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x92\x63\x00\xea\x04\x48\x00\x20\x80\x28\x00" + "\x00\x11\x00\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" 
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x92\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x6a\x6a\x6a\x6a\x6a\x6a\x6a\x6a\x6a\x6a\x6a" + "\x6a\x6a\x6a\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x92\x63\x1b\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x92\x63\x00\xea\x04\x48\x00" + "\x20\x80\x28\x00\x00\x11\x00\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x00\xea\x04\x48\x00\x20\x80" + "\x28\x00\x00\x11\x00\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x92\x63\x00\xea\x04\x48\x00\x20\x80\x28\x00\x00" + "\x11\x00\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x00\xea\x04\x48\x00\x20\x80\x28\x00\x00\x11\x00" + "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x00\xea\x04" + "\x48\x00\x20\x80\x28\x00\x00\x11\x00\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x92\x63\x00\xea\x04\x48\x00\x20" + "\x80\x28\x00\x00\x11\x00\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x00\xea\x04\x48\x00\x20\x80\x28" + "\x00\x00\x11\x00\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x92\x63\x00" + "\xea\x04\x48\x00\x20\x80\x28\x00\x00\x11\x00\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + 
"\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x92\x63\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x92\x63\x00\xea\x04" + "\x48\x00\x20\x80\x28\x00\x00\x11\x00\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x00\xea\x04\x48\x00" + "\x20\x80\x28\x00\x00\x11\x00\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x1b\x92\x63\x00\xea\x04\x48\x00\x20\x80\x28" + "\x00\x00\x11\x00\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x1b\x00\xea\x04\x48\x00\x20\x80\x28\x00\x00" + "\x11\x00\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + "\x63\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + "\x92\x63\x00\xea\x04\x48\x00\x20\x80\x28\x00\x00\x11\x00\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + "\x00\xea\x04\x48\x00\x20\x80\x28\x00\x00\x11\x00\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x92\x63\x00\xea\x04" + "\x48\x00\x20\x80\x28\x00\x00\x11\x00\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x00\xea\x04\x48\x00" + "\x20\x80\x28\x00\x00\x11\x00\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" 
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x1b\x92\x63\x00\xea\x04\x48\x00\x20\x80\x28" + "\x00\x00\x11\x00\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x92\x63\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x92\x63\x00" + "\xea\x04\x48\x00\x20\x80\x28\x00\x00\x11\x00\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x00\xea\x04" + "\x48\x00\x20\x80\x28\x00\x00\x11\x00\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x1b\x00\xea\x04\x48\x00\x20\x80\x28\x00\x00" + "\x11\x00\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + "\x1b\x92\x63\x00\xea\x04\x48\x00\x20\x80\x28\x00\x00\x11\x00\x1b" + "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + "\x1b\x00\xea\x04\x48\x00\x20\x80\x28\x00\x00\x11\x00\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x1b\x92\x63\x00\xea\x04\x48\x00\x20\x80\x28" + "\x00\x00\x11\x00\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x92\x63\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x92\x63\x00\xea\x04\x48\x00\x20\x80\x28\x00\x00" + "\x11\x00\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + 
"\x1b\x1b\x1b\x1b\x00\xea\x04\x48\x00\x20\x80\x28\x00\x00\x11\x00" + "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x92" + "\x63\x00\xea\x04\x48\x00\x20\x80\x28\x00\x00\x11\x00\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x00" + "\xea\x04\x48\x00\x20\x80\x28\x00\x00\x11\x00\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x63\x1b\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x92\x63\x00\xea\x04\x48\x00" + "\x20\x80\x28\x00\x00\x11\x00\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x00\xea\x04\x48\x00\x20\x80" + "\x28\x00\x00\x11\x00\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x92\x63\x00\xea\x04\x48\x00\x20\x80\x28\x00\x00" + "\x11\x00\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x00\xea\x04\x48\x00\x20\x80\x28\x00\x00\x11\x00" + "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x92" + "\x63\x00\xea\x04\x48\x00\x20\x80\x28\x00\x00\x11\x00\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" 
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x92\x63\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x92\x63\x00" + "\xea\x04\x48\x00\x20\x80\x28\x00\x00\x11\x00\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x00\xea\x04" + "\x48\x00\x20\x80\x28\x00\x00\x11\x00\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x92\x63\x00\xea\x04\x48\x00\x20" + "\x80\x28\x00\x00\x11\x00\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x00\xea\x04\x48\x00\x20\x80\x28" + "\x00\x00\x11\x00\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1a\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x92\x63\x00" + "\xea\x04\x48\x00\x20\x80\x28\x00\x00\x11\x00\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" + "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x92\x63\x00\x04\xea\x48\x00\x20" + "\x80\x28\x00\x00\x11\x1b\x1b\x1b\x1b\x92\x63\x00\xea\x04\x48\x00" + "\x20\x80\x28\x00\x00\x11\x00\x00\x01\x04\x00\x3f\x00\x00\x00\x00" + "\x28\xf7\xff\x00\xff\xff\xff\xff\x00\x00"; + +int +tmain(int argc, tchar *argv[]) +{ + u8 in[4096]; + u8 out[10000]; + u64 t, tz; + + begin_program(argv); + + begin_performance_test(); + + /* static huffman case */ + generate_empty_static_huffman_blocks(in, sizeof(in)); + t = do_test_libdeflate("static huffman", in, sizeof(in), + out, sizeof(out)); + tz = do_test_zlib("static huffman", in, sizeof(in), out, sizeof(out)); + /* + * libdeflate is faster than zlib in this case, e.g. 
+ * [static huffman, libdeflate]: 215861 KB/s + * [static huffman, zlib ]: 73651 KB/s + */ + putchar('\n'); + ASSERT(t < tz); + + /* dynamic huffman case */ + generate_empty_dynamic_huffman_blocks(in, sizeof(in)); + t = do_test_libdeflate("dynamic huffman", in, sizeof(in), + out, sizeof(out)); + tz = do_test_zlib("dynamic huffman", in, sizeof(in), out, sizeof(out)); + /* + * libdeflate is slower than zlib in this case, though not super bad. + * [dynamic huffman, libdeflate]: 6277 KB/s + * [dynamic huffman, zlib ]: 10419 KB/s + * FIXME: make it faster. + */ + putchar('\n'); + ASSERT(t < 4 * tz); + + /* original reproducer */ + t = do_test_libdeflate("original repro", orig_repro, sizeof(orig_repro), + out, sizeof(out)); + tz = do_test_zlib("original repro", orig_repro, sizeof(orig_repro), + out, sizeof(out)); + ASSERT(t < tz); + + return 0; +} diff --git a/tools/z64compress/src/enc/libdeflate/programs/test_trailing_bytes.c b/tools/z64compress/src/enc/libdeflate/programs/test_trailing_bytes.c new file mode 100644 index 000000000..e37e97b9c --- /dev/null +++ b/tools/z64compress/src/enc/libdeflate/programs/test_trailing_bytes.c @@ -0,0 +1,151 @@ +/* + * test_trailing_bytes.c + * + * Test that decompression correctly stops at the end of the first DEFLATE, + * zlib, or gzip stream, and doesn't process any additional trailing bytes. 
+ */ + +#include "test_util.h" + +static const struct { + size_t (*compress)(struct libdeflate_compressor *compressor, + const void *in, size_t in_nbytes, + void *out, size_t out_nbytes_avail); + enum libdeflate_result (*decompress)( + struct libdeflate_decompressor *decompressor, + const void *in, size_t in_nbytes, + void *out, size_t out_nbytes_avail, + size_t *actual_out_nbytes_ret); + enum libdeflate_result (*decompress_ex)( + struct libdeflate_decompressor *decompressor, + const void *in, size_t in_nbytes, + void *out, size_t out_nbytes_avail, + size_t *actual_in_nbytes_ret, + size_t *actual_out_nbytes_ret); +} codecs[] = { + { + .compress = libdeflate_deflate_compress, + .decompress = libdeflate_deflate_decompress, + .decompress_ex = libdeflate_deflate_decompress_ex, + }, { + .compress = libdeflate_zlib_compress, + .decompress = libdeflate_zlib_decompress, + .decompress_ex = libdeflate_zlib_decompress_ex, + }, { + .compress = libdeflate_gzip_compress, + .decompress = libdeflate_gzip_decompress, + .decompress_ex = libdeflate_gzip_decompress_ex, + } +}; + +int +tmain(int argc, tchar *argv[]) +{ + const size_t original_nbytes = 32768; + const size_t compressed_nbytes_total = 32768; + /* + * Don't use the full buffer for compressed data, because we want to + * test whether decompression can deal with additional trailing bytes. + * + * Note: we can't use a guarded buffer (i.e. a buffer where the byte + * after compressed_nbytes is unmapped) because the decompressor may + * read a few bytes beyond the end of the stream (but ultimately not + * actually use those bytes) as long as they are within the buffer. 
+ */ + const size_t compressed_nbytes_avail = 30000; + size_t i; + u8 *original; + u8 *compressed; + u8 *decompressed; + struct libdeflate_compressor *c; + struct libdeflate_decompressor *d; + size_t compressed_nbytes; + enum libdeflate_result res; + size_t actual_compressed_nbytes; + size_t actual_decompressed_nbytes; + + begin_program(argv); + + ASSERT(compressed_nbytes_avail < compressed_nbytes_total); + + /* Prepare some dummy data to compress */ + original = xmalloc(original_nbytes); + ASSERT(original != NULL); + for (i = 0; i < original_nbytes; i++) + original[i] = (i % 123) + (i % 1023); + + compressed = xmalloc(compressed_nbytes_total); + ASSERT(compressed != NULL); + memset(compressed, 0, compressed_nbytes_total); + + decompressed = xmalloc(original_nbytes); + ASSERT(decompressed != NULL); + + c = libdeflate_alloc_compressor(6); + ASSERT(c != NULL); + + d = libdeflate_alloc_decompressor(); + ASSERT(d != NULL); + + for (i = 0; i < ARRAY_LEN(codecs); i++) { + compressed_nbytes = codecs[i].compress(c, original, + original_nbytes, + compressed, + compressed_nbytes_avail); + ASSERT(compressed_nbytes > 0); + ASSERT(compressed_nbytes <= compressed_nbytes_avail); + + /* Test decompress() of stream that fills the whole buffer */ + actual_decompressed_nbytes = 0; + memset(decompressed, 0, original_nbytes); + res = codecs[i].decompress(d, compressed, compressed_nbytes, + decompressed, original_nbytes, + &actual_decompressed_nbytes); + ASSERT(res == LIBDEFLATE_SUCCESS); + ASSERT(actual_decompressed_nbytes == original_nbytes); + ASSERT(memcmp(decompressed, original, original_nbytes) == 0); + + /* Test decompress_ex() of stream that fills the whole buffer */ + actual_compressed_nbytes = actual_decompressed_nbytes = 0; + memset(decompressed, 0, original_nbytes); + res = codecs[i].decompress_ex(d, compressed, compressed_nbytes, + decompressed, original_nbytes, + &actual_compressed_nbytes, + &actual_decompressed_nbytes); + ASSERT(res == LIBDEFLATE_SUCCESS); + 
ASSERT(actual_compressed_nbytes == compressed_nbytes); + ASSERT(actual_decompressed_nbytes == original_nbytes); + ASSERT(memcmp(decompressed, original, original_nbytes) == 0); + + /* Test decompress() of stream with trailing bytes */ + actual_decompressed_nbytes = 0; + memset(decompressed, 0, original_nbytes); + res = codecs[i].decompress(d, compressed, + compressed_nbytes_total, + decompressed, original_nbytes, + &actual_decompressed_nbytes); + ASSERT(res == LIBDEFLATE_SUCCESS); + ASSERT(actual_decompressed_nbytes == original_nbytes); + ASSERT(memcmp(decompressed, original, original_nbytes) == 0); + + /* Test decompress_ex() of stream with trailing bytes */ + actual_compressed_nbytes = actual_decompressed_nbytes = 0; + memset(decompressed, 0, original_nbytes); + res = codecs[i].decompress_ex(d, compressed, + compressed_nbytes_total, + decompressed, original_nbytes, + &actual_compressed_nbytes, + &actual_decompressed_nbytes); + ASSERT(res == LIBDEFLATE_SUCCESS); + ASSERT(actual_compressed_nbytes == compressed_nbytes); + ASSERT(actual_decompressed_nbytes == original_nbytes); + ASSERT(memcmp(decompressed, original, original_nbytes) == 0); + } + + free(original); + free(compressed); + free(decompressed); + libdeflate_free_compressor(c); + libdeflate_free_decompressor(d); + return 0; +} diff --git a/tools/z64compress/src/enc/libdeflate/programs/test_util.c b/tools/z64compress/src/enc/libdeflate/programs/test_util.c new file mode 100644 index 000000000..20e7c217f --- /dev/null +++ b/tools/z64compress/src/enc/libdeflate/programs/test_util.c @@ -0,0 +1,243 @@ +/* + * test_util.c - utility functions for test programs + * + * Copyright 2016 Eric Biggers + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or 
sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef _WIN32 +/* for MAP_ANONYMOUS or MAP_ANON, which unfortunately aren't part of POSIX... */ +# undef _POSIX_C_SOURCE +# ifdef __APPLE__ +# define _DARWIN_C_SOURCE +# elif defined(__linux__) +# define _GNU_SOURCE +# endif +#endif + +#include "test_util.h" + +#include +#include +#ifdef _WIN32 +# include +#else +# include +# include +# include +#endif + +#ifndef MAP_ANONYMOUS +# define MAP_ANONYMOUS MAP_ANON +#endif + +/* Abort with an error message */ +_noreturn void +assertion_failed(const char *expr, const char *file, int line) +{ + msg("Assertion failed: %s at %s:%d", expr, file, line); + abort(); +} + +void +begin_performance_test(void) +{ + /* Skip performance tests by default, since they can be flaky. 
*/ + if (getenv("INCLUDE_PERF_TESTS") == NULL) + exit(0); +} + +static size_t +get_page_size(void) +{ +#ifdef _WIN32 + SYSTEM_INFO info; + + GetSystemInfo(&info); + return info.dwPageSize; +#else + return sysconf(_SC_PAGESIZE); +#endif +} + +/* Allocate a buffer with guard pages */ +void +alloc_guarded_buffer(size_t size, u8 **start_ret, u8 **end_ret) +{ + const size_t pagesize = get_page_size(); + const size_t nr_pages = (size + pagesize - 1) / pagesize; + u8 *base_addr; + u8 *start, *end; +#ifdef _WIN32 + DWORD oldProtect; +#endif + + *start_ret = NULL; + *end_ret = NULL; + +#ifdef _WIN32 + /* Allocate buffer and guard pages with no access. */ + base_addr = VirtualAlloc(NULL, (nr_pages + 2) * pagesize, + MEM_COMMIT | MEM_RESERVE, PAGE_NOACCESS); + if (!base_addr) { + msg("Unable to allocate memory (VirtualAlloc): Windows error %u", + (unsigned int)GetLastError()); + ASSERT(0); + } + start = base_addr + pagesize; + end = start + (nr_pages * pagesize); + + /* Grant read+write access to just the buffer. */ + if (!VirtualProtect(start, end - start, PAGE_READWRITE, &oldProtect)) { + msg("Unable to protect memory (VirtualProtect): Windows error %u", + (unsigned int)GetLastError()); + VirtualFree(base_addr, 0, MEM_RELEASE); + ASSERT(0); + } +#else + /* Allocate buffer and guard pages. */ + base_addr = mmap(NULL, (nr_pages + 2) * pagesize, PROT_READ|PROT_WRITE, + MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); + if (base_addr == (u8 *)MAP_FAILED) { + msg_errno("Unable to allocate memory (anonymous mmap)"); + ASSERT(0); + } + start = base_addr + pagesize; + end = start + (nr_pages * pagesize); + + /* Unmap the guard pages. 
*/ + munmap(base_addr, pagesize); + munmap(end, pagesize); +#endif + *start_ret = start; + *end_ret = end; +} + +/* Free a buffer that was allocated by alloc_guarded_buffer() */ +void +free_guarded_buffer(u8 *start, u8 *end) +{ + if (!start) + return; +#ifdef _WIN32 + VirtualFree(start - get_page_size(), 0, MEM_RELEASE); +#else + munmap(start, end - start); +#endif +} + +/* + * Return the number of timer ticks that have elapsed since some unspecified + * point fixed at the start of program execution + */ +u64 +timer_ticks(void) +{ +#ifdef _WIN32 + LARGE_INTEGER count; + + QueryPerformanceCounter(&count); + return count.QuadPart; +#elif defined(HAVE_CLOCK_GETTIME) + struct timespec ts; + + clock_gettime(CLOCK_MONOTONIC, &ts); + return (1000000000 * (u64)ts.tv_sec) + ts.tv_nsec; +#else + struct timeval tv; + + gettimeofday(&tv, NULL); + return (1000000 * (u64)tv.tv_sec) + tv.tv_usec; +#endif +} + +/* + * Return the number of timer ticks per second + */ +static u64 +timer_frequency(void) +{ +#ifdef _WIN32 + LARGE_INTEGER freq; + + QueryPerformanceFrequency(&freq); + return freq.QuadPart; +#elif defined(HAVE_CLOCK_GETTIME) + return 1000000000; +#else + return 1000000; +#endif +} + +/* + * Convert a number of elapsed timer ticks to milliseconds + */ +u64 timer_ticks_to_ms(u64 ticks) +{ + return ticks * 1000 / timer_frequency(); +} + +/* + * Convert a byte count and a number of elapsed timer ticks to MB/s + */ +u64 timer_MB_per_s(u64 bytes, u64 ticks) +{ + return bytes * timer_frequency() / ticks / 1000000; +} + +/* + * Convert a byte count and a number of elapsed timer ticks to KB/s + */ +u64 timer_KB_per_s(u64 bytes, u64 ticks) +{ + return bytes * timer_frequency() / ticks / 1000; +} + +bool +put_bits(struct output_bitstream *os, machine_word_t bits, int num_bits) +{ + os->bitbuf |= bits << os->bitcount; + os->bitcount += num_bits; + while (os->bitcount >= 8) { + if (os->next == os->end) + return false; + *os->next++ = os->bitbuf; + os->bitcount -= 8; + os->bitbuf >>= 
8; + } + return true; +} + +bool +flush_bits(struct output_bitstream *os) +{ + while (os->bitcount > 0) { + if (os->next == os->end) + return false; + *os->next++ = os->bitbuf; + os->bitcount -= 8; + os->bitbuf >>= 8; + } + os->bitcount = 0; + return true; +} diff --git a/tools/z64compress/src/enc/libdeflate/programs/test_util.h b/tools/z64compress/src/enc/libdeflate/programs/test_util.h new file mode 100644 index 000000000..4fb9688f6 --- /dev/null +++ b/tools/z64compress/src/enc/libdeflate/programs/test_util.h @@ -0,0 +1,67 @@ +/* + * test_util.h - utility functions for test programs + * + * Copyright 2016 Eric Biggers + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef PROGRAMS_TEST_UTIL_H +#define PROGRAMS_TEST_UTIL_H + +#include "prog_util.h" + +#include /* for comparison purposes */ + +#if defined(__GNUC__) || __has_attribute(noreturn) +# define _noreturn __attribute__((noreturn)) +#else +# define _noreturn +#endif + +void _noreturn +assertion_failed(const char *expr, const char *file, int line); + +#define ASSERT(expr) { if (unlikely(!(expr))) \ + assertion_failed(#expr, __FILE__, __LINE__); } + +void begin_performance_test(void); + +void alloc_guarded_buffer(size_t size, u8 **start_ret, u8 **end_ret); +void free_guarded_buffer(u8 *start, u8 *end); + +u64 timer_ticks(void); +u64 timer_ticks_to_ms(u64 ticks); +u64 timer_MB_per_s(u64 bytes, u64 ticks); +u64 timer_KB_per_s(u64 bytes, u64 ticks); + +struct output_bitstream { + machine_word_t bitbuf; + int bitcount; + u8 *next; + u8 *end; +}; + +bool put_bits(struct output_bitstream *os, machine_word_t bits, int num_bits); +bool flush_bits(struct output_bitstream *os); + +#endif /* PROGRAMS_TEST_UTIL_H */ diff --git a/tools/z64compress/src/enc/libdeflate/programs/tgetopt.c b/tools/z64compress/src/enc/libdeflate/programs/tgetopt.c new file mode 100644 index 000000000..868600d97 --- /dev/null +++ b/tools/z64compress/src/enc/libdeflate/programs/tgetopt.c @@ -0,0 +1,118 @@ +/* + * tgetopt.c - portable replacement for GNU getopt() + * + * Copyright 2016 Eric Biggers + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "prog_util.h" + +tchar *toptarg; +int toptind = 1, topterr = 1, toptopt; + +/* + * This is a simple implementation of getopt(). It can be compiled with either + * 'char' or 'wchar_t' as the character type. + * + * Do *not* use this implementation if you need any of the following features, + * as they are not supported: + * - Long options + * - Option-related arguments retained in argv, not nulled out + * - '+' and '-' characters in optstring + */ +int +tgetopt(int argc, tchar *argv[], const tchar *optstring) +{ + static tchar empty[1]; + static tchar *nextchar; + static bool done; + + if (toptind == 1) { + /* Starting to scan a new argument vector */ + nextchar = NULL; + done = false; + } + + while (!done && (nextchar != NULL || toptind < argc)) { + if (nextchar == NULL) { + /* Scanning a new argument */ + tchar *arg = argv[toptind++]; + if (arg[0] == '-' && arg[1] != '\0') { + if (arg[1] == '-' && arg[2] == '\0') { + /* All args after "--" are nonoptions */ + argv[toptind - 1] = NULL; + done = true; + } else { + /* Start of short option characters */ + nextchar = &arg[1]; + } + } + } else { + /* More short options in previous arg */ + tchar opt = *nextchar; + tchar *p = tstrchr(optstring, opt); + if (p == NULL) { + if (topterr) + msg("invalid option -- '%"TC"'", opt); + toptopt = opt; + return '?'; + } + /* 'opt' is a valid short option character */ + nextchar++; + toptarg = NULL; + if (*(p + 1) == ':') { + /* 'opt' can take an argument */ + if (*nextchar != 
'\0') { + /* Optarg is in same argv argument */ + toptarg = nextchar; + nextchar = empty; + } else if (toptind < argc && *(p + 2) != ':') { + /* Optarg is next argv argument */ + argv[toptind - 1] = NULL; + toptarg = argv[toptind++]; + } else if (*(p + 2) != ':') { + if (topterr && *optstring != ':') { + msg("option requires an " + "argument -- '%"TC"'", opt); + } + toptopt = opt; + opt = (*optstring == ':') ? ':' : '?'; + } + } + if (*nextchar == '\0') { + argv[toptind - 1] = NULL; + nextchar = NULL; + } + return opt; + } + } + + /* Done scanning. Move all nonoptions to the end, set optind to the + * index of the first nonoption, and return -1. */ + toptind = argc; + while (--argc > 0) + if (argv[argc] != NULL) + argv[--toptind] = argv[argc]; + done = true; + return -1; +} diff --git a/tools/z64compress/src/enc/libdeflate/scripts/afl-fuzz/deflate_compress/fuzz.c b/tools/z64compress/src/enc/libdeflate/scripts/afl-fuzz/deflate_compress/fuzz.c new file mode 100644 index 000000000..420a7db67 --- /dev/null +++ b/tools/z64compress/src/enc/libdeflate/scripts/afl-fuzz/deflate_compress/fuzz.c @@ -0,0 +1,56 @@ +#include +#include +#include +#include +#include +#include + +int main(int argc, char **argv) +{ + struct libdeflate_decompressor *d; + struct libdeflate_compressor *c; + int ret; + int fd = open(argv[1], O_RDONLY); + struct stat stbuf; + unsigned char level; + unsigned char use_bound; + + assert(fd >= 0); + ret = fstat(fd, &stbuf); + assert(!ret); + + if (stbuf.st_size < 2) + return 0; + ret = read(fd, &level, 1); + assert(ret == 1); + level %= 13; + + ret = read(fd, &use_bound, 1); + assert(ret == 1); + use_bound %= 2; + + char in[stbuf.st_size - 2]; + ret = read(fd, in, sizeof in); + assert(ret == sizeof in); + + c = libdeflate_alloc_compressor(level); + d = libdeflate_alloc_decompressor(); + + size_t outsize = use_bound ? 
libdeflate_deflate_compress_bound(c, sizeof(in)) : sizeof(in); + char out[outsize]; + char checkarray[sizeof(in)]; + + size_t csize = libdeflate_deflate_compress(c, in,sizeof in, out, outsize); + if (csize != 0) { + enum libdeflate_result res; + res = libdeflate_deflate_decompress(d, out, csize, checkarray, sizeof in, NULL); + assert(!res); + assert(!memcmp(in, checkarray, sizeof in)); + } else { + assert(!use_bound); + } + + libdeflate_free_compressor(c); + libdeflate_free_decompressor(d); + return 0; +} diff --git a/tools/z64compress/src/enc/libdeflate/scripts/afl-fuzz/deflate_compress/inputs/0 b/tools/z64compress/src/enc/libdeflate/scripts/afl-fuzz/deflate_compress/inputs/0 new file mode 100644 index 000000000..875bce73a Binary files /dev/null and b/tools/z64compress/src/enc/libdeflate/scripts/afl-fuzz/deflate_compress/inputs/0 differ diff --git a/tools/z64compress/src/enc/libdeflate/scripts/afl-fuzz/deflate_decompress/fuzz.c b/tools/z64compress/src/enc/libdeflate/scripts/afl-fuzz/deflate_decompress/fuzz.c new file mode 100644 index 000000000..8cc4ce55c --- /dev/null +++ b/tools/z64compress/src/enc/libdeflate/scripts/afl-fuzz/deflate_decompress/fuzz.c @@ -0,0 +1,28 @@ +#include +#include +#include +#include +#include + +int main(int argc, char **argv) +{ + struct libdeflate_decompressor *d; + int ret; + int fd = open(argv[1], O_RDONLY); + struct stat stbuf; + assert(fd >= 0); + ret = fstat(fd, &stbuf); + assert(!ret); + + char in[stbuf.st_size]; + ret = read(fd, in, sizeof in); + assert(ret == sizeof in); + + char out[sizeof(in) * 3]; + + d = libdeflate_alloc_decompressor(); + + libdeflate_deflate_decompress(d, in, sizeof in, out, sizeof out, NULL); + libdeflate_free_decompressor(d); + return 0; +} diff --git a/tools/z64compress/src/enc/libdeflate/scripts/afl-fuzz/deflate_decompress/inputs/0 b/tools/z64compress/src/enc/libdeflate/scripts/afl-fuzz/deflate_decompress/inputs/0 new file mode 100644 index 000000000..19e3a346e --- /dev/null +++ 
b/tools/z64compress/src/enc/libdeflate/scripts/afl-fuzz/deflate_decompress/inputs/0 @@ -0,0 +1,3 @@ +uŽ1 +Â@EgÅBl5 +‚°VÅÒè6j—«X{i=•èl=€àΟ¬Ñlóßü™?tíÐç½D í¨ò=¯GÑ% ¾©—2xÔ‡7eðD½ÓÐs[ÔиUkÅ÷q¹ |R/åêµùë®°*F¢Mzš¼v°•`ÐÇórÐ1ªóB÷,lDuYj#0<ÅÕž2È0hE`¹øI°ÿìW \ No newline at end of file diff --git a/tools/z64compress/src/enc/libdeflate/scripts/afl-fuzz/fuzz.sh b/tools/z64compress/src/enc/libdeflate/scripts/afl-fuzz/fuzz.sh new file mode 100644 index 000000000..c78a2ca05 --- /dev/null +++ b/tools/z64compress/src/enc/libdeflate/scripts/afl-fuzz/fuzz.sh @@ -0,0 +1,154 @@ +#!/bin/bash + +set -e -u -o pipefail + +cd "$(dirname "$0")" + +read -r -a AVAILABLE_TARGETS < <(echo */fuzz.c | sed 's@/fuzz.c@@g') + +usage() +{ + cat << EOF +Usage: $0 [OPTION]... [TARGET]... + +Fuzz libdeflate with afl-fuzz. + +Options: + --asan Enable AddressSanitizer + --no-resume Don't resume existing afl-fuzz session; start a new one + --ubsan Enable UndefinedBehaviorSanitizer + +Available targets: ${AVAILABLE_TARGETS[*]} +EOF +} + +die() +{ + echo "$*" 1>&2 + exit 1 +} + +asan=false +ubsan=false +may_resume=true + +longopts_array=( +asan +help +no-resume +ubsan +) +longopts=$(echo "${longopts_array[@]}" | tr ' ' ',') + +if ! options=$(getopt -o "" -l "$longopts" -- "$@"); then + usage 1>&2 + exit 1 +fi +eval set -- "$options" +while (( $# >= 0 )); do + case "$1" in + --asan) + asan=true + ;; + --help) + usage + exit 0 + ;; + --no-resume) + may_resume=false + ;; + --ubsan) + ubsan=true + ;; + --) + shift + break + ;; + *) + echo 1>&2 "Invalid option: \"$1\"" + usage 1>&2 + exit 1 + esac + shift +done + +if $asan && $ubsan; then + die "--asan and --ubsan are mutually exclusive" +fi + +if ! type -P afl-fuzz > /dev/null; then + die "afl-fuzz is not installed" +fi + +if (( $# == 0 )); then + targets=("${AVAILABLE_TARGETS[@]}") +else + for target; do + found=false + for t in "${AVAILABLE_TARGETS[@]}"; do + if [ "$target" = "$t" ]; then + found=true + fi + done + if ! 
$found; then + echo 1>&2 "Unknown target '$target'" + echo 1>&2 "Available targets: ${AVAILABLE_TARGETS[*]}" + exit 1 + fi + done + targets=("$@") +fi +if (( ${#targets[@]} > 1 )) && ! type -P urxvt > /dev/null; then + die "urxvt is not installed" +fi + +afl_opts="" +if $asan; then + export AFL_USE_ASAN=1 + export CFLAGS="-O2 -m32" + export CC=afl-clang + afl_opts+=" -m 800" +elif $ubsan; then + export CFLAGS="-fsanitize=undefined -fno-sanitize-recover=undefined" + export CC=afl-gcc +else + export AFL_HARDEN=1 + export CFLAGS="-O2" + export CC=afl-gcc +fi +CFLAGS+=" -DLIBDEFLATE_ENABLE_ASSERTIONS" + +sudo sh -c "echo core > /proc/sys/kernel/core_pattern" +if [ -e /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor ]; then + sudo sh -c "echo performance | tee /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor" +fi + +srcdir=../.. +builddir=$srcdir/build +$srcdir/scripts/cmake-helper.sh -G Ninja +cmake --build $builddir + +for dir in "${targets[@]}"; do + cp -vaT "$dir" "/tmp/$dir" + # shellcheck disable=SC2086 # Intended word splitting of $CFLAGS + $CC $CFLAGS -Wall -I$srcdir "$dir"/fuzz.c $builddir/libdeflate.a \ + -o "/tmp/$dir/fuzz" + indir=/tmp/$dir/inputs + outdir=/tmp/$dir/outputs + if [ -e "$outdir" ]; then + if $may_resume; then + indir="-" + else + rm -rf "${outdir:?}"/* + fi + else + mkdir "$outdir" + fi + cmd="afl-fuzz -i $indir -o $outdir -T $dir $afl_opts -- /tmp/$dir/fuzz @@" + if (( ${#targets[@]} > 1 )); then + urxvt -e bash -c "$cmd" & + else + $cmd + fi +done +wait diff --git a/tools/z64compress/src/enc/libdeflate/scripts/afl-fuzz/gzip_decompress/fuzz.c b/tools/z64compress/src/enc/libdeflate/scripts/afl-fuzz/gzip_decompress/fuzz.c new file mode 100644 index 000000000..aec50804c --- /dev/null +++ b/tools/z64compress/src/enc/libdeflate/scripts/afl-fuzz/gzip_decompress/fuzz.c @@ -0,0 +1,28 @@ +#include +#include +#include +#include +#include + +int main(int argc, char **argv) +{ + struct libdeflate_decompressor *d; + int ret; + int fd = 
open(argv[1], O_RDONLY); + struct stat stbuf; + assert(fd >= 0); + ret = fstat(fd, &stbuf); + assert(!ret); + + char in[stbuf.st_size]; + ret = read(fd, in, sizeof in); + assert(ret == sizeof in); + + char out[sizeof(in) * 3]; + + d = libdeflate_alloc_decompressor(); + + libdeflate_gzip_decompress(d, in, sizeof in, out, sizeof out, NULL); + libdeflate_free_decompressor(d); + return 0; +} diff --git a/tools/z64compress/src/enc/libdeflate/scripts/afl-fuzz/gzip_decompress/inputs/0 b/tools/z64compress/src/enc/libdeflate/scripts/afl-fuzz/gzip_decompress/inputs/0 new file mode 100644 index 000000000..813c75359 Binary files /dev/null and b/tools/z64compress/src/enc/libdeflate/scripts/afl-fuzz/gzip_decompress/inputs/0 differ diff --git a/tools/z64compress/src/enc/libdeflate/scripts/afl-fuzz/zlib_decompress/fuzz.c b/tools/z64compress/src/enc/libdeflate/scripts/afl-fuzz/zlib_decompress/fuzz.c new file mode 100644 index 000000000..797343bbf --- /dev/null +++ b/tools/z64compress/src/enc/libdeflate/scripts/afl-fuzz/zlib_decompress/fuzz.c @@ -0,0 +1,28 @@ +#include +#include +#include +#include +#include + +int main(int argc, char **argv) +{ + struct libdeflate_decompressor *d; + int ret; + int fd = open(argv[1], O_RDONLY); + struct stat stbuf; + assert(fd >= 0); + ret = fstat(fd, &stbuf); + assert(!ret); + + char in[stbuf.st_size]; + ret = read(fd, in, sizeof in); + assert(ret == sizeof in); + + char out[sizeof(in) * 3]; + + d = libdeflate_alloc_decompressor(); + + libdeflate_zlib_decompress(d, in, sizeof in, out, sizeof out, NULL); + libdeflate_free_decompressor(d); + return 0; +} diff --git a/tools/z64compress/src/enc/libdeflate/scripts/afl-fuzz/zlib_decompress/inputs/0 b/tools/z64compress/src/enc/libdeflate/scripts/afl-fuzz/zlib_decompress/inputs/0 new file mode 100644 index 000000000..292e9726d --- /dev/null +++ b/tools/z64compress/src/enc/libdeflate/scripts/afl-fuzz/zlib_decompress/inputs/0 @@ -0,0 +1,3 @@ +xœuŽ1 +Â@EgÅBl5 +‚°VÅÒè6j—«X{i=•èl=€àΟ¬Ñlóßü™?tíÐç½D í¨ò=¯GÑ% 
¾©—2xÔ‡7eðD½ÓÐs[ÔиUkÅ÷q¹ |R/åêµùë®°*F¢Mzš¼v°•`ÐÇórÐ1ªóB÷,lDuYj#0<ÅÕž2È0hE`¹øI°ÿìWÂ-© \ No newline at end of file diff --git a/tools/z64compress/src/enc/libdeflate/scripts/android_build.sh b/tools/z64compress/src/enc/libdeflate/scripts/android_build.sh new file mode 100644 index 000000000..ae0b4bc0e --- /dev/null +++ b/tools/z64compress/src/enc/libdeflate/scripts/android_build.sh @@ -0,0 +1,117 @@ +#!/bin/bash + +set -eu -o pipefail + +SCRIPTDIR="$(dirname "$0")" +BUILDDIR="$SCRIPTDIR/../build" +API_LEVEL=28 +ARCH=arm64 +export CFLAGS=${CFLAGS:-} +ENABLE_CRC=false +ENABLE_CRYPTO=false +NDKDIR=$HOME/android-ndk-r23b + +usage() { + cat << EOF +Usage: $0 [OPTION]... +Build libdeflate for Android. + + --api-level=LEVEL Android API level to target (default: $API_LEVEL) + --arch=ARCH Architecture: arm32|arm64|x86|x86_64 (default: $ARCH) + --enable-crc Enable crc instructions + --enable-crypto Enable crypto instructions + --ndkdir=NDKDIR Android NDK directory (default: $NDKDIR) +EOF +} +if ! options=$(getopt -o '' \ + -l 'api-level:,arch:,enable-crc,enable-crypto,help,ndkdir:' -- "$@"); then + usage 1>&2 + exit 1 +fi + +eval set -- "$options" + +while [ $# -gt 0 ]; do + case "$1" in + --api-level) + API_LEVEL="$2" + shift + ;; + --arch) + ARCH="$2" + shift + ;; + --enable-crc) + ENABLE_CRC=true + ;; + --enable-crypto) + ENABLE_CRYPTO=true + ;; + --help) + usage + exit 0 + ;; + --ndkdir) + NDKDIR="$2" + shift + ;; + --) + shift + break + ;; + *) + echo 1>&2 "Unknown option \"$1\"" + usage 1>&2 + exit 1 + esac + shift +done + +case "$ARCH" in +arm|arm32|aarch32|armeabi-v7a) + ANDROID_ABI=armeabi-v7a + if $ENABLE_CRC || $ENABLE_CRYPTO; then + CFLAGS+=" -march=armv8-a" + if $ENABLE_CRC; then + CFLAGS+=" -mcrc" + else + CFLAGS+=" -mnocrc" + fi + if $ENABLE_CRYPTO; then + CFLAGS+=" -mfpu=crypto-neon-fp-armv8" + else + CFLAGS+=" -mfpu=neon" + fi + fi + ;; +arm64|aarch64|arm64-v8a) + ANDROID_ABI=arm64-v8a + features="" + if $ENABLE_CRC; then + features+="+crc" + fi + if 
$ENABLE_CRYPTO; then + features+="+crypto" + fi + if [ -n "$features" ]; then + CFLAGS+=" -march=armv8-a$features" + fi + ;; +x86) + ANDROID_ABI=x86 + ;; +x86_64) + ANDROID_ABI=x86_64 + ;; +*) + echo 1>&2 "Unknown architecture: \"$ARCH\"" + usage 1>&2 + exit 1 +esac + +"$SCRIPTDIR"/cmake-helper.sh -G Ninja \ + -DCMAKE_TOOLCHAIN_FILE="$NDKDIR"/build/cmake/android.toolchain.cmake \ + -DANDROID_ABI="$ANDROID_ABI" \ + -DANDROID_PLATFORM="$API_LEVEL" \ + -DLIBDEFLATE_BUILD_TESTS=1 +cmake --build "$BUILDDIR" diff --git a/tools/z64compress/src/enc/libdeflate/scripts/android_tests.sh b/tools/z64compress/src/enc/libdeflate/scripts/android_tests.sh new file mode 100644 index 000000000..3ec1007ba --- /dev/null +++ b/tools/z64compress/src/enc/libdeflate/scripts/android_tests.sh @@ -0,0 +1,69 @@ +#!/bin/bash +# +# Test libdeflate on a connected arm64 Android device. +# Requires the Android NDK (release 19 or later) and adb. + +set -eu -o pipefail +cd "$(dirname "$0")/.." + +if [ $# -ne 0 ]; then + echo 1>&2 "Usage: $0" + exit 2 +fi + +# Use NDKDIR if specified in environment, else use default value. +: "${NDKDIR:=$HOME/android-ndk-r23b}" +if [ ! -e "$NDKDIR" ]; then + cat 1>&2 << EOF +Android NDK was not found in NDKDIR=$NDKDIR! Set the +environmental variable NDKDIR to the location of your Android NDK installation. +EOF + exit 1 +fi + +CLEANUP_CMDS=() +cleanup() { + for cmd in "${CLEANUP_CMDS[@]}"; do + eval "$cmd" + done +} +trap cleanup EXIT + +# Use TESTDATA if specified in environment, else generate it. +if [ -z "${TESTDATA:-}" ]; then + # Generate default TESTDATA file. + TESTDATA=$(mktemp -t libdeflate_testdata.XXXXXXXXXX) + export TESTDATA + CLEANUP_CMDS+=("rm -f '$TESTDATA'") + find . 
'(' -name '*.c' -o -name '*.h' -o -name '*.sh' ')' \ + -exec cat '{}' ';' | head -c 1000000 > "$TESTDATA" +fi + +TMPDIR=$(mktemp -d -t libdeflate_test.XXXXXXXXX) +CLEANUP_CMDS+=("rm -r '$TMPDIR'") + +android_build_and_test() { + echo "Running Android tests with $*" + + ./scripts/android_build.sh --ndkdir="$NDKDIR" "$@" > /dev/null + adb push "$TESTDATA" ./scripts/exec_tests.sh \ + ./build/programs/{benchmark,test_*} /data/local/tmp/ > /dev/null + + # Note: adb shell always returns 0, even if the shell command fails... + adb shell "cd /data/local/tmp && WRAPPER= TESTDATA=$(basename "$TESTDATA") sh exec_tests.sh" \ + > "$TMPDIR/adb.out" + if ! grep -q "exec_tests finished successfully" "$TMPDIR/adb.out"; then + echo 1>&2 "Android test failure! adb shell output:" + cat "$TMPDIR/adb.out" + exit 1 + fi +} + +android_build_and_test --arch=arm32 +android_build_and_test --arch=arm32 --enable-crc +android_build_and_test --arch=arm64 +android_build_and_test --arch=arm64 --enable-crc +android_build_and_test --arch=arm64 --enable-crypto +android_build_and_test --arch=arm64 --enable-crc --enable-crypto + +echo "Android tests passed" diff --git a/tools/z64compress/src/enc/libdeflate/scripts/benchmark.sh b/tools/z64compress/src/enc/libdeflate/scripts/benchmark.sh new file mode 100644 index 000000000..e7275926d --- /dev/null +++ b/tools/z64compress/src/enc/libdeflate/scripts/benchmark.sh @@ -0,0 +1,10 @@ +#!/bin/bash + +set -e + +SCRIPTDIR="$(dirname "$0")" +BUILDDIR="$SCRIPTDIR/../build" + +"$SCRIPTDIR"/cmake-helper.sh -DLIBDEFLATE_BUILD_TESTS=1 -G Ninja > /dev/null +ninja -C "$BUILDDIR" --quiet benchmark +"$BUILDDIR"/programs/benchmark "$@" diff --git a/tools/z64compress/src/enc/libdeflate/scripts/checksum.sh b/tools/z64compress/src/enc/libdeflate/scripts/checksum.sh new file mode 100644 index 000000000..c7350d756 --- /dev/null +++ b/tools/z64compress/src/enc/libdeflate/scripts/checksum.sh @@ -0,0 +1,10 @@ +#!/bin/bash + +set -e + +SCRIPTDIR="$(dirname "$0")" 
+BUILDDIR="$SCRIPTDIR/../build" + +"$SCRIPTDIR"/cmake-helper.sh -DLIBDEFLATE_BUILD_TESTS=1 -G Ninja > /dev/null +ninja -C "$BUILDDIR" --quiet checksum +"$BUILDDIR"/programs/checksum "$@" diff --git a/tools/z64compress/src/enc/libdeflate/scripts/checksum_benchmarks.sh b/tools/z64compress/src/enc/libdeflate/scripts/checksum_benchmarks.sh new file mode 100644 index 000000000..836676084 --- /dev/null +++ b/tools/z64compress/src/enc/libdeflate/scripts/checksum_benchmarks.sh @@ -0,0 +1,167 @@ +#!/bin/bash + +set -eu -o pipefail + +have_cpu_feature() { + local feature="$1" + local tag + case $ARCH in + arm*|aarch*) + tag="Features" + ;; + *) + tag="flags" + ;; + esac + grep -q "^$tag"$'[ \t]'"*:.*\<$feature\>" /proc/cpuinfo +} + +make_and_test() { + # Build the checksum program and tests. Set the special test support + # flag to get support for LIBDEFLATE_DISABLE_CPU_FEATURES. + rm -rf build + CFLAGS="$CFLAGS -DTEST_SUPPORT__DO_NOT_USE=1" \ + cmake -B build -G Ninja -DLIBDEFLATE_BUILD_TESTS=1 > /dev/null + cmake --build build > /dev/null + + # Run the checksum tests, for good measure. (This isn't actually part + # of the benchmarking.) 
+ ./build/programs/test_checksums > /dev/null +} + +__do_benchmark() { + local impl="$1" speed + shift + local flags=("$@") + + speed=$(./build/programs/checksum "${CKSUM_FLAGS[@]}" \ + "${flags[@]}" -t "$FILE" | \ + grep -o '[0-9]\+ MB/s' | grep -o '[0-9]\+') + printf "%-45s%-10s\n" "$CKSUM_NAME ($impl)" "$speed" +} + +do_benchmark() { + local impl="$1" + + if [ "$impl" = zlib ]; then + __do_benchmark "$impl" "-Z" + else + CFLAGS="${EXTRA_CFLAGS[*]}" make_and_test + __do_benchmark "libdeflate, $impl" + if [ "$ARCH" = x86_64 ]; then + CFLAGS="-m32 ${EXTRA_CFLAGS[*]}" make_and_test + __do_benchmark "libdeflate, $impl, 32-bit" + fi + fi +} + +sort_by_speed() { + awk '{print $NF, $0}' | sort -nr | cut -f2- -d' ' +} + +disable_cpu_feature() { + local name="$1" + shift + local extra_cflags=("$@") + + LIBDEFLATE_DISABLE_CPU_FEATURES+=",$name" + EXTRA_CFLAGS+=("${extra_cflags[@]}") +} + +cleanup() { + if $USING_TMPFILE; then + rm "$FILE" + fi +} + +ARCH="$(uname -m)" +USING_TMPFILE=false + +if (( $# > 1 )); then + echo "Usage: $0 [FILE]" 1>&2 + exit 1 +fi + +trap cleanup EXIT + +if (( $# == 0 )); then + # Generate default test data file. 
+ FILE=$(mktemp -t checksum_testdata.XXXXXXXXXX) + USING_TMPFILE=true + echo "Generating 100 MB test file: $FILE" + head -c 100000000 /dev/urandom > "$FILE" +else + FILE="$1" +fi + +cat << EOF +Method Speed (MB/s) +------ ------------ +EOF + +# CRC-32 +CKSUM_NAME="CRC-32" +CKSUM_FLAGS=() +EXTRA_CFLAGS=() +export LIBDEFLATE_DISABLE_CPU_FEATURES="" +{ +case $ARCH in +i386|x86_64) + if have_cpu_feature pclmulqdq && have_cpu_feature avx; then + do_benchmark "PCLMUL/AVX" + disable_cpu_feature "avx" "-mno-avx" + fi + if have_cpu_feature pclmulqdq; then + do_benchmark "PCLMUL" + disable_cpu_feature "pclmul" "-mno-pclmul" + fi + ;; +arm*|aarch*) + if have_cpu_feature crc32; then + do_benchmark "ARM" + disable_cpu_feature "crc32" "-march=armv8-a+nocrc" + fi + if have_cpu_feature pmull; then + do_benchmark "PMULL" + disable_cpu_feature "pmull" "-march=armv8-a+nocrc+nocrypto" + fi + ;; +esac +do_benchmark "generic" +do_benchmark "zlib" +} | sort_by_speed + +# Adler-32 +CKSUM_NAME="Adler-32" +CKSUM_FLAGS=(-A) +EXTRA_CFLAGS=() +export LIBDEFLATE_DISABLE_CPU_FEATURES="" +echo +{ +case $ARCH in +i386|x86_64) + if have_cpu_feature avx2; then + do_benchmark "AVX2" + disable_cpu_feature "avx2" "-mno-avx2" + fi + if have_cpu_feature sse2; then + do_benchmark "SSE2" + disable_cpu_feature "sse2" "-mno-sse2" + fi + ;; +arm*) + if have_cpu_feature neon; then + do_benchmark "NEON" + disable_cpu_feature "neon" "-mfpu=vfpv3" + fi + ;; +aarch*) + if have_cpu_feature asimd; then + do_benchmark "NEON" + disable_cpu_feature "neon" "-march=armv8-a+nosimd" + fi + ;; +esac +do_benchmark "generic" +do_benchmark "zlib" +} | sort_by_speed diff --git a/tools/z64compress/src/enc/libdeflate/scripts/cmake-helper.sh b/tools/z64compress/src/enc/libdeflate/scripts/cmake-helper.sh new file mode 100644 index 000000000..0c67930ba --- /dev/null +++ b/tools/z64compress/src/enc/libdeflate/scripts/cmake-helper.sh @@ -0,0 +1,15 @@ +#!/bin/sh + +# This script ensures that the 'build' directory has been created and 
configured +# with the given CMake options and environment. + +TOPDIR="$(dirname "$0")"/.. +BUILDDIR="$TOPDIR"/build + +flags=$(env; echo "@CMAKEOPTS@=$*") +if [ "$flags" != "$(cat "$BUILDDIR"/.flags 2>/dev/null)" ]; then + rm -rf "$BUILDDIR"/CMakeCache.txt "$BUILDDIR"/CMakeFiles + mkdir -p "$BUILDDIR" + cmake -S "$TOPDIR" -B "$BUILDDIR" "$@" + echo "$flags" > "$BUILDDIR"/.flags +fi diff --git a/tools/z64compress/src/enc/libdeflate/scripts/deflate_benchmarks.sh b/tools/z64compress/src/enc/libdeflate/scripts/deflate_benchmarks.sh new file mode 100644 index 000000000..5321cdc3a --- /dev/null +++ b/tools/z64compress/src/enc/libdeflate/scripts/deflate_benchmarks.sh @@ -0,0 +1,119 @@ +#!/bin/bash + +set -eu -o pipefail +topdir="$(dirname "$0")/.." +tmpfile=$(mktemp) +trap 'rm -f $tmpfile' EXIT + +run_benchmark() +{ + local best_ctime=1000000000 + local i + + for i in $(seq "$NUM_ITERATIONS"); do + "$@" > "$tmpfile" + csize=$(awk '/Compressed/{print $4}' "$tmpfile") + ctime=$(awk '/Compression time/{print $3}' "$tmpfile") + if (( ctime < best_ctime )); then + best_ctime=$ctime + fi + : "$i" # make shellcheck happy + done + CSIZE=$csize + CTIME=$best_ctime +} + +multifile() +{ + local file results cmd best em + + NUM_ITERATIONS=1 + + echo "File | zlib -6 | zlib -9 | libdeflate -6 | libdeflate -9 | libdeflate -12" + echo "-----|---------|---------|---------------|---------------|---------------" + + for file in "$@"; do + echo -n "$(basename "$file")" + results=() + cmd=("$topdir/build/programs/benchmark" + -s"$(stat -c "%s" "$file")" "$file") + run_benchmark "${cmd[@]}" -Y -6 + results+=("$CSIZE") + run_benchmark "${cmd[@]}" -Y -6 + results+=("$CSIZE") + run_benchmark "${cmd[@]}" -6 + results+=("$CSIZE") + run_benchmark "${cmd[@]}" -9 + results+=("$CSIZE") + run_benchmark "${cmd[@]}" -12 + results+=("$CSIZE") + best=2000000000 + for result in "${results[@]}"; do + if (( result < best)); then + best=$result + fi + done + for result in "${results[@]}"; do + if (( result == 
best )); then + em="**" + else + em="" + fi + echo -n " | ${em}${result}${em}" + done + echo + done +} + +single_file() +{ + local file=$1 + local usize args + local include_old=false + + usize=$(stat -c "%s" "$file") + : ${NUM_ITERATIONS:=3} + + if [ -e "$topdir/benchmark-old" ]; then + include_old=true + fi + echo -n "Level | libdeflate (new) " + if $include_old; then + echo -n "| libdeflate (old) " + fi + echo "| zlib" + echo -n "------|------------------" + if $include_old; then + echo -n "|------------------" + fi + echo "|-----" + for level in {1..12}; do + echo -n "$level" + args=("$file" -s "$usize" "-$level") + + run_benchmark "$topdir/build/programs/benchmark" "${args[@]}" + echo -n " | $CSIZE / $CTIME" + + if $include_old; then + run_benchmark "$topdir/benchmark-old" "${args[@]}" + echo -n " | $CSIZE / $CTIME" + fi + + if (( level > 9 )); then + echo -n " | N/A" + else + run_benchmark "$topdir/build/programs/benchmark" \ + "${args[@]}" -Y + echo -n " | $CSIZE / $CTIME" + fi + echo + done +} + +if (( $# > 1 )); then + multifile "$@" +elif (( $# == 1 )); then + single_file "$@" +else + echo 1>&2 "Usage: $0 FILE..." 
+fi diff --git a/tools/z64compress/src/enc/libdeflate/scripts/exec_tests.sh b/tools/z64compress/src/enc/libdeflate/scripts/exec_tests.sh new file mode 100644 index 000000000..b4ad2d5b0 --- /dev/null +++ b/tools/z64compress/src/enc/libdeflate/scripts/exec_tests.sh @@ -0,0 +1,38 @@ +#!/bin/sh +# +# Helper script used by run_tests.sh and android_tests.sh, +# not intended to be run directly +# + +set -eu + +DIR=${1:-.} + +cd "$DIR" + +run_cmd() { + echo "$WRAPPER $*" + $WRAPPER "$@" > /dev/null +} + +for prog in ./test_*; do + run_cmd "$prog" +done + +for format in '' '-g' '-z'; do + for ref_impl in '' '-Y' '-Z'; do + run_cmd ./benchmark $format $ref_impl "$TESTDATA" + done +done +for level in 0 1 3 7 9; do + for ref_impl in '' '-Y'; do + run_cmd ./benchmark -$level $ref_impl "$TESTDATA" + done +done +for level in 0 1 3 7 9 12; do + for ref_impl in '' '-Z'; do + run_cmd ./benchmark -$level $ref_impl "$TESTDATA" + done +done + +echo "exec_tests finished successfully" # Needed for 'adb shell' diff --git a/tools/z64compress/src/enc/libdeflate/scripts/gen_bitreverse_tab.py b/tools/z64compress/src/enc/libdeflate/scripts/gen_bitreverse_tab.py new file mode 100644 index 000000000..3695742a3 --- /dev/null +++ b/tools/z64compress/src/enc/libdeflate/scripts/gen_bitreverse_tab.py @@ -0,0 +1,19 @@ +#!/usr/bin/env python3 +# +# This script computes a table that maps each byte to its bitwise reverse. 
+ +def reverse_byte(v): + return sum(1 << (7 - bit) for bit in range(8) if (v & (1 << bit)) != 0) + +tab = [reverse_byte(v) for v in range(256)] + +print('static const u8 bitreverse_tab[256] = {') +for i in range(0, len(tab), 8): + print('\t', end='') + for j, v in enumerate(tab[i:i+8]): + print(f'0x{v:02x},', end='') + if j == 7: + print('') + else: + print(' ', end='') +print('};') diff --git a/tools/z64compress/src/enc/libdeflate/scripts/gen_crc32_multipliers.c b/tools/z64compress/src/enc/libdeflate/scripts/gen_crc32_multipliers.c new file mode 100644 index 000000000..5ef9bacaf --- /dev/null +++ b/tools/z64compress/src/enc/libdeflate/scripts/gen_crc32_multipliers.c @@ -0,0 +1,203 @@ +/* + * gen_crc32_multipliers.c + * + * Copyright 2016 Eric Biggers + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +/* + * This program computes the constant multipliers needed for "folding" over + * various distances with the gzip CRC-32. 
Each such multiplier is x^D mod G(x) + * for some distance D, in bits, over which the folding is occurring. + * + * Folding works as follows: let A(x) be a polynomial (possibly reduced + * partially or fully mod G(x)) for part of the message, and let B(x) be a + * polynomial (possibly reduced partially or fully mod G(x)) for a later part of + * the message. The unreduced combined polynomial is A(x)*x^D + B(x), where D + * is the number of bits separating the two parts of the message plus len(B(x)). + * Since mod G(x) can be applied at any point, x^D mod G(x) can be precomputed + * and used instead of x^D unreduced. That allows the combined polynomial to be + * computed relatively easily in a partially-reduced form A(x)*(x^D mod G(x)) + + * B(x), with length max(len(A(x)) + 31, len(B(x))). This does require doing a + * polynomial multiplication (carryless multiplication). + * + * "Folding" in this way can be used for the entire CRC computation except the + * final reduction to 32 bits; this works well when CPU support for carryless + * multiplication is available. It can also be used to combine CRCs of + * different parts of the message that were computed using a different method. + * + * Note that the gzip CRC-32 uses bit-reversed polynomials. I.e., the low order + * bits are really the high order polynomial coefficients. + */ + +#include +#include + +#include "../common_defs.h" + +/* The generator polynomial G(x) for the gzip CRC-32 */ +#define CRCPOLY 0xEDB88320 /* G(x) without x^32 term */ +#define CRCPOLY_FULL (((u64)CRCPOLY << 1) | 1) /* G(x) */ + +/* Compute x^D mod G(x) */ +static u32 +compute_xD_modG(size_t D) +{ + /* Start with x^0 mod G(x) */ + u32 remainder = 0x80000000; + + /* Each iteration, 'remainder' becomes x^i mod G(x) */ + for (size_t i = 1; i <= D; i++) + remainder = (remainder >> 1) ^ ((remainder & 1) ? 
CRCPOLY : 0); + + /* Now 'remainder' is x^D mod G(x) */ + return remainder; +} + +/* Compute floor(x^64 / G(x)) */ +static u64 +compute_x64_div_G(void) +{ + u64 quotient = 0; + u64 dividend = 0x1; + + for (int i = 0; i < 64 - 32 + 1; i++) { + if ((dividend >> i) & 1) { + quotient |= (u64)1 << i; + dividend ^= CRCPOLY_FULL << i; + } + } + + return quotient; +} + +static void +gen_vec_folding_constants(void) +{ + /* + * Compute the multipliers needed for CRC-32 folding with carryless + * multiplication instructions that operate on the 64-bit halves of + * 128-bit vectors. Using the terminology from earlier, for each 64-bit + * fold len(A(x)) = 64, and len(B(x)) = 95 since a 64-bit polynomial + * multiplied by a 32-bit one produces a 95-bit one. When A(x) is the + * low order polynomial half of a 128-bit vector (high order physical + * half), the separation between the message parts is the total length + * of the 128-bit vectors separating the values. When A(x) is the high + * order polynomial half, the separation is 64 bits greater. + */ + for (int num_vecs = 1; num_vecs <= 12; num_vecs++) { + const int sep_lo = 128 * (num_vecs - 1); + const int sep_hi = sep_lo + 64; + const int len_B = 95; + int D; + + /* A(x) = high 64 polynomial bits (low 64 physical bits) */ + D = sep_hi + len_B; + printf("#define CRC32_%dVECS_MULT_1 0x%08"PRIx32" /* x^%d mod G(x) */\n", + num_vecs, compute_xD_modG(D), D); + + /* A(x) = low 64 polynomial bits (high 64 physical bits) */ + D = sep_lo + len_B; + printf("#define CRC32_%dVECS_MULT_2 0x%08"PRIx32" /* x^%d mod G(x) */\n", + num_vecs, compute_xD_modG(D), D); + + printf("#define CRC32_%dVECS_MULTS { CRC32_%dVECS_MULT_1, CRC32_%dVECS_MULT_2 }\n", + num_vecs, num_vecs, num_vecs); + printf("\n"); + } + + /* Multiplier for final 96 => 64 bit fold */ + printf("#define CRC32_FINAL_MULT 0x%08"PRIx32" /* x^63 mod G(x) */\n", + compute_xD_modG(63)); + + /* + * Constants for final 64 => 32 bit reduction. 
These constants are the + * odd ones out, as this final reduction step can't use the regular CRC + * folding described above. It uses Barrett reduction instead. + */ + printf("#define CRC32_BARRETT_CONSTANT_1 0x%016"PRIx64"ULL /* floor(x^64 / G(x)) */\n", + compute_x64_div_G()); + printf("#define CRC32_BARRETT_CONSTANT_2 0x%016"PRIx64"ULL /* G(x) */\n", + CRCPOLY_FULL); + printf("#define CRC32_BARRETT_CONSTANTS { CRC32_BARRETT_CONSTANT_1, CRC32_BARRETT_CONSTANT_2 }\n"); +} + +/* Multipliers for combining the CRCs of separate chunks */ +static void +gen_chunk_constants(void) +{ + const size_t num_chunks = 4; + const size_t table_len = 129; + const size_t min_chunk_len = 128; + + printf("#define CRC32_NUM_CHUNKS %zu\n", num_chunks); + printf("#define CRC32_MIN_VARIABLE_CHUNK_LEN %zuUL\n", min_chunk_len); + printf("#define CRC32_MAX_VARIABLE_CHUNK_LEN %zuUL\n", + (table_len - 1) * min_chunk_len); + printf("\n"); + printf("/* Multipliers for implementations that use a variable chunk length */\n"); + printf("static const u32 crc32_mults_for_chunklen[][CRC32_NUM_CHUNKS - 1] MAYBE_UNUSED = {\n", + num_chunks - 1); + printf("\t{ 0 /* unused row */ },\n"); + for (size_t i = 1; i < table_len; i++) { + const size_t chunk_len = i*min_chunk_len; + + printf("\t/* chunk_len=%zu */\n", chunk_len); + printf("\t{ "); + for (size_t j = num_chunks - 1; j >= 1; j--) { + const size_t D = (j * 8 * chunk_len) - 33; + + printf("0x%08"PRIx32" /* x^%zu mod G(x) */, ", + compute_xD_modG(D), D); + } + printf("},\n"); + } + printf("};\n"); + printf("\n"); + + printf("/* Multipliers for implementations that use a large fixed chunk length */\n"); + const size_t fixed_chunk_len = 32768; + printf("#define CRC32_FIXED_CHUNK_LEN %zuUL\n", fixed_chunk_len); + for (int j = 1; j < num_chunks; j++) { + const size_t D = (j * 8 * fixed_chunk_len) - 33; + + printf("#define CRC32_FIXED_CHUNK_MULT_%d 0x%08"PRIx32" /* x^%zu mod G(x) */\n", + j, compute_xD_modG(D), D); + } +} + +int +main(void) +{ + 
printf("/*\n" + " * crc32_multipliers.h - constants for CRC-32 folding\n" + " *\n" + " * THIS FILE WAS GENERATED BY gen_crc32_multipliers.c. DO NOT EDIT.\n" + " */\n" + "\n"); + + gen_vec_folding_constants(); + printf("\n"); + gen_chunk_constants(); + return 0; +} diff --git a/tools/z64compress/src/enc/libdeflate/scripts/gen_crc32_tables.c b/tools/z64compress/src/enc/libdeflate/scripts/gen_crc32_tables.c new file mode 100644 index 000000000..b13fc5c49 --- /dev/null +++ b/tools/z64compress/src/enc/libdeflate/scripts/gen_crc32_tables.c @@ -0,0 +1,105 @@ +/* + * gen_crc32_tables.c - a program for CRC-32 table generation + * + * Copyright 2016 Eric Biggers + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include + +#include "../common_defs.h" + +#define CRCPOLY 0xEDB88320 /* G(x) without x^32 term */ + +static u32 +crc32_update_bit(u32 remainder, u8 next_bit) +{ + return (remainder >> 1) ^ (((remainder ^ next_bit) & 1) ? 
CRCPOLY : 0); +} + +static u32 +crc32_update_byte(u32 remainder, u8 next_byte) +{ + for (int j = 0; j < 8; j++, next_byte >>= 1) + remainder = crc32_update_bit(remainder, next_byte & 1); + return remainder; +} + +static void +print_256_entries(const u32 *entries) +{ + for (size_t i = 0; i < 256 / 4; i++) { + printf("\t"); + for (size_t j = 0; j < 4; j++) { + printf("0x%08x,", entries[i * 4 + j]); + if (j != 3) + printf(" "); + } + printf("\n"); + } +} + +int +main(void) +{ + u32 crc32_table[0x800]; + + /* crc32_table[i] for 0 <= i < 0x100 is the CRC-32 of byte i. */ + for (int i = 0; i < 0x100; i++) + crc32_table[i] = crc32_update_byte(0, i); + + /* + * crc32_table[i] for 0x100 <= i < 0x800 is the CRC-32 of byte i % 0x100 + * followed by i / 0x100 zero bytes. + */ + for (int i = 0x100; i < 0x800; i++) + crc32_table[i] = crc32_update_byte(crc32_table[i - 0x100], 0); + + printf("/*\n"); + printf(" * crc32_tables.h - data tables for CRC-32 computation\n"); + printf(" *\n"); + printf(" * THIS FILE WAS GENERATED BY gen_crc32_tables.c. DO NOT EDIT.\n"); + printf(" */\n"); + printf("\n"); + /* + * Although crc32_slice1_table is the same as the first 256 entries of + * crc32_slice8_table, we output these tables separately because any + * combo of (slice1, slice8, slice1 && slice8, nothing) might be needed, + * and it's simplest to let the compiler optimize out any unused tables. 
+ */ + printf("static const u32 crc32_slice1_table[] MAYBE_UNUSED = {\n"); + print_256_entries(&crc32_table[0x000]); + printf("};\n"); + printf("\n"); + printf("static const u32 crc32_slice8_table[] MAYBE_UNUSED = {\n"); + print_256_entries(&crc32_table[0x000]); + print_256_entries(&crc32_table[0x100]); + print_256_entries(&crc32_table[0x200]); + print_256_entries(&crc32_table[0x300]); + print_256_entries(&crc32_table[0x400]); + print_256_entries(&crc32_table[0x500]); + print_256_entries(&crc32_table[0x600]); + print_256_entries(&crc32_table[0x700]); + printf("};\n"); + return 0; +} diff --git a/tools/z64compress/src/enc/libdeflate/scripts/gen_default_litlen_costs.py b/tools/z64compress/src/enc/libdeflate/scripts/gen_default_litlen_costs.py new file mode 100644 index 000000000..26b8d9ccb --- /dev/null +++ b/tools/z64compress/src/enc/libdeflate/scripts/gen_default_litlen_costs.py @@ -0,0 +1,44 @@ +#!/usr/bin/env python3 +# +# This script computes the default litlen symbol costs for the near-optimal +# parser. 
+ +from math import log2 + +BIT_COST = 16 # Must match BIT_COST in deflate_compress.c +NUM_LEN_SLOTS = 29 + +print("""static const struct { + u8 used_lits_to_lit_cost[257]; + u8 len_sym_cost; +} default_litlen_costs[] = {""") +MATCH_PROBS = [0.25, 0.50, 0.75] +for i, match_prob in enumerate(MATCH_PROBS): + len_prob = match_prob / NUM_LEN_SLOTS + len_sym_cost = int(-log2(len_prob) * BIT_COST) + if i == 0: + print('\t{', end='') + print(f' /* match_prob = {match_prob} */') + print('\t\t.used_lits_to_lit_cost = {') + + j = 0 + for num_used_literals in range(0, 257): + if num_used_literals == 0: + num_used_literals = 1 + lit_prob = (1 - match_prob) / num_used_literals + lit_cost = int(-log2(lit_prob) * BIT_COST) + if j == 0: + print('\t\t\t', end='') + if j == 7 or num_used_literals == 256: + print(f'{lit_cost},') + j = 0 + else: + print(f'{lit_cost}, ', end='') + j += 1 + print('\t\t},') + print(f'\t\t.len_sym_cost = {len_sym_cost},') + if i < len(MATCH_PROBS) - 1: + print('\t}, {', end='') + else: + print('\t},') +print('};') diff --git a/tools/z64compress/src/enc/libdeflate/scripts/gen_offset_slot_map.py b/tools/z64compress/src/enc/libdeflate/scripts/gen_offset_slot_map.py new file mode 100644 index 000000000..500332cfb --- /dev/null +++ b/tools/z64compress/src/enc/libdeflate/scripts/gen_offset_slot_map.py @@ -0,0 +1,41 @@ +#!/usr/bin/env python3 +# +# This script generates the deflate_offset_slot[] array, which is a condensed +# map from offsets to offset slots. 
+ +DEFLATE_OFFSET_SLOT_BASE = [ + 1 , 2 , 3 , 4 , 5 , 7 , 9 , 13 , + 17 , 25 , 33 , 49 , 65 , 97 , 129 , 193 , + 257 , 385 , 513 , 769 , 1025 , 1537 , 2049 , 3073 , + 4097 , 6145 , 8193 , 12289 , 16385 , 24577 , +] + +DEFLATE_EXTRA_OFFSET_BITS = [ + 0 , 0 , 0 , 0 , 1 , 1 , 2 , 2 , + 3 , 3 , 4 , 4 , 5 , 5 , 6 , 6 , + 7 , 7 , 8 , 8 , 9 , 9 , 10 , 10 , + 11 , 11 , 12 , 12 , 13 , 13 , +] + +offset_slot_map = [0] * 512 + +for offset_slot, offset_base in enumerate(DEFLATE_OFFSET_SLOT_BASE): + num_extra_bits = DEFLATE_EXTRA_OFFSET_BITS[offset_slot] + offset_end = offset_base + (1 << num_extra_bits) + if offset_base <= 256: + for offset in range(offset_base, offset_end): + offset_slot_map[offset] = offset_slot + else: + for offset in range(offset_base, offset_end, 128): + offset_slot_map[256 + ((offset - 1) >> 7)] = offset_slot + +print('static const u8 deflate_offset_slot_map[512] = {') +for i in range(0, len(offset_slot_map), 16): + print('\t', end='') + for j, v in enumerate(offset_slot_map[i:i+16]): + print(f'{v},', end='') + if j == 15: + print('') + else: + print(' ', end='') +print('};') diff --git a/tools/z64compress/src/enc/libdeflate/scripts/gzip_tests.sh b/tools/z64compress/src/enc/libdeflate/scripts/gzip_tests.sh new file mode 100644 index 000000000..9b15cd1af --- /dev/null +++ b/tools/z64compress/src/enc/libdeflate/scripts/gzip_tests.sh @@ -0,0 +1,523 @@ +#!/bin/bash +# +# Test script for libdeflate's gzip and gunzip programs. +# +# To run, you must set GZIP and GUNZIP in the environment to the absolute paths +# to the gzip and gunzip programs to test. All tests should pass regardless of +# whether the GNU versions or the libdeflate versions, or a combination, of +# these programs are used. +# +# The environmental variable TESTDATA must also be set to a file containing +# test data. +# + +set -eu -o pipefail + +export -n GZIP GUNZIP TESTDATA + +ORIG_PWD=$PWD +TMPDIR="$(mktemp -d)" +CURRENT_TEST= + +BSD_STAT=false +if ! 
stat --version 2>&1 | grep -q coreutils; then + BSD_STAT=true +fi + +cleanup() { + if [ -n "$CURRENT_TEST" ]; then + echo "TEST FAILED: \"$CURRENT_TEST\"" + fi + rm -rf -- "$TMPDIR" +} + +trap cleanup EXIT + +begin_test() { + CURRENT_TEST="$1" + rm -rf -- "${TMPDIR:?}"/* + cd "$ORIG_PWD" + cp "$TESTDATA" "$TMPDIR/file" + chmod +w "$TMPDIR/file" + cd "$TMPDIR" +} + +gzip() { + $GZIP "$@" +} + +gunzip() { + $GUNZIP "$@" +} + +get_filesize() { + local file=$1 + + if $BSD_STAT; then + stat -f %z "$file" + else + stat -c %s "$file" + fi +} + +get_linkcount() { + local file=$1 + + if $BSD_STAT; then + stat -f %l "$file" + else + stat -c %h "$file" + fi +} + +get_modeandtimestamps() { + local file=$1 + + if $BSD_STAT; then + stat -f "%p;%a;%m" "$file" + elif [ "$(uname -m)" = s390x ]; then + # Use seconds precision instead of nanoseconds. + # TODO: why is this needed? QEMU user mode emulation bug? + stat -c "%a;%X;%Y" "$file" + else + stat -c "%a;%x;%y" "$file" + fi +} + +assert_status() { + local expected_status="$1" + local expected_msg="$2" + shift 2 + ( + set +e + { eval "$*" > /dev/null; } 2>&1 + local actual_status=$? + if [ "$actual_status" != "$expected_status" ]; then + echo 1>&2 "Command '$*' exited with status" \ + "$actual_status but expected status" \ + "$expected_status" + exit 1 + fi + exit 0 + ) > command_output + if ! 
grep -E -q "$expected_msg" command_output; then + echo 1>&2 "Expected output of command '$*' to match regex" \ + "'$expected_msg'" + echo 1>&2 "Actual output was:" + echo 1>&2 "---------------------------------------------------" + cat 1>&2 command_output + echo 1>&2 "---------------------------------------------------" + return 1 + fi +} + +assert_error() { + assert_status 1 "$@" +} + +assert_warning() { + assert_status 2 "$@" +} + +assert_skipped() { + assert_warning '\<(ignored|skipping|unchanged)\>' "$@" +} + +assert_equals() { + local expected="$1" + local actual="$2" + + if [ "$expected" != "$actual" ]; then + echo 1>&2 "Expected '$expected', but got '$actual'" + return 1 + fi +} + + +begin_test 'Basic compression and decompression works' +cp file orig +gzip file +[ ! -e file ] && [ -e file.gz ] +gunzip file.gz +[ -e file ] && [ ! -e file.gz ] +cmp file orig + + +begin_test 'gzip -d is gunzip' +cp file orig +gzip file +gzip -d file.gz +cmp file orig + + +begin_test '-k (keep original file) works' +cp file orig +gzip -k file +cmp file orig +rm file +cp file.gz orig.gz +gunzip -k file.gz +cmp file.gz orig.gz + + +begin_test '-c (write to stdout) works' +cp file orig +gzip -k file +gzip -c file > 2.gz +cmp file orig +cmp file.gz 2.gz +gunzip -c 2.gz > file +cmp file.gz 2.gz +cmp file orig + + +# Note: in some of the commands below, we intentionally use 'cat file | gzip' +# rather than 'gzip < file', in order to test the use of a pipe. This produces +# a shellcheck warning about 'cat' being unnecessary. Suppress that warning by +# using { cat file; true; }. 
+begin_test 'Reading from stdin works' +gzip < file > 1.gz +gzip - < file > 2.gz +{ cat file; true; } | gzip > 3.gz +{ cat file; true; } | gzip - > 4.gz +cmp file <(gunzip < 1.gz) +cmp file <(gunzip - < 2.gz) +cmp file <({ cat 3.gz; true; } | gunzip) +cmp file <({ cat 4.gz; true; } | gunzip -) + + +begin_test '-n option is accepted' +gzip -n file +gunzip -n file.gz + + +begin_test 'can specify multiple options' +gzip -fk1 file +cmp <(gzip -c -1 file) file.gz +gunzip -kfd file.gz + + +begin_test 'Compression levels' +if [ "$GZIP" = /bin/gzip ] || [ "$GZIP" = /usr/bin/gzip ]; then + assert_error '\' gzip -10 + max_level=9 +else + for level in 13 99999 1a; do + assert_error '\' gzip -$level + done + max_level=12 +fi +for level in $(seq 1 $max_level); do + gzip -c "-$level" file > "file$level" + cmp file <(gunzip -c "file$level") +done +rm file command_output + + +begin_test 'Overwriting output file requires -f' +cp file orig +echo -n > file.gz +gzip -c file > 2.gz +assert_warning 'already exists' gzip file file +assert_warning 'already exists' gunzip file.gz c.gz +gzip file.gz 2>&1 >/dev/null | grep -q 'already has .gz suffix' +[ -e file.gz ] && [ ! -e file.gz.gz ] +gzip -f file.gz +[ ! -e file.gz ] && [ -e file.gz.gz ] +cmp file.gz.gz c.gz + + +begin_test 'gunzip -f -c passes through non-gzip data' +echo hello > file +cp file orig +gunzip -f -c file > foo +cmp file foo +gzip file +gunzip -f -c file.gz > foo +cmp foo orig + + +begin_test 'gunzip -f (without -c) does *not* pass through non-gzip data' +echo hello > file.gz +assert_error '\' gunzip -f file.gz + + +begin_test 'Decompressing unsuffixed file only works with -c' +gzip file && mv file.gz file +assert_skipped gunzip file +assert_skipped gunzip -f file +gunzip -c file > orig +mv file file.gz && gunzip file.gz && cmp file orig + + +begin_test '... unless there is a corresponding suffixed file' +cp file orig +gzip file +[ ! 
-e file ] && [ -e file.gz ] +gunzip -c file > tmp +cmp tmp orig +rm tmp +ln -s NONEXISTENT file +gunzip -c file > tmp +cmp tmp orig +rm tmp file +gunzip file +[ -e file ] && [ ! -e file.gz ] +cmp file orig + + +begin_test 'Directory is skipped, even with -f' +mkdir dir +mkdir dir.gz +for opt in '' '-f' '-c'; do + assert_skipped gzip $opt dir +done +#assert_skipped gzip dir.gz # XXX: GNU gzip warns, libdeflate gzip no-ops +for opt in '' '-f' '-c'; do + for name in dir dir.gz; do + assert_skipped gunzip $opt $name + done +done + + +begin_test '(gzip) symlink is rejected without -f or -c' +ln -s file symlink1 +ln -s file symlink2 +assert_error 'Too many levels of symbolic links' gzip symlink1 +[ -e file ] && [ -e symlink1 ] && [ ! -e symlink1.gz ] +gzip -f symlink1 +[ -e file ] && [ ! -e symlink1 ] && [ -e symlink1.gz ] +gzip -c symlink2 > /dev/null + + +begin_test '(gunzip) symlink is rejected without -f or -c' +gzip file +ln -s file.gz symlink1.gz +ln -s file.gz symlink2.gz +assert_error 'Too many levels of symbolic links' gunzip symlink1 +[ -e file.gz ] && [ -e symlink1.gz ] && [ ! -e symlink1 ] +gunzip -f symlink1.gz +[ -e file.gz ] && [ ! 
-e symlink1.gz ] && [ -e symlink1 ] +gunzip -c symlink2.gz > /dev/null + + +begin_test 'FIFO is skipped, even with -f' +mkfifo foo +mkfifo foo.gz +assert_skipped gzip foo +assert_skipped gzip -f foo +#assert_skipped gzip -c foo # XXX: works with GNU gzip, not libdeflate's +assert_skipped gunzip foo.gz +assert_skipped gunzip -f foo.gz +#assert_skipped gunzip -c foo.gz # XXX: works with GNU gzip, not libdeflate's + + +begin_test '(gzip) overwriting symlink does not follow symlink' +echo a > a +echo b > b +gzip a +ln -s a.gz b.gz +gzip -f b +gunzip a.gz +cmp <(echo a) a + + +begin_test '(gunzip) overwriting symlink does not follow symlink' +echo a > a +echo b > b +gzip b +ln -s a b +gunzip -f b.gz +cmp <(echo a) a +cmp <(echo b) b + + +begin_test '(gzip) hard linked file skipped without -f or -c' +cp file orig +ln file link +assert_equals 2 "$(get_linkcount file)" +assert_skipped gzip file +gzip -c file > /dev/null +assert_equals 2 "$(get_linkcount file)" +gzip -f file +assert_equals 1 "$(get_linkcount link)" +assert_equals 1 "$(get_linkcount file.gz)" +cmp link orig +# XXX: GNU gzip skips hard linked files with -k, libdeflate's doesn't + + +begin_test '(gunzip) hard linked file skipped without -f or -c' +gzip file +ln file.gz link.gz +cp file.gz orig.gz +assert_equals 2 "$(get_linkcount file.gz)" +assert_skipped gunzip file.gz +gunzip -c file.gz > /dev/null +assert_equals 2 "$(get_linkcount file.gz)" +gunzip -f file +assert_equals 1 "$(get_linkcount link.gz)" +assert_equals 1 "$(get_linkcount file)" +cmp link.gz orig.gz + + +begin_test 'Multiple files' +cp file file2 +gzip file file2 +[ ! -e file ] && [ ! -e file2 ] && [ -e file.gz ] && [ -e file2.gz ] +gunzip file.gz file2.gz +[ -e file ] && [ -e file2 ] && [ ! -e file.gz ] && [ ! -e file2.gz ] + + +begin_test 'Multiple files, continue on warning' +mkdir 1 +cp file 2 +assert_skipped gzip 1 2 +[ ! -e 1.gz ] +cmp file <(gunzip -c 2.gz) +rmdir 1 +mkdir 1.gz +assert_skipped gunzip 1.gz 2.gz +[ ! 
-e 1 ] +cmp 2 file + + +if (( $(id -u) != 0 )); then + begin_test 'Multiple files, continue on error' + cp file 1 + cp file 2 + chmod a-r 1 + assert_error 'Permission denied' gzip 1 2 + [ ! -e 1.gz ] + cmp file <(gunzip -c 2.gz) + rm -f 1 + cp 2.gz 1.gz + chmod a-r 1.gz + assert_error 'Permission denied' gunzip 1.gz 2.gz + [ ! -e 1 ] + cmp 2 file +fi + + +begin_test 'Compressing empty file' +echo -n > empty +gzip empty +gunzip empty.gz +cmp /dev/null empty + + +begin_test 'Decompressing malformed file' +echo -n > foo.gz +assert_error '\<(not in gzip format|unexpected end of file)\>' \ + gunzip foo.gz +echo 1 > foo.gz +assert_error '\' gunzip foo.gz +echo abcdefgh > foo.gz +assert_error '\' gunzip foo.gz +echo -ne '\x1f\x8b\x08\x00\x00\x00\x00\x00\x00\xff\x4b\x4c\x4a\x4e\x49\x24\x16\x73\x01\x00\x6c\x5b\xa2\x62\x2e\x00\x00\x00' \ + > foo.gz +assert_error '\<(not in gzip format|crc error)\>' gunzip foo.gz + + +for suf in .foo foo .blaaaaaaaaaaaaaaaargh; do + begin_test "Custom suffix: $suf" + gzip -S $suf file + [ ! -e file ] && [ ! -e file.gz ] && [ -e file$suf ] + assert_skipped gunzip file$suf + gunzip -S $suf file$suf + [ -e file ] && [ ! -e file.gz ] && [ ! 
-e file$suf ] +done +# DIFFERENCE: GNU gzip lower cases suffix, we don't + + +begin_test 'Empty suffix is rejected' +assert_error '\' gzip -S '""' file +assert_error '\' gunzip -S '""' file + + +begin_test 'Timestamps and mode are preserved' +chmod 777 file +orig_stat=$(get_modeandtimestamps file) +gzip file +sleep 1 +gunzip file.gz +assert_equals "$orig_stat" "$(get_modeandtimestamps file)" + + +begin_test 'Decompressing multi-member gzip file' +cat file file > orig +gzip -c file > file.gz +gzip -c file >> file.gz +gunzip -f file.gz +cmp file orig + + +begin_test 'Decompressing multi-member gzip file (final member smaller)' +echo 'hello world' > hello +cat file hello > orig +gzip -c file > file.gz +gzip -c hello >> file.gz +gunzip -f file.gz +cmp file orig + + +begin_test 'Help option' +gzip -h 2>&1 | grep -q 'Usage' +gunzip -h 2>&1 | grep -q 'Usage' + + +begin_test 'Incorrect usage' +for prog in gzip gunzip; do + for opt in '--invalid-option' '-0'; do + assert_error '\<(unrecognized|invalid) option\>' $prog $opt + done +done + + +begin_test '-t (test) option works' +good_files=( +'H4sIAAAAAAAAA3PMSVTITVTIzi9JVABTIJ5jzpGZelwAX+86ehsAAAA=' +'H4sIAAAAAAAAAwvJSFUoLM1MzlZIKsovz1NIy69QyCrNLShWyC9LLVIoAUrnJFZVKqTkp+txAQBqzFDrLQAAAA==') +bad_files=( +'H4sIAO1YYmAAA3PMSVTITVTIzi9JVABTIJ5jzpGZelwAX+46ehsAAAA=' +'H4sIAO1YYmAAA3PMSVTITVTIzi85VABTIJ5jzpGZelwAX+86ehsAAAA=' +'H4sIAAAAAAAAA3PMSVTITVTIzi9JVABTIJ5jzpGZelwAX+86ehsBAAA=' +'H4sIAAAAAAAAAwvJSFUoLM1MzlZIKsovz1NIy69QyCrNLShWyC9LLVIogUrnJFZVKqTkp+txAQBqzFDrLQAAAA==' +'H4sIAAAAAAAAAwvJSFUoLM1MzlZIKsovz1NIy69QyCrNLShWyC9L') +for contents in "${good_files[@]}"; do + echo "$contents" | base64 -d | gzip -t +done +for contents in "${bad_files[@]}"; do + echo "$contents" | base64 -d > file + assert_error '\' \ + gzip -t file +done + + +begin_test '-q (quiet) option works' +mkdir dir +gunzip -q dir &> output || true +[ ! 
-s output ] + + +begin_test 'Version information' +gzip -V | grep -q Copyright +gunzip -V | grep -q Copyright + +CURRENT_TEST= diff --git a/tools/z64compress/src/enc/libdeflate/scripts/make-windows-releases.sh b/tools/z64compress/src/enc/libdeflate/scripts/make-windows-releases.sh new file mode 100644 index 000000000..332f42c51 --- /dev/null +++ b/tools/z64compress/src/enc/libdeflate/scripts/make-windows-releases.sh @@ -0,0 +1,21 @@ +#!/bin/bash + +set -eu -o pipefail + +for arch in 'i686' 'x86_64'; do + dir=libdeflate-$(git describe --tags | tr -d v)-windows-${arch}-bin + rm -rf build "$dir" "$dir.zip" + CFLAGS="-Werror" ${arch}-w64-mingw32-cmake -B build -G Ninja \ + -DLIBDEFLATE_BUILD_TESTS=1 + cmake --build build + mkdir "$dir" + cp libdeflate.h build/libdeflate.{dll,dll.a,a} \ + build/programs/{benchmark,checksum}.exe "$dir" + cp build/programs/libdeflate-gzip.exe "$dir"/gzip.exe + cp build/programs/libdeflate-gzip.exe "$dir"/gunzip.exe + ${arch}-w64-mingw32-strip "$dir"/libdeflate.dll "$dir"/*.exe + for file in COPYING NEWS.md README.md; do + sed < $file > "$dir/${file}.txt" -e 's/$/\r/g' + done + (cd "$dir" && zip -r "../${dir}.zip" .) 
+done diff --git a/tools/z64compress/src/enc/libdeflate/scripts/msc_test.bat b/tools/z64compress/src/enc/libdeflate/scripts/msc_test.bat new file mode 100644 index 000000000..e8a150fa0 --- /dev/null +++ b/tools/z64compress/src/enc/libdeflate/scripts/msc_test.bat @@ -0,0 +1,3 @@ +nmake /f Makefile.msc clean +nmake /f Makefile.msc +copy /y *.exe j:\exe\ diff --git a/tools/z64compress/src/enc/libdeflate/scripts/run_tests.sh b/tools/z64compress/src/enc/libdeflate/scripts/run_tests.sh new file mode 100644 index 000000000..44d3b9e25 --- /dev/null +++ b/tools/z64compress/src/enc/libdeflate/scripts/run_tests.sh @@ -0,0 +1,415 @@ +#!/bin/bash +# +# Test script for libdeflate +# +# Usage: +# Run all tests: +# ./run_tests.sh +# Run only the given tests: +# ./run_tests.sh asan valgrind +# Run all tests other than the given ones: +# ./run_tests.sh ^asan ^valgrind +# +# See TEST_FUNCS for the available tests. + +set -eu -o pipefail +cd "$(dirname "$0")/.." + +# Use CC if specified in environment, else default to "cc". +: "${CC:=cc}" + +export CFLAGS="-Werror -DLIBDEFLATE_ENABLE_ASSERTIONS" + +# No wrapper by default; overridden by valgrind tests +export WRAPPER= + +TEST_FUNCS=() + +CLEANUP_CMDS=() +cleanup() { + for cmd in "${CLEANUP_CMDS[@]}"; do + eval "$cmd" + done +} +trap cleanup EXIT + +CLEANUP_CMDS+=("rm -rf build") + +# Use TESTDATA if specified in environment, else generate it. +if [ -z "${TESTDATA:-}" ]; then + # Generate default TESTDATA file. + TESTDATA=$(mktemp -t libdeflate_testdata.XXXXXXXXXX) + export TESTDATA + CLEANUP_CMDS+=("rm -f '$TESTDATA'") + find . 
'(' -name '*.c' -o -name '*.h' -o -name '*.sh' ')' \ + -exec cat '{}' ';' | head -c 1000000 > "$TESTDATA" +fi + +TMPDIR=$(mktemp -d -t libdeflate_test.XXXXXXXXX) +CLEANUP_CMDS+=("rm -r '$TMPDIR'") + +MAKE="make -j$(getconf _NPROCESSORS_ONLN)" + +UNAME=$(uname) +ARCH=$(uname -m) + +SHLIB=build/libdeflate.so +if [ "$UNAME" = Darwin ]; then + SHLIB=build/libdeflate.dylib +fi + +############################################################################### + +INDENT=0 + +log() +{ + echo -n "[$(date)] " + if (( INDENT != 0 )); then + head -c $(( INDENT * 4 )) /dev/zero | tr '\0' ' ' + fi + echo "$@" +} + +begin() +{ + log "$@" + (( INDENT++ )) || true +} + +end() +{ + (( INDENT-- )) || true +} + +run_cmd() +{ + log "$@" + "$@" > /dev/null +} + +fail() +{ + echo 1>&2 "$@" + exit 1 +} + +file_count() +{ + local dir=$1 + + find "$dir" -type f -o -type l | wc -l +} + +cflags_supported() +{ + # -Werror is needed here in order for old versions of clang to reject + # invalid options. + echo 'int main(void){ return 0; }' \ + | $CC "$@" -Werror -x c - -o /dev/null 2>/dev/null +} + +# Build libdeflate, including the test programs. Set the special test support +# flag to get support for LIBDEFLATE_DISABLE_CPU_FEATURES. +build() +{ + CFLAGS="$CFLAGS -DTEST_SUPPORT__DO_NOT_USE=1" scripts/cmake-helper.sh \ + -DLIBDEFLATE_BUILD_TESTS=1 "$@" > /dev/null + $MAKE -C build > /dev/null +} + +build_and_run_tests() +{ + local quick=false + if [ "${1:-}" = "--quick" ]; then + quick=true + shift + fi + + begin "CC=$CC CFLAGS=\"$CFLAGS\" WRAPPER=\"$WRAPPER\" $*" + + build "$@" + + # When not using -march=native, run the tests multiple times with + # different combinations of CPU features disabled. This is needed to + # test all variants of dynamically-dispatched code. + # + # For now, we aren't super exhausive in which combinations of features + # we test disabling. We just disable the features roughly in order from + # newest to oldest for each architecture, cumulatively. 
In practice, + # that's good enough to cover all the code. + local features=('') + if ! [[ "$CFLAGS" =~ "-march=native" ]] && ! $quick; then + case "$ARCH" in + i386|x86_64) + features+=(avx2 avx bmi2 pclmul sse2) + ;; + arm*|aarch*) + features+=(dotprod sha3 crc32 pmull neon) + ;; + esac + fi + local disable_str="" + local feature + for feature in "${features[@]}"; do + if [ -n "$feature" ]; then + if [ -n "$disable_str" ]; then + disable_str+="," + fi + disable_str+="$feature" + fi + log "Using LIBDEFLATE_DISABLE_CPU_FEATURES=$disable_str" + LIBDEFLATE_DISABLE_CPU_FEATURES="$disable_str" \ + sh ./scripts/exec_tests.sh build/programs/ > /dev/null + done + end +} + +is_compatible_system_gzip() +{ + local prog=$1 + + # Needs to exist. + if ! [ -e "$prog" ]; then + return 1 + fi + # Needs to be GNU gzip. + if ! "$prog" -V 2>&1 | grep -q 'Free Software Foundation'; then + return 1 + fi + # Needs to support the -k option, i.e. be v1.6 or later. + if echo | { "$prog" -k 2>&1 >/dev/null || true; } \ + | grep -q 'invalid option'; then + return 1 + fi + return 0 +} + +gzip_tests() +{ + local gzips=("$PWD/build/programs/libdeflate-gzip") + local gunzips=("$PWD/build/programs/libdeflate-gzip -d") + if [ "${1:-}" != "--quick" ]; then + if is_compatible_system_gzip /bin/gzip; then + gzips+=(/bin/gzip) + gunzips+=(/bin/gunzip) + elif is_compatible_system_gzip /usr/bin/gzip; then + gzips+=(/usr/bin/gzip) + gunzips+=(/usr/bin/gunzip) + else + log "Unsupported system gzip; skipping comparison with system gzip" + fi + fi + local gzip gunzip + + begin "Running gzip program tests with CC=\"$CC\" CFLAGS=\"$CFLAGS\"" + build + for gzip in "${gzips[@]}"; do + for gunzip in "${gunzips[@]}"; do + log "GZIP=$gzip, GUNZIP=$gunzip" + GZIP="$gzip" GUNZIP="$gunzip" TESTDATA="$TESTDATA" \ + ./scripts/gzip_tests.sh + done + done + end +} + +do_run_tests() +{ + build_and_run_tests "$@" + gzip_tests "$@" +} + +################################################################################ + 
+regular_test() +{ + do_run_tests +} +TEST_FUNCS+=(regular_test) + +O3_test() +{ + CFLAGS="$CFLAGS -O3" do_run_tests +} +TEST_FUNCS+=(O3_test) + +march_native_test() +{ + if ! cflags_supported "-march=native"; then + log "Compiler doesn't support -march=native; skipping test" + return + fi + CFLAGS="$CFLAGS -march=native" do_run_tests +} +TEST_FUNCS+=(march_native_test) + +valgrind_version_at_least() +{ + local want_vers=$1 + local vers + + if ! type -P valgrind &> /dev/null; then + return 1 + fi + + vers=$(valgrind --version | grep -E -o '[0-9\.]+' | head -1) + + [ "$want_vers" = "$(echo -e "$vers\n$want_vers" | sort -V | head -1)" ] +} + +valgrind_test() +{ + # Need valgrind 3.9.0 for '--errors-for-leak-kinds=all' + # Need valgrind 3.12.0 for armv8 crypto and crc instructions + if ! valgrind_version_at_least 3.12.0; then + log "valgrind not found; skipping test" + return + fi + WRAPPER="valgrind --quiet --error-exitcode=100 --leak-check=full --errors-for-leak-kinds=all" \ + do_run_tests --quick +} +TEST_FUNCS+=(valgrind_test) + +ubsan_test() +{ + local cflags=("-fsanitize=undefined" "-fno-sanitize-recover=undefined") + if ! cflags_supported "${cflags[@]}"; then + log "Compiler doesn't support UBSAN; skipping test" + return + fi + CFLAGS="$CFLAGS ${cflags[*]}" do_run_tests --quick +} +TEST_FUNCS+=(ubsan_test) + +asan_test() +{ + local cflags=("-fsanitize=address" "-fno-sanitize-recover=address") + if ! cflags_supported "${cflags[@]}"; then + log "Compiler doesn't support ASAN; skipping test" + return + fi + CFLAGS="$CFLAGS ${cflags[*]}" do_run_tests --quick +} +TEST_FUNCS+=(asan_test) + +cfi_test() +{ + local cflags=("-fsanitize=cfi" "-fno-sanitize-recover=cfi" "-flto" + "-fvisibility=hidden") + if ! 
cflags_supported "${cflags[@]}"; then + log "Compiler doesn't support CFI; skipping test" + return + fi + CFLAGS="$CFLAGS ${cflags[*]}" AR=llvm-ar do_run_tests --quick +} +TEST_FUNCS+=(cfi_test) + +install_test() +{ + build + $MAKE -C build install DESTDIR=inst > /dev/null +} +TEST_FUNCS+=(install_test) + +symbol_prefix_test() +{ + build + log "Checking that all global symbols are prefixed with \"libdeflate_\"" + if nm build/libdeflate.a | grep ' T ' | grep -E -v " _?libdeflate_" + then + fail "Some global symbols aren't prefixed with \"libdeflate_\"" + fi + log "Checking that all exported symbols are prefixed with \"libdeflate\"" + if nm $SHLIB | grep ' T ' \ + | grep -E -v " _?(libdeflate_|_init\>|_fini\>)"; then + fail "Some exported symbols aren't prefixed with \"libdeflate_\"" + fi +} +TEST_FUNCS+=(symbol_prefix_test) + +is_dynamically_linked() +{ + local prog=$1 + + if [ "$UNAME" = Darwin ]; then + otool -L "$prog" | grep -q libdeflate + else + ldd "$prog" | grep -q libdeflate + fi +} + +use_shared_lib_test() +{ + log "Testing USE_SHARED_LIB=1" + build + if is_dynamically_linked build/programs/libdeflate-gzip; then + fail "Binary should be statically linked by default" + fi + build -DLIBDEFLATE_USE_SHARED_LIB=1 > /dev/null + if ! 
is_dynamically_linked build/programs/libdeflate-gzip; then + fail "Binary isn't dynamically linked" + fi +} +TEST_FUNCS+=(use_shared_lib_test) + +freestanding_test() +{ + if [ "$UNAME" = Darwin ]; then + log "Skipping freestanding build tests due to unsupported OS" + return + fi + build_and_run_tests --quick -DLIBDEFLATE_FREESTANDING=1 + if nm $SHLIB | grep -v '\<__stack_chk_fail\>' | grep -q ' U '; then + echo 1>&2 "Freestanding lib links to external functions!:" + nm $SHLIB | grep ' U ' + return 1 + fi + if ldd $SHLIB | grep -q -v '\'; then + echo 1>&2 "Freestanding lib links to external libraries!:" + ldd $SHLIB + return 1 + fi +} +TEST_FUNCS+=(freestanding_test) + +############################################################################### + +declare -A all_tests +for test_func in "${TEST_FUNCS[@]}"; do + all_tests["${test_func%_test}"]=true +done +declare -A tests_to_run + +# Determine the set of tests to run by applying any inclusions and exclusions +# given on the command line. If no inclusions were given, then default to all +# tests (subject to exclusions). +all=true +for arg; do + if [[ $arg != ^* ]]; then + all=false + fi +done +if $all; then + for t in "${!all_tests[@]}"; do + tests_to_run[$t]=true + done +fi +for arg; do + if [[ $arg == ^* ]]; then + unset "tests_to_run[${arg#^}]" + elif [[ -z ${all_tests["$arg"]:-} ]]; then + fail "Unknown test '$arg'. Options are: ${!all_tests[*]}" + else + tests_to_run["$arg"]=true + fi +done + +# Actually run the tests. +log "Running libdeflate tests: ${!tests_to_run[*]}" +for t in "${!tests_to_run[@]}"; do + begin "Running ${t}_test" + eval "${t}_test" + end +done +log "All tests passed!" 
diff --git a/tools/z64compress/src/enc/libdeflate/scripts/toolchain-i686-w64-mingw32.cmake b/tools/z64compress/src/enc/libdeflate/scripts/toolchain-i686-w64-mingw32.cmake new file mode 100644 index 000000000..0b8063648 --- /dev/null +++ b/tools/z64compress/src/enc/libdeflate/scripts/toolchain-i686-w64-mingw32.cmake @@ -0,0 +1,8 @@ +set(CMAKE_SYSTEM_NAME Windows) +set(CMAKE_SYSTEM_PROCESSOR i686) +set(CMAKE_C_COMPILER i686-w64-mingw32-gcc) +set(CMAKE_FIND_ROOT_PATH /usr/i686-w64-mingw32) +set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY) +set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY) +set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY) +set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) diff --git a/tools/z64compress/src/enc/libdeflate/scripts/toolchain-x86_64-w64-mingw32.cmake b/tools/z64compress/src/enc/libdeflate/scripts/toolchain-x86_64-w64-mingw32.cmake new file mode 100644 index 000000000..f9d6e37f2 --- /dev/null +++ b/tools/z64compress/src/enc/libdeflate/scripts/toolchain-x86_64-w64-mingw32.cmake @@ -0,0 +1,8 @@ +set(CMAKE_SYSTEM_NAME Windows) +set(CMAKE_SYSTEM_PROCESSOR x86_64) +set(CMAKE_C_COMPILER x86_64-w64-mingw32-gcc) +set(CMAKE_FIND_ROOT_PATH /usr/x86_64-w64-mingw32) +set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY) +set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY) +set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY) +set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) diff --git a/tools/z64compress/src/enc/lzo.c b/tools/z64compress/src/enc/lzo.c new file mode 100644 index 000000000..a6f9b3b26 --- /dev/null +++ b/tools/z64compress/src/enc/lzo.c @@ -0,0 +1,55 @@ +#include +#include +#include +#include "lzo/lzoconf.h" +#include "lzo/lzo1x.h" + +void +lzoCtx_free(void *_ctx) +{ + if (!_ctx) + return; + + free(_ctx); +} + +void * +lzoCtx_new(void) +{ + return malloc(LZO1X_999_MEM_COMPRESS); +} + +int +lzoenc( + void *_src + , unsigned src_sz + , void *_dst + , unsigned *dst_sz + , void *_ctx +) +{ + unsigned char *src = _src; + unsigned char *dst = _dst; + unsigned char *wrkmem = _ctx; + lzo_uint 
result_sz = 0; + + extern int g_hlen; /* header length */ + memset(dst, 0, g_hlen); + memcpy(dst, "LZO0", 4); + dst[4] = (src_sz >> 24); + dst[5] = (src_sz >> 16); + dst[6] = (src_sz >> 8); + dst[7] = (src_sz >> 0); + + if (!wrkmem) + return 1; + + memset(wrkmem, 0, LZO1X_999_MEM_COMPRESS); + + lzo1x_999_compress(src, src_sz, dst + g_hlen, &result_sz, wrkmem); + + *dst_sz = result_sz + g_hlen; + + return 0; +} + diff --git a/tools/z64compress/src/enc/lzo/config1x.h b/tools/z64compress/src/enc/lzo/config1x.h new file mode 100644 index 000000000..f85cb6548 --- /dev/null +++ b/tools/z64compress/src/enc/lzo/config1x.h @@ -0,0 +1,106 @@ +/* config1x.h -- configuration for the LZO1X algorithm + + This file is part of the LZO real-time data compression library. + + Copyright (C) 1996-2017 Markus Franz Xaver Johannes Oberhumer + All Rights Reserved. + + The LZO library is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of + the License, or (at your option) any later version. + + The LZO library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with the LZO library; see the file COPYING. + If not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + + Markus F.X.J. Oberhumer + + http://www.oberhumer.com/opensource/lzo/ + */ + + +/* WARNING: this file should *not* be used by applications. It is + part of the implementation of the library and is subject + to change. 
+ */ + + +#ifndef __LZO_CONFIG1X_H +#define __LZO_CONFIG1X_H 1 + +#if !defined(LZO1X) && !defined(LZO1Y) && !defined(LZO1Z) +# define LZO1X 1 +#endif + +#include "lzo_conf.h" +#if !defined(__LZO_IN_MINILZO) +#include "lzo1x.h" +#endif + + +/*********************************************************************** +// +************************************************************************/ + +#ifndef LZO_EOF_CODE +#define LZO_EOF_CODE 1 +#endif +#undef LZO_DETERMINISTIC + +#define M1_MAX_OFFSET 0x0400 +#ifndef M2_MAX_OFFSET +#define M2_MAX_OFFSET 0x0800 +#endif +#define M3_MAX_OFFSET 0x4000 +#define M4_MAX_OFFSET 0xbfff + +#define MX_MAX_OFFSET (M1_MAX_OFFSET + M2_MAX_OFFSET) + +#define M1_MIN_LEN 2 +#define M1_MAX_LEN 2 +#define M2_MIN_LEN 3 +#ifndef M2_MAX_LEN +#define M2_MAX_LEN 8 +#endif +#define M3_MIN_LEN 3 +#define M3_MAX_LEN 33 +#define M4_MIN_LEN 3 +#define M4_MAX_LEN 9 + +#define M1_MARKER 0 +#define M2_MARKER 64 +#define M3_MARKER 32 +#define M4_MARKER 16 + + +/*********************************************************************** +// +************************************************************************/ + +#ifndef MIN_LOOKAHEAD +#define MIN_LOOKAHEAD (M2_MAX_LEN + 1) +#endif + +#if defined(LZO_NEED_DICT_H) + +#ifndef LZO_HASH +#define LZO_HASH LZO_HASH_LZO_INCREMENTAL_B +#endif +#define DL_MIN_LEN M2_MIN_LEN +#include "lzo_dict.h" + +#endif + + + +#endif /* already included */ + + +/* vim:set ts=4 sw=4 et: */ diff --git a/tools/z64compress/src/enc/lzo/lzo1_d.ch b/tools/z64compress/src/enc/lzo/lzo1_d.ch new file mode 100644 index 000000000..bedc7ce8c --- /dev/null +++ b/tools/z64compress/src/enc/lzo/lzo1_d.ch @@ -0,0 +1,156 @@ +/* lzo1_d.ch -- common decompression stuff + + This file is part of the LZO real-time data compression library. + + Copyright (C) 1996-2017 Markus Franz Xaver Johannes Oberhumer + All Rights Reserved. 
+ + The LZO library is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of + the License, or (at your option) any later version. + + The LZO library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with the LZO library; see the file COPYING. + If not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + + Markus F.X.J. Oberhumer + + http://www.oberhumer.com/opensource/lzo/ + */ + + + +#if defined(LZO_TEST_OVERRUN) +# if !defined(LZO_TEST_OVERRUN_INPUT) +# define LZO_TEST_OVERRUN_INPUT 2 +# endif +# if !defined(LZO_TEST_OVERRUN_OUTPUT) +# define LZO_TEST_OVERRUN_OUTPUT 2 +# endif +# if !defined(LZO_TEST_OVERRUN_LOOKBEHIND) +# define LZO_TEST_OVERRUN_LOOKBEHIND 1 +# endif +#endif + + +/*********************************************************************** +// Overrun detection is internally handled by these macros: +// +// TEST_IP test input overrun at loop begin +// NEED_IP test input overrun at every input byte +// +// TEST_OP test output overrun at loop begin +// NEED_OP test output overrun at every output byte +// +// TEST_LB test match position +// +// The fastest decompressor results when testing for no overruns +// and using LZO_EOF_CODE. 
+************************************************************************/ + +#undef TEST_IP +#undef TEST_OP +#undef TEST_IP_AND_TEST_OP +#undef TEST_LB +#undef TEST_LBO +#undef NEED_IP +#undef NEED_OP +#undef TEST_IV +#undef TEST_OV +#undef HAVE_TEST_IP +#undef HAVE_TEST_OP +#undef HAVE_NEED_IP +#undef HAVE_NEED_OP +#undef HAVE_ANY_IP +#undef HAVE_ANY_OP + + +#if defined(LZO_TEST_OVERRUN_INPUT) +# if (LZO_TEST_OVERRUN_INPUT >= 1) +# define TEST_IP (ip < ip_end) +# endif +# if (LZO_TEST_OVERRUN_INPUT >= 2) +# define NEED_IP(x) \ + if ((lzo_uint)(ip_end - ip) < (lzo_uint)(x)) goto input_overrun +# define TEST_IV(x) if ((x) > (lzo_uint)0 - (511)) goto input_overrun +# endif +#endif + +#if defined(LZO_TEST_OVERRUN_OUTPUT) +# if (LZO_TEST_OVERRUN_OUTPUT >= 1) +# define TEST_OP (op <= op_end) +# endif +# if (LZO_TEST_OVERRUN_OUTPUT >= 2) +# undef TEST_OP /* don't need both of the tests here */ +# define NEED_OP(x) \ + if ((lzo_uint)(op_end - op) < (lzo_uint)(x)) goto output_overrun +# define TEST_OV(x) if ((x) > (lzo_uint)0 - (511)) goto output_overrun +# endif +#endif + +#if defined(LZO_TEST_OVERRUN_LOOKBEHIND) +# define TEST_LB(m_pos) if (PTR_LT(m_pos,out) || PTR_GE(m_pos,op)) goto lookbehind_overrun +# define TEST_LBO(m_pos,o) if (PTR_LT(m_pos,out) || PTR_GE(m_pos,op-(o))) goto lookbehind_overrun +#else +# define TEST_LB(m_pos) ((void) 0) +# define TEST_LBO(m_pos,o) ((void) 0) +#endif + + +#if !defined(LZO_EOF_CODE) && !defined(TEST_IP) + /* if we have no EOF code, we have to test for the end of the input */ +# define TEST_IP (ip < ip_end) +#endif + + +#if defined(TEST_IP) +# define HAVE_TEST_IP 1 +#else +# define TEST_IP 1 +#endif +#if defined(TEST_OP) +# define HAVE_TEST_OP 1 +#else +# define TEST_OP 1 +#endif + +#if defined(HAVE_TEST_IP) && defined(HAVE_TEST_OP) +# define TEST_IP_AND_TEST_OP (TEST_IP && TEST_OP) +#elif defined(HAVE_TEST_IP) +# define TEST_IP_AND_TEST_OP TEST_IP +#elif defined(HAVE_TEST_OP) +# define TEST_IP_AND_TEST_OP TEST_OP +#else +# define 
TEST_IP_AND_TEST_OP 1 +#endif + +#if defined(NEED_IP) +# define HAVE_NEED_IP 1 +#else +# define NEED_IP(x) ((void) 0) +# define TEST_IV(x) ((void) 0) +#endif +#if defined(NEED_OP) +# define HAVE_NEED_OP 1 +#else +# define NEED_OP(x) ((void) 0) +# define TEST_OV(x) ((void) 0) +#endif + + +#if defined(HAVE_TEST_IP) || defined(HAVE_NEED_IP) +# define HAVE_ANY_IP 1 +#endif +#if defined(HAVE_TEST_OP) || defined(HAVE_NEED_OP) +# define HAVE_ANY_OP 1 +#endif + + +/* vim:set ts=4 sw=4 et: */ diff --git a/tools/z64compress/src/enc/lzo/lzo1x.h b/tools/z64compress/src/enc/lzo/lzo1x.h new file mode 100644 index 000000000..a11151407 --- /dev/null +++ b/tools/z64compress/src/enc/lzo/lzo1x.h @@ -0,0 +1,165 @@ +/* lzo1x.h -- public interface of the LZO1X compression algorithm + + This file is part of the LZO real-time data compression library. + + Copyright (C) 1996-2017 Markus Franz Xaver Johannes Oberhumer + All Rights Reserved. + + The LZO library is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of + the License, or (at your option) any later version. + + The LZO library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with the LZO library; see the file COPYING. + If not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + + Markus F.X.J. 
Oberhumer + + http://www.oberhumer.com/opensource/lzo/ + */ + + +#ifndef __LZO1X_H_INCLUDED +#define __LZO1X_H_INCLUDED 1 + +#ifndef __LZOCONF_H_INCLUDED +#include +#endif + +#ifdef __cplusplus +extern "C" { +#endif + + +/*********************************************************************** +// +************************************************************************/ + +/* Memory required for the wrkmem parameter. + * When the required size is 0, you can also pass a NULL pointer. + */ + +#define LZO1X_MEM_COMPRESS LZO1X_1_MEM_COMPRESS +#define LZO1X_MEM_DECOMPRESS (0) +#define LZO1X_MEM_OPTIMIZE (0) + + +/* decompression */ +LZO_EXTERN(int) +lzo1x_decompress ( const lzo_bytep src, lzo_uint src_len, + lzo_bytep dst, lzo_uintp dst_len, + lzo_voidp wrkmem /* NOT USED */ ); + +/* safe decompression with overrun testing */ +LZO_EXTERN(int) +lzo1x_decompress_safe ( const lzo_bytep src, lzo_uint src_len, + lzo_bytep dst, lzo_uintp dst_len, + lzo_voidp wrkmem /* NOT USED */ ); + + +/*********************************************************************** +// +************************************************************************/ + +#define LZO1X_1_MEM_COMPRESS ((lzo_uint32_t) (16384L * lzo_sizeof_dict_t)) + +LZO_EXTERN(int) +lzo1x_1_compress ( const lzo_bytep src, lzo_uint src_len, + lzo_bytep dst, lzo_uintp dst_len, + lzo_voidp wrkmem ); + + +/*********************************************************************** +// special compressor versions +************************************************************************/ + +/* this version needs only 8 KiB work memory */ +#define LZO1X_1_11_MEM_COMPRESS ((lzo_uint32_t) (2048L * lzo_sizeof_dict_t)) + +LZO_EXTERN(int) +lzo1x_1_11_compress ( const lzo_bytep src, lzo_uint src_len, + lzo_bytep dst, lzo_uintp dst_len, + lzo_voidp wrkmem ); + + +/* this version needs 16 KiB work memory */ +#define LZO1X_1_12_MEM_COMPRESS ((lzo_uint32_t) (4096L * lzo_sizeof_dict_t)) + +LZO_EXTERN(int) +lzo1x_1_12_compress ( const lzo_bytep 
src, lzo_uint src_len, + lzo_bytep dst, lzo_uintp dst_len, + lzo_voidp wrkmem ); + + +/* use this version if you need a little more compression speed */ +#define LZO1X_1_15_MEM_COMPRESS ((lzo_uint32_t) (32768L * lzo_sizeof_dict_t)) + +LZO_EXTERN(int) +lzo1x_1_15_compress ( const lzo_bytep src, lzo_uint src_len, + lzo_bytep dst, lzo_uintp dst_len, + lzo_voidp wrkmem ); + + +/*********************************************************************** +// better compression ratio at the cost of more memory and time +************************************************************************/ + +#define LZO1X_999_MEM_COMPRESS ((lzo_uint32_t) (14 * 16384L * sizeof(short))) + +LZO_EXTERN(int) +lzo1x_999_compress ( const lzo_bytep src, lzo_uint src_len, + lzo_bytep dst, lzo_uintp dst_len, + lzo_voidp wrkmem ); + + +/*********************************************************************** +// +************************************************************************/ + +LZO_EXTERN(int) +lzo1x_999_compress_dict ( const lzo_bytep src, lzo_uint src_len, + lzo_bytep dst, lzo_uintp dst_len, + lzo_voidp wrkmem, + const lzo_bytep dict, lzo_uint dict_len ); + +LZO_EXTERN(int) +lzo1x_999_compress_level ( const lzo_bytep src, lzo_uint src_len, + lzo_bytep dst, lzo_uintp dst_len, + lzo_voidp wrkmem, + const lzo_bytep dict, lzo_uint dict_len, + lzo_callback_p cb, + int compression_level ); + +LZO_EXTERN(int) +lzo1x_decompress_dict_safe ( const lzo_bytep src, lzo_uint src_len, + lzo_bytep dst, lzo_uintp dst_len, + lzo_voidp wrkmem /* NOT USED */, + const lzo_bytep dict, lzo_uint dict_len ); + + +/*********************************************************************** +// optimize a compressed data block +************************************************************************/ + +LZO_EXTERN(int) +lzo1x_optimize ( lzo_bytep src, lzo_uint src_len, + lzo_bytep dst, lzo_uintp dst_len, + lzo_voidp wrkmem /* NOT USED */ ); + + + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* already 
included */ + + +/* vim:set ts=4 sw=4 et: */ diff --git a/tools/z64compress/src/enc/lzo/lzo1x_1.c b/tools/z64compress/src/enc/lzo/lzo1x_1.c new file mode 100644 index 000000000..a659393f2 --- /dev/null +++ b/tools/z64compress/src/enc/lzo/lzo1x_1.c @@ -0,0 +1,57 @@ +/* lzo1x_1.c -- LZO1X-1 compression + + This file is part of the LZO real-time data compression library. + + Copyright (C) 1996-2017 Markus Franz Xaver Johannes Oberhumer + All Rights Reserved. + + The LZO library is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of + the License, or (at your option) any later version. + + The LZO library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with the LZO library; see the file COPYING. + If not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + + Markus F.X.J. 
Oberhumer + + http://www.oberhumer.com/opensource/lzo/ + */ + + +#include "lzo_conf.h" +#if 1 && defined(UA_GET_LE32) +#undef LZO_DICT_USE_PTR +#define LZO_DICT_USE_PTR 0 +#undef lzo_dict_t +#define lzo_dict_t lzo_uint16_t +#endif + +#define LZO_NEED_DICT_H 1 +#ifndef D_BITS +#define D_BITS 14 +#endif +#define D_INDEX1(d,p) d = DM(DMUL(0x21,DX3(p,5,5,6)) >> 5) +#define D_INDEX2(d,p) d = (d & (D_MASK & 0x7ff)) ^ (D_HIGH | 0x1f) +#if 1 +#define DINDEX(dv,p) DM(((DMUL(0x1824429d,dv)) >> (32-D_BITS))) +#else +#define DINDEX(dv,p) DM((dv) + ((dv) >> (32-D_BITS))) +#endif +#include "config1x.h" +#define LZO_DETERMINISTIC !(LZO_DICT_USE_PTR) + +#ifndef DO_COMPRESS +#define DO_COMPRESS lzo1x_1_compress +#endif + +#include "lzo1x_c.ch" + +/* vim:set ts=4 sw=4 et: */ diff --git a/tools/z64compress/src/enc/lzo/lzo1x_9x.c b/tools/z64compress/src/enc/lzo/lzo1x_9x.c new file mode 100644 index 000000000..39a211e54 --- /dev/null +++ b/tools/z64compress/src/enc/lzo/lzo1x_9x.c @@ -0,0 +1,867 @@ +/* lzo1x_9x.c -- implementation of the LZO1X-999 compression algorithm + + This file is part of the LZO real-time data compression library. + + Copyright (C) 1996-2017 Markus Franz Xaver Johannes Oberhumer + All Rights Reserved. + + The LZO library is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of + the License, or (at your option) any later version. + + The LZO library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with the LZO library; see the file COPYING. + If not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + + Markus F.X.J. 
Oberhumer + + http://www.oberhumer.com/opensource/lzo/ + */ + + +#if !defined(LZO1X) && !defined(LZO1Y) && !defined(LZO1Z) +# define LZO1X 1 +#endif + +#if defined(LZO1X) +# include "config1x.h" +#elif defined(LZO1Y) +# include "config1y.h" +#elif defined(LZO1Z) +# include "config1z.h" +#else +# error +#endif + + +/*********************************************************************** +// +************************************************************************/ + +#define SWD_N M4_MAX_OFFSET /* size of ring buffer */ +#define SWD_THRESHOLD 1 /* lower limit for match length */ +#define SWD_F 2048 /* upper limit for match length */ + +#define SWD_BEST_OFF (LZO_MAX3( M2_MAX_LEN, M3_MAX_LEN, M4_MAX_LEN ) + 1) + +#if defined(LZO1X) +# define LZO_COMPRESS_T lzo1x_999_t +# define lzo_swd_t lzo1x_999_swd_t +#elif defined(LZO1Y) +# define LZO_COMPRESS_T lzo1y_999_t +# define lzo_swd_t lzo1y_999_swd_t +# define lzo1x_999_compress_internal lzo1y_999_compress_internal +# define lzo1x_999_compress_dict lzo1y_999_compress_dict +# define lzo1x_999_compress_level lzo1y_999_compress_level +# define lzo1x_999_compress lzo1y_999_compress +#elif defined(LZO1Z) +# define LZO_COMPRESS_T lzo1z_999_t +# define lzo_swd_t lzo1z_999_swd_t +# define lzo1x_999_compress_internal lzo1z_999_compress_internal +# define lzo1x_999_compress_dict lzo1z_999_compress_dict +# define lzo1x_999_compress_level lzo1z_999_compress_level +# define lzo1x_999_compress lzo1z_999_compress +#else +# error +#endif + +#if 0 +# define HEAD3(b,p) \ + ((((((lzo_xint)b[p]<<3)^b[p+1])<<3)^b[p+2]) & (SWD_HSIZE-1)) +#endif +#if 0 && (LZO_OPT_UNALIGNED32) && (LZO_ABI_LITTLE_ENDIAN) +# define HEAD3(b,p) \ + (((* (lzo_uint32_tp) &b[p]) ^ ((* (lzo_uint32_tp) &b[p])>>10)) & (SWD_HSIZE-1)) +#endif + +#include "lzo_mchw.ch" + + +/* this is a public functions, but there is no prototype in a header file */ +LZO_EXTERN(int) +lzo1x_999_compress_internal ( const lzo_bytep in , lzo_uint in_len, + lzo_bytep out, lzo_uintp out_len, + 
lzo_voidp wrkmem, + const lzo_bytep dict, lzo_uint dict_len, + lzo_callback_p cb, + int try_lazy_parm, + lzo_uint good_length, + lzo_uint max_lazy, + lzo_uint nice_length, + lzo_uint max_chain, + lzo_uint32_t flags ); + + +/*********************************************************************** +// +************************************************************************/ + +static lzo_bytep +code_match ( LZO_COMPRESS_T *c, lzo_bytep op, lzo_uint m_len, lzo_uint m_off ) +{ + lzo_uint x_len = m_len; + lzo_uint x_off = m_off; + + c->match_bytes += m_len; + +#if 0 +/* + static lzo_uint last_m_len = 0, last_m_off = 0; + static lzo_uint prev_m_off[4]; + static unsigned prev_m_off_ptr = 0; + unsigned i; + + //if (m_len >= 3 && m_len <= M2_MAX_LEN && m_off <= M2_MAX_OFFSET) + if (m_len >= 3 && m_len <= M2_MAX_LEN) + { + //if (m_len == last_m_len && m_off == last_m_off) + //printf("last_m_len + last_m_off\n"); + //else + if (m_off == last_m_off) + printf("last_m_off\n"); + else + { + for (i = 0; i < 4; i++) + if (m_off == prev_m_off[i]) + printf("prev_m_off %u: %5ld\n",i,(long)m_off); + } + } + last_m_len = m_len; + last_m_off = prev_m_off[prev_m_off_ptr] = m_off; + prev_m_off_ptr = (prev_m_off_ptr + 1) & 3; +*/ +#endif + + assert(op > c->out); + if (m_len == 2) + { + assert(m_off <= M1_MAX_OFFSET); + assert(c->r1_lit > 0); assert(c->r1_lit < 4); + m_off -= 1; +#if defined(LZO1Z) + *op++ = LZO_BYTE(M1_MARKER | (m_off >> 6)); + *op++ = LZO_BYTE(m_off << 2); +#else + *op++ = LZO_BYTE(M1_MARKER | ((m_off & 3) << 2)); + *op++ = LZO_BYTE(m_off >> 2); +#endif + c->m1a_m++; + } +#if defined(LZO1Z) + else if (m_len <= M2_MAX_LEN && (m_off <= M2_MAX_OFFSET || m_off == c->last_m_off)) +#else + else if (m_len <= M2_MAX_LEN && m_off <= M2_MAX_OFFSET) +#endif + { + assert(m_len >= 3); +#if defined(LZO1X) + m_off -= 1; + *op++ = LZO_BYTE(((m_len - 1) << 5) | ((m_off & 7) << 2)); + *op++ = LZO_BYTE(m_off >> 3); + assert(op[-2] >= M2_MARKER); +#elif defined(LZO1Y) + m_off -= 1; + *op++ = 
LZO_BYTE(((m_len + 1) << 4) | ((m_off & 3) << 2)); + *op++ = LZO_BYTE(m_off >> 2); + assert(op[-2] >= M2_MARKER); +#elif defined(LZO1Z) + if (m_off == c->last_m_off) + *op++ = LZO_BYTE(((m_len - 1) << 5) | (0x700 >> 6)); + else + { + m_off -= 1; + *op++ = LZO_BYTE(((m_len - 1) << 5) | (m_off >> 6)); + *op++ = LZO_BYTE(m_off << 2); + } +#endif + c->m2_m++; + } + else if (m_len == M2_MIN_LEN && m_off <= MX_MAX_OFFSET && c->r1_lit >= 4) + { + assert(m_len == 3); + assert(m_off > M2_MAX_OFFSET); + m_off -= 1 + M2_MAX_OFFSET; +#if defined(LZO1Z) + *op++ = LZO_BYTE(M1_MARKER | (m_off >> 6)); + *op++ = LZO_BYTE(m_off << 2); +#else + *op++ = LZO_BYTE(M1_MARKER | ((m_off & 3) << 2)); + *op++ = LZO_BYTE(m_off >> 2); +#endif + c->m1b_m++; + } + else if (m_off <= M3_MAX_OFFSET) + { + assert(m_len >= 3); + m_off -= 1; + if (m_len <= M3_MAX_LEN) + *op++ = LZO_BYTE(M3_MARKER | (m_len - 2)); + else + { + m_len -= M3_MAX_LEN; + *op++ = M3_MARKER | 0; + while (m_len > 255) + { + m_len -= 255; + *op++ = 0; + } + assert(m_len > 0); + *op++ = LZO_BYTE(m_len); + } +#if defined(LZO1Z) + *op++ = LZO_BYTE(m_off >> 6); + *op++ = LZO_BYTE(m_off << 2); +#else + *op++ = LZO_BYTE(m_off << 2); + *op++ = LZO_BYTE(m_off >> 6); +#endif + c->m3_m++; + } + else + { + lzo_uint k; + + assert(m_len >= 3); + assert(m_off > 0x4000); assert(m_off <= 0xbfff); + m_off -= 0x4000; + k = (m_off & 0x4000) >> 11; + if (m_len <= M4_MAX_LEN) + *op++ = LZO_BYTE(M4_MARKER | k | (m_len - 2)); + else + { + m_len -= M4_MAX_LEN; + *op++ = LZO_BYTE(M4_MARKER | k | 0); + while (m_len > 255) + { + m_len -= 255; + *op++ = 0; + } + assert(m_len > 0); + *op++ = LZO_BYTE(m_len); + } +#if defined(LZO1Z) + *op++ = LZO_BYTE(m_off >> 6); + *op++ = LZO_BYTE(m_off << 2); +#else + *op++ = LZO_BYTE(m_off << 2); + *op++ = LZO_BYTE(m_off >> 6); +#endif + c->m4_m++; + } + + c->last_m_len = x_len; + c->last_m_off = x_off; + return op; +} + + +static lzo_bytep +STORE_RUN ( LZO_COMPRESS_T *c, lzo_bytep op, const lzo_bytep ii, lzo_uint t ) +{ 
+ c->lit_bytes += t; + + if (op == c->out && t <= 238) + { + *op++ = LZO_BYTE(17 + t); + } + else if (t <= 3) + { +#if defined(LZO1Z) + op[-1] = LZO_BYTE(op[-1] | t); +#else + op[-2] = LZO_BYTE(op[-2] | t); +#endif + c->lit1_r++; + } + else if (t <= 18) + { + *op++ = LZO_BYTE(t - 3); + c->lit2_r++; + } + else + { + lzo_uint tt = t - 18; + + *op++ = 0; + while (tt > 255) + { + tt -= 255; + *op++ = 0; + } + assert(tt > 0); + *op++ = LZO_BYTE(tt); + c->lit3_r++; + } + do *op++ = *ii++; while (--t > 0); + + return op; +} + + +static lzo_bytep +code_run ( LZO_COMPRESS_T *c, lzo_bytep op, const lzo_bytep ii, + lzo_uint lit, lzo_uint m_len ) +{ + if (lit > 0) + { + assert(m_len >= 2); + op = STORE_RUN(c,op,ii,lit); + c->r1_m_len = m_len; + c->r1_lit = lit; + } + else + { + assert(m_len >= 3); + c->r1_m_len = 0; + c->r1_lit = 0; + } + + return op; +} + + +/*********************************************************************** +// +************************************************************************/ + +static lzo_uint +len_of_coded_match ( lzo_uint m_len, lzo_uint m_off, lzo_uint lit ) +{ + lzo_uint n = 4; + + if (m_len < 2) + return 0; + if (m_len == 2) + return (m_off <= M1_MAX_OFFSET && lit > 0 && lit < 4) ? 
2 : 0; + if (m_len <= M2_MAX_LEN && m_off <= M2_MAX_OFFSET) + return 2; + if (m_len == M2_MIN_LEN && m_off <= MX_MAX_OFFSET && lit >= 4) + return 2; + if (m_off <= M3_MAX_OFFSET) + { + if (m_len <= M3_MAX_LEN) + return 3; + m_len -= M3_MAX_LEN; + while (m_len > 255) + { + m_len -= 255; + n++; + } + return n; + } + if (m_off <= M4_MAX_OFFSET) + { + if (m_len <= M4_MAX_LEN) + return 3; + m_len -= M4_MAX_LEN; + while (m_len > 255) + { + m_len -= 255; + n++; + } + return n; + } + return 0; +} + + +static lzo_uint +min_gain(lzo_uint ahead, lzo_uint lit1, lzo_uint lit2, lzo_uint l1, lzo_uint l2, lzo_uint l3) +{ + lzo_uint lazy_match_min_gain; + + assert (ahead >= 1); + lazy_match_min_gain = ahead; + +#if 0 + if (l3) + lit2 -= ahead; +#endif + + if (lit1 <= 3) + lazy_match_min_gain += (lit2 <= 3) ? 0 : 2; + else if (lit1 <= 18) + lazy_match_min_gain += (lit2 <= 18) ? 0 : 1; + + lazy_match_min_gain += (l2 - l1) * 2; + if (l3) + lazy_match_min_gain -= (ahead - l3) * 2; + + if ((lzo_int) lazy_match_min_gain < 0) + lazy_match_min_gain = 0; + +#if 0 + if (l1 == 2) + if (lazy_match_min_gain == 0) + lazy_match_min_gain = 1; +#endif + + return lazy_match_min_gain; +} + + +/*********************************************************************** +// +************************************************************************/ + +#if !defined(NDEBUG) +static +void assert_match( const lzo_swd_p swd, lzo_uint m_len, lzo_uint m_off ) +{ + const LZO_COMPRESS_T *c = swd->c; + lzo_uint d_off; + + assert(m_len >= 2); + if (m_off <= (lzo_uint) (c->bp - c->in)) + { + assert(c->bp - m_off + m_len < c->ip); + assert(lzo_memcmp(c->bp, c->bp - m_off, m_len) == 0); + } + else + { + assert(swd->dict != NULL); + d_off = m_off - (lzo_uint) (c->bp - c->in); + assert(d_off <= swd->dict_len); + if (m_len > d_off) + { + assert(lzo_memcmp(c->bp, swd->dict_end - d_off, d_off) == 0); + assert(c->in + m_len - d_off < c->ip); + assert(lzo_memcmp(c->bp + d_off, c->in, m_len - d_off) == 0); + } + else + { + 
assert(lzo_memcmp(c->bp, swd->dict_end - d_off, m_len) == 0); + } + } +} +#else +# define assert_match(a,b,c) ((void)0) +#endif + + +#if defined(SWD_BEST_OFF) + +static void +better_match ( const lzo_swd_p swd, lzo_uint *m_len, lzo_uint *m_off ) +{ +#if defined(LZO1Z) + const LZO_COMPRESS_T *c = swd->c; +#endif + + if (*m_len <= M2_MIN_LEN) + return; +#if defined(LZO1Z) + if (*m_off == c->last_m_off && *m_len <= M2_MAX_LEN) + return; +#if 1 + if (*m_len >= M2_MIN_LEN + 1 && *m_len <= M2_MAX_LEN + 1 && + c->last_m_off && swd->best_off[*m_len-1] == c->last_m_off) + { + *m_len = *m_len - 1; + *m_off = swd->best_off[*m_len]; + return; + } +#endif +#endif + + if (*m_off <= M2_MAX_OFFSET) + return; + +#if 1 + /* M3/M4 -> M2 */ + if (*m_off > M2_MAX_OFFSET && + *m_len >= M2_MIN_LEN + 1 && *m_len <= M2_MAX_LEN + 1 && + swd->best_off[*m_len-1] && swd->best_off[*m_len-1] <= M2_MAX_OFFSET) + { + *m_len = *m_len - 1; + *m_off = swd->best_off[*m_len]; + return; + } +#endif + +#if 1 + /* M4 -> M2 */ + if (*m_off > M3_MAX_OFFSET && + *m_len >= M4_MAX_LEN + 1 && *m_len <= M2_MAX_LEN + 2 && + swd->best_off[*m_len-2] && swd->best_off[*m_len-2] <= M2_MAX_OFFSET) + { + *m_len = *m_len - 2; + *m_off = swd->best_off[*m_len]; + return; + } +#endif + +#if 1 + /* M4 -> M3 */ + if (*m_off > M3_MAX_OFFSET && + *m_len >= M4_MAX_LEN + 1 && *m_len <= M3_MAX_LEN + 1 && + swd->best_off[*m_len-1] && swd->best_off[*m_len-1] <= M3_MAX_OFFSET) + { + *m_len = *m_len - 1; + *m_off = swd->best_off[*m_len]; + } +#endif +} + +#endif + + +/*********************************************************************** +// +************************************************************************/ + +LZO_PUBLIC(int) +lzo1x_999_compress_internal ( const lzo_bytep in , lzo_uint in_len, + lzo_bytep out, lzo_uintp out_len, + lzo_voidp wrkmem, + const lzo_bytep dict, lzo_uint dict_len, + lzo_callback_p cb, + int try_lazy_parm, + lzo_uint good_length, + lzo_uint max_lazy, + lzo_uint nice_length, + lzo_uint max_chain, + 
lzo_uint32_t flags ) +{ + lzo_bytep op; + const lzo_bytep ii; + lzo_uint lit; + lzo_uint m_len, m_off; + LZO_COMPRESS_T cc; + LZO_COMPRESS_T * const c = &cc; + lzo_swd_p const swd = (lzo_swd_p) wrkmem; + lzo_uint try_lazy; + int r; + + /* sanity check */ +#if defined(LZO1X) + LZO_COMPILE_TIME_ASSERT(LZO1X_999_MEM_COMPRESS >= SIZEOF_LZO_SWD_T) +#elif defined(LZO1Y) + LZO_COMPILE_TIME_ASSERT(LZO1Y_999_MEM_COMPRESS >= SIZEOF_LZO_SWD_T) +#elif defined(LZO1Z) + LZO_COMPILE_TIME_ASSERT(LZO1Z_999_MEM_COMPRESS >= SIZEOF_LZO_SWD_T) +#else +# error +#endif + +/* setup parameter defaults */ + /* number of lazy match tries */ + try_lazy = (lzo_uint) try_lazy_parm; + if (try_lazy_parm < 0) + try_lazy = 1; + /* reduce lazy match search if we already have a match with this length */ + if (good_length == 0) + good_length = 32; + /* do not try a lazy match if we already have a match with this length */ + if (max_lazy == 0) + max_lazy = 32; + /* stop searching for longer matches than this one */ + if (nice_length == 0) + nice_length = 0; + /* don't search more positions than this */ + if (max_chain == 0) + max_chain = SWD_MAX_CHAIN; + + c->init = 0; + c->ip = c->in = in; + c->in_end = in + in_len; + c->out = out; + c->cb = cb; + c->m1a_m = c->m1b_m = c->m2_m = c->m3_m = c->m4_m = 0; + c->lit1_r = c->lit2_r = c->lit3_r = 0; + + op = out; + ii = c->ip; /* point to start of literal run */ + lit = 0; + c->r1_lit = c->r1_m_len = 0; + + r = init_match(c,swd,dict,dict_len,flags); + if (r != 0) + return r; + if (max_chain > 0) + swd->max_chain = max_chain; + if (nice_length > 0) + swd->nice_length = nice_length; + + r = find_match(c,swd,0,0); + if (r != 0) + return r; + while (c->look > 0) + { + lzo_uint ahead; + lzo_uint max_ahead; + lzo_uint l1, l2, l3; + + c->codesize = pd(op, out); + + m_len = c->m_len; + m_off = c->m_off; + + assert(c->bp == c->ip - c->look); + assert(c->bp >= in); + if (lit == 0) + ii = c->bp; + assert(ii + lit == c->bp); + assert(swd->b_char == *(c->bp)); + + if ( 
m_len < 2 || + (m_len == 2 && (m_off > M1_MAX_OFFSET || lit == 0 || lit >= 4)) || +#if 1 + /* Do not accept this match for compressed-data compatibility + * with LZO v1.01 and before + * [ might be a problem for decompress() and optimize() ] + */ + (m_len == 2 && op == out) || +#endif + (op == out && lit == 0)) + { + /* a literal */ + m_len = 0; + } + else if (m_len == M2_MIN_LEN) + { + /* compression ratio improves if we code a literal in some cases */ + if (m_off > MX_MAX_OFFSET && lit >= 4) + m_len = 0; + } + + if (m_len == 0) + { + /* a literal */ + lit++; + swd->max_chain = max_chain; + r = find_match(c,swd,1,0); + assert(r == 0); LZO_UNUSED(r); + continue; + } + + /* a match */ +#if defined(SWD_BEST_OFF) + if (swd->use_best_off) + better_match(swd,&m_len,&m_off); +#endif + assert_match(swd,m_len,m_off); + + + /* shall we try a lazy match ? */ + ahead = 0; + if (try_lazy == 0 || m_len >= max_lazy) + { + /* no */ + l1 = 0; + max_ahead = 0; + } + else + { + /* yes, try a lazy match */ + l1 = len_of_coded_match(m_len,m_off,lit); + assert(l1 > 0); +#if 1 + max_ahead = LZO_MIN(try_lazy, l1 - 1); +#else + max_ahead = LZO_MIN3(try_lazy, l1, m_len - 1); +#endif + } + + + while (ahead < max_ahead && c->look > m_len) + { + lzo_uint lazy_match_min_gain; + + if (m_len >= good_length) + swd->max_chain = max_chain >> 2; + else + swd->max_chain = max_chain; + r = find_match(c,swd,1,0); + ahead++; + + assert(r == 0); LZO_UNUSED(r); + assert(c->look > 0); + assert(ii + lit + ahead == c->bp); + +#if defined(LZO1Z) + if (m_off == c->last_m_off && c->m_off != c->last_m_off) + if (m_len >= M2_MIN_LEN && m_len <= M2_MAX_LEN) + c->m_len = 0; +#endif + if (c->m_len < m_len) + continue; +#if 1 + if (c->m_len == m_len && c->m_off >= m_off) + continue; +#endif +#if defined(SWD_BEST_OFF) + if (swd->use_best_off) + better_match(swd,&c->m_len,&c->m_off); +#endif + l2 = len_of_coded_match(c->m_len,c->m_off,lit+ahead); + if (l2 == 0) + continue; +#if 0 + if (c->m_len == m_len && l2 >= l1) + 
continue; +#endif + + +#if 1 + /* compressed-data compatibility [see above] */ + l3 = (op == out) ? 0 : len_of_coded_match(ahead,m_off,lit); +#else + l3 = len_of_coded_match(ahead,m_off,lit); +#endif + + lazy_match_min_gain = min_gain(ahead,lit,lit+ahead,l1,l2,l3); + if (c->m_len >= m_len + lazy_match_min_gain) + { + c->lazy++; + assert_match(swd,c->m_len,c->m_off); + + if (l3) + { + /* code previous run */ + op = code_run(c,op,ii,lit,ahead); + lit = 0; + /* code shortened match */ + op = code_match(c,op,ahead,m_off); + } + else + { + lit += ahead; + assert(ii + lit == c->bp); + } + goto lazy_match_done; + } + } + + + assert(ii + lit + ahead == c->bp); + + /* 1 - code run */ + op = code_run(c,op,ii,lit,m_len); + lit = 0; + + /* 2 - code match */ + op = code_match(c,op,m_len,m_off); + swd->max_chain = max_chain; + r = find_match(c,swd,m_len,1+ahead); + assert(r == 0); LZO_UNUSED(r); + +lazy_match_done: ; + } + + + /* store final run */ + if (lit > 0) + op = STORE_RUN(c,op,ii,lit); + +#if defined(LZO_EOF_CODE) + *op++ = M4_MARKER | 1; + *op++ = 0; + *op++ = 0; +#endif + + c->codesize = pd(op, out); + assert(c->textsize == in_len); + + *out_len = pd(op, out); + + if (c->cb && c->cb->nprogress) + (*c->cb->nprogress)(c->cb, c->textsize, c->codesize, 0); + +#if 0 + printf("%ld %ld -> %ld %ld: %ld %ld %ld %ld %ld %ld: %ld %ld %ld %ld\n", + (long) c->textsize, (long) in_len, (long) c->codesize, + c->match_bytes, c->m1a_m, c->m1b_m, c->m2_m, c->m3_m, c->m4_m, + c->lit_bytes, c->lit1_r, c->lit2_r, c->lit3_r, c->lazy); +#endif + assert(c->lit_bytes + c->match_bytes == in_len); + + return LZO_E_OK; +} + + +/*********************************************************************** +// +************************************************************************/ + +LZO_PUBLIC(int) +lzo1x_999_compress_level ( const lzo_bytep in , lzo_uint in_len, + lzo_bytep out, lzo_uintp out_len, + lzo_voidp wrkmem, + const lzo_bytep dict, lzo_uint dict_len, + lzo_callback_p cb, + int 
compression_level ) +{ + static const struct + { + int try_lazy_parm; + lzo_uint good_length; + lzo_uint max_lazy; + lzo_uint nice_length; + lzo_uint max_chain; + lzo_uint32_t flags; + } c[9] = { + /* faster compression */ + { 0, 0, 0, 8, 4, 0 }, + { 0, 0, 0, 16, 8, 0 }, + { 0, 0, 0, 32, 16, 0 }, + { 1, 4, 4, 16, 16, 0 }, + { 1, 8, 16, 32, 32, 0 }, + { 1, 8, 16, 128, 128, 0 }, + { 2, 8, 32, 128, 256, 0 }, + { 2, 32, 128, SWD_F, 2048, 1 }, + { 2, SWD_F, SWD_F, SWD_F, 4096, 1 } + /* max. compression */ + }; + + if (compression_level < 1 || compression_level > 9) + return LZO_E_ERROR; + + compression_level -= 1; + return lzo1x_999_compress_internal(in, in_len, out, out_len, wrkmem, + dict, dict_len, cb, + c[compression_level].try_lazy_parm, + c[compression_level].good_length, + c[compression_level].max_lazy, +#if 0 + c[compression_level].nice_length, +#else + 0, +#endif + c[compression_level].max_chain, + c[compression_level].flags); +} + + +/*********************************************************************** +// +************************************************************************/ + +LZO_PUBLIC(int) +lzo1x_999_compress_dict ( const lzo_bytep in , lzo_uint in_len, + lzo_bytep out, lzo_uintp out_len, + lzo_voidp wrkmem, + const lzo_bytep dict, lzo_uint dict_len ) +{ + return lzo1x_999_compress_level(in, in_len, out, out_len, wrkmem, + dict, dict_len, 0, 8); +} + +LZO_PUBLIC(int) +lzo1x_999_compress ( const lzo_bytep in , lzo_uint in_len, + lzo_bytep out, lzo_uintp out_len, + lzo_voidp wrkmem ) +{ + return lzo1x_999_compress_level(in, in_len, out, out_len, wrkmem, + NULL, 0, (lzo_callback_p) 0, 9/*ootntsc10: 9 saves 2580 bytes over 8*/); +} + + +/* vim:set ts=4 sw=4 et: */ diff --git a/tools/z64compress/src/enc/lzo/lzo1x_c.ch b/tools/z64compress/src/enc/lzo/lzo1x_c.ch new file mode 100644 index 000000000..be19b2b74 --- /dev/null +++ b/tools/z64compress/src/enc/lzo/lzo1x_c.ch @@ -0,0 +1,403 @@ +/* lzo1x_c.ch -- implementation of the LZO1[XY]-1 compression 
algorithm + + This file is part of the LZO real-time data compression library. + + Copyright (C) 1996-2017 Markus Franz Xaver Johannes Oberhumer + All Rights Reserved. + + The LZO library is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of + the License, or (at your option) any later version. + + The LZO library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with the LZO library; see the file COPYING. + If not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + + Markus F.X.J. Oberhumer + + http://www.oberhumer.com/opensource/lzo/ + */ + + + +#if 1 && defined(DO_COMPRESS) && !defined(do_compress) + /* choose a unique name to better help PGO optimizations */ +# define do_compress LZO_PP_ECONCAT2(DO_COMPRESS,_core) +#endif + + +/*********************************************************************** +// compress a block of data. +************************************************************************/ + +static __lzo_noinline lzo_uint +do_compress ( const lzo_bytep in , lzo_uint in_len, + lzo_bytep out, lzo_uintp out_len, + lzo_uint ti, lzo_voidp wrkmem) +{ + const lzo_bytep ip; + lzo_bytep op; + const lzo_bytep const in_end = in + in_len; + const lzo_bytep const ip_end = in + in_len - 20; + const lzo_bytep ii; + lzo_dict_p const dict = (lzo_dict_p) wrkmem; + + op = out; + ip = in; + ii = ip; + + ip += ti < 4 ? 
4 - ti : 0; + for (;;) + { + const lzo_bytep m_pos; +#if !(LZO_DETERMINISTIC) + LZO_DEFINE_UNINITIALIZED_VAR(lzo_uint, m_off, 0); + lzo_uint m_len; + lzo_uint dindex; +next: + if __lzo_unlikely(ip >= ip_end) + break; + DINDEX1(dindex,ip); + GINDEX(m_pos,m_off,dict,dindex,in); + if (LZO_CHECK_MPOS_NON_DET(m_pos,m_off,in,ip,M4_MAX_OFFSET)) + goto literal; +#if 1 + if (m_off <= M2_MAX_OFFSET || m_pos[3] == ip[3]) + goto try_match; + DINDEX2(dindex,ip); +#endif + GINDEX(m_pos,m_off,dict,dindex,in); + if (LZO_CHECK_MPOS_NON_DET(m_pos,m_off,in,ip,M4_MAX_OFFSET)) + goto literal; + if (m_off <= M2_MAX_OFFSET || m_pos[3] == ip[3]) + goto try_match; + goto literal; + +try_match: +#if (LZO_OPT_UNALIGNED32) + if (UA_GET_NE32(m_pos) != UA_GET_NE32(ip)) +#else + if (m_pos[0] != ip[0] || m_pos[1] != ip[1] || m_pos[2] != ip[2] || m_pos[3] != ip[3]) +#endif + { + /* a literal */ +literal: + UPDATE_I(dict,0,dindex,ip,in); + ip += 1 + ((ip - ii) >> 5); + continue; + } +/*match:*/ + UPDATE_I(dict,0,dindex,ip,in); +#else + lzo_uint m_off; + lzo_uint m_len; + { + lzo_uint32_t dv; + lzo_uint dindex; +literal: + ip += 1 + ((ip - ii) >> 5); +next: + if __lzo_unlikely(ip >= ip_end) + break; + dv = UA_GET_LE32(ip); + dindex = DINDEX(dv,ip); + GINDEX(m_off,m_pos,in+dict,dindex,in); + UPDATE_I(dict,0,dindex,ip,in); + if __lzo_unlikely(dv != UA_GET_LE32(m_pos)) + goto literal; + } +#endif + + /* a match */ + + ii -= ti; ti = 0; + { + lzo_uint t = pd(ip,ii); + if (t != 0) + { + if (t <= 3) + { + op[-2] = LZO_BYTE(op[-2] | t); +#if (LZO_OPT_UNALIGNED32) + UA_COPY4(op, ii); + op += t; +#else + { do *op++ = *ii++; while (--t > 0); } +#endif + } +#if (LZO_OPT_UNALIGNED32) || (LZO_OPT_UNALIGNED64) + else if (t <= 16) + { + *op++ = LZO_BYTE(t - 3); + UA_COPY8(op, ii); + UA_COPY8(op+8, ii+8); + op += t; + } +#endif + else + { + if (t <= 18) + *op++ = LZO_BYTE(t - 3); + else + { + lzo_uint tt = t - 18; + *op++ = 0; + while __lzo_unlikely(tt > 255) + { + tt -= 255; + UA_SET1(op, 0); + op++; + } + 
assert(tt > 0); + *op++ = LZO_BYTE(tt); + } +#if (LZO_OPT_UNALIGNED32) || (LZO_OPT_UNALIGNED64) + do { + UA_COPY8(op, ii); + UA_COPY8(op+8, ii+8); + op += 16; ii += 16; t -= 16; + } while (t >= 16); if (t > 0) +#endif + { do *op++ = *ii++; while (--t > 0); } + } + } + } + m_len = 4; + { +#if (LZO_OPT_UNALIGNED64) + lzo_uint64_t v; + v = UA_GET_NE64(ip + m_len) ^ UA_GET_NE64(m_pos + m_len); + if __lzo_unlikely(v == 0) { + do { + m_len += 8; + v = UA_GET_NE64(ip + m_len) ^ UA_GET_NE64(m_pos + m_len); + if __lzo_unlikely(ip + m_len >= ip_end) + goto m_len_done; + } while (v == 0); + } +#if (LZO_ABI_BIG_ENDIAN) && defined(lzo_bitops_ctlz64) + m_len += lzo_bitops_ctlz64(v) / CHAR_BIT; +#elif (LZO_ABI_BIG_ENDIAN) + if ((v >> (64 - CHAR_BIT)) == 0) do { + v <<= CHAR_BIT; + m_len += 1; + } while ((v >> (64 - CHAR_BIT)) == 0); +#elif (LZO_ABI_LITTLE_ENDIAN) && defined(lzo_bitops_cttz64) + m_len += lzo_bitops_cttz64(v) / CHAR_BIT; +#elif (LZO_ABI_LITTLE_ENDIAN) + if ((v & UCHAR_MAX) == 0) do { + v >>= CHAR_BIT; + m_len += 1; + } while ((v & UCHAR_MAX) == 0); +#else + if (ip[m_len] == m_pos[m_len]) do { + m_len += 1; + } while (ip[m_len] == m_pos[m_len]); +#endif +#elif (LZO_OPT_UNALIGNED32) + lzo_uint32_t v; + v = UA_GET_NE32(ip + m_len) ^ UA_GET_NE32(m_pos + m_len); + if __lzo_unlikely(v == 0) { + do { + m_len += 4; + v = UA_GET_NE32(ip + m_len) ^ UA_GET_NE32(m_pos + m_len); + if (v != 0) + break; + m_len += 4; + v = UA_GET_NE32(ip + m_len) ^ UA_GET_NE32(m_pos + m_len); + if __lzo_unlikely(ip + m_len >= ip_end) + goto m_len_done; + } while (v == 0); + } +#if (LZO_ABI_BIG_ENDIAN) && defined(lzo_bitops_ctlz32) + m_len += lzo_bitops_ctlz32(v) / CHAR_BIT; +#elif (LZO_ABI_BIG_ENDIAN) + if ((v >> (32 - CHAR_BIT)) == 0) do { + v <<= CHAR_BIT; + m_len += 1; + } while ((v >> (32 - CHAR_BIT)) == 0); +#elif (LZO_ABI_LITTLE_ENDIAN) && defined(lzo_bitops_cttz32) + m_len += lzo_bitops_cttz32(v) / CHAR_BIT; +#elif (LZO_ABI_LITTLE_ENDIAN) + if ((v & UCHAR_MAX) == 0) do { + v >>= CHAR_BIT; 
+ m_len += 1; + } while ((v & UCHAR_MAX) == 0); +#else + if (ip[m_len] == m_pos[m_len]) do { + m_len += 1; + } while (ip[m_len] == m_pos[m_len]); +#endif +#else + if __lzo_unlikely(ip[m_len] == m_pos[m_len]) { + do { + m_len += 1; + if (ip[m_len] != m_pos[m_len]) + break; + m_len += 1; + if (ip[m_len] != m_pos[m_len]) + break; + m_len += 1; + if (ip[m_len] != m_pos[m_len]) + break; + m_len += 1; + if (ip[m_len] != m_pos[m_len]) + break; + m_len += 1; + if (ip[m_len] != m_pos[m_len]) + break; + m_len += 1; + if (ip[m_len] != m_pos[m_len]) + break; + m_len += 1; + if (ip[m_len] != m_pos[m_len]) + break; + m_len += 1; + if __lzo_unlikely(ip + m_len >= ip_end) + goto m_len_done; + } while (ip[m_len] == m_pos[m_len]); + } +#endif + } +m_len_done: + m_off = pd(ip,m_pos); + ip += m_len; + ii = ip; + if (m_len <= M2_MAX_LEN && m_off <= M2_MAX_OFFSET) + { + m_off -= 1; +#if defined(LZO1X) + *op++ = LZO_BYTE(((m_len - 1) << 5) | ((m_off & 7) << 2)); + *op++ = LZO_BYTE(m_off >> 3); +#elif defined(LZO1Y) + *op++ = LZO_BYTE(((m_len + 1) << 4) | ((m_off & 3) << 2)); + *op++ = LZO_BYTE(m_off >> 2); +#endif + } + else if (m_off <= M3_MAX_OFFSET) + { + m_off -= 1; + if (m_len <= M3_MAX_LEN) + *op++ = LZO_BYTE(M3_MARKER | (m_len - 2)); + else + { + m_len -= M3_MAX_LEN; + *op++ = M3_MARKER | 0; + while __lzo_unlikely(m_len > 255) + { + m_len -= 255; + UA_SET1(op, 0); + op++; + } + *op++ = LZO_BYTE(m_len); + } + *op++ = LZO_BYTE(m_off << 2); + *op++ = LZO_BYTE(m_off >> 6); + } + else + { + m_off -= 0x4000; + if (m_len <= M4_MAX_LEN) + *op++ = LZO_BYTE(M4_MARKER | ((m_off >> 11) & 8) | (m_len - 2)); + else + { + m_len -= M4_MAX_LEN; + *op++ = LZO_BYTE(M4_MARKER | ((m_off >> 11) & 8)); + while __lzo_unlikely(m_len > 255) + { + m_len -= 255; + UA_SET1(op, 0); + op++; + } + *op++ = LZO_BYTE(m_len); + } + *op++ = LZO_BYTE(m_off << 2); + *op++ = LZO_BYTE(m_off >> 6); + } + goto next; + } + + *out_len = pd(op, out); + return pd(in_end,ii-ti); +} + + 
+/*********************************************************************** +// public entry point +************************************************************************/ + +LZO_PUBLIC(int) +DO_COMPRESS ( const lzo_bytep in , lzo_uint in_len, + lzo_bytep out, lzo_uintp out_len, + lzo_voidp wrkmem ) +{ + const lzo_bytep ip = in; + lzo_bytep op = out; + lzo_uint l = in_len; + lzo_uint t = 0; + + while (l > 20) + { + lzo_uint ll = l; + lzo_uintptr_t ll_end; +#if 0 || (LZO_DETERMINISTIC) + ll = LZO_MIN(ll, 49152); +#endif + ll_end = (lzo_uintptr_t)ip + ll; + if ((ll_end + ((t + ll) >> 5)) <= ll_end || (const lzo_bytep)(ll_end + ((t + ll) >> 5)) <= ip + ll) + break; +#if (LZO_DETERMINISTIC) + lzo_memset(wrkmem, 0, ((lzo_uint)1 << D_BITS) * sizeof(lzo_dict_t)); +#endif + t = do_compress(ip,ll,op,out_len,t,wrkmem); + ip += ll; + op += *out_len; + l -= ll; + } + t += l; + + if (t > 0) + { + const lzo_bytep ii = in + in_len - t; + + if (op == out && t <= 238) + *op++ = LZO_BYTE(17 + t); + else if (t <= 3) + op[-2] = LZO_BYTE(op[-2] | t); + else if (t <= 18) + *op++ = LZO_BYTE(t - 3); + else + { + lzo_uint tt = t - 18; + + *op++ = 0; + while (tt > 255) + { + tt -= 255; + UA_SET1(op, 0); + op++; + } + assert(tt > 0); + *op++ = LZO_BYTE(tt); + } + UA_COPYN(op, ii, t); + op += t; + } + + *op++ = M4_MARKER | 1; + *op++ = 0; + *op++ = 0; + + *out_len = pd(op, out); + return LZO_E_OK; +} + + +/* vim:set ts=4 sw=4 et: */ diff --git a/tools/z64compress/src/enc/lzo/lzo1x_d.ch b/tools/z64compress/src/enc/lzo/lzo1x_d.ch new file mode 100644 index 000000000..b6c6d9947 --- /dev/null +++ b/tools/z64compress/src/enc/lzo/lzo1x_d.ch @@ -0,0 +1,475 @@ +/* lzo1x_d.ch -- implementation of the LZO1X decompression algorithm + + This file is part of the LZO real-time data compression library. + + Copyright (C) 1996-2017 Markus Franz Xaver Johannes Oberhumer + All Rights Reserved. 
+ + The LZO library is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of + the License, or (at your option) any later version. + + The LZO library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with the LZO library; see the file COPYING. + If not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + + Markus F.X.J. Oberhumer + + http://www.oberhumer.com/opensource/lzo/ + */ + + +#include "lzo1_d.ch" + + +/*********************************************************************** +// decompress a block of data. +************************************************************************/ + +#if defined(DO_DECOMPRESS) +LZO_PUBLIC(int) +DO_DECOMPRESS ( const lzo_bytep in , lzo_uint in_len, + lzo_bytep out, lzo_uintp out_len, + lzo_voidp wrkmem ) +#endif +{ + lzo_bytep op; + const lzo_bytep ip; + lzo_uint t; +#if defined(COPY_DICT) + lzo_uint m_off; + const lzo_bytep dict_end; +#else + const lzo_bytep m_pos; +#endif + + const lzo_bytep const ip_end = in + in_len; +#if defined(HAVE_ANY_OP) + lzo_bytep const op_end = out + *out_len; +#endif +#if defined(LZO1Z) + lzo_uint last_m_off = 0; +#endif + + LZO_UNUSED(wrkmem); + +#if defined(COPY_DICT) + if (dict) + { + if (dict_len > M4_MAX_OFFSET) + { + dict += dict_len - M4_MAX_OFFSET; + dict_len = M4_MAX_OFFSET; + } + dict_end = dict + dict_len; + } + else + { + dict_len = 0; + dict_end = NULL; + } +#endif /* COPY_DICT */ + + *out_len = 0; + + op = out; + ip = in; + + NEED_IP(1); + if (*ip > 17) + { + t = *ip++ - 17; + if (t < 4) + goto match_next; + assert(t > 0); NEED_OP(t); NEED_IP(t+3); + do 
*op++ = *ip++; while (--t > 0); + goto first_literal_run; + } + + for (;;) + { + NEED_IP(3); + t = *ip++; + if (t >= 16) + goto match; + /* a literal run */ + if (t == 0) + { + while (*ip == 0) + { + t += 255; + ip++; + TEST_IV(t); + NEED_IP(1); + } + t += 15 + *ip++; + } + /* copy literals */ + assert(t > 0); NEED_OP(t+3); NEED_IP(t+6); +#if (LZO_OPT_UNALIGNED64) && (LZO_OPT_UNALIGNED32) + t += 3; + if (t >= 8) do + { + UA_COPY8(op,ip); + op += 8; ip += 8; t -= 8; + } while (t >= 8); + if (t >= 4) + { + UA_COPY4(op,ip); + op += 4; ip += 4; t -= 4; + } + if (t > 0) + { + *op++ = *ip++; + if (t > 1) { *op++ = *ip++; if (t > 2) { *op++ = *ip++; } } + } +#elif (LZO_OPT_UNALIGNED32) || (LZO_ALIGNED_OK_4) +#if !(LZO_OPT_UNALIGNED32) + if (PTR_ALIGNED2_4(op,ip)) + { +#endif + UA_COPY4(op,ip); + op += 4; ip += 4; + if (--t > 0) + { + if (t >= 4) + { + do { + UA_COPY4(op,ip); + op += 4; ip += 4; t -= 4; + } while (t >= 4); + if (t > 0) do *op++ = *ip++; while (--t > 0); + } + else + do *op++ = *ip++; while (--t > 0); + } +#if !(LZO_OPT_UNALIGNED32) + } + else +#endif +#endif +#if !(LZO_OPT_UNALIGNED32) + { + *op++ = *ip++; *op++ = *ip++; *op++ = *ip++; + do *op++ = *ip++; while (--t > 0); + } +#endif + + +first_literal_run: + + + t = *ip++; + if (t >= 16) + goto match; +#if defined(COPY_DICT) +#if defined(LZO1Z) + m_off = (1 + M2_MAX_OFFSET) + (t << 6) + (*ip++ >> 2); + last_m_off = m_off; +#else + m_off = (1 + M2_MAX_OFFSET) + (t >> 2) + (*ip++ << 2); +#endif + NEED_OP(3); + t = 3; COPY_DICT(t,m_off) +#else /* !COPY_DICT */ +#if defined(LZO1Z) + t = (1 + M2_MAX_OFFSET) + (t << 6) + (*ip++ >> 2); + m_pos = op - t; + last_m_off = t; +#else + m_pos = op - (1 + M2_MAX_OFFSET); + m_pos -= t >> 2; + m_pos -= *ip++ << 2; +#endif + TEST_LB(m_pos); NEED_OP(3); + *op++ = *m_pos++; *op++ = *m_pos++; *op++ = *m_pos; +#endif /* COPY_DICT */ + goto match_done; + + + /* handle matches */ + for (;;) { +match: + if (t >= 64) /* a M2 match */ + { +#if defined(COPY_DICT) +#if defined(LZO1X) 
+ m_off = 1 + ((t >> 2) & 7) + (*ip++ << 3); + t = (t >> 5) - 1; +#elif defined(LZO1Y) + m_off = 1 + ((t >> 2) & 3) + (*ip++ << 2); + t = (t >> 4) - 3; +#elif defined(LZO1Z) + m_off = t & 0x1f; + if (m_off >= 0x1c) + m_off = last_m_off; + else + { + m_off = 1 + (m_off << 6) + (*ip++ >> 2); + last_m_off = m_off; + } + t = (t >> 5) - 1; +#endif +#else /* !COPY_DICT */ +#if defined(LZO1X) + m_pos = op - 1; + m_pos -= (t >> 2) & 7; + m_pos -= *ip++ << 3; + t = (t >> 5) - 1; +#elif defined(LZO1Y) + m_pos = op - 1; + m_pos -= (t >> 2) & 3; + m_pos -= *ip++ << 2; + t = (t >> 4) - 3; +#elif defined(LZO1Z) + { + lzo_uint off = t & 0x1f; + m_pos = op; + if (off >= 0x1c) + { + assert(last_m_off > 0); + m_pos -= last_m_off; + } + else + { + off = 1 + (off << 6) + (*ip++ >> 2); + m_pos -= off; + last_m_off = off; + } + } + t = (t >> 5) - 1; +#endif + TEST_LB(m_pos); assert(t > 0); NEED_OP(t+3-1); + goto copy_match; +#endif /* COPY_DICT */ + } + else if (t >= 32) /* a M3 match */ + { + t &= 31; + if (t == 0) + { + while (*ip == 0) + { + t += 255; + ip++; + TEST_OV(t); + NEED_IP(1); + } + t += 31 + *ip++; + NEED_IP(2); + } +#if defined(COPY_DICT) +#if defined(LZO1Z) + m_off = 1 + (ip[0] << 6) + (ip[1] >> 2); + last_m_off = m_off; +#else + m_off = 1 + (ip[0] >> 2) + (ip[1] << 6); +#endif +#else /* !COPY_DICT */ +#if defined(LZO1Z) + { + lzo_uint off = 1 + (ip[0] << 6) + (ip[1] >> 2); + m_pos = op - off; + last_m_off = off; + } +#elif (LZO_OPT_UNALIGNED16) && (LZO_ABI_LITTLE_ENDIAN) + m_pos = op - 1; + m_pos -= UA_GET_LE16(ip) >> 2; +#else + m_pos = op - 1; + m_pos -= (ip[0] >> 2) + (ip[1] << 6); +#endif +#endif /* COPY_DICT */ + ip += 2; + } + else if (t >= 16) /* a M4 match */ + { +#if defined(COPY_DICT) + m_off = (t & 8) << 11; +#else /* !COPY_DICT */ + m_pos = op; + m_pos -= (t & 8) << 11; +#endif /* COPY_DICT */ + t &= 7; + if (t == 0) + { + while (*ip == 0) + { + t += 255; + ip++; + TEST_OV(t); + NEED_IP(1); + } + t += 7 + *ip++; + NEED_IP(2); + } +#if defined(COPY_DICT) +#if 
defined(LZO1Z) + m_off += (ip[0] << 6) + (ip[1] >> 2); +#else + m_off += (ip[0] >> 2) + (ip[1] << 6); +#endif + ip += 2; + if (m_off == 0) + goto eof_found; + m_off += 0x4000; +#if defined(LZO1Z) + last_m_off = m_off; +#endif +#else /* !COPY_DICT */ +#if defined(LZO1Z) + m_pos -= (ip[0] << 6) + (ip[1] >> 2); +#elif (LZO_OPT_UNALIGNED16) && (LZO_ABI_LITTLE_ENDIAN) + m_pos -= UA_GET_LE16(ip) >> 2; +#else + m_pos -= (ip[0] >> 2) + (ip[1] << 6); +#endif + ip += 2; + if (m_pos == op) + goto eof_found; + m_pos -= 0x4000; +#if defined(LZO1Z) + last_m_off = pd((const lzo_bytep)op, m_pos); +#endif +#endif /* COPY_DICT */ + } + else /* a M1 match */ + { +#if defined(COPY_DICT) +#if defined(LZO1Z) + m_off = 1 + (t << 6) + (*ip++ >> 2); + last_m_off = m_off; +#else + m_off = 1 + (t >> 2) + (*ip++ << 2); +#endif + NEED_OP(2); + t = 2; COPY_DICT(t,m_off) +#else /* !COPY_DICT */ +#if defined(LZO1Z) + t = 1 + (t << 6) + (*ip++ >> 2); + m_pos = op - t; + last_m_off = t; +#else + m_pos = op - 1; + m_pos -= t >> 2; + m_pos -= *ip++ << 2; +#endif + TEST_LB(m_pos); NEED_OP(2); + *op++ = *m_pos++; *op++ = *m_pos; +#endif /* COPY_DICT */ + goto match_done; + } + + /* copy match */ +#if defined(COPY_DICT) + + NEED_OP(t+3-1); + t += 3-1; COPY_DICT(t,m_off) + +#else /* !COPY_DICT */ + + TEST_LB(m_pos); assert(t > 0); NEED_OP(t+3-1); +#if (LZO_OPT_UNALIGNED64) && (LZO_OPT_UNALIGNED32) + if (op - m_pos >= 8) + { + t += (3 - 1); + if (t >= 8) do + { + UA_COPY8(op,m_pos); + op += 8; m_pos += 8; t -= 8; + } while (t >= 8); + if (t >= 4) + { + UA_COPY4(op,m_pos); + op += 4; m_pos += 4; t -= 4; + } + if (t > 0) + { + *op++ = m_pos[0]; + if (t > 1) { *op++ = m_pos[1]; if (t > 2) { *op++ = m_pos[2]; } } + } + } + else +#elif (LZO_OPT_UNALIGNED32) || (LZO_ALIGNED_OK_4) +#if !(LZO_OPT_UNALIGNED32) + if (t >= 2 * 4 - (3 - 1) && PTR_ALIGNED2_4(op,m_pos)) + { + assert((op - m_pos) >= 4); /* both pointers are aligned */ +#else + if (t >= 2 * 4 - (3 - 1) && (op - m_pos) >= 4) + { +#endif + 
UA_COPY4(op,m_pos); + op += 4; m_pos += 4; t -= 4 - (3 - 1); + do { + UA_COPY4(op,m_pos); + op += 4; m_pos += 4; t -= 4; + } while (t >= 4); + if (t > 0) do *op++ = *m_pos++; while (--t > 0); + } + else +#endif + { +copy_match: + *op++ = *m_pos++; *op++ = *m_pos++; + do *op++ = *m_pos++; while (--t > 0); + } + +#endif /* COPY_DICT */ + +match_done: +#if defined(LZO1Z) + t = ip[-1] & 3; +#else + t = ip[-2] & 3; +#endif + if (t == 0) + break; + + /* copy literals */ +match_next: + assert(t > 0); assert(t < 4); NEED_OP(t); NEED_IP(t+3); +#if 0 + do *op++ = *ip++; while (--t > 0); +#else + *op++ = *ip++; + if (t > 1) { *op++ = *ip++; if (t > 2) { *op++ = *ip++; } } +#endif + t = *ip++; + } + } + +eof_found: + *out_len = pd(op, out); + return (ip == ip_end ? LZO_E_OK : + (ip < ip_end ? LZO_E_INPUT_NOT_CONSUMED : LZO_E_INPUT_OVERRUN)); + + +#if defined(HAVE_NEED_IP) +input_overrun: + *out_len = pd(op, out); + return LZO_E_INPUT_OVERRUN; +#endif + +#if defined(HAVE_NEED_OP) +output_overrun: + *out_len = pd(op, out); + return LZO_E_OUTPUT_OVERRUN; +#endif + +#if defined(LZO_TEST_OVERRUN_LOOKBEHIND) +lookbehind_overrun: + *out_len = pd(op, out); + return LZO_E_LOOKBEHIND_OVERRUN; +#endif +} + + +/* vim:set ts=4 sw=4 et: */ diff --git a/tools/z64compress/src/enc/lzo/lzo1x_d1.c b/tools/z64compress/src/enc/lzo/lzo1x_d1.c new file mode 100644 index 000000000..68faf48e8 --- /dev/null +++ b/tools/z64compress/src/enc/lzo/lzo1x_d1.c @@ -0,0 +1,36 @@ +/* lzo1x_d1.c -- LZO1X decompression + + This file is part of the LZO real-time data compression library. + + Copyright (C) 1996-2017 Markus Franz Xaver Johannes Oberhumer + All Rights Reserved. + + The LZO library is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of + the License, or (at your option) any later version. 
+ + The LZO library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with the LZO library; see the file COPYING. + If not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + + Markus F.X.J. Oberhumer + + http://www.oberhumer.com/opensource/lzo/ + */ + + +#include "config1x.h" + +#undef LZO_TEST_OVERRUN +#define DO_DECOMPRESS lzo1x_decompress + +#include "lzo1x_d.ch" + +/* vim:set ts=4 sw=4 et: */ diff --git a/tools/z64compress/src/enc/lzo/lzo1x_d2.c b/tools/z64compress/src/enc/lzo/lzo1x_d2.c new file mode 100644 index 000000000..8b7c316af --- /dev/null +++ b/tools/z64compress/src/enc/lzo/lzo1x_d2.c @@ -0,0 +1,61 @@ +/* lzo1x_d2.c -- LZO1X decompression with overrun testing + + This file is part of the LZO real-time data compression library. + + Copyright (C) 1996-2017 Markus Franz Xaver Johannes Oberhumer + All Rights Reserved. + + The LZO library is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of + the License, or (at your option) any later version. + + The LZO library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with the LZO library; see the file COPYING. + If not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + + Markus F.X.J. 
Oberhumer + + http://www.oberhumer.com/opensource/lzo/ + */ + + +#include "config1x.h" + +#define LZO_TEST_OVERRUN 1 +#define DO_DECOMPRESS lzo1x_decompress_safe + +#include "lzo1x_d.ch" + +#if defined(LZO_ARCH_I386) && defined(LZO_USE_ASM) +LZO_EXTERN(int) lzo1x_decompress_asm_safe + (const lzo_bytep src, lzo_uint src_len, + lzo_bytep dst, lzo_uintp dst_len, + lzo_voidp wrkmem); +LZO_PUBLIC(int) lzo1x_decompress_asm_safe + (const lzo_bytep src, lzo_uint src_len, + lzo_bytep dst, lzo_uintp dst_len, + lzo_voidp wrkmem) +{ + return lzo1x_decompress_safe(src, src_len, dst, dst_len, wrkmem); +} +LZO_EXTERN(int) lzo1x_decompress_asm_fast_safe + (const lzo_bytep src, lzo_uint src_len, + lzo_bytep dst, lzo_uintp dst_len, + lzo_voidp wrkmem); +LZO_PUBLIC(int) lzo1x_decompress_asm_fast_safe + (const lzo_bytep src, lzo_uint src_len, + lzo_bytep dst, lzo_uintp dst_len, + lzo_voidp wrkmem) +{ + return lzo1x_decompress_safe(src, src_len, dst, dst_len, wrkmem); +} +#endif + +/* vim:set ts=4 sw=4 et: */ diff --git a/tools/z64compress/src/enc/lzo/lzo1x_d3.c b/tools/z64compress/src/enc/lzo/lzo1x_d3.c new file mode 100644 index 000000000..b3d430f85 --- /dev/null +++ b/tools/z64compress/src/enc/lzo/lzo1x_d3.c @@ -0,0 +1,93 @@ +/* lzo1x_d3.c -- LZO1X decompression with preset dictionary + + This file is part of the LZO real-time data compression library. + + Copyright (C) 1996-2017 Markus Franz Xaver Johannes Oberhumer + All Rights Reserved. + + The LZO library is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of + the License, or (at your option) any later version. + + The LZO library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with the LZO library; see the file COPYING. + If not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + + Markus F.X.J. Oberhumer + + http://www.oberhumer.com/opensource/lzo/ + */ + + +#include "config1x.h" + +#define LZO_TEST_OVERRUN 1 + + +#define SLOW_MEMCPY(a,b,l) { do *a++ = *b++; while (--l > 0); } +#define FAST_MEMCPY(a,b,l) { lzo_memcpy(a,b,l); a += l; } + +#if 1 && defined(FAST_MEMCPY) +# define DICT_MEMMOVE(op,m_pos,m_len,m_off) \ + if (m_off >= (m_len)) \ + FAST_MEMCPY(op,m_pos,m_len) \ + else \ + SLOW_MEMCPY(op,m_pos,m_len) +#else +# define DICT_MEMMOVE(op,m_pos,m_len,m_off) \ + SLOW_MEMCPY(op,m_pos,m_len) +#endif + +#if !defined(FAST_MEMCPY) +# define FAST_MEMCPY SLOW_MEMCPY +#endif + + +#define COPY_DICT_DICT(m_len,m_off) \ + { \ + const lzo_bytep m_pos; \ + m_off -= pd(op, out); assert(m_off > 0); \ + if (m_off > dict_len) goto lookbehind_overrun; \ + m_pos = dict_end - m_off; \ + if (m_len > m_off) \ + { \ + m_len -= m_off; \ + FAST_MEMCPY(op,m_pos,m_off) \ + m_pos = out; \ + SLOW_MEMCPY(op,m_pos,m_len) \ + } \ + else \ + FAST_MEMCPY(op,m_pos,m_len) \ + } + +#define COPY_DICT(m_len,m_off) \ + assert(m_len >= 2); assert(m_off > 0); assert(op > out); \ + if (m_off <= pd(op, out)) \ + { \ + const lzo_bytep m_pos = op - m_off; \ + DICT_MEMMOVE(op,m_pos,m_len,m_off) \ + } \ + else \ + COPY_DICT_DICT(m_len,m_off) + + + + +LZO_PUBLIC(int) +lzo1x_decompress_dict_safe ( const lzo_bytep in, lzo_uint in_len, + lzo_bytep out, lzo_uintp out_len, + lzo_voidp wrkmem /* NOT USED */, + const lzo_bytep dict, lzo_uint dict_len) + + +#include "lzo1x_d.ch" + + +/* vim:set ts=4 sw=4 et: */ diff --git a/tools/z64compress/src/enc/lzo/lzo_conf.h b/tools/z64compress/src/enc/lzo/lzo_conf.h new file mode 100644 index 000000000..fff021d1a --- /dev/null +++ b/tools/z64compress/src/enc/lzo/lzo_conf.h @@ -0,0 +1,436 @@ +/* lzo_conf.h -- main 
internal configuration file for the LZO library + + This file is part of the LZO real-time data compression library. + + Copyright (C) 1996-2017 Markus Franz Xaver Johannes Oberhumer + All Rights Reserved. + + The LZO library is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of + the License, or (at your option) any later version. + + The LZO library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with the LZO library; see the file COPYING. + If not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + + Markus F.X.J. Oberhumer + + http://www.oberhumer.com/opensource/lzo/ + */ + + +/* WARNING: this file should *not* be used by applications. It is + part of the implementation of the library and is subject + to change. 
+ */ + + +#ifndef __LZO_CONF_H +#define __LZO_CONF_H 1 + +#if !defined(__LZO_IN_MINILZO) +#if defined(LZO_CFG_FREESTANDING) && (LZO_CFG_FREESTANDING) +# define LZO_LIBC_FREESTANDING 1 +# define LZO_OS_FREESTANDING 1 +#endif +#if defined(LZO_CFG_EXTRA_CONFIG_HEADER) +# include LZO_CFG_EXTRA_CONFIG_HEADER +#endif +#if defined(__LZOCONF_H) || defined(__LZOCONF_H_INCLUDED) +# error "include this file first" +#endif +#if defined(LZO_CFG_BUILD_DLL) && (LZO_CFG_BUILD_DLL+0) && !defined(__LZO_EXPORT1) && !defined(__LZO_EXPORT2) && 0 + /* idea: we could auto-define __LZO_EXPORT1 for DLL exports */ +#ifndef __LZODEFS_H_INCLUDED +#if defined(LZO_HAVE_CONFIG_H) +# include +#endif +#include +#include +#include "lzodefs.h" +#endif + /* #define __LZO_EXPORT1 __attribute__((__visibility__("default"))) */ + /* #define __LZO_EXPORT1 __declspec(dllexport) */ +#endif +#include "lzoconf.h" +#if defined(LZO_CFG_EXTRA_CONFIG_HEADER2) +# include LZO_CFG_EXTRA_CONFIG_HEADER2 +#endif +#endif /* !defined(__LZO_IN_MINILZO) */ + +#if !defined(__LZOCONF_H_INCLUDED) || (LZO_VERSION+0 != 0x20a0) +# error "version mismatch" +#endif + + +/*********************************************************************** +// pragmas +************************************************************************/ + +#if (LZO_CC_MSC && (_MSC_VER >= 1000 && _MSC_VER < 1100)) + /* disable bogus "unreachable code" warnings */ +# pragma warning(disable: 4702) +#endif +#if (LZO_CC_MSC && (_MSC_VER >= 1000)) +# pragma warning(disable: 4127 4701) + /* disable warnings about inlining */ +# pragma warning(disable: 4514 4710 4711) +#endif +#if (LZO_CC_MSC && (_MSC_VER >= 1300)) + /* disable '-Wall' warnings in system header files */ +# pragma warning(disable: 4820) +#endif +#if (LZO_CC_MSC && (_MSC_VER >= 1800)) + /* disable '-Wall' warnings in system header files */ +# pragma warning(disable: 4746) +#endif +#if (LZO_CC_INTELC && (__INTEL_COMPILER >= 900)) + /* disable pedantic warnings in system header files */ +# pragma 
warning(disable: 1684) +#endif + +#if (LZO_CC_SUNPROC) +#if !defined(__cplusplus) +# pragma error_messages(off,E_END_OF_LOOP_CODE_NOT_REACHED) +# pragma error_messages(off,E_LOOP_NOT_ENTERED_AT_TOP) +# pragma error_messages(off,E_STATEMENT_NOT_REACHED) +#endif +#endif + + +/*********************************************************************** +// function types +************************************************************************/ + +#if !defined(__LZO_NOEXPORT1) +# define __LZO_NOEXPORT1 /*empty*/ +#endif +#if !defined(__LZO_NOEXPORT2) +# define __LZO_NOEXPORT2 /*empty*/ +#endif + +#if 1 +# define LZO_PUBLIC_DECL(r) LZO_EXTERN(r) +#endif +#if 1 +# define LZO_PUBLIC_IMPL(r) LZO_PUBLIC(r) +#endif +#if !defined(LZO_LOCAL_DECL) +# define LZO_LOCAL_DECL(r) __LZO_EXTERN_C LZO_LOCAL_IMPL(r) +#endif +#if !defined(LZO_LOCAL_IMPL) +# define LZO_LOCAL_IMPL(r) __LZO_NOEXPORT1 r __LZO_NOEXPORT2 __LZO_CDECL +#endif +#if 1 +# define LZO_STATIC_DECL(r) LZO_PRIVATE(r) +#endif +#if 1 +# define LZO_STATIC_IMPL(r) LZO_PRIVATE(r) +#endif + + +/*********************************************************************** +// +************************************************************************/ + +#if defined(__LZO_IN_MINILZO) || (LZO_CFG_FREESTANDING) +#elif 1 +# include +#else +# define LZO_WANT_ACC_INCD_H 1 +#endif +#if defined(LZO_HAVE_CONFIG_H) +# define LZO_CFG_NO_CONFIG_HEADER 1 +#endif +#include "lzo_supp.h" + +/* Integral types */ +#if 1 || defined(lzo_int8_t) || defined(lzo_uint8_t) +LZO_COMPILE_TIME_ASSERT_HEADER(sizeof(lzo_int8_t) == 1) +LZO_COMPILE_TIME_ASSERT_HEADER(sizeof(lzo_uint8_t) == 1) +#endif +#if 1 || defined(lzo_int16_t) || defined(lzo_uint16_t) +LZO_COMPILE_TIME_ASSERT_HEADER(sizeof(lzo_int16_t) == 2) +LZO_COMPILE_TIME_ASSERT_HEADER(sizeof(lzo_uint16_t) == 2) +#endif +#if 1 || defined(lzo_int32_t) || defined(lzo_uint32_t) +LZO_COMPILE_TIME_ASSERT_HEADER(sizeof(lzo_int32_t) == 4) +LZO_COMPILE_TIME_ASSERT_HEADER(sizeof(lzo_uint32_t) == 4) +#endif +#if 
defined(lzo_int64_t) || defined(lzo_uint64_t) +LZO_COMPILE_TIME_ASSERT_HEADER(sizeof(lzo_int64_t) == 8) +LZO_COMPILE_TIME_ASSERT_HEADER(sizeof(lzo_uint64_t) == 8) +#endif + +#if (LZO_CFG_FREESTANDING) +# undef HAVE_MEMCMP +# undef HAVE_MEMCPY +# undef HAVE_MEMMOVE +# undef HAVE_MEMSET +#endif + +#if !(HAVE_MEMCMP) +# undef memcmp +# define memcmp(a,b,c) lzo_memcmp(a,b,c) +#else +# undef lzo_memcmp +# define lzo_memcmp(a,b,c) memcmp(a,b,c) +#endif +#if !(HAVE_MEMCPY) +# undef memcpy +# define memcpy(a,b,c) lzo_memcpy(a,b,c) +#else +# undef lzo_memcpy +# define lzo_memcpy(a,b,c) memcpy(a,b,c) +#endif +#if !(HAVE_MEMMOVE) +# undef memmove +# define memmove(a,b,c) lzo_memmove(a,b,c) +#else +# undef lzo_memmove +# define lzo_memmove(a,b,c) memmove(a,b,c) +#endif +#if !(HAVE_MEMSET) +# undef memset +# define memset(a,b,c) lzo_memset(a,b,c) +#else +# undef lzo_memset +# define lzo_memset(a,b,c) memset(a,b,c) +#endif + +#undef NDEBUG +#if (LZO_CFG_FREESTANDING) +# undef LZO_DEBUG +# define NDEBUG 1 +# undef assert +# define assert(e) ((void)0) +#else +# if !defined(LZO_DEBUG) +# define NDEBUG 1 +# endif +# include +#endif + +#if 0 && defined(__BOUNDS_CHECKING_ON) +# include +#else +# define BOUNDS_CHECKING_OFF_DURING(stmt) stmt +# define BOUNDS_CHECKING_OFF_IN_EXPR(expr) (expr) +#endif + +#if (LZO_CFG_PGO) +# undef __lzo_likely +# undef __lzo_unlikely +# define __lzo_likely(e) (e) +# define __lzo_unlikely(e) (e) +#endif + +#undef _ +#undef __ +#undef ___ +#undef ____ +#undef _p0 +#undef _p1 +#undef _p2 +#undef _p3 +#undef _p4 +#undef _s0 +#undef _s1 +#undef _s2 +#undef _s3 +#undef _s4 +#undef _ww + + +/*********************************************************************** +// +************************************************************************/ + +#if 1 +# define LZO_BYTE(x) ((unsigned char) (x)) +#else +# define LZO_BYTE(x) ((unsigned char) ((x) & 0xff)) +#endif + +#define LZO_MAX(a,b) ((a) >= (b) ? (a) : (b)) +#define LZO_MIN(a,b) ((a) <= (b) ? 
(a) : (b)) +#define LZO_MAX3(a,b,c) ((a) >= (b) ? LZO_MAX(a,c) : LZO_MAX(b,c)) +#define LZO_MIN3(a,b,c) ((a) <= (b) ? LZO_MIN(a,c) : LZO_MIN(b,c)) + +#define lzo_sizeof(type) ((lzo_uint) (sizeof(type))) + +#define LZO_HIGH(array) ((lzo_uint) (sizeof(array)/sizeof(*(array)))) + +/* this always fits into 32 bits */ +#define LZO_SIZE(bits) (1u << (bits)) +#define LZO_MASK(bits) (LZO_SIZE(bits) - 1) + +#define LZO_USIZE(bits) ((lzo_uint) 1 << (bits)) +#define LZO_UMASK(bits) (LZO_USIZE(bits) - 1) + +#if !defined(DMUL) +#if 0 + /* 32*32 multiplies may be faster than 64*64 on some 64-bit machines, + * but then we need extra casts from unsigned<->size_t */ +# define DMUL(a,b) ((lzo_xint) ((lzo_uint32_t)(a) * (lzo_uint32_t)(b))) +#else +# define DMUL(a,b) ((lzo_xint) ((a) * (b))) +#endif +#endif + + +/*********************************************************************** +// compiler and architecture specific stuff +************************************************************************/ + +/* Some defines that indicate if memory can be accessed at unaligned + * memory addresses. You should also test that this is actually faster + * even if it is allowed by your system. 
+ */ + +#include "lzo_func.h" + +#ifndef UA_SET1 +#define UA_SET1 LZO_MEMOPS_SET1 +#endif +#ifndef UA_SET2 +#define UA_SET2 LZO_MEMOPS_SET2 +#endif +#ifndef UA_SET3 +#define UA_SET3 LZO_MEMOPS_SET3 +#endif +#ifndef UA_SET4 +#define UA_SET4 LZO_MEMOPS_SET4 +#endif +#ifndef UA_MOVE1 +#define UA_MOVE1 LZO_MEMOPS_MOVE1 +#endif +#ifndef UA_MOVE2 +#define UA_MOVE2 LZO_MEMOPS_MOVE2 +#endif +#ifndef UA_MOVE3 +#define UA_MOVE3 LZO_MEMOPS_MOVE3 +#endif +#ifndef UA_MOVE4 +#define UA_MOVE4 LZO_MEMOPS_MOVE4 +#endif +#ifndef UA_MOVE8 +#define UA_MOVE8 LZO_MEMOPS_MOVE8 +#endif +#ifndef UA_COPY1 +#define UA_COPY1 LZO_MEMOPS_COPY1 +#endif +#ifndef UA_COPY2 +#define UA_COPY2 LZO_MEMOPS_COPY2 +#endif +#ifndef UA_COPY3 +#define UA_COPY3 LZO_MEMOPS_COPY3 +#endif +#ifndef UA_COPY4 +#define UA_COPY4 LZO_MEMOPS_COPY4 +#endif +#ifndef UA_COPY8 +#define UA_COPY8 LZO_MEMOPS_COPY8 +#endif +#ifndef UA_COPYN +#define UA_COPYN LZO_MEMOPS_COPYN +#endif +#ifndef UA_COPYN_X +#define UA_COPYN_X LZO_MEMOPS_COPYN +#endif +#ifndef UA_GET_LE16 +#define UA_GET_LE16 LZO_MEMOPS_GET_LE16 +#endif +#ifndef UA_GET_LE32 +#define UA_GET_LE32 LZO_MEMOPS_GET_LE32 +#endif +#ifdef LZO_MEMOPS_GET_LE64 +#ifndef UA_GET_LE64 +#define UA_GET_LE64 LZO_MEMOPS_GET_LE64 +#endif +#endif +#ifndef UA_GET_NE16 +#define UA_GET_NE16 LZO_MEMOPS_GET_NE16 +#endif +#ifndef UA_GET_NE32 +#define UA_GET_NE32 LZO_MEMOPS_GET_NE32 +#endif +#ifdef LZO_MEMOPS_GET_NE64 +#ifndef UA_GET_NE64 +#define UA_GET_NE64 LZO_MEMOPS_GET_NE64 +#endif +#endif +#ifndef UA_PUT_LE16 +#define UA_PUT_LE16 LZO_MEMOPS_PUT_LE16 +#endif +#ifndef UA_PUT_LE32 +#define UA_PUT_LE32 LZO_MEMOPS_PUT_LE32 +#endif +#ifndef UA_PUT_NE16 +#define UA_PUT_NE16 LZO_MEMOPS_PUT_NE16 +#endif +#ifndef UA_PUT_NE32 +#define UA_PUT_NE32 LZO_MEMOPS_PUT_NE32 +#endif + + +/* Fast memcpy that copies multiples of 8 byte chunks. + * len is the number of bytes. 
+ * note: all parameters must be lvalues, len >= 8 + * dest and src advance, len is undefined afterwards + */ + +#define MEMCPY8_DS(dest,src,len) \ + lzo_memcpy(dest,src,len); dest += len; src += len + +#define BZERO8_PTR(s,l,n) \ + lzo_memset((lzo_voidp)(s),0,(lzo_uint)(l)*(n)) + +#define MEMCPY_DS(dest,src,len) \ + do *dest++ = *src++; while (--len > 0) + + +/*********************************************************************** +// +************************************************************************/ + +LZO_EXTERN(const lzo_bytep) lzo_copyright(void); + +#include "lzo_ptr.h" + +/* Generate compressed data in a deterministic way. + * This is fully portable, and compression can be faster as well. + * A reason NOT to be deterministic is when the block size is + * very small (e.g. 8kB) or the dictionary is big, because + * then the initialization of the dictionary becomes a relevant + * magnitude for compression speed. + */ +#ifndef LZO_DETERMINISTIC +#define LZO_DETERMINISTIC 1 +#endif + + +#ifndef LZO_DICT_USE_PTR +#define LZO_DICT_USE_PTR 1 +#endif + +#if (LZO_DICT_USE_PTR) +# define lzo_dict_t const lzo_bytep +# define lzo_dict_p lzo_dict_t * +#else +# define lzo_dict_t lzo_uint +# define lzo_dict_p lzo_dict_t * +#endif + + +#endif /* already included */ + + +/* vim:set ts=4 sw=4 et: */ diff --git a/tools/z64compress/src/enc/lzo/lzo_dict.h b/tools/z64compress/src/enc/lzo/lzo_dict.h new file mode 100644 index 000000000..e48addb17 --- /dev/null +++ b/tools/z64compress/src/enc/lzo/lzo_dict.h @@ -0,0 +1,307 @@ +/* lzo_dict.h -- dictionary definitions for the the LZO library + + This file is part of the LZO real-time data compression library. + + Copyright (C) 1996-2017 Markus Franz Xaver Johannes Oberhumer + All Rights Reserved. 
+ + The LZO library is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of + the License, or (at your option) any later version. + + The LZO library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with the LZO library; see the file COPYING. + If not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + + Markus F.X.J. Oberhumer + + http://www.oberhumer.com/opensource/lzo/ + */ + + +/* WARNING: this file should *not* be used by applications. It is + part of the implementation of the library and is subject + to change. + */ + + +#ifndef __LZO_DICT_H +#define __LZO_DICT_H 1 + +#ifdef __cplusplus +extern "C" { +#endif + + + +/*********************************************************************** +// dictionary size +************************************************************************/ + +/* dictionary needed for compression */ +#if !defined(D_BITS) && defined(DBITS) +# define D_BITS DBITS +#endif +#if !defined(D_BITS) +# error "D_BITS is not defined" +#endif +#if (D_BITS < 16) +# define D_SIZE LZO_SIZE(D_BITS) +# define D_MASK LZO_MASK(D_BITS) +#else +# define D_SIZE LZO_USIZE(D_BITS) +# define D_MASK LZO_UMASK(D_BITS) +#endif +#define D_HIGH ((D_MASK >> 1) + 1) + + +/* dictionary depth */ +#if !defined(DD_BITS) +# define DD_BITS 0 +#endif +#define DD_SIZE LZO_SIZE(DD_BITS) +#define DD_MASK LZO_MASK(DD_BITS) + +/* dictionary length */ +#if !defined(DL_BITS) +# define DL_BITS (D_BITS - DD_BITS) +#endif +#if (DL_BITS < 16) +# define DL_SIZE LZO_SIZE(DL_BITS) +# define DL_MASK LZO_MASK(DL_BITS) +#else +# define DL_SIZE LZO_USIZE(DL_BITS) 
+# define DL_MASK LZO_UMASK(DL_BITS) +#endif + + +#if (D_BITS != DL_BITS + DD_BITS) +# error "D_BITS does not match" +#endif +#if (D_BITS < 6 || D_BITS > 18) +# error "invalid D_BITS" +#endif +#if (DL_BITS < 6 || DL_BITS > 20) +# error "invalid DL_BITS" +#endif +#if (DD_BITS < 0 || DD_BITS > 6) +# error "invalid DD_BITS" +#endif + + +#if !defined(DL_MIN_LEN) +# define DL_MIN_LEN 3 +#endif +#if !defined(DL_SHIFT) +# define DL_SHIFT ((DL_BITS + (DL_MIN_LEN - 1)) / DL_MIN_LEN) +#endif + + + +/*********************************************************************** +// dictionary access +************************************************************************/ + +#define LZO_HASH_GZIP 1 +#define LZO_HASH_GZIP_INCREMENTAL 2 +#define LZO_HASH_LZO_INCREMENTAL_A 3 +#define LZO_HASH_LZO_INCREMENTAL_B 4 + +#if !defined(LZO_HASH) +# error "choose a hashing strategy" +#endif + +#undef DM +#undef DX + +#if (DL_MIN_LEN == 3) +# define _DV2_A(p,shift1,shift2) \ + (((( (lzo_xint)((p)[0]) << shift1) ^ (p)[1]) << shift2) ^ (p)[2]) +# define _DV2_B(p,shift1,shift2) \ + (((( (lzo_xint)((p)[2]) << shift1) ^ (p)[1]) << shift2) ^ (p)[0]) +# define _DV3_B(p,shift1,shift2,shift3) \ + ((_DV2_B((p)+1,shift1,shift2) << (shift3)) ^ (p)[0]) +#elif (DL_MIN_LEN == 2) +# define _DV2_A(p,shift1,shift2) \ + (( (lzo_xint)(p[0]) << shift1) ^ p[1]) +# define _DV2_B(p,shift1,shift2) \ + (( (lzo_xint)(p[1]) << shift1) ^ p[2]) +#else +# error "invalid DL_MIN_LEN" +#endif +#define _DV_A(p,shift) _DV2_A(p,shift,shift) +#define _DV_B(p,shift) _DV2_B(p,shift,shift) +#define DA2(p,s1,s2) \ + (((((lzo_xint)((p)[2]) << (s2)) + (p)[1]) << (s1)) + (p)[0]) +#define DS2(p,s1,s2) \ + (((((lzo_xint)((p)[2]) << (s2)) - (p)[1]) << (s1)) - (p)[0]) +#define DX2(p,s1,s2) \ + (((((lzo_xint)((p)[2]) << (s2)) ^ (p)[1]) << (s1)) ^ (p)[0]) +#define DA3(p,s1,s2,s3) ((DA2((p)+1,s2,s3) << (s1)) + (p)[0]) +#define DS3(p,s1,s2,s3) ((DS2((p)+1,s2,s3) << (s1)) - (p)[0]) +#define DX3(p,s1,s2,s3) ((DX2((p)+1,s2,s3) << (s1)) ^ (p)[0]) 
+#define DMS(v,s) ((lzo_uint) (((v) & (D_MASK >> (s))) << (s))) +#define DM(v) DMS(v,0) + + +#if (LZO_HASH == LZO_HASH_GZIP) + /* hash function like in gzip/zlib (deflate) */ +# define _DINDEX(dv,p) (_DV_A((p),DL_SHIFT)) + +#elif (LZO_HASH == LZO_HASH_GZIP_INCREMENTAL) + /* incremental hash like in gzip/zlib (deflate) */ +# define __LZO_HASH_INCREMENTAL 1 +# define DVAL_FIRST(dv,p) dv = _DV_A((p),DL_SHIFT) +# define DVAL_NEXT(dv,p) dv = (((dv) << DL_SHIFT) ^ p[2]) +# define _DINDEX(dv,p) (dv) +# define DVAL_LOOKAHEAD DL_MIN_LEN + +#elif (LZO_HASH == LZO_HASH_LZO_INCREMENTAL_A) + /* incremental LZO hash version A */ +# define __LZO_HASH_INCREMENTAL 1 +# define DVAL_FIRST(dv,p) dv = _DV_A((p),5) +# define DVAL_NEXT(dv,p) \ + dv ^= (lzo_xint)(p[-1]) << (2*5); dv = (((dv) << 5) ^ p[2]) +# define _DINDEX(dv,p) ((DMUL(0x9f5f,dv)) >> 5) +# define DVAL_LOOKAHEAD DL_MIN_LEN + +#elif (LZO_HASH == LZO_HASH_LZO_INCREMENTAL_B) + /* incremental LZO hash version B */ +# define __LZO_HASH_INCREMENTAL 1 +# define DVAL_FIRST(dv,p) dv = _DV_B((p),5) +# define DVAL_NEXT(dv,p) \ + dv ^= p[-1]; dv = (((dv) >> 5) ^ ((lzo_xint)(p[2]) << (2*5))) +# define _DINDEX(dv,p) ((DMUL(0x9f5f,dv)) >> 5) +# define DVAL_LOOKAHEAD DL_MIN_LEN + +#else +# error "choose a hashing strategy" +#endif + + +#ifndef DINDEX +#define DINDEX(dv,p) ((lzo_uint)((_DINDEX(dv,p)) & DL_MASK) << DD_BITS) +#endif +#if !defined(DINDEX1) && defined(D_INDEX1) +#define DINDEX1 D_INDEX1 +#endif +#if !defined(DINDEX2) && defined(D_INDEX2) +#define DINDEX2 D_INDEX2 +#endif + + + +#if !defined(__LZO_HASH_INCREMENTAL) +# define DVAL_FIRST(dv,p) ((void) 0) +# define DVAL_NEXT(dv,p) ((void) 0) +# define DVAL_LOOKAHEAD 0 +#endif + + +#if !defined(DVAL_ASSERT) +#if defined(__LZO_HASH_INCREMENTAL) && !defined(NDEBUG) +#if 1 && (LZO_CC_ARMCC_GNUC || LZO_CC_CLANG || (LZO_CC_GNUC >= 0x020700ul) || LZO_CC_INTELC_GNUC || LZO_CC_LLVM || LZO_CC_PATHSCALE || LZO_CC_PGI) +static void __attribute__((__unused__)) +#else +static void +#endif 
+DVAL_ASSERT(lzo_xint dv, const lzo_bytep p) +{ + lzo_xint df; + DVAL_FIRST(df,(p)); + assert(DINDEX(dv,p) == DINDEX(df,p)); +} +#else +# define DVAL_ASSERT(dv,p) ((void) 0) +#endif +#endif + + + +/*********************************************************************** +// dictionary updating +************************************************************************/ + +#if (LZO_DICT_USE_PTR) +# define DENTRY(p,in) (p) +# define GINDEX(m_pos,m_off,dict,dindex,in) m_pos = dict[dindex] +#else +# define DENTRY(p,in) ((lzo_dict_t) pd(p, in)) +# define GINDEX(m_pos,m_off,dict,dindex,in) m_off = dict[dindex] +#endif + + +#if (DD_BITS == 0) + +# define UPDATE_D(dict,drun,dv,p,in) dict[ DINDEX(dv,p) ] = DENTRY(p,in) +# define UPDATE_I(dict,drun,index,p,in) dict[index] = DENTRY(p,in) +# define UPDATE_P(ptr,drun,p,in) (ptr)[0] = DENTRY(p,in) + +#else + +# define UPDATE_D(dict,drun,dv,p,in) \ + dict[ DINDEX(dv,p) + drun++ ] = DENTRY(p,in); drun &= DD_MASK +# define UPDATE_I(dict,drun,index,p,in) \ + dict[ (index) + drun++ ] = DENTRY(p,in); drun &= DD_MASK +# define UPDATE_P(ptr,drun,p,in) \ + (ptr) [ drun++ ] = DENTRY(p,in); drun &= DD_MASK + +#endif + + +/*********************************************************************** +// test for a match +************************************************************************/ + +#if (LZO_DICT_USE_PTR) + +/* m_pos is either NULL or a valid pointer */ +#define LZO_CHECK_MPOS_DET(m_pos,m_off,in,ip,max_offset) \ + (m_pos == NULL || (m_off = pd(ip, m_pos)) > max_offset) + +/* m_pos may point anywhere... 
*/ +#define LZO_CHECK_MPOS_NON_DET(m_pos,m_off,in,ip,max_offset) \ + (BOUNDS_CHECKING_OFF_IN_EXPR(( \ + m_pos = ip - (lzo_uint) PTR_DIFF(ip,m_pos), \ + PTR_LT(m_pos,in) || \ + (m_off = (lzo_uint) PTR_DIFF(ip,m_pos)) == 0 || \ + m_off > max_offset ))) + +#else + +#define LZO_CHECK_MPOS_DET(m_pos,m_off,in,ip,max_offset) \ + (m_off == 0 || \ + ((m_off = pd(ip, in) - m_off) > max_offset) || \ + (m_pos = (ip) - (m_off), 0) ) + +#define LZO_CHECK_MPOS_NON_DET(m_pos,m_off,in,ip,max_offset) \ + (pd(ip, in) <= m_off || \ + ((m_off = pd(ip, in) - m_off) > max_offset) || \ + (m_pos = (ip) - (m_off), 0) ) + +#endif + + +#if (LZO_DETERMINISTIC) +# define LZO_CHECK_MPOS LZO_CHECK_MPOS_DET +#else +# define LZO_CHECK_MPOS LZO_CHECK_MPOS_NON_DET +#endif + + + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* already included */ + + +/* vim:set ts=4 sw=4 et: */ diff --git a/tools/z64compress/src/enc/lzo/lzo_func.h b/tools/z64compress/src/enc/lzo/lzo_func.h new file mode 100644 index 000000000..f3ac8e344 --- /dev/null +++ b/tools/z64compress/src/enc/lzo/lzo_func.h @@ -0,0 +1,491 @@ +/* lzo_func.h -- functions + + This file is part of the LZO real-time data compression library. + + Copyright (C) 1996-2017 Markus Franz Xaver Johannes Oberhumer + All Rights Reserved. + + The LZO library is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of + the License, or (at your option) any later version. + + The LZO library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with the LZO library; see the file COPYING. 
+ If not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + + Markus F.X.J. Oberhumer + + http://www.oberhumer.com/opensource/lzo/ + */ + + +/* WARNING: this file should *not* be used by applications. It is + part of the implementation of the library and is subject + to change. + */ + + +#ifndef __LZO_FUNC_H +#define __LZO_FUNC_H 1 + + +/*********************************************************************** +// bitops +************************************************************************/ + +#if !defined(LZO_BITOPS_USE_ASM_BITSCAN) && !defined(LZO_BITOPS_USE_GNUC_BITSCAN) && !defined(LZO_BITOPS_USE_MSC_BITSCAN) +#if 1 && (LZO_ARCH_AMD64) && (LZO_CC_GNUC && (LZO_CC_GNUC < 0x040000ul)) && (LZO_ASM_SYNTAX_GNUC) +#define LZO_BITOPS_USE_ASM_BITSCAN 1 +#elif (LZO_CC_CLANG || (LZO_CC_GNUC >= 0x030400ul) || (LZO_CC_INTELC_GNUC && (__INTEL_COMPILER >= 1000)) || (LZO_CC_LLVM && (!defined(__llvm_tools_version__) || (__llvm_tools_version__+0 >= 0x010500ul)))) +#define LZO_BITOPS_USE_GNUC_BITSCAN 1 +#elif (LZO_OS_WIN32 || LZO_OS_WIN64) && ((LZO_CC_INTELC_MSC && (__INTEL_COMPILER >= 1010)) || (LZO_CC_MSC && (_MSC_VER >= 1400))) +#define LZO_BITOPS_USE_MSC_BITSCAN 1 +#if (LZO_CC_MSC) && (LZO_ARCH_AMD64 || LZO_ARCH_I386) +#include +#endif +#if (LZO_CC_MSC) && (LZO_ARCH_AMD64 || LZO_ARCH_I386) +#pragma intrinsic(_BitScanReverse) +#pragma intrinsic(_BitScanForward) +#endif +#if (LZO_CC_MSC) && (LZO_ARCH_AMD64) +#pragma intrinsic(_BitScanReverse64) +#pragma intrinsic(_BitScanForward64) +#endif +#endif +#endif + +__lzo_static_forceinline unsigned lzo_bitops_ctlz32_func(lzo_uint32_t v) +{ +#if (LZO_BITOPS_USE_MSC_BITSCAN) && (LZO_ARCH_AMD64 || LZO_ARCH_I386) + unsigned long r; (void) _BitScanReverse(&r, v); return (unsigned) r ^ 31; +#define lzo_bitops_ctlz32(v) lzo_bitops_ctlz32_func(v) +#elif (LZO_BITOPS_USE_ASM_BITSCAN) && (LZO_ARCH_AMD64 || LZO_ARCH_I386) && (LZO_ASM_SYNTAX_GNUC) + lzo_uint32_t r; + __asm__("bsr %1,%0" 
: "=r" (r) : "rm" (v) __LZO_ASM_CLOBBER_LIST_CC); + return (unsigned) r ^ 31; +#define lzo_bitops_ctlz32(v) lzo_bitops_ctlz32_func(v) +#elif (LZO_BITOPS_USE_GNUC_BITSCAN) && (LZO_SIZEOF_INT == 4) + unsigned r; r = (unsigned) __builtin_clz(v); return r; +#define lzo_bitops_ctlz32(v) ((unsigned) __builtin_clz(v)) +#elif (LZO_BITOPS_USE_GNUC_BITSCAN) && (LZO_SIZEOF_LONG == 8) && (LZO_WORDSIZE >= 8) + unsigned r; r = (unsigned) __builtin_clzl(v); return r ^ 32; +#define lzo_bitops_ctlz32(v) (((unsigned) __builtin_clzl(v)) ^ 32) +#else + LZO_UNUSED(v); return 0; +#endif +} + +#if defined(lzo_uint64_t) +__lzo_static_forceinline unsigned lzo_bitops_ctlz64_func(lzo_uint64_t v) +{ +#if (LZO_BITOPS_USE_MSC_BITSCAN) && (LZO_ARCH_AMD64) + unsigned long r; (void) _BitScanReverse64(&r, v); return (unsigned) r ^ 63; +#define lzo_bitops_ctlz64(v) lzo_bitops_ctlz64_func(v) +#elif (LZO_BITOPS_USE_ASM_BITSCAN) && (LZO_ARCH_AMD64) && (LZO_ASM_SYNTAX_GNUC) + lzo_uint64_t r; + __asm__("bsr %1,%0" : "=r" (r) : "rm" (v) __LZO_ASM_CLOBBER_LIST_CC); + return (unsigned) r ^ 63; +#define lzo_bitops_ctlz64(v) lzo_bitops_ctlz64_func(v) +#elif (LZO_BITOPS_USE_GNUC_BITSCAN) && (LZO_SIZEOF_LONG == 8) && (LZO_WORDSIZE >= 8) + unsigned r; r = (unsigned) __builtin_clzl(v); return r; +#define lzo_bitops_ctlz64(v) ((unsigned) __builtin_clzl(v)) +#elif (LZO_BITOPS_USE_GNUC_BITSCAN) && (LZO_SIZEOF_LONG_LONG == 8) && (LZO_WORDSIZE >= 8) + unsigned r; r = (unsigned) __builtin_clzll(v); return r; +#define lzo_bitops_ctlz64(v) ((unsigned) __builtin_clzll(v)) +#else + LZO_UNUSED(v); return 0; +#endif +} +#endif + +__lzo_static_forceinline unsigned lzo_bitops_cttz32_func(lzo_uint32_t v) +{ +#if (LZO_BITOPS_USE_MSC_BITSCAN) && (LZO_ARCH_AMD64 || LZO_ARCH_I386) + unsigned long r; (void) _BitScanForward(&r, v); return (unsigned) r; +#define lzo_bitops_cttz32(v) lzo_bitops_cttz32_func(v) +#elif (LZO_BITOPS_USE_ASM_BITSCAN) && (LZO_ARCH_AMD64 || LZO_ARCH_I386) && (LZO_ASM_SYNTAX_GNUC) + lzo_uint32_t r; + 
__asm__("bsf %1,%0" : "=r" (r) : "rm" (v) __LZO_ASM_CLOBBER_LIST_CC); + return (unsigned) r; +#define lzo_bitops_cttz32(v) lzo_bitops_cttz32_func(v) +#elif (LZO_BITOPS_USE_GNUC_BITSCAN) && (LZO_SIZEOF_INT >= 4) + unsigned r; r = (unsigned) __builtin_ctz(v); return r; +#define lzo_bitops_cttz32(v) ((unsigned) __builtin_ctz(v)) +#else + LZO_UNUSED(v); return 0; +#endif +} + +#if defined(lzo_uint64_t) +__lzo_static_forceinline unsigned lzo_bitops_cttz64_func(lzo_uint64_t v) +{ +#if (LZO_BITOPS_USE_MSC_BITSCAN) && (LZO_ARCH_AMD64) + unsigned long r; (void) _BitScanForward64(&r, v); return (unsigned) r; +#define lzo_bitops_cttz64(v) lzo_bitops_cttz64_func(v) +#elif (LZO_BITOPS_USE_ASM_BITSCAN) && (LZO_ARCH_AMD64) && (LZO_ASM_SYNTAX_GNUC) + lzo_uint64_t r; + __asm__("bsf %1,%0" : "=r" (r) : "rm" (v) __LZO_ASM_CLOBBER_LIST_CC); + return (unsigned) r; +#define lzo_bitops_cttz64(v) lzo_bitops_cttz64_func(v) +#elif (LZO_BITOPS_USE_GNUC_BITSCAN) && (LZO_SIZEOF_LONG >= 8) && (LZO_WORDSIZE >= 8) + unsigned r; r = (unsigned) __builtin_ctzl(v); return r; +#define lzo_bitops_cttz64(v) ((unsigned) __builtin_ctzl(v)) +#elif (LZO_BITOPS_USE_GNUC_BITSCAN) && (LZO_SIZEOF_LONG_LONG >= 8) && (LZO_WORDSIZE >= 8) + unsigned r; r = (unsigned) __builtin_ctzll(v); return r; +#define lzo_bitops_cttz64(v) ((unsigned) __builtin_ctzll(v)) +#else + LZO_UNUSED(v); return 0; +#endif +} +#endif + +lzo_unused_funcs_impl(void, lzo_bitops_unused_funcs)(void) +{ + LZO_UNUSED_FUNC(lzo_bitops_unused_funcs); + LZO_UNUSED_FUNC(lzo_bitops_ctlz32_func); + LZO_UNUSED_FUNC(lzo_bitops_cttz32_func); +#if defined(lzo_uint64_t) + LZO_UNUSED_FUNC(lzo_bitops_ctlz64_func); + LZO_UNUSED_FUNC(lzo_bitops_cttz64_func); +#endif +} + + +/*********************************************************************** +// memops +************************************************************************/ + +#if defined(__lzo_alignof) && !(LZO_CFG_NO_UNALIGNED) +/* CBUG: disabled because of gcc bug 64516 */ +#if 
!defined(lzo_memops_tcheck__) && 0 +#define lzo_memops_tcheck__(t,a,b) ((void)0, sizeof(t) == (a) && __lzo_alignof(t) == (b)) +#endif +#endif +#ifndef lzo_memops_TU0p +#define lzo_memops_TU0p void __LZO_MMODEL * +#endif +#ifndef lzo_memops_TU1p +#define lzo_memops_TU1p unsigned char __LZO_MMODEL * +#endif +#ifndef lzo_memops_TU2p +#if (LZO_OPT_UNALIGNED16) +typedef lzo_uint16_t __lzo_may_alias lzo_memops_TU2; +#define lzo_memops_TU2p volatile lzo_memops_TU2 * +#elif defined(__lzo_byte_struct) +__lzo_byte_struct(lzo_memops_TU2_struct,2) +typedef struct lzo_memops_TU2_struct lzo_memops_TU2; +#else +struct lzo_memops_TU2_struct { unsigned char a[2]; } __lzo_may_alias; +typedef struct lzo_memops_TU2_struct lzo_memops_TU2; +#endif +#ifndef lzo_memops_TU2p +#define lzo_memops_TU2p lzo_memops_TU2 * +#endif +#endif +#ifndef lzo_memops_TU4p +#if (LZO_OPT_UNALIGNED32) +typedef lzo_uint32_t __lzo_may_alias lzo_memops_TU4; +#define lzo_memops_TU4p volatile lzo_memops_TU4 __LZO_MMODEL * +#elif defined(__lzo_byte_struct) +__lzo_byte_struct(lzo_memops_TU4_struct,4) +typedef struct lzo_memops_TU4_struct lzo_memops_TU4; +#else +struct lzo_memops_TU4_struct { unsigned char a[4]; } __lzo_may_alias; +typedef struct lzo_memops_TU4_struct lzo_memops_TU4; +#endif +#ifndef lzo_memops_TU4p +#define lzo_memops_TU4p lzo_memops_TU4 __LZO_MMODEL * +#endif +#endif +#ifndef lzo_memops_TU8p +#if (LZO_OPT_UNALIGNED64) +typedef lzo_uint64_t __lzo_may_alias lzo_memops_TU8; +#define lzo_memops_TU8p volatile lzo_memops_TU8 __LZO_MMODEL * +#elif defined(__lzo_byte_struct) +__lzo_byte_struct(lzo_memops_TU8_struct,8) +typedef struct lzo_memops_TU8_struct lzo_memops_TU8; +#else +struct lzo_memops_TU8_struct { unsigned char a[8]; } __lzo_may_alias; +typedef struct lzo_memops_TU8_struct lzo_memops_TU8; +#endif +#ifndef lzo_memops_TU8p +#define lzo_memops_TU8p lzo_memops_TU8 __LZO_MMODEL * +#endif +#endif +#ifndef lzo_memops_set_TU1p +#define lzo_memops_set_TU1p volatile lzo_memops_TU1p +#endif +#ifndef 
lzo_memops_move_TU1p +#define lzo_memops_move_TU1p lzo_memops_TU1p +#endif +#define LZO_MEMOPS_SET1(dd,cc) \ + LZO_BLOCK_BEGIN \ + lzo_memops_set_TU1p d__1 = (lzo_memops_set_TU1p) (lzo_memops_TU0p) (dd); \ + d__1[0] = LZO_BYTE(cc); \ + LZO_BLOCK_END +#define LZO_MEMOPS_SET2(dd,cc) \ + LZO_BLOCK_BEGIN \ + lzo_memops_set_TU1p d__2 = (lzo_memops_set_TU1p) (lzo_memops_TU0p) (dd); \ + d__2[0] = LZO_BYTE(cc); d__2[1] = LZO_BYTE(cc); \ + LZO_BLOCK_END +#define LZO_MEMOPS_SET3(dd,cc) \ + LZO_BLOCK_BEGIN \ + lzo_memops_set_TU1p d__3 = (lzo_memops_set_TU1p) (lzo_memops_TU0p) (dd); \ + d__3[0] = LZO_BYTE(cc); d__3[1] = LZO_BYTE(cc); d__3[2] = LZO_BYTE(cc); \ + LZO_BLOCK_END +#define LZO_MEMOPS_SET4(dd,cc) \ + LZO_BLOCK_BEGIN \ + lzo_memops_set_TU1p d__4 = (lzo_memops_set_TU1p) (lzo_memops_TU0p) (dd); \ + d__4[0] = LZO_BYTE(cc); d__4[1] = LZO_BYTE(cc); d__4[2] = LZO_BYTE(cc); d__4[3] = LZO_BYTE(cc); \ + LZO_BLOCK_END +#define LZO_MEMOPS_MOVE1(dd,ss) \ + LZO_BLOCK_BEGIN \ + lzo_memops_move_TU1p d__1 = (lzo_memops_move_TU1p) (lzo_memops_TU0p) (dd); \ + const lzo_memops_move_TU1p s__1 = (const lzo_memops_move_TU1p) (const lzo_memops_TU0p) (ss); \ + d__1[0] = s__1[0]; \ + LZO_BLOCK_END +#define LZO_MEMOPS_MOVE2(dd,ss) \ + LZO_BLOCK_BEGIN \ + lzo_memops_move_TU1p d__2 = (lzo_memops_move_TU1p) (lzo_memops_TU0p) (dd); \ + const lzo_memops_move_TU1p s__2 = (const lzo_memops_move_TU1p) (const lzo_memops_TU0p) (ss); \ + d__2[0] = s__2[0]; d__2[1] = s__2[1]; \ + LZO_BLOCK_END +#define LZO_MEMOPS_MOVE3(dd,ss) \ + LZO_BLOCK_BEGIN \ + lzo_memops_move_TU1p d__3 = (lzo_memops_move_TU1p) (lzo_memops_TU0p) (dd); \ + const lzo_memops_move_TU1p s__3 = (const lzo_memops_move_TU1p) (const lzo_memops_TU0p) (ss); \ + d__3[0] = s__3[0]; d__3[1] = s__3[1]; d__3[2] = s__3[2]; \ + LZO_BLOCK_END +#define LZO_MEMOPS_MOVE4(dd,ss) \ + LZO_BLOCK_BEGIN \ + lzo_memops_move_TU1p d__4 = (lzo_memops_move_TU1p) (lzo_memops_TU0p) (dd); \ + const lzo_memops_move_TU1p s__4 = (const lzo_memops_move_TU1p) (const 
lzo_memops_TU0p) (ss); \ + d__4[0] = s__4[0]; d__4[1] = s__4[1]; d__4[2] = s__4[2]; d__4[3] = s__4[3]; \ + LZO_BLOCK_END +#define LZO_MEMOPS_MOVE8(dd,ss) \ + LZO_BLOCK_BEGIN \ + lzo_memops_move_TU1p d__8 = (lzo_memops_move_TU1p) (lzo_memops_TU0p) (dd); \ + const lzo_memops_move_TU1p s__8 = (const lzo_memops_move_TU1p) (const lzo_memops_TU0p) (ss); \ + d__8[0] = s__8[0]; d__8[1] = s__8[1]; d__8[2] = s__8[2]; d__8[3] = s__8[3]; \ + d__8[4] = s__8[4]; d__8[5] = s__8[5]; d__8[6] = s__8[6]; d__8[7] = s__8[7]; \ + LZO_BLOCK_END +LZO_COMPILE_TIME_ASSERT_HEADER(sizeof(*(lzo_memops_TU1p)0)==1) +#define LZO_MEMOPS_COPY1(dd,ss) LZO_MEMOPS_MOVE1(dd,ss) +#if (LZO_OPT_UNALIGNED16) +LZO_COMPILE_TIME_ASSERT_HEADER(sizeof(*(lzo_memops_TU2p)0)==2) +#define LZO_MEMOPS_COPY2(dd,ss) \ + * (lzo_memops_TU2p) (lzo_memops_TU0p) (dd) = * (const lzo_memops_TU2p) (const lzo_memops_TU0p) (ss) +#elif defined(lzo_memops_tcheck__) +#define LZO_MEMOPS_COPY2(dd,ss) \ + LZO_BLOCK_BEGIN if (lzo_memops_tcheck__(lzo_memops_TU2,2,1)) { \ + * (lzo_memops_TU2p) (lzo_memops_TU0p) (dd) = * (const lzo_memops_TU2p) (const lzo_memops_TU0p) (ss); \ + } else { LZO_MEMOPS_MOVE2(dd,ss); } LZO_BLOCK_END +#else +#define LZO_MEMOPS_COPY2(dd,ss) LZO_MEMOPS_MOVE2(dd,ss) +#endif +#if (LZO_OPT_UNALIGNED32) +LZO_COMPILE_TIME_ASSERT_HEADER(sizeof(*(lzo_memops_TU4p)0)==4) +#define LZO_MEMOPS_COPY4(dd,ss) \ + * (lzo_memops_TU4p) (lzo_memops_TU0p) (dd) = * (const lzo_memops_TU4p) (const lzo_memops_TU0p) (ss) +#elif defined(lzo_memops_tcheck__) +#define LZO_MEMOPS_COPY4(dd,ss) \ + LZO_BLOCK_BEGIN if (lzo_memops_tcheck__(lzo_memops_TU4,4,1)) { \ + * (lzo_memops_TU4p) (lzo_memops_TU0p) (dd) = * (const lzo_memops_TU4p) (const lzo_memops_TU0p) (ss); \ + } else { LZO_MEMOPS_MOVE4(dd,ss); } LZO_BLOCK_END +#else +#define LZO_MEMOPS_COPY4(dd,ss) LZO_MEMOPS_MOVE4(dd,ss) +#endif +#if (LZO_WORDSIZE != 8) +#define LZO_MEMOPS_COPY8(dd,ss) \ + LZO_BLOCK_BEGIN LZO_MEMOPS_COPY4(dd,ss); 
LZO_MEMOPS_COPY4((lzo_memops_TU1p)(lzo_memops_TU0p)(dd)+4,(const lzo_memops_TU1p)(const lzo_memops_TU0p)(ss)+4); LZO_BLOCK_END +#else +#if (LZO_OPT_UNALIGNED64) +LZO_COMPILE_TIME_ASSERT_HEADER(sizeof(*(lzo_memops_TU8p)0)==8) +#define LZO_MEMOPS_COPY8(dd,ss) \ + * (lzo_memops_TU8p) (lzo_memops_TU0p) (dd) = * (const lzo_memops_TU8p) (const lzo_memops_TU0p) (ss) +#elif (LZO_OPT_UNALIGNED32) +#define LZO_MEMOPS_COPY8(dd,ss) \ + LZO_BLOCK_BEGIN LZO_MEMOPS_COPY4(dd,ss); LZO_MEMOPS_COPY4((lzo_memops_TU1p)(lzo_memops_TU0p)(dd)+4,(const lzo_memops_TU1p)(const lzo_memops_TU0p)(ss)+4); LZO_BLOCK_END +#elif defined(lzo_memops_tcheck__) +#define LZO_MEMOPS_COPY8(dd,ss) \ + LZO_BLOCK_BEGIN if (lzo_memops_tcheck__(lzo_memops_TU8,8,1)) { \ + * (lzo_memops_TU8p) (lzo_memops_TU0p) (dd) = * (const lzo_memops_TU8p) (const lzo_memops_TU0p) (ss); \ + } else { LZO_MEMOPS_MOVE8(dd,ss); } LZO_BLOCK_END +#else +#define LZO_MEMOPS_COPY8(dd,ss) LZO_MEMOPS_MOVE8(dd,ss) +#endif +#endif +#define LZO_MEMOPS_COPYN(dd,ss,nn) \ + LZO_BLOCK_BEGIN \ + lzo_memops_TU1p d__n = (lzo_memops_TU1p) (lzo_memops_TU0p) (dd); \ + const lzo_memops_TU1p s__n = (const lzo_memops_TU1p) (const lzo_memops_TU0p) (ss); \ + lzo_uint n__n = (nn); \ + while ((void)0, n__n >= 8) { LZO_MEMOPS_COPY8(d__n, s__n); d__n += 8; s__n += 8; n__n -= 8; } \ + if ((void)0, n__n >= 4) { LZO_MEMOPS_COPY4(d__n, s__n); d__n += 4; s__n += 4; n__n -= 4; } \ + if ((void)0, n__n > 0) do { *d__n++ = *s__n++; } while (--n__n > 0); \ + LZO_BLOCK_END + +__lzo_static_forceinline lzo_uint16_t lzo_memops_get_le16(const lzo_voidp ss) +{ + lzo_uint16_t v; +#if (LZO_ABI_LITTLE_ENDIAN) + LZO_MEMOPS_COPY2(&v, ss); +#elif (LZO_OPT_UNALIGNED16 && LZO_ARCH_POWERPC && LZO_ABI_BIG_ENDIAN) && (LZO_ASM_SYNTAX_GNUC) + const lzo_memops_TU2p s = (const lzo_memops_TU2p) ss; + unsigned long vv; + __asm__("lhbrx %0,0,%1" : "=r" (vv) : "r" (s), "m" (*s)); + v = (lzo_uint16_t) vv; +#else + const lzo_memops_TU1p s = (const lzo_memops_TU1p) ss; + v = (lzo_uint16_t) 
(((lzo_uint16_t)s[0]) | ((lzo_uint16_t)s[1] << 8)); +#endif + return v; +} +#if (LZO_OPT_UNALIGNED16) && (LZO_ABI_LITTLE_ENDIAN) +#define LZO_MEMOPS_GET_LE16(ss) (* (const lzo_memops_TU2p) (const lzo_memops_TU0p) (ss)) +#else +#define LZO_MEMOPS_GET_LE16(ss) lzo_memops_get_le16(ss) +#endif + +__lzo_static_forceinline lzo_uint32_t lzo_memops_get_le32(const lzo_voidp ss) +{ + lzo_uint32_t v; +#if (LZO_ABI_LITTLE_ENDIAN) + LZO_MEMOPS_COPY4(&v, ss); +#elif (LZO_OPT_UNALIGNED32 && LZO_ARCH_POWERPC && LZO_ABI_BIG_ENDIAN) && (LZO_ASM_SYNTAX_GNUC) + const lzo_memops_TU4p s = (const lzo_memops_TU4p) ss; + unsigned long vv; + __asm__("lwbrx %0,0,%1" : "=r" (vv) : "r" (s), "m" (*s)); + v = (lzo_uint32_t) vv; +#else + const lzo_memops_TU1p s = (const lzo_memops_TU1p) ss; + v = (lzo_uint32_t) (((lzo_uint32_t)s[0]) | ((lzo_uint32_t)s[1] << 8) | ((lzo_uint32_t)s[2] << 16) | ((lzo_uint32_t)s[3] << 24)); +#endif + return v; +} +#if (LZO_OPT_UNALIGNED32) && (LZO_ABI_LITTLE_ENDIAN) +#define LZO_MEMOPS_GET_LE32(ss) (* (const lzo_memops_TU4p) (const lzo_memops_TU0p) (ss)) +#else +#define LZO_MEMOPS_GET_LE32(ss) lzo_memops_get_le32(ss) +#endif + +#if (LZO_OPT_UNALIGNED64) && (LZO_ABI_LITTLE_ENDIAN) +#define LZO_MEMOPS_GET_LE64(ss) (* (const lzo_memops_TU8p) (const lzo_memops_TU0p) (ss)) +#endif + +__lzo_static_forceinline lzo_uint16_t lzo_memops_get_ne16(const lzo_voidp ss) +{ + lzo_uint16_t v; + LZO_MEMOPS_COPY2(&v, ss); + return v; +} +#if (LZO_OPT_UNALIGNED16) +LZO_COMPILE_TIME_ASSERT_HEADER(sizeof(*(lzo_memops_TU2p)0)==2) +#define LZO_MEMOPS_GET_NE16(ss) (* (const lzo_memops_TU2p) (const lzo_memops_TU0p) (ss)) +#else +#define LZO_MEMOPS_GET_NE16(ss) lzo_memops_get_ne16(ss) +#endif + +__lzo_static_forceinline lzo_uint32_t lzo_memops_get_ne32(const lzo_voidp ss) +{ + lzo_uint32_t v; + LZO_MEMOPS_COPY4(&v, ss); + return v; +} +#if (LZO_OPT_UNALIGNED32) +LZO_COMPILE_TIME_ASSERT_HEADER(sizeof(*(lzo_memops_TU4p)0)==4) +#define LZO_MEMOPS_GET_NE32(ss) (* (const lzo_memops_TU4p) (const 
lzo_memops_TU0p) (ss)) +#else +#define LZO_MEMOPS_GET_NE32(ss) lzo_memops_get_ne32(ss) +#endif + +#if (LZO_OPT_UNALIGNED64) +LZO_COMPILE_TIME_ASSERT_HEADER(sizeof(*(lzo_memops_TU8p)0)==8) +#define LZO_MEMOPS_GET_NE64(ss) (* (const lzo_memops_TU8p) (const lzo_memops_TU0p) (ss)) +#endif + +__lzo_static_forceinline void lzo_memops_put_le16(lzo_voidp dd, lzo_uint16_t vv) +{ +#if (LZO_ABI_LITTLE_ENDIAN) + LZO_MEMOPS_COPY2(dd, &vv); +#elif (LZO_OPT_UNALIGNED16 && LZO_ARCH_POWERPC && LZO_ABI_BIG_ENDIAN) && (LZO_ASM_SYNTAX_GNUC) + lzo_memops_TU2p d = (lzo_memops_TU2p) dd; + unsigned long v = vv; + __asm__("sthbrx %2,0,%1" : "=m" (*d) : "r" (d), "r" (v)); +#else + lzo_memops_TU1p d = (lzo_memops_TU1p) dd; + d[0] = LZO_BYTE((vv ) & 0xff); + d[1] = LZO_BYTE((vv >> 8) & 0xff); +#endif +} +#if (LZO_OPT_UNALIGNED16) && (LZO_ABI_LITTLE_ENDIAN) +#define LZO_MEMOPS_PUT_LE16(dd,vv) (* (lzo_memops_TU2p) (lzo_memops_TU0p) (dd) = (vv)) +#else +#define LZO_MEMOPS_PUT_LE16(dd,vv) lzo_memops_put_le16(dd,vv) +#endif + +__lzo_static_forceinline void lzo_memops_put_le32(lzo_voidp dd, lzo_uint32_t vv) +{ +#if (LZO_ABI_LITTLE_ENDIAN) + LZO_MEMOPS_COPY4(dd, &vv); +#elif (LZO_OPT_UNALIGNED32 && LZO_ARCH_POWERPC && LZO_ABI_BIG_ENDIAN) && (LZO_ASM_SYNTAX_GNUC) + lzo_memops_TU4p d = (lzo_memops_TU4p) dd; + unsigned long v = vv; + __asm__("stwbrx %2,0,%1" : "=m" (*d) : "r" (d), "r" (v)); +#else + lzo_memops_TU1p d = (lzo_memops_TU1p) dd; + d[0] = LZO_BYTE((vv ) & 0xff); + d[1] = LZO_BYTE((vv >> 8) & 0xff); + d[2] = LZO_BYTE((vv >> 16) & 0xff); + d[3] = LZO_BYTE((vv >> 24) & 0xff); +#endif +} +#if (LZO_OPT_UNALIGNED32) && (LZO_ABI_LITTLE_ENDIAN) +#define LZO_MEMOPS_PUT_LE32(dd,vv) (* (lzo_memops_TU4p) (lzo_memops_TU0p) (dd) = (vv)) +#else +#define LZO_MEMOPS_PUT_LE32(dd,vv) lzo_memops_put_le32(dd,vv) +#endif + +__lzo_static_forceinline void lzo_memops_put_ne16(lzo_voidp dd, lzo_uint16_t vv) +{ + LZO_MEMOPS_COPY2(dd, &vv); +} +#if (LZO_OPT_UNALIGNED16) +#define LZO_MEMOPS_PUT_NE16(dd,vv) (* 
(lzo_memops_TU2p) (lzo_memops_TU0p) (dd) = (vv)) +#else +#define LZO_MEMOPS_PUT_NE16(dd,vv) lzo_memops_put_ne16(dd,vv) +#endif + +__lzo_static_forceinline void lzo_memops_put_ne32(lzo_voidp dd, lzo_uint32_t vv) +{ + LZO_MEMOPS_COPY4(dd, &vv); +} +#if (LZO_OPT_UNALIGNED32) +#define LZO_MEMOPS_PUT_NE32(dd,vv) (* (lzo_memops_TU4p) (lzo_memops_TU0p) (dd) = (vv)) +#else +#define LZO_MEMOPS_PUT_NE32(dd,vv) lzo_memops_put_ne32(dd,vv) +#endif + +lzo_unused_funcs_impl(void, lzo_memops_unused_funcs)(void) +{ + LZO_UNUSED_FUNC(lzo_memops_unused_funcs); + LZO_UNUSED_FUNC(lzo_memops_get_le16); + LZO_UNUSED_FUNC(lzo_memops_get_le32); + LZO_UNUSED_FUNC(lzo_memops_get_ne16); + LZO_UNUSED_FUNC(lzo_memops_get_ne32); + LZO_UNUSED_FUNC(lzo_memops_put_le16); + LZO_UNUSED_FUNC(lzo_memops_put_le32); + LZO_UNUSED_FUNC(lzo_memops_put_ne16); + LZO_UNUSED_FUNC(lzo_memops_put_ne32); +} + +#endif /* already included */ + +/* vim:set ts=4 sw=4 et: */ diff --git a/tools/z64compress/src/enc/lzo/lzo_mchw.ch b/tools/z64compress/src/enc/lzo/lzo_mchw.ch new file mode 100644 index 000000000..94bedc3d9 --- /dev/null +++ b/tools/z64compress/src/enc/lzo/lzo_mchw.ch @@ -0,0 +1,222 @@ +/* lzo_mchw.ch -- matching functions using a window + + This file is part of the LZO real-time data compression library. + + Copyright (C) 1996-2017 Markus Franz Xaver Johannes Oberhumer + All Rights Reserved. + + The LZO library is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of + the License, or (at your option) any later version. + + The LZO library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with the LZO library; see the file COPYING. 
+ If not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + + Markus F.X.J. Oberhumer + + http://www.oberhumer.com/opensource/lzo/ + */ + + +/*********************************************************************** +// +************************************************************************/ + +typedef struct +{ + unsigned init; + + lzo_uint look; /* bytes in lookahead buffer */ + + lzo_uint m_len; + lzo_uint m_off; + + lzo_uint last_m_len; + lzo_uint last_m_off; + + const lzo_bytep bp; + const lzo_bytep ip; + const lzo_bytep in; + const lzo_bytep in_end; + lzo_bytep out; + + lzo_callback_p cb; + + lzo_uint textsize; /* text size counter */ + lzo_uint codesize; /* code size counter */ + lzo_uint printcount; /* counter for reporting progress every 1K bytes */ + + /* some stats */ + lzo_uint lit_bytes; + lzo_uint match_bytes; + lzo_uint rep_bytes; + lzo_uint lazy; + +#if defined(LZO1B) + lzo_uint r1_m_len; + + /* some stats */ + lzo_uint r1_r, m3_r, m2_m, m3_m; +#endif + +#if defined(LZO1C) + lzo_uint r1_m_len; + lzo_bytep m3; + + /* some stats */ + lzo_uint r1_r, m3_r, m2_m, m3_m; +#endif + +#if defined(LZO1F) + lzo_uint r1_lit; + lzo_uint r1_m_len; + + /* some stats */ + lzo_uint r1_r, m2_m, m3_m; +#endif + +#if defined(LZO1X) || defined(LZO1Y) || defined(LZO1Z) + lzo_uint r1_lit; + lzo_uint r1_m_len; + + /* some stats */ + lzo_uint m1a_m, m1b_m, m2_m, m3_m, m4_m; + lzo_uint lit1_r, lit2_r, lit3_r; +#endif + +#if defined(LZO2A) + /* some stats */ + lzo_uint m1, m2, m3, m4; +#endif +} +LZO_COMPRESS_T; + + +#define getbyte(c) ((c).ip < (c).in_end ? 
*((c).ip)++ : (-1)) + +#include "lzo_swd.ch" + + +/*********************************************************************** +// +************************************************************************/ + +static int +init_match ( LZO_COMPRESS_T *c, lzo_swd_p s, + const lzo_bytep dict, lzo_uint dict_len, + lzo_uint32_t flags ) +{ + int r; + + assert(!c->init); + c->init = 1; + + s->c = c; + + c->last_m_len = c->last_m_off = 0; + + c->textsize = c->codesize = c->printcount = 0; + c->lit_bytes = c->match_bytes = c->rep_bytes = 0; + c->lazy = 0; + + r = swd_init(s,dict,dict_len); + if (r != LZO_E_OK) + { + swd_exit(s); + return r; + } + + s->use_best_off = (flags & 1) ? 1 : 0; + return LZO_E_OK; +} + + +/*********************************************************************** +// +************************************************************************/ + +static int +find_match ( LZO_COMPRESS_T *c, lzo_swd_p s, + lzo_uint this_len, lzo_uint skip ) +{ + assert(c->init); + + if (skip > 0) + { + assert(this_len >= skip); + swd_accept(s, this_len - skip); + c->textsize += this_len - skip + 1; + } + else + { + assert(this_len <= 1); + c->textsize += this_len - skip; + } + + s->m_len = SWD_THRESHOLD; + s->m_off = 0; +#ifdef SWD_BEST_OFF + if (s->use_best_off) + lzo_memset(s->best_pos,0,sizeof(s->best_pos)); +#endif + swd_findbest(s); + c->m_len = s->m_len; + c->m_off = s->m_off; + + swd_getbyte(s); + + if (s->b_char < 0) + { + c->look = 0; + c->m_len = 0; + swd_exit(s); + } + else + { + c->look = s->look + 1; + } + c->bp = c->ip - c->look; + +#if 0 + /* brute force match search */ + if (c->m_len > SWD_THRESHOLD && c->m_len + 1 <= c->look) + { + const lzo_bytep ip = c->bp; + const lzo_bytep m = c->bp - c->m_off; + const lzo_bytep in = c->in; + + if (ip - in > s->swd_n) + in = ip - s->swd_n; + for (;;) + { + while (*in != *ip) + in++; + if (in == ip) + break; + if (in != m) + if (lzo_memcmp(in,ip,c->m_len+1) == 0) + printf("%p %p %p %5d\n",in,ip,m,c->m_len); + in++; + } + } 
+#endif + + if (c->cb && c->cb->nprogress && c->textsize > c->printcount) + { + (*c->cb->nprogress)(c->cb, c->textsize, c->codesize, 0); + c->printcount += 1024; + } + + return LZO_E_OK; +} + + +/* vim:set ts=4 sw=4 et: */ diff --git a/tools/z64compress/src/enc/lzo/lzo_ptr.c b/tools/z64compress/src/enc/lzo/lzo_ptr.c new file mode 100644 index 000000000..d2def7f63 --- /dev/null +++ b/tools/z64compress/src/enc/lzo/lzo_ptr.c @@ -0,0 +1,80 @@ +/* lzo_ptr.c -- low-level pointer constructs + + This file is part of the LZO real-time data compression library. + + Copyright (C) 1996-2017 Markus Franz Xaver Johannes Oberhumer + All Rights Reserved. + + The LZO library is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of + the License, or (at your option) any later version. + + The LZO library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with the LZO library; see the file COPYING. + If not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + + Markus F.X.J. 
Oberhumer + + http://www.oberhumer.com/opensource/lzo/ + */ + + +#include "lzo_conf.h" + + +/*********************************************************************** +// +************************************************************************/ + +LZO_PUBLIC(lzo_uintptr_t) +__lzo_ptr_linear(const lzo_voidp ptr) +{ + lzo_uintptr_t p; + +#if (LZO_ARCH_I086) +#error "LZO_ARCH_I086 is unsupported" +#elif (LZO_MM_PVP) +#error "LZO_MM_PVP is unsupported" +#else + p = (lzo_uintptr_t) PTR_LINEAR(ptr); +#endif + + return p; +} + + +/*********************************************************************** +// +************************************************************************/ + +LZO_PUBLIC(unsigned) +__lzo_align_gap(const lzo_voidp ptr, lzo_uint size) +{ +#if (__LZO_UINTPTR_T_IS_POINTER) +#error "__LZO_UINTPTR_T_IS_POINTER is unsupported" +#else + lzo_uintptr_t p, n; + if (size < 2) return 0; + p = __lzo_ptr_linear(ptr); +#if 0 + n = (((p + size - 1) / size) * size) - p; +#else + if ((size & (size - 1)) != 0) + return 0; + n = size; n = ((p + n - 1) & ~(n - 1)) - p; +#endif +#endif + assert((long)n >= 0); + assert(n <= size); + return (unsigned)n; +} + + +/* vim:set ts=4 sw=4 et: */ diff --git a/tools/z64compress/src/enc/lzo/lzo_ptr.h b/tools/z64compress/src/enc/lzo/lzo_ptr.h new file mode 100644 index 000000000..8d7ee4483 --- /dev/null +++ b/tools/z64compress/src/enc/lzo/lzo_ptr.h @@ -0,0 +1,123 @@ +/* lzo_ptr.h -- low-level pointer constructs + + This file is part of the LZO real-time data compression library. + + Copyright (C) 1996-2017 Markus Franz Xaver Johannes Oberhumer + All Rights Reserved. + + The LZO library is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of + the License, or (at your option) any later version. 
+ + The LZO library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with the LZO library; see the file COPYING. + If not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + + Markus F.X.J. Oberhumer + + http://www.oberhumer.com/opensource/lzo/ + */ + + +/* WARNING: this file should *not* be used by applications. It is + part of the implementation of the library and is subject + to change. + */ + + +#ifndef __LZO_PTR_H +#define __LZO_PTR_H 1 + +#ifdef __cplusplus +extern "C" { +#endif + + +/*********************************************************************** +// +************************************************************************/ + +/* Always use the safe (=integral) version for pointer-comparisons. + * The compiler should optimize away the additional casts anyway. + * + * Note that this only works if the representation and ordering + * of the pointer and the integral is the same (at bit level). 
+ */ + +#if (LZO_ARCH_I086) +#error "LZO_ARCH_I086 is unsupported" +#elif (LZO_MM_PVP) +#error "LZO_MM_PVP is unsupported" +#else +#define PTR(a) ((lzo_uintptr_t) (a)) +#define PTR_LINEAR(a) PTR(a) +#define PTR_ALIGNED_4(a) ((PTR_LINEAR(a) & 3) == 0) +#define PTR_ALIGNED_8(a) ((PTR_LINEAR(a) & 7) == 0) +#define PTR_ALIGNED2_4(a,b) (((PTR_LINEAR(a) | PTR_LINEAR(b)) & 3) == 0) +#define PTR_ALIGNED2_8(a,b) (((PTR_LINEAR(a) | PTR_LINEAR(b)) & 7) == 0) +#endif + +#define PTR_LT(a,b) (PTR(a) < PTR(b)) +#define PTR_GE(a,b) (PTR(a) >= PTR(b)) +#define PTR_DIFF(a,b) (PTR(a) - PTR(b)) +#define pd(a,b) ((lzo_uint) ((a)-(b))) + + +LZO_EXTERN(lzo_uintptr_t) +__lzo_ptr_linear(const lzo_voidp ptr); + + +typedef union +{ + char a_char; + unsigned char a_uchar; + short a_short; + unsigned short a_ushort; + int a_int; + unsigned int a_uint; + long a_long; + unsigned long a_ulong; + lzo_int a_lzo_int; + lzo_uint a_lzo_uint; + lzo_xint a_lzo_xint; + lzo_int16_t a_lzo_int16_t; + lzo_uint16_t a_lzo_uint16_t; + lzo_int32_t a_lzo_int32_t; + lzo_uint32_t a_lzo_uint32_t; +#if defined(lzo_uint64_t) + lzo_int64_t a_lzo_int64_t; + lzo_uint64_t a_lzo_uint64_t; +#endif + size_t a_size_t; + ptrdiff_t a_ptrdiff_t; + lzo_uintptr_t a_lzo_uintptr_t; + void * a_void_p; + char * a_char_p; + unsigned char * a_uchar_p; + const void * a_c_void_p; + const char * a_c_char_p; + const unsigned char * a_c_uchar_p; + lzo_voidp a_lzo_voidp; + lzo_bytep a_lzo_bytep; + const lzo_voidp a_c_lzo_voidp; + const lzo_bytep a_c_lzo_bytep; +} +lzo_full_align_t; + + + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* already included */ + + +/* vim:set ts=4 sw=4 et: */ diff --git a/tools/z64compress/src/enc/lzo/lzo_supp.h b/tools/z64compress/src/enc/lzo/lzo_supp.h new file mode 100644 index 000000000..a2c90210d --- /dev/null +++ b/tools/z64compress/src/enc/lzo/lzo_supp.h @@ -0,0 +1,3678 @@ +/* lzo_supp.h -- architecture, OS and compiler specific defines + + This file is part of the LZO real-time data compression 
library. + + Copyright (C) 1996-2017 Markus Franz Xaver Johannes Oberhumer + All Rights Reserved. + + The LZO library is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of + the License, or (at your option) any later version. + + The LZO library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with the LZO library; see the file COPYING. + If not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + + Markus F.X.J. Oberhumer + + http://www.oberhumer.com/opensource/lzo/ + */ + + +#ifndef __LZO_SUPP_H_INCLUDED +#define __LZO_SUPP_H_INCLUDED 1 +#if (LZO_CFG_NO_CONFIG_HEADER) +#elif defined(LZO_CFG_CONFIG_HEADER) +#else +#if !(LZO_CFG_AUTO_NO_HEADERS) +#if (LZO_LIBC_NAKED) +#elif (LZO_LIBC_FREESTANDING) +# define HAVE_LIMITS_H 1 +# define HAVE_STDARG_H 1 +# define HAVE_STDDEF_H 1 +#elif (LZO_LIBC_MOSTLY_FREESTANDING) +# define HAVE_LIMITS_H 1 +# define HAVE_SETJMP_H 1 +# define HAVE_STDARG_H 1 +# define HAVE_STDDEF_H 1 +# define HAVE_STDIO_H 1 +# define HAVE_STRING_H 1 +#else +#define STDC_HEADERS 1 +#define HAVE_ASSERT_H 1 +#define HAVE_CTYPE_H 1 +#define HAVE_DIRENT_H 1 +#define HAVE_ERRNO_H 1 +#define HAVE_FCNTL_H 1 +#define HAVE_FLOAT_H 1 +#define HAVE_LIMITS_H 1 +#define HAVE_MALLOC_H 1 +#define HAVE_MEMORY_H 1 +#define HAVE_SETJMP_H 1 +#define HAVE_SIGNAL_H 1 +#define HAVE_STDARG_H 1 +#define HAVE_STDDEF_H 1 +#define HAVE_STDIO_H 1 +#define HAVE_STDLIB_H 1 +#define HAVE_STRING_H 1 +#define HAVE_TIME_H 1 +#define HAVE_UNISTD_H 1 +#define HAVE_UTIME_H 1 +#define HAVE_SYS_STAT_H 1 +#define HAVE_SYS_TIME_H 1 +#define HAVE_SYS_TYPES_H 1 
+#if (LZO_OS_POSIX) +# if (LZO_OS_POSIX_AIX) +# define HAVE_SYS_RESOURCE_H 1 +# elif (LZO_OS_POSIX_DARWIN || LZO_OS_POSIX_FREEBSD || LZO_OS_POSIX_NETBSD || LZO_OS_POSIX_OPENBSD) +# define HAVE_STRINGS_H 1 +# undef HAVE_MALLOC_H +# elif (LZO_OS_POSIX_HPUX || LZO_OS_POSIX_INTERIX) +# define HAVE_ALLOCA_H 1 +# elif (LZO_OS_POSIX_DARWIN && LZO_LIBC_MSL) +# undef HAVE_SYS_TIME_H +# undef HAVE_SYS_TYPES_H +# elif (LZO_OS_POSIX_SOLARIS || LZO_OS_POSIX_SUNOS) +# define HAVE_ALLOCA_H 1 +# endif +# if (LZO_LIBC_DIETLIBC || LZO_LIBC_GLIBC || LZO_LIBC_UCLIBC) +# define HAVE_STRINGS_H 1 +# define HAVE_SYS_MMAN_H 1 +# define HAVE_SYS_RESOURCE_H 1 +# define HAVE_SYS_WAIT_H 1 +# endif +# if (LZO_LIBC_NEWLIB) +# undef HAVE_STRINGS_H +# endif +#elif (LZO_OS_CYGWIN) +# define HAVE_IO_H 1 +#elif (LZO_OS_EMX) +# define HAVE_ALLOCA_H 1 +# define HAVE_IO_H 1 +#elif (LZO_ARCH_M68K && LZO_OS_TOS && LZO_CC_GNUC) +# if !defined(__MINT__) +# undef HAVE_MALLOC_H +# endif +#elif (LZO_ARCH_M68K && LZO_OS_TOS && (LZO_CC_PUREC || LZO_CC_TURBOC)) +# undef HAVE_DIRENT_H +# undef HAVE_FCNTL_H +# undef HAVE_MALLOC_H +# undef HAVE_MEMORY_H +# undef HAVE_UNISTD_H +# undef HAVE_UTIME_H +# undef HAVE_SYS_STAT_H +# undef HAVE_SYS_TIME_H +# undef HAVE_SYS_TYPES_H +#endif +#if (LZO_OS_DOS16 || LZO_OS_DOS32 || LZO_OS_OS2 || LZO_OS_OS216 || LZO_OS_WIN16 || LZO_OS_WIN32 || LZO_OS_WIN64) +#define HAVE_CONIO_H 1 +#define HAVE_DIRECT_H 1 +#define HAVE_DOS_H 1 +#define HAVE_IO_H 1 +#define HAVE_SHARE_H 1 +#if (LZO_CC_AZTECC) +# undef HAVE_CONIO_H +# undef HAVE_DIRECT_H +# undef HAVE_DIRENT_H +# undef HAVE_MALLOC_H +# undef HAVE_SHARE_H +# undef HAVE_UNISTD_H +# undef HAVE_UTIME_H +# undef HAVE_SYS_STAT_H +# undef HAVE_SYS_TIME_H +# undef HAVE_SYS_TYPES_H +#elif (LZO_CC_BORLANDC) +# undef HAVE_UNISTD_H +# undef HAVE_SYS_TIME_H +# if (LZO_OS_WIN32 || LZO_OS_WIN64) +# undef HAVE_DIRENT_H +# endif +# if (__BORLANDC__ < 0x0400) +# undef HAVE_DIRENT_H +# undef HAVE_UTIME_H +# endif +#elif (LZO_CC_DMC) +# undef 
HAVE_DIRENT_H +# undef HAVE_UNISTD_H +# define HAVE_SYS_DIRENT_H 1 +#elif (LZO_OS_DOS32 && LZO_CC_GNUC) && defined(__DJGPP__) +#elif (LZO_OS_DOS32 && LZO_CC_HIGHC) +# define HAVE_ALLOCA_H 1 +# undef HAVE_DIRENT_H +# undef HAVE_UNISTD_H +#elif (LZO_CC_IBMC && LZO_OS_OS2) +# undef HAVE_DOS_H +# undef HAVE_DIRENT_H +# undef HAVE_UNISTD_H +# undef HAVE_UTIME_H +# undef HAVE_SYS_TIME_H +# define HAVE_SYS_UTIME_H 1 +#elif (LZO_CC_CLANG_C2 || LZO_CC_CLANG_MSC || LZO_CC_GHS || LZO_CC_INTELC_MSC || LZO_CC_MSC) +# undef HAVE_DIRENT_H +# undef HAVE_UNISTD_H +# undef HAVE_UTIME_H +# undef HAVE_SYS_TIME_H +# define HAVE_SYS_UTIME_H 1 +#elif (LZO_CC_LCCWIN32) +# undef HAVE_DIRENT_H +# undef HAVE_DOS_H +# undef HAVE_UNISTD_H +# undef HAVE_SYS_TIME_H +#elif (LZO_OS_WIN32 && LZO_CC_GNUC) && defined(__MINGW32__) +# undef HAVE_UTIME_H +# define HAVE_SYS_UTIME_H 1 +#elif (LZO_OS_WIN32 && LZO_LIBC_MSL) +# define HAVE_ALLOCA_H 1 +# undef HAVE_DOS_H +# undef HAVE_SHARE_H +# undef HAVE_SYS_TIME_H +#elif (LZO_CC_NDPC) +# undef HAVE_DIRENT_H +# undef HAVE_DOS_H +# undef HAVE_UNISTD_H +# undef HAVE_UTIME_H +# undef HAVE_SYS_TIME_H +#elif (LZO_CC_PACIFICC) +# undef HAVE_DIRECT_H +# undef HAVE_DIRENT_H +# undef HAVE_FCNTL_H +# undef HAVE_IO_H +# undef HAVE_MALLOC_H +# undef HAVE_MEMORY_H +# undef HAVE_SHARE_H +# undef HAVE_UNISTD_H +# undef HAVE_UTIME_H +# undef HAVE_SYS_STAT_H +# undef HAVE_SYS_TIME_H +# undef HAVE_SYS_TYPES_H +#elif (LZO_OS_WIN32 && LZO_CC_PELLESC) +# undef HAVE_DIRENT_H +# undef HAVE_DOS_H +# undef HAVE_MALLOC_H +# undef HAVE_SHARE_H +# undef HAVE_UNISTD_H +# undef HAVE_UTIME_H +# undef HAVE_SYS_TIME_H +# if (__POCC__ < 280) +# else +# define HAVE_SYS_UTIME_H 1 +# endif +#elif (LZO_OS_WIN32 && LZO_CC_PGI) && defined(__MINGW32__) +# undef HAVE_UTIME_H +# define HAVE_SYS_UTIME_H 1 +#elif (LZO_OS_WIN32 && LZO_CC_GNUC) && defined(__PW32__) +#elif (LZO_CC_SYMANTECC) +# undef HAVE_DIRENT_H +# undef HAVE_UNISTD_H +# if (__SC__ < 0x700) +# undef HAVE_UTIME_H +# undef 
HAVE_SYS_TIME_H +# endif +#elif (LZO_CC_TOPSPEEDC) +# undef HAVE_DIRENT_H +# undef HAVE_UNISTD_H +# undef HAVE_UTIME_H +# undef HAVE_SYS_STAT_H +# undef HAVE_SYS_TIME_H +# undef HAVE_SYS_TYPES_H +#elif (LZO_CC_TURBOC) +# undef HAVE_UNISTD_H +# undef HAVE_SYS_TIME_H +# undef HAVE_SYS_TYPES_H +# if (LZO_OS_WIN32 || LZO_OS_WIN64) +# undef HAVE_DIRENT_H +# endif +# if (__TURBOC__ < 0x0200) +# undef HAVE_SIGNAL_H +# endif +# if (__TURBOC__ < 0x0400) +# undef HAVE_DIRECT_H +# undef HAVE_DIRENT_H +# undef HAVE_MALLOC_H +# undef HAVE_MEMORY_H +# undef HAVE_UTIME_H +# endif +#elif (LZO_CC_WATCOMC) +# undef HAVE_DIRENT_H +# undef HAVE_UTIME_H +# undef HAVE_SYS_TIME_H +# define HAVE_SYS_UTIME_H 1 +# if (__WATCOMC__ < 950) +# undef HAVE_UNISTD_H +# endif +#elif (LZO_CC_ZORTECHC) +# undef HAVE_DIRENT_H +# undef HAVE_MEMORY_H +# undef HAVE_UNISTD_H +# undef HAVE_UTIME_H +# undef HAVE_SYS_TIME_H +#endif +#endif +#if (LZO_OS_CONSOLE) +# undef HAVE_DIRENT_H +#endif +#if (LZO_OS_EMBEDDED) +# undef HAVE_DIRENT_H +#endif +#if (LZO_LIBC_ISOC90 || LZO_LIBC_ISOC99) +# undef HAVE_DIRENT_H +# undef HAVE_FCNTL_H +# undef HAVE_MALLOC_H +# undef HAVE_UNISTD_H +# undef HAVE_UTIME_H +# undef HAVE_SYS_STAT_H +# undef HAVE_SYS_TIME_H +# undef HAVE_SYS_TYPES_H +#endif +#if (LZO_LIBC_GLIBC >= 0x020100ul) +# define HAVE_STDINT_H 1 +#elif (LZO_LIBC_DIETLIBC) +# undef HAVE_STDINT_H +#elif (LZO_LIBC_UCLIBC) +# define HAVE_STDINT_H 1 +#elif (LZO_CC_BORLANDC) && (__BORLANDC__ >= 0x560) +# undef HAVE_STDINT_H +#elif (LZO_CC_DMC) && (__DMC__ >= 0x825) +# define HAVE_STDINT_H 1 +#endif +#if (HAVE_SYS_TIME_H && HAVE_TIME_H) +# define TIME_WITH_SYS_TIME 1 +#endif +#endif +#endif +#if !(LZO_CFG_AUTO_NO_FUNCTIONS) +#if (LZO_LIBC_NAKED) +#elif (LZO_LIBC_FREESTANDING) +#elif (LZO_LIBC_MOSTLY_FREESTANDING) +# define HAVE_LONGJMP 1 +# define HAVE_MEMCMP 1 +# define HAVE_MEMCPY 1 +# define HAVE_MEMMOVE 1 +# define HAVE_MEMSET 1 +# define HAVE_SETJMP 1 +#else +#define HAVE_ACCESS 1 +#define HAVE_ALLOCA 1 +#define 
HAVE_ATEXIT 1 +#define HAVE_ATOI 1 +#define HAVE_ATOL 1 +#define HAVE_CHMOD 1 +#define HAVE_CHOWN 1 +#define HAVE_CTIME 1 +#define HAVE_DIFFTIME 1 +#define HAVE_FILENO 1 +#define HAVE_FSTAT 1 +#define HAVE_GETENV 1 +#define HAVE_GETTIMEOFDAY 1 +#define HAVE_GMTIME 1 +#define HAVE_ISATTY 1 +#define HAVE_LOCALTIME 1 +#define HAVE_LONGJMP 1 +#define HAVE_LSTAT 1 +#define HAVE_MEMCMP 1 +#define HAVE_MEMCPY 1 +#define HAVE_MEMMOVE 1 +#define HAVE_MEMSET 1 +#define HAVE_MKDIR 1 +#define HAVE_MKTIME 1 +#define HAVE_QSORT 1 +#define HAVE_RAISE 1 +#define HAVE_RMDIR 1 +#define HAVE_SETJMP 1 +#define HAVE_SIGNAL 1 +#define HAVE_SNPRINTF 1 +#define HAVE_STAT 1 +#define HAVE_STRCHR 1 +#define HAVE_STRDUP 1 +#define HAVE_STRERROR 1 +#define HAVE_STRFTIME 1 +#define HAVE_STRRCHR 1 +#define HAVE_STRSTR 1 +#define HAVE_TIME 1 +#define HAVE_UMASK 1 +#define HAVE_UTIME 1 +#define HAVE_VSNPRINTF 1 +#if (LZO_OS_BEOS || LZO_OS_CYGWIN || LZO_OS_POSIX || LZO_OS_QNX || LZO_OS_VMS) +# define HAVE_STRCASECMP 1 +# define HAVE_STRNCASECMP 1 +#elif (LZO_OS_WIN32 && LZO_CC_GNUC) && defined(__PW32__) +# define HAVE_STRCASECMP 1 +# define HAVE_STRNCASECMP 1 +#else +# define HAVE_STRICMP 1 +# define HAVE_STRNICMP 1 +#endif +#if (LZO_OS_POSIX) +# if (LZO_OS_POSIX_AIX) +# define HAVE_GETRUSAGE 1 +# elif (LZO_OS_POSIX_DARWIN && LZO_LIBC_MSL) +# undef HAVE_CHOWN +# undef HAVE_LSTAT +# elif (LZO_OS_POSIX_UNICOS) +# undef HAVE_ALLOCA +# undef HAVE_SNPRINTF +# undef HAVE_VSNPRINTF +# endif +# if (LZO_CC_TINYC) +# undef HAVE_ALLOCA +# endif +# if (LZO_LIBC_DIETLIBC || LZO_LIBC_GLIBC || LZO_LIBC_UCLIBC) +# define HAVE_GETRUSAGE 1 +# define HAVE_GETPAGESIZE 1 +# define HAVE_MMAP 1 +# define HAVE_MPROTECT 1 +# define HAVE_MUNMAP 1 +# endif +#elif (LZO_OS_CYGWIN) +# if (LZO_CC_GNUC < 0x025a00ul) +# undef HAVE_GETTIMEOFDAY +# undef HAVE_LSTAT +# endif +# if (LZO_CC_GNUC < 0x025f00ul) +# undef HAVE_SNPRINTF +# undef HAVE_VSNPRINTF +# endif +#elif (LZO_OS_EMX) +# undef HAVE_CHOWN +# undef HAVE_LSTAT +#elif 
(LZO_ARCH_M68K && LZO_OS_TOS && LZO_CC_GNUC) +# if !defined(__MINT__) +# undef HAVE_SNPRINTF +# undef HAVE_VSNPRINTF +# endif +#elif (LZO_ARCH_M68K && LZO_OS_TOS && (LZO_CC_PUREC || LZO_CC_TURBOC)) +# undef HAVE_ALLOCA +# undef HAVE_ACCESS +# undef HAVE_CHMOD +# undef HAVE_CHOWN +# undef HAVE_FSTAT +# undef HAVE_GETTIMEOFDAY +# undef HAVE_LSTAT +# undef HAVE_SNPRINTF +# undef HAVE_UMASK +# undef HAVE_UTIME +# undef HAVE_VSNPRINTF +#endif +#if (LZO_OS_DOS16 || LZO_OS_DOS32 || LZO_OS_OS2 || LZO_OS_OS216 || LZO_OS_WIN16 || LZO_OS_WIN32 || LZO_OS_WIN64) +#undef HAVE_CHOWN +#undef HAVE_GETTIMEOFDAY +#undef HAVE_LSTAT +#undef HAVE_UMASK +#if (LZO_CC_AZTECC) +# undef HAVE_ALLOCA +# undef HAVE_DIFFTIME +# undef HAVE_FSTAT +# undef HAVE_STRDUP +# undef HAVE_SNPRINTF +# undef HAVE_UTIME +# undef HAVE_VSNPRINTF +#elif (LZO_CC_BORLANDC) +# if (__BORLANDC__ < 0x0400) +# undef HAVE_ALLOCA +# undef HAVE_UTIME +# endif +# if ((__BORLANDC__ < 0x0410) && LZO_OS_WIN16) +# undef HAVE_ALLOCA +# endif +# if (__BORLANDC__ < 0x0550) +# undef HAVE_SNPRINTF +# undef HAVE_VSNPRINTF +# endif +#elif (LZO_CC_DMC) +# if (LZO_OS_WIN16) +# undef HAVE_ALLOCA +# endif +# define snprintf _snprintf +# define vsnprintf _vsnprintf +#elif (LZO_OS_DOS32 && LZO_CC_GNUC) && defined(__DJGPP__) +# undef HAVE_SNPRINTF +# undef HAVE_VSNPRINTF +#elif (LZO_OS_DOS32 && LZO_CC_HIGHC) +# undef HAVE_SNPRINTF +# undef HAVE_VSNPRINTF +#elif (LZO_CC_GHS) +# undef HAVE_ALLOCA +# ifndef snprintf +# define snprintf _snprintf +# endif +# ifndef vsnprintf +# define vsnprintf _vsnprintf +# endif +#elif (LZO_CC_IBMC) +# undef HAVE_SNPRINTF +# undef HAVE_VSNPRINTF +#elif (LZO_CC_CLANG_MSC || LZO_CC_INTELC_MSC) +# ifndef snprintf +# define snprintf _snprintf +# endif +# ifndef vsnprintf +# define vsnprintf _vsnprintf +# endif +#elif (LZO_CC_LCCWIN32) +# define utime _utime +#elif (LZO_CC_CLANG_C2 || LZO_CC_MSC) +# if (_MSC_VER < 600) +# undef HAVE_STRFTIME +# endif +# if (_MSC_VER < 700) +# undef HAVE_SNPRINTF +# undef 
HAVE_VSNPRINTF +# elif (_MSC_VER < 1500) +# ifndef snprintf +# define snprintf _snprintf +# endif +# ifndef vsnprintf +# define vsnprintf _vsnprintf +# endif +# elif (_MSC_VER < 1900) +# ifndef snprintf +# define snprintf _snprintf +# endif +# endif +# if ((_MSC_VER < 800) && LZO_OS_WIN16) +# undef HAVE_ALLOCA +# endif +# if (LZO_ARCH_I086) && defined(__cplusplus) +# undef HAVE_LONGJMP +# undef HAVE_SETJMP +# endif +#elif (LZO_OS_WIN32 && LZO_CC_GNUC) && defined(__MINGW32__) +# if (LZO_CC_GNUC < 0x025f00ul) +# undef HAVE_SNPRINTF +# undef HAVE_VSNPRINTF +# else +# define snprintf _snprintf +# define vsnprintf _vsnprintf +# endif +#elif (LZO_OS_WIN32 && LZO_LIBC_MSL) +# if (__MSL__ < 0x8000ul) +# undef HAVE_CHMOD +# endif +#elif (LZO_CC_NDPC) +# undef HAVE_ALLOCA +# undef HAVE_SNPRINTF +# undef HAVE_STRNICMP +# undef HAVE_UTIME +# undef HAVE_VSNPRINTF +# if defined(__cplusplus) +# undef HAVE_STAT +# endif +#elif (LZO_CC_PACIFICC) +# undef HAVE_ACCESS +# undef HAVE_ALLOCA +# undef HAVE_CHMOD +# undef HAVE_DIFFTIME +# undef HAVE_FSTAT +# undef HAVE_MKTIME +# undef HAVE_RAISE +# undef HAVE_SNPRINTF +# undef HAVE_STRFTIME +# undef HAVE_UTIME +# undef HAVE_VSNPRINTF +#elif (LZO_OS_WIN32 && LZO_CC_PELLESC) +# if (__POCC__ < 280) +# define alloca _alloca +# undef HAVE_UTIME +# endif +#elif (LZO_OS_WIN32 && LZO_CC_PGI) && defined(__MINGW32__) +# define snprintf _snprintf +# define vsnprintf _vsnprintf +#elif (LZO_OS_WIN32 && LZO_CC_GNUC) && defined(__PW32__) +# undef HAVE_SNPRINTF +# undef HAVE_VSNPRINTF +#elif (LZO_CC_SYMANTECC) +# if (LZO_OS_WIN16 && (LZO_MM_MEDIUM || LZO_MM_LARGE || LZO_MM_HUGE)) +# undef HAVE_ALLOCA +# endif +# if (__SC__ < 0x600) +# undef HAVE_SNPRINTF +# undef HAVE_VSNPRINTF +# else +# define snprintf _snprintf +# define vsnprintf _vsnprintf +# endif +# if (__SC__ < 0x700) +# undef HAVE_DIFFTIME +# undef HAVE_UTIME +# endif +#elif (LZO_CC_TOPSPEEDC) +# undef HAVE_SNPRINTF +# undef HAVE_VSNPRINTF +#elif (LZO_CC_TURBOC) +# undef HAVE_ALLOCA +# undef 
HAVE_SNPRINTF +# undef HAVE_VSNPRINTF +# if (__TURBOC__ < 0x0200) +# undef HAVE_RAISE +# undef HAVE_SIGNAL +# endif +# if (__TURBOC__ < 0x0295) +# undef HAVE_MKTIME +# undef HAVE_STRFTIME +# endif +# if (__TURBOC__ < 0x0400) +# undef HAVE_UTIME +# endif +#elif (LZO_CC_WATCOMC) +# if (__WATCOMC__ < 1100) +# undef HAVE_SNPRINTF +# undef HAVE_VSNPRINTF +# elif (__WATCOMC__ < 1200) +# define snprintf _snprintf +# define vsnprintf _vsnprintf +# endif +#elif (LZO_CC_ZORTECHC) +# if (LZO_OS_WIN16 && (LZO_MM_MEDIUM || LZO_MM_LARGE || LZO_MM_HUGE)) +# undef HAVE_ALLOCA +# endif +# undef HAVE_DIFFTIME +# undef HAVE_SNPRINTF +# undef HAVE_UTIME +# undef HAVE_VSNPRINTF +#endif +#endif +#if (LZO_OS_CONSOLE) +# undef HAVE_ACCESS +# undef HAVE_CHMOD +# undef HAVE_CHOWN +# undef HAVE_GETTIMEOFDAY +# undef HAVE_LSTAT +# undef HAVE_TIME +# undef HAVE_UMASK +# undef HAVE_UTIME +#endif +#if (LZO_LIBC_ISOC90 || LZO_LIBC_ISOC99) +# undef HAVE_ACCESS +# undef HAVE_CHMOD +# undef HAVE_CHOWN +# undef HAVE_FILENO +# undef HAVE_FSTAT +# undef HAVE_GETTIMEOFDAY +# undef HAVE_LSTAT +# undef HAVE_STAT +# undef HAVE_UMASK +# undef HAVE_UTIME +# if 1 +# undef HAVE_ALLOCA +# undef HAVE_ISATTY +# undef HAVE_MKDIR +# undef HAVE_RMDIR +# undef HAVE_STRDUP +# undef HAVE_STRICMP +# undef HAVE_STRNICMP +# endif +#endif +#endif +#endif +#if !(LZO_CFG_AUTO_NO_SIZES) +#if !defined(SIZEOF_SHORT) && defined(LZO_SIZEOF_SHORT) +# define SIZEOF_SHORT LZO_SIZEOF_SHORT +#endif +#if !defined(SIZEOF_INT) && defined(LZO_SIZEOF_INT) +# define SIZEOF_INT LZO_SIZEOF_INT +#endif +#if !defined(SIZEOF_LONG) && defined(LZO_SIZEOF_LONG) +# define SIZEOF_LONG LZO_SIZEOF_LONG +#endif +#if !defined(SIZEOF_LONG_LONG) && defined(LZO_SIZEOF_LONG_LONG) +# define SIZEOF_LONG_LONG LZO_SIZEOF_LONG_LONG +#endif +#if !defined(SIZEOF___INT32) && defined(LZO_SIZEOF___INT32) +# define SIZEOF___INT32 LZO_SIZEOF___INT32 +#endif +#if !defined(SIZEOF___INT64) && defined(LZO_SIZEOF___INT64) +# define SIZEOF___INT64 LZO_SIZEOF___INT64 +#endif 
+#if !defined(SIZEOF_VOID_P) && defined(LZO_SIZEOF_VOID_P) +# define SIZEOF_VOID_P LZO_SIZEOF_VOID_P +#endif +#if !defined(SIZEOF_SIZE_T) && defined(LZO_SIZEOF_SIZE_T) +# define SIZEOF_SIZE_T LZO_SIZEOF_SIZE_T +#endif +#if !defined(SIZEOF_PTRDIFF_T) && defined(LZO_SIZEOF_PTRDIFF_T) +# define SIZEOF_PTRDIFF_T LZO_SIZEOF_PTRDIFF_T +#endif +#endif +#if (HAVE_SIGNAL) && !defined(RETSIGTYPE) +# define RETSIGTYPE void +#endif +#endif +#if !(LZO_CFG_SKIP_LZO_TYPES) +#if 1 && !defined(lzo_signo_t) && defined(__linux__) && defined(__dietlibc__) && (LZO_SIZEOF_INT != 4) +# define lzo_signo_t lzo_int32e_t +#endif +#if !defined(lzo_signo_t) +# define lzo_signo_t int +#endif +#if defined(__cplusplus) +extern "C" { +#endif +#if (LZO_BROKEN_CDECL_ALT_SYNTAX) +typedef void __lzo_cdecl_sighandler (*lzo_sighandler_t)(lzo_signo_t); +#else +typedef void (__lzo_cdecl_sighandler *lzo_sighandler_t)(lzo_signo_t); +#endif +#if defined(__cplusplus) +} +#endif +#endif +#endif +#if defined(LZO_WANT_ACC_INCD_H) +# undef LZO_WANT_ACC_INCD_H +#ifndef __LZO_INCD_H_INCLUDED +#define __LZO_INCD_H_INCLUDED 1 +#if (LZO_LIBC_NAKED) +#ifndef __LZO_FALLBACK_STDDEF_H_INCLUDED +#define __LZO_FALLBACK_STDDEF_H_INCLUDED 1 +#if defined(__PTRDIFF_TYPE__) +typedef __PTRDIFF_TYPE__ lzo_fallback_ptrdiff_t; +#elif defined(__MIPS_PSX2__) +typedef int lzo_fallback_ptrdiff_t; +#else +typedef long lzo_fallback_ptrdiff_t; +#endif +#if defined(__SIZE_TYPE__) +typedef __SIZE_TYPE__ lzo_fallback_size_t; +#elif defined(__MIPS_PSX2__) +typedef unsigned int lzo_fallback_size_t; +#else +typedef unsigned long lzo_fallback_size_t; +#endif +#if !defined(ptrdiff_t) +typedef lzo_fallback_ptrdiff_t ptrdiff_t; +#ifndef _PTRDIFF_T_DEFINED +#define _PTRDIFF_T_DEFINED 1 +#endif +#endif +#if !defined(size_t) +typedef lzo_fallback_size_t size_t; +#ifndef _SIZE_T_DEFINED +#define _SIZE_T_DEFINED 1 +#endif +#endif +#if !defined(__cplusplus) && !defined(wchar_t) +typedef unsigned short wchar_t; +#ifndef _WCHAR_T_DEFINED +#define 
_WCHAR_T_DEFINED 1 +#endif +#endif +#ifndef NULL +#if defined(__cplusplus) && defined(__GNUC__) && (__GNUC__ >= 4) +#define NULL __null +#elif defined(__cplusplus) +#define NULL 0 +#else +#define NULL ((void*)0) +#endif +#endif +#ifndef offsetof +#define offsetof(s,m) ((size_t)((ptrdiff_t)&(((s*)0)->m))) +#endif +#endif +#elif (LZO_LIBC_FREESTANDING) +# if defined(HAVE_STDDEF_H) && (HAVE_STDDEF_H+0) +# include +# endif +# if defined(HAVE_STDINT_H) && (HAVE_STDINT_H+0) +# include +# endif +#elif (LZO_LIBC_MOSTLY_FREESTANDING) +# if defined(HAVE_STDIO_H) && (HAVE_STDIO_H+0) +# include +# endif +# if defined(HAVE_STDDEF_H) && (HAVE_STDDEF_H+0) +# include +# endif +# if defined(HAVE_STDINT_H) && (HAVE_STDINT_H+0) +# include +# endif +#else +#include +#if defined(HAVE_TIME_H) && (HAVE_TIME_H+0) && defined(__MSL__) && defined(__cplusplus) +# include +#endif +#if defined(HAVE_SYS_TYPES_H) && (HAVE_SYS_TYPES_H+0) +# include +#endif +#if defined(HAVE_SYS_STAT_H) && (HAVE_SYS_STAT_H+0) +# include +#endif +#if defined(STDC_HEADERS) && (STDC_HEADERS+0) +# include +#elif defined(HAVE_STDLIB_H) && (HAVE_STDLIB_H+0) +# include +#endif +#include +#if defined(HAVE_STRING_H) && (HAVE_STRING_H+0) +# if defined(STDC_HEADERS) && (STDC_HEADERS+0) +# elif defined(HAVE_MEMORY_H) && (HAVE_MEMORY_H+0) +# include +# endif +# include +#endif +#if defined(HAVE_STRINGS_H) && (HAVE_STRINGS_H+0) +# include +#endif +#if defined(HAVE_INTTYPES_H) && (HAVE_INTTYPES_H+0) +# include +#endif +#if defined(HAVE_STDINT_H) && (HAVE_STDINT_H+0) +# include +#endif +#if defined(HAVE_UNISTD_H) && (HAVE_UNISTD_H+0) +# include +#endif +#endif +#endif +#endif +#if defined(LZO_WANT_ACC_INCE_H) +# undef LZO_WANT_ACC_INCE_H +#ifndef __LZO_INCE_H_INCLUDED +#define __LZO_INCE_H_INCLUDED 1 +#if (LZO_LIBC_NAKED) +#elif (LZO_LIBC_FREESTANDING) +#elif (LZO_LIBC_MOSTLY_FREESTANDING) +# if (HAVE_SETJMP_H) +# include +# endif +#else +#if (HAVE_STDARG_H) +# include +#endif +#if (HAVE_CTYPE_H) +# include +#endif +#if 
(HAVE_ERRNO_H) +# include +#endif +#if (HAVE_MALLOC_H) +# include +#endif +#if (HAVE_ALLOCA_H) +# include +#endif +#if (HAVE_FCNTL_H) +# include +#endif +#if (HAVE_DIRENT_H) +# include +#endif +#if (HAVE_SETJMP_H) +# include +#endif +#if (HAVE_SIGNAL_H) +# include +#endif +#if (HAVE_SYS_TIME_H && HAVE_TIME_H) +# include +# include +#elif (HAVE_TIME_H) +# include +#endif +#if (HAVE_UTIME_H) +# include +#elif (HAVE_SYS_UTIME_H) +# include +#endif +#if (HAVE_IO_H) +# include +#endif +#if (HAVE_DOS_H) +# include +#endif +#if (HAVE_DIRECT_H) +# include +#endif +#if (HAVE_SHARE_H) +# include +#endif +#if (LZO_CC_NDPC) +# include +#endif +#if defined(__TOS__) && (defined(__PUREC__) || defined(__TURBOC__)) +# include +#endif +#endif +#endif +#endif +#if defined(LZO_WANT_ACC_INCI_H) +# undef LZO_WANT_ACC_INCI_H +#ifndef __LZO_INCI_H_INCLUDED +#define __LZO_INCI_H_INCLUDED 1 +#if (LZO_LIBC_NAKED) +#elif (LZO_LIBC_FREESTANDING) +#elif (LZO_LIBC_MOSTLY_FREESTANDING) +#else +#if (LZO_OS_TOS && (LZO_CC_PUREC || LZO_CC_TURBOC)) +# include +#elif (LZO_HAVE_WINDOWS_H) +# if 1 && !defined(WIN32_LEAN_AND_MEAN) +# define WIN32_LEAN_AND_MEAN 1 +# endif +# if 1 && !defined(_WIN32_WINNT) +# define _WIN32_WINNT 0x0400 +# endif +# include +# if (LZO_CC_BORLANDC || LZO_CC_TURBOC) +# include +# endif +#elif (LZO_OS_DOS16 || LZO_OS_DOS32 || LZO_OS_WIN16) +# if (LZO_CC_AZTECC) +# include +# include +# elif (LZO_CC_BORLANDC || LZO_CC_TURBOC) +# include +# include +# elif (LZO_OS_DOS32 && LZO_CC_GNUC) && defined(__DJGPP__) +# include +# elif (LZO_CC_PACIFICC) +# include +# include +# include +# elif (LZO_CC_WATCOMC) +# include +# endif +#elif (LZO_OS_OS216) +# if (LZO_CC_WATCOMC) +# include +# endif +#endif +#if (HAVE_SYS_MMAN_H) +# include +#endif +#if (HAVE_SYS_RESOURCE_H) +# include +#endif +#if (LZO_OS_DOS16 || LZO_OS_OS216 || LZO_OS_WIN16) +# if defined(FP_OFF) +# define LZO_PTR_FP_OFF(x) FP_OFF(x) +# elif defined(_FP_OFF) +# define LZO_PTR_FP_OFF(x) _FP_OFF(x) +# else +# define 
LZO_PTR_FP_OFF(x) (((const unsigned __far*)&(x))[0]) +# endif +# if defined(FP_SEG) +# define LZO_PTR_FP_SEG(x) FP_SEG(x) +# elif defined(_FP_SEG) +# define LZO_PTR_FP_SEG(x) _FP_SEG(x) +# else +# define LZO_PTR_FP_SEG(x) (((const unsigned __far*)&(x))[1]) +# endif +# if defined(MK_FP) +# define LZO_PTR_MK_FP(s,o) MK_FP(s,o) +# elif defined(_MK_FP) +# define LZO_PTR_MK_FP(s,o) _MK_FP(s,o) +# else +# define LZO_PTR_MK_FP(s,o) ((void __far*)(((unsigned long)(s)<<16)+(unsigned)(o))) +# endif +# if 0 +# undef LZO_PTR_FP_OFF +# undef LZO_PTR_FP_SEG +# undef LZO_PTR_MK_FP +# define LZO_PTR_FP_OFF(x) (((const unsigned __far*)&(x))[0]) +# define LZO_PTR_FP_SEG(x) (((const unsigned __far*)&(x))[1]) +# define LZO_PTR_MK_FP(s,o) ((void __far*)(((unsigned long)(s)<<16)+(unsigned)(o))) +# endif +#endif +#endif +#endif +#endif +#if defined(LZO_WANT_ACC_LIB_H) +# undef LZO_WANT_ACC_LIB_H +#ifndef __LZO_LIB_H_INCLUDED +#define __LZO_LIB_H_INCLUDED 1 +#if !defined(__LZOLIB_FUNCNAME) +# define __LZOLIB_FUNCNAME(f) f +#endif +#if !defined(LZOLIB_EXTERN) +# define LZOLIB_EXTERN(r,f) extern r __LZOLIB_FUNCNAME(f) +#endif +#if !defined(LZOLIB_EXTERN_NOINLINE) +# if defined(__lzo_noinline) +# define LZOLIB_EXTERN_NOINLINE(r,f) extern __lzo_noinline r __LZOLIB_FUNCNAME(f) +# else +# define LZOLIB_EXTERN_NOINLINE(r,f) extern r __LZOLIB_FUNCNAME(f) +# endif +#endif +#if (LZO_SIZEOF_LONG > LZO_SIZEOF_VOID_P) +# define lzolib_handle_t long +#else +# define lzolib_handle_t lzo_intptr_t +#endif +#if 0 +LZOLIB_EXTERN(int, lzo_ascii_digit) (int); +LZOLIB_EXTERN(int, lzo_ascii_islower) (int); +LZOLIB_EXTERN(int, lzo_ascii_isupper) (int); +LZOLIB_EXTERN(int, lzo_ascii_tolower) (int); +LZOLIB_EXTERN(int, lzo_ascii_toupper) (int); +LZOLIB_EXTERN(int, lzo_ascii_utolower) (int); +LZOLIB_EXTERN(int, lzo_ascii_utoupper) (int); +#endif +#define lzo_ascii_isdigit(c) ((LZO_ICAST(unsigned, c) - 48) < 10) +#define lzo_ascii_islower(c) ((LZO_ICAST(unsigned, c) - 97) < 26) +#define lzo_ascii_isupper(c) 
((LZO_ICAST(unsigned, c) - 65) < 26) +#define lzo_ascii_tolower(c) (LZO_ICAST(int, c) + (lzo_ascii_isupper(c) << 5)) +#define lzo_ascii_toupper(c) (LZO_ICAST(int, c) - (lzo_ascii_islower(c) << 5)) +#define lzo_ascii_utolower(c) lzo_ascii_tolower(LZO_ITRUNC(unsigned char, c)) +#define lzo_ascii_utoupper(c) lzo_ascii_toupper(LZO_ITRUNC(unsigned char, c)) +#ifndef lzo_hsize_t +#if (LZO_HAVE_MM_HUGE_PTR) +# define lzo_hsize_t unsigned long +# define lzo_hvoid_p void __huge * +# define lzo_hchar_p char __huge * +# define lzo_hchar_pp char __huge * __huge * +# define lzo_hbyte_p unsigned char __huge * +#else +# define lzo_hsize_t size_t +# define lzo_hvoid_p void * +# define lzo_hchar_p char * +# define lzo_hchar_pp char ** +# define lzo_hbyte_p unsigned char * +#endif +#endif +LZOLIB_EXTERN(lzo_hvoid_p, lzo_halloc) (lzo_hsize_t); +LZOLIB_EXTERN(void, lzo_hfree) (lzo_hvoid_p); +#if (LZO_OS_DOS16 || LZO_OS_OS216) +LZOLIB_EXTERN(void __far*, lzo_dos_alloc) (unsigned long); +LZOLIB_EXTERN(int, lzo_dos_free) (void __far*); +#endif +LZOLIB_EXTERN(int, lzo_hmemcmp) (const lzo_hvoid_p, const lzo_hvoid_p, lzo_hsize_t); +LZOLIB_EXTERN(lzo_hvoid_p, lzo_hmemcpy) (lzo_hvoid_p, const lzo_hvoid_p, lzo_hsize_t); +LZOLIB_EXTERN(lzo_hvoid_p, lzo_hmemmove) (lzo_hvoid_p, const lzo_hvoid_p, lzo_hsize_t); +LZOLIB_EXTERN(lzo_hvoid_p, lzo_hmemset) (lzo_hvoid_p, int, lzo_hsize_t); +LZOLIB_EXTERN(lzo_hsize_t, lzo_hstrlen) (const lzo_hchar_p); +LZOLIB_EXTERN(int, lzo_hstrcmp) (const lzo_hchar_p, const lzo_hchar_p); +LZOLIB_EXTERN(int, lzo_hstrncmp)(const lzo_hchar_p, const lzo_hchar_p, lzo_hsize_t); +LZOLIB_EXTERN(int, lzo_ascii_hstricmp) (const lzo_hchar_p, const lzo_hchar_p); +LZOLIB_EXTERN(int, lzo_ascii_hstrnicmp)(const lzo_hchar_p, const lzo_hchar_p, lzo_hsize_t); +LZOLIB_EXTERN(int, lzo_ascii_hmemicmp) (const lzo_hvoid_p, const lzo_hvoid_p, lzo_hsize_t); +LZOLIB_EXTERN(lzo_hchar_p, lzo_hstrstr) (const lzo_hchar_p, const lzo_hchar_p); +LZOLIB_EXTERN(lzo_hchar_p, lzo_ascii_hstristr) (const 
lzo_hchar_p, const lzo_hchar_p); +LZOLIB_EXTERN(lzo_hvoid_p, lzo_hmemmem) (const lzo_hvoid_p, lzo_hsize_t, const lzo_hvoid_p, lzo_hsize_t); +LZOLIB_EXTERN(lzo_hvoid_p, lzo_ascii_hmemimem) (const lzo_hvoid_p, lzo_hsize_t, const lzo_hvoid_p, lzo_hsize_t); +LZOLIB_EXTERN(lzo_hchar_p, lzo_hstrcpy) (lzo_hchar_p, const lzo_hchar_p); +LZOLIB_EXTERN(lzo_hchar_p, lzo_hstrcat) (lzo_hchar_p, const lzo_hchar_p); +LZOLIB_EXTERN(lzo_hsize_t, lzo_hstrlcpy) (lzo_hchar_p, const lzo_hchar_p, lzo_hsize_t); +LZOLIB_EXTERN(lzo_hsize_t, lzo_hstrlcat) (lzo_hchar_p, const lzo_hchar_p, lzo_hsize_t); +LZOLIB_EXTERN(int, lzo_hstrscpy) (lzo_hchar_p, const lzo_hchar_p, lzo_hsize_t); +LZOLIB_EXTERN(int, lzo_hstrscat) (lzo_hchar_p, const lzo_hchar_p, lzo_hsize_t); +LZOLIB_EXTERN(lzo_hchar_p, lzo_hstrccpy) (lzo_hchar_p, const lzo_hchar_p, int); +LZOLIB_EXTERN(lzo_hvoid_p, lzo_hmemccpy) (lzo_hvoid_p, const lzo_hvoid_p, int, lzo_hsize_t); +LZOLIB_EXTERN(lzo_hchar_p, lzo_hstrchr) (const lzo_hchar_p, int); +LZOLIB_EXTERN(lzo_hchar_p, lzo_hstrrchr) (const lzo_hchar_p, int); +LZOLIB_EXTERN(lzo_hchar_p, lzo_ascii_hstrichr) (const lzo_hchar_p, int); +LZOLIB_EXTERN(lzo_hchar_p, lzo_ascii_hstrrichr) (const lzo_hchar_p, int); +LZOLIB_EXTERN(lzo_hvoid_p, lzo_hmemchr) (const lzo_hvoid_p, int, lzo_hsize_t); +LZOLIB_EXTERN(lzo_hvoid_p, lzo_hmemrchr) (const lzo_hvoid_p, int, lzo_hsize_t); +LZOLIB_EXTERN(lzo_hvoid_p, lzo_ascii_hmemichr) (const lzo_hvoid_p, int, lzo_hsize_t); +LZOLIB_EXTERN(lzo_hvoid_p, lzo_ascii_hmemrichr) (const lzo_hvoid_p, int, lzo_hsize_t); +LZOLIB_EXTERN(lzo_hsize_t, lzo_hstrspn) (const lzo_hchar_p, const lzo_hchar_p); +LZOLIB_EXTERN(lzo_hsize_t, lzo_hstrrspn) (const lzo_hchar_p, const lzo_hchar_p); +LZOLIB_EXTERN(lzo_hsize_t, lzo_hstrcspn) (const lzo_hchar_p, const lzo_hchar_p); +LZOLIB_EXTERN(lzo_hsize_t, lzo_hstrrcspn) (const lzo_hchar_p, const lzo_hchar_p); +LZOLIB_EXTERN(lzo_hchar_p, lzo_hstrpbrk) (const lzo_hchar_p, const lzo_hchar_p); +LZOLIB_EXTERN(lzo_hchar_p, lzo_hstrrpbrk) (const 
lzo_hchar_p, const lzo_hchar_p); +LZOLIB_EXTERN(lzo_hchar_p, lzo_hstrsep) (lzo_hchar_pp, const lzo_hchar_p); +LZOLIB_EXTERN(lzo_hchar_p, lzo_hstrrsep) (lzo_hchar_pp, const lzo_hchar_p); +LZOLIB_EXTERN(lzo_hchar_p, lzo_ascii_hstrlwr) (lzo_hchar_p); +LZOLIB_EXTERN(lzo_hchar_p, lzo_ascii_hstrupr) (lzo_hchar_p); +LZOLIB_EXTERN(lzo_hvoid_p, lzo_ascii_hmemlwr) (lzo_hvoid_p, lzo_hsize_t); +LZOLIB_EXTERN(lzo_hvoid_p, lzo_ascii_hmemupr) (lzo_hvoid_p, lzo_hsize_t); +LZOLIB_EXTERN(lzo_hsize_t, lzo_hfread) (void *, lzo_hvoid_p, lzo_hsize_t); +LZOLIB_EXTERN(lzo_hsize_t, lzo_hfwrite) (void *, const lzo_hvoid_p, lzo_hsize_t); +#if (LZO_HAVE_MM_HUGE_PTR) +LZOLIB_EXTERN(long, lzo_hread) (int, lzo_hvoid_p, long); +LZOLIB_EXTERN(long, lzo_hwrite) (int, const lzo_hvoid_p, long); +#endif +LZOLIB_EXTERN(long, lzo_safe_hread) (int, lzo_hvoid_p, long); +LZOLIB_EXTERN(long, lzo_safe_hwrite) (int, const lzo_hvoid_p, long); +LZOLIB_EXTERN(unsigned, lzo_ua_get_be16) (const lzo_hvoid_p); +LZOLIB_EXTERN(lzo_uint32l_t, lzo_ua_get_be24) (const lzo_hvoid_p); +LZOLIB_EXTERN(lzo_uint32l_t, lzo_ua_get_be32) (const lzo_hvoid_p); +LZOLIB_EXTERN(void, lzo_ua_set_be16) (lzo_hvoid_p, unsigned); +LZOLIB_EXTERN(void, lzo_ua_set_be24) (lzo_hvoid_p, lzo_uint32l_t); +LZOLIB_EXTERN(void, lzo_ua_set_be32) (lzo_hvoid_p, lzo_uint32l_t); +LZOLIB_EXTERN(unsigned, lzo_ua_get_le16) (const lzo_hvoid_p); +LZOLIB_EXTERN(lzo_uint32l_t, lzo_ua_get_le24) (const lzo_hvoid_p); +LZOLIB_EXTERN(lzo_uint32l_t, lzo_ua_get_le32) (const lzo_hvoid_p); +LZOLIB_EXTERN(void, lzo_ua_set_le16) (lzo_hvoid_p, unsigned); +LZOLIB_EXTERN(void, lzo_ua_set_le24) (lzo_hvoid_p, lzo_uint32l_t); +LZOLIB_EXTERN(void, lzo_ua_set_le32) (lzo_hvoid_p, lzo_uint32l_t); +#if defined(lzo_int64l_t) +LZOLIB_EXTERN(lzo_uint64l_t, lzo_ua_get_be64) (const lzo_hvoid_p); +LZOLIB_EXTERN(void, lzo_ua_set_be64) (lzo_hvoid_p, lzo_uint64l_t); +LZOLIB_EXTERN(lzo_uint64l_t, lzo_ua_get_le64) (const lzo_hvoid_p); +LZOLIB_EXTERN(void, lzo_ua_set_le64) (lzo_hvoid_p, 
lzo_uint64l_t); +#endif +LZOLIB_EXTERN_NOINLINE(short, lzo_vget_short) (short, int); +LZOLIB_EXTERN_NOINLINE(int, lzo_vget_int) (int, int); +LZOLIB_EXTERN_NOINLINE(long, lzo_vget_long) (long, int); +#if defined(lzo_int64l_t) +LZOLIB_EXTERN_NOINLINE(lzo_int64l_t, lzo_vget_lzo_int64l_t) (lzo_int64l_t, int); +#endif +LZOLIB_EXTERN_NOINLINE(lzo_hsize_t, lzo_vget_lzo_hsize_t) (lzo_hsize_t, int); +#if !(LZO_CFG_NO_FLOAT) +LZOLIB_EXTERN_NOINLINE(float, lzo_vget_float) (float, int); +#endif +#if !(LZO_CFG_NO_DOUBLE) +LZOLIB_EXTERN_NOINLINE(double, lzo_vget_double) (double, int); +#endif +LZOLIB_EXTERN_NOINLINE(lzo_hvoid_p, lzo_vget_lzo_hvoid_p) (lzo_hvoid_p, int); +LZOLIB_EXTERN_NOINLINE(const lzo_hvoid_p, lzo_vget_lzo_hvoid_cp) (const lzo_hvoid_p, int); +#if !defined(LZO_FN_PATH_MAX) +#if (LZO_OS_DOS16 || LZO_OS_WIN16) +# define LZO_FN_PATH_MAX 143 +#elif (LZO_OS_DOS32 || LZO_OS_OS2 || LZO_OS_OS216 || LZO_OS_WIN32 || LZO_OS_WIN64) +# define LZO_FN_PATH_MAX 259 +#elif (LZO_OS_TOS) +# define LZO_FN_PATH_MAX 259 +#endif +#endif +#if !defined(LZO_FN_PATH_MAX) +# define LZO_FN_PATH_MAX 1023 +#endif +#if !defined(LZO_FN_NAME_MAX) +#if (LZO_OS_DOS16 || LZO_OS_WIN16) +# define LZO_FN_NAME_MAX 12 +#elif (LZO_ARCH_M68K && LZO_OS_TOS && (LZO_CC_PUREC || LZO_CC_TURBOC)) +# define LZO_FN_NAME_MAX 12 +#elif (LZO_OS_DOS32 && LZO_CC_GNUC) && defined(__DJGPP__) +#elif (LZO_OS_DOS32) +# define LZO_FN_NAME_MAX 12 +#endif +#endif +#if !defined(LZO_FN_NAME_MAX) +# define LZO_FN_NAME_MAX LZO_FN_PATH_MAX +#endif +#define LZO_FNMATCH_NOESCAPE 1 +#define LZO_FNMATCH_PATHNAME 2 +#define LZO_FNMATCH_PATHSTAR 4 +#define LZO_FNMATCH_PERIOD 8 +#define LZO_FNMATCH_ASCII_CASEFOLD 16 +LZOLIB_EXTERN(int, lzo_fnmatch) (const lzo_hchar_p, const lzo_hchar_p, int); +#undef __LZOLIB_USE_OPENDIR +#if (HAVE_DIRENT_H || LZO_CC_WATCOMC) +# define __LZOLIB_USE_OPENDIR 1 +# if (LZO_OS_DOS32 && defined(__BORLANDC__)) +# elif (LZO_OS_DOS32 && LZO_CC_GNUC) && defined(__DJGPP__) +# elif (LZO_OS_OS2 || LZO_OS_OS216) +# 
elif (LZO_ARCH_M68K && LZO_OS_TOS && LZO_CC_GNUC) +# elif (LZO_OS_WIN32 && !(LZO_HAVE_WINDOWS_H)) +# elif (LZO_OS_DOS16 || LZO_OS_DOS32 || LZO_OS_OS2 || LZO_OS_OS216 || LZO_OS_TOS || LZO_OS_WIN16 || LZO_OS_WIN32 || LZO_OS_WIN64) +# undef __LZOLIB_USE_OPENDIR +# endif +#endif +typedef struct +{ +#if defined(__LZOLIB_USE_OPENDIR) + void* u_dirp; +# if (LZO_CC_WATCOMC) + unsigned short f_time; + unsigned short f_date; + unsigned long f_size; +# endif + char f_name[LZO_FN_NAME_MAX+1]; +#elif (LZO_OS_WIN32 || LZO_OS_WIN64) + lzolib_handle_t u_handle; + unsigned f_attr; + unsigned f_size_low; + unsigned f_size_high; + char f_name[LZO_FN_NAME_MAX+1]; +#elif (LZO_OS_DOS16 || LZO_OS_DOS32 || LZO_OS_TOS || LZO_OS_WIN16) + char u_dta[21]; + unsigned char f_attr; + unsigned short f_time; + unsigned short f_date; + unsigned short f_size_low; + unsigned short f_size_high; + char f_name[LZO_FN_NAME_MAX+1]; + char u_dirp; +#else + void* u_dirp; + char f_name[LZO_FN_NAME_MAX+1]; +#endif +} lzo_dir_t; +#ifndef lzo_dir_p +#define lzo_dir_p lzo_dir_t * +#endif +LZOLIB_EXTERN(int, lzo_opendir) (lzo_dir_p, const char*); +LZOLIB_EXTERN(int, lzo_readdir) (lzo_dir_p); +LZOLIB_EXTERN(int, lzo_closedir) (lzo_dir_p); +#if (LZO_CC_GNUC) && (defined(__CYGWIN__) || defined(__MINGW32__)) +# define lzo_alloca(x) __builtin_alloca((x)) +#elif (LZO_CC_GNUC) && (LZO_OS_CONSOLE_PS2) +# define lzo_alloca(x) __builtin_alloca((x)) +#elif (LZO_CC_BORLANDC || LZO_CC_LCC) && defined(__linux__) +#elif (HAVE_ALLOCA) +# define lzo_alloca(x) LZO_STATIC_CAST(void *, alloca((x))) +#endif +#if (LZO_OS_DOS32 && LZO_CC_GNUC) && defined(__DJGPP__) +# define lzo_stackavail() stackavail() +#elif (LZO_ARCH_I086 && LZO_CC_BORLANDC && (__BORLANDC__ >= 0x0410)) +# define lzo_stackavail() stackavail() +#elif (LZO_ARCH_I086 && LZO_CC_BORLANDC && (__BORLANDC__ >= 0x0400)) +# if (LZO_OS_WIN16) && (LZO_MM_TINY || LZO_MM_SMALL || LZO_MM_MEDIUM) +# else +# define lzo_stackavail() stackavail() +# endif +#elif ((LZO_ARCH_I086 || 
LZO_ARCH_I386) && (LZO_CC_DMC || LZO_CC_SYMANTECC)) +# define lzo_stackavail() stackavail() +#elif ((LZO_ARCH_I086) && LZO_CC_MSC && (_MSC_VER >= 700)) +# define lzo_stackavail() _stackavail() +#elif ((LZO_ARCH_I086) && LZO_CC_MSC) +# define lzo_stackavail() stackavail() +#elif ((LZO_ARCH_I086 || LZO_ARCH_I386) && LZO_CC_TURBOC && (__TURBOC__ >= 0x0450)) +# define lzo_stackavail() stackavail() +#elif (LZO_ARCH_I086 && LZO_CC_TURBOC && (__TURBOC__ >= 0x0400)) + LZO_EXTERN_C size_t __cdecl stackavail(void); +# define lzo_stackavail() stackavail() +#elif ((LZO_ARCH_I086 || LZO_ARCH_I386) && (LZO_CC_WATCOMC)) +# define lzo_stackavail() stackavail() +#elif (LZO_ARCH_I086 && LZO_CC_ZORTECHC) +# define lzo_stackavail() _chkstack() +#endif +LZOLIB_EXTERN(lzo_intptr_t, lzo_get_osfhandle) (int); +LZOLIB_EXTERN(const char *, lzo_getenv) (const char *); +LZOLIB_EXTERN(int, lzo_isatty) (int); +LZOLIB_EXTERN(int, lzo_mkdir) (const char*, unsigned); +LZOLIB_EXTERN(int, lzo_rmdir) (const char*); +LZOLIB_EXTERN(int, lzo_response) (int*, char***); +LZOLIB_EXTERN(int, lzo_set_binmode) (int, int); +#if defined(lzo_int32e_t) +LZOLIB_EXTERN(lzo_int32e_t, lzo_muldiv32s) (lzo_int32e_t, lzo_int32e_t, lzo_int32e_t); +LZOLIB_EXTERN(lzo_uint32e_t, lzo_muldiv32u) (lzo_uint32e_t, lzo_uint32e_t, lzo_uint32e_t); +#endif +LZOLIB_EXTERN(void, lzo_wildargv) (int*, char***); +LZOLIB_EXTERN_NOINLINE(void, lzo_debug_break) (void); +LZOLIB_EXTERN_NOINLINE(void, lzo_debug_nop) (void); +LZOLIB_EXTERN_NOINLINE(int, lzo_debug_align_check_query) (void); +LZOLIB_EXTERN_NOINLINE(int, lzo_debug_align_check_enable) (int); +LZOLIB_EXTERN_NOINLINE(unsigned, lzo_debug_running_on_qemu) (void); +LZOLIB_EXTERN_NOINLINE(unsigned, lzo_debug_running_on_valgrind) (void); +#if defined(lzo_int32e_t) +LZOLIB_EXTERN(int, lzo_tsc_read) (lzo_uint32e_t*); +#endif +struct lzo_pclock_handle_t; +struct lzo_pclock_t; +typedef struct lzo_pclock_handle_t lzo_pclock_handle_t; +typedef struct lzo_pclock_t lzo_pclock_t; +#ifndef 
lzo_pclock_handle_p +#define lzo_pclock_handle_p lzo_pclock_handle_t * +#endif +#ifndef lzo_pclock_p +#define lzo_pclock_p lzo_pclock_t * +#endif +#define LZO_PCLOCK_REALTIME 0 +#define LZO_PCLOCK_MONOTONIC 1 +#define LZO_PCLOCK_PROCESS_CPUTIME_ID 2 +#define LZO_PCLOCK_THREAD_CPUTIME_ID 3 +typedef int (*lzo_pclock_gettime_t) (lzo_pclock_handle_p, lzo_pclock_p); +struct lzo_pclock_handle_t { + lzolib_handle_t h; + int mode; + int read_error; + const char* name; + lzo_pclock_gettime_t gettime; +#if defined(lzo_int64l_t) + lzo_uint64l_t ticks_base; +#endif +}; +struct lzo_pclock_t { +#if defined(lzo_int64l_t) + lzo_int64l_t tv_sec; +#else + lzo_int32l_t tv_sec_high; + lzo_uint32l_t tv_sec_low; +#endif + lzo_uint32l_t tv_nsec; +}; +LZOLIB_EXTERN(int, lzo_pclock_open) (lzo_pclock_handle_p, int); +LZOLIB_EXTERN(int, lzo_pclock_open_default) (lzo_pclock_handle_p); +LZOLIB_EXTERN(int, lzo_pclock_close) (lzo_pclock_handle_p); +LZOLIB_EXTERN(void, lzo_pclock_read) (lzo_pclock_handle_p, lzo_pclock_p); +#if !(LZO_CFG_NO_DOUBLE) +LZOLIB_EXTERN(double, lzo_pclock_get_elapsed) (lzo_pclock_handle_p, const lzo_pclock_p, const lzo_pclock_p); +#endif +LZOLIB_EXTERN(int, lzo_pclock_flush_cpu_cache) (lzo_pclock_handle_p, unsigned); +struct lzo_getopt_t; +typedef struct lzo_getopt_t lzo_getopt_t; +#ifndef lzo_getopt_p +#define lzo_getopt_p lzo_getopt_t * +#endif +struct lzo_getopt_longopt_t; +typedef struct lzo_getopt_longopt_t lzo_getopt_longopt_t; +#ifndef lzo_getopt_longopt_p +#define lzo_getopt_longopt_p lzo_getopt_longopt_t * +#endif +struct lzo_getopt_longopt_t { + const char* name; + int has_arg; + int* flag; + int val; +}; +typedef void (*lzo_getopt_opterr_t)(lzo_getopt_p, const char*, void *); +struct lzo_getopt_t { + void *user; + const char *progname; + int bad_option; + char *optarg; + lzo_getopt_opterr_t opterr; + int optind; + int optopt; + int errcount; + int argc; char** argv; + int eof; int shortpos; + int pending_rotate_first, pending_rotate_middle; +}; +enum { 
LZO_GETOPT_NO_ARG, LZO_GETOPT_REQUIRED_ARG, LZO_GETOPT_OPTIONAL_ARG, LZO_GETOPT_EXACT_ARG = 0x10 }; +enum { LZO_GETOPT_PERMUTE, LZO_GETOPT_RETURN_IN_ORDER, LZO_GETOPT_REQUIRE_ORDER }; +LZOLIB_EXTERN(void, lzo_getopt_init) (lzo_getopt_p g, + int start_argc, int argc, char** argv); +LZOLIB_EXTERN(int, lzo_getopt) (lzo_getopt_p g, + const char* shortopts, + const lzo_getopt_longopt_p longopts, + int* longind); +typedef struct { + lzo_uint32l_t seed; +} lzo_rand31_t; +#ifndef lzo_rand31_p +#define lzo_rand31_p lzo_rand31_t * +#endif +LZOLIB_EXTERN(void, lzo_srand31) (lzo_rand31_p, lzo_uint32l_t); +LZOLIB_EXTERN(lzo_uint32l_t, lzo_rand31) (lzo_rand31_p); +#if defined(lzo_int64l_t) +typedef struct { + lzo_uint64l_t seed; +} lzo_rand48_t; +#ifndef lzo_rand48_p +#define lzo_rand48_p lzo_rand48_t * +#endif +LZOLIB_EXTERN(void, lzo_srand48) (lzo_rand48_p, lzo_uint32l_t); +LZOLIB_EXTERN(lzo_uint32l_t, lzo_rand48) (lzo_rand48_p); +LZOLIB_EXTERN(lzo_uint32l_t, lzo_rand48_r32) (lzo_rand48_p); +#endif +#if defined(lzo_int64l_t) +typedef struct { + lzo_uint64l_t seed; +} lzo_rand64_t; +#ifndef lzo_rand64_p +#define lzo_rand64_p lzo_rand64_t * +#endif +LZOLIB_EXTERN(void, lzo_srand64) (lzo_rand64_p, lzo_uint64l_t); +LZOLIB_EXTERN(lzo_uint32l_t, lzo_rand64) (lzo_rand64_p); +LZOLIB_EXTERN(lzo_uint32l_t, lzo_rand64_r32) (lzo_rand64_p); +#endif +typedef struct { + unsigned n; + lzo_uint32l_t s[624]; +} lzo_randmt_t; +#ifndef lzo_randmt_p +#define lzo_randmt_p lzo_randmt_t * +#endif +LZOLIB_EXTERN(void, lzo_srandmt) (lzo_randmt_p, lzo_uint32l_t); +LZOLIB_EXTERN(lzo_uint32l_t, lzo_randmt) (lzo_randmt_p); +LZOLIB_EXTERN(lzo_uint32l_t, lzo_randmt_r32) (lzo_randmt_p); +#if defined(lzo_int64l_t) +typedef struct { + unsigned n; + lzo_uint64l_t s[312]; +} lzo_randmt64_t; +#ifndef lzo_randmt64_p +#define lzo_randmt64_p lzo_randmt64_t * +#endif +LZOLIB_EXTERN(void, lzo_srandmt64) (lzo_randmt64_p, lzo_uint64l_t); +LZOLIB_EXTERN(lzo_uint64l_t, lzo_randmt64_r64) (lzo_randmt64_p); +#endif +#define 
LZO_SPAWN_P_WAIT 0 +#define LZO_SPAWN_P_NOWAIT 1 +LZOLIB_EXTERN(int, lzo_spawnv) (int mode, const char* fn, const char* const * argv); +LZOLIB_EXTERN(int, lzo_spawnvp) (int mode, const char* fn, const char* const * argv); +LZOLIB_EXTERN(int, lzo_spawnve) (int mode, const char* fn, const char* const * argv, const char * const envp); +#endif +#endif +#if defined(LZO_WANT_ACC_CXX_H) +# undef LZO_WANT_ACC_CXX_H +#ifndef __LZO_CXX_H_INCLUDED +#define __LZO_CXX_H_INCLUDED 1 +#if defined(__cplusplus) +#if defined(LZO_CXX_NOTHROW) +#elif (LZO_CC_GNUC && (LZO_CC_GNUC < 0x020800ul)) +#elif (LZO_CC_BORLANDC && (__BORLANDC__ < 0x0450)) +#elif (LZO_CC_GHS && !defined(__EXCEPTIONS)) +#elif (LZO_CC_HIGHC) +#elif (LZO_CC_MSC && (_MSC_VER < 1100)) +#elif (LZO_CC_NDPC) +#elif (LZO_CC_TURBOC) +#elif (LZO_CC_WATCOMC && !defined(_CPPUNWIND)) +#elif (LZO_CC_ZORTECHC) +#else +# define LZO_CXX_NOTHROW throw() +#endif +#if !defined(LZO_CXX_NOTHROW) +# define LZO_CXX_NOTHROW /*empty*/ +#endif +#if defined(__LZO_CXX_DO_NEW) +#elif (LZO_CC_GHS || LZO_CC_NDPC || LZO_CC_PGI) +# define __LZO_CXX_DO_NEW { return 0; } +#elif ((LZO_CC_BORLANDC || LZO_CC_TURBOC) && LZO_ARCH_I086) +# define __LZO_CXX_DO_NEW { return 0; } +#else +# define __LZO_CXX_DO_NEW ; +#endif +#if defined(__LZO_CXX_DO_DELETE) +#elif (LZO_CC_BORLANDC || LZO_CC_TURBOC) +# define __LZO_CXX_DO_DELETE { } +#else +# define __LZO_CXX_DO_DELETE LZO_CXX_NOTHROW { } +#endif +#if (LZO_CC_BORLANDC && (__BORLANDC__ < 0x0450)) +#elif (LZO_CC_MSC && LZO_MM_HUGE) +# define LZO_CXX_DISABLE_NEW_DELETE private: +#elif (LZO_CC_MSC && (_MSC_VER < 1100)) +#elif (LZO_CC_NDPC) +#elif (LZO_CC_SYMANTECC || LZO_CC_ZORTECHC) +#elif (LZO_CC_TURBOC) +#elif (LZO_CC_WATCOMC && (__WATCOMC__ < 1100)) +#else +# define __LZO_CXX_HAVE_ARRAY_NEW 1 +#endif +#if (__LZO_CXX_HAVE_ARRAY_NEW) +# define __LZO_CXX_HAVE_PLACEMENT_NEW 1 +#endif +#if (__LZO_CXX_HAVE_PLACEMENT_NEW) +# if (LZO_CC_GNUC >= 0x030000ul) +# define __LZO_CXX_HAVE_PLACEMENT_DELETE 1 +# elif 
(LZO_CC_INTELC) +# define __LZO_CXX_HAVE_PLACEMENT_DELETE 1 +# elif (LZO_CC_MSC && (_MSC_VER >= 1200)) +# define __LZO_CXX_HAVE_PLACEMENT_DELETE 1 +# elif (LZO_CC_CLANG || LZO_CC_LLVM || LZO_CC_PATHSCALE) +# define __LZO_CXX_HAVE_PLACEMENT_DELETE 1 +# elif (LZO_CC_PGI) +# define __LZO_CXX_HAVE_PLACEMENT_DELETE 1 +# endif +#endif +#if defined(LZO_CXX_DISABLE_NEW_DELETE) +#elif defined(new) || defined(delete) +# define LZO_CXX_DISABLE_NEW_DELETE private: +#elif (LZO_CC_GNUC && (LZO_CC_GNUC < 0x025b00ul)) +# define LZO_CXX_DISABLE_NEW_DELETE private: +#elif (LZO_CC_HIGHC) +# define LZO_CXX_DISABLE_NEW_DELETE private: +#elif !(__LZO_CXX_HAVE_ARRAY_NEW) +# define LZO_CXX_DISABLE_NEW_DELETE \ + protected: static void operator delete(void*) __LZO_CXX_DO_DELETE \ + protected: static void* operator new(size_t) __LZO_CXX_DO_NEW \ + private: +#else +# define LZO_CXX_DISABLE_NEW_DELETE \ + protected: static void operator delete(void*) __LZO_CXX_DO_DELETE \ + static void operator delete[](void*) __LZO_CXX_DO_DELETE \ + private: static void* operator new(size_t) __LZO_CXX_DO_NEW \ + static void* operator new[](size_t) __LZO_CXX_DO_NEW +#endif +#if defined(LZO_CXX_TRIGGER_FUNCTION) +#else +# define LZO_CXX_TRIGGER_FUNCTION \ + protected: virtual const void* lzo_cxx_trigger_function() const; \ + private: +#endif +#if defined(LZO_CXX_TRIGGER_FUNCTION_IMPL) +#else +# define LZO_CXX_TRIGGER_FUNCTION_IMPL(klass) \ + const void* klass::lzo_cxx_trigger_function() const { return LZO_STATIC_CAST(const void *, 0); } +#endif +#endif +#endif +#endif +#if defined(LZO_WANT_ACC_CHK_CH) +# undef LZO_WANT_ACC_CHK_CH +#if !defined(LZOCHK_ASSERT) +# define LZOCHK_ASSERT(expr) LZO_COMPILE_TIME_ASSERT_HEADER(expr) +#endif +#if !defined(LZOCHK_ASSERT_SIGN_T) +# define LZOCHK_ASSERT_SIGN_T(type,relop) \ + LZOCHK_ASSERT( LZO_STATIC_CAST(type, -1) relop LZO_STATIC_CAST(type, 0)) \ + LZOCHK_ASSERT( LZO_STATIC_CAST(type, ~LZO_STATIC_CAST(type, 0)) relop LZO_STATIC_CAST(type, 0)) \ + LZOCHK_ASSERT( 
LZO_STATIC_CAST(type, ~LZO_STATIC_CAST(type, 0)) == LZO_STATIC_CAST(type, -1)) +#endif +#if !defined(LZOCHK_ASSERT_IS_SIGNED_T) +# define LZOCHK_ASSERT_IS_SIGNED_T(type) LZOCHK_ASSERT_SIGN_T(type,<) +#endif +#if !defined(LZOCHK_ASSERT_IS_UNSIGNED_T) +# if (LZO_BROKEN_INTEGRAL_PROMOTION) +# define LZOCHK_ASSERT_IS_UNSIGNED_T(type) \ + LZOCHK_ASSERT( LZO_STATIC_CAST(type, -1) > LZO_STATIC_CAST(type, 0) ) +# else +# define LZOCHK_ASSERT_IS_UNSIGNED_T(type) LZOCHK_ASSERT_SIGN_T(type,>) +# endif +#endif +#if defined(LZOCHK_CFG_PEDANTIC) +#if (LZO_CC_BORLANDC && (__BORLANDC__ >= 0x0550) && (__BORLANDC__ < 0x0560)) +# pragma option push -w-8055 +#elif (LZO_CC_BORLANDC && (__BORLANDC__ >= 0x0530) && (__BORLANDC__ < 0x0550)) +# pragma option push -w-osh +#endif +#endif +#if (LZO_0xffffffffL - LZO_UINT32_C(4294967294) != 1) +# error "preprocessor error" +#endif +#if (LZO_0xffffffffL - LZO_UINT32_C(0xfffffffd) != 2) +# error "preprocessor error" +#endif +#if +0 +# error "preprocessor error" +#endif +#if -0 +# error "preprocessor error" +#endif +#if +0 != 0 +# error "preprocessor error" +#endif +#if -0 != 0 +# error "preprocessor error" +#endif +#define LZOCHK_VAL 1 +#define LZOCHK_TMP1 LZOCHK_VAL +#undef LZOCHK_VAL +#define LZOCHK_VAL 2 +#define LZOCHK_TMP2 LZOCHK_VAL +#if (LZOCHK_TMP1 != 2) +# error "preprocessor error 3a" +#endif +#if (LZOCHK_TMP2 != 2) +# error "preprocessor error 3b" +#endif +#undef LZOCHK_VAL +#if (LZOCHK_TMP2) +# error "preprocessor error 3c" +#endif +#if (LZOCHK_TMP2 + 0 != 0) +# error "preprocessor error 3d" +#endif +#undef LZOCHK_TMP1 +#undef LZOCHK_TMP2 +#if 0 || defined(LZOCHK_CFG_PEDANTIC) +# if (LZO_ARCH_MIPS) && defined(_MIPS_SZINT) + LZOCHK_ASSERT((_MIPS_SZINT) == 8 * sizeof(int)) +# endif +# if (LZO_ARCH_MIPS) && defined(_MIPS_SZLONG) + LZOCHK_ASSERT((_MIPS_SZLONG) == 8 * sizeof(long)) +# endif +# if (LZO_ARCH_MIPS) && defined(_MIPS_SZPTR) + LZOCHK_ASSERT((_MIPS_SZPTR) == 8 * sizeof(void *)) +# endif +#endif + LZOCHK_ASSERT(1 == 1) + 
LZOCHK_ASSERT(__LZO_MASK_GEN(1u,1) == 1u) + LZOCHK_ASSERT(__LZO_MASK_GEN(1u,2) == 3u) + LZOCHK_ASSERT(__LZO_MASK_GEN(1u,3) == 7u) + LZOCHK_ASSERT(__LZO_MASK_GEN(1u,8) == 255u) +#if (LZO_SIZEOF_INT >= 2) + LZOCHK_ASSERT(__LZO_MASK_GEN(1,15) == 32767) + LZOCHK_ASSERT(__LZO_MASK_GEN(1u,16) == 0xffffU) + LZOCHK_ASSERT(__LZO_MASK_GEN(0u,16) == 0u) +#endif + LZOCHK_ASSERT(__LZO_MASK_GEN(1ul,16) == 0xffffUL) + LZOCHK_ASSERT(__LZO_MASK_GEN(0ul,16) == 0ul) +#if (LZO_SIZEOF_INT >= 4) + LZOCHK_ASSERT(__LZO_MASK_GEN(1,31) == 2147483647) + LZOCHK_ASSERT(__LZO_MASK_GEN(1u,32) == 0xffffffffU) + LZOCHK_ASSERT(__LZO_MASK_GEN(0u,32) == 0u) +#endif +#if (LZO_SIZEOF_LONG >= 4) + LZOCHK_ASSERT(__LZO_MASK_GEN(1ul,32) == 0xffffffffUL) + LZOCHK_ASSERT(__LZO_MASK_GEN(0ul,32) == 0ul) +#endif +#if (LZO_SIZEOF_LONG >= 8) + LZOCHK_ASSERT(__LZO_MASK_GEN(1ul,64) == 0xffffffffffffffffUL) + LZOCHK_ASSERT(__LZO_MASK_GEN(0ul,64) == 0ul) +#endif +#if !(LZO_BROKEN_INTEGRAL_PROMOTION) + LZOCHK_ASSERT(__LZO_MASK_GEN(1u,LZO_SIZEOF_INT*8) == ~0u) + LZOCHK_ASSERT(__LZO_MASK_GEN(1ul,LZO_SIZEOF_LONG*8) == ~0ul) +#endif +#if 1 + LZOCHK_ASSERT(__LZO_MASK_GEN(0,0) == 0) + LZOCHK_ASSERT(__LZO_MASK_GEN(1,0) == 0) + LZOCHK_ASSERT(__LZO_MASK_GEN(2,0) == 0) + LZOCHK_ASSERT(__LZO_MASK_GEN(4,0) == 0) +#endif +#if 1 + LZOCHK_ASSERT(__LZO_MASK_GEN(2,1) == 2) + LZOCHK_ASSERT(__LZO_MASK_GEN(4,1) == 4) + LZOCHK_ASSERT(__LZO_MASK_GEN(8,1) == 8) + LZOCHK_ASSERT(__LZO_MASK_GEN(2,2) == 2+4) + LZOCHK_ASSERT(__LZO_MASK_GEN(4,2) == 4+8) + LZOCHK_ASSERT(__LZO_MASK_GEN(8,2) == 8+16) + LZOCHK_ASSERT(__LZO_MASK_GEN(2,3) == 2+4+8) + LZOCHK_ASSERT(__LZO_MASK_GEN(4,3) == 4+8+16) + LZOCHK_ASSERT(__LZO_MASK_GEN(8,3) == 8+16+32) + LZOCHK_ASSERT(__LZO_MASK_GEN(7,1) == 7) + LZOCHK_ASSERT(__LZO_MASK_GEN(7,2) == 7+14) + LZOCHK_ASSERT(__LZO_MASK_GEN(7,3) == 7+14+28) +#endif +#if !(LZO_BROKEN_SIGNED_RIGHT_SHIFT) + LZOCHK_ASSERT(((-1) >> 7) == -1) +#endif + LZOCHK_ASSERT(((1) >> 7) == 0) +#if (LZO_CC_INTELC && (__INTEL_COMPILER >= 900)) +# pragma 
warning(push) +# pragma warning(disable: 1025) +#endif + LZOCHK_ASSERT((~0l & ~0) == ~0l) + LZOCHK_ASSERT((~0l & ~0u) == ~0u) + LZOCHK_ASSERT((~0ul & ~0) == ~0ul) + LZOCHK_ASSERT((~0ul & ~0u) == ~0u) +#if defined(__MSDOS__) && defined(__TURBOC__) && (__TURBOC__ < 0x0150) +#elif (LZO_SIZEOF_INT == 2) + LZOCHK_ASSERT((~0l & ~0u) == 0xffffU) + LZOCHK_ASSERT((~0ul & ~0u) == 0xffffU) +#elif (LZO_SIZEOF_INT == 4) + LZOCHK_ASSERT((~0l & ~0u) == 0xffffffffU) + LZOCHK_ASSERT((~0ul & ~0u) == 0xffffffffU) +#endif +#if (LZO_CC_INTELC && (__INTEL_COMPILER >= 900)) +# pragma warning(pop) +#endif + LZOCHK_ASSERT_IS_SIGNED_T(signed char) + LZOCHK_ASSERT_IS_UNSIGNED_T(unsigned char) + LZOCHK_ASSERT(sizeof(signed char) == sizeof(char)) + LZOCHK_ASSERT(sizeof(unsigned char) == sizeof(char)) + LZOCHK_ASSERT(sizeof(char) == 1) +#if (LZO_CC_CILLY) && (!defined(__CILLY__) || (__CILLY__ < 0x010302L)) +#else + LZOCHK_ASSERT(sizeof(char) == sizeof(LZO_STATIC_CAST(char, 0))) +#endif +#if defined(__cplusplus) + LZOCHK_ASSERT(sizeof('\0') == sizeof(char)) +#else +# if (LZO_CC_DMC) +# else + LZOCHK_ASSERT(sizeof('\0') == sizeof(int)) +# endif +#endif +#if defined(__lzo_alignof) + LZOCHK_ASSERT(__lzo_alignof(char) == 1) + LZOCHK_ASSERT(__lzo_alignof(signed char) == 1) + LZOCHK_ASSERT(__lzo_alignof(unsigned char) == 1) +#if defined(lzo_int16e_t) + LZOCHK_ASSERT(__lzo_alignof(lzo_int16e_t) >= 1) + LZOCHK_ASSERT(__lzo_alignof(lzo_int16e_t) <= 2) +#endif +#if defined(lzo_int32e_t) + LZOCHK_ASSERT(__lzo_alignof(lzo_int32e_t) >= 1) + LZOCHK_ASSERT(__lzo_alignof(lzo_int32e_t) <= 4) +#endif +#endif + LZOCHK_ASSERT_IS_SIGNED_T(short) + LZOCHK_ASSERT_IS_UNSIGNED_T(unsigned short) + LZOCHK_ASSERT(sizeof(short) == sizeof(unsigned short)) +#if !(LZO_ABI_I8LP16) + LZOCHK_ASSERT(sizeof(short) >= 2) +#endif + LZOCHK_ASSERT(sizeof(short) >= sizeof(char)) +#if (LZO_CC_CILLY) && (!defined(__CILLY__) || (__CILLY__ < 0x010302L)) +#else + LZOCHK_ASSERT(sizeof(short) == sizeof(LZO_STATIC_CAST(short, 0))) +#endif +#if 
(LZO_SIZEOF_SHORT > 0) + LZOCHK_ASSERT(sizeof(short) == LZO_SIZEOF_SHORT) +#endif + LZOCHK_ASSERT_IS_SIGNED_T(int) + LZOCHK_ASSERT_IS_UNSIGNED_T(unsigned int) + LZOCHK_ASSERT(sizeof(int) == sizeof(unsigned int)) +#if !(LZO_ABI_I8LP16) + LZOCHK_ASSERT(sizeof(int) >= 2) +#endif + LZOCHK_ASSERT(sizeof(int) >= sizeof(short)) + LZOCHK_ASSERT(sizeof(int) == sizeof(0)) + LZOCHK_ASSERT(sizeof(int) == sizeof(LZO_STATIC_CAST(int, 0))) +#if (LZO_SIZEOF_INT > 0) + LZOCHK_ASSERT(sizeof(int) == LZO_SIZEOF_INT) +#endif + LZOCHK_ASSERT(sizeof(0) == sizeof(int)) + LZOCHK_ASSERT_IS_SIGNED_T(long) + LZOCHK_ASSERT_IS_UNSIGNED_T(unsigned long) + LZOCHK_ASSERT(sizeof(long) == sizeof(unsigned long)) +#if !(LZO_ABI_I8LP16) + LZOCHK_ASSERT(sizeof(long) >= 4) +#endif + LZOCHK_ASSERT(sizeof(long) >= sizeof(int)) + LZOCHK_ASSERT(sizeof(long) == sizeof(0L)) + LZOCHK_ASSERT(sizeof(long) == sizeof(LZO_STATIC_CAST(long, 0))) +#if (LZO_SIZEOF_LONG > 0) + LZOCHK_ASSERT(sizeof(long) == LZO_SIZEOF_LONG) +#endif + LZOCHK_ASSERT(sizeof(0L) == sizeof(long)) + LZOCHK_ASSERT_IS_UNSIGNED_T(size_t) + LZOCHK_ASSERT(sizeof(size_t) >= sizeof(int)) + LZOCHK_ASSERT(sizeof(size_t) == sizeof(sizeof(0))) +#if (LZO_SIZEOF_SIZE_T > 0) + LZOCHK_ASSERT(sizeof(size_t) == LZO_SIZEOF_SIZE_T) +#endif + LZOCHK_ASSERT_IS_SIGNED_T(ptrdiff_t) + LZOCHK_ASSERT(sizeof(ptrdiff_t) >= sizeof(int)) + LZOCHK_ASSERT(sizeof(ptrdiff_t) >= sizeof(size_t)) +#if !(LZO_BROKEN_SIZEOF) + LZOCHK_ASSERT(sizeof(ptrdiff_t) == sizeof(LZO_STATIC_CAST(char*, 0) - LZO_STATIC_CAST(char*, 0))) +# if (LZO_HAVE_MM_HUGE_PTR) + LZOCHK_ASSERT(4 == sizeof(LZO_STATIC_CAST(char __huge*, 0) - LZO_STATIC_CAST(char __huge*, 0))) +# endif +#endif +#if (LZO_SIZEOF_PTRDIFF_T > 0) + LZOCHK_ASSERT(sizeof(ptrdiff_t) == LZO_SIZEOF_PTRDIFF_T) +#endif + LZOCHK_ASSERT(sizeof(void*) >= sizeof(char*)) +#if (LZO_SIZEOF_VOID_P > 0) + LZOCHK_ASSERT(sizeof(void*) == LZO_SIZEOF_VOID_P) + LZOCHK_ASSERT(sizeof(char*) == LZO_SIZEOF_VOID_P) +#endif +#if (LZO_HAVE_MM_HUGE_PTR) + 
LZOCHK_ASSERT(4 == sizeof(void __huge*)) + LZOCHK_ASSERT(4 == sizeof(char __huge*)) +#endif +#if (LZO_ABI_I8LP16) + LZOCHK_ASSERT((((1u << 7) + 1) >> 7) == 1) + LZOCHK_ASSERT((((1ul << 15) + 1) >> 15) == 1) +#else + LZOCHK_ASSERT((((1u << 15) + 1) >> 15) == 1) + LZOCHK_ASSERT((((1ul << 31) + 1) >> 31) == 1) +#endif +#if defined(LZOCHK_CFG_PEDANTIC) +#if defined(__MSDOS__) && defined(__TURBOC__) && (__TURBOC__ < 0x0150) +#else + LZOCHK_ASSERT((1 << (8*LZO_SIZEOF_INT-1)) < 0) +#endif +#endif + LZOCHK_ASSERT((1u << (8*LZO_SIZEOF_INT-1)) > 0) +#if defined(LZOCHK_CFG_PEDANTIC) + LZOCHK_ASSERT((1l << (8*LZO_SIZEOF_LONG-1)) < 0) +#endif + LZOCHK_ASSERT((1ul << (8*LZO_SIZEOF_LONG-1)) > 0) +#if defined(lzo_int16e_t) + LZOCHK_ASSERT(sizeof(lzo_int16e_t) == 2) + LZOCHK_ASSERT(sizeof(lzo_int16e_t) == LZO_SIZEOF_LZO_INT16E_T) + LZOCHK_ASSERT(sizeof(lzo_uint16e_t) == 2) + LZOCHK_ASSERT(sizeof(lzo_int16e_t) == sizeof(lzo_uint16e_t)) + LZOCHK_ASSERT_IS_SIGNED_T(lzo_int16e_t) + LZOCHK_ASSERT_IS_UNSIGNED_T(lzo_uint16e_t) +#if defined(__MSDOS__) && defined(__TURBOC__) && (__TURBOC__ < 0x0150) +#else + LZOCHK_ASSERT((LZO_STATIC_CAST(lzo_uint16e_t, (~LZO_STATIC_CAST(lzo_uint16e_t,0ul))) >> 15) == 1) +#endif + LZOCHK_ASSERT( LZO_STATIC_CAST(lzo_int16e_t, (1 + ~LZO_STATIC_CAST(lzo_int16e_t, 0))) == 0) +#if defined(LZOCHK_CFG_PEDANTIC) + LZOCHK_ASSERT( LZO_STATIC_CAST(lzo_uint16e_t, (1 + ~LZO_STATIC_CAST(lzo_uint16e_t, 0))) == 0) +#endif +#endif +#if defined(lzo_int32e_t) + LZOCHK_ASSERT(sizeof(lzo_int32e_t) == 4) + LZOCHK_ASSERT(sizeof(lzo_int32e_t) == LZO_SIZEOF_LZO_INT32E_T) + LZOCHK_ASSERT(sizeof(lzo_uint32e_t) == 4) + LZOCHK_ASSERT(sizeof(lzo_int32e_t) == sizeof(lzo_uint32e_t)) + LZOCHK_ASSERT_IS_SIGNED_T(lzo_int32e_t) + LZOCHK_ASSERT(((( LZO_STATIC_CAST(lzo_int32e_t, 1) << 30) + 1) >> 30) == 1) + LZOCHK_ASSERT_IS_UNSIGNED_T(lzo_uint32e_t) + LZOCHK_ASSERT(((( LZO_STATIC_CAST(lzo_uint32e_t, 1) << 31) + 1) >> 31) == 1) + LZOCHK_ASSERT((LZO_STATIC_CAST(lzo_uint32e_t, 
(~LZO_STATIC_CAST(lzo_uint32e_t, 0ul))) >> 31) == 1) + LZOCHK_ASSERT( LZO_STATIC_CAST(lzo_int32e_t, (1 + ~LZO_STATIC_CAST(lzo_int32e_t, 0))) == 0) +#if defined(LZOCHK_CFG_PEDANTIC) + LZOCHK_ASSERT( LZO_STATIC_CAST(lzo_uint32e_t, (1 + ~LZO_STATIC_CAST(lzo_uint32e_t, 0))) == 0) +#endif +#endif +#if defined(lzo_int32e_t) + LZOCHK_ASSERT(sizeof(lzo_int32l_t) >= sizeof(lzo_int32e_t)) +#endif + LZOCHK_ASSERT(sizeof(lzo_int32l_t) >= 4) + LZOCHK_ASSERT(sizeof(lzo_int32l_t) == LZO_SIZEOF_LZO_INT32L_T) + LZOCHK_ASSERT(sizeof(lzo_uint32l_t) >= 4) + LZOCHK_ASSERT(sizeof(lzo_int32l_t) == sizeof(lzo_uint32l_t)) + LZOCHK_ASSERT_IS_SIGNED_T(lzo_int32l_t) + LZOCHK_ASSERT(((( LZO_STATIC_CAST(lzo_int32l_t, 1) << 30) + 1) >> 30) == 1) + LZOCHK_ASSERT_IS_UNSIGNED_T(lzo_uint32l_t) + LZOCHK_ASSERT(((( LZO_STATIC_CAST(lzo_uint32l_t, 1) << 31) + 1) >> 31) == 1) + LZOCHK_ASSERT(sizeof(lzo_int32f_t) >= sizeof(int)) +#if defined(lzo_int32e_t) + LZOCHK_ASSERT(sizeof(lzo_int32f_t) >= sizeof(lzo_int32e_t)) +#endif + LZOCHK_ASSERT(sizeof(lzo_int32f_t) >= sizeof(lzo_int32l_t)) + LZOCHK_ASSERT(sizeof(lzo_int32f_t) >= 4) + LZOCHK_ASSERT(sizeof(lzo_int32f_t) >= sizeof(lzo_int32l_t)) + LZOCHK_ASSERT(sizeof(lzo_int32f_t) == LZO_SIZEOF_LZO_INT32F_T) + LZOCHK_ASSERT(sizeof(lzo_uint32f_t) >= 4) + LZOCHK_ASSERT(sizeof(lzo_uint32f_t) >= sizeof(lzo_uint32l_t)) + LZOCHK_ASSERT(sizeof(lzo_int32f_t) == sizeof(lzo_uint32f_t)) + LZOCHK_ASSERT_IS_SIGNED_T(lzo_int32f_t) + LZOCHK_ASSERT(((( LZO_STATIC_CAST(lzo_int32f_t, 1) << 30) + 1) >> 30) == 1) + LZOCHK_ASSERT_IS_UNSIGNED_T(lzo_uint32f_t) + LZOCHK_ASSERT(((( LZO_STATIC_CAST(lzo_uint32f_t, 1) << 31) + 1) >> 31) == 1) +#if defined(lzo_int64e_t) + LZOCHK_ASSERT(sizeof(lzo_int64e_t) == 8) + LZOCHK_ASSERT(sizeof(lzo_int64e_t) == LZO_SIZEOF_LZO_INT64E_T) + LZOCHK_ASSERT(sizeof(lzo_uint64e_t) == 8) + LZOCHK_ASSERT(sizeof(lzo_int64e_t) == sizeof(lzo_uint64e_t)) + LZOCHK_ASSERT_IS_SIGNED_T(lzo_int64e_t) +#if (LZO_CC_BORLANDC && (__BORLANDC__ < 0x0530)) +#else + 
LZOCHK_ASSERT_IS_UNSIGNED_T(lzo_uint64e_t) +#endif +#endif +#if defined(lzo_int64l_t) +#if defined(lzo_int64e_t) + LZOCHK_ASSERT(sizeof(lzo_int64l_t) >= sizeof(lzo_int64e_t)) +#endif + LZOCHK_ASSERT(sizeof(lzo_int64l_t) >= 8) + LZOCHK_ASSERT(sizeof(lzo_int64l_t) == LZO_SIZEOF_LZO_INT64L_T) + LZOCHK_ASSERT(sizeof(lzo_uint64l_t) >= 8) + LZOCHK_ASSERT(sizeof(lzo_int64l_t) == sizeof(lzo_uint64l_t)) + LZOCHK_ASSERT_IS_SIGNED_T(lzo_int64l_t) + LZOCHK_ASSERT(((( LZO_STATIC_CAST(lzo_int64l_t, 1) << 62) + 1) >> 62) == 1) + LZOCHK_ASSERT(((( LZO_INT64_C(1) << 62) + 1) >> 62) == 1) +#if (LZO_CC_BORLANDC && (__BORLANDC__ < 0x0530)) +#else + LZOCHK_ASSERT_IS_UNSIGNED_T(lzo_uint64l_t) + LZOCHK_ASSERT(LZO_UINT64_C(18446744073709551615) > 0) +#endif + LZOCHK_ASSERT(((( LZO_STATIC_CAST(lzo_uint64l_t, 1) << 63) + 1) >> 63) == 1) + LZOCHK_ASSERT(((( LZO_UINT64_C(1) << 63) + 1) >> 63) == 1) +#if (LZO_CC_GNUC && (LZO_CC_GNUC < 0x020600ul)) + LZOCHK_ASSERT(LZO_INT64_C(9223372036854775807) > LZO_INT64_C(0)) +#else + LZOCHK_ASSERT(LZO_INT64_C(9223372036854775807) > 0) +#endif + LZOCHK_ASSERT(LZO_INT64_C(-9223372036854775807) - 1 < 0) + LZOCHK_ASSERT( LZO_INT64_C(9223372036854775807) % LZO_INT32_C(2147483629) == 721) + LZOCHK_ASSERT( LZO_INT64_C(9223372036854775807) % LZO_INT32_C(2147483647) == 1) + LZOCHK_ASSERT(LZO_UINT64_C(9223372036854775807) % LZO_UINT32_C(2147483629) == 721) + LZOCHK_ASSERT(LZO_UINT64_C(9223372036854775807) % LZO_UINT32_C(2147483647) == 1) +#endif +#if defined(lzo_int64f_t) +#if defined(lzo_int64e_t) + LZOCHK_ASSERT(sizeof(lzo_int64f_t) >= sizeof(lzo_int64e_t)) +#endif + LZOCHK_ASSERT(sizeof(lzo_int64f_t) >= sizeof(lzo_int64l_t)) + LZOCHK_ASSERT(sizeof(lzo_int64f_t) >= 8) + LZOCHK_ASSERT(sizeof(lzo_int64f_t) >= sizeof(lzo_int64l_t)) + LZOCHK_ASSERT(sizeof(lzo_int64f_t) == LZO_SIZEOF_LZO_INT64F_T) + LZOCHK_ASSERT(sizeof(lzo_uint64f_t) >= 8) + LZOCHK_ASSERT(sizeof(lzo_uint64f_t) >= sizeof(lzo_uint64l_t)) + LZOCHK_ASSERT(sizeof(lzo_int64f_t) == sizeof(lzo_uint64f_t)) + 
LZOCHK_ASSERT_IS_SIGNED_T(lzo_int64f_t) +#if (LZO_CC_BORLANDC && (__BORLANDC__ < 0x0530)) +#else + LZOCHK_ASSERT_IS_UNSIGNED_T(lzo_uint64f_t) +#endif +#endif +#if !defined(__LZO_INTPTR_T_IS_POINTER) + LZOCHK_ASSERT_IS_SIGNED_T(lzo_intptr_t) + LZOCHK_ASSERT_IS_UNSIGNED_T(lzo_uintptr_t) +#endif + LZOCHK_ASSERT(sizeof(lzo_intptr_t) >= sizeof(void *)) + LZOCHK_ASSERT(sizeof(lzo_intptr_t) == LZO_SIZEOF_LZO_INTPTR_T) + LZOCHK_ASSERT(sizeof(lzo_intptr_t) == sizeof(lzo_uintptr_t)) +#if defined(lzo_word_t) + LZOCHK_ASSERT(LZO_WORDSIZE == LZO_SIZEOF_LZO_WORD_T) + LZOCHK_ASSERT_IS_UNSIGNED_T(lzo_word_t) + LZOCHK_ASSERT_IS_SIGNED_T(lzo_sword_t) + LZOCHK_ASSERT(sizeof(lzo_word_t) == LZO_SIZEOF_LZO_WORD_T) + LZOCHK_ASSERT(sizeof(lzo_word_t) == sizeof(lzo_sword_t)) +#endif + LZOCHK_ASSERT(sizeof(lzo_int8_t) == 1) + LZOCHK_ASSERT(sizeof(lzo_uint8_t) == 1) + LZOCHK_ASSERT(sizeof(lzo_int8_t) == sizeof(lzo_uint8_t)) + LZOCHK_ASSERT_IS_SIGNED_T(lzo_int8_t) + LZOCHK_ASSERT_IS_UNSIGNED_T(lzo_uint8_t) +#if defined(LZO_INT16_C) + LZOCHK_ASSERT(sizeof(LZO_INT16_C(0)) >= 2) + LZOCHK_ASSERT(sizeof(LZO_UINT16_C(0)) >= 2) + LZOCHK_ASSERT((LZO_UINT16_C(0xffff) >> 15) == 1) +#endif +#if defined(LZO_INT32_C) + LZOCHK_ASSERT(sizeof(LZO_INT32_C(0)) >= 4) + LZOCHK_ASSERT(sizeof(LZO_UINT32_C(0)) >= 4) + LZOCHK_ASSERT((LZO_UINT32_C(0xffffffff) >> 31) == 1) +#endif +#if defined(LZO_INT64_C) +#if (LZO_CC_BORLANDC && (__BORLANDC__ < 0x0560)) +#else + LZOCHK_ASSERT(sizeof(LZO_INT64_C(0)) >= 8) + LZOCHK_ASSERT(sizeof(LZO_UINT64_C(0)) >= 8) +#endif + LZOCHK_ASSERT((LZO_UINT64_C(0xffffffffffffffff) >> 63) == 1) + LZOCHK_ASSERT((LZO_UINT64_C(0xffffffffffffffff) & ~0) == LZO_UINT64_C(0xffffffffffffffff)) + LZOCHK_ASSERT((LZO_UINT64_C(0xffffffffffffffff) & ~0l) == LZO_UINT64_C(0xffffffffffffffff)) +#if (LZO_SIZEOF_INT == 4) +# if (LZO_CC_GNUC && (LZO_CC_GNUC < 0x020000ul)) +# else + LZOCHK_ASSERT((LZO_UINT64_C(0xffffffffffffffff) & (~0u+0u)) == 0xffffffffu) +# endif +#endif +#if (LZO_SIZEOF_LONG == 4) +# if 
(LZO_CC_GNUC && (LZO_CC_GNUC < 0x020000ul)) +# else + LZOCHK_ASSERT((LZO_UINT64_C(0xffffffffffffffff) & (~0ul+0ul)) == 0xfffffffful) +# endif +#endif +#endif +#if (LZO_MM_TINY || LZO_MM_SMALL || LZO_MM_MEDIUM) + LZOCHK_ASSERT(sizeof(void*) == 2) + LZOCHK_ASSERT(sizeof(ptrdiff_t) == 2) +#elif (LZO_MM_COMPACT || LZO_MM_LARGE || LZO_MM_HUGE) + LZOCHK_ASSERT(sizeof(void*) == 4) +#endif +#if (LZO_MM_TINY || LZO_MM_SMALL || LZO_MM_COMPACT) + LZOCHK_ASSERT(sizeof(void (*)(void)) == 2) +#elif (LZO_MM_MEDIUM || LZO_MM_LARGE || LZO_MM_HUGE) + LZOCHK_ASSERT(sizeof(void (*)(void)) == 4) +#endif +#if (LZO_ABI_ILP32) + LZOCHK_ASSERT(sizeof(int) == 4) + LZOCHK_ASSERT(sizeof(long) == 4) + LZOCHK_ASSERT(sizeof(void*) == 4) + LZOCHK_ASSERT(sizeof(ptrdiff_t) == sizeof(void*)) + LZOCHK_ASSERT(sizeof(size_t) == sizeof(void*)) + LZOCHK_ASSERT(sizeof(lzo_intptr_t) == sizeof(void *)) +#endif +#if (LZO_ABI_ILP64) + LZOCHK_ASSERT(sizeof(int) == 8) + LZOCHK_ASSERT(sizeof(long) == 8) + LZOCHK_ASSERT(sizeof(void*) == 8) + LZOCHK_ASSERT(sizeof(ptrdiff_t) == sizeof(void*)) + LZOCHK_ASSERT(sizeof(size_t) == sizeof(void*)) + LZOCHK_ASSERT(sizeof(lzo_intptr_t) == sizeof(void *)) +#endif +#if (LZO_ABI_IP32L64) + LZOCHK_ASSERT(sizeof(int) == 4) + LZOCHK_ASSERT(sizeof(long) == 8) + LZOCHK_ASSERT(sizeof(void*) == 4) + LZOCHK_ASSERT(sizeof(ptrdiff_t) == sizeof(void*)) + LZOCHK_ASSERT(sizeof(size_t) == sizeof(void*)) + LZOCHK_ASSERT(sizeof(lzo_intptr_t) == sizeof(void *)) +#endif +#if (LZO_ABI_LLP64) + LZOCHK_ASSERT(sizeof(int) == 4) + LZOCHK_ASSERT(sizeof(long) == 4) + LZOCHK_ASSERT(sizeof(void*) == 8) + LZOCHK_ASSERT(sizeof(ptrdiff_t) == sizeof(void*)) + LZOCHK_ASSERT(sizeof(size_t) == sizeof(void*)) + LZOCHK_ASSERT(sizeof(lzo_intptr_t) == sizeof(void *)) +#endif +#if (LZO_ABI_LP32) + LZOCHK_ASSERT(sizeof(int) == 2) + LZOCHK_ASSERT(sizeof(long) == 4) + LZOCHK_ASSERT(sizeof(void*) == 4) + LZOCHK_ASSERT(sizeof(lzo_intptr_t) == sizeof(void *)) +#endif +#if (LZO_ABI_LP64) + LZOCHK_ASSERT(sizeof(int) == 4) 
+ LZOCHK_ASSERT(sizeof(long) == 8) + LZOCHK_ASSERT(sizeof(void*) == 8) + LZOCHK_ASSERT(sizeof(ptrdiff_t) == sizeof(void*)) + LZOCHK_ASSERT(sizeof(size_t) == sizeof(void*)) + LZOCHK_ASSERT(sizeof(lzo_intptr_t) == sizeof(void *)) +#endif +#if (LZO_ABI_IP32W64) + LZOCHK_ASSERT(sizeof(int) == 4) + LZOCHK_ASSERT(sizeof(void*) == 4) + LZOCHK_ASSERT(sizeof(ptrdiff_t) == sizeof(void*)) + LZOCHK_ASSERT(sizeof(size_t) == sizeof(void*)) + LZOCHK_ASSERT(sizeof(lzo_intptr_t) == sizeof(void *)) + LZOCHK_ASSERT(LZO_WORDSIZE == 8) +#endif +#if (LZO_ARCH_I086) + LZOCHK_ASSERT(sizeof(size_t) == 2) + LZOCHK_ASSERT(sizeof(lzo_intptr_t) == sizeof(void *)) +#elif (LZO_ARCH_I386 || LZO_ARCH_M68K) + LZOCHK_ASSERT(sizeof(size_t) == 4) + LZOCHK_ASSERT(sizeof(ptrdiff_t) == 4) + LZOCHK_ASSERT(sizeof(lzo_intptr_t) == sizeof(void *)) +#endif +#if (LZO_OS_DOS32 || LZO_OS_OS2 || LZO_OS_WIN32) + LZOCHK_ASSERT(sizeof(size_t) == 4) + LZOCHK_ASSERT(sizeof(ptrdiff_t) == 4) + LZOCHK_ASSERT(sizeof(void (*)(void)) == 4) +#elif (LZO_OS_WIN64) + LZOCHK_ASSERT(sizeof(size_t) == 8) + LZOCHK_ASSERT(sizeof(ptrdiff_t) == 8) + LZOCHK_ASSERT(sizeof(void (*)(void)) == 8) +#endif +#if (LZO_CC_NDPC) +#elif (LZO_SIZEOF_INT > 1) + LZOCHK_ASSERT( LZO_STATIC_CAST(int, LZO_STATIC_CAST(unsigned char, LZO_STATIC_CAST(signed char, -1))) == 255) +#endif +#if defined(LZOCHK_CFG_PEDANTIC) +#if (LZO_CC_KEILC) +#elif (LZO_CC_NDPC) +#elif !(LZO_BROKEN_INTEGRAL_PROMOTION) && (LZO_SIZEOF_INT > 1) + LZOCHK_ASSERT( ((LZO_STATIC_CAST(unsigned char, 128)) << LZO_STATIC_CAST(int, (8*sizeof(int)-8))) < 0) +#endif +#endif +#if defined(LZOCHK_CFG_PEDANTIC) +#if (LZO_CC_BORLANDC && (__BORLANDC__ >= 0x0530) && (__BORLANDC__ < 0x0560)) +# pragma option pop +#endif +#endif +#endif +#if defined(LZO_WANT_ACCLIB_VGET) +# undef LZO_WANT_ACCLIB_VGET +#define __LZOLIB_VGET_CH_INCLUDED 1 +#if !defined(LZOLIB_PUBLIC) +# define LZOLIB_PUBLIC(r,f) r __LZOLIB_FUNCNAME(f) +#endif +#if !defined(LZOLIB_PUBLIC_NOINLINE) +# if !defined(__lzo_noinline) +# 
define LZOLIB_PUBLIC_NOINLINE(r,f) r __LZOLIB_FUNCNAME(f) +# elif (LZO_CC_CLANG || (LZO_CC_GNUC >= 0x030400ul) || LZO_CC_LLVM) +# define LZOLIB_PUBLIC_NOINLINE(r,f) __lzo_noinline __attribute__((__used__)) r __LZOLIB_FUNCNAME(f) +# else +# define LZOLIB_PUBLIC_NOINLINE(r,f) __lzo_noinline r __LZOLIB_FUNCNAME(f) +# endif +#endif +extern void* volatile lzo_vget_ptr__; +#if (LZO_CC_CLANG || (LZO_CC_GNUC >= 0x030400ul) || LZO_CC_LLVM) +void* volatile __attribute__((__used__)) lzo_vget_ptr__ = LZO_STATIC_CAST(void *, 0); +#else +void* volatile lzo_vget_ptr__ = LZO_STATIC_CAST(void *, 0); +#endif +#ifndef __LZOLIB_VGET_BODY +#define __LZOLIB_VGET_BODY(T) \ + if __lzo_unlikely(lzo_vget_ptr__) { \ + typedef T __lzo_may_alias TT; \ + unsigned char e; expr &= 255; e = LZO_STATIC_CAST(unsigned char, expr); \ + * LZO_STATIC_CAST(TT *, lzo_vget_ptr__) = v; \ + * LZO_STATIC_CAST(unsigned char *, lzo_vget_ptr__) = e; \ + v = * LZO_STATIC_CAST(TT *, lzo_vget_ptr__); \ + } \ + return v; +#endif +LZOLIB_PUBLIC_NOINLINE(short, lzo_vget_short) (short v, int expr) +{ + __LZOLIB_VGET_BODY(short) +} +LZOLIB_PUBLIC_NOINLINE(int, lzo_vget_int) (int v, int expr) +{ + __LZOLIB_VGET_BODY(int) +} +LZOLIB_PUBLIC_NOINLINE(long, lzo_vget_long) (long v, int expr) +{ + __LZOLIB_VGET_BODY(long) +} +#if defined(lzo_int64l_t) +LZOLIB_PUBLIC_NOINLINE(lzo_int64l_t, lzo_vget_lzo_int64l_t) (lzo_int64l_t v, int expr) +{ + __LZOLIB_VGET_BODY(lzo_int64l_t) +} +#endif +LZOLIB_PUBLIC_NOINLINE(lzo_hsize_t, lzo_vget_lzo_hsize_t) (lzo_hsize_t v, int expr) +{ + __LZOLIB_VGET_BODY(lzo_hsize_t) +} +#if !(LZO_CFG_NO_DOUBLE) +LZOLIB_PUBLIC_NOINLINE(double, lzo_vget_double) (double v, int expr) +{ + __LZOLIB_VGET_BODY(double) +} +#endif +LZOLIB_PUBLIC_NOINLINE(lzo_hvoid_p, lzo_vget_lzo_hvoid_p) (lzo_hvoid_p v, int expr) +{ + __LZOLIB_VGET_BODY(lzo_hvoid_p) +} +#if (LZO_ARCH_I086 && LZO_CC_TURBOC && (__TURBOC__ == 0x0295)) && !defined(__cplusplus) +LZOLIB_PUBLIC_NOINLINE(lzo_hvoid_p, lzo_vget_lzo_hvoid_cp) (const 
lzo_hvoid_p vv, int expr) +{ + lzo_hvoid_p v = (lzo_hvoid_p) vv; + __LZOLIB_VGET_BODY(lzo_hvoid_p) +} +#else +LZOLIB_PUBLIC_NOINLINE(const lzo_hvoid_p, lzo_vget_lzo_hvoid_cp) (const lzo_hvoid_p v, int expr) +{ + __LZOLIB_VGET_BODY(const lzo_hvoid_p) +} +#endif +#endif +#if defined(LZO_WANT_ACCLIB_HMEMCPY) +# undef LZO_WANT_ACCLIB_HMEMCPY +#define __LZOLIB_HMEMCPY_CH_INCLUDED 1 +#if !defined(LZOLIB_PUBLIC) +# define LZOLIB_PUBLIC(r,f) r __LZOLIB_FUNCNAME(f) +#endif +LZOLIB_PUBLIC(int, lzo_hmemcmp) (const lzo_hvoid_p s1, const lzo_hvoid_p s2, lzo_hsize_t len) +{ +#if (LZO_HAVE_MM_HUGE_PTR) || !(HAVE_MEMCMP) + const lzo_hbyte_p p1 = LZO_STATIC_CAST(const lzo_hbyte_p, s1); + const lzo_hbyte_p p2 = LZO_STATIC_CAST(const lzo_hbyte_p, s2); + if __lzo_likely(len > 0) do + { + int d = *p1 - *p2; + if (d != 0) + return d; + p1++; p2++; + } while __lzo_likely(--len > 0); + return 0; +#else + return memcmp(s1, s2, len); +#endif +} +LZOLIB_PUBLIC(lzo_hvoid_p, lzo_hmemcpy) (lzo_hvoid_p dest, const lzo_hvoid_p src, lzo_hsize_t len) +{ +#if (LZO_HAVE_MM_HUGE_PTR) || !(HAVE_MEMCPY) + lzo_hbyte_p p1 = LZO_STATIC_CAST(lzo_hbyte_p, dest); + const lzo_hbyte_p p2 = LZO_STATIC_CAST(const lzo_hbyte_p, src); + if (!(len > 0) || p1 == p2) + return dest; + do + *p1++ = *p2++; + while __lzo_likely(--len > 0); + return dest; +#else + return memcpy(dest, src, len); +#endif +} +LZOLIB_PUBLIC(lzo_hvoid_p, lzo_hmemmove) (lzo_hvoid_p dest, const lzo_hvoid_p src, lzo_hsize_t len) +{ +#if (LZO_HAVE_MM_HUGE_PTR) || !(HAVE_MEMMOVE) + lzo_hbyte_p p1 = LZO_STATIC_CAST(lzo_hbyte_p, dest); + const lzo_hbyte_p p2 = LZO_STATIC_CAST(const lzo_hbyte_p, src); + if (!(len > 0) || p1 == p2) + return dest; + if (p1 < p2) + { + do + *p1++ = *p2++; + while __lzo_likely(--len > 0); + } + else + { + p1 += len; + p2 += len; + do + *--p1 = *--p2; + while __lzo_likely(--len > 0); + } + return dest; +#else + return memmove(dest, src, len); +#endif +} +LZOLIB_PUBLIC(lzo_hvoid_p, lzo_hmemset) (lzo_hvoid_p s, int cc, 
lzo_hsize_t len) +{ +#if (LZO_HAVE_MM_HUGE_PTR) || !(HAVE_MEMSET) + lzo_hbyte_p p = LZO_STATIC_CAST(lzo_hbyte_p, s); + unsigned char c = LZO_ITRUNC(unsigned char, cc); + if __lzo_likely(len > 0) do + *p++ = c; + while __lzo_likely(--len > 0); + return s; +#else + return memset(s, cc, len); +#endif +} +#endif +#if defined(LZO_WANT_ACCLIB_RAND) +# undef LZO_WANT_ACCLIB_RAND +#define __LZOLIB_RAND_CH_INCLUDED 1 +#if !defined(LZOLIB_PUBLIC) +# define LZOLIB_PUBLIC(r,f) r __LZOLIB_FUNCNAME(f) +#endif +LZOLIB_PUBLIC(void, lzo_srand31) (lzo_rand31_p r, lzo_uint32l_t seed) +{ + r->seed = seed & LZO_UINT32_C(0xffffffff); +} +LZOLIB_PUBLIC(lzo_uint32l_t, lzo_rand31) (lzo_rand31_p r) +{ + r->seed = r->seed * LZO_UINT32_C(1103515245) + 12345; + r->seed &= LZO_UINT32_C(0x7fffffff); + return r->seed; +} +#if defined(lzo_int64l_t) +LZOLIB_PUBLIC(void, lzo_srand48) (lzo_rand48_p r, lzo_uint32l_t seed) +{ + r->seed = seed & LZO_UINT32_C(0xffffffff); + r->seed <<= 16; r->seed |= 0x330e; +} +LZOLIB_PUBLIC(lzo_uint32l_t, lzo_rand48) (lzo_rand48_p r) +{ + lzo_uint64l_t a; + r->seed = r->seed * LZO_UINT64_C(25214903917) + 11; + r->seed &= LZO_UINT64_C(0xffffffffffff); + a = r->seed >> 17; + return LZO_STATIC_CAST(lzo_uint32l_t, a); +} +LZOLIB_PUBLIC(lzo_uint32l_t, lzo_rand48_r32) (lzo_rand48_p r) +{ + lzo_uint64l_t a; + r->seed = r->seed * LZO_UINT64_C(25214903917) + 11; + r->seed &= LZO_UINT64_C(0xffffffffffff); + a = r->seed >> 16; + return LZO_STATIC_CAST(lzo_uint32l_t, a); +} +#endif +#if defined(lzo_int64l_t) +LZOLIB_PUBLIC(void, lzo_srand64) (lzo_rand64_p r, lzo_uint64l_t seed) +{ + r->seed = seed & LZO_UINT64_C(0xffffffffffffffff); +} +LZOLIB_PUBLIC(lzo_uint32l_t, lzo_rand64) (lzo_rand64_p r) +{ + lzo_uint64l_t a; + r->seed = r->seed * LZO_UINT64_C(6364136223846793005) + 1; +#if (LZO_SIZEOF_LZO_INT64L_T > 8) + r->seed &= LZO_UINT64_C(0xffffffffffffffff); +#endif + a = r->seed >> 33; + return LZO_STATIC_CAST(lzo_uint32l_t, a); +} +LZOLIB_PUBLIC(lzo_uint32l_t, lzo_rand64_r32) 
(lzo_rand64_p r) +{ + lzo_uint64l_t a; + r->seed = r->seed * LZO_UINT64_C(6364136223846793005) + 1; +#if (LZO_SIZEOF_LZO_INT64L_T > 8) + r->seed &= LZO_UINT64_C(0xffffffffffffffff); +#endif + a = r->seed >> 32; + return LZO_STATIC_CAST(lzo_uint32l_t, a); +} +#endif +LZOLIB_PUBLIC(void, lzo_srandmt) (lzo_randmt_p r, lzo_uint32l_t seed) +{ + unsigned i = 0; + do { + r->s[i++] = (seed &= LZO_UINT32_C(0xffffffff)); + seed ^= seed >> 30; + seed = seed * LZO_UINT32_C(0x6c078965) + i; + } while (i != 624); + r->n = i; +} +LZOLIB_PUBLIC(lzo_uint32l_t, lzo_randmt) (lzo_randmt_p r) +{ + return (__LZOLIB_FUNCNAME(lzo_randmt_r32)(r)) >> 1; +} +LZOLIB_PUBLIC(lzo_uint32l_t, lzo_randmt_r32) (lzo_randmt_p r) +{ + lzo_uint32l_t v; + if __lzo_unlikely(r->n == 624) { + unsigned i = 0, j; + r->n = 0; + do { + j = i - 623; if (LZO_STATIC_CAST(int, j) < 0) j += 624; + v = (r->s[i] & LZO_UINT32_C(0x80000000)) ^ (r->s[j] & LZO_UINT32_C(0x7fffffff)); + j = i - 227; if (LZO_STATIC_CAST(int, j) < 0) j += 624; + r->s[i] = r->s[j] ^ (v >> 1); + if (v & 1) r->s[i] ^= LZO_UINT32_C(0x9908b0df); + } while (++i != 624); + } + { unsigned i = r->n++; v = r->s[i]; } + v ^= v >> 11; v ^= (v & LZO_UINT32_C(0x013a58ad)) << 7; + v ^= (v & LZO_UINT32_C(0x0001df8c)) << 15; v ^= v >> 18; + return v; +} +#if defined(lzo_int64l_t) +LZOLIB_PUBLIC(void, lzo_srandmt64) (lzo_randmt64_p r, lzo_uint64l_t seed) +{ + unsigned i = 0; + do { + r->s[i++] = (seed &= LZO_UINT64_C(0xffffffffffffffff)); + seed ^= seed >> 62; + seed = seed * LZO_UINT64_C(0x5851f42d4c957f2d) + i; + } while (i != 312); + r->n = i; +} +#if 0 +LZOLIB_PUBLIC(lzo_uint32l_t, lzo_randmt64) (lzo_randmt64_p r) +{ + lzo_uint64l_t v; + v = (__LZOLIB_FUNCNAME(lzo_randmt64_r64)(r)) >> 33; + return LZO_STATIC_CAST(lzo_uint32l_t, v); +} +#endif +LZOLIB_PUBLIC(lzo_uint64l_t, lzo_randmt64_r64) (lzo_randmt64_p r) +{ + lzo_uint64l_t v; + if __lzo_unlikely(r->n == 312) { + unsigned i = 0, j; + r->n = 0; + do { + j = i - 311; if (LZO_STATIC_CAST(int, j) < 0) j += 
312; + v = (r->s[i] & LZO_UINT64_C(0xffffffff80000000)) ^ (r->s[j] & LZO_UINT64_C(0x7fffffff)); + j = i - 156; if (LZO_STATIC_CAST(int, j) < 0) j += 312; + r->s[i] = r->s[j] ^ (v >> 1); + if (v & 1) r->s[i] ^= LZO_UINT64_C(0xb5026f5aa96619e9); + } while (++i != 312); + } + { unsigned i = r->n++; v = r->s[i]; } + v ^= (v & LZO_UINT64_C(0xaaaaaaaaa0000000)) >> 29; + v ^= (v & LZO_UINT64_C(0x38eb3ffff6d3)) << 17; + v ^= (v & LZO_UINT64_C(0x7ffbf77)) << 37; + return v ^ (v >> 43); +} +#endif +#endif +#if defined(LZO_WANT_ACCLIB_RDTSC) +# undef LZO_WANT_ACCLIB_RDTSC +#define __LZOLIB_RDTSC_CH_INCLUDED 1 +#if !defined(LZOLIB_PUBLIC) +# define LZOLIB_PUBLIC(r,f) r __LZOLIB_FUNCNAME(f) +#endif +#if defined(lzo_int32e_t) +#if (LZO_OS_WIN32 && LZO_CC_PELLESC && (__POCC__ >= 290)) +# pragma warn(push) +# pragma warn(disable:2007) +#endif +#if (LZO_ARCH_AMD64 || LZO_ARCH_I386) && (LZO_ASM_SYNTAX_GNUC) +#if (LZO_ARCH_AMD64 && LZO_CC_INTELC) +# define __LZOLIB_RDTSC_REGS : : "c" (t) : "memory", "rax", "rdx" +#elif (LZO_ARCH_AMD64) +# define __LZOLIB_RDTSC_REGS : : "c" (t) : "cc", "memory", "rax", "rdx" +#elif (LZO_ARCH_I386 && LZO_CC_GNUC && (LZO_CC_GNUC < 0x020000ul)) +# define __LZOLIB_RDTSC_REGS : : "c" (t) : "ax", "dx" +#elif (LZO_ARCH_I386 && LZO_CC_INTELC) +# define __LZOLIB_RDTSC_REGS : : "c" (t) : "memory", "eax", "edx" +#else +# define __LZOLIB_RDTSC_REGS : : "c" (t) : "cc", "memory", "eax", "edx" +#endif +#endif +LZOLIB_PUBLIC(int, lzo_tsc_read) (lzo_uint32e_t* t) +{ +#if (LZO_ARCH_AMD64 || LZO_ARCH_I386) && (LZO_ASM_SYNTAX_GNUC) + __asm__ __volatile__( + "clc \n" ".byte 0x0f,0x31\n" + "movl %%eax,(%0)\n" "movl %%edx,4(%0)\n" + __LZOLIB_RDTSC_REGS + ); + return 0; +#elif (LZO_ARCH_I386) && (LZO_ASM_SYNTAX_MSC) + LZO_UNUSED(t); + __asm { + mov ecx, t + clc +# if (LZO_CC_MSC && (_MSC_VER < 1200)) + _emit 0x0f + _emit 0x31 +# else + rdtsc +# endif + mov [ecx], eax + mov [ecx+4], edx + } + return 0; +#else + t[0] = t[1] = 0; return -1; +#endif +} +#if (LZO_OS_WIN32 && 
LZO_CC_PELLESC && (__POCC__ >= 290)) +# pragma warn(pop) +#endif +#endif +#endif +#if defined(LZO_WANT_ACCLIB_DOSALLOC) +# undef LZO_WANT_ACCLIB_DOSALLOC +#define __LZOLIB_DOSALLOC_CH_INCLUDED 1 +#if !defined(LZOLIB_PUBLIC) +# define LZOLIB_PUBLIC(r,f) r __LZOLIB_FUNCNAME(f) +#endif +#if (LZO_OS_OS216) +LZO_EXTERN_C unsigned short __far __pascal DosAllocHuge(unsigned short, unsigned short, unsigned short __far *, unsigned short, unsigned short); +LZO_EXTERN_C unsigned short __far __pascal DosFreeSeg(unsigned short); +#endif +#if (LZO_OS_DOS16 || LZO_OS_WIN16) +#if !(LZO_CC_AZTECC) +LZOLIB_PUBLIC(void __far*, lzo_dos_alloc) (unsigned long size) +{ + void __far* p = 0; + union REGS ri, ro; + if ((long)size <= 0) + return p; + size = (size + 15) >> 4; + if (size > 0xffffu) + return p; + ri.x.ax = 0x4800; + ri.x.bx = (unsigned short) size; + int86(0x21, &ri, &ro); + if ((ro.x.cflag & 1) == 0) + p = (void __far*) LZO_PTR_MK_FP(ro.x.ax, 0); + return p; +} +LZOLIB_PUBLIC(int, lzo_dos_free) (void __far* p) +{ + union REGS ri, ro; + struct SREGS rs; + if (!p) + return 0; + if (LZO_PTR_FP_OFF(p) != 0) + return -1; + segread(&rs); + ri.x.ax = 0x4900; + rs.es = LZO_PTR_FP_SEG(p); + int86x(0x21, &ri, &ro, &rs); + if (ro.x.cflag & 1) + return -1; + return 0; +} +#endif +#endif +#if (LZO_OS_OS216) +LZOLIB_PUBLIC(void __far*, lzo_dos_alloc) (unsigned long size) +{ + void __far* p = 0; + unsigned short sel = 0; + if ((long)size <= 0) + return p; + if (DosAllocHuge((unsigned short)(size >> 16), (unsigned short)size, &sel, 0, 0) == 0) + p = (void __far*) LZO_PTR_MK_FP(sel, 0); + return p; +} +LZOLIB_PUBLIC(int, lzo_dos_free) (void __far* p) +{ + if (!p) + return 0; + if (LZO_PTR_FP_OFF(p) != 0) + return -1; + if (DosFreeSeg(LZO_PTR_FP_SEG(p)) != 0) + return -1; + return 0; +} +#endif +#endif +#if defined(LZO_WANT_ACCLIB_GETOPT) +# undef LZO_WANT_ACCLIB_GETOPT +#define __LZOLIB_GETOPT_CH_INCLUDED 1 +#if !defined(LZOLIB_PUBLIC) +# define LZOLIB_PUBLIC(r,f) r __LZOLIB_FUNCNAME(f) 
+#endif +LZOLIB_PUBLIC(void, lzo_getopt_init) (lzo_getopt_p g, + int start_argc, int argc, char** argv) +{ + memset(g, 0, sizeof(*g)); + g->optind = start_argc; + g->argc = argc; g->argv = argv; + g->optopt = -1; +} +static int __LZOLIB_FUNCNAME(lzo_getopt_rotate) (char** p, int first, int middle, int last) +{ + int i = middle, n = middle - first; + if (first >= middle || middle >= last) return 0; + for (;;) + { + char* t = p[first]; p[first] = p[i]; p[i] = t; + if (++first == middle) + { + if (++i == last) break; + middle = i; + } + else if (++i == last) + i = middle; + } + return n; +} +static int __LZOLIB_FUNCNAME(lzo_getopt_perror) (lzo_getopt_p g, int ret, const char* f, ...) +{ + if (g->opterr) + { +#if (HAVE_STDARG_H) + struct { va_list ap; } s; + va_start(s.ap, f); + g->opterr(g, f, &s); + va_end(s.ap); +#else + g->opterr(g, f, NULL); +#endif + } + ++g->errcount; + return ret; +} +LZOLIB_PUBLIC(int, lzo_getopt) (lzo_getopt_p g, + const char* shortopts, + const lzo_getopt_longopt_p longopts, + int* longind) +{ +#define pe __LZOLIB_FUNCNAME(lzo_getopt_perror) + int ordering = LZO_GETOPT_PERMUTE; + int missing_arg_ret = g->bad_option; + char* a; + if (shortopts) + { + if (*shortopts == '-' || *shortopts == '+') + ordering = *shortopts++ == '-' ? 
LZO_GETOPT_RETURN_IN_ORDER : LZO_GETOPT_REQUIRE_ORDER; + if (*shortopts == ':') + missing_arg_ret = *shortopts++; + } + g->optarg = NULL; + if (g->optopt == -1) + g->optopt = g->bad_option; + if (longind) + *longind = -1; + if (g->eof) + return -1; + if (g->shortpos) + goto lzo_label_next_shortopt; + g->optind -= __LZOLIB_FUNCNAME(lzo_getopt_rotate)(g->argv, g->pending_rotate_first, g->pending_rotate_middle, g->optind); + g->pending_rotate_first = g->pending_rotate_middle = g->optind; + if (ordering == LZO_GETOPT_PERMUTE) + { + while (g->optind < g->argc && !(g->argv[g->optind][0] == '-' && g->argv[g->optind][1])) + ++g->optind; + g->pending_rotate_middle = g->optind; + } + if (g->optind >= g->argc) + { + g->optind = g->pending_rotate_first; + goto lzo_label_eof; + } + a = g->argv[g->optind]; + if (a[0] == '-' && a[1] == '-') + { + size_t l = 0; + const lzo_getopt_longopt_p o; + const lzo_getopt_longopt_p o1 = NULL; + const lzo_getopt_longopt_p o2 = NULL; + int need_exact = 0; + ++g->optind; + if (!a[2]) + goto lzo_label_eof; + for (a += 2; a[l] && a[l] != '=' && a[l] != '#'; ) + ++l; + for (o = longopts; l && o && o->name; ++o) + { + if (strncmp(a, o->name, l) != 0) + continue; + if (!o->name[l]) + goto lzo_label_found_o; + need_exact |= o->has_arg & LZO_GETOPT_EXACT_ARG; + if (o1) o2 = o; + else o1 = o; + } + if (!o1 || need_exact) + return pe(g, g->bad_option, "unrecognized option '--%s'", a); + if (o2) + return pe(g, g->bad_option, "option '--%s' is ambiguous (could be '--%s' or '--%s')", a, o1->name, o2->name); + o = o1; + lzo_label_found_o: + a += l; + switch (o->has_arg & 0x2f) + { + case LZO_GETOPT_OPTIONAL_ARG: + if (a[0]) + g->optarg = a + 1; + break; + case LZO_GETOPT_REQUIRED_ARG: + if (a[0]) + g->optarg = a + 1; + else if (g->optind < g->argc) + g->optarg = g->argv[g->optind++]; + if (!g->optarg) + return pe(g, missing_arg_ret, "option '--%s' requires an argument", o->name); + break; + case LZO_GETOPT_REQUIRED_ARG | 0x20: + if (a[0] && a[1]) + 
g->optarg = a + 1; + if (!g->optarg) + return pe(g, missing_arg_ret, "option '--%s=' requires an argument", o->name); + break; + default: + if (a[0]) + return pe(g, g->bad_option, "option '--%s' doesn't allow an argument", o->name); + break; + } + if (longind) + *longind = (int) (o - longopts); + if (o->flag) + { + *o->flag = o->val; + return 0; + } + return o->val; + } + if (a[0] == '-' && a[1]) + { + unsigned char c; + const char* s; + lzo_label_next_shortopt: + a = g->argv[g->optind] + ++g->shortpos; + c = (unsigned char) *a++; s = NULL; + if (c != ':' && shortopts) + s = strchr(shortopts, c); + if (!s || s[1] != ':') + { + if (!a[0]) + { ++g->optind; g->shortpos = 0; } + if (!s) + { + g->optopt = c; + return pe(g, g->bad_option, "invalid option '-%c'", c); + } + } + else + { + ++g->optind; g->shortpos = 0; + if (a[0]) + g->optarg = a; + else if (s[2] != ':') + { + if (g->optind < g->argc) + g->optarg = g->argv[g->optind++]; + else + { + g->optopt = c; + return pe(g, missing_arg_ret, "option '-%c' requires an argument", c); + } + } + } + return c; + } + if (ordering == LZO_GETOPT_RETURN_IN_ORDER) + { + ++g->optind; + g->optarg = a; + return 1; + } +lzo_label_eof: + g->optind -= __LZOLIB_FUNCNAME(lzo_getopt_rotate)(g->argv, g->pending_rotate_first, g->pending_rotate_middle, g->optind); + g->pending_rotate_first = g->pending_rotate_middle = g->optind; + g->eof = 1; + return -1; +#undef pe +} +#endif +#if defined(LZO_WANT_ACCLIB_HALLOC) +# undef LZO_WANT_ACCLIB_HALLOC +#define __LZOLIB_HALLOC_CH_INCLUDED 1 +#if !defined(LZOLIB_PUBLIC) +# define LZOLIB_PUBLIC(r,f) r __LZOLIB_FUNCNAME(f) +#endif +#if (LZO_HAVE_MM_HUGE_PTR) +#if 1 && (LZO_OS_DOS16 && defined(BLX286)) +# define __LZOLIB_HALLOC_USE_DAH 1 +#elif 1 && (LZO_OS_DOS16 && defined(DOSX286)) +# define __LZOLIB_HALLOC_USE_DAH 1 +#elif 1 && (LZO_OS_OS216) +# define __LZOLIB_HALLOC_USE_DAH 1 +#elif 1 && (LZO_OS_WIN16) +# define __LZOLIB_HALLOC_USE_GA 1 +#elif 1 && (LZO_OS_DOS16) && (LZO_CC_BORLANDC) && 
defined(__DPMI16__) +# define __LZOLIB_HALLOC_USE_GA 1 +#endif +#endif +#if (__LZOLIB_HALLOC_USE_DAH) +#if 0 && (LZO_OS_OS216) +#include +#else +LZO_EXTERN_C unsigned short __far __pascal DosAllocHuge(unsigned short, unsigned short, unsigned short __far *, unsigned short, unsigned short); +LZO_EXTERN_C unsigned short __far __pascal DosFreeSeg(unsigned short); +#endif +#endif +#if (__LZOLIB_HALLOC_USE_GA) +#if 0 +#define STRICT 1 +#include +#else +LZO_EXTERN_C const void __near* __far __pascal GlobalAlloc(unsigned, unsigned long); +LZO_EXTERN_C const void __near* __far __pascal GlobalFree(const void __near*); +LZO_EXTERN_C unsigned long __far __pascal GlobalHandle(unsigned); +LZO_EXTERN_C void __far* __far __pascal GlobalLock(const void __near*); +LZO_EXTERN_C int __far __pascal GlobalUnlock(const void __near*); +#endif +#endif +LZOLIB_PUBLIC(lzo_hvoid_p, lzo_halloc) (lzo_hsize_t size) +{ + lzo_hvoid_p p = LZO_STATIC_CAST(lzo_hvoid_p, 0); + if (!(size > 0)) + return p; +#if 0 && defined(__palmos__) + p = MemPtrNew(size); +#elif !(LZO_HAVE_MM_HUGE_PTR) + if (size < LZO_STATIC_CAST(size_t, -1)) + p = malloc(LZO_STATIC_CAST(size_t, size)); +#else + if (LZO_STATIC_CAST(long, size) <= 0) + return p; +{ +#if (__LZOLIB_HALLOC_USE_DAH) + unsigned short sel = 0; + if (DosAllocHuge((unsigned short)(size >> 16), (unsigned short)size, &sel, 0, 0) == 0) + p = (lzo_hvoid_p) LZO_PTR_MK_FP(sel, 0); +#elif (__LZOLIB_HALLOC_USE_GA) + const void __near* h = GlobalAlloc(2, size); + if (h) { + p = GlobalLock(h); + if (p && LZO_PTR_FP_OFF(p) != 0) { + GlobalUnlock(h); + p = 0; + } + if (!p) + GlobalFree(h); + } +#elif (LZO_CC_MSC && (_MSC_VER >= 700)) + p = _halloc(size, 1); +#elif (LZO_CC_MSC || LZO_CC_WATCOMC) + p = halloc(size, 1); +#elif (LZO_CC_DMC || LZO_CC_SYMANTECC || LZO_CC_ZORTECHC) + p = farmalloc(size); +#elif (LZO_CC_BORLANDC || LZO_CC_TURBOC) + p = farmalloc(size); +#elif (LZO_CC_AZTECC) + p = lmalloc(size); +#else + if (size < LZO_STATIC_CAST(size_t, -1)) + p = 
malloc(LZO_STATIC_CAST(size_t, size)); +#endif +} +#endif + return p; +} +LZOLIB_PUBLIC(void, lzo_hfree) (lzo_hvoid_p p) +{ + if (!p) + return; +#if 0 && defined(__palmos__) + MemPtrFree(p); +#elif !(LZO_HAVE_MM_HUGE_PTR) + free(p); +#else +#if (__LZOLIB_HALLOC_USE_DAH) + if (LZO_PTR_FP_OFF(p) == 0) + DosFreeSeg((unsigned short) LZO_PTR_FP_SEG(p)); +#elif (__LZOLIB_HALLOC_USE_GA) + if (LZO_PTR_FP_OFF(p) == 0) { + const void __near* h = (const void __near*) (unsigned) GlobalHandle(LZO_PTR_FP_SEG(p)); + if (h) { + GlobalUnlock(h); + GlobalFree(h); + } + } +#elif (LZO_CC_MSC && (_MSC_VER >= 700)) + _hfree(p); +#elif (LZO_CC_MSC || LZO_CC_WATCOMC) + hfree(p); +#elif (LZO_CC_DMC || LZO_CC_SYMANTECC || LZO_CC_ZORTECHC) + farfree((void __far*) p); +#elif (LZO_CC_BORLANDC || LZO_CC_TURBOC) + farfree((void __far*) p); +#elif (LZO_CC_AZTECC) + lfree(p); +#else + free(p); +#endif +#endif +} +#endif +#if defined(LZO_WANT_ACCLIB_HFREAD) +# undef LZO_WANT_ACCLIB_HFREAD +#define __LZOLIB_HFREAD_CH_INCLUDED 1 +#if !defined(LZOLIB_PUBLIC) +# define LZOLIB_PUBLIC(r,f) r __LZOLIB_FUNCNAME(f) +#endif +LZOLIB_PUBLIC(lzo_hsize_t, lzo_hfread) (void* vfp, lzo_hvoid_p buf, lzo_hsize_t size) +{ + FILE* fp = LZO_STATIC_CAST(FILE *, vfp); +#if (LZO_HAVE_MM_HUGE_PTR) +#if (LZO_MM_TINY || LZO_MM_SMALL || LZO_MM_MEDIUM) +#define __LZOLIB_REQUIRE_HMEMCPY_CH 1 + unsigned char tmp[512]; + lzo_hsize_t l = 0; + while (l < size) + { + size_t n = size - l > sizeof(tmp) ? sizeof(tmp) : (size_t) (size - l); + n = fread(tmp, 1, n, fp); + if (n == 0) + break; + __LZOLIB_FUNCNAME(lzo_hmemcpy)((lzo_hbyte_p)buf + l, tmp, (lzo_hsize_t)n); + l += n; + } + return l; +#elif (LZO_MM_COMPACT || LZO_MM_LARGE || LZO_MM_HUGE) + lzo_hbyte_p b = (lzo_hbyte_p) buf; + lzo_hsize_t l = 0; + while (l < size) + { + size_t n; + n = LZO_PTR_FP_OFF(b); n = (n <= 1) ? 
0x8000u : (0u - n); + if ((lzo_hsize_t) n > size - l) + n = (size_t) (size - l); + n = fread((void __far*)b, 1, n, fp); + if (n == 0) + break; + b += n; l += n; + } + return l; +#else +# error "unknown memory model" +#endif +#else + return fread(buf, 1, size, fp); +#endif +} +LZOLIB_PUBLIC(lzo_hsize_t, lzo_hfwrite) (void* vfp, const lzo_hvoid_p buf, lzo_hsize_t size) +{ + FILE* fp = LZO_STATIC_CAST(FILE *, vfp); +#if (LZO_HAVE_MM_HUGE_PTR) +#if (LZO_MM_TINY || LZO_MM_SMALL || LZO_MM_MEDIUM) +#define __LZOLIB_REQUIRE_HMEMCPY_CH 1 + unsigned char tmp[512]; + lzo_hsize_t l = 0; + while (l < size) + { + size_t n = size - l > sizeof(tmp) ? sizeof(tmp) : (size_t) (size - l); + __LZOLIB_FUNCNAME(lzo_hmemcpy)(tmp, (const lzo_hbyte_p)buf + l, (lzo_hsize_t)n); + n = fwrite(tmp, 1, n, fp); + if (n == 0) + break; + l += n; + } + return l; +#elif (LZO_MM_COMPACT || LZO_MM_LARGE || LZO_MM_HUGE) + const lzo_hbyte_p b = (const lzo_hbyte_p) buf; + lzo_hsize_t l = 0; + while (l < size) + { + size_t n; + n = LZO_PTR_FP_OFF(b); n = (n <= 1) ? 
0x8000u : (0u - n); + if ((lzo_hsize_t) n > size - l) + n = (size_t) (size - l); + n = fwrite((void __far*)b, 1, n, fp); + if (n == 0) + break; + b += n; l += n; + } + return l; +#else +# error "unknown memory model" +#endif +#else + return fwrite(buf, 1, size, fp); +#endif +} +#endif +#if defined(LZO_WANT_ACCLIB_HSREAD) +# undef LZO_WANT_ACCLIB_HSREAD +#define __LZOLIB_HSREAD_CH_INCLUDED 1 +#if !defined(LZOLIB_PUBLIC) +# define LZOLIB_PUBLIC(r,f) r __LZOLIB_FUNCNAME(f) +#endif +LZOLIB_PUBLIC(long, lzo_safe_hread) (int fd, lzo_hvoid_p buf, long size) +{ + lzo_hbyte_p b = (lzo_hbyte_p) buf; + long l = 0; + int saved_errno; + saved_errno = errno; + while (l < size) + { + long n = size - l; +#if (LZO_HAVE_MM_HUGE_PTR) +# define __LZOLIB_REQUIRE_HREAD_CH 1 + errno = 0; n = lzo_hread(fd, b, n); +#elif (LZO_OS_DOS32) && defined(__DJGPP__) + errno = 0; n = _read(fd, b, n); +#else + errno = 0; n = read(fd, b, n); +#endif + if (n == 0) + break; + if (n < 0) { +#if defined(EAGAIN) + if (errno == (EAGAIN)) continue; +#endif +#if defined(EINTR) + if (errno == (EINTR)) continue; +#endif + if (errno == 0) errno = 1; + return l; + } + b += n; l += n; + } + errno = saved_errno; + return l; +} +LZOLIB_PUBLIC(long, lzo_safe_hwrite) (int fd, const lzo_hvoid_p buf, long size) +{ + const lzo_hbyte_p b = (const lzo_hbyte_p) buf; + long l = 0; + int saved_errno; + saved_errno = errno; + while (l < size) + { + long n = size - l; +#if (LZO_HAVE_MM_HUGE_PTR) +# define __LZOLIB_REQUIRE_HREAD_CH 1 + errno = 0; n = lzo_hwrite(fd, b, n); +#elif (LZO_OS_DOS32) && defined(__DJGPP__) + errno = 0; n = _write(fd, b, n); +#else + errno = 0; n = write(fd, b, n); +#endif + if (n == 0) + break; + if (n < 0) { +#if defined(EAGAIN) + if (errno == (EAGAIN)) continue; +#endif +#if defined(EINTR) + if (errno == (EINTR)) continue; +#endif + if (errno == 0) errno = 1; + return l; + } + b += n; l += n; + } + errno = saved_errno; + return l; +} +#endif +#if defined(LZO_WANT_ACCLIB_PCLOCK) +# undef 
LZO_WANT_ACCLIB_PCLOCK +#define __LZOLIB_PCLOCK_CH_INCLUDED 1 +#if !defined(LZOLIB_PUBLIC) +# define LZOLIB_PUBLIC(r,f) r __LZOLIB_FUNCNAME(f) +#endif +#if 1 && (LZO_OS_POSIX_LINUX && LZO_ARCH_AMD64 && LZO_ASM_SYNTAX_GNUC && !LZO_CFG_NO_SYSCALL) +#ifndef lzo_pclock_syscall_clock_gettime +#define lzo_pclock_syscall_clock_gettime lzo_pclock_syscall_clock_gettime +#endif +__lzo_static_noinline long lzo_pclock_syscall_clock_gettime(long clockid, struct timespec *ts) +{ + unsigned long r = 228; + __asm__ __volatile__("syscall\n" : "=a" (r), "=m" (*ts) : "0" (r), "D" (clockid), "S" (ts) __LZO_ASM_CLOBBER_LIST_CC); + return LZO_ICAST(long, r); +} +#endif +#if 1 && (LZO_OS_POSIX_LINUX && LZO_ARCH_I386 && LZO_ASM_SYNTAX_GNUC && !LZO_CFG_NO_SYSCALL) && defined(lzo_int64l_t) +#ifndef lzo_pclock_syscall_clock_gettime +#define lzo_pclock_syscall_clock_gettime lzo_pclock_syscall_clock_gettime +#endif +__lzo_static_noinline long lzo_pclock_syscall_clock_gettime(long clockid, struct timespec *ts) +{ + unsigned long r = 265; + __asm__ __volatile__("pushl %%ebx\n pushl %%edx\n popl %%ebx\n int $0x80\n popl %%ebx\n": "=a" (r), "=m" (*ts) : "0" (r), "d" (clockid), "c" (ts) __LZO_ASM_CLOBBER_LIST_CC); + return LZO_ICAST(long, r); +} +#endif +#if 0 && defined(lzo_pclock_syscall_clock_gettime) +#ifndef lzo_pclock_read_clock_gettime_r_syscall +#define lzo_pclock_read_clock_gettime_r_syscall lzo_pclock_read_clock_gettime_r_syscall +#endif +static int lzo_pclock_read_clock_gettime_r_syscall(lzo_pclock_handle_p h, lzo_pclock_p c) +{ + struct timespec ts; + if (lzo_pclock_syscall_clock_gettime(0, &ts) != 0) + return -1; + c->tv_sec = ts.tv_sec; + c->tv_nsec = LZO_STATIC_CAST(lzo_uint32l_t, ts.tv_nsec); + LZO_UNUSED(h); return 0; +} +#endif +#if (HAVE_GETTIMEOFDAY) +#ifndef lzo_pclock_read_gettimeofday +#define lzo_pclock_read_gettimeofday lzo_pclock_read_gettimeofday +#endif +static int lzo_pclock_read_gettimeofday(lzo_pclock_handle_p h, lzo_pclock_p c) +{ + struct timeval tv; + if 
(gettimeofday(&tv, NULL) != 0) + return -1; +#if defined(lzo_int64l_t) + c->tv_sec = tv.tv_sec; +#else + c->tv_sec_high = 0; + c->tv_sec_low = tv.tv_sec; +#endif + c->tv_nsec = LZO_STATIC_CAST(lzo_uint32l_t, (tv.tv_usec * 1000u)); + LZO_UNUSED(h); return 0; +} +#endif +#if defined(CLOCKS_PER_SEC) && !(LZO_CFG_NO_DOUBLE) +#ifndef lzo_pclock_read_clock +#define lzo_pclock_read_clock lzo_pclock_read_clock +#endif +static int lzo_pclock_read_clock(lzo_pclock_handle_p h, lzo_pclock_p c) +{ + clock_t ticks; + double secs; +#if defined(lzo_int64l_t) + lzo_uint64l_t nsecs; + ticks = clock(); + secs = LZO_STATIC_CAST(double, ticks) / (CLOCKS_PER_SEC); + nsecs = LZO_STATIC_CAST(lzo_uint64l_t, (secs * 1000000000.0)); + c->tv_sec = LZO_STATIC_CAST(lzo_int64l_t, (nsecs / 1000000000ul)); + nsecs = (nsecs % 1000000000ul); + c->tv_nsec = LZO_STATIC_CAST(lzo_uint32l_t, nsecs); +#else + ticks = clock(); + secs = LZO_STATIC_CAST(double, ticks) / (CLOCKS_PER_SEC); + c->tv_sec_high = 0; + c->tv_sec_low = LZO_STATIC_CAST(lzo_uint32l_t, (secs + 0.5)); + c->tv_nsec = 0; +#endif + LZO_UNUSED(h); return 0; +} +#endif +#if 1 && defined(lzo_pclock_syscall_clock_gettime) +#ifndef lzo_pclock_read_clock_gettime_m_syscall +#define lzo_pclock_read_clock_gettime_m_syscall lzo_pclock_read_clock_gettime_m_syscall +#endif +static int lzo_pclock_read_clock_gettime_m_syscall(lzo_pclock_handle_p h, lzo_pclock_p c) +{ + struct timespec ts; + if (lzo_pclock_syscall_clock_gettime(1, &ts) != 0) + return -1; + c->tv_sec = ts.tv_sec; + c->tv_nsec = LZO_STATIC_CAST(lzo_uint32l_t, ts.tv_nsec); + LZO_UNUSED(h); return 0; +} +#endif +#if (LZO_OS_DOS32 && LZO_CC_GNUC) && defined(__DJGPP__) && defined(UCLOCKS_PER_SEC) && !(LZO_CFG_NO_DOUBLE) +#ifndef lzo_pclock_read_uclock +#define lzo_pclock_read_uclock lzo_pclock_read_uclock +#endif +static int lzo_pclock_read_uclock(lzo_pclock_handle_p h, lzo_pclock_p c) +{ + lzo_uint64l_t ticks; + double secs; + lzo_uint64l_t nsecs; + ticks = uclock(); + secs = 
LZO_STATIC_CAST(double, ticks) / (UCLOCKS_PER_SEC); + nsecs = LZO_STATIC_CAST(lzo_uint64l_t, (secs * 1000000000.0)); + c->tv_sec = nsecs / 1000000000ul; + c->tv_nsec = LZO_STATIC_CAST(lzo_uint32l_t, (nsecs % 1000000000ul)); + LZO_UNUSED(h); return 0; +} +#endif +#if 1 && (HAVE_CLOCK_GETTIME) && defined(CLOCK_PROCESS_CPUTIME_ID) && defined(lzo_int64l_t) +#ifndef lzo_pclock_read_clock_gettime_p_libc +#define lzo_pclock_read_clock_gettime_p_libc lzo_pclock_read_clock_gettime_p_libc +#endif +static int lzo_pclock_read_clock_gettime_p_libc(lzo_pclock_handle_p h, lzo_pclock_p c) +{ + struct timespec ts; + if (clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &ts) != 0) + return -1; + c->tv_sec = ts.tv_sec; + c->tv_nsec = LZO_STATIC_CAST(lzo_uint32l_t, ts.tv_nsec); + LZO_UNUSED(h); return 0; +} +#endif +#if 1 && defined(lzo_pclock_syscall_clock_gettime) +#ifndef lzo_pclock_read_clock_gettime_p_syscall +#define lzo_pclock_read_clock_gettime_p_syscall lzo_pclock_read_clock_gettime_p_syscall +#endif +static int lzo_pclock_read_clock_gettime_p_syscall(lzo_pclock_handle_p h, lzo_pclock_p c) +{ + struct timespec ts; + if (lzo_pclock_syscall_clock_gettime(2, &ts) != 0) + return -1; + c->tv_sec = ts.tv_sec; + c->tv_nsec = LZO_STATIC_CAST(lzo_uint32l_t, ts.tv_nsec); + LZO_UNUSED(h); return 0; +} +#endif +#if (LZO_OS_CYGWIN || LZO_OS_WIN32 || LZO_OS_WIN64) && (LZO_HAVE_WINDOWS_H) && defined(lzo_int64l_t) +#ifndef lzo_pclock_read_getprocesstimes +#define lzo_pclock_read_getprocesstimes lzo_pclock_read_getprocesstimes +#endif +static int lzo_pclock_read_getprocesstimes(lzo_pclock_handle_p h, lzo_pclock_p c) +{ + FILETIME ct, et, kt, ut; + lzo_uint64l_t ticks; + if (GetProcessTimes(GetCurrentProcess(), &ct, &et, &kt, &ut) == 0) + return -1; + ticks = (LZO_STATIC_CAST(lzo_uint64l_t, ut.dwHighDateTime) << 32) | ut.dwLowDateTime; + if __lzo_unlikely(h->ticks_base == 0) + h->ticks_base = ticks; + else + ticks -= h->ticks_base; + c->tv_sec = LZO_STATIC_CAST(lzo_int64l_t, (ticks / 10000000ul)); + 
ticks = (ticks % 10000000ul) * 100u; + c->tv_nsec = LZO_STATIC_CAST(lzo_uint32l_t, ticks); + LZO_UNUSED(h); return 0; +} +#endif +#if (HAVE_GETRUSAGE) && defined(RUSAGE_SELF) +#ifndef lzo_pclock_read_getrusage +#define lzo_pclock_read_getrusage lzo_pclock_read_getrusage +#endif +static int lzo_pclock_read_getrusage(lzo_pclock_handle_p h, lzo_pclock_p c) +{ + struct rusage ru; + if (getrusage(RUSAGE_SELF, &ru) != 0) + return -1; +#if defined(lzo_int64l_t) + c->tv_sec = ru.ru_utime.tv_sec; +#else + c->tv_sec_high = 0; + c->tv_sec_low = ru.ru_utime.tv_sec; +#endif + c->tv_nsec = LZO_STATIC_CAST(lzo_uint32l_t, (ru.ru_utime.tv_usec * 1000u)); + LZO_UNUSED(h); return 0; +} +#endif +#if 1 && (HAVE_CLOCK_GETTIME) && defined(CLOCK_THREAD_CPUTIME_ID) && defined(lzo_int64l_t) +#ifndef lzo_pclock_read_clock_gettime_t_libc +#define lzo_pclock_read_clock_gettime_t_libc lzo_pclock_read_clock_gettime_t_libc +#endif +static int lzo_pclock_read_clock_gettime_t_libc(lzo_pclock_handle_p h, lzo_pclock_p c) +{ + struct timespec ts; + if (clock_gettime(CLOCK_THREAD_CPUTIME_ID, &ts) != 0) + return -1; + c->tv_sec = ts.tv_sec; + c->tv_nsec = (lzo_uint32l_t) ts.tv_nsec; + LZO_UNUSED(h); return 0; +} +#endif +#if 1 && defined(lzo_pclock_syscall_clock_gettime) +#ifndef lzo_pclock_read_clock_gettime_t_syscall +#define lzo_pclock_read_clock_gettime_t_syscall lzo_pclock_read_clock_gettime_t_syscall +#endif +static int lzo_pclock_read_clock_gettime_t_syscall(lzo_pclock_handle_p h, lzo_pclock_p c) +{ + struct timespec ts; + if (lzo_pclock_syscall_clock_gettime(3, &ts) != 0) + return -1; + c->tv_sec = ts.tv_sec; + c->tv_nsec = LZO_STATIC_CAST(lzo_uint32l_t, ts.tv_nsec); + LZO_UNUSED(h); return 0; +} +#endif +#if (LZO_OS_CYGWIN || LZO_OS_WIN32 || LZO_OS_WIN64) && (LZO_HAVE_WINDOWS_H) && defined(lzo_int64l_t) +#ifndef lzo_pclock_read_getthreadtimes +#define lzo_pclock_read_getthreadtimes lzo_pclock_read_getthreadtimes +#endif +static int lzo_pclock_read_getthreadtimes(lzo_pclock_handle_p h, 
lzo_pclock_p c) +{ + FILETIME ct, et, kt, ut; + lzo_uint64l_t ticks; + if (GetThreadTimes(GetCurrentThread(), &ct, &et, &kt, &ut) == 0) + return -1; + ticks = (LZO_STATIC_CAST(lzo_uint64l_t, ut.dwHighDateTime) << 32) | ut.dwLowDateTime; + if __lzo_unlikely(h->ticks_base == 0) + h->ticks_base = ticks; + else + ticks -= h->ticks_base; + c->tv_sec = LZO_STATIC_CAST(lzo_int64l_t, (ticks / 10000000ul)); + ticks = (ticks % 10000000ul) * 100u; + c->tv_nsec = LZO_STATIC_CAST(lzo_uint32l_t, ticks); + LZO_UNUSED(h); return 0; +} +#endif +LZOLIB_PUBLIC(int, lzo_pclock_open) (lzo_pclock_handle_p h, int mode) +{ + lzo_pclock_t c; + int i; + h->h = LZO_STATIC_CAST(lzolib_handle_t, 0); + h->mode = -1; + h->read_error = 2; + h->name = NULL; + h->gettime = LZO_STATIC_CAST(lzo_pclock_gettime_t, 0); +#if defined(lzo_int64l_t) + h->ticks_base = 0; +#endif + switch (mode) + { + case LZO_PCLOCK_REALTIME: +# if defined(lzo_pclock_read_clock_gettime_r_syscall) + if (lzo_pclock_read_clock_gettime_r_syscall(h, &c) == 0) { + h->gettime = lzo_pclock_read_clock_gettime_r_syscall; + h->name = "CLOCK_REALTIME/syscall"; + break; + } +# endif +# if defined(lzo_pclock_read_gettimeofday) + if (lzo_pclock_read_gettimeofday(h, &c) == 0) { + h->gettime = lzo_pclock_read_gettimeofday; + h->name = "gettimeofday"; + break; + } +# endif + break; + case LZO_PCLOCK_MONOTONIC: +# if defined(lzo_pclock_read_clock_gettime_m_syscall) + if (lzo_pclock_read_clock_gettime_m_syscall(h, &c) == 0) { + h->gettime = lzo_pclock_read_clock_gettime_m_syscall; + h->name = "CLOCK_MONOTONIC/syscall"; + break; + } +# endif +# if defined(lzo_pclock_read_uclock) + if (lzo_pclock_read_uclock(h, &c) == 0) { + h->gettime = lzo_pclock_read_uclock; + h->name = "uclock"; + break; + } +# endif +# if defined(lzo_pclock_read_clock) + if (lzo_pclock_read_clock(h, &c) == 0) { + h->gettime = lzo_pclock_read_clock; + h->name = "clock"; + break; + } +# endif + break; + case LZO_PCLOCK_PROCESS_CPUTIME_ID: +# if 
defined(lzo_pclock_read_getprocesstimes) + if (lzo_pclock_read_getprocesstimes(h, &c) == 0) { + h->gettime = lzo_pclock_read_getprocesstimes; + h->name = "GetProcessTimes"; + break; + } +# endif +# if defined(lzo_pclock_read_clock_gettime_p_syscall) + if (lzo_pclock_read_clock_gettime_p_syscall(h, &c) == 0) { + h->gettime = lzo_pclock_read_clock_gettime_p_syscall; + h->name = "CLOCK_PROCESS_CPUTIME_ID/syscall"; + break; + } +# endif +# if defined(lzo_pclock_read_clock_gettime_p_libc) + if (lzo_pclock_read_clock_gettime_p_libc(h, &c) == 0) { + h->gettime = lzo_pclock_read_clock_gettime_p_libc; + h->name = "CLOCK_PROCESS_CPUTIME_ID/libc"; + break; + } +# endif +# if defined(lzo_pclock_read_getrusage) + if (lzo_pclock_read_getrusage(h, &c) == 0) { + h->gettime = lzo_pclock_read_getrusage; + h->name = "getrusage"; + break; + } +# endif + break; + case LZO_PCLOCK_THREAD_CPUTIME_ID: +# if defined(lzo_pclock_read_getthreadtimes) + if (lzo_pclock_read_getthreadtimes(h, &c) == 0) { + h->gettime = lzo_pclock_read_getthreadtimes; + h->name = "GetThreadTimes"; + } +# endif +# if defined(lzo_pclock_read_clock_gettime_t_syscall) + if (lzo_pclock_read_clock_gettime_t_syscall(h, &c) == 0) { + h->gettime = lzo_pclock_read_clock_gettime_t_syscall; + h->name = "CLOCK_THREAD_CPUTIME_ID/syscall"; + break; + } +# endif +# if defined(lzo_pclock_read_clock_gettime_t_libc) + if (lzo_pclock_read_clock_gettime_t_libc(h, &c) == 0) { + h->gettime = lzo_pclock_read_clock_gettime_t_libc; + h->name = "CLOCK_THREAD_CPUTIME_ID/libc"; + break; + } +# endif + break; + } + if (!h->gettime) + return -1; + if (!h->h) + h->h = LZO_STATIC_CAST(lzolib_handle_t, 1); + h->mode = mode; + h->read_error = 0; + if (!h->name) + h->name = "unknown"; + for (i = 0; i < 10; i++) { + __LZOLIB_FUNCNAME(lzo_pclock_read)(h, &c); + } + return 0; +} +LZOLIB_PUBLIC(int, lzo_pclock_open_default) (lzo_pclock_handle_p h) +{ + if (__LZOLIB_FUNCNAME(lzo_pclock_open)(h, LZO_PCLOCK_PROCESS_CPUTIME_ID) == 0) + return 0; + if 
(__LZOLIB_FUNCNAME(lzo_pclock_open)(h, LZO_PCLOCK_MONOTONIC) == 0) + return 0; + if (__LZOLIB_FUNCNAME(lzo_pclock_open)(h, LZO_PCLOCK_REALTIME) == 0) + return 0; + if (__LZOLIB_FUNCNAME(lzo_pclock_open)(h, LZO_PCLOCK_THREAD_CPUTIME_ID) == 0) + return 0; + return -1; +} +LZOLIB_PUBLIC(int, lzo_pclock_close) (lzo_pclock_handle_p h) +{ + h->h = LZO_STATIC_CAST(lzolib_handle_t, 0); + h->mode = -1; + h->name = NULL; + h->gettime = LZO_STATIC_CAST(lzo_pclock_gettime_t, 0); + return 0; +} +LZOLIB_PUBLIC(void, lzo_pclock_read) (lzo_pclock_handle_p h, lzo_pclock_p c) +{ + if (h->gettime) { + if (h->gettime(h, c) == 0) + return; + } + h->read_error = 1; +#if defined(lzo_int64l_t) + c->tv_sec = 0; +#else + c->tv_sec_high = 0; + c->tv_sec_low = 0; +#endif + c->tv_nsec = 0; +} +#if !(LZO_CFG_NO_DOUBLE) +LZOLIB_PUBLIC(double, lzo_pclock_get_elapsed) (lzo_pclock_handle_p h, const lzo_pclock_p start, const lzo_pclock_p stop) +{ + if (!h->h) { h->mode = -1; return 0.0; } + { +#if 1 && (LZO_ARCH_I386 && LZO_CC_GNUC) && defined(__STRICT_ALIGNMENT__) + float tstop, tstart; + tstop = LZO_STATIC_CAST(float, (stop->tv_sec + stop->tv_nsec / 1000000000.0)); + tstart = LZO_STATIC_CAST(float, (start->tv_sec + start->tv_nsec / 1000000000.0)); +#elif defined(lzo_int64l_t) + double tstop, tstart; +#if 1 && (LZO_CC_INTELC) + { lzo_int64l_t a = stop->tv_sec; lzo_uint32l_t b = stop->tv_nsec; + tstop = a + b / 1000000000.0; } + { lzo_int64l_t a = start->tv_sec; lzo_uint32l_t b = start->tv_nsec; + tstart = a + b / 1000000000.0; } +#else + tstop = stop->tv_sec + stop->tv_nsec / 1000000000.0; + tstart = start->tv_sec + start->tv_nsec / 1000000000.0; +#endif +#else + double tstop, tstart; + tstop = stop->tv_sec_low + stop->tv_nsec / 1000000000.0; + tstart = start->tv_sec_low + start->tv_nsec / 1000000000.0; +#endif + return tstop - tstart; + } +} +#endif +LZOLIB_PUBLIC(int, lzo_pclock_flush_cpu_cache) (lzo_pclock_handle_p h, unsigned flags) +{ + LZO_UNUSED(h); LZO_UNUSED(flags); + return -1; +} +#endif 
+#if defined(LZO_WANT_ACCLIB_MISC) +# undef LZO_WANT_ACCLIB_MISC +#define __LZOLIB_MISC_CH_INCLUDED 1 +#if !defined(LZOLIB_PUBLIC) +# define LZOLIB_PUBLIC(r,f) r __LZOLIB_FUNCNAME(f) +#endif +#if !defined(LZOLIB_PUBLIC_NOINLINE) +# if !defined(__lzo_noinline) +# define LZOLIB_PUBLIC_NOINLINE(r,f) r __LZOLIB_FUNCNAME(f) +# elif (LZO_CC_CLANG || (LZO_CC_GNUC >= 0x030400ul) || LZO_CC_LLVM) +# define LZOLIB_PUBLIC_NOINLINE(r,f) __lzo_noinline __attribute__((__used__)) r __LZOLIB_FUNCNAME(f) +# else +# define LZOLIB_PUBLIC_NOINLINE(r,f) __lzo_noinline r __LZOLIB_FUNCNAME(f) +# endif +#endif +#if (LZO_OS_WIN32 && LZO_CC_PELLESC && (__POCC__ >= 290)) +# pragma warn(push) +# pragma warn(disable:2007) +#endif +LZOLIB_PUBLIC(const char *, lzo_getenv) (const char *s) +{ +#if (HAVE_GETENV) + return getenv(s); +#else + LZO_UNUSED(s); return LZO_STATIC_CAST(const char *, 0); +#endif +} +LZOLIB_PUBLIC(lzo_intptr_t, lzo_get_osfhandle) (int fd) +{ + if (fd < 0) + return -1; +#if (LZO_OS_CYGWIN) + return get_osfhandle(fd); +#elif (LZO_OS_EMX && defined(__RSXNT__)) + return -1; +#elif (LZO_OS_WIN32 && LZO_CC_GNUC) && defined(__PW32__) + return -1; +#elif (LZO_OS_WIN32 || LZO_OS_WIN64) +# if (LZO_CC_PELLESC && (__POCC__ < 280)) + return -1; +# elif (LZO_CC_WATCOMC && (__WATCOMC__ < 1000)) + return -1; +# elif (LZO_CC_WATCOMC && (__WATCOMC__ < 1100)) + return _os_handle(fd); +# else + return _get_osfhandle(fd); +# endif +#else + return fd; +#endif +} +LZOLIB_PUBLIC(int, lzo_set_binmode) (int fd, int binary) +{ +#if (LZO_ARCH_M68K && LZO_OS_TOS && LZO_CC_GNUC) && defined(__MINT__) + FILE* fp; int old_binary; + if (fd == STDIN_FILENO) fp = stdin; + else if (fd == STDOUT_FILENO) fp = stdout; + else if (fd == STDERR_FILENO) fp = stderr; + else return -1; + old_binary = fp->__mode.__binary; + __set_binmode(fp, binary ? 1 : 0); + return old_binary ? 
1 : 0; +#elif (LZO_ARCH_M68K && LZO_OS_TOS) + LZO_UNUSED(fd); LZO_UNUSED(binary); + return -1; +#elif (LZO_OS_DOS16 && (LZO_CC_AZTECC || LZO_CC_PACIFICC)) + LZO_UNUSED(fd); LZO_UNUSED(binary); + return -1; +#elif (LZO_OS_DOS32 && LZO_CC_GNUC) && defined(__DJGPP__) + int r; unsigned old_flags = __djgpp_hwint_flags; + LZO_COMPILE_TIME_ASSERT(O_BINARY > 0) + LZO_COMPILE_TIME_ASSERT(O_TEXT > 0) + if (fd < 0) return -1; + r = setmode(fd, binary ? O_BINARY : O_TEXT); + if ((old_flags & 1u) != (__djgpp_hwint_flags & 1u)) + __djgpp_set_ctrl_c(!(old_flags & 1)); + if (r == -1) return -1; + return (r & O_TEXT) ? 0 : 1; +#elif (LZO_OS_WIN32 && LZO_CC_GNUC) && defined(__PW32__) + if (fd < 0) return -1; + LZO_UNUSED(binary); + return 1; +#elif (LZO_OS_DOS32 && LZO_CC_HIGHC) + FILE* fp; int r; + if (fd == fileno(stdin)) fp = stdin; + else if (fd == fileno(stdout)) fp = stdout; + else if (fd == fileno(stderr)) fp = stderr; + else return -1; + r = _setmode(fp, binary ? _BINARY : _TEXT); + if (r == -1) return -1; + return (r & _BINARY) ? 1 : 0; +#elif (LZO_OS_WIN32 && LZO_CC_MWERKS) && defined(__MSL__) + LZO_UNUSED(fd); LZO_UNUSED(binary); + return -1; +#elif (LZO_OS_CYGWIN && (LZO_CC_GNUC < 0x025a00ul)) + LZO_UNUSED(fd); LZO_UNUSED(binary); + return -1; +#elif (LZO_OS_CYGWIN || LZO_OS_DOS16 || LZO_OS_DOS32 || LZO_OS_EMX || LZO_OS_OS2 || LZO_OS_OS216 || LZO_OS_WIN16 || LZO_OS_WIN32 || LZO_OS_WIN64) + int r; +#if !(LZO_CC_ZORTECHC) + LZO_COMPILE_TIME_ASSERT(O_BINARY > 0) +#endif + LZO_COMPILE_TIME_ASSERT(O_TEXT > 0) + if (fd < 0) return -1; + r = setmode(fd, binary ? O_BINARY : O_TEXT); + if (r == -1) return -1; + return (r & O_TEXT) ? 
0 : 1; +#else + if (fd < 0) return -1; + LZO_UNUSED(binary); + return 1; +#endif +} +LZOLIB_PUBLIC(int, lzo_isatty) (int fd) +{ + if (fd < 0) + return 0; +#if (LZO_OS_DOS16 && !(LZO_CC_AZTECC)) + { + union REGS ri, ro; + ri.x.ax = 0x4400; ri.x.bx = fd; + int86(0x21, &ri, &ro); + if ((ro.x.cflag & 1) == 0) + if ((ro.x.ax & 0x83) != 0x83) + return 0; + } +#elif (LZO_OS_DOS32 && LZO_CC_WATCOMC) + { + union REGS ri, ro; + ri.w.ax = 0x4400; ri.w.bx = LZO_STATIC_CAST(unsigned short, fd); + int386(0x21, &ri, &ro); + if ((ro.w.cflag & 1) == 0) + if ((ro.w.ax & 0x83) != 0x83) + return 0; + } +#elif (LZO_HAVE_WINDOWS_H) + { + lzo_intptr_t h = __LZOLIB_FUNCNAME(lzo_get_osfhandle)(fd); + LZO_COMPILE_TIME_ASSERT(sizeof(h) == sizeof(HANDLE)) + if (h != -1) + { + DWORD d = 0; + if (GetConsoleMode(LZO_REINTERPRET_CAST(HANDLE, h), &d) == 0) + return 0; + } + } +#endif +#if (HAVE_ISATTY) + return (isatty(fd)) ? 1 : 0; +#else + return 0; +#endif +} +LZOLIB_PUBLIC(int, lzo_mkdir) (const char* name, unsigned mode) +{ +#if !(HAVE_MKDIR) + LZO_UNUSED(name); LZO_UNUSED(mode); + return -1; +#elif (LZO_ARCH_M68K && LZO_OS_TOS && (LZO_CC_PUREC || LZO_CC_TURBOC)) + LZO_UNUSED(mode); + return Dcreate(name); +#elif (LZO_OS_DOS32 && LZO_CC_GNUC) && defined(__DJGPP__) + return mkdir(name, mode); +#elif (LZO_OS_WIN32 && LZO_CC_GNUC) && defined(__PW32__) + return mkdir(name, mode); +#elif ((LZO_OS_DOS16 || LZO_OS_DOS32) && (LZO_CC_HIGHC || LZO_CC_PACIFICC)) + LZO_UNUSED(mode); + return mkdir(LZO_UNCONST_CAST(char *, name)); +#elif (LZO_OS_DOS16 || LZO_OS_DOS32 || LZO_OS_OS2 || LZO_OS_OS216 || LZO_OS_WIN16 || LZO_OS_WIN32 || LZO_OS_WIN64) + LZO_UNUSED(mode); + return mkdir(name); +#elif (LZO_CC_WATCOMC) + return mkdir(name, LZO_STATIC_CAST(mode_t, mode)); +#else + return mkdir(name, mode); +#endif +} +LZOLIB_PUBLIC(int, lzo_rmdir) (const char* name) +{ +#if !(HAVE_RMDIR) + LZO_UNUSED(name); + return -1; +#elif ((LZO_OS_DOS16 || LZO_OS_DOS32) && (LZO_CC_HIGHC || LZO_CC_PACIFICC)) + return 
rmdir(LZO_UNCONST_CAST(char *, name)); +#else + return rmdir(name); +#endif +} +#if defined(lzo_int32e_t) +LZOLIB_PUBLIC(lzo_int32e_t, lzo_muldiv32s) (lzo_int32e_t a, lzo_int32e_t b, lzo_int32e_t x) +{ + lzo_int32e_t r = 0; + if __lzo_likely(x != 0) + { +#if defined(lzo_int64l_t) + lzo_int64l_t rr = (LZO_ICONV(lzo_int64l_t, a) * b) / x; + r = LZO_ITRUNC(lzo_int32e_t, rr); +#else + LZO_UNUSED(a); LZO_UNUSED(b); +#endif + } + return r; +} +LZOLIB_PUBLIC(lzo_uint32e_t, lzo_muldiv32u) (lzo_uint32e_t a, lzo_uint32e_t b, lzo_uint32e_t x) +{ + lzo_uint32e_t r = 0; + if __lzo_likely(x != 0) + { +#if defined(lzo_int64l_t) + lzo_uint64l_t rr = (LZO_ICONV(lzo_uint64l_t, a) * b) / x; + r = LZO_ITRUNC(lzo_uint32e_t, rr); +#else + LZO_UNUSED(a); LZO_UNUSED(b); +#endif + } + return r; +} +#endif +#if (LZO_OS_WIN16) +LZO_EXTERN_C void __far __pascal DebugBreak(void); +#endif +LZOLIB_PUBLIC_NOINLINE(void, lzo_debug_break) (void) +{ +#if (LZO_OS_WIN16) + DebugBreak(); +#elif (LZO_ARCH_I086) +#elif (LZO_OS_WIN64) && (LZO_HAVE_WINDOWS_H) + DebugBreak(); +#elif (LZO_ARCH_AMD64 || LZO_ARCH_I386) && (LZO_ASM_SYNTAX_GNUC) + __asm__ __volatile__("int $3\n" : : __LZO_ASM_CLOBBER_LIST_CC_MEMORY); +#elif (LZO_ARCH_I386) && (LZO_ASM_SYNTAX_MSC) + __asm { int 3 } +#elif (LZO_OS_WIN32) && (LZO_HAVE_WINDOWS_H) + DebugBreak(); +#else + volatile lzo_intptr_t a = -1; + * LZO_STATIC_CAST(volatile unsigned long *, LZO_REINTERPRET_CAST(volatile void *, a)) = ~0ul; +#endif +} +LZOLIB_PUBLIC_NOINLINE(void, lzo_debug_nop) (void) +{ +} +LZOLIB_PUBLIC_NOINLINE(int, lzo_debug_align_check_query) (void) +{ +#if (LZO_ARCH_AMD64 || LZO_ARCH_I386) && (LZO_ASM_SYNTAX_GNUC) +# if (LZO_ARCH_AMD64) + lzo_uint64e_t r = 0; +# else + size_t r = 0; +# endif + __asm__ __volatile__("pushf\n pop %0\n" : "=a" (r) : __LZO_ASM_CLOBBER_LIST_CC_MEMORY); + return LZO_ICONV(int, (r >> 18) & 1); +#elif (LZO_ARCH_I386) && (LZO_ASM_SYNTAX_MSC) + unsigned long r; + __asm { + pushf + pop eax + mov r,eax + } + return LZO_ICONV(int, (r 
>> 18) & 1); +#else + return -1; +#endif +} +LZOLIB_PUBLIC_NOINLINE(int, lzo_debug_align_check_enable) (int v) +{ +#if (LZO_ARCH_AMD64) && (LZO_ASM_SYNTAX_GNUC) + if (v) { + __asm__ __volatile__("pushf\n orl $262144,(%%rsp)\n popf\n" : : __LZO_ASM_CLOBBER_LIST_CC_MEMORY); + } else { + __asm__ __volatile__("pushf\n andl $-262145,(%%rsp)\n popf\n" : : __LZO_ASM_CLOBBER_LIST_CC_MEMORY); + } + return 0; +#elif (LZO_ARCH_I386) && (LZO_ASM_SYNTAX_GNUC) + if (v) { + __asm__ __volatile__("pushf\n orl $262144,(%%esp)\n popf\n" : : __LZO_ASM_CLOBBER_LIST_CC_MEMORY); + } else { + __asm__ __volatile__("pushf\n andl $-262145,(%%esp)\n popf\n" : : __LZO_ASM_CLOBBER_LIST_CC_MEMORY); + } + return 0; +#elif (LZO_ARCH_I386) && (LZO_ASM_SYNTAX_MSC) + if (v) { __asm { + pushf + or dword ptr [esp],262144 + popf + }} else { __asm { + pushf + and dword ptr [esp],-262145 + popf + }} + return 0; +#else + LZO_UNUSED(v); return -1; +#endif +} +LZOLIB_PUBLIC_NOINLINE(unsigned, lzo_debug_running_on_qemu) (void) +{ + unsigned r = 0; +#if (LZO_OS_POSIX_LINUX || LZO_OS_WIN32 || LZO_OS_WIN64) + const char* p; + p = __LZOLIB_FUNCNAME(lzo_getenv)(LZO_PP_STRINGIZE(LZO_ENV_RUNNING_ON_QEMU)); + if (p) { + if (p[0] == 0) r = 0; + else if ((p[0] >= '0' && p[0] <= '9') && p[1] == 0) r = LZO_ICAST(unsigned, p[0]) - '0'; + else r = 1; + } +#endif + return r; +} +LZOLIB_PUBLIC_NOINLINE(unsigned, lzo_debug_running_on_valgrind) (void) +{ +#if (LZO_ARCH_AMD64 && LZO_ABI_ILP32) + return 0; +#elif (LZO_ARCH_AMD64 || LZO_ARCH_I386) && (LZO_ASM_SYNTAX_GNUC) + volatile size_t a[6]; + size_t r = 0; + a[0] = 0x1001; a[1] = 0; a[2] = 0; a[3] = 0; a[4] = 0; a[5] = 0; +# if (LZO_ARCH_AMD64) + __asm__ __volatile__(".byte 0x48,0xc1,0xc7,0x03,0x48,0xc1,0xc7,0x0d,0x48,0xc1,0xc7,0x3d,0x48,0xc1,0xc7,0x33,0x48,0x87,0xdb\n" : "=d" (r) : "a" (&a[0]), "d" (r) __LZO_ASM_CLOBBER_LIST_CC_MEMORY); +# elif (LZO_ARCH_I386) + __asm__ __volatile__(".byte 0xc1,0xc7,0x03,0xc1,0xc7,0x0d,0xc1,0xc7,0x1d,0xc1,0xc7,0x13,0x87,0xdb\n" : "=d" (r) : 
"a" (&a[0]), "d" (r) __LZO_ASM_CLOBBER_LIST_CC_MEMORY); +# endif + return LZO_ITRUNC(unsigned, r); +#else + return 0; +#endif +} +#if (LZO_OS_WIN32 && LZO_CC_PELLESC && (__POCC__ >= 290)) +# pragma warn(pop) +#endif +#endif +#if defined(LZO_WANT_ACCLIB_WILDARGV) +# undef LZO_WANT_ACCLIB_WILDARGV +#define __LZOLIB_WILDARGV_CH_INCLUDED 1 +#if !defined(LZOLIB_PUBLIC) +# define LZOLIB_PUBLIC(r,f) r __LZOLIB_FUNCNAME(f) +#endif +#if (LZO_OS_DOS16 || LZO_OS_OS216 || LZO_OS_WIN16) +#if 0 && (LZO_CC_MSC) +LZO_EXTERN_C int __lzo_cdecl __setargv(void); +LZO_EXTERN_C int __lzo_cdecl _setargv(void); +LZO_EXTERN_C int __lzo_cdecl _setargv(void) { return __setargv(); } +#endif +#endif +#if (LZO_OS_WIN32 || LZO_OS_WIN64) +#if (LZO_CC_MSC && (_MSC_VER >= 1900)) +#elif (LZO_CC_INTELC || LZO_CC_MSC) +LZO_EXTERN_C int __lzo_cdecl __setargv(void); +LZO_EXTERN_C int __lzo_cdecl _setargv(void); +LZO_EXTERN_C int __lzo_cdecl _setargv(void) { return __setargv(); } +#endif +#endif +#if (LZO_OS_EMX) +#define __LZOLIB_HAVE_LZO_WILDARGV 1 +LZOLIB_PUBLIC(void, lzo_wildargv) (int* argc, char*** argv) +{ + if (argc && argv) { + _response(argc, argv); + _wildcard(argc, argv); + } +} +#endif +#if (LZO_OS_CONSOLE_PSP) && defined(__PSPSDK_DEBUG__) +#define __LZOLIB_HAVE_LZO_WILDARGV 1 +LZO_EXTERN_C int lzo_psp_init_module(int*, char***, int); +LZOLIB_PUBLIC(void, lzo_wildargv) (int* argc, char*** argv) +{ + lzo_psp_init_module(argc, argv, -1); +} +#endif +#if !(__LZOLIB_HAVE_LZO_WILDARGV) +#define __LZOLIB_HAVE_LZO_WILDARGV 1 +LZOLIB_PUBLIC(void, lzo_wildargv) (int* argc, char*** argv) +{ +#if 1 && (LZO_ARCH_I086PM) + if (LZO_MM_AHSHIFT != 3) { exit(1); } +#elif 1 && (LZO_ARCH_M68K && LZO_OS_TOS && LZO_CC_GNUC) && defined(__MINT__) + __binmode(1); + if (isatty(1)) __set_binmode(stdout, 0); + if (isatty(2)) __set_binmode(stderr, 0); +#endif + LZO_UNUSED(argc); LZO_UNUSED(argv); +} +#endif +#endif + +/* vim:set ts=4 sw=4 et: */ diff --git a/tools/z64compress/src/enc/lzo/lzo_swd.ch 
b/tools/z64compress/src/enc/lzo/lzo_swd.ch new file mode 100644 index 000000000..20bc46175 --- /dev/null +++ b/tools/z64compress/src/enc/lzo/lzo_swd.ch @@ -0,0 +1,700 @@ +/* lzo_swd.ch -- sliding window dictionary + + This file is part of the LZO real-time data compression library. + + Copyright (C) 1996-2017 Markus Franz Xaver Johannes Oberhumer + All Rights Reserved. + + The LZO library is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of + the License, or (at your option) any later version. + + The LZO library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with the LZO library; see the file COPYING. + If not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + + Markus F.X.J. 
Oberhumer + + http://www.oberhumer.com/opensource/lzo/ + */ + + +#if (LZO_UINT_MAX < LZO_0xffffffffL) +# error "LZO_UINT_MAX" +#endif +#if defined(LZO_DEBUG) +# include +#endif +#if defined(__LZO_CHECKER) +# include +#endif + + +/*********************************************************************** +// +************************************************************************/ + +/* unsigned type for dictionary access - don't waste memory here */ +#if (0UL + SWD_N + SWD_F + SWD_F < 65535UL) + typedef lzo_uint16_t swd_uint; +# define SWD_UINT_MAX 0xffffu +#else + typedef lzo_uint32_t swd_uint; +# define SWD_UINT_MAX 0xffffffffu +#endif +#define swd_uintp swd_uint * +#define SWD_UINT(x) ((swd_uint)(x)) + + +#ifndef SWD_HSIZE +# define SWD_HSIZE 16384 +#endif +#ifndef SWD_MAX_CHAIN +# define SWD_MAX_CHAIN 2048 +#endif + +#if !defined(HEAD3) +#if 1 +# define HEAD3(b,p) \ + ((DMUL(0x9f5f,(((((lzo_xint)b[p]<<5)^b[p+1])<<5)^b[p+2]))>>5) & (SWD_HSIZE-1)) +#else +# define HEAD3(b,p) \ + ((DMUL(0x9f5f,(((((lzo_xint)b[p+2]<<5)^b[p+1])<<5)^b[p]))>>5) & (SWD_HSIZE-1)) +#endif +#endif + +#if !(SWD_NO_HEAD2) && (SWD_THRESHOLD == 1) && !defined(HEAD2) +# if 1 && (LZO_OPT_UNALIGNED16) +# define HEAD2(b,p) UA_GET_NE16((b)+(p)) +# else +# define HEAD2(b,p) (b[p] ^ ((unsigned)b[(p)+1]<<8)) +# endif +# define NIL2 SWD_UINT_MAX +#endif +#ifndef IF_HEAD2 +#define IF_HEAD2(s) /*empty*/ +#endif + + +typedef struct +{ +/* public - "built-in" */ + lzo_uint swd_n; + lzo_uint swd_f; + lzo_uint swd_threshold; + +/* public - configuration */ + lzo_uint max_chain; + lzo_uint nice_length; + lzo_bool use_best_off; + lzo_uint lazy_insert; + +/* public - output */ + lzo_uint m_len; + lzo_uint m_off; + lzo_uint look; + int b_char; +#if defined(SWD_BEST_OFF) + lzo_uint best_off[ SWD_BEST_OFF ]; +#endif + +/* semi public */ + LZO_COMPRESS_T *c; + lzo_uint m_pos; +#if defined(SWD_BEST_OFF) + lzo_uint best_pos[ SWD_BEST_OFF ]; +#endif + +/* private */ + const lzo_bytep dict; + const lzo_bytep dict_end; + 
lzo_uint dict_len; + +/* private */ + lzo_uint ip; /* input pointer (lookahead) */ + lzo_uint bp; /* buffer pointer */ + lzo_uint rp; /* remove pointer */ + lzo_uint b_size; + + lzo_bytep b_wrap; + + lzo_uint node_count; + lzo_uint first_rp; + +#if defined(__LZO_CHECKER) + /* malloc arrays of the exact size to detect any overrun */ + unsigned char *b; + swd_uint *head3; + swd_uint *succ3; + swd_uint *best3; + swd_uint *llen3; +# ifdef HEAD2 + swd_uint *head2; +# endif + +#else + unsigned char b [ SWD_N + SWD_F + SWD_F ]; + swd_uint head3 [ SWD_HSIZE ]; + swd_uint succ3 [ SWD_N + SWD_F ]; + swd_uint best3 [ SWD_N + SWD_F ]; + swd_uint llen3 [ SWD_HSIZE ]; +# ifdef HEAD2 + swd_uint head2 [ 65536L ]; +# endif +#endif +} +lzo_swd_t; +#define lzo_swd_p lzo_swd_t * + + +#define s_b(s) s->b +#define s_head3(s) s->head3 +#define s_succ3(s) s->succ3 +#define s_best3(s) s->best3 +#define s_llen3(s) s->llen3 +#ifdef HEAD2 +#define s_head2(s) s->head2 +#endif +#define SIZEOF_LZO_SWD_T (sizeof(lzo_swd_t)) + + +/* Access macro for head3. + * head3[key] may be uninitialized if the list is emtpy, + * but then its value will never be used. + */ +#if 1 || defined(__LZO_CHECKER) +# define s_get_head3(s,key) \ + ((swd_uint)((s_llen3(s)[key] == 0) ? 
SWD_UINT_MAX : s_head3(s)[key])) +#else +# define s_get_head3(s,key) (s_head3(s)[key]) +#endif + + +/*********************************************************************** +// +************************************************************************/ + +static +void swd_initdict(lzo_swd_p s, const lzo_bytep dict, lzo_uint dict_len) +{ + s->dict = s->dict_end = NULL; + s->dict_len = 0; + + if (!dict || dict_len == 0) + return; + if (dict_len > s->swd_n) + { + dict += dict_len - s->swd_n; + dict_len = s->swd_n; + } + + s->dict = dict; + s->dict_len = dict_len; + s->dict_end = dict + dict_len; + lzo_memcpy(s_b(s),dict,dict_len); + s->ip = dict_len; +} + + +static +void swd_insertdict(lzo_swd_p s, lzo_uint node, lzo_uint len) +{ + lzo_uint key; + + s->node_count = s->swd_n - len; + s->first_rp = node; + + if (len) do + { + key = HEAD3(s_b(s),node); + s_succ3(s)[node] = s_get_head3(s,key); + s_head3(s)[key] = SWD_UINT(node); + s_best3(s)[node] = SWD_UINT(s->swd_f + 1); + s_llen3(s)[key]++; + assert(s_llen3(s)[key] <= s->swd_n); + +#ifdef HEAD2 + IF_HEAD2(s) { + key = HEAD2(s_b(s),node); + s_head2(s)[key] = SWD_UINT(node); + } +#endif + + node++; + } + while (--len != 0); +} + + +/*********************************************************************** +// +************************************************************************/ + +static void swd_exit(lzo_swd_p s); + +static +int swd_init(lzo_swd_p s, const lzo_bytep dict, lzo_uint dict_len) +{ +#if defined(__LZO_CHECKER) + unsigned r = 1; + s->b = (lzo_bytep) malloc(SWD_N + SWD_F + SWD_F); + s->head3 = (swd_uintp) malloc(sizeof(swd_uint) * SWD_HSIZE); + s->succ3 = (swd_uintp) malloc(sizeof(swd_uint) * (SWD_N + SWD_F)); + s->best3 = (swd_uintp) malloc(sizeof(swd_uint) * (SWD_N + SWD_F)); + s->llen3 = (swd_uintp) malloc(sizeof(swd_uint) * SWD_HSIZE); + r &= s->b != NULL; + r &= s->head3 != NULL; + r &= s->succ3 != NULL; + r &= s->best3 != NULL; + r &= s->llen3 != NULL; +#ifdef HEAD2 + IF_HEAD2(s) { + s->head2 = 
(swd_uintp) malloc(sizeof(swd_uint) * 65536L); + r &= s->head2 != NULL; + } +#endif + if (r != 1) { + swd_exit(s); + return LZO_E_OUT_OF_MEMORY; + } +#endif + + s->m_len = 0; + s->m_off = 0; +#if defined(SWD_BEST_OFF) + { + unsigned i; + for (i = 0; i < SWD_BEST_OFF; i++) + s->best_off[i] = s->best_pos[i] = 0; + } +#endif + + s->swd_n = SWD_N; + s->swd_f = SWD_F; + s->swd_threshold = SWD_THRESHOLD; + + /* defaults */ + s->max_chain = SWD_MAX_CHAIN; + s->nice_length = s->swd_f; + s->use_best_off = 0; + s->lazy_insert = 0; + + s->b_size = s->swd_n + s->swd_f; +#if 0 + if (2 * s->swd_f >= s->swd_n || s->b_size + s->swd_f >= SWD_UINT_MAX) + return LZO_E_ERROR; +#else + LZO_COMPILE_TIME_ASSERT(!(0ul + 2 * SWD_F >= SWD_N)) + LZO_COMPILE_TIME_ASSERT(!(0ul + SWD_N + SWD_F + SWD_F >= SWD_UINT_MAX)) +#endif + s->b_wrap = s_b(s) + s->b_size; + s->node_count = s->swd_n; + + lzo_memset(s_llen3(s), 0, (lzo_uint)sizeof(s_llen3(s)[0]) * (lzo_uint)SWD_HSIZE); +#ifdef HEAD2 + IF_HEAD2(s) { +#if 1 + lzo_memset(s_head2(s), 0xff, (lzo_uint)sizeof(s_head2(s)[0]) * 65536L); + assert(s_head2(s)[0] == NIL2); +#else + lzo_xint i; + for (i = 0; i < 65536L; i++) + s_head2(s)[i] = NIL2; +#endif + } +#endif + + s->ip = 0; + swd_initdict(s,dict,dict_len); + s->bp = s->ip; + s->first_rp = s->ip; + + assert(s->ip + s->swd_f <= s->b_size); +#if 1 + s->look = (lzo_uint) (s->c->in_end - s->c->ip); + if (s->look > 0) + { + if (s->look > s->swd_f) + s->look = s->swd_f; + lzo_memcpy(&s_b(s)[s->ip],s->c->ip,s->look); + s->c->ip += s->look; + s->ip += s->look; + } +#else + s->look = 0; + while (s->look < s->swd_f) + { + int c; + if ((c = getbyte(*(s->c))) < 0) + break; + s_b(s)[s->ip] = LZO_BYTE(c); + s->ip++; + s->look++; + } +#endif + if (s->ip == s->b_size) + s->ip = 0; + + if (s->look >= 2 && s->dict_len > 0) + swd_insertdict(s,0,s->dict_len); + + s->rp = s->first_rp; + if (s->rp >= s->node_count) + s->rp -= s->node_count; + else + s->rp += s->b_size - s->node_count; + +#if 1 || defined(__LZO_CHECKER) 
+ /* initialize memory for the first few HEAD3 (if s->ip is not far + * enough ahead to do this job for us). The value doesn't matter. */ + if (s->look < 3) { + lzo_bytep p = &s_b(s)[s->bp+s->look]; + p[0] = p[1] = p[2] = 0; + } +#endif + + return LZO_E_OK; +} + + +static +void swd_exit(lzo_swd_p s) +{ +#if defined(__LZO_CHECKER) + /* free in reverse order of allocations */ +#ifdef HEAD2 + free(s->head2); s->head2 = NULL; +#endif + free(s->llen3); s->llen3 = NULL; + free(s->best3); s->best3 = NULL; + free(s->succ3); s->succ3 = NULL; + free(s->head3); s->head3 = NULL; + free(s->b); s->b = NULL; +#else + LZO_UNUSED(s); +#endif +} + + +#define swd_pos2off(s,pos) \ + (s->bp > (pos) ? s->bp - (pos) : s->b_size - ((pos) - s->bp)) + + +/*********************************************************************** +// +************************************************************************/ + +static __lzo_inline +void swd_getbyte(lzo_swd_p s) +{ + int c; + + if ((c = getbyte(*(s->c))) < 0) + { + if (s->look > 0) + --s->look; +#if 1 || defined(__LZO_CHECKER) + /* initialize memory - value doesn't matter */ + s_b(s)[s->ip] = 0; + if (s->ip < s->swd_f) + s->b_wrap[s->ip] = 0; +#endif + } + else + { + s_b(s)[s->ip] = LZO_BYTE(c); + if (s->ip < s->swd_f) + s->b_wrap[s->ip] = LZO_BYTE(c); + } + if (++s->ip == s->b_size) + s->ip = 0; + if (++s->bp == s->b_size) + s->bp = 0; + if (++s->rp == s->b_size) + s->rp = 0; +} + + +/*********************************************************************** +// remove node from lists +************************************************************************/ + +static __lzo_inline +void swd_remove_node(lzo_swd_p s, lzo_uint node) +{ + if (s->node_count == 0) + { + lzo_uint key; + +#ifdef LZO_DEBUG + if (s->first_rp != LZO_UINT_MAX) + { + if (node != s->first_rp) + printf("Remove %5ld: %5ld %5ld %5ld %5ld %6ld %6ld\n", + (long)node, (long)s->rp, (long)s->ip, (long)s->bp, + (long)s->first_rp, (long)(s->ip - node), + (long)(s->ip - s->bp)); + 
assert(node == s->first_rp); + s->first_rp = LZO_UINT_MAX; + } +#endif + + key = HEAD3(s_b(s),node); + assert(s_llen3(s)[key] > 0); + --s_llen3(s)[key]; + +#ifdef HEAD2 + IF_HEAD2(s) { + key = HEAD2(s_b(s),node); + assert(s_head2(s)[key] != NIL2); + if ((lzo_uint) s_head2(s)[key] == node) + s_head2(s)[key] = NIL2; + } +#endif + } + else + --s->node_count; +} + + +/*********************************************************************** +// +************************************************************************/ + +static +void swd_accept(lzo_swd_p s, lzo_uint n) +{ + assert(n <= s->look); + + if (n) do + { + lzo_uint key; + + swd_remove_node(s,s->rp); + + /* add bp into HEAD3 */ + key = HEAD3(s_b(s),s->bp); + s_succ3(s)[s->bp] = s_get_head3(s,key); + s_head3(s)[key] = SWD_UINT(s->bp); + s_best3(s)[s->bp] = SWD_UINT(s->swd_f + 1); + s_llen3(s)[key]++; + assert(s_llen3(s)[key] <= s->swd_n); + +#ifdef HEAD2 + /* add bp into HEAD2 */ + IF_HEAD2(s) { + key = HEAD2(s_b(s),s->bp); + s_head2(s)[key] = SWD_UINT(s->bp); + } +#endif + + swd_getbyte(s); + } while (--n != 0); +} + + +/*********************************************************************** +// +************************************************************************/ + +static +void swd_search(lzo_swd_p s, lzo_uint node, lzo_uint cnt) +{ + const lzo_bytep p1; + const lzo_bytep p2; + const lzo_bytep px; + lzo_uint m_len = s->m_len; + const lzo_bytep b = s_b(s); + const lzo_bytep bp = s_b(s) + s->bp; + const lzo_bytep bx = s_b(s) + s->bp + s->look; + swd_uintp succ3 = s_succ3(s); + unsigned char scan_end1; + + assert(s->m_len > 0); + + scan_end1 = bp[m_len - 1]; + for ( ; cnt-- > 0; node = succ3[node]) + { + p1 = bp; + p2 = b + node; + px = bx; + + assert(m_len < s->look); + + if ( +#if 1 + p2[m_len - 1] == scan_end1 && + p2[m_len] == p1[m_len] && +#endif + p2[0] == p1[0] && + p2[1] == p1[1]) + { + lzo_uint i; + assert(lzo_memcmp(bp,&b[node],3) == 0); + +#if 0 && (LZO_OPT_UNALIGNED32) + p1 += 3; p2 += 3; + while 
(p1 + 4 <= px && UA_GET_NE32(p1) == UA_GET_NE32(p2)) + p1 += 4, p2 += 4; + while (p1 < px && *p1 == *p2) + p1 += 1, p2 += 1; +#else + p1 += 2; p2 += 2; + do {} while (++p1 < px && *p1 == *++p2); +#endif + i = pd(p1, bp); + +#ifdef LZO_DEBUG + if (lzo_memcmp(bp,&b[node],i) != 0) + printf("%5ld %5ld %5ld %02x/%02x %02x/%02x\n", + (long)s->bp, (long) node, (long) i, + bp[0], bp[1], b[node], b[node+1]); +#endif + assert(lzo_memcmp(bp,&b[node],i) == 0); + +#if defined(SWD_BEST_OFF) + if (i < SWD_BEST_OFF) + { + if (s->best_pos[i] == 0) + s->best_pos[i] = node + 1; + } +#endif + if (i > m_len) + { + s->m_len = m_len = i; + s->m_pos = node; + if (m_len == s->look) + return; + if (m_len >= s->nice_length) + return; + if (m_len > (lzo_uint) s_best3(s)[node]) + return; + scan_end1 = bp[m_len - 1]; + } + } + } +} + + +/*********************************************************************** +// +************************************************************************/ + +#ifdef HEAD2 + +static +lzo_bool swd_search2(lzo_swd_p s) +{ + lzo_uint key; + + assert(s->look >= 2); + assert(s->m_len > 0); + + key = s_head2(s)[ HEAD2(s_b(s),s->bp) ]; + if (key == NIL2) + return 0; +#ifdef LZO_DEBUG + if (lzo_memcmp(&s_b(s)[s->bp],&s_b(s)[key],2) != 0) + printf("%5ld %5ld %02x/%02x %02x/%02x\n", (long)s->bp, (long)key, + s_b(s)[s->bp], s_b(s)[s->bp+1], s_b(s)[key], s_b(s)[key+1]); +#endif + assert(lzo_memcmp(&s_b(s)[s->bp],&s_b(s)[key],2) == 0); +#if defined(SWD_BEST_OFF) + if (s->best_pos[2] == 0) + s->best_pos[2] = key + 1; +#endif + + if (s->m_len < 2) + { + s->m_len = 2; + s->m_pos = key; + } + return 1; +} + +#endif + + +/*********************************************************************** +// +************************************************************************/ + +static +void swd_findbest(lzo_swd_p s) +{ + lzo_uint key; + lzo_uint cnt, node; + lzo_uint len; + + assert(s->m_len > 0); + + /* get current head, add bp into HEAD3 */ + key = HEAD3(s_b(s),s->bp); + node = 
s_succ3(s)[s->bp] = s_get_head3(s,key); + cnt = s_llen3(s)[key]++; + assert(s_llen3(s)[key] <= s->swd_n + s->swd_f); + if (cnt > s->max_chain && s->max_chain > 0) + cnt = s->max_chain; + s_head3(s)[key] = SWD_UINT(s->bp); + + s->b_char = s_b(s)[s->bp]; + len = s->m_len; + if (s->m_len >= s->look) + { + if (s->look == 0) + s->b_char = -1; + s->m_off = 0; + s_best3(s)[s->bp] = SWD_UINT(s->swd_f + 1); + } + else + { +#if defined(HEAD2) + if (swd_search2(s) && s->look >= 3) + swd_search(s,node,cnt); +#else + if (s->look >= 3) + swd_search(s,node,cnt); +#endif + if (s->m_len > len) + s->m_off = swd_pos2off(s,s->m_pos); + s_best3(s)[s->bp] = SWD_UINT(s->m_len); + +#if defined(SWD_BEST_OFF) + if (s->use_best_off) + { + unsigned i; + for (i = 2; i < SWD_BEST_OFF; i++) + if (s->best_pos[i] > 0) + s->best_off[i] = swd_pos2off(s,s->best_pos[i]-1); + else + s->best_off[i] = 0; + } +#endif + } + + swd_remove_node(s,s->rp); + +#ifdef HEAD2 + /* add bp into HEAD2 */ + IF_HEAD2(s) { + key = HEAD2(s_b(s),s->bp); + s_head2(s)[key] = SWD_UINT(s->bp); + } +#endif +} + + +#undef HEAD3 +#undef HEAD2 +#undef IF_HEAD2 +#undef s_get_head3 + + +/* vim:set ts=4 sw=4 et: */ diff --git a/tools/z64compress/src/enc/lzo/lzoconf.h b/tools/z64compress/src/enc/lzo/lzoconf.h new file mode 100644 index 000000000..1a1dd98c2 --- /dev/null +++ b/tools/z64compress/src/enc/lzo/lzoconf.h @@ -0,0 +1,453 @@ +/* lzoconf.h -- configuration of the LZO data compression library + + This file is part of the LZO real-time data compression library. + + Copyright (C) 1996-2017 Markus Franz Xaver Johannes Oberhumer + All Rights Reserved. + + The LZO library is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of + the License, or (at your option) any later version. 
+ + The LZO library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with the LZO library; see the file COPYING. + If not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + + Markus F.X.J. Oberhumer + + http://www.oberhumer.com/opensource/lzo/ + */ + + +#ifndef __LZOCONF_H_INCLUDED +#define __LZOCONF_H_INCLUDED 1 + +#define LZO_VERSION 0x20a0 /* 2.10 */ +#define LZO_VERSION_STRING "2.10" +#define LZO_VERSION_DATE "Mar 01 2017" + +/* internal Autoconf configuration file - only used when building LZO */ +#if defined(LZO_HAVE_CONFIG_H) +# include +#endif +#include +#include + + +/*********************************************************************** +// LZO requires a conforming +************************************************************************/ + +#if !defined(CHAR_BIT) || (CHAR_BIT != 8) +# error "invalid CHAR_BIT" +#endif +#if !defined(UCHAR_MAX) || !defined(USHRT_MAX) || !defined(UINT_MAX) || !defined(ULONG_MAX) +# error "check your compiler installation" +#endif +#if (USHRT_MAX < 1) || (UINT_MAX < 1) || (ULONG_MAX < 1) +# error "your limits.h macros are broken" +#endif + +/* get OS and architecture defines */ +#ifndef __LZODEFS_H_INCLUDED +#include "lzodefs.h" +#endif + + +#ifdef __cplusplus +extern "C" { +#endif + + +/*********************************************************************** +// some core defines +************************************************************************/ + +/* memory checkers */ +#if !defined(__LZO_CHECKER) +# if defined(__BOUNDS_CHECKING_ON) +# define __LZO_CHECKER 1 +# elif defined(__CHECKER__) +# define __LZO_CHECKER 1 +# elif defined(__INSURE__) +# define __LZO_CHECKER 1 +# elif defined(__PURIFY__) +# define 
__LZO_CHECKER 1 +# endif +#endif + + +/*********************************************************************** +// integral and pointer types +************************************************************************/ + +/* lzo_uint must match size_t */ +#if !defined(LZO_UINT_MAX) +# if (LZO_ABI_LLP64) +# if (LZO_OS_WIN64) + typedef unsigned __int64 lzo_uint; + typedef __int64 lzo_int; +# define LZO_TYPEOF_LZO_INT LZO_TYPEOF___INT64 +# else + typedef lzo_ullong_t lzo_uint; + typedef lzo_llong_t lzo_int; +# define LZO_TYPEOF_LZO_INT LZO_TYPEOF_LONG_LONG +# endif +# define LZO_SIZEOF_LZO_INT 8 +# define LZO_UINT_MAX 0xffffffffffffffffull +# define LZO_INT_MAX 9223372036854775807LL +# define LZO_INT_MIN (-1LL - LZO_INT_MAX) +# elif (LZO_ABI_IP32L64) /* MIPS R5900 */ + typedef unsigned int lzo_uint; + typedef int lzo_int; +# define LZO_SIZEOF_LZO_INT LZO_SIZEOF_INT +# define LZO_TYPEOF_LZO_INT LZO_TYPEOF_INT +# define LZO_UINT_MAX UINT_MAX +# define LZO_INT_MAX INT_MAX +# define LZO_INT_MIN INT_MIN +# elif (ULONG_MAX >= LZO_0xffffffffL) + typedef unsigned long lzo_uint; + typedef long lzo_int; +# define LZO_SIZEOF_LZO_INT LZO_SIZEOF_LONG +# define LZO_TYPEOF_LZO_INT LZO_TYPEOF_LONG +# define LZO_UINT_MAX ULONG_MAX +# define LZO_INT_MAX LONG_MAX +# define LZO_INT_MIN LONG_MIN +# else +# error "lzo_uint" +# endif +#endif + +/* The larger type of lzo_uint and lzo_uint32_t. 
*/ +#if (LZO_SIZEOF_LZO_INT >= 4) +# define lzo_xint lzo_uint +#else +# define lzo_xint lzo_uint32_t +#endif + +typedef int lzo_bool; + +/* sanity checks */ +LZO_COMPILE_TIME_ASSERT_HEADER(sizeof(lzo_int) == LZO_SIZEOF_LZO_INT) +LZO_COMPILE_TIME_ASSERT_HEADER(sizeof(lzo_uint) == LZO_SIZEOF_LZO_INT) +LZO_COMPILE_TIME_ASSERT_HEADER(sizeof(lzo_xint) >= sizeof(lzo_uint)) +LZO_COMPILE_TIME_ASSERT_HEADER(sizeof(lzo_xint) >= sizeof(lzo_uint32_t)) + +#ifndef __LZO_MMODEL +#define __LZO_MMODEL /*empty*/ +#endif + +/* no typedef here because of const-pointer issues */ +#define lzo_bytep unsigned char __LZO_MMODEL * +#define lzo_charp char __LZO_MMODEL * +#define lzo_voidp void __LZO_MMODEL * +#define lzo_shortp short __LZO_MMODEL * +#define lzo_ushortp unsigned short __LZO_MMODEL * +#define lzo_intp lzo_int __LZO_MMODEL * +#define lzo_uintp lzo_uint __LZO_MMODEL * +#define lzo_xintp lzo_xint __LZO_MMODEL * +#define lzo_voidpp lzo_voidp __LZO_MMODEL * +#define lzo_bytepp lzo_bytep __LZO_MMODEL * + +#define lzo_int8_tp lzo_int8_t __LZO_MMODEL * +#define lzo_uint8_tp lzo_uint8_t __LZO_MMODEL * +#define lzo_int16_tp lzo_int16_t __LZO_MMODEL * +#define lzo_uint16_tp lzo_uint16_t __LZO_MMODEL * +#define lzo_int32_tp lzo_int32_t __LZO_MMODEL * +#define lzo_uint32_tp lzo_uint32_t __LZO_MMODEL * +#if defined(lzo_int64_t) +#define lzo_int64_tp lzo_int64_t __LZO_MMODEL * +#define lzo_uint64_tp lzo_uint64_t __LZO_MMODEL * +#endif + +/* Older LZO versions used to support ancient systems and memory models + * such as 16-bit MSDOS with __huge pointers or Cray PVP, but these + * obsolete configurations are not supported any longer. 
+ */ +#if defined(__LZO_MMODEL_HUGE) +#error "__LZO_MMODEL_HUGE memory model is unsupported" +#endif +#if (LZO_MM_PVP) +#error "LZO_MM_PVP memory model is unsupported" +#endif +#if (LZO_SIZEOF_INT < 4) +#error "LZO_SIZEOF_INT < 4 is unsupported" +#endif +#if (__LZO_UINTPTR_T_IS_POINTER) +#error "__LZO_UINTPTR_T_IS_POINTER is unsupported" +#endif +LZO_COMPILE_TIME_ASSERT_HEADER(sizeof(int) >= 4) +LZO_COMPILE_TIME_ASSERT_HEADER(sizeof(lzo_uint) >= 4) +/* Strange configurations where sizeof(lzo_uint) != sizeof(size_t) should + * work but have not received much testing lately, so be strict here. + */ +LZO_COMPILE_TIME_ASSERT_HEADER(sizeof(lzo_uint) == sizeof(size_t)) +LZO_COMPILE_TIME_ASSERT_HEADER(sizeof(lzo_uint) == sizeof(ptrdiff_t)) +LZO_COMPILE_TIME_ASSERT_HEADER(sizeof(lzo_uint) == sizeof(lzo_uintptr_t)) +LZO_COMPILE_TIME_ASSERT_HEADER(sizeof(void *) == sizeof(lzo_uintptr_t)) +LZO_COMPILE_TIME_ASSERT_HEADER(sizeof(char *) == sizeof(lzo_uintptr_t)) +LZO_COMPILE_TIME_ASSERT_HEADER(sizeof(long *) == sizeof(lzo_uintptr_t)) +LZO_COMPILE_TIME_ASSERT_HEADER(sizeof(void *) == sizeof(lzo_voidp)) +LZO_COMPILE_TIME_ASSERT_HEADER(sizeof(char *) == sizeof(lzo_bytep)) + + +/*********************************************************************** +// function types +************************************************************************/ + +/* name mangling */ +#if !defined(__LZO_EXTERN_C) +# ifdef __cplusplus +# define __LZO_EXTERN_C extern "C" +# else +# define __LZO_EXTERN_C extern +# endif +#endif + +/* calling convention */ +#if !defined(__LZO_CDECL) +# define __LZO_CDECL __lzo_cdecl +#endif + +/* DLL export information */ +#if !defined(__LZO_EXPORT1) +# define __LZO_EXPORT1 /*empty*/ +#endif +#if !defined(__LZO_EXPORT2) +# define __LZO_EXPORT2 /*empty*/ +#endif + +/* __cdecl calling convention for public C and assembly functions */ +#if !defined(LZO_PUBLIC) +# define LZO_PUBLIC(r) __LZO_EXPORT1 r __LZO_EXPORT2 __LZO_CDECL +#endif +#if !defined(LZO_EXTERN) +# define 
LZO_EXTERN(r) __LZO_EXTERN_C LZO_PUBLIC(r) +#endif +#if !defined(LZO_PRIVATE) +# define LZO_PRIVATE(r) static r __LZO_CDECL +#endif + +/* function types */ +typedef int +(__LZO_CDECL *lzo_compress_t) ( const lzo_bytep src, lzo_uint src_len, + lzo_bytep dst, lzo_uintp dst_len, + lzo_voidp wrkmem ); + +typedef int +(__LZO_CDECL *lzo_decompress_t) ( const lzo_bytep src, lzo_uint src_len, + lzo_bytep dst, lzo_uintp dst_len, + lzo_voidp wrkmem ); + +typedef int +(__LZO_CDECL *lzo_optimize_t) ( lzo_bytep src, lzo_uint src_len, + lzo_bytep dst, lzo_uintp dst_len, + lzo_voidp wrkmem ); + +typedef int +(__LZO_CDECL *lzo_compress_dict_t)(const lzo_bytep src, lzo_uint src_len, + lzo_bytep dst, lzo_uintp dst_len, + lzo_voidp wrkmem, + const lzo_bytep dict, lzo_uint dict_len ); + +typedef int +(__LZO_CDECL *lzo_decompress_dict_t)(const lzo_bytep src, lzo_uint src_len, + lzo_bytep dst, lzo_uintp dst_len, + lzo_voidp wrkmem, + const lzo_bytep dict, lzo_uint dict_len ); + + +/* Callback interface. Currently only the progress indicator ("nprogress") + * is used, but this may change in a future release. 
*/ + +struct lzo_callback_t; +typedef struct lzo_callback_t lzo_callback_t; +#define lzo_callback_p lzo_callback_t __LZO_MMODEL * + +/* malloc & free function types */ +typedef lzo_voidp (__LZO_CDECL *lzo_alloc_func_t) + (lzo_callback_p self, lzo_uint items, lzo_uint size); +typedef void (__LZO_CDECL *lzo_free_func_t) + (lzo_callback_p self, lzo_voidp ptr); + +/* a progress indicator callback function */ +typedef void (__LZO_CDECL *lzo_progress_func_t) + (lzo_callback_p, lzo_uint, lzo_uint, int); + +struct lzo_callback_t +{ + /* custom allocators (set to 0 to disable) */ + lzo_alloc_func_t nalloc; /* [not used right now] */ + lzo_free_func_t nfree; /* [not used right now] */ + + /* a progress indicator callback function (set to 0 to disable) */ + lzo_progress_func_t nprogress; + + /* INFO: the first parameter "self" of the nalloc/nfree/nprogress + * callbacks points back to this struct, so you are free to store + * some extra info in the following variables. */ + lzo_voidp user1; + lzo_xint user2; + lzo_xint user3; +}; + + +/*********************************************************************** +// error codes and prototypes +************************************************************************/ + +/* Error codes for the compression/decompression functions. Negative + * values are errors, positive values will be used for special but + * normal events. 
+ */ +#define LZO_E_OK 0 +#define LZO_E_ERROR (-1) +#define LZO_E_OUT_OF_MEMORY (-2) /* [lzo_alloc_func_t failure] */ +#define LZO_E_NOT_COMPRESSIBLE (-3) /* [not used right now] */ +#define LZO_E_INPUT_OVERRUN (-4) +#define LZO_E_OUTPUT_OVERRUN (-5) +#define LZO_E_LOOKBEHIND_OVERRUN (-6) +#define LZO_E_EOF_NOT_FOUND (-7) +#define LZO_E_INPUT_NOT_CONSUMED (-8) +#define LZO_E_NOT_YET_IMPLEMENTED (-9) /* [not used right now] */ +#define LZO_E_INVALID_ARGUMENT (-10) +#define LZO_E_INVALID_ALIGNMENT (-11) /* pointer argument is not properly aligned */ +#define LZO_E_OUTPUT_NOT_CONSUMED (-12) +#define LZO_E_INTERNAL_ERROR (-99) + + +#ifndef lzo_sizeof_dict_t +# define lzo_sizeof_dict_t ((unsigned)sizeof(lzo_bytep)) +#endif + +/* lzo_init() should be the first function you call. + * Check the return code ! + * + * lzo_init() is a macro to allow checking that the library and the + * compiler's view of various types are consistent. + */ +#define lzo_init() __lzo_init_v2(LZO_VERSION,(int)sizeof(short),(int)sizeof(int),\ + (int)sizeof(long),(int)sizeof(lzo_uint32_t),(int)sizeof(lzo_uint),\ + (int)lzo_sizeof_dict_t,(int)sizeof(char *),(int)sizeof(lzo_voidp),\ + (int)sizeof(lzo_callback_t)) +LZO_EXTERN(int) __lzo_init_v2(unsigned,int,int,int,int,int,int,int,int,int); + +/* version functions (useful for shared libraries) */ +LZO_EXTERN(unsigned) lzo_version(void); +LZO_EXTERN(const char *) lzo_version_string(void); +LZO_EXTERN(const char *) lzo_version_date(void); +LZO_EXTERN(const lzo_charp) _lzo_version_string(void); +LZO_EXTERN(const lzo_charp) _lzo_version_date(void); + +/* string functions */ +LZO_EXTERN(int) + lzo_memcmp(const lzo_voidp a, const lzo_voidp b, lzo_uint len); +LZO_EXTERN(lzo_voidp) + lzo_memcpy(lzo_voidp dst, const lzo_voidp src, lzo_uint len); +LZO_EXTERN(lzo_voidp) + lzo_memmove(lzo_voidp dst, const lzo_voidp src, lzo_uint len); +LZO_EXTERN(lzo_voidp) + lzo_memset(lzo_voidp buf, int c, lzo_uint len); + +/* checksum functions */ +LZO_EXTERN(lzo_uint32_t) + 
lzo_adler32(lzo_uint32_t c, const lzo_bytep buf, lzo_uint len); +LZO_EXTERN(lzo_uint32_t) + lzo_crc32(lzo_uint32_t c, const lzo_bytep buf, lzo_uint len); +LZO_EXTERN(const lzo_uint32_tp) + lzo_get_crc32_table(void); + +/* misc. */ +LZO_EXTERN(int) _lzo_config_check(void); +typedef union { + lzo_voidp a00; lzo_bytep a01; lzo_uint a02; lzo_xint a03; lzo_uintptr_t a04; + void *a05; unsigned char *a06; unsigned long a07; size_t a08; ptrdiff_t a09; +#if defined(lzo_int64_t) + lzo_uint64_t a10; +#endif +} lzo_align_t; + +/* align a char pointer on a boundary that is a multiple of 'size' */ +LZO_EXTERN(unsigned) __lzo_align_gap(const lzo_voidp p, lzo_uint size); +#define LZO_PTR_ALIGN_UP(p,size) \ + ((p) + (lzo_uint) __lzo_align_gap((const lzo_voidp)(p),(lzo_uint)(size))) + + +/*********************************************************************** +// deprecated macros - only for backward compatibility +************************************************************************/ + +/* deprecated - use 'lzo_bytep' instead of 'lzo_byte *' */ +#define lzo_byte unsigned char +/* deprecated type names */ +#define lzo_int32 lzo_int32_t +#define lzo_uint32 lzo_uint32_t +#define lzo_int32p lzo_int32_t __LZO_MMODEL * +#define lzo_uint32p lzo_uint32_t __LZO_MMODEL * +#define LZO_INT32_MAX LZO_INT32_C(2147483647) +#define LZO_UINT32_MAX LZO_UINT32_C(4294967295) +#if defined(lzo_int64_t) +#define lzo_int64 lzo_int64_t +#define lzo_uint64 lzo_uint64_t +#define lzo_int64p lzo_int64_t __LZO_MMODEL * +#define lzo_uint64p lzo_uint64_t __LZO_MMODEL * +#define LZO_INT64_MAX LZO_INT64_C(9223372036854775807) +#define LZO_UINT64_MAX LZO_UINT64_C(18446744073709551615) +#endif +/* deprecated types */ +typedef union { lzo_bytep a; lzo_uint b; } __lzo_pu_u; +typedef union { lzo_bytep a; lzo_uint32_t b; } __lzo_pu32_u; +/* deprecated defines */ +#if !defined(LZO_SIZEOF_LZO_UINT) +# define LZO_SIZEOF_LZO_UINT LZO_SIZEOF_LZO_INT +#endif + +#if defined(LZO_CFG_COMPAT) + +#define __LZOCONF_H 1 + +#if 
defined(LZO_ARCH_I086) +# define __LZO_i386 1 +#elif defined(LZO_ARCH_I386) +# define __LZO_i386 1 +#endif + +#if defined(LZO_OS_DOS16) +# define __LZO_DOS 1 +# define __LZO_DOS16 1 +#elif defined(LZO_OS_DOS32) +# define __LZO_DOS 1 +#elif defined(LZO_OS_WIN16) +# define __LZO_WIN 1 +# define __LZO_WIN16 1 +#elif defined(LZO_OS_WIN32) +# define __LZO_WIN 1 +#endif + +#define __LZO_CMODEL /*empty*/ +#define __LZO_DMODEL /*empty*/ +#define __LZO_ENTRY __LZO_CDECL +#define LZO_EXTERN_CDECL LZO_EXTERN +#define LZO_ALIGN LZO_PTR_ALIGN_UP + +#define lzo_compress_asm_t lzo_compress_t +#define lzo_decompress_asm_t lzo_decompress_t + +#endif /* LZO_CFG_COMPAT */ + + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* already included */ + + +/* vim:set ts=4 sw=4 et: */ diff --git a/tools/z64compress/src/enc/lzo/lzodefs.h b/tools/z64compress/src/enc/lzo/lzodefs.h new file mode 100644 index 000000000..c3e2bcf5d --- /dev/null +++ b/tools/z64compress/src/enc/lzo/lzodefs.h @@ -0,0 +1,3268 @@ +/* lzodefs.h -- architecture, OS and compiler specific defines + + This file is part of the LZO real-time data compression library. + + Copyright (C) 1996-2017 Markus Franz Xaver Johannes Oberhumer + All Rights Reserved. + + The LZO library is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of + the License, or (at your option) any later version. + + The LZO library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with the LZO library; see the file COPYING. + If not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + + Markus F.X.J. 
Oberhumer + + http://www.oberhumer.com/opensource/lzo/ + */ + + +#ifndef __LZODEFS_H_INCLUDED +#define __LZODEFS_H_INCLUDED 1 + +#if defined(__CYGWIN32__) && !defined(__CYGWIN__) +# define __CYGWIN__ __CYGWIN32__ +#endif +#if 1 && defined(__INTERIX) && defined(__GNUC__) && !defined(_ALL_SOURCE) +# define _ALL_SOURCE 1 +#endif +#if defined(__mips__) && defined(__R5900__) +# if !defined(__LONG_MAX__) +# define __LONG_MAX__ 9223372036854775807L +# endif +#endif +#if 0 +#elif !defined(__LZO_LANG_OVERRIDE) +#if (defined(__clang__) || defined(__GNUC__)) && defined(__ASSEMBLER__) +# if (__ASSEMBLER__+0) <= 0 +# error "__ASSEMBLER__" +# else +# define LZO_LANG_ASSEMBLER 1 +# endif +#elif defined(__cplusplus) +# if (__cplusplus+0) <= 0 +# error "__cplusplus" +# elif (__cplusplus < 199711L) +# define LZO_LANG_CXX 1 +# elif defined(_MSC_VER) && defined(_MSVC_LANG) && (_MSVC_LANG+0 >= 201402L) && 1 +# define LZO_LANG_CXX _MSVC_LANG +# else +# define LZO_LANG_CXX __cplusplus +# endif +# define LZO_LANG_CPLUSPLUS LZO_LANG_CXX +#else +# if defined(__STDC_VERSION__) && (__STDC_VERSION__+0 >= 199409L) +# define LZO_LANG_C __STDC_VERSION__ +# else +# define LZO_LANG_C 1 +# endif +#endif +#endif +#if !defined(LZO_CFG_NO_DISABLE_WUNDEF) +#if defined(__ARMCC_VERSION) +# pragma diag_suppress 193 +#elif defined(__clang__) && defined(__clang_minor__) +# pragma clang diagnostic ignored "-Wundef" +#elif defined(__INTEL_COMPILER) +# pragma warning(disable: 193) +#elif defined(__KEIL__) && defined(__C166__) +# pragma warning disable = 322 +#elif defined(__GNUC__) && defined(__GNUC_MINOR__) && !defined(__PATHSCALE__) +# if ((__GNUC__-0) >= 5 || ((__GNUC__-0) == 4 && (__GNUC_MINOR__-0) >= 2)) +# pragma GCC diagnostic ignored "-Wundef" +# endif +#elif defined(_MSC_VER) && !defined(__clang__) && !defined(__INTEL_COMPILER) && !defined(__MWERKS__) +# if ((_MSC_VER-0) >= 1300) +# pragma warning(disable: 4668) +# endif +#endif +#endif +#if 0 && defined(__POCC__) && defined(_WIN32) +# if (__POCC__ >= 
400) +# pragma warn(disable: 2216) +# endif +#endif +#if 0 && defined(__WATCOMC__) +# if (__WATCOMC__ >= 1050) && (__WATCOMC__ < 1060) +# pragma warning 203 9 +# endif +#endif +#if defined(__BORLANDC__) && defined(__MSDOS__) && !defined(__FLAT__) +# pragma option -h +#endif +#if !(LZO_CFG_NO_DISABLE_WCRTNONSTDC) +#ifndef _CRT_NONSTDC_NO_DEPRECATE +#define _CRT_NONSTDC_NO_DEPRECATE 1 +#endif +#ifndef _CRT_NONSTDC_NO_WARNINGS +#define _CRT_NONSTDC_NO_WARNINGS 1 +#endif +#ifndef _CRT_SECURE_NO_DEPRECATE +#define _CRT_SECURE_NO_DEPRECATE 1 +#endif +#ifndef _CRT_SECURE_NO_WARNINGS +#define _CRT_SECURE_NO_WARNINGS 1 +#endif +#endif +#if 0 +#define LZO_0xffffUL 0xfffful +#define LZO_0xffffffffUL 0xfffffffful +#else +#define LZO_0xffffUL 65535ul +#define LZO_0xffffffffUL 4294967295ul +#endif +#define LZO_0xffffL LZO_0xffffUL +#define LZO_0xffffffffL LZO_0xffffffffUL +#if (LZO_0xffffL == LZO_0xffffffffL) +# error "your preprocessor is broken 1" +#endif +#if (16ul * 16384ul != 262144ul) +# error "your preprocessor is broken 2" +#endif +#if 0 +#if (32767 >= 4294967295ul) +# error "your preprocessor is broken 3" +#endif +#if (65535u >= 4294967295ul) +# error "your preprocessor is broken 4" +#endif +#endif +#if defined(__COUNTER__) +# ifndef LZO_CFG_USE_COUNTER +# define LZO_CFG_USE_COUNTER 1 +# endif +#else +# undef LZO_CFG_USE_COUNTER +#endif +#if (UINT_MAX == LZO_0xffffL) +#if defined(__ZTC__) && defined(__I86__) && !defined(__OS2__) +# if !defined(MSDOS) +# define MSDOS 1 +# endif +# if !defined(_MSDOS) +# define _MSDOS 1 +# endif +#elif 0 && defined(__VERSION) && defined(MB_LEN_MAX) +# if (__VERSION == 520) && (MB_LEN_MAX == 1) +# if !defined(__AZTEC_C__) +# define __AZTEC_C__ __VERSION +# endif +# if !defined(__DOS__) +# define __DOS__ 1 +# endif +# endif +#endif +#endif +#if (UINT_MAX == LZO_0xffffL) +#if defined(_MSC_VER) && defined(M_I86HM) +# define ptrdiff_t long +# define _PTRDIFF_T_DEFINED 1 +#endif +#endif +#if (UINT_MAX == LZO_0xffffL) +# undef __LZO_RENAME_A +# 
undef __LZO_RENAME_B +# if defined(__AZTEC_C__) && defined(__DOS__) +# define __LZO_RENAME_A 1 +# elif defined(_MSC_VER) && defined(MSDOS) +# if (_MSC_VER < 600) +# define __LZO_RENAME_A 1 +# elif (_MSC_VER < 700) +# define __LZO_RENAME_B 1 +# endif +# elif defined(__TSC__) && defined(__OS2__) +# define __LZO_RENAME_A 1 +# elif defined(__MSDOS__) && defined(__TURBOC__) && (__TURBOC__ < 0x0410) +# define __LZO_RENAME_A 1 +# elif defined(__PACIFIC__) && defined(DOS) +# if !defined(__far) +# define __far far +# endif +# if !defined(__near) +# define __near near +# endif +# endif +# if defined(__LZO_RENAME_A) +# if !defined(__cdecl) +# define __cdecl cdecl +# endif +# if !defined(__far) +# define __far far +# endif +# if !defined(__huge) +# define __huge huge +# endif +# if !defined(__near) +# define __near near +# endif +# if !defined(__pascal) +# define __pascal pascal +# endif +# if !defined(__huge) +# define __huge huge +# endif +# elif defined(__LZO_RENAME_B) +# if !defined(__cdecl) +# define __cdecl _cdecl +# endif +# if !defined(__far) +# define __far _far +# endif +# if !defined(__huge) +# define __huge _huge +# endif +# if !defined(__near) +# define __near _near +# endif +# if !defined(__pascal) +# define __pascal _pascal +# endif +# elif (defined(__PUREC__) || defined(__TURBOC__)) && defined(__TOS__) +# if !defined(__cdecl) +# define __cdecl cdecl +# endif +# if !defined(__pascal) +# define __pascal pascal +# endif +# endif +# undef __LZO_RENAME_A +# undef __LZO_RENAME_B +#endif +#if (UINT_MAX == LZO_0xffffL) +#if defined(__AZTEC_C__) && defined(__DOS__) +# define LZO_BROKEN_CDECL_ALT_SYNTAX 1 +#elif defined(_MSC_VER) && defined(MSDOS) +# if (_MSC_VER < 600) +# define LZO_BROKEN_INTEGRAL_CONSTANTS 1 +# endif +# if (_MSC_VER < 700) +# define LZO_BROKEN_INTEGRAL_PROMOTION 1 +# define LZO_BROKEN_SIZEOF 1 +# endif +#elif defined(__PACIFIC__) && defined(DOS) +# define LZO_BROKEN_INTEGRAL_CONSTANTS 1 +#elif defined(__TURBOC__) && defined(__MSDOS__) +# if 
(__TURBOC__ < 0x0150) +# define LZO_BROKEN_CDECL_ALT_SYNTAX 1 +# define LZO_BROKEN_INTEGRAL_CONSTANTS 1 +# define LZO_BROKEN_INTEGRAL_PROMOTION 1 +# endif +# if (__TURBOC__ < 0x0200) +# define LZO_BROKEN_SIZEOF 1 +# endif +# if (__TURBOC__ < 0x0400) && defined(__cplusplus) +# define LZO_BROKEN_CDECL_ALT_SYNTAX 1 +# endif +#elif (defined(__PUREC__) || defined(__TURBOC__)) && defined(__TOS__) +# define LZO_BROKEN_CDECL_ALT_SYNTAX 1 +# define LZO_BROKEN_SIZEOF 1 +#endif +#endif +#if defined(__WATCOMC__) && (__WATCOMC__ < 900) +# define LZO_BROKEN_INTEGRAL_CONSTANTS 1 +#endif +#if defined(_CRAY) && defined(_CRAY1) +# define LZO_BROKEN_SIGNED_RIGHT_SHIFT 1 +#endif +#define LZO_PP_STRINGIZE(x) #x +#define LZO_PP_MACRO_EXPAND(x) LZO_PP_STRINGIZE(x) +#define LZO_PP_CONCAT0() /*empty*/ +#define LZO_PP_CONCAT1(a) a +#define LZO_PP_CONCAT2(a,b) a ## b +#define LZO_PP_CONCAT3(a,b,c) a ## b ## c +#define LZO_PP_CONCAT4(a,b,c,d) a ## b ## c ## d +#define LZO_PP_CONCAT5(a,b,c,d,e) a ## b ## c ## d ## e +#define LZO_PP_CONCAT6(a,b,c,d,e,f) a ## b ## c ## d ## e ## f +#define LZO_PP_CONCAT7(a,b,c,d,e,f,g) a ## b ## c ## d ## e ## f ## g +#define LZO_PP_ECONCAT0() LZO_PP_CONCAT0() +#define LZO_PP_ECONCAT1(a) LZO_PP_CONCAT1(a) +#define LZO_PP_ECONCAT2(a,b) LZO_PP_CONCAT2(a,b) +#define LZO_PP_ECONCAT3(a,b,c) LZO_PP_CONCAT3(a,b,c) +#define LZO_PP_ECONCAT4(a,b,c,d) LZO_PP_CONCAT4(a,b,c,d) +#define LZO_PP_ECONCAT5(a,b,c,d,e) LZO_PP_CONCAT5(a,b,c,d,e) +#define LZO_PP_ECONCAT6(a,b,c,d,e,f) LZO_PP_CONCAT6(a,b,c,d,e,f) +#define LZO_PP_ECONCAT7(a,b,c,d,e,f,g) LZO_PP_CONCAT7(a,b,c,d,e,f,g) +#define LZO_PP_EMPTY /*empty*/ +#define LZO_PP_EMPTY0() /*empty*/ +#define LZO_PP_EMPTY1(a) /*empty*/ +#define LZO_PP_EMPTY2(a,b) /*empty*/ +#define LZO_PP_EMPTY3(a,b,c) /*empty*/ +#define LZO_PP_EMPTY4(a,b,c,d) /*empty*/ +#define LZO_PP_EMPTY5(a,b,c,d,e) /*empty*/ +#define LZO_PP_EMPTY6(a,b,c,d,e,f) /*empty*/ +#define LZO_PP_EMPTY7(a,b,c,d,e,f,g) /*empty*/ +#if 1 +#define LZO_CPP_STRINGIZE(x) #x +#define 
LZO_CPP_MACRO_EXPAND(x) LZO_CPP_STRINGIZE(x) +#define LZO_CPP_CONCAT2(a,b) a ## b +#define LZO_CPP_CONCAT3(a,b,c) a ## b ## c +#define LZO_CPP_CONCAT4(a,b,c,d) a ## b ## c ## d +#define LZO_CPP_CONCAT5(a,b,c,d,e) a ## b ## c ## d ## e +#define LZO_CPP_CONCAT6(a,b,c,d,e,f) a ## b ## c ## d ## e ## f +#define LZO_CPP_CONCAT7(a,b,c,d,e,f,g) a ## b ## c ## d ## e ## f ## g +#define LZO_CPP_ECONCAT2(a,b) LZO_CPP_CONCAT2(a,b) +#define LZO_CPP_ECONCAT3(a,b,c) LZO_CPP_CONCAT3(a,b,c) +#define LZO_CPP_ECONCAT4(a,b,c,d) LZO_CPP_CONCAT4(a,b,c,d) +#define LZO_CPP_ECONCAT5(a,b,c,d,e) LZO_CPP_CONCAT5(a,b,c,d,e) +#define LZO_CPP_ECONCAT6(a,b,c,d,e,f) LZO_CPP_CONCAT6(a,b,c,d,e,f) +#define LZO_CPP_ECONCAT7(a,b,c,d,e,f,g) LZO_CPP_CONCAT7(a,b,c,d,e,f,g) +#endif +#define __LZO_MASK_GEN(o,b) (((((o) << ((b)-((b)!=0))) - (o)) << 1) + (o)*((b)!=0)) +#if 1 && defined(__cplusplus) +# if !defined(__STDC_CONSTANT_MACROS) +# define __STDC_CONSTANT_MACROS 1 +# endif +# if !defined(__STDC_LIMIT_MACROS) +# define __STDC_LIMIT_MACROS 1 +# endif +#endif +#if defined(__cplusplus) +# define LZO_EXTERN_C extern "C" +# define LZO_EXTERN_C_BEGIN extern "C" { +# define LZO_EXTERN_C_END } +#else +# define LZO_EXTERN_C extern +# define LZO_EXTERN_C_BEGIN /*empty*/ +# define LZO_EXTERN_C_END /*empty*/ +#endif +#if !defined(__LZO_OS_OVERRIDE) +#if (LZO_OS_FREESTANDING) +# define LZO_INFO_OS "freestanding" +#elif (LZO_OS_EMBEDDED) +# define LZO_INFO_OS "embedded" +#elif 1 && defined(__IAR_SYSTEMS_ICC__) +# define LZO_OS_EMBEDDED 1 +# define LZO_INFO_OS "embedded" +#elif defined(__CYGWIN__) && defined(__GNUC__) +# define LZO_OS_CYGWIN 1 +# define LZO_INFO_OS "cygwin" +#elif defined(__EMX__) && defined(__GNUC__) +# define LZO_OS_EMX 1 +# define LZO_INFO_OS "emx" +#elif defined(__BEOS__) +# define LZO_OS_BEOS 1 +# define LZO_INFO_OS "beos" +#elif defined(__Lynx__) +# define LZO_OS_LYNXOS 1 +# define LZO_INFO_OS "lynxos" +#elif defined(__OS400__) +# define LZO_OS_OS400 1 +# define LZO_INFO_OS "os400" +#elif 
defined(__QNX__) +# define LZO_OS_QNX 1 +# define LZO_INFO_OS "qnx" +#elif defined(__BORLANDC__) && defined(__DPMI32__) && (__BORLANDC__ >= 0x0460) +# define LZO_OS_DOS32 1 +# define LZO_INFO_OS "dos32" +#elif defined(__BORLANDC__) && defined(__DPMI16__) +# define LZO_OS_DOS16 1 +# define LZO_INFO_OS "dos16" +#elif defined(__ZTC__) && defined(DOS386) +# define LZO_OS_DOS32 1 +# define LZO_INFO_OS "dos32" +#elif defined(__OS2__) || defined(__OS2V2__) +# if (UINT_MAX == LZO_0xffffL) +# define LZO_OS_OS216 1 +# define LZO_INFO_OS "os216" +# elif (UINT_MAX == LZO_0xffffffffL) +# define LZO_OS_OS2 1 +# define LZO_INFO_OS "os2" +# else +# error "check your limits.h header" +# endif +#elif defined(__WIN64__) || defined(_WIN64) || defined(WIN64) +# define LZO_OS_WIN64 1 +# define LZO_INFO_OS "win64" +#elif defined(__WIN32__) || defined(_WIN32) || defined(WIN32) || defined(__WINDOWS_386__) +# define LZO_OS_WIN32 1 +# define LZO_INFO_OS "win32" +#elif defined(__MWERKS__) && defined(__INTEL__) +# define LZO_OS_WIN32 1 +# define LZO_INFO_OS "win32" +#elif defined(__WINDOWS__) || defined(_WINDOWS) || defined(_Windows) +# if (UINT_MAX == LZO_0xffffL) +# define LZO_OS_WIN16 1 +# define LZO_INFO_OS "win16" +# elif (UINT_MAX == LZO_0xffffffffL) +# define LZO_OS_WIN32 1 +# define LZO_INFO_OS "win32" +# else +# error "check your limits.h header" +# endif +#elif defined(__DOS__) || defined(__MSDOS__) || defined(_MSDOS) || defined(MSDOS) || (defined(__PACIFIC__) && defined(DOS)) +# if (UINT_MAX == LZO_0xffffL) +# define LZO_OS_DOS16 1 +# define LZO_INFO_OS "dos16" +# elif (UINT_MAX == LZO_0xffffffffL) +# define LZO_OS_DOS32 1 +# define LZO_INFO_OS "dos32" +# else +# error "check your limits.h header" +# endif +#elif defined(__WATCOMC__) +# if defined(__NT__) && (UINT_MAX == LZO_0xffffL) +# define LZO_OS_DOS16 1 +# define LZO_INFO_OS "dos16" +# elif defined(__NT__) && (__WATCOMC__ < 1100) +# define LZO_OS_WIN32 1 +# define LZO_INFO_OS "win32" +# elif defined(__linux__) || 
defined(__LINUX__) +# define LZO_OS_POSIX 1 +# define LZO_INFO_OS "posix" +# else +# error "please specify a target using the -bt compiler option" +# endif +#elif defined(__palmos__) +# define LZO_OS_PALMOS 1 +# define LZO_INFO_OS "palmos" +#elif defined(__TOS__) || defined(__atarist__) +# define LZO_OS_TOS 1 +# define LZO_INFO_OS "tos" +#elif defined(macintosh) && !defined(__arm__) && !defined(__i386__) && !defined(__ppc__) && !defined(__x64_64__) +# define LZO_OS_MACCLASSIC 1 +# define LZO_INFO_OS "macclassic" +#elif defined(__VMS) +# define LZO_OS_VMS 1 +# define LZO_INFO_OS "vms" +#elif (defined(__mips__) && defined(__R5900__)) || defined(__MIPS_PSX2__) +# define LZO_OS_CONSOLE 1 +# define LZO_OS_CONSOLE_PS2 1 +# define LZO_INFO_OS "console" +# define LZO_INFO_OS_CONSOLE "ps2" +#elif defined(__mips__) && defined(__psp__) +# define LZO_OS_CONSOLE 1 +# define LZO_OS_CONSOLE_PSP 1 +# define LZO_INFO_OS "console" +# define LZO_INFO_OS_CONSOLE "psp" +#else +# define LZO_OS_POSIX 1 +# define LZO_INFO_OS "posix" +#endif +#if (LZO_OS_POSIX) +# if defined(_AIX) || defined(__AIX__) || defined(__aix__) +# define LZO_OS_POSIX_AIX 1 +# define LZO_INFO_OS_POSIX "aix" +# elif defined(__FreeBSD__) +# define LZO_OS_POSIX_FREEBSD 1 +# define LZO_INFO_OS_POSIX "freebsd" +# elif defined(__hpux__) || defined(__hpux) +# define LZO_OS_POSIX_HPUX 1 +# define LZO_INFO_OS_POSIX "hpux" +# elif defined(__INTERIX) +# define LZO_OS_POSIX_INTERIX 1 +# define LZO_INFO_OS_POSIX "interix" +# elif defined(__IRIX__) || defined(__irix__) +# define LZO_OS_POSIX_IRIX 1 +# define LZO_INFO_OS_POSIX "irix" +# elif defined(__linux__) || defined(__linux) || defined(__LINUX__) +# define LZO_OS_POSIX_LINUX 1 +# define LZO_INFO_OS_POSIX "linux" +# elif defined(__APPLE__) && defined(__MACH__) +# if ((__ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__-0) >= 20000) +# define LZO_OS_POSIX_DARWIN 1040 +# define LZO_INFO_OS_POSIX "darwin_iphone" +# elif ((__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__-0) >= 1040) +# 
define LZO_OS_POSIX_DARWIN __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ +# define LZO_INFO_OS_POSIX "darwin" +# else +# define LZO_OS_POSIX_DARWIN 1 +# define LZO_INFO_OS_POSIX "darwin" +# endif +# define LZO_OS_POSIX_MACOSX LZO_OS_POSIX_DARWIN +# elif defined(__minix__) || defined(__minix) +# define LZO_OS_POSIX_MINIX 1 +# define LZO_INFO_OS_POSIX "minix" +# elif defined(__NetBSD__) +# define LZO_OS_POSIX_NETBSD 1 +# define LZO_INFO_OS_POSIX "netbsd" +# elif defined(__OpenBSD__) +# define LZO_OS_POSIX_OPENBSD 1 +# define LZO_INFO_OS_POSIX "openbsd" +# elif defined(__osf__) +# define LZO_OS_POSIX_OSF 1 +# define LZO_INFO_OS_POSIX "osf" +# elif defined(__solaris__) || defined(__sun) +# if defined(__SVR4) || defined(__svr4__) +# define LZO_OS_POSIX_SOLARIS 1 +# define LZO_INFO_OS_POSIX "solaris" +# else +# define LZO_OS_POSIX_SUNOS 1 +# define LZO_INFO_OS_POSIX "sunos" +# endif +# elif defined(__ultrix__) || defined(__ultrix) +# define LZO_OS_POSIX_ULTRIX 1 +# define LZO_INFO_OS_POSIX "ultrix" +# elif defined(_UNICOS) +# define LZO_OS_POSIX_UNICOS 1 +# define LZO_INFO_OS_POSIX "unicos" +# else +# define LZO_OS_POSIX_UNKNOWN 1 +# define LZO_INFO_OS_POSIX "unknown" +# endif +#endif +#endif +#if (LZO_OS_DOS16 || LZO_OS_OS216 || LZO_OS_WIN16) +# if (UINT_MAX != LZO_0xffffL) +# error "unexpected configuration - check your compiler defines" +# endif +# if (ULONG_MAX != LZO_0xffffffffL) +# error "unexpected configuration - check your compiler defines" +# endif +#endif +#if (LZO_OS_DOS32 || LZO_OS_OS2 || LZO_OS_WIN32 || LZO_OS_WIN64) +# if (UINT_MAX != LZO_0xffffffffL) +# error "unexpected configuration - check your compiler defines" +# endif +# if (ULONG_MAX != LZO_0xffffffffL) +# error "unexpected configuration - check your compiler defines" +# endif +#endif +#if defined(CIL) && defined(_GNUCC) && defined(__GNUC__) +# define LZO_CC_CILLY 1 +# define LZO_INFO_CC "Cilly" +# if defined(__CILLY__) +# define LZO_INFO_CCVER LZO_PP_MACRO_EXPAND(__CILLY__) +# else +# define 
LZO_INFO_CCVER "unknown" +# endif +#elif 0 && defined(SDCC) && defined(__VERSION__) && !defined(__GNUC__) +# define LZO_CC_SDCC 1 +# define LZO_INFO_CC "sdcc" +# define LZO_INFO_CCVER LZO_PP_MACRO_EXPAND(SDCC) +#elif defined(__PATHSCALE__) && defined(__PATHCC_PATCHLEVEL__) +# define LZO_CC_PATHSCALE (__PATHCC__ * 0x10000L + (__PATHCC_MINOR__-0) * 0x100 + (__PATHCC_PATCHLEVEL__-0)) +# define LZO_INFO_CC "Pathscale C" +# define LZO_INFO_CCVER __PATHSCALE__ +# if defined(__GNUC__) && defined(__GNUC_MINOR__) && defined(__VERSION__) +# define LZO_CC_PATHSCALE_GNUC (__GNUC__ * 0x10000L + (__GNUC_MINOR__-0) * 0x100 + (__GNUC_PATCHLEVEL__-0)) +# endif +#elif defined(__INTEL_COMPILER) && ((__INTEL_COMPILER-0) > 0) +# define LZO_CC_INTELC __INTEL_COMPILER +# define LZO_INFO_CC "Intel C" +# define LZO_INFO_CCVER LZO_PP_MACRO_EXPAND(__INTEL_COMPILER) +# if defined(_MSC_VER) && ((_MSC_VER-0) > 0) +# define LZO_CC_INTELC_MSC _MSC_VER +# elif defined(__GNUC__) && defined(__GNUC_MINOR__) && defined(__VERSION__) +# define LZO_CC_INTELC_GNUC (__GNUC__ * 0x10000L + (__GNUC_MINOR__-0) * 0x100 + (__GNUC_PATCHLEVEL__-0)) +# endif +#elif defined(__POCC__) && defined(_WIN32) +# define LZO_CC_PELLESC 1 +# define LZO_INFO_CC "Pelles C" +# define LZO_INFO_CCVER LZO_PP_MACRO_EXPAND(__POCC__) +#elif defined(__ARMCC_VERSION) && defined(__GNUC__) && defined(__GNUC_MINOR__) && defined(__VERSION__) +# if defined(__GNUC_PATCHLEVEL__) +# define LZO_CC_ARMCC_GNUC (__GNUC__ * 0x10000L + (__GNUC_MINOR__-0) * 0x100 + (__GNUC_PATCHLEVEL__-0)) +# else +# define LZO_CC_ARMCC_GNUC (__GNUC__ * 0x10000L + (__GNUC_MINOR__-0) * 0x100) +# endif +# define LZO_CC_ARMCC __ARMCC_VERSION +# define LZO_INFO_CC "ARM C Compiler" +# define LZO_INFO_CCVER __VERSION__ +#elif defined(__clang__) && defined(__c2__) && defined(__c2_version__) && defined(_MSC_VER) +# define LZO_CC_CLANG (__clang_major__ * 0x10000L + (__clang_minor__-0) * 0x100 + (__clang_patchlevel__-0)) +# define LZO_CC_CLANG_C2 _MSC_VER +# define 
LZO_CC_CLANG_VENDOR_MICROSOFT 1 +# define LZO_INFO_CC "clang/c2" +# define LZO_INFO_CCVER LZO_PP_MACRO_EXPAND(__c2_version__) +#elif defined(__clang__) && defined(__llvm__) && defined(__VERSION__) +# if defined(__clang_major__) && defined(__clang_minor__) && defined(__clang_patchlevel__) +# define LZO_CC_CLANG (__clang_major__ * 0x10000L + (__clang_minor__-0) * 0x100 + (__clang_patchlevel__-0)) +# else +# define LZO_CC_CLANG 0x010000L +# endif +# if defined(_MSC_VER) && ((_MSC_VER-0) > 0) +# define LZO_CC_CLANG_MSC _MSC_VER +# elif defined(__GNUC__) && defined(__GNUC_MINOR__) && defined(__VERSION__) +# define LZO_CC_CLANG_GNUC (__GNUC__ * 0x10000L + (__GNUC_MINOR__-0) * 0x100 + (__GNUC_PATCHLEVEL__-0)) +# endif +# if defined(__APPLE_CC__) +# define LZO_CC_CLANG_VENDOR_APPLE 1 +# define LZO_INFO_CC "clang/apple" +# else +# define LZO_CC_CLANG_VENDOR_LLVM 1 +# define LZO_INFO_CC "clang" +# endif +# if defined(__clang_version__) +# define LZO_INFO_CCVER __clang_version__ +# else +# define LZO_INFO_CCVER __VERSION__ +# endif +#elif defined(__llvm__) && defined(__GNUC__) && defined(__GNUC_MINOR__) && defined(__VERSION__) +# if defined(__GNUC_PATCHLEVEL__) +# define LZO_CC_LLVM_GNUC (__GNUC__ * 0x10000L + (__GNUC_MINOR__-0) * 0x100 + (__GNUC_PATCHLEVEL__-0)) +# else +# define LZO_CC_LLVM_GNUC (__GNUC__ * 0x10000L + (__GNUC_MINOR__-0) * 0x100) +# endif +# define LZO_CC_LLVM LZO_CC_LLVM_GNUC +# define LZO_INFO_CC "llvm-gcc" +# define LZO_INFO_CCVER __VERSION__ +#elif defined(__ACK__) && defined(_ACK) +# define LZO_CC_ACK 1 +# define LZO_INFO_CC "Amsterdam Compiler Kit C" +# define LZO_INFO_CCVER "unknown" +#elif defined(__ARMCC_VERSION) && !defined(__GNUC__) +# define LZO_CC_ARMCC __ARMCC_VERSION +# define LZO_CC_ARMCC_ARMCC __ARMCC_VERSION +# define LZO_INFO_CC "ARM C Compiler" +# define LZO_INFO_CCVER LZO_PP_MACRO_EXPAND(__ARMCC_VERSION) +#elif defined(__AZTEC_C__) +# define LZO_CC_AZTECC 1 +# define LZO_INFO_CC "Aztec C" +# define LZO_INFO_CCVER 
LZO_PP_MACRO_EXPAND(__AZTEC_C__) +#elif defined(__CODEGEARC__) +# define LZO_CC_CODEGEARC 1 +# define LZO_INFO_CC "CodeGear C" +# define LZO_INFO_CCVER LZO_PP_MACRO_EXPAND(__CODEGEARC__) +#elif defined(__BORLANDC__) +# define LZO_CC_BORLANDC 1 +# define LZO_INFO_CC "Borland C" +# define LZO_INFO_CCVER LZO_PP_MACRO_EXPAND(__BORLANDC__) +#elif defined(_CRAYC) && defined(_RELEASE) +# define LZO_CC_CRAYC 1 +# define LZO_INFO_CC "Cray C" +# define LZO_INFO_CCVER LZO_PP_MACRO_EXPAND(_RELEASE) +#elif defined(__DMC__) && defined(__SC__) +# define LZO_CC_DMC 1 +# define LZO_INFO_CC "Digital Mars C" +# define LZO_INFO_CCVER LZO_PP_MACRO_EXPAND(__DMC__) +#elif defined(__DECC) +# define LZO_CC_DECC 1 +# define LZO_INFO_CC "DEC C" +# define LZO_INFO_CCVER LZO_PP_MACRO_EXPAND(__DECC) +#elif (defined(__ghs) || defined(__ghs__)) && defined(__GHS_VERSION_NUMBER) && ((__GHS_VERSION_NUMBER-0) > 0) +# define LZO_CC_GHS 1 +# define LZO_INFO_CC "Green Hills C" +# define LZO_INFO_CCVER LZO_PP_MACRO_EXPAND(__GHS_VERSION_NUMBER) +# if defined(_MSC_VER) && ((_MSC_VER-0) > 0) +# define LZO_CC_GHS_MSC _MSC_VER +# elif defined(__GNUC__) && defined(__GNUC_MINOR__) && defined(__VERSION__) +# define LZO_CC_GHS_GNUC (__GNUC__ * 0x10000L + (__GNUC_MINOR__-0) * 0x100 + (__GNUC_PATCHLEVEL__-0)) +# endif +#elif defined(__HIGHC__) +# define LZO_CC_HIGHC 1 +# define LZO_INFO_CC "MetaWare High C" +# define LZO_INFO_CCVER "unknown" +#elif defined(__HP_aCC) && ((__HP_aCC-0) > 0) +# define LZO_CC_HPACC __HP_aCC +# define LZO_INFO_CC "HP aCC" +# define LZO_INFO_CCVER LZO_PP_MACRO_EXPAND(__HP_aCC) +#elif defined(__IAR_SYSTEMS_ICC__) +# define LZO_CC_IARC 1 +# define LZO_INFO_CC "IAR C" +# if defined(__VER__) +# define LZO_INFO_CCVER LZO_PP_MACRO_EXPAND(__VER__) +# else +# define LZO_INFO_CCVER "unknown" +# endif +#elif defined(__IBMC__) && ((__IBMC__-0) > 0) +# define LZO_CC_IBMC __IBMC__ +# define LZO_INFO_CC "IBM C" +# define LZO_INFO_CCVER LZO_PP_MACRO_EXPAND(__IBMC__) +#elif defined(__IBMCPP__) && 
((__IBMCPP__-0) > 0) +# define LZO_CC_IBMC __IBMCPP__ +# define LZO_INFO_CC "IBM C" +# define LZO_INFO_CCVER LZO_PP_MACRO_EXPAND(__IBMCPP__) +#elif defined(__KEIL__) && defined(__C166__) +# define LZO_CC_KEILC 1 +# define LZO_INFO_CC "Keil C" +# define LZO_INFO_CCVER LZO_PP_MACRO_EXPAND(__C166__) +#elif defined(__LCC__) && defined(_WIN32) && defined(__LCCOPTIMLEVEL) +# define LZO_CC_LCCWIN32 1 +# define LZO_INFO_CC "lcc-win32" +# define LZO_INFO_CCVER "unknown" +#elif defined(__LCC__) +# define LZO_CC_LCC 1 +# define LZO_INFO_CC "lcc" +# if defined(__LCC_VERSION__) +# define LZO_INFO_CCVER LZO_PP_MACRO_EXPAND(__LCC_VERSION__) +# else +# define LZO_INFO_CCVER "unknown" +# endif +#elif defined(__MWERKS__) && ((__MWERKS__-0) > 0) +# define LZO_CC_MWERKS __MWERKS__ +# define LZO_INFO_CC "Metrowerks C" +# define LZO_INFO_CCVER LZO_PP_MACRO_EXPAND(__MWERKS__) +#elif (defined(__NDPC__) || defined(__NDPX__)) && defined(__i386) +# define LZO_CC_NDPC 1 +# define LZO_INFO_CC "Microway NDP C" +# define LZO_INFO_CCVER "unknown" +#elif defined(__PACIFIC__) +# define LZO_CC_PACIFICC 1 +# define LZO_INFO_CC "Pacific C" +# define LZO_INFO_CCVER LZO_PP_MACRO_EXPAND(__PACIFIC__) +#elif defined(__PGI) && defined(__PGIC__) && defined(__PGIC_MINOR__) +# if defined(__PGIC_PATCHLEVEL__) +# define LZO_CC_PGI (__PGIC__ * 0x10000L + (__PGIC_MINOR__-0) * 0x100 + (__PGIC_PATCHLEVEL__-0)) +# define LZO_INFO_CCVER LZO_PP_MACRO_EXPAND(__PGIC__) "." LZO_PP_MACRO_EXPAND(__PGIC_MINOR__) "." LZO_PP_MACRO_EXPAND(__PGIC_PATCHLEVEL__) +# else +# define LZO_CC_PGI (__PGIC__ * 0x10000L + (__PGIC_MINOR__-0) * 0x100) +# define LZO_INFO_CCVER LZO_PP_MACRO_EXPAND(__PGIC__) "." 
LZO_PP_MACRO_EXPAND(__PGIC_MINOR__) ".0" +# endif +# define LZO_INFO_CC "Portland Group PGI C" +#elif defined(__PGI) && (defined(__linux__) || defined(__WIN32__)) +# define LZO_CC_PGI 1 +# define LZO_INFO_CC "Portland Group PGI C" +# define LZO_INFO_CCVER "unknown" +#elif defined(__PUREC__) && defined(__TOS__) +# define LZO_CC_PUREC 1 +# define LZO_INFO_CC "Pure C" +# define LZO_INFO_CCVER LZO_PP_MACRO_EXPAND(__PUREC__) +#elif defined(__SC__) && defined(__ZTC__) +# define LZO_CC_SYMANTECC 1 +# define LZO_INFO_CC "Symantec C" +# define LZO_INFO_CCVER LZO_PP_MACRO_EXPAND(__SC__) +#elif defined(__SUNPRO_C) +# define LZO_INFO_CC "SunPro C" +# if ((__SUNPRO_C-0) > 0) +# define LZO_CC_SUNPROC __SUNPRO_C +# define LZO_INFO_CCVER LZO_PP_MACRO_EXPAND(__SUNPRO_C) +# else +# define LZO_CC_SUNPROC 1 +# define LZO_INFO_CCVER "unknown" +# endif +#elif defined(__SUNPRO_CC) +# define LZO_INFO_CC "SunPro C" +# if ((__SUNPRO_CC-0) > 0) +# define LZO_CC_SUNPROC __SUNPRO_CC +# define LZO_INFO_CCVER LZO_PP_MACRO_EXPAND(__SUNPRO_CC) +# else +# define LZO_CC_SUNPROC 1 +# define LZO_INFO_CCVER "unknown" +# endif +#elif defined(__TINYC__) +# define LZO_CC_TINYC 1 +# define LZO_INFO_CC "Tiny C" +# define LZO_INFO_CCVER LZO_PP_MACRO_EXPAND(__TINYC__) +#elif defined(__TSC__) +# define LZO_CC_TOPSPEEDC 1 +# define LZO_INFO_CC "TopSpeed C" +# define LZO_INFO_CCVER LZO_PP_MACRO_EXPAND(__TSC__) +#elif defined(__WATCOMC__) +# define LZO_CC_WATCOMC 1 +# define LZO_INFO_CC "Watcom C" +# define LZO_INFO_CCVER LZO_PP_MACRO_EXPAND(__WATCOMC__) +#elif defined(__TURBOC__) +# define LZO_CC_TURBOC 1 +# define LZO_INFO_CC "Turbo C" +# define LZO_INFO_CCVER LZO_PP_MACRO_EXPAND(__TURBOC__) +#elif defined(__ZTC__) +# define LZO_CC_ZORTECHC 1 +# define LZO_INFO_CC "Zortech C" +# if ((__ZTC__-0) == 0x310) +# define LZO_INFO_CCVER "0x310" +# else +# define LZO_INFO_CCVER LZO_PP_MACRO_EXPAND(__ZTC__) +# endif +#elif defined(__GNUC__) && defined(__VERSION__) +# if defined(__GNUC_MINOR__) && 
defined(__GNUC_PATCHLEVEL__) +# define LZO_CC_GNUC (__GNUC__ * 0x10000L + (__GNUC_MINOR__-0) * 0x100 + (__GNUC_PATCHLEVEL__-0)) +# elif defined(__GNUC_MINOR__) +# define LZO_CC_GNUC (__GNUC__ * 0x10000L + (__GNUC_MINOR__-0) * 0x100) +# else +# define LZO_CC_GNUC (__GNUC__ * 0x10000L) +# endif +# define LZO_INFO_CC "gcc" +# define LZO_INFO_CCVER __VERSION__ +#elif defined(_MSC_VER) && ((_MSC_VER-0) > 0) +# define LZO_CC_MSC _MSC_VER +# define LZO_INFO_CC "Microsoft C" +# if defined(_MSC_FULL_VER) +# define LZO_INFO_CCVER LZO_PP_MACRO_EXPAND(_MSC_VER) "." LZO_PP_MACRO_EXPAND(_MSC_FULL_VER) +# else +# define LZO_INFO_CCVER LZO_PP_MACRO_EXPAND(_MSC_VER) +# endif +#else +# define LZO_CC_UNKNOWN 1 +# define LZO_INFO_CC "unknown" +# define LZO_INFO_CCVER "unknown" +#endif +#if (LZO_CC_GNUC) && defined(__OPEN64__) +# if defined(__OPENCC__) && defined(__OPENCC_MINOR__) && defined(__OPENCC_PATCHLEVEL__) +# define LZO_CC_OPEN64 (__OPENCC__ * 0x10000L + (__OPENCC_MINOR__-0) * 0x100 + (__OPENCC_PATCHLEVEL__-0)) +# define LZO_CC_OPEN64_GNUC LZO_CC_GNUC +# endif +#endif +#if (LZO_CC_GNUC) && defined(__PCC__) +# if defined(__PCC__) && defined(__PCC_MINOR__) && defined(__PCC_MINORMINOR__) +# define LZO_CC_PCC (__PCC__ * 0x10000L + (__PCC_MINOR__-0) * 0x100 + (__PCC_MINORMINOR__-0)) +# define LZO_CC_PCC_GNUC LZO_CC_GNUC +# endif +#endif +#if 0 && (LZO_CC_MSC && (_MSC_VER >= 1200)) && !defined(_MSC_FULL_VER) +# error "LZO_CC_MSC: _MSC_FULL_VER is not defined" +#endif +#if !defined(__LZO_ARCH_OVERRIDE) && !(LZO_ARCH_GENERIC) && defined(_CRAY) +# if (UINT_MAX > LZO_0xffffffffL) && defined(_CRAY) +# if defined(_CRAYMPP) || defined(_CRAYT3D) || defined(_CRAYT3E) +# define LZO_ARCH_CRAY_MPP 1 +# elif defined(_CRAY1) +# define LZO_ARCH_CRAY_PVP 1 +# endif +# endif +#endif +#if !defined(__LZO_ARCH_OVERRIDE) +#if (LZO_ARCH_GENERIC) +# define LZO_INFO_ARCH "generic" +#elif (LZO_OS_DOS16 || LZO_OS_OS216 || LZO_OS_WIN16) +# define LZO_ARCH_I086 1 +# define LZO_INFO_ARCH "i086" +#elif 
defined(__aarch64__) || defined(_M_ARM64) +# define LZO_ARCH_ARM64 1 +# define LZO_INFO_ARCH "arm64" +#elif defined(__alpha__) || defined(__alpha) || defined(_M_ALPHA) +# define LZO_ARCH_ALPHA 1 +# define LZO_INFO_ARCH "alpha" +#elif (LZO_ARCH_CRAY_MPP) && (defined(_CRAYT3D) || defined(_CRAYT3E)) +# define LZO_ARCH_ALPHA 1 +# define LZO_INFO_ARCH "alpha" +#elif defined(__amd64__) || defined(__x86_64__) || defined(_M_AMD64) +# define LZO_ARCH_AMD64 1 +# define LZO_INFO_ARCH "amd64" +#elif defined(__arm__) || defined(_M_ARM) +# define LZO_ARCH_ARM 1 +# define LZO_INFO_ARCH "arm" +#elif defined(__IAR_SYSTEMS_ICC__) && defined(__ICCARM__) +# define LZO_ARCH_ARM 1 +# define LZO_INFO_ARCH "arm" +#elif (UINT_MAX <= LZO_0xffffL) && defined(__AVR__) +# define LZO_ARCH_AVR 1 +# define LZO_INFO_ARCH "avr" +#elif defined(__avr32__) || defined(__AVR32__) +# define LZO_ARCH_AVR32 1 +# define LZO_INFO_ARCH "avr32" +#elif defined(__bfin__) +# define LZO_ARCH_BLACKFIN 1 +# define LZO_INFO_ARCH "blackfin" +#elif (UINT_MAX == LZO_0xffffL) && defined(__C166__) +# define LZO_ARCH_C166 1 +# define LZO_INFO_ARCH "c166" +#elif defined(__cris__) +# define LZO_ARCH_CRIS 1 +# define LZO_INFO_ARCH "cris" +#elif defined(__IAR_SYSTEMS_ICC__) && defined(__ICCEZ80__) +# define LZO_ARCH_EZ80 1 +# define LZO_INFO_ARCH "ez80" +#elif defined(__H8300__) || defined(__H8300H__) || defined(__H8300S__) || defined(__H8300SX__) +# define LZO_ARCH_H8300 1 +# define LZO_INFO_ARCH "h8300" +#elif defined(__hppa__) || defined(__hppa) +# define LZO_ARCH_HPPA 1 +# define LZO_INFO_ARCH "hppa" +#elif defined(__386__) || defined(__i386__) || defined(__i386) || defined(_M_IX86) || defined(_M_I386) +# define LZO_ARCH_I386 1 +# define LZO_ARCH_IA32 1 +# define LZO_INFO_ARCH "i386" +#elif (LZO_CC_ZORTECHC && defined(__I86__)) +# define LZO_ARCH_I386 1 +# define LZO_ARCH_IA32 1 +# define LZO_INFO_ARCH "i386" +#elif (LZO_OS_DOS32 && LZO_CC_HIGHC) && defined(_I386) +# define LZO_ARCH_I386 1 +# define LZO_ARCH_IA32 1 +# 
define LZO_INFO_ARCH "i386" +#elif defined(__ia64__) || defined(__ia64) || defined(_M_IA64) +# define LZO_ARCH_IA64 1 +# define LZO_INFO_ARCH "ia64" +#elif (UINT_MAX == LZO_0xffffL) && defined(__m32c__) +# define LZO_ARCH_M16C 1 +# define LZO_INFO_ARCH "m16c" +#elif defined(__IAR_SYSTEMS_ICC__) && defined(__ICCM16C__) +# define LZO_ARCH_M16C 1 +# define LZO_INFO_ARCH "m16c" +#elif defined(__m32r__) +# define LZO_ARCH_M32R 1 +# define LZO_INFO_ARCH "m32r" +#elif (LZO_OS_TOS) || defined(__m68k__) || defined(__m68000__) || defined(__mc68000__) || defined(__mc68020__) || defined(_M_M68K) +# define LZO_ARCH_M68K 1 +# define LZO_INFO_ARCH "m68k" +#elif (UINT_MAX == LZO_0xffffL) && defined(__C251__) +# define LZO_ARCH_MCS251 1 +# define LZO_INFO_ARCH "mcs251" +#elif (UINT_MAX == LZO_0xffffL) && defined(__C51__) +# define LZO_ARCH_MCS51 1 +# define LZO_INFO_ARCH "mcs51" +#elif defined(__IAR_SYSTEMS_ICC__) && defined(__ICC8051__) +# define LZO_ARCH_MCS51 1 +# define LZO_INFO_ARCH "mcs51" +#elif defined(__mips__) || defined(__mips) || defined(_MIPS_ARCH) || defined(_M_MRX000) +# define LZO_ARCH_MIPS 1 +# define LZO_INFO_ARCH "mips" +#elif (UINT_MAX == LZO_0xffffL) && defined(__MSP430__) +# define LZO_ARCH_MSP430 1 +# define LZO_INFO_ARCH "msp430" +#elif defined(__IAR_SYSTEMS_ICC__) && defined(__ICC430__) +# define LZO_ARCH_MSP430 1 +# define LZO_INFO_ARCH "msp430" +#elif defined(__powerpc__) || defined(__powerpc) || defined(__ppc__) || defined(__PPC__) || defined(_M_PPC) || defined(_ARCH_PPC) || defined(_ARCH_PWR) +# define LZO_ARCH_POWERPC 1 +# define LZO_INFO_ARCH "powerpc" +#elif defined(__powerpc64__) || defined(__powerpc64) || defined(__ppc64__) || defined(__PPC64__) +# define LZO_ARCH_POWERPC 1 +# define LZO_INFO_ARCH "powerpc" +#elif defined(__powerpc64le__) || defined(__powerpc64le) || defined(__ppc64le__) || defined(__PPC64LE__) +# define LZO_ARCH_POWERPC 1 +# define LZO_INFO_ARCH "powerpc" +#elif defined(__riscv) +# define LZO_ARCH_RISCV 1 +# define LZO_INFO_ARCH 
"riscv" +#elif defined(__s390__) || defined(__s390) || defined(__s390x__) || defined(__s390x) +# define LZO_ARCH_S390 1 +# define LZO_INFO_ARCH "s390" +#elif defined(__sh__) || defined(_M_SH) +# define LZO_ARCH_SH 1 +# define LZO_INFO_ARCH "sh" +#elif defined(__sparc__) || defined(__sparc) || defined(__sparcv8) +# define LZO_ARCH_SPARC 1 +# define LZO_INFO_ARCH "sparc" +#elif defined(__SPU__) +# define LZO_ARCH_SPU 1 +# define LZO_INFO_ARCH "spu" +#elif (UINT_MAX == LZO_0xffffL) && defined(__z80) +# define LZO_ARCH_Z80 1 +# define LZO_INFO_ARCH "z80" +#elif (LZO_ARCH_CRAY_PVP) +# if defined(_CRAYSV1) +# define LZO_ARCH_CRAY_SV1 1 +# define LZO_INFO_ARCH "cray_sv1" +# elif (_ADDR64) +# define LZO_ARCH_CRAY_T90 1 +# define LZO_INFO_ARCH "cray_t90" +# elif (_ADDR32) +# define LZO_ARCH_CRAY_YMP 1 +# define LZO_INFO_ARCH "cray_ymp" +# else +# define LZO_ARCH_CRAY_XMP 1 +# define LZO_INFO_ARCH "cray_xmp" +# endif +#else +# define LZO_ARCH_UNKNOWN 1 +# define LZO_INFO_ARCH "unknown" +#endif +#endif +#if !defined(LZO_ARCH_ARM_THUMB2) +#if (LZO_ARCH_ARM) +# if defined(__thumb__) || defined(__thumb) || defined(_M_THUMB) +# if defined(__thumb2__) +# define LZO_ARCH_ARM_THUMB2 1 +# elif 1 && defined(__TARGET_ARCH_THUMB) && ((__TARGET_ARCH_THUMB)+0 >= 4) +# define LZO_ARCH_ARM_THUMB2 1 +# elif 1 && defined(_MSC_VER) && defined(_M_THUMB) && ((_M_THUMB)+0 >= 7) +# define LZO_ARCH_ARM_THUMB2 1 +# endif +# endif +#endif +#endif +#if (LZO_ARCH_ARM_THUMB2) +# undef LZO_INFO_ARCH +# define LZO_INFO_ARCH "arm_thumb2" +#endif +#if 1 && (LZO_ARCH_UNKNOWN) && (LZO_OS_DOS32 || LZO_OS_OS2) +# error "FIXME - missing define for CPU architecture" +#endif +#if 1 && (LZO_ARCH_UNKNOWN) && (LZO_OS_WIN32) +# error "FIXME - missing LZO_OS_WIN32 define for CPU architecture" +#endif +#if 1 && (LZO_ARCH_UNKNOWN) && (LZO_OS_WIN64) +# error "FIXME - missing LZO_OS_WIN64 define for CPU architecture" +#endif +#if (LZO_OS_OS216 || LZO_OS_WIN16) +# define LZO_ARCH_I086PM 1 +#elif 1 && (LZO_OS_DOS16 && 
defined(BLX286)) +# define LZO_ARCH_I086PM 1 +#elif 1 && (LZO_OS_DOS16 && defined(DOSX286)) +# define LZO_ARCH_I086PM 1 +#elif 1 && (LZO_OS_DOS16 && LZO_CC_BORLANDC && defined(__DPMI16__)) +# define LZO_ARCH_I086PM 1 +#endif +#if (LZO_ARCH_AMD64 && !LZO_ARCH_X64) +# define LZO_ARCH_X64 1 +#elif (!LZO_ARCH_AMD64 && LZO_ARCH_X64) && defined(__LZO_ARCH_OVERRIDE) +# define LZO_ARCH_AMD64 1 +#endif +#if (LZO_ARCH_ARM64 && !LZO_ARCH_AARCH64) +# define LZO_ARCH_AARCH64 1 +#elif (!LZO_ARCH_ARM64 && LZO_ARCH_AARCH64) && defined(__LZO_ARCH_OVERRIDE) +# define LZO_ARCH_ARM64 1 +#endif +#if (LZO_ARCH_I386 && !LZO_ARCH_X86) +# define LZO_ARCH_X86 1 +#elif (!LZO_ARCH_I386 && LZO_ARCH_X86) && defined(__LZO_ARCH_OVERRIDE) +# define LZO_ARCH_I386 1 +#endif +#if (LZO_ARCH_AMD64 && !LZO_ARCH_X64) || (!LZO_ARCH_AMD64 && LZO_ARCH_X64) +# error "unexpected configuration - check your compiler defines" +#endif +#if (LZO_ARCH_ARM64 && !LZO_ARCH_AARCH64) || (!LZO_ARCH_ARM64 && LZO_ARCH_AARCH64) +# error "unexpected configuration - check your compiler defines" +#endif +#if (LZO_ARCH_I386 && !LZO_ARCH_X86) || (!LZO_ARCH_I386 && LZO_ARCH_X86) +# error "unexpected configuration - check your compiler defines" +#endif +#if (LZO_ARCH_ARM_THUMB1 && !LZO_ARCH_ARM) +# error "unexpected configuration - check your compiler defines" +#endif +#if (LZO_ARCH_ARM_THUMB2 && !LZO_ARCH_ARM) +# error "unexpected configuration - check your compiler defines" +#endif +#if (LZO_ARCH_ARM_THUMB1 && LZO_ARCH_ARM_THUMB2) +# error "unexpected configuration - check your compiler defines" +#endif +#if (LZO_ARCH_I086PM && !LZO_ARCH_I086) +# error "unexpected configuration - check your compiler defines" +#endif +#if (LZO_ARCH_I086) +# if (UINT_MAX != LZO_0xffffL) +# error "unexpected configuration - check your compiler defines" +# endif +# if (ULONG_MAX != LZO_0xffffffffL) +# error "unexpected configuration - check your compiler defines" +# endif +#endif +#if (LZO_ARCH_I386) +# if (UINT_MAX != LZO_0xffffL) && 
defined(__i386_int16__) +# error "unexpected configuration - check your compiler defines" +# endif +# if (UINT_MAX != LZO_0xffffffffL) && !defined(__i386_int16__) +# error "unexpected configuration - check your compiler defines" +# endif +# if (ULONG_MAX != LZO_0xffffffffL) +# error "unexpected configuration - check your compiler defines" +# endif +#endif +#if (LZO_ARCH_AMD64 || LZO_ARCH_I386) +# if !defined(LZO_TARGET_FEATURE_SSE2) +# if defined(__SSE2__) +# define LZO_TARGET_FEATURE_SSE2 1 +# elif defined(_MSC_VER) && (defined(_M_IX86_FP) && ((_M_IX86_FP)+0 >= 2)) +# define LZO_TARGET_FEATURE_SSE2 1 +# elif (LZO_CC_INTELC_MSC || LZO_CC_MSC) && defined(_M_AMD64) +# define LZO_TARGET_FEATURE_SSE2 1 +# endif +# endif +# if !defined(LZO_TARGET_FEATURE_SSSE3) +# if (LZO_TARGET_FEATURE_SSE2) +# if defined(__SSSE3__) +# define LZO_TARGET_FEATURE_SSSE3 1 +# elif defined(_MSC_VER) && defined(__AVX__) +# define LZO_TARGET_FEATURE_SSSE3 1 +# endif +# endif +# endif +# if !defined(LZO_TARGET_FEATURE_SSE4_2) +# if (LZO_TARGET_FEATURE_SSSE3) +# if defined(__SSE4_2__) +# define LZO_TARGET_FEATURE_SSE4_2 1 +# endif +# endif +# endif +# if !defined(LZO_TARGET_FEATURE_AVX) +# if (LZO_TARGET_FEATURE_SSSE3) +# if defined(__AVX__) +# define LZO_TARGET_FEATURE_AVX 1 +# endif +# endif +# endif +# if !defined(LZO_TARGET_FEATURE_AVX2) +# if (LZO_TARGET_FEATURE_AVX) +# if defined(__AVX2__) +# define LZO_TARGET_FEATURE_AVX2 1 +# endif +# endif +# endif +#endif +#if (LZO_TARGET_FEATURE_SSSE3 && !(LZO_TARGET_FEATURE_SSE2)) +# error "unexpected configuration - check your compiler defines" +#endif +#if (LZO_TARGET_FEATURE_SSE4_2 && !(LZO_TARGET_FEATURE_SSSE3)) +# error "unexpected configuration - check your compiler defines" +#endif +#if (LZO_TARGET_FEATURE_AVX && !(LZO_TARGET_FEATURE_SSSE3)) +# error "unexpected configuration - check your compiler defines" +#endif +#if (LZO_TARGET_FEATURE_AVX2 && !(LZO_TARGET_FEATURE_AVX)) +# error "unexpected configuration - check your compiler defines" 
+#endif +#if (LZO_ARCH_ARM) +# if !defined(LZO_TARGET_FEATURE_NEON) +# if defined(__ARM_NEON) && ((__ARM_NEON)+0) +# define LZO_TARGET_FEATURE_NEON 1 +# elif 1 && defined(__ARM_NEON__) && ((__ARM_NEON__)+0) +# define LZO_TARGET_FEATURE_NEON 1 +# elif 1 && defined(__TARGET_FEATURE_NEON) && ((__TARGET_FEATURE_NEON)+0) +# define LZO_TARGET_FEATURE_NEON 1 +# endif +# endif +#elif (LZO_ARCH_ARM64) +# if !defined(LZO_TARGET_FEATURE_NEON) +# if 1 +# define LZO_TARGET_FEATURE_NEON 1 +# endif +# endif +#endif +#if 0 +#elif !defined(__LZO_MM_OVERRIDE) +#if (LZO_ARCH_I086) +#if (UINT_MAX != LZO_0xffffL) +# error "unexpected configuration - check your compiler defines" +#endif +#if defined(__TINY__) || defined(M_I86TM) || defined(_M_I86TM) +# define LZO_MM_TINY 1 +#elif defined(__HUGE__) || defined(_HUGE_) || defined(M_I86HM) || defined(_M_I86HM) +# define LZO_MM_HUGE 1 +#elif defined(__SMALL__) || defined(M_I86SM) || defined(_M_I86SM) || defined(SMALL_MODEL) +# define LZO_MM_SMALL 1 +#elif defined(__MEDIUM__) || defined(M_I86MM) || defined(_M_I86MM) +# define LZO_MM_MEDIUM 1 +#elif defined(__COMPACT__) || defined(M_I86CM) || defined(_M_I86CM) +# define LZO_MM_COMPACT 1 +#elif defined(__LARGE__) || defined(M_I86LM) || defined(_M_I86LM) || defined(LARGE_MODEL) +# define LZO_MM_LARGE 1 +#elif (LZO_CC_AZTECC) +# if defined(_LARGE_CODE) && defined(_LARGE_DATA) +# define LZO_MM_LARGE 1 +# elif defined(_LARGE_CODE) +# define LZO_MM_MEDIUM 1 +# elif defined(_LARGE_DATA) +# define LZO_MM_COMPACT 1 +# else +# define LZO_MM_SMALL 1 +# endif +#elif (LZO_CC_ZORTECHC && defined(__VCM__)) +# define LZO_MM_LARGE 1 +#else +# error "unknown LZO_ARCH_I086 memory model" +#endif +#if (LZO_OS_DOS16 || LZO_OS_OS216 || LZO_OS_WIN16) +#define LZO_HAVE_MM_HUGE_PTR 1 +#define LZO_HAVE_MM_HUGE_ARRAY 1 +#if (LZO_MM_TINY) +# undef LZO_HAVE_MM_HUGE_ARRAY +#endif +#if (LZO_CC_AZTECC || LZO_CC_PACIFICC || LZO_CC_ZORTECHC) +# undef LZO_HAVE_MM_HUGE_PTR +# undef LZO_HAVE_MM_HUGE_ARRAY +#elif (LZO_CC_DMC || 
LZO_CC_SYMANTECC) +# undef LZO_HAVE_MM_HUGE_ARRAY +#elif (LZO_CC_MSC && defined(_QC)) +# undef LZO_HAVE_MM_HUGE_ARRAY +# if (_MSC_VER < 600) +# undef LZO_HAVE_MM_HUGE_PTR +# endif +#elif (LZO_CC_TURBOC && (__TURBOC__ < 0x0295)) +# undef LZO_HAVE_MM_HUGE_ARRAY +#endif +#if (LZO_ARCH_I086PM) && !(LZO_HAVE_MM_HUGE_PTR) +# if (LZO_OS_DOS16) +# error "unexpected configuration - check your compiler defines" +# elif (LZO_CC_ZORTECHC) +# else +# error "unexpected configuration - check your compiler defines" +# endif +#endif +#if defined(__cplusplus) +extern "C" { +#endif +#if (LZO_CC_BORLANDC && (__BORLANDC__ >= 0x0200)) + extern void __near __cdecl _AHSHIFT(void); +# define LZO_MM_AHSHIFT ((unsigned) _AHSHIFT) +#elif (LZO_CC_DMC || LZO_CC_SYMANTECC || LZO_CC_ZORTECHC) + extern void __near __cdecl _AHSHIFT(void); +# define LZO_MM_AHSHIFT ((unsigned) _AHSHIFT) +#elif (LZO_CC_MSC || LZO_CC_TOPSPEEDC) + extern void __near __cdecl _AHSHIFT(void); +# define LZO_MM_AHSHIFT ((unsigned) _AHSHIFT) +#elif (LZO_CC_TURBOC && (__TURBOC__ >= 0x0295)) + extern void __near __cdecl _AHSHIFT(void); +# define LZO_MM_AHSHIFT ((unsigned) _AHSHIFT) +#elif ((LZO_CC_AZTECC || LZO_CC_PACIFICC || LZO_CC_TURBOC) && LZO_OS_DOS16) +# define LZO_MM_AHSHIFT 12 +#elif (LZO_CC_WATCOMC) + extern unsigned char _HShift; +# define LZO_MM_AHSHIFT ((unsigned) _HShift) +#else +# error "FIXME - implement LZO_MM_AHSHIFT" +#endif +#if defined(__cplusplus) +} +#endif +#endif +#elif (LZO_ARCH_C166) +#if !defined(__MODEL__) +# error "FIXME - LZO_ARCH_C166 __MODEL__" +#elif ((__MODEL__) == 0) +# define LZO_MM_SMALL 1 +#elif ((__MODEL__) == 1) +# define LZO_MM_SMALL 1 +#elif ((__MODEL__) == 2) +# define LZO_MM_LARGE 1 +#elif ((__MODEL__) == 3) +# define LZO_MM_TINY 1 +#elif ((__MODEL__) == 4) +# define LZO_MM_XTINY 1 +#elif ((__MODEL__) == 5) +# define LZO_MM_XSMALL 1 +#else +# error "FIXME - LZO_ARCH_C166 __MODEL__" +#endif +#elif (LZO_ARCH_MCS251) +#if !defined(__MODEL__) +# error "FIXME - LZO_ARCH_MCS251 __MODEL__" 
+#elif ((__MODEL__) == 0) +# define LZO_MM_SMALL 1 +#elif ((__MODEL__) == 2) +# define LZO_MM_LARGE 1 +#elif ((__MODEL__) == 3) +# define LZO_MM_TINY 1 +#elif ((__MODEL__) == 4) +# define LZO_MM_XTINY 1 +#elif ((__MODEL__) == 5) +# define LZO_MM_XSMALL 1 +#else +# error "FIXME - LZO_ARCH_MCS251 __MODEL__" +#endif +#elif (LZO_ARCH_MCS51) +#if !defined(__MODEL__) +# error "FIXME - LZO_ARCH_MCS51 __MODEL__" +#elif ((__MODEL__) == 1) +# define LZO_MM_SMALL 1 +#elif ((__MODEL__) == 2) +# define LZO_MM_LARGE 1 +#elif ((__MODEL__) == 3) +# define LZO_MM_TINY 1 +#elif ((__MODEL__) == 4) +# define LZO_MM_XTINY 1 +#elif ((__MODEL__) == 5) +# define LZO_MM_XSMALL 1 +#else +# error "FIXME - LZO_ARCH_MCS51 __MODEL__" +#endif +#elif (LZO_ARCH_CRAY_PVP) +# define LZO_MM_PVP 1 +#else +# define LZO_MM_FLAT 1 +#endif +#if (LZO_MM_COMPACT) +# define LZO_INFO_MM "compact" +#elif (LZO_MM_FLAT) +# define LZO_INFO_MM "flat" +#elif (LZO_MM_HUGE) +# define LZO_INFO_MM "huge" +#elif (LZO_MM_LARGE) +# define LZO_INFO_MM "large" +#elif (LZO_MM_MEDIUM) +# define LZO_INFO_MM "medium" +#elif (LZO_MM_PVP) +# define LZO_INFO_MM "pvp" +#elif (LZO_MM_SMALL) +# define LZO_INFO_MM "small" +#elif (LZO_MM_TINY) +# define LZO_INFO_MM "tiny" +#else +# error "unknown memory model" +#endif +#endif +#if !defined(__lzo_gnuc_extension__) +#if (LZO_CC_GNUC >= 0x020800ul) +# define __lzo_gnuc_extension__ __extension__ +#elif (LZO_CC_ARMCC_GNUC || LZO_CC_CLANG || LZO_CC_LLVM || LZO_CC_PATHSCALE) +# define __lzo_gnuc_extension__ __extension__ +#elif (LZO_CC_IBMC >= 600) +# define __lzo_gnuc_extension__ __extension__ +#endif +#endif +#if !defined(__lzo_gnuc_extension__) +# define __lzo_gnuc_extension__ /*empty*/ +#endif +#if !defined(lzo_has_builtin) +#if (LZO_CC_CLANG) && defined(__has_builtin) +# define lzo_has_builtin __has_builtin +#endif +#endif +#if !defined(lzo_has_builtin) +# define lzo_has_builtin(x) 0 +#endif +#if !defined(lzo_has_attribute) +#if (LZO_CC_CLANG) && defined(__has_attribute) +# define 
lzo_has_attribute __has_attribute +#endif +#endif +#if !defined(lzo_has_attribute) +# define lzo_has_attribute(x) 0 +#endif +#if !defined(lzo_has_declspec_attribute) +#if (LZO_CC_CLANG) && defined(__has_declspec_attribute) +# define lzo_has_declspec_attribute __has_declspec_attribute +#endif +#endif +#if !defined(lzo_has_declspec_attribute) +# define lzo_has_declspec_attribute(x) 0 +#endif +#if !defined(lzo_has_feature) +#if (LZO_CC_CLANG) && defined(__has_feature) +# define lzo_has_feature __has_feature +#endif +#endif +#if !defined(lzo_has_feature) +# define lzo_has_feature(x) 0 +#endif +#if !defined(lzo_has_extension) +#if (LZO_CC_CLANG) && defined(__has_extension) +# define lzo_has_extension __has_extension +#elif (LZO_CC_CLANG) && defined(__has_feature) +# define lzo_has_extension __has_feature +#endif +#endif +#if !defined(lzo_has_extension) +# define lzo_has_extension(x) 0 +#endif +#if !defined(LZO_CFG_USE_NEW_STYLE_CASTS) && defined(__cplusplus) && 0 +# if (LZO_CC_GNUC && (LZO_CC_GNUC < 0x020800ul)) +# define LZO_CFG_USE_NEW_STYLE_CASTS 0 +# elif (LZO_CC_INTELC && (__INTEL_COMPILER < 1200)) +# define LZO_CFG_USE_NEW_STYLE_CASTS 0 +# else +# define LZO_CFG_USE_NEW_STYLE_CASTS 1 +# endif +#endif +#if !defined(LZO_CFG_USE_NEW_STYLE_CASTS) +# define LZO_CFG_USE_NEW_STYLE_CASTS 0 +#endif +#if !defined(__cplusplus) +# if defined(LZO_CFG_USE_NEW_STYLE_CASTS) +# undef LZO_CFG_USE_NEW_STYLE_CASTS +# endif +# define LZO_CFG_USE_NEW_STYLE_CASTS 0 +#endif +#if !defined(LZO_REINTERPRET_CAST) +# if (LZO_CFG_USE_NEW_STYLE_CASTS) +# define LZO_REINTERPRET_CAST(t,e) (reinterpret_cast (e)) +# endif +#endif +#if !defined(LZO_REINTERPRET_CAST) +# define LZO_REINTERPRET_CAST(t,e) ((t) (e)) +#endif +#if !defined(LZO_STATIC_CAST) +# if (LZO_CFG_USE_NEW_STYLE_CASTS) +# define LZO_STATIC_CAST(t,e) (static_cast (e)) +# endif +#endif +#if !defined(LZO_STATIC_CAST) +# define LZO_STATIC_CAST(t,e) ((t) (e)) +#endif +#if !defined(LZO_STATIC_CAST2) +# define LZO_STATIC_CAST2(t1,t2,e) 
LZO_STATIC_CAST(t1, LZO_STATIC_CAST(t2, e)) +#endif +#if !defined(LZO_UNCONST_CAST) +# if (LZO_CFG_USE_NEW_STYLE_CASTS) +# define LZO_UNCONST_CAST(t,e) (const_cast (e)) +# elif (LZO_HAVE_MM_HUGE_PTR) +# define LZO_UNCONST_CAST(t,e) ((t) (e)) +# elif (LZO_CC_ARMCC_GNUC || LZO_CC_CLANG || LZO_CC_GNUC || LZO_CC_LLVM || LZO_CC_PATHSCALE) +# define LZO_UNCONST_CAST(t,e) ((t) ((void *) ((lzo_uintptr_t) ((const void *) (e))))) +# endif +#endif +#if !defined(LZO_UNCONST_CAST) +# define LZO_UNCONST_CAST(t,e) ((t) ((void *) ((const void *) (e)))) +#endif +#if !defined(LZO_UNCONST_VOLATILE_CAST) +# if (LZO_CFG_USE_NEW_STYLE_CASTS) +# define LZO_UNCONST_VOLATILE_CAST(t,e) (const_cast (e)) +# elif (LZO_HAVE_MM_HUGE_PTR) +# define LZO_UNCONST_VOLATILE_CAST(t,e) ((t) (e)) +# elif (LZO_CC_ARMCC_GNUC || LZO_CC_CLANG || LZO_CC_GNUC || LZO_CC_LLVM || LZO_CC_PATHSCALE) +# define LZO_UNCONST_VOLATILE_CAST(t,e) ((t) ((volatile void *) ((lzo_uintptr_t) ((volatile const void *) (e))))) +# endif +#endif +#if !defined(LZO_UNCONST_VOLATILE_CAST) +# define LZO_UNCONST_VOLATILE_CAST(t,e) ((t) ((volatile void *) ((volatile const void *) (e)))) +#endif +#if !defined(LZO_UNVOLATILE_CAST) +# if (LZO_CFG_USE_NEW_STYLE_CASTS) +# define LZO_UNVOLATILE_CAST(t,e) (const_cast (e)) +# elif (LZO_HAVE_MM_HUGE_PTR) +# define LZO_UNVOLATILE_CAST(t,e) ((t) (e)) +# elif (LZO_CC_ARMCC_GNUC || LZO_CC_CLANG || LZO_CC_GNUC || LZO_CC_LLVM || LZO_CC_PATHSCALE) +# define LZO_UNVOLATILE_CAST(t,e) ((t) ((void *) ((lzo_uintptr_t) ((volatile void *) (e))))) +# endif +#endif +#if !defined(LZO_UNVOLATILE_CAST) +# define LZO_UNVOLATILE_CAST(t,e) ((t) ((void *) ((volatile void *) (e)))) +#endif +#if !defined(LZO_UNVOLATILE_CONST_CAST) +# if (LZO_CFG_USE_NEW_STYLE_CASTS) +# define LZO_UNVOLATILE_CONST_CAST(t,e) (const_cast (e)) +# elif (LZO_HAVE_MM_HUGE_PTR) +# define LZO_UNVOLATILE_CONST_CAST(t,e) ((t) (e)) +# elif (LZO_CC_ARMCC_GNUC || LZO_CC_CLANG || LZO_CC_GNUC || LZO_CC_LLVM || LZO_CC_PATHSCALE) +# define 
LZO_UNVOLATILE_CONST_CAST(t,e) ((t) ((const void *) ((lzo_uintptr_t) ((volatile const void *) (e))))) +# endif +#endif +#if !defined(LZO_UNVOLATILE_CONST_CAST) +# define LZO_UNVOLATILE_CONST_CAST(t,e) ((t) ((const void *) ((volatile const void *) (e)))) +#endif +#if !defined(LZO_PCAST) +# if (LZO_HAVE_MM_HUGE_PTR) +# define LZO_PCAST(t,e) ((t) (e)) +# endif +#endif +#if !defined(LZO_PCAST) +# define LZO_PCAST(t,e) LZO_STATIC_CAST(t, LZO_STATIC_CAST(void *, e)) +#endif +#if !defined(LZO_CCAST) +# if (LZO_HAVE_MM_HUGE_PTR) +# define LZO_CCAST(t,e) ((t) (e)) +# endif +#endif +#if !defined(LZO_CCAST) +# define LZO_CCAST(t,e) LZO_STATIC_CAST(t, LZO_STATIC_CAST(const void *, e)) +#endif +#if !defined(LZO_ICONV) +# define LZO_ICONV(t,e) LZO_STATIC_CAST(t, e) +#endif +#if !defined(LZO_ICAST) +# define LZO_ICAST(t,e) LZO_STATIC_CAST(t, e) +#endif +#if !defined(LZO_ITRUNC) +# define LZO_ITRUNC(t,e) LZO_STATIC_CAST(t, e) +#endif +#if !defined(__lzo_cte) +# if (LZO_CC_MSC || LZO_CC_WATCOMC) +# define __lzo_cte(e) ((void)0,(e)) +# elif 1 +# define __lzo_cte(e) ((void)0,(e)) +# endif +#endif +#if !defined(__lzo_cte) +# define __lzo_cte(e) (e) +#endif +#if !defined(LZO_BLOCK_BEGIN) +# define LZO_BLOCK_BEGIN do { +# define LZO_BLOCK_END } while __lzo_cte(0) +#endif +#if !defined(LZO_UNUSED) +# if (LZO_CC_BORLANDC && (__BORLANDC__ >= 0x0600)) +# define LZO_UNUSED(var) ((void) &var) +# elif (LZO_CC_BORLANDC || LZO_CC_HIGHC || LZO_CC_NDPC || LZO_CC_PELLESC || LZO_CC_TURBOC) +# define LZO_UNUSED(var) if (&var) ; else +# elif (LZO_CC_CLANG && (LZO_CC_CLANG >= 0x030200ul)) +# define LZO_UNUSED(var) ((void) &var) +# elif (LZO_CC_CLANG || LZO_CC_GNUC || LZO_CC_LLVM || LZO_CC_PATHSCALE) +# define LZO_UNUSED(var) ((void) var) +# elif (LZO_CC_MSC && (_MSC_VER < 900)) +# define LZO_UNUSED(var) if (&var) ; else +# elif (LZO_CC_KEILC) +# define LZO_UNUSED(var) {extern int lzo_unused__[1-2*!(sizeof(var)>0)]; (void)lzo_unused__;} +# elif (LZO_CC_PACIFICC) +# define LZO_UNUSED(var) ((void) 
sizeof(var)) +# elif (LZO_CC_WATCOMC) && defined(__cplusplus) +# define LZO_UNUSED(var) ((void) var) +# else +# define LZO_UNUSED(var) ((void) &var) +# endif +#endif +#if !defined(LZO_UNUSED_RESULT) +# define LZO_UNUSED_RESULT(var) LZO_UNUSED(var) +#endif +#if !defined(LZO_UNUSED_FUNC) +# if (LZO_CC_BORLANDC && (__BORLANDC__ >= 0x0600)) +# define LZO_UNUSED_FUNC(func) ((void) func) +# elif (LZO_CC_BORLANDC || LZO_CC_NDPC || LZO_CC_TURBOC) +# define LZO_UNUSED_FUNC(func) if (func) ; else +# elif (LZO_CC_CLANG || LZO_CC_LLVM) +# define LZO_UNUSED_FUNC(func) ((void) &func) +# elif (LZO_CC_MSC && (_MSC_VER < 900)) +# define LZO_UNUSED_FUNC(func) if (func) ; else +# elif (LZO_CC_MSC) +# define LZO_UNUSED_FUNC(func) ((void) &func) +# elif (LZO_CC_KEILC || LZO_CC_PELLESC) +# define LZO_UNUSED_FUNC(func) {extern int lzo_unused__[1-2*!(sizeof((int)func)>0)]; (void)lzo_unused__;} +# else +# define LZO_UNUSED_FUNC(func) ((void) func) +# endif +#endif +#if !defined(LZO_UNUSED_LABEL) +# if (LZO_CC_CLANG >= 0x020800ul) +# define LZO_UNUSED_LABEL(l) (__lzo_gnuc_extension__ ((void) ((const void *) &&l))) +# elif (LZO_CC_ARMCC || LZO_CC_CLANG || LZO_CC_INTELC || LZO_CC_WATCOMC) +# define LZO_UNUSED_LABEL(l) if __lzo_cte(0) goto l +# else +# define LZO_UNUSED_LABEL(l) switch (0) case 1:goto l +# endif +#endif +#if !defined(LZO_DEFINE_UNINITIALIZED_VAR) +# if 0 +# define LZO_DEFINE_UNINITIALIZED_VAR(type,var,init) type var +# elif 0 && (LZO_CC_GNUC) +# define LZO_DEFINE_UNINITIALIZED_VAR(type,var,init) type var = var +# else +# define LZO_DEFINE_UNINITIALIZED_VAR(type,var,init) type var = init +# endif +#endif +#if !defined(__lzo_inline) +#if (LZO_CC_TURBOC && (__TURBOC__ <= 0x0295)) +#elif defined(__cplusplus) +# define __lzo_inline inline +#elif defined(__STDC_VERSION__) && (__STDC_VERSION__-0 >= 199901L) +# define __lzo_inline inline +#elif (LZO_CC_BORLANDC && (__BORLANDC__ >= 0x0550)) +# define __lzo_inline __inline +#elif (LZO_CC_ARMCC_GNUC || LZO_CC_CILLY || LZO_CC_CLANG || 
LZO_CC_GNUC || LZO_CC_LLVM || LZO_CC_PATHSCALE || LZO_CC_PGI) +# define __lzo_inline __inline__ +#elif (LZO_CC_DMC) +# define __lzo_inline __inline +#elif (LZO_CC_GHS) +# define __lzo_inline __inline__ +#elif (LZO_CC_IBMC >= 600) +# define __lzo_inline __inline__ +#elif (LZO_CC_INTELC) +# define __lzo_inline __inline +#elif (LZO_CC_MWERKS && (__MWERKS__ >= 0x2405)) +# define __lzo_inline __inline +#elif (LZO_CC_MSC && (_MSC_VER >= 900)) +# define __lzo_inline __inline +#elif (LZO_CC_SUNPROC >= 0x5100) +# define __lzo_inline __inline__ +#endif +#endif +#if defined(__lzo_inline) +# ifndef __lzo_HAVE_inline +# define __lzo_HAVE_inline 1 +# endif +#else +# define __lzo_inline /*empty*/ +#endif +#if !defined(__lzo_forceinline) +#if (LZO_CC_GNUC >= 0x030200ul) +# define __lzo_forceinline __inline__ __attribute__((__always_inline__)) +#elif (LZO_CC_IBMC >= 700) +# define __lzo_forceinline __inline__ __attribute__((__always_inline__)) +#elif (LZO_CC_INTELC_MSC && (__INTEL_COMPILER >= 450)) +# define __lzo_forceinline __forceinline +#elif (LZO_CC_INTELC_GNUC && (__INTEL_COMPILER >= 800)) +# define __lzo_forceinline __inline__ __attribute__((__always_inline__)) +#elif (LZO_CC_ARMCC_GNUC || LZO_CC_CLANG || LZO_CC_LLVM || LZO_CC_PATHSCALE) +# define __lzo_forceinline __inline__ __attribute__((__always_inline__)) +#elif (LZO_CC_MSC && (_MSC_VER >= 1200)) +# define __lzo_forceinline __forceinline +#elif (LZO_CC_PGI >= 0x0d0a00ul) +# define __lzo_forceinline __inline__ __attribute__((__always_inline__)) +#elif (LZO_CC_SUNPROC >= 0x5100) +# define __lzo_forceinline __inline__ __attribute__((__always_inline__)) +#endif +#endif +#if defined(__lzo_forceinline) +# ifndef __lzo_HAVE_forceinline +# define __lzo_HAVE_forceinline 1 +# endif +#else +# define __lzo_forceinline __lzo_inline +#endif +#if !defined(__lzo_noinline) +#if 1 && (LZO_ARCH_I386) && (LZO_CC_GNUC >= 0x040000ul) && (LZO_CC_GNUC < 0x040003ul) +# define __lzo_noinline __attribute__((__noinline__,__used__)) +#elif 
(LZO_CC_GNUC >= 0x030200ul) +# define __lzo_noinline __attribute__((__noinline__)) +#elif (LZO_CC_IBMC >= 700) +# define __lzo_noinline __attribute__((__noinline__)) +#elif (LZO_CC_INTELC_MSC && (__INTEL_COMPILER >= 600)) +# define __lzo_noinline __declspec(noinline) +#elif (LZO_CC_INTELC_GNUC && (__INTEL_COMPILER >= 800)) +# define __lzo_noinline __attribute__((__noinline__)) +#elif (LZO_CC_ARMCC_GNUC || LZO_CC_CLANG || LZO_CC_LLVM || LZO_CC_PATHSCALE) +# define __lzo_noinline __attribute__((__noinline__)) +#elif (LZO_CC_MSC && (_MSC_VER >= 1300)) +# define __lzo_noinline __declspec(noinline) +#elif (LZO_CC_MWERKS && (__MWERKS__ >= 0x3200) && (LZO_OS_WIN32 || LZO_OS_WIN64)) +# if defined(__cplusplus) +# else +# define __lzo_noinline __declspec(noinline) +# endif +#elif (LZO_CC_PGI >= 0x0d0a00ul) +# define __lzo_noinline __attribute__((__noinline__)) +#elif (LZO_CC_SUNPROC >= 0x5100) +# define __lzo_noinline __attribute__((__noinline__)) +#endif +#endif +#if defined(__lzo_noinline) +# ifndef __lzo_HAVE_noinline +# define __lzo_HAVE_noinline 1 +# endif +#else +# define __lzo_noinline /*empty*/ +#endif +#if (__lzo_HAVE_forceinline || __lzo_HAVE_noinline) && !(__lzo_HAVE_inline) +# error "unexpected configuration - check your compiler defines" +#endif +#if !defined(__lzo_static_inline) +#if (LZO_CC_IBMC) +# define __lzo_static_inline __lzo_gnuc_extension__ static __lzo_inline +#endif +#endif +#if !defined(__lzo_static_inline) +# define __lzo_static_inline static __lzo_inline +#endif +#if !defined(__lzo_static_forceinline) +#if (LZO_CC_IBMC) +# define __lzo_static_forceinline __lzo_gnuc_extension__ static __lzo_forceinline +#endif +#endif +#if !defined(__lzo_static_forceinline) +# define __lzo_static_forceinline static __lzo_forceinline +#endif +#if !defined(__lzo_static_noinline) +#if (LZO_CC_IBMC) +# define __lzo_static_noinline __lzo_gnuc_extension__ static __lzo_noinline +#endif +#endif +#if !defined(__lzo_static_noinline) +# define __lzo_static_noinline static 
__lzo_noinline +#endif +#if !defined(__lzo_c99_extern_inline) +#if defined(__GNUC_GNU_INLINE__) +# define __lzo_c99_extern_inline __lzo_inline +#elif defined(__GNUC_STDC_INLINE__) +# define __lzo_c99_extern_inline extern __lzo_inline +#elif defined(__STDC_VERSION__) && (__STDC_VERSION__-0 >= 199901L) +# define __lzo_c99_extern_inline extern __lzo_inline +#endif +#if !defined(__lzo_c99_extern_inline) && (__lzo_HAVE_inline) +# define __lzo_c99_extern_inline __lzo_inline +#endif +#endif +#if defined(__lzo_c99_extern_inline) +# ifndef __lzo_HAVE_c99_extern_inline +# define __lzo_HAVE_c99_extern_inline 1 +# endif +#else +# define __lzo_c99_extern_inline /*empty*/ +#endif +#if !defined(__lzo_may_alias) +#if (LZO_CC_GNUC >= 0x030400ul) +# define __lzo_may_alias __attribute__((__may_alias__)) +#elif (LZO_CC_CLANG >= 0x020900ul) +# define __lzo_may_alias __attribute__((__may_alias__)) +#elif (LZO_CC_INTELC_GNUC && (__INTEL_COMPILER >= 1210)) && 0 +# define __lzo_may_alias __attribute__((__may_alias__)) +#elif (LZO_CC_PGI >= 0x0d0a00ul) && 0 +# define __lzo_may_alias __attribute__((__may_alias__)) +#endif +#endif +#if defined(__lzo_may_alias) +# ifndef __lzo_HAVE_may_alias +# define __lzo_HAVE_may_alias 1 +# endif +#else +# define __lzo_may_alias /*empty*/ +#endif +#if !defined(__lzo_noreturn) +#if (LZO_CC_GNUC >= 0x020700ul) +# define __lzo_noreturn __attribute__((__noreturn__)) +#elif (LZO_CC_IBMC >= 700) +# define __lzo_noreturn __attribute__((__noreturn__)) +#elif (LZO_CC_INTELC_MSC && (__INTEL_COMPILER >= 450)) +# define __lzo_noreturn __declspec(noreturn) +#elif (LZO_CC_INTELC_GNUC && (__INTEL_COMPILER >= 600)) +# define __lzo_noreturn __attribute__((__noreturn__)) +#elif (LZO_CC_ARMCC_GNUC || LZO_CC_CLANG || LZO_CC_LLVM || LZO_CC_PATHSCALE) +# define __lzo_noreturn __attribute__((__noreturn__)) +#elif (LZO_CC_MSC && (_MSC_VER >= 1200)) +# define __lzo_noreturn __declspec(noreturn) +#elif (LZO_CC_PGI >= 0x0d0a00ul) +# define __lzo_noreturn __attribute__((__noreturn__)) 
+#endif +#endif +#if defined(__lzo_noreturn) +# ifndef __lzo_HAVE_noreturn +# define __lzo_HAVE_noreturn 1 +# endif +#else +# define __lzo_noreturn /*empty*/ +#endif +#if !defined(__lzo_nothrow) +#if (LZO_CC_GNUC >= 0x030300ul) +# define __lzo_nothrow __attribute__((__nothrow__)) +#elif (LZO_CC_INTELC_MSC && (__INTEL_COMPILER >= 450)) && defined(__cplusplus) +# define __lzo_nothrow __declspec(nothrow) +#elif (LZO_CC_INTELC_GNUC && (__INTEL_COMPILER >= 900)) +# define __lzo_nothrow __attribute__((__nothrow__)) +#elif (LZO_CC_ARMCC_GNUC || LZO_CC_CLANG || LZO_CC_LLVM || LZO_CC_PATHSCALE) +# define __lzo_nothrow __attribute__((__nothrow__)) +#elif (LZO_CC_MSC && (_MSC_VER >= 1200)) && defined(__cplusplus) +# define __lzo_nothrow __declspec(nothrow) +#endif +#endif +#if defined(__lzo_nothrow) +# ifndef __lzo_HAVE_nothrow +# define __lzo_HAVE_nothrow 1 +# endif +#else +# define __lzo_nothrow /*empty*/ +#endif +#if !defined(__lzo_restrict) +#if (LZO_CC_GNUC >= 0x030400ul) +# define __lzo_restrict __restrict__ +#elif (LZO_CC_IBMC >= 800) && !defined(__cplusplus) +# define __lzo_restrict __restrict__ +#elif (LZO_CC_IBMC >= 1210) +# define __lzo_restrict __restrict__ +#elif (LZO_CC_INTELC_MSC && (__INTEL_COMPILER >= 600)) +#elif (LZO_CC_INTELC_GNUC && (__INTEL_COMPILER >= 600)) +# define __lzo_restrict __restrict__ +#elif (LZO_CC_ARMCC_GNUC || LZO_CC_CLANG || LZO_CC_LLVM) +# define __lzo_restrict __restrict__ +#elif (LZO_CC_MSC && (_MSC_VER >= 1400)) +# define __lzo_restrict __restrict +#elif (LZO_CC_PGI >= 0x0d0a00ul) +# define __lzo_restrict __restrict__ +#endif +#endif +#if defined(__lzo_restrict) +# ifndef __lzo_HAVE_restrict +# define __lzo_HAVE_restrict 1 +# endif +#else +# define __lzo_restrict /*empty*/ +#endif +#if !defined(__lzo_alignof) +#if (LZO_CC_ARMCC || LZO_CC_CILLY || LZO_CC_CLANG || LZO_CC_GNUC || LZO_CC_LLVM || LZO_CC_PATHSCALE || LZO_CC_PGI) +# define __lzo_alignof(e) __alignof__(e) +#elif (LZO_CC_GHS) && !defined(__cplusplus) +# define __lzo_alignof(e) 
__alignof__(e) +#elif (LZO_CC_IBMC >= 600) +# define __lzo_alignof(e) (__lzo_gnuc_extension__ __alignof__(e)) +#elif (LZO_CC_INTELC && (__INTEL_COMPILER >= 700)) +# define __lzo_alignof(e) __alignof__(e) +#elif (LZO_CC_MSC && (_MSC_VER >= 1300)) +# define __lzo_alignof(e) __alignof(e) +#elif (LZO_CC_SUNPROC >= 0x5100) +# define __lzo_alignof(e) __alignof__(e) +#endif +#endif +#if defined(__lzo_alignof) +# ifndef __lzo_HAVE_alignof +# define __lzo_HAVE_alignof 1 +# endif +#endif +#if !defined(__lzo_struct_packed) +#if (LZO_CC_CLANG && (LZO_CC_CLANG < 0x020800ul)) && defined(__cplusplus) +#elif (LZO_CC_GNUC && (LZO_CC_GNUC < 0x020700ul)) +#elif (LZO_CC_GNUC && (LZO_CC_GNUC < 0x020800ul)) && defined(__cplusplus) +#elif (LZO_CC_PCC && (LZO_CC_PCC < 0x010100ul)) +#elif (LZO_CC_SUNPROC && (LZO_CC_SUNPROC < 0x5110)) && !defined(__cplusplus) +#elif (LZO_CC_GNUC >= 0x030400ul) && !(LZO_CC_PCC_GNUC) && (LZO_ARCH_AMD64 || LZO_ARCH_I386) +# define __lzo_struct_packed(s) struct s { +# define __lzo_struct_packed_end() } __attribute__((__gcc_struct__,__packed__)); +# define __lzo_struct_packed_ma_end() } __lzo_may_alias __attribute__((__gcc_struct__,__packed__)); +#elif (LZO_CC_ARMCC || LZO_CC_CLANG || LZO_CC_GNUC || LZO_CC_INTELC_GNUC || LZO_CC_LLVM || LZO_CC_PATHSCALE || (LZO_CC_PGI >= 0x0d0a00ul) || (LZO_CC_SUNPROC >= 0x5100)) +# define __lzo_struct_packed(s) struct s { +# define __lzo_struct_packed_end() } __attribute__((__packed__)); +# define __lzo_struct_packed_ma_end() } __lzo_may_alias __attribute__((__packed__)); +#elif (LZO_CC_IBMC >= 700) +# define __lzo_struct_packed(s) __lzo_gnuc_extension__ struct s { +# define __lzo_struct_packed_end() } __attribute__((__packed__)); +# define __lzo_struct_packed_ma_end() } __lzo_may_alias __attribute__((__packed__)); +#elif (LZO_CC_INTELC_MSC) || (LZO_CC_MSC && (_MSC_VER >= 1300)) +# define __lzo_struct_packed(s) __pragma(pack(push,1)) struct s { +# define __lzo_struct_packed_end() } __pragma(pack(pop)); +#elif (LZO_CC_WATCOMC && 
(__WATCOMC__ >= 900)) +# define __lzo_struct_packed(s) _Packed struct s { +# define __lzo_struct_packed_end() }; +#endif +#endif +#if defined(__lzo_struct_packed) && !defined(__lzo_struct_packed_ma) +# define __lzo_struct_packed_ma(s) __lzo_struct_packed(s) +#endif +#if defined(__lzo_struct_packed_end) && !defined(__lzo_struct_packed_ma_end) +# define __lzo_struct_packed_ma_end() __lzo_struct_packed_end() +#endif +#if !defined(__lzo_byte_struct) +#if defined(__lzo_struct_packed) +# define __lzo_byte_struct(s,n) __lzo_struct_packed(s) unsigned char a[n]; __lzo_struct_packed_end() +# define __lzo_byte_struct_ma(s,n) __lzo_struct_packed_ma(s) unsigned char a[n]; __lzo_struct_packed_ma_end() +#elif (LZO_CC_CILLY || LZO_CC_CLANG || LZO_CC_PGI || (LZO_CC_SUNPROC >= 0x5100)) +# define __lzo_byte_struct(s,n) struct s { unsigned char a[n]; } __attribute__((__packed__)); +# define __lzo_byte_struct_ma(s,n) struct s { unsigned char a[n]; } __lzo_may_alias __attribute__((__packed__)); +#endif +#endif +#if defined(__lzo_byte_struct) && !defined(__lzo_byte_struct_ma) +# define __lzo_byte_struct_ma(s,n) __lzo_byte_struct(s,n) +#endif +#if !defined(__lzo_struct_align16) && (__lzo_HAVE_alignof) +#if (LZO_CC_GNUC && (LZO_CC_GNUC < 0x030000ul)) +#elif (LZO_CC_CLANG && (LZO_CC_CLANG < 0x020800ul)) && defined(__cplusplus) +#elif (LZO_CC_CILLY || LZO_CC_PCC) +#elif (LZO_CC_INTELC_MSC) || (LZO_CC_MSC && (_MSC_VER >= 1300)) +# define __lzo_struct_align16(s) struct __declspec(align(16)) s { +# define __lzo_struct_align16_end() }; +# define __lzo_struct_align32(s) struct __declspec(align(32)) s { +# define __lzo_struct_align32_end() }; +# define __lzo_struct_align64(s) struct __declspec(align(64)) s { +# define __lzo_struct_align64_end() }; +#elif (LZO_CC_ARMCC || LZO_CC_CLANG || LZO_CC_GNUC || (LZO_CC_IBMC >= 700) || LZO_CC_INTELC_GNUC || LZO_CC_LLVM || LZO_CC_PATHSCALE) +# define __lzo_struct_align16(s) struct s { +# define __lzo_struct_align16_end() } __attribute__((__aligned__(16))); +# 
define __lzo_struct_align32(s) struct s { +# define __lzo_struct_align32_end() } __attribute__((__aligned__(32))); +# define __lzo_struct_align64(s) struct s { +# define __lzo_struct_align64_end() } __attribute__((__aligned__(64))); +#endif +#endif +#if !defined(__lzo_union_um) +#if (LZO_CC_CLANG && (LZO_CC_CLANG < 0x020800ul)) && defined(__cplusplus) +#elif (LZO_CC_GNUC && (LZO_CC_GNUC < 0x020700ul)) +#elif (LZO_CC_GNUC && (LZO_CC_GNUC < 0x020800ul)) && defined(__cplusplus) +#elif (LZO_CC_INTELC_GNUC && (__INTEL_COMPILER < 810)) +#elif (LZO_CC_PCC && (LZO_CC_PCC < 0x010100ul)) +#elif (LZO_CC_SUNPROC && (LZO_CC_SUNPROC < 0x5110)) && !defined(__cplusplus) +#elif (LZO_CC_ARMCC || LZO_CC_CLANG || LZO_CC_GNUC || LZO_CC_INTELC_GNUC || LZO_CC_LLVM || LZO_CC_PATHSCALE || (LZO_CC_PGI >= 0x0d0a00ul) || (LZO_CC_SUNPROC >= 0x5100)) +# define __lzo_union_am(s) union s { +# define __lzo_union_am_end() } __lzo_may_alias; +# define __lzo_union_um(s) union s { +# define __lzo_union_um_end() } __lzo_may_alias __attribute__((__packed__)); +#elif (LZO_CC_IBMC >= 700) +# define __lzo_union_am(s) __lzo_gnuc_extension__ union s { +# define __lzo_union_am_end() } __lzo_may_alias; +# define __lzo_union_um(s) __lzo_gnuc_extension__ union s { +# define __lzo_union_um_end() } __lzo_may_alias __attribute__((__packed__)); +#elif (LZO_CC_INTELC_MSC) || (LZO_CC_MSC && (_MSC_VER >= 1300)) +# define __lzo_union_um(s) __pragma(pack(push,1)) union s { +# define __lzo_union_um_end() } __pragma(pack(pop)); +#elif (LZO_CC_WATCOMC && (__WATCOMC__ >= 900)) +# define __lzo_union_um(s) _Packed union s { +# define __lzo_union_um_end() }; +#endif +#endif +#if !defined(__lzo_union_am) +# define __lzo_union_am(s) union s { +# define __lzo_union_am_end() }; +#endif +#if !defined(__lzo_constructor) +#if (LZO_CC_GNUC >= 0x030400ul) +# define __lzo_constructor __attribute__((__constructor__,__used__)) +#elif (LZO_CC_GNUC >= 0x020700ul) +# define __lzo_constructor __attribute__((__constructor__)) +#elif 
(LZO_CC_INTELC_GNUC && (__INTEL_COMPILER >= 800)) +# define __lzo_constructor __attribute__((__constructor__,__used__)) +#elif (LZO_CC_ARMCC_GNUC || LZO_CC_CLANG || LZO_CC_LLVM || LZO_CC_PATHSCALE) +# define __lzo_constructor __attribute__((__constructor__)) +#endif +#endif +#if defined(__lzo_constructor) +# ifndef __lzo_HAVE_constructor +# define __lzo_HAVE_constructor 1 +# endif +#endif +#if !defined(__lzo_destructor) +#if (LZO_CC_GNUC >= 0x030400ul) +# define __lzo_destructor __attribute__((__destructor__,__used__)) +#elif (LZO_CC_GNUC >= 0x020700ul) +# define __lzo_destructor __attribute__((__destructor__)) +#elif (LZO_CC_INTELC_GNUC && (__INTEL_COMPILER >= 800)) +# define __lzo_destructor __attribute__((__destructor__,__used__)) +#elif (LZO_CC_ARMCC_GNUC || LZO_CC_CLANG || LZO_CC_LLVM || LZO_CC_PATHSCALE) +# define __lzo_destructor __attribute__((__destructor__)) +#endif +#endif +#if defined(__lzo_destructor) +# ifndef __lzo_HAVE_destructor +# define __lzo_HAVE_destructor 1 +# endif +#endif +#if (__lzo_HAVE_destructor) && !(__lzo_HAVE_constructor) +# error "unexpected configuration - check your compiler defines" +#endif +#if !defined(__lzo_likely) && !defined(__lzo_unlikely) +#if (LZO_CC_GNUC >= 0x030200ul) +# define __lzo_likely(e) (__builtin_expect(!!(e),1)) +# define __lzo_unlikely(e) (__builtin_expect(!!(e),0)) +#elif (LZO_CC_IBMC >= 1010) +# define __lzo_likely(e) (__builtin_expect(!!(e),1)) +# define __lzo_unlikely(e) (__builtin_expect(!!(e),0)) +#elif (LZO_CC_INTELC && (__INTEL_COMPILER >= 800)) +# define __lzo_likely(e) (__builtin_expect(!!(e),1)) +# define __lzo_unlikely(e) (__builtin_expect(!!(e),0)) +#elif (LZO_CC_CLANG && LZO_CC_CLANG_C2) +#elif (LZO_CC_ARMCC_GNUC || LZO_CC_CLANG || LZO_CC_LLVM || LZO_CC_PATHSCALE) +# define __lzo_likely(e) (__builtin_expect(!!(e),1)) +# define __lzo_unlikely(e) (__builtin_expect(!!(e),0)) +#endif +#endif +#if defined(__lzo_likely) +# ifndef __lzo_HAVE_likely +# define __lzo_HAVE_likely 1 +# endif +#else +# define 
__lzo_likely(e) (e) +#endif +#if defined(__lzo_very_likely) +# ifndef __lzo_HAVE_very_likely +# define __lzo_HAVE_very_likely 1 +# endif +#else +# define __lzo_very_likely(e) __lzo_likely(e) +#endif +#if defined(__lzo_unlikely) +# ifndef __lzo_HAVE_unlikely +# define __lzo_HAVE_unlikely 1 +# endif +#else +# define __lzo_unlikely(e) (e) +#endif +#if defined(__lzo_very_unlikely) +# ifndef __lzo_HAVE_very_unlikely +# define __lzo_HAVE_very_unlikely 1 +# endif +#else +# define __lzo_very_unlikely(e) __lzo_unlikely(e) +#endif +#if !defined(__lzo_loop_forever) +# if (LZO_CC_IBMC) +# define __lzo_loop_forever() LZO_BLOCK_BEGIN for (;;) { ; } LZO_BLOCK_END +# else +# define __lzo_loop_forever() do { ; } while __lzo_cte(1) +# endif +#endif +#if !defined(__lzo_unreachable) +#if (LZO_CC_CLANG && (LZO_CC_CLANG >= 0x020800ul)) && lzo_has_builtin(__builtin_unreachable) +# define __lzo_unreachable() __builtin_unreachable(); +#elif (LZO_CC_GNUC >= 0x040500ul) +# define __lzo_unreachable() __builtin_unreachable(); +#elif (LZO_CC_INTELC_GNUC && (__INTEL_COMPILER >= 1300)) && 1 +# define __lzo_unreachable() __builtin_unreachable(); +#endif +#endif +#if defined(__lzo_unreachable) +# ifndef __lzo_HAVE_unreachable +# define __lzo_HAVE_unreachable 1 +# endif +#else +# if 0 +# define __lzo_unreachable() ((void)0); +# else +# define __lzo_unreachable() __lzo_loop_forever(); +# endif +#endif +#if !defined(lzo_unused_funcs_impl) +# if 1 && (LZO_CC_ARMCC_GNUC || LZO_CC_CLANG || (LZO_CC_GNUC >= 0x020700ul) || LZO_CC_INTELC_GNUC || LZO_CC_LLVM || LZO_CC_PATHSCALE || LZO_CC_PGI) +# define lzo_unused_funcs_impl(r,f) static r __attribute__((__unused__)) f +# elif 1 && (LZO_CC_BORLANDC || LZO_CC_GNUC) +# define lzo_unused_funcs_impl(r,f) static r f +# else +# define lzo_unused_funcs_impl(r,f) __lzo_static_forceinline r f +# endif +#endif +#ifndef __LZO_CTA_NAME +#if (LZO_CFG_USE_COUNTER) +# define __LZO_CTA_NAME(a) LZO_PP_ECONCAT2(a,__COUNTER__) +#else +# define __LZO_CTA_NAME(a) 
LZO_PP_ECONCAT2(a,__LINE__) +#endif +#endif +#if !defined(LZO_COMPILE_TIME_ASSERT_HEADER) +# if (LZO_CC_AZTECC || LZO_CC_ZORTECHC) +# define LZO_COMPILE_TIME_ASSERT_HEADER(e) LZO_EXTERN_C_BEGIN extern int __LZO_CTA_NAME(lzo_cta__)[1-!(e)]; LZO_EXTERN_C_END +# elif (LZO_CC_DMC || LZO_CC_SYMANTECC) +# define LZO_COMPILE_TIME_ASSERT_HEADER(e) LZO_EXTERN_C_BEGIN extern int __LZO_CTA_NAME(lzo_cta__)[1u-2*!(e)]; LZO_EXTERN_C_END +# elif (LZO_CC_TURBOC && (__TURBOC__ == 0x0295)) +# define LZO_COMPILE_TIME_ASSERT_HEADER(e) LZO_EXTERN_C_BEGIN extern int __LZO_CTA_NAME(lzo_cta__)[1-!(e)]; LZO_EXTERN_C_END +# elif (LZO_CC_CLANG && (LZO_CC_CLANG < 0x020900ul)) && defined(__cplusplus) +# define LZO_COMPILE_TIME_ASSERT_HEADER(e) LZO_EXTERN_C_BEGIN int __LZO_CTA_NAME(lzo_cta_f__)(int [1-2*!(e)]); LZO_EXTERN_C_END +# elif (LZO_CC_GNUC) && defined(__CHECKER__) && defined(__SPARSE_CHECKER__) +# define LZO_COMPILE_TIME_ASSERT_HEADER(e) LZO_EXTERN_C_BEGIN enum {__LZO_CTA_NAME(lzo_cta_e__)=1/!!(e)} __attribute__((__unused__)); LZO_EXTERN_C_END +# else +# define LZO_COMPILE_TIME_ASSERT_HEADER(e) LZO_EXTERN_C_BEGIN extern int __LZO_CTA_NAME(lzo_cta__)[1-2*!(e)]; LZO_EXTERN_C_END +# endif +#endif +#if !defined(LZO_COMPILE_TIME_ASSERT) +# if (LZO_CC_AZTECC) +# define LZO_COMPILE_TIME_ASSERT(e) {typedef int __LZO_CTA_NAME(lzo_cta_t__)[1-!(e)];} +# elif (LZO_CC_CLANG && (LZO_CC_CLANG >= 0x030000ul)) +# define LZO_COMPILE_TIME_ASSERT(e) {typedef int __LZO_CTA_NAME(lzo_cta_t__)[1-2*!(e)] __attribute__((__unused__));} +# elif (LZO_CC_DMC || LZO_CC_PACIFICC || LZO_CC_SYMANTECC || LZO_CC_ZORTECHC) +# define LZO_COMPILE_TIME_ASSERT(e) switch(0) case 1:case !(e):break; +# elif (LZO_CC_GNUC) && defined(__CHECKER__) && defined(__SPARSE_CHECKER__) +# define LZO_COMPILE_TIME_ASSERT(e) {(void) (0/!!(e));} +# elif (LZO_CC_GNUC >= 0x040700ul) && (LZO_CFG_USE_COUNTER) && defined(__cplusplus) +# define LZO_COMPILE_TIME_ASSERT(e) {enum {__LZO_CTA_NAME(lzo_cta_e__)=1/!!(e)} __attribute__((__unused__));} +# 
elif (LZO_CC_GNUC >= 0x040700ul) +# define LZO_COMPILE_TIME_ASSERT(e) {typedef int __LZO_CTA_NAME(lzo_cta_t__)[1-2*!(e)] __attribute__((__unused__));} +# elif (LZO_CC_MSC && (_MSC_VER < 900)) +# define LZO_COMPILE_TIME_ASSERT(e) switch(0) case 1:case !(e):break; +# elif (LZO_CC_TURBOC && (__TURBOC__ == 0x0295)) +# define LZO_COMPILE_TIME_ASSERT(e) switch(0) case 1:case !(e):break; +# else +# define LZO_COMPILE_TIME_ASSERT(e) {typedef int __LZO_CTA_NAME(lzo_cta_t__)[1-2*!(e)];} +# endif +#endif +#if (LZO_LANG_ASSEMBLER) +# undef LZO_COMPILE_TIME_ASSERT_HEADER +# define LZO_COMPILE_TIME_ASSERT_HEADER(e) /*empty*/ +#else +LZO_COMPILE_TIME_ASSERT_HEADER(1 == 1) +#if defined(__cplusplus) +extern "C" { LZO_COMPILE_TIME_ASSERT_HEADER(2 == 2) } +#endif +LZO_COMPILE_TIME_ASSERT_HEADER(3 == 3) +#endif +#if (LZO_ARCH_I086 || LZO_ARCH_I386) && (LZO_OS_DOS16 || LZO_OS_DOS32 || LZO_OS_OS2 || LZO_OS_OS216 || LZO_OS_WIN16 || LZO_OS_WIN32 || LZO_OS_WIN64) +# if (LZO_CC_GNUC || LZO_CC_HIGHC || LZO_CC_NDPC || LZO_CC_PACIFICC) +# elif (LZO_CC_DMC || LZO_CC_SYMANTECC || LZO_CC_ZORTECHC) +# define __lzo_cdecl __cdecl +# define __lzo_cdecl_atexit /*empty*/ +# define __lzo_cdecl_main __cdecl +# if (LZO_OS_OS2 && (LZO_CC_DMC || LZO_CC_SYMANTECC)) +# define __lzo_cdecl_qsort __pascal +# elif (LZO_OS_OS2 && (LZO_CC_ZORTECHC)) +# define __lzo_cdecl_qsort _stdcall +# else +# define __lzo_cdecl_qsort __cdecl +# endif +# elif (LZO_CC_WATCOMC) +# define __lzo_cdecl __cdecl +# else +# define __lzo_cdecl __cdecl +# define __lzo_cdecl_atexit __cdecl +# define __lzo_cdecl_main __cdecl +# define __lzo_cdecl_qsort __cdecl +# endif +# if (LZO_CC_GNUC || LZO_CC_HIGHC || LZO_CC_NDPC || LZO_CC_PACIFICC || LZO_CC_WATCOMC) +# elif (LZO_OS_OS2 && (LZO_CC_DMC || LZO_CC_SYMANTECC)) +# define __lzo_cdecl_sighandler __pascal +# elif (LZO_OS_OS2 && (LZO_CC_ZORTECHC)) +# define __lzo_cdecl_sighandler _stdcall +# elif (LZO_CC_MSC && (_MSC_VER >= 1400)) && defined(_M_CEE_PURE) +# define __lzo_cdecl_sighandler 
__clrcall +# elif (LZO_CC_MSC && (_MSC_VER >= 600 && _MSC_VER < 700)) +# if defined(_DLL) +# define __lzo_cdecl_sighandler _far _cdecl _loadds +# elif defined(_MT) +# define __lzo_cdecl_sighandler _far _cdecl +# else +# define __lzo_cdecl_sighandler _cdecl +# endif +# else +# define __lzo_cdecl_sighandler __cdecl +# endif +#elif (LZO_ARCH_I386) && (LZO_CC_WATCOMC) +# define __lzo_cdecl __cdecl +#elif (LZO_ARCH_M68K && LZO_OS_TOS && (LZO_CC_PUREC || LZO_CC_TURBOC)) +# define __lzo_cdecl cdecl +#endif +#if !defined(__lzo_cdecl) +# define __lzo_cdecl /*empty*/ +#endif +#if !defined(__lzo_cdecl_atexit) +# define __lzo_cdecl_atexit /*empty*/ +#endif +#if !defined(__lzo_cdecl_main) +# define __lzo_cdecl_main /*empty*/ +#endif +#if !defined(__lzo_cdecl_qsort) +# define __lzo_cdecl_qsort /*empty*/ +#endif +#if !defined(__lzo_cdecl_sighandler) +# define __lzo_cdecl_sighandler /*empty*/ +#endif +#if !defined(__lzo_cdecl_va) +# define __lzo_cdecl_va __lzo_cdecl +#endif +#if !(LZO_CFG_NO_WINDOWS_H) +#if !defined(LZO_HAVE_WINDOWS_H) +#if (LZO_OS_CYGWIN || (LZO_OS_EMX && defined(__RSXNT__)) || LZO_OS_WIN32 || LZO_OS_WIN64) +# if (LZO_CC_WATCOMC && (__WATCOMC__ < 1000)) +# elif ((LZO_OS_WIN32 && defined(__PW32__)) && (LZO_CC_GNUC && (LZO_CC_GNUC < 0x030000ul))) +# elif ((LZO_OS_CYGWIN || defined(__MINGW32__)) && (LZO_CC_GNUC && (LZO_CC_GNUC < 0x025f00ul))) +# else +# define LZO_HAVE_WINDOWS_H 1 +# endif +#endif +#endif +#endif +#define LZO_SIZEOF_CHAR 1 +#ifndef LZO_SIZEOF_SHORT +#if defined(SIZEOF_SHORT) +# define LZO_SIZEOF_SHORT (SIZEOF_SHORT) +#elif defined(__SIZEOF_SHORT__) +# define LZO_SIZEOF_SHORT (__SIZEOF_SHORT__) +#endif +#endif +#ifndef LZO_SIZEOF_INT +#if defined(SIZEOF_INT) +# define LZO_SIZEOF_INT (SIZEOF_INT) +#elif defined(__SIZEOF_INT__) +# define LZO_SIZEOF_INT (__SIZEOF_INT__) +#endif +#endif +#ifndef LZO_SIZEOF_LONG +#if defined(SIZEOF_LONG) +# define LZO_SIZEOF_LONG (SIZEOF_LONG) +#elif defined(__SIZEOF_LONG__) +# define LZO_SIZEOF_LONG (__SIZEOF_LONG__) 
+#endif +#endif +#ifndef LZO_SIZEOF_LONG_LONG +#if defined(SIZEOF_LONG_LONG) +# define LZO_SIZEOF_LONG_LONG (SIZEOF_LONG_LONG) +#elif defined(__SIZEOF_LONG_LONG__) +# define LZO_SIZEOF_LONG_LONG (__SIZEOF_LONG_LONG__) +#endif +#endif +#ifndef LZO_SIZEOF___INT16 +#if defined(SIZEOF___INT16) +# define LZO_SIZEOF___INT16 (SIZEOF___INT16) +#endif +#endif +#ifndef LZO_SIZEOF___INT32 +#if defined(SIZEOF___INT32) +# define LZO_SIZEOF___INT32 (SIZEOF___INT32) +#endif +#endif +#ifndef LZO_SIZEOF___INT64 +#if defined(SIZEOF___INT64) +# define LZO_SIZEOF___INT64 (SIZEOF___INT64) +#endif +#endif +#ifndef LZO_SIZEOF_VOID_P +#if defined(SIZEOF_VOID_P) +# define LZO_SIZEOF_VOID_P (SIZEOF_VOID_P) +#elif defined(__SIZEOF_POINTER__) +# define LZO_SIZEOF_VOID_P (__SIZEOF_POINTER__) +#endif +#endif +#ifndef LZO_SIZEOF_SIZE_T +#if defined(SIZEOF_SIZE_T) +# define LZO_SIZEOF_SIZE_T (SIZEOF_SIZE_T) +#elif defined(__SIZEOF_SIZE_T__) +# define LZO_SIZEOF_SIZE_T (__SIZEOF_SIZE_T__) +#endif +#endif +#ifndef LZO_SIZEOF_PTRDIFF_T +#if defined(SIZEOF_PTRDIFF_T) +# define LZO_SIZEOF_PTRDIFF_T (SIZEOF_PTRDIFF_T) +#elif defined(__SIZEOF_PTRDIFF_T__) +# define LZO_SIZEOF_PTRDIFF_T (__SIZEOF_PTRDIFF_T__) +#endif +#endif +#define __LZO_LSR(x,b) (((x)+0ul) >> (b)) +#if !defined(LZO_SIZEOF_SHORT) +# if (LZO_ARCH_CRAY_PVP) +# define LZO_SIZEOF_SHORT 8 +# elif (USHRT_MAX == LZO_0xffffL) +# define LZO_SIZEOF_SHORT 2 +# elif (__LZO_LSR(USHRT_MAX,7) == 1) +# define LZO_SIZEOF_SHORT 1 +# elif (__LZO_LSR(USHRT_MAX,15) == 1) +# define LZO_SIZEOF_SHORT 2 +# elif (__LZO_LSR(USHRT_MAX,31) == 1) +# define LZO_SIZEOF_SHORT 4 +# elif (__LZO_LSR(USHRT_MAX,63) == 1) +# define LZO_SIZEOF_SHORT 8 +# elif (__LZO_LSR(USHRT_MAX,127) == 1) +# define LZO_SIZEOF_SHORT 16 +# else +# error "LZO_SIZEOF_SHORT" +# endif +#endif +LZO_COMPILE_TIME_ASSERT_HEADER(LZO_SIZEOF_SHORT == sizeof(short)) +#if !defined(LZO_SIZEOF_INT) +# if (LZO_ARCH_CRAY_PVP) +# define LZO_SIZEOF_INT 8 +# elif (UINT_MAX == LZO_0xffffL) +# define 
LZO_SIZEOF_INT 2 +# elif (UINT_MAX == LZO_0xffffffffL) +# define LZO_SIZEOF_INT 4 +# elif (__LZO_LSR(UINT_MAX,7) == 1) +# define LZO_SIZEOF_INT 1 +# elif (__LZO_LSR(UINT_MAX,15) == 1) +# define LZO_SIZEOF_INT 2 +# elif (__LZO_LSR(UINT_MAX,31) == 1) +# define LZO_SIZEOF_INT 4 +# elif (__LZO_LSR(UINT_MAX,63) == 1) +# define LZO_SIZEOF_INT 8 +# elif (__LZO_LSR(UINT_MAX,127) == 1) +# define LZO_SIZEOF_INT 16 +# else +# error "LZO_SIZEOF_INT" +# endif +#endif +LZO_COMPILE_TIME_ASSERT_HEADER(LZO_SIZEOF_INT == sizeof(int)) +#if !defined(LZO_SIZEOF_LONG) +# if (ULONG_MAX == LZO_0xffffffffL) +# define LZO_SIZEOF_LONG 4 +# elif (__LZO_LSR(ULONG_MAX,7) == 1) +# define LZO_SIZEOF_LONG 1 +# elif (__LZO_LSR(ULONG_MAX,15) == 1) +# define LZO_SIZEOF_LONG 2 +# elif (__LZO_LSR(ULONG_MAX,31) == 1) +# define LZO_SIZEOF_LONG 4 +# elif (__LZO_LSR(ULONG_MAX,39) == 1) +# define LZO_SIZEOF_LONG 5 +# elif (__LZO_LSR(ULONG_MAX,63) == 1) +# define LZO_SIZEOF_LONG 8 +# elif (__LZO_LSR(ULONG_MAX,127) == 1) +# define LZO_SIZEOF_LONG 16 +# else +# error "LZO_SIZEOF_LONG" +# endif +#endif +LZO_COMPILE_TIME_ASSERT_HEADER(LZO_SIZEOF_LONG == sizeof(long)) +#if !defined(LZO_SIZEOF_LONG_LONG) && !defined(LZO_SIZEOF___INT64) +#if (LZO_SIZEOF_LONG > 0 && LZO_SIZEOF_LONG < 8) +# if defined(__LONG_MAX__) && defined(__LONG_LONG_MAX__) +# if (LZO_CC_GNUC >= 0x030300ul) +# if ((__LONG_MAX__-0) == (__LONG_LONG_MAX__-0)) +# define LZO_SIZEOF_LONG_LONG LZO_SIZEOF_LONG +# elif (__LZO_LSR(__LONG_LONG_MAX__,30) == 1) +# define LZO_SIZEOF_LONG_LONG 4 +# endif +# endif +# endif +#endif +#endif +#if !defined(LZO_SIZEOF_LONG_LONG) && !defined(LZO_SIZEOF___INT64) +#if (LZO_SIZEOF_LONG > 0 && LZO_SIZEOF_LONG < 8) +#if (LZO_ARCH_I086 && LZO_CC_DMC) +#elif (LZO_CC_CILLY) && defined(__GNUC__) +# define LZO_SIZEOF_LONG_LONG 8 +#elif (LZO_CC_ARMCC_GNUC || LZO_CC_CLANG || LZO_CC_GNUC || LZO_CC_LLVM || LZO_CC_PATHSCALE) +# define LZO_SIZEOF_LONG_LONG 8 +#elif ((LZO_OS_WIN32 || LZO_OS_WIN64 || defined(_WIN32)) && LZO_CC_MSC && 
(_MSC_VER >= 1400)) +# define LZO_SIZEOF_LONG_LONG 8 +#elif (LZO_OS_WIN64 || defined(_WIN64)) +# define LZO_SIZEOF___INT64 8 +#elif (LZO_ARCH_I386 && (LZO_CC_DMC)) +# define LZO_SIZEOF_LONG_LONG 8 +#elif (LZO_ARCH_I386 && (LZO_CC_SYMANTECC && (__SC__ >= 0x700))) +# define LZO_SIZEOF_LONG_LONG 8 +#elif (LZO_ARCH_I386 && (LZO_CC_INTELC && defined(__linux__))) +# define LZO_SIZEOF_LONG_LONG 8 +#elif (LZO_ARCH_I386 && (LZO_CC_MWERKS || LZO_CC_PELLESC || LZO_CC_PGI || LZO_CC_SUNPROC)) +# define LZO_SIZEOF_LONG_LONG 8 +#elif (LZO_ARCH_I386 && (LZO_CC_INTELC || LZO_CC_MSC)) +# define LZO_SIZEOF___INT64 8 +#elif ((LZO_OS_WIN32 || defined(_WIN32)) && (LZO_CC_MSC)) +# define LZO_SIZEOF___INT64 8 +#elif (LZO_ARCH_I386 && (LZO_CC_BORLANDC && (__BORLANDC__ >= 0x0520))) +# define LZO_SIZEOF___INT64 8 +#elif (LZO_ARCH_I386 && (LZO_CC_WATCOMC && (__WATCOMC__ >= 1100))) +# define LZO_SIZEOF___INT64 8 +#elif (LZO_CC_GHS && defined(__LLONG_BIT) && ((__LLONG_BIT-0) == 64)) +# define LZO_SIZEOF_LONG_LONG 8 +#elif (LZO_CC_WATCOMC && defined(_INTEGRAL_MAX_BITS) && ((_INTEGRAL_MAX_BITS-0) == 64)) +# define LZO_SIZEOF___INT64 8 +#elif (LZO_OS_OS400 || defined(__OS400__)) && defined(__LLP64_IFC__) +# define LZO_SIZEOF_LONG_LONG 8 +#elif (defined(__vms) || defined(__VMS)) && ((__INITIAL_POINTER_SIZE-0) == 64) +# define LZO_SIZEOF_LONG_LONG 8 +#elif (LZO_CC_SDCC) && (LZO_SIZEOF_INT == 2) +#elif 1 && defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) +# define LZO_SIZEOF_LONG_LONG 8 +#endif +#endif +#endif +#if defined(__cplusplus) && (LZO_CC_GNUC) +# if (LZO_CC_GNUC < 0x020800ul) +# undef LZO_SIZEOF_LONG_LONG +# endif +#endif +#if (LZO_CFG_NO_LONG_LONG) +# undef LZO_SIZEOF_LONG_LONG +#elif defined(__NO_LONG_LONG) +# undef LZO_SIZEOF_LONG_LONG +#elif defined(_NO_LONGLONG) +# undef LZO_SIZEOF_LONG_LONG +#endif +#if !defined(LZO_WORDSIZE) +#if (LZO_ARCH_ALPHA) +# define LZO_WORDSIZE 8 +#elif (LZO_ARCH_AMD64) +# define LZO_WORDSIZE 8 +#elif (LZO_ARCH_ARM64) +# define LZO_WORDSIZE 8 +#elif 
(LZO_ARCH_AVR) +# define LZO_WORDSIZE 1 +#elif (LZO_ARCH_H8300) +# if defined(__H8300H__) || defined(__H8300S__) || defined(__H8300SX__) +# define LZO_WORDSIZE 4 +# else +# define LZO_WORDSIZE 2 +# endif +#elif (LZO_ARCH_I086) +# define LZO_WORDSIZE 2 +#elif (LZO_ARCH_IA64) +# define LZO_WORDSIZE 8 +#elif (LZO_ARCH_M16C) +# define LZO_WORDSIZE 2 +#elif (LZO_ARCH_SPU) +# define LZO_WORDSIZE 4 +#elif (LZO_ARCH_Z80) +# define LZO_WORDSIZE 1 +#elif (LZO_SIZEOF_LONG == 8) && ((defined(__mips__) && defined(__R5900__)) || defined(__MIPS_PSX2__)) +# define LZO_WORDSIZE 8 +#elif (LZO_OS_OS400 || defined(__OS400__)) +# define LZO_WORDSIZE 8 +#elif (defined(__vms) || defined(__VMS)) && (__INITIAL_POINTER_SIZE+0 == 64) +# define LZO_WORDSIZE 8 +#endif +#endif +#if !defined(LZO_SIZEOF_VOID_P) +#if defined(__ILP32__) || defined(__ILP32) || defined(_ILP32) +LZO_COMPILE_TIME_ASSERT_HEADER(sizeof(int) == 4) +LZO_COMPILE_TIME_ASSERT_HEADER(sizeof(long) == 4) +# define LZO_SIZEOF_VOID_P 4 +#elif defined(__ILP64__) || defined(__ILP64) || defined(_ILP64) +LZO_COMPILE_TIME_ASSERT_HEADER(sizeof(int) == 8) +LZO_COMPILE_TIME_ASSERT_HEADER(sizeof(long) == 8) +# define LZO_SIZEOF_VOID_P 8 +#elif defined(__LLP64__) || defined(__LLP64) || defined(_LLP64) || defined(_WIN64) +LZO_COMPILE_TIME_ASSERT_HEADER(sizeof(long) == 4) +# define LZO_SIZEOF_VOID_P 8 +#elif defined(__LP64__) || defined(__LP64) || defined(_LP64) +LZO_COMPILE_TIME_ASSERT_HEADER(sizeof(long) == 8) +# define LZO_SIZEOF_VOID_P 8 +#elif (LZO_ARCH_AVR) +# define LZO_SIZEOF_VOID_P 2 +#elif (LZO_ARCH_C166 || LZO_ARCH_MCS51 || LZO_ARCH_MCS251 || LZO_ARCH_MSP430) +# define LZO_SIZEOF_VOID_P 2 +#elif (LZO_ARCH_H8300) +# if defined(__H8300H__) || defined(__H8300S__) || defined(__H8300SX__) + LZO_COMPILE_TIME_ASSERT_HEADER(LZO_WORDSIZE == 4) +# if defined(__NORMAL_MODE__) +# define LZO_SIZEOF_VOID_P 2 +# else +# define LZO_SIZEOF_VOID_P 4 +# endif +# else + LZO_COMPILE_TIME_ASSERT_HEADER(LZO_WORDSIZE == 2) +# define LZO_SIZEOF_VOID_P 2 +# 
endif +# if (LZO_CC_GNUC && (LZO_CC_GNUC < 0x040000ul)) && (LZO_SIZEOF_INT == 4) +# define LZO_SIZEOF_SIZE_T LZO_SIZEOF_INT +# define LZO_SIZEOF_PTRDIFF_T LZO_SIZEOF_INT +# endif +#elif (LZO_ARCH_I086) +# if (LZO_MM_TINY || LZO_MM_SMALL || LZO_MM_MEDIUM) +# define LZO_SIZEOF_VOID_P 2 +# elif (LZO_MM_COMPACT || LZO_MM_LARGE || LZO_MM_HUGE) +# define LZO_SIZEOF_VOID_P 4 +# else +# error "invalid LZO_ARCH_I086 memory model" +# endif +#elif (LZO_ARCH_M16C) +# if defined(__m32c_cpu__) || defined(__m32cm_cpu__) +# define LZO_SIZEOF_VOID_P 4 +# else +# define LZO_SIZEOF_VOID_P 2 +# endif +#elif (LZO_ARCH_SPU) +# define LZO_SIZEOF_VOID_P 4 +#elif (LZO_ARCH_Z80) +# define LZO_SIZEOF_VOID_P 2 +#elif (LZO_SIZEOF_LONG == 8) && ((defined(__mips__) && defined(__R5900__)) || defined(__MIPS_PSX2__)) +# define LZO_SIZEOF_VOID_P 4 +#elif (LZO_OS_OS400 || defined(__OS400__)) +# if defined(__LLP64_IFC__) +# define LZO_SIZEOF_VOID_P 8 +# define LZO_SIZEOF_SIZE_T LZO_SIZEOF_LONG +# define LZO_SIZEOF_PTRDIFF_T LZO_SIZEOF_LONG +# else +# define LZO_SIZEOF_VOID_P 16 +# define LZO_SIZEOF_SIZE_T LZO_SIZEOF_LONG +# define LZO_SIZEOF_PTRDIFF_T LZO_SIZEOF_LONG +# endif +#elif (defined(__vms) || defined(__VMS)) && (__INITIAL_POINTER_SIZE+0 == 64) +# define LZO_SIZEOF_VOID_P 8 +# define LZO_SIZEOF_SIZE_T LZO_SIZEOF_LONG +# define LZO_SIZEOF_PTRDIFF_T LZO_SIZEOF_LONG +#endif +#endif +#if !defined(LZO_SIZEOF_VOID_P) +# define LZO_SIZEOF_VOID_P LZO_SIZEOF_LONG +#endif +LZO_COMPILE_TIME_ASSERT_HEADER(LZO_SIZEOF_VOID_P == sizeof(void *)) +#if !defined(LZO_SIZEOF_SIZE_T) +#if (LZO_ARCH_I086 || LZO_ARCH_M16C) +# define LZO_SIZEOF_SIZE_T 2 +#endif +#endif +#if !defined(LZO_SIZEOF_SIZE_T) +# define LZO_SIZEOF_SIZE_T LZO_SIZEOF_VOID_P +#endif +#if defined(offsetof) +LZO_COMPILE_TIME_ASSERT_HEADER(LZO_SIZEOF_SIZE_T == sizeof(size_t)) +#endif +#if !defined(LZO_SIZEOF_PTRDIFF_T) +#if (LZO_ARCH_I086) +# if (LZO_MM_TINY || LZO_MM_SMALL || LZO_MM_MEDIUM || LZO_MM_HUGE) +# define LZO_SIZEOF_PTRDIFF_T 
LZO_SIZEOF_VOID_P +# elif (LZO_MM_COMPACT || LZO_MM_LARGE) +# if (LZO_CC_BORLANDC || LZO_CC_TURBOC) +# define LZO_SIZEOF_PTRDIFF_T 4 +# else +# define LZO_SIZEOF_PTRDIFF_T 2 +# endif +# else +# error "invalid LZO_ARCH_I086 memory model" +# endif +#endif +#endif +#if !defined(LZO_SIZEOF_PTRDIFF_T) +# define LZO_SIZEOF_PTRDIFF_T LZO_SIZEOF_SIZE_T +#endif +#if defined(offsetof) +LZO_COMPILE_TIME_ASSERT_HEADER(LZO_SIZEOF_PTRDIFF_T == sizeof(ptrdiff_t)) +#endif +#if !defined(LZO_WORDSIZE) +# define LZO_WORDSIZE LZO_SIZEOF_VOID_P +#endif +#if (LZO_ABI_NEUTRAL_ENDIAN) +# undef LZO_ABI_BIG_ENDIAN +# undef LZO_ABI_LITTLE_ENDIAN +#elif !(LZO_ABI_BIG_ENDIAN) && !(LZO_ABI_LITTLE_ENDIAN) +#if (LZO_ARCH_ALPHA) && (LZO_ARCH_CRAY_MPP) +# define LZO_ABI_BIG_ENDIAN 1 +#elif (LZO_ARCH_IA64) && (LZO_OS_POSIX_LINUX || LZO_OS_WIN64) +# define LZO_ABI_LITTLE_ENDIAN 1 +#elif (LZO_ARCH_ALPHA || LZO_ARCH_AMD64 || LZO_ARCH_BLACKFIN || LZO_ARCH_CRIS || LZO_ARCH_I086 || LZO_ARCH_I386 || LZO_ARCH_MSP430 || LZO_ARCH_RISCV) +# define LZO_ABI_LITTLE_ENDIAN 1 +#elif (LZO_ARCH_AVR32 || LZO_ARCH_M68K || LZO_ARCH_S390 || LZO_ARCH_SPU) +# define LZO_ABI_BIG_ENDIAN 1 +#elif 1 && defined(__IAR_SYSTEMS_ICC__) && defined(__LITTLE_ENDIAN__) +# if (__LITTLE_ENDIAN__ == 1) +# define LZO_ABI_LITTLE_ENDIAN 1 +# else +# define LZO_ABI_BIG_ENDIAN 1 +# endif +#elif 1 && defined(__BIG_ENDIAN__) && !defined(__LITTLE_ENDIAN__) +# define LZO_ABI_BIG_ENDIAN 1 +#elif 1 && defined(__LITTLE_ENDIAN__) && !defined(__BIG_ENDIAN__) +# define LZO_ABI_LITTLE_ENDIAN 1 +#elif 1 && (LZO_ARCH_ARM) && defined(__ARM_BIG_ENDIAN) && ((__ARM_BIG_ENDIAN)+0) +# define LZO_ABI_BIG_ENDIAN 1 +#elif 1 && (LZO_ARCH_ARM) && defined(__ARMEB__) && !defined(__ARMEL__) +# define LZO_ABI_BIG_ENDIAN 1 +#elif 1 && (LZO_ARCH_ARM) && defined(__ARMEL__) && !defined(__ARMEB__) +# define LZO_ABI_LITTLE_ENDIAN 1 +#elif 1 && (LZO_ARCH_ARM) && defined(_MSC_VER) && defined(_WIN32) +# define LZO_ABI_LITTLE_ENDIAN 1 +#elif 1 && (LZO_ARCH_ARM && 
LZO_CC_ARMCC_ARMCC) +# if defined(__BIG_ENDIAN) && defined(__LITTLE_ENDIAN) +# error "unexpected configuration - check your compiler defines" +# elif defined(__BIG_ENDIAN) +# define LZO_ABI_BIG_ENDIAN 1 +# else +# define LZO_ABI_LITTLE_ENDIAN 1 +# endif +# define LZO_ABI_LITTLE_ENDIAN 1 +#elif 1 && (LZO_ARCH_ARM64) && defined(__ARM_BIG_ENDIAN) && ((__ARM_BIG_ENDIAN)+0) +# define LZO_ABI_BIG_ENDIAN 1 +#elif 1 && (LZO_ARCH_ARM64) && defined(__AARCH64EB__) && !defined(__AARCH64EL__) +# define LZO_ABI_BIG_ENDIAN 1 +#elif 1 && (LZO_ARCH_ARM64) && defined(__AARCH64EL__) && !defined(__AARCH64EB__) +# define LZO_ABI_LITTLE_ENDIAN 1 +#elif 1 && (LZO_ARCH_ARM64) && defined(_MSC_VER) && defined(_WIN32) +# define LZO_ABI_LITTLE_ENDIAN 1 +#elif 1 && (LZO_ARCH_MIPS) && defined(__MIPSEB__) && !defined(__MIPSEL__) +# define LZO_ABI_BIG_ENDIAN 1 +#elif 1 && (LZO_ARCH_MIPS) && defined(__MIPSEL__) && !defined(__MIPSEB__) +# define LZO_ABI_LITTLE_ENDIAN 1 +#endif +#endif +#if (LZO_ABI_BIG_ENDIAN) && (LZO_ABI_LITTLE_ENDIAN) +# error "unexpected configuration - check your compiler defines" +#endif +#if (LZO_ABI_BIG_ENDIAN) +# define LZO_INFO_ABI_ENDIAN "be" +#elif (LZO_ABI_LITTLE_ENDIAN) +# define LZO_INFO_ABI_ENDIAN "le" +#elif (LZO_ABI_NEUTRAL_ENDIAN) +# define LZO_INFO_ABI_ENDIAN "neutral" +#endif +#if (LZO_SIZEOF_INT == 1 && LZO_SIZEOF_LONG == 2 && LZO_SIZEOF_VOID_P == 2) +# define LZO_ABI_I8LP16 1 +# define LZO_INFO_ABI_PM "i8lp16" +#elif (LZO_SIZEOF_INT == 2 && LZO_SIZEOF_LONG == 2 && LZO_SIZEOF_VOID_P == 2) +# define LZO_ABI_ILP16 1 +# define LZO_INFO_ABI_PM "ilp16" +#elif (LZO_SIZEOF_INT == 2 && LZO_SIZEOF_LONG == 4 && LZO_SIZEOF_VOID_P == 4) +# define LZO_ABI_LP32 1 +# define LZO_INFO_ABI_PM "lp32" +#elif (LZO_SIZEOF_INT == 4 && LZO_SIZEOF_LONG == 4 && LZO_SIZEOF_VOID_P == 4) +# define LZO_ABI_ILP32 1 +# define LZO_INFO_ABI_PM "ilp32" +#elif (LZO_SIZEOF_INT == 4 && LZO_SIZEOF_LONG == 4 && LZO_SIZEOF_VOID_P == 8 && LZO_SIZEOF_SIZE_T == 8) +# define LZO_ABI_LLP64 1 +# define 
LZO_INFO_ABI_PM "llp64" +#elif (LZO_SIZEOF_INT == 4 && LZO_SIZEOF_LONG == 8 && LZO_SIZEOF_VOID_P == 8) +# define LZO_ABI_LP64 1 +# define LZO_INFO_ABI_PM "lp64" +#elif (LZO_SIZEOF_INT == 8 && LZO_SIZEOF_LONG == 8 && LZO_SIZEOF_VOID_P == 8) +# define LZO_ABI_ILP64 1 +# define LZO_INFO_ABI_PM "ilp64" +#elif (LZO_SIZEOF_INT == 4 && LZO_SIZEOF_LONG == 8 && LZO_SIZEOF_VOID_P == 4) +# define LZO_ABI_IP32L64 1 +# define LZO_INFO_ABI_PM "ip32l64" +#endif +#if (LZO_SIZEOF_INT == 4 && LZO_SIZEOF_VOID_P == 4 && LZO_WORDSIZE == 8) +# define LZO_ABI_IP32W64 1 +# ifndef LZO_INFO_ABI_PM +# define LZO_INFO_ABI_PM "ip32w64" +# endif +#endif +#if 0 +#elif !defined(__LZO_LIBC_OVERRIDE) +#if (LZO_LIBC_NAKED) +# define LZO_INFO_LIBC "naked" +#elif (LZO_LIBC_FREESTANDING) +# define LZO_INFO_LIBC "freestanding" +#elif (LZO_LIBC_MOSTLY_FREESTANDING) +# define LZO_INFO_LIBC "mfreestanding" +#elif (LZO_LIBC_ISOC90) +# define LZO_INFO_LIBC "isoc90" +#elif (LZO_LIBC_ISOC99) +# define LZO_INFO_LIBC "isoc99" +#elif (LZO_CC_ARMCC_ARMCC) && defined(__ARMCLIB_VERSION) +# define LZO_LIBC_ISOC90 1 +# define LZO_INFO_LIBC "isoc90" +#elif defined(__dietlibc__) +# define LZO_LIBC_DIETLIBC 1 +# define LZO_INFO_LIBC "dietlibc" +#elif defined(_NEWLIB_VERSION) +# define LZO_LIBC_NEWLIB 1 +# define LZO_INFO_LIBC "newlib" +#elif defined(__UCLIBC__) && defined(__UCLIBC_MAJOR__) && defined(__UCLIBC_MINOR__) +# if defined(__UCLIBC_SUBLEVEL__) +# define LZO_LIBC_UCLIBC (__UCLIBC_MAJOR__ * 0x10000L + (__UCLIBC_MINOR__-0) * 0x100 + (__UCLIBC_SUBLEVEL__-0)) +# else +# define LZO_LIBC_UCLIBC 0x00090bL +# endif +# define LZO_INFO_LIBC "uc" "libc" +#elif defined(__GLIBC__) && defined(__GLIBC_MINOR__) +# define LZO_LIBC_GLIBC (__GLIBC__ * 0x10000L + (__GLIBC_MINOR__-0) * 0x100) +# define LZO_INFO_LIBC "glibc" +#elif (LZO_CC_MWERKS) && defined(__MSL__) +# define LZO_LIBC_MSL __MSL__ +# define LZO_INFO_LIBC "msl" +#elif 1 && defined(__IAR_SYSTEMS_ICC__) +# define LZO_LIBC_ISOC90 1 +# define LZO_INFO_LIBC "isoc90" +#else 
+# define LZO_LIBC_DEFAULT 1 +# define LZO_INFO_LIBC "default" +#endif +#endif +#if (LZO_ARCH_I386 && (LZO_OS_DOS32 || LZO_OS_WIN32) && (LZO_CC_DMC || LZO_CC_INTELC || LZO_CC_MSC || LZO_CC_PELLESC)) +# define LZO_ASM_SYNTAX_MSC 1 +#elif (LZO_OS_WIN64 && (LZO_CC_DMC || LZO_CC_INTELC || LZO_CC_MSC || LZO_CC_PELLESC)) +#elif (LZO_ARCH_I386 && LZO_CC_GNUC && (LZO_CC_GNUC == 0x011f00ul)) +#elif (LZO_ARCH_I386 && (LZO_CC_CLANG || LZO_CC_GNUC || LZO_CC_INTELC || LZO_CC_PATHSCALE)) +# define LZO_ASM_SYNTAX_GNUC 1 +#elif (LZO_ARCH_AMD64 && (LZO_CC_CLANG || LZO_CC_GNUC || LZO_CC_INTELC || LZO_CC_PATHSCALE)) +# define LZO_ASM_SYNTAX_GNUC 1 +#elif (LZO_CC_GNUC) +# define LZO_ASM_SYNTAX_GNUC 1 +#endif +#if (LZO_ASM_SYNTAX_GNUC) +#if (LZO_ARCH_I386 && LZO_CC_GNUC && (LZO_CC_GNUC < 0x020000ul)) +# define __LZO_ASM_CLOBBER "ax" +# define __LZO_ASM_CLOBBER_LIST_CC /*empty*/ +# define __LZO_ASM_CLOBBER_LIST_CC_MEMORY /*empty*/ +# define __LZO_ASM_CLOBBER_LIST_EMPTY /*empty*/ +#elif (LZO_CC_INTELC && (__INTEL_COMPILER < 1000)) +# define __LZO_ASM_CLOBBER "memory" +# define __LZO_ASM_CLOBBER_LIST_CC /*empty*/ +# define __LZO_ASM_CLOBBER_LIST_CC_MEMORY : "memory" +# define __LZO_ASM_CLOBBER_LIST_EMPTY /*empty*/ +#else +# define __LZO_ASM_CLOBBER "cc", "memory" +# define __LZO_ASM_CLOBBER_LIST_CC : "cc" +# define __LZO_ASM_CLOBBER_LIST_CC_MEMORY : "cc", "memory" +# define __LZO_ASM_CLOBBER_LIST_EMPTY /*empty*/ +#endif +#endif +#if (LZO_ARCH_ALPHA) +# define LZO_OPT_AVOID_UINT_INDEX 1 +#elif (LZO_ARCH_AMD64) +# define LZO_OPT_AVOID_INT_INDEX 1 +# define LZO_OPT_AVOID_UINT_INDEX 1 +# ifndef LZO_OPT_UNALIGNED16 +# define LZO_OPT_UNALIGNED16 1 +# endif +# ifndef LZO_OPT_UNALIGNED32 +# define LZO_OPT_UNALIGNED32 1 +# endif +# ifndef LZO_OPT_UNALIGNED64 +# define LZO_OPT_UNALIGNED64 1 +# endif +#elif (LZO_ARCH_ARM) +# if defined(__ARM_FEATURE_UNALIGNED) +# if ((__ARM_FEATURE_UNALIGNED)+0) +# ifndef LZO_OPT_UNALIGNED16 +# define LZO_OPT_UNALIGNED16 1 +# endif +# ifndef LZO_OPT_UNALIGNED32 +# 
define LZO_OPT_UNALIGNED32 1 +# endif +# endif +# elif 1 && (LZO_ARCH_ARM_THUMB2) +# ifndef LZO_OPT_UNALIGNED16 +# define LZO_OPT_UNALIGNED16 1 +# endif +# ifndef LZO_OPT_UNALIGNED32 +# define LZO_OPT_UNALIGNED32 1 +# endif +# elif 1 && defined(__ARM_ARCH) && ((__ARM_ARCH)+0 >= 7) +# ifndef LZO_OPT_UNALIGNED16 +# define LZO_OPT_UNALIGNED16 1 +# endif +# ifndef LZO_OPT_UNALIGNED32 +# define LZO_OPT_UNALIGNED32 1 +# endif +# elif 1 && defined(__TARGET_ARCH_ARM) && ((__TARGET_ARCH_ARM)+0 >= 7) +# ifndef LZO_OPT_UNALIGNED16 +# define LZO_OPT_UNALIGNED16 1 +# endif +# ifndef LZO_OPT_UNALIGNED32 +# define LZO_OPT_UNALIGNED32 1 +# endif +# elif 1 && defined(__TARGET_ARCH_ARM) && ((__TARGET_ARCH_ARM)+0 >= 6) && (defined(__TARGET_PROFILE_A) || defined(__TARGET_PROFILE_R)) +# ifndef LZO_OPT_UNALIGNED16 +# define LZO_OPT_UNALIGNED16 1 +# endif +# ifndef LZO_OPT_UNALIGNED32 +# define LZO_OPT_UNALIGNED32 1 +# endif +# elif 1 && defined(_MSC_VER) && defined(_M_ARM) && ((_M_ARM)+0 >= 7) +# ifndef LZO_OPT_UNALIGNED16 +# define LZO_OPT_UNALIGNED16 1 +# endif +# ifndef LZO_OPT_UNALIGNED32 +# define LZO_OPT_UNALIGNED32 1 +# endif +# endif +#elif (LZO_ARCH_ARM64) +# ifndef LZO_OPT_UNALIGNED16 +# define LZO_OPT_UNALIGNED16 1 +# endif +# ifndef LZO_OPT_UNALIGNED32 +# define LZO_OPT_UNALIGNED32 1 +# endif +# ifndef LZO_OPT_UNALIGNED64 +# define LZO_OPT_UNALIGNED64 1 +# endif +#elif (LZO_ARCH_CRIS) +# ifndef LZO_OPT_UNALIGNED16 +# define LZO_OPT_UNALIGNED16 1 +# endif +# ifndef LZO_OPT_UNALIGNED32 +# define LZO_OPT_UNALIGNED32 1 +# endif +#elif (LZO_ARCH_I386) +# ifndef LZO_OPT_UNALIGNED16 +# define LZO_OPT_UNALIGNED16 1 +# endif +# ifndef LZO_OPT_UNALIGNED32 +# define LZO_OPT_UNALIGNED32 1 +# endif +#elif (LZO_ARCH_IA64) +# define LZO_OPT_AVOID_INT_INDEX 1 +# define LZO_OPT_AVOID_UINT_INDEX 1 +# define LZO_OPT_PREFER_POSTINC 1 +#elif (LZO_ARCH_M68K) +# define LZO_OPT_PREFER_POSTINC 1 +# define LZO_OPT_PREFER_PREDEC 1 +# if defined(__mc68020__) && !defined(__mcoldfire__) +# ifndef 
LZO_OPT_UNALIGNED16 +# define LZO_OPT_UNALIGNED16 1 +# endif +# ifndef LZO_OPT_UNALIGNED32 +# define LZO_OPT_UNALIGNED32 1 +# endif +# endif +#elif (LZO_ARCH_MIPS) +# define LZO_OPT_AVOID_UINT_INDEX 1 +#elif (LZO_ARCH_POWERPC) +# define LZO_OPT_PREFER_PREINC 1 +# define LZO_OPT_PREFER_PREDEC 1 +# if (LZO_ABI_BIG_ENDIAN) || (LZO_WORDSIZE == 8) +# ifndef LZO_OPT_UNALIGNED16 +# define LZO_OPT_UNALIGNED16 1 +# endif +# ifndef LZO_OPT_UNALIGNED32 +# define LZO_OPT_UNALIGNED32 1 +# endif +# if (LZO_WORDSIZE == 8) +# ifndef LZO_OPT_UNALIGNED64 +# define LZO_OPT_UNALIGNED64 1 +# endif +# endif +# endif +#elif (LZO_ARCH_RISCV) +# define LZO_OPT_AVOID_UINT_INDEX 1 +# ifndef LZO_OPT_UNALIGNED16 +# define LZO_OPT_UNALIGNED16 1 +# endif +# ifndef LZO_OPT_UNALIGNED32 +# define LZO_OPT_UNALIGNED32 1 +# endif +# if (LZO_WORDSIZE == 8) +# ifndef LZO_OPT_UNALIGNED64 +# define LZO_OPT_UNALIGNED64 1 +# endif +# endif +#elif (LZO_ARCH_S390) +# ifndef LZO_OPT_UNALIGNED16 +# define LZO_OPT_UNALIGNED16 1 +# endif +# ifndef LZO_OPT_UNALIGNED32 +# define LZO_OPT_UNALIGNED32 1 +# endif +# if (LZO_WORDSIZE == 8) +# ifndef LZO_OPT_UNALIGNED64 +# define LZO_OPT_UNALIGNED64 1 +# endif +# endif +#elif (LZO_ARCH_SH) +# define LZO_OPT_PREFER_POSTINC 1 +# define LZO_OPT_PREFER_PREDEC 1 +#endif +#ifndef LZO_CFG_NO_INLINE_ASM +#if (LZO_ABI_NEUTRAL_ENDIAN) || (LZO_ARCH_GENERIC) +# define LZO_CFG_NO_INLINE_ASM 1 +#elif (LZO_CC_LLVM) +# define LZO_CFG_NO_INLINE_ASM 1 +#endif +#endif +#if (LZO_CFG_NO_INLINE_ASM) +# undef LZO_ASM_SYNTAX_MSC +# undef LZO_ASM_SYNTAX_GNUC +# undef __LZO_ASM_CLOBBER +# undef __LZO_ASM_CLOBBER_LIST_CC +# undef __LZO_ASM_CLOBBER_LIST_CC_MEMORY +# undef __LZO_ASM_CLOBBER_LIST_EMPTY +#endif +#ifndef LZO_CFG_NO_UNALIGNED +#if (LZO_ABI_NEUTRAL_ENDIAN) || (LZO_ARCH_GENERIC) +# define LZO_CFG_NO_UNALIGNED 1 +#endif +#endif +#if (LZO_CFG_NO_UNALIGNED) +# undef LZO_OPT_UNALIGNED16 +# undef LZO_OPT_UNALIGNED32 +# undef LZO_OPT_UNALIGNED64 +#endif +#if defined(__LZO_INFOSTR_MM) +#elif 
(LZO_MM_FLAT) && (defined(__LZO_INFOSTR_PM) || defined(LZO_INFO_ABI_PM)) +# define __LZO_INFOSTR_MM "" +#elif defined(LZO_INFO_MM) +# define __LZO_INFOSTR_MM "." LZO_INFO_MM +#else +# define __LZO_INFOSTR_MM "" +#endif +#if defined(__LZO_INFOSTR_PM) +#elif defined(LZO_INFO_ABI_PM) +# define __LZO_INFOSTR_PM "." LZO_INFO_ABI_PM +#else +# define __LZO_INFOSTR_PM "" +#endif +#if defined(__LZO_INFOSTR_ENDIAN) +#elif defined(LZO_INFO_ABI_ENDIAN) +# define __LZO_INFOSTR_ENDIAN "." LZO_INFO_ABI_ENDIAN +#else +# define __LZO_INFOSTR_ENDIAN "" +#endif +#if defined(__LZO_INFOSTR_OSNAME) +#elif defined(LZO_INFO_OS_CONSOLE) +# define __LZO_INFOSTR_OSNAME LZO_INFO_OS "." LZO_INFO_OS_CONSOLE +#elif defined(LZO_INFO_OS_POSIX) +# define __LZO_INFOSTR_OSNAME LZO_INFO_OS "." LZO_INFO_OS_POSIX +#else +# define __LZO_INFOSTR_OSNAME LZO_INFO_OS +#endif +#if defined(__LZO_INFOSTR_LIBC) +#elif defined(LZO_INFO_LIBC) +# define __LZO_INFOSTR_LIBC "." LZO_INFO_LIBC +#else +# define __LZO_INFOSTR_LIBC "" +#endif +#if defined(__LZO_INFOSTR_CCVER) +#elif defined(LZO_INFO_CCVER) +# define __LZO_INFOSTR_CCVER " " LZO_INFO_CCVER +#else +# define __LZO_INFOSTR_CCVER "" +#endif +#define LZO_INFO_STRING \ + LZO_INFO_ARCH __LZO_INFOSTR_MM __LZO_INFOSTR_PM __LZO_INFOSTR_ENDIAN \ + " " __LZO_INFOSTR_OSNAME __LZO_INFOSTR_LIBC " " LZO_INFO_CC __LZO_INFOSTR_CCVER +#if !(LZO_CFG_SKIP_LZO_TYPES) +#if (!(LZO_SIZEOF_SHORT+0 > 0 && LZO_SIZEOF_INT+0 > 0 && LZO_SIZEOF_LONG+0 > 0)) +# error "missing defines for sizes" +#endif +#if (!(LZO_SIZEOF_PTRDIFF_T+0 > 0 && LZO_SIZEOF_SIZE_T+0 > 0 && LZO_SIZEOF_VOID_P+0 > 0)) +# error "missing defines for sizes" +#endif +#define LZO_TYPEOF_CHAR 1u +#define LZO_TYPEOF_SHORT 2u +#define LZO_TYPEOF_INT 3u +#define LZO_TYPEOF_LONG 4u +#define LZO_TYPEOF_LONG_LONG 5u +#define LZO_TYPEOF___INT8 17u +#define LZO_TYPEOF___INT16 18u +#define LZO_TYPEOF___INT32 19u +#define LZO_TYPEOF___INT64 20u +#define LZO_TYPEOF___INT128 21u +#define LZO_TYPEOF___INT256 22u +#define 
LZO_TYPEOF___MODE_QI 33u +#define LZO_TYPEOF___MODE_HI 34u +#define LZO_TYPEOF___MODE_SI 35u +#define LZO_TYPEOF___MODE_DI 36u +#define LZO_TYPEOF___MODE_TI 37u +#define LZO_TYPEOF_CHAR_P 129u +#if !defined(lzo_llong_t) +#if (LZO_SIZEOF_LONG_LONG+0 > 0) +# if !(LZO_LANG_ASSEMBLER) + __lzo_gnuc_extension__ typedef long long lzo_llong_t__; + __lzo_gnuc_extension__ typedef unsigned long long lzo_ullong_t__; +# endif +# define lzo_llong_t lzo_llong_t__ +# define lzo_ullong_t lzo_ullong_t__ +#endif +#endif +#if !defined(lzo_int16e_t) +#if (LZO_CFG_PREFER_TYPEOF_ACC_INT16E_T == LZO_TYPEOF_SHORT) && (LZO_SIZEOF_SHORT != 2) +# undef LZO_CFG_PREFER_TYPEOF_ACC_INT16E_T +#endif +#if (LZO_SIZEOF_LONG == 2) && !(LZO_CFG_PREFER_TYPEOF_ACC_INT16E_T == LZO_TYPEOF_SHORT) +# define lzo_int16e_t long +# define lzo_uint16e_t unsigned long +# define LZO_TYPEOF_LZO_INT16E_T LZO_TYPEOF_LONG +#elif (LZO_SIZEOF_INT == 2) && !(LZO_CFG_PREFER_TYPEOF_ACC_INT16E_T == LZO_TYPEOF_SHORT) +# define lzo_int16e_t int +# define lzo_uint16e_t unsigned int +# define LZO_TYPEOF_LZO_INT16E_T LZO_TYPEOF_INT +#elif (LZO_SIZEOF_SHORT == 2) +# define lzo_int16e_t short int +# define lzo_uint16e_t unsigned short int +# define LZO_TYPEOF_LZO_INT16E_T LZO_TYPEOF_SHORT +#elif 1 && !(LZO_CFG_TYPE_NO_MODE_HI) && (LZO_CC_CLANG || (LZO_CC_GNUC >= 0x025f00ul) || LZO_CC_LLVM) +# if !(LZO_LANG_ASSEMBLER) + typedef int lzo_int16e_hi_t__ __attribute__((__mode__(__HI__))); + typedef unsigned int lzo_uint16e_hi_t__ __attribute__((__mode__(__HI__))); +# endif +# define lzo_int16e_t lzo_int16e_hi_t__ +# define lzo_uint16e_t lzo_uint16e_hi_t__ +# define LZO_TYPEOF_LZO_INT16E_T LZO_TYPEOF___MODE_HI +#elif (LZO_SIZEOF___INT16 == 2) +# define lzo_int16e_t __int16 +# define lzo_uint16e_t unsigned __int16 +# define LZO_TYPEOF_LZO_INT16E_T LZO_TYPEOF___INT16 +#else +#endif +#endif +#if defined(lzo_int16e_t) +# define LZO_SIZEOF_LZO_INT16E_T 2 + LZO_COMPILE_TIME_ASSERT_HEADER(sizeof(lzo_int16e_t) == 2) + 
LZO_COMPILE_TIME_ASSERT_HEADER(sizeof(lzo_int16e_t) == LZO_SIZEOF_LZO_INT16E_T) +#endif +#if !defined(lzo_int32e_t) +#if (LZO_CFG_PREFER_TYPEOF_ACC_INT32E_T == LZO_TYPEOF_INT) && (LZO_SIZEOF_INT != 4) +# undef LZO_CFG_PREFER_TYPEOF_ACC_INT32E_T +#endif +#if (LZO_SIZEOF_LONG == 4) && !(LZO_CFG_PREFER_TYPEOF_ACC_INT32E_T == LZO_TYPEOF_INT) +# define lzo_int32e_t long int +# define lzo_uint32e_t unsigned long int +# define LZO_TYPEOF_LZO_INT32E_T LZO_TYPEOF_LONG +#elif (LZO_SIZEOF_INT == 4) +# define lzo_int32e_t int +# define lzo_uint32e_t unsigned int +# define LZO_TYPEOF_LZO_INT32E_T LZO_TYPEOF_INT +#elif (LZO_SIZEOF_SHORT == 4) +# define lzo_int32e_t short int +# define lzo_uint32e_t unsigned short int +# define LZO_TYPEOF_LZO_INT32E_T LZO_TYPEOF_SHORT +#elif (LZO_SIZEOF_LONG_LONG == 4) +# define lzo_int32e_t lzo_llong_t +# define lzo_uint32e_t lzo_ullong_t +# define LZO_TYPEOF_LZO_INT32E_T LZO_TYPEOF_LONG_LONG +#elif 1 && !(LZO_CFG_TYPE_NO_MODE_SI) && (LZO_CC_CLANG || (LZO_CC_GNUC >= 0x025f00ul) || LZO_CC_LLVM) && (__INT_MAX__+0 > 2147483647L) +# if !(LZO_LANG_ASSEMBLER) + typedef int lzo_int32e_si_t__ __attribute__((__mode__(__SI__))); + typedef unsigned int lzo_uint32e_si_t__ __attribute__((__mode__(__SI__))); +# endif +# define lzo_int32e_t lzo_int32e_si_t__ +# define lzo_uint32e_t lzo_uint32e_si_t__ +# define LZO_TYPEOF_LZO_INT32E_T LZO_TYPEOF___MODE_SI +#elif 1 && !(LZO_CFG_TYPE_NO_MODE_SI) && (LZO_CC_GNUC >= 0x025f00ul) && defined(__AVR__) && (__LONG_MAX__+0 == 32767L) +# if !(LZO_LANG_ASSEMBLER) + typedef int lzo_int32e_si_t__ __attribute__((__mode__(__SI__))); + typedef unsigned int lzo_uint32e_si_t__ __attribute__((__mode__(__SI__))); +# endif +# define lzo_int32e_t lzo_int32e_si_t__ +# define lzo_uint32e_t lzo_uint32e_si_t__ +# define LZO_INT32_C(c) (c##LL) +# define LZO_UINT32_C(c) (c##ULL) +# define LZO_TYPEOF_LZO_INT32E_T LZO_TYPEOF___MODE_SI +#elif (LZO_SIZEOF___INT32 == 4) +# define lzo_int32e_t __int32 +# define lzo_uint32e_t unsigned __int32 +# 
define LZO_TYPEOF_LZO_INT32E_T LZO_TYPEOF___INT32 +#else +#endif +#endif +#if defined(lzo_int32e_t) +# define LZO_SIZEOF_LZO_INT32E_T 4 + LZO_COMPILE_TIME_ASSERT_HEADER(sizeof(lzo_int32e_t) == 4) + LZO_COMPILE_TIME_ASSERT_HEADER(sizeof(lzo_int32e_t) == LZO_SIZEOF_LZO_INT32E_T) +#endif +#if !defined(lzo_int64e_t) +#if (LZO_SIZEOF___INT64 == 8) +# if (LZO_CC_BORLANDC) && !defined(LZO_CFG_PREFER_TYPEOF_ACC_INT64E_T) +# define LZO_CFG_PREFER_TYPEOF_ACC_INT64E_T LZO_TYPEOF___INT64 +# endif +#endif +#if (LZO_CFG_PREFER_TYPEOF_ACC_INT64E_T == LZO_TYPEOF_LONG_LONG) && (LZO_SIZEOF_LONG_LONG != 8) +# undef LZO_CFG_PREFER_TYPEOF_ACC_INT64E_T +#endif +#if (LZO_CFG_PREFER_TYPEOF_ACC_INT64E_T == LZO_TYPEOF___INT64) && (LZO_SIZEOF___INT64 != 8) +# undef LZO_CFG_PREFER_TYPEOF_ACC_INT64E_T +#endif +#if (LZO_SIZEOF_INT == 8) && (LZO_SIZEOF_INT < LZO_SIZEOF_LONG) +# define lzo_int64e_t int +# define lzo_uint64e_t unsigned int +# define LZO_TYPEOF_LZO_INT64E_T LZO_TYPEOF_INT +#elif (LZO_SIZEOF_LONG == 8) && !(LZO_CFG_PREFER_TYPEOF_ACC_INT64E_T == LZO_TYPEOF_LONG_LONG) && !(LZO_CFG_PREFER_TYPEOF_ACC_INT64E_T == LZO_TYPEOF___INT64) +# define lzo_int64e_t long int +# define lzo_uint64e_t unsigned long int +# define LZO_TYPEOF_LZO_INT64E_T LZO_TYPEOF_LONG +#elif (LZO_SIZEOF_LONG_LONG == 8) && !(LZO_CFG_PREFER_TYPEOF_ACC_INT64E_T == LZO_TYPEOF___INT64) +# define lzo_int64e_t lzo_llong_t +# define lzo_uint64e_t lzo_ullong_t +# define LZO_TYPEOF_LZO_INT64E_T LZO_TYPEOF_LONG_LONG +# if (LZO_CC_BORLANDC) +# define LZO_INT64_C(c) ((c) + 0ll) +# define LZO_UINT64_C(c) ((c) + 0ull) +# elif 0 +# define LZO_INT64_C(c) (__lzo_gnuc_extension__ (c##LL)) +# define LZO_UINT64_C(c) (__lzo_gnuc_extension__ (c##ULL)) +# else +# define LZO_INT64_C(c) (c##LL) +# define LZO_UINT64_C(c) (c##ULL) +# endif +#elif (LZO_SIZEOF___INT64 == 8) +# define lzo_int64e_t __int64 +# define lzo_uint64e_t unsigned __int64 +# define LZO_TYPEOF_LZO_INT64E_T LZO_TYPEOF___INT64 +# if (LZO_CC_BORLANDC) +# define LZO_INT64_C(c) 
((c) + 0i64) +# define LZO_UINT64_C(c) ((c) + 0ui64) +# else +# define LZO_INT64_C(c) (c##i64) +# define LZO_UINT64_C(c) (c##ui64) +# endif +#else +#endif +#endif +#if defined(lzo_int64e_t) +# define LZO_SIZEOF_LZO_INT64E_T 8 + LZO_COMPILE_TIME_ASSERT_HEADER(sizeof(lzo_int64e_t) == 8) + LZO_COMPILE_TIME_ASSERT_HEADER(sizeof(lzo_int64e_t) == LZO_SIZEOF_LZO_INT64E_T) +#endif +#if !defined(lzo_int32l_t) +#if defined(lzo_int32e_t) +# define lzo_int32l_t lzo_int32e_t +# define lzo_uint32l_t lzo_uint32e_t +# define LZO_SIZEOF_LZO_INT32L_T LZO_SIZEOF_LZO_INT32E_T +# define LZO_TYPEOF_LZO_INT32L_T LZO_TYPEOF_LZO_INT32E_T +#elif (LZO_SIZEOF_INT >= 4) && (LZO_SIZEOF_INT < LZO_SIZEOF_LONG) +# define lzo_int32l_t int +# define lzo_uint32l_t unsigned int +# define LZO_SIZEOF_LZO_INT32L_T LZO_SIZEOF_INT +# define LZO_TYPEOF_LZO_INT32L_T LZO_SIZEOF_INT +#elif (LZO_SIZEOF_LONG >= 4) +# define lzo_int32l_t long int +# define lzo_uint32l_t unsigned long int +# define LZO_SIZEOF_LZO_INT32L_T LZO_SIZEOF_LONG +# define LZO_TYPEOF_LZO_INT32L_T LZO_SIZEOF_LONG +#else +# error "lzo_int32l_t" +#endif +#endif +#if 1 + LZO_COMPILE_TIME_ASSERT_HEADER(sizeof(lzo_int32l_t) >= 4) + LZO_COMPILE_TIME_ASSERT_HEADER(sizeof(lzo_int32l_t) == LZO_SIZEOF_LZO_INT32L_T) +#endif +#if !defined(lzo_int64l_t) +#if defined(lzo_int64e_t) +# define lzo_int64l_t lzo_int64e_t +# define lzo_uint64l_t lzo_uint64e_t +# define LZO_SIZEOF_LZO_INT64L_T LZO_SIZEOF_LZO_INT64E_T +# define LZO_TYPEOF_LZO_INT64L_T LZO_TYPEOF_LZO_INT64E_T +#else +#endif +#endif +#if defined(lzo_int64l_t) + LZO_COMPILE_TIME_ASSERT_HEADER(sizeof(lzo_int64l_t) >= 8) + LZO_COMPILE_TIME_ASSERT_HEADER(sizeof(lzo_int64l_t) == LZO_SIZEOF_LZO_INT64L_T) +#endif +#if !defined(lzo_int32f_t) +#if (LZO_SIZEOF_SIZE_T >= 8) +# define lzo_int32f_t lzo_int64l_t +# define lzo_uint32f_t lzo_uint64l_t +# define LZO_SIZEOF_LZO_INT32F_T LZO_SIZEOF_LZO_INT64L_T +# define LZO_TYPEOF_LZO_INT32F_T LZO_TYPEOF_LZO_INT64L_T +#else +# define lzo_int32f_t lzo_int32l_t +# 
define lzo_uint32f_t lzo_uint32l_t +# define LZO_SIZEOF_LZO_INT32F_T LZO_SIZEOF_LZO_INT32L_T +# define LZO_TYPEOF_LZO_INT32F_T LZO_TYPEOF_LZO_INT32L_T +#endif +#endif +#if 1 + LZO_COMPILE_TIME_ASSERT_HEADER(sizeof(lzo_int32f_t) >= 4) + LZO_COMPILE_TIME_ASSERT_HEADER(sizeof(lzo_int32f_t) == LZO_SIZEOF_LZO_INT32F_T) +#endif +#if !defined(lzo_int64f_t) +#if defined(lzo_int64l_t) +# define lzo_int64f_t lzo_int64l_t +# define lzo_uint64f_t lzo_uint64l_t +# define LZO_SIZEOF_LZO_INT64F_T LZO_SIZEOF_LZO_INT64L_T +# define LZO_TYPEOF_LZO_INT64F_T LZO_TYPEOF_LZO_INT64L_T +#else +#endif +#endif +#if defined(lzo_int64f_t) + LZO_COMPILE_TIME_ASSERT_HEADER(sizeof(lzo_int64f_t) >= 8) + LZO_COMPILE_TIME_ASSERT_HEADER(sizeof(lzo_int64f_t) == LZO_SIZEOF_LZO_INT64F_T) +#endif +#if !defined(lzo_intptr_t) +#if 1 && (LZO_OS_OS400 && (LZO_SIZEOF_VOID_P == 16)) +# define __LZO_INTPTR_T_IS_POINTER 1 +# if !(LZO_LANG_ASSEMBLER) + typedef char * lzo_intptr_t; + typedef char * lzo_uintptr_t; +# endif +# define lzo_intptr_t lzo_intptr_t +# define lzo_uintptr_t lzo_uintptr_t +# define LZO_SIZEOF_LZO_INTPTR_T LZO_SIZEOF_VOID_P +# define LZO_TYPEOF_LZO_INTPTR_T LZO_TYPEOF_CHAR_P +#elif (LZO_CC_MSC && (_MSC_VER >= 1300) && (LZO_SIZEOF_VOID_P == 4) && (LZO_SIZEOF_INT == 4)) +# if !(LZO_LANG_ASSEMBLER) + typedef __w64 int lzo_intptr_t; + typedef __w64 unsigned int lzo_uintptr_t; +# endif +# define lzo_intptr_t lzo_intptr_t +# define lzo_uintptr_t lzo_uintptr_t +# define LZO_SIZEOF_LZO_INTPTR_T LZO_SIZEOF_INT +# define LZO_TYPEOF_LZO_INTPTR_T LZO_TYPEOF_INT +#elif (LZO_SIZEOF_SHORT == LZO_SIZEOF_VOID_P) && (LZO_SIZEOF_INT > LZO_SIZEOF_VOID_P) +# define lzo_intptr_t short +# define lzo_uintptr_t unsigned short +# define LZO_SIZEOF_LZO_INTPTR_T LZO_SIZEOF_SHORT +# define LZO_TYPEOF_LZO_INTPTR_T LZO_TYPEOF_SHORT +#elif (LZO_SIZEOF_INT >= LZO_SIZEOF_VOID_P) && (LZO_SIZEOF_INT < LZO_SIZEOF_LONG) +# define lzo_intptr_t int +# define lzo_uintptr_t unsigned int +# define LZO_SIZEOF_LZO_INTPTR_T 
LZO_SIZEOF_INT +# define LZO_TYPEOF_LZO_INTPTR_T LZO_TYPEOF_INT +#elif (LZO_SIZEOF_LONG >= LZO_SIZEOF_VOID_P) +# define lzo_intptr_t long +# define lzo_uintptr_t unsigned long +# define LZO_SIZEOF_LZO_INTPTR_T LZO_SIZEOF_LONG +# define LZO_TYPEOF_LZO_INTPTR_T LZO_TYPEOF_LONG +#elif (LZO_SIZEOF_LZO_INT64L_T >= LZO_SIZEOF_VOID_P) +# define lzo_intptr_t lzo_int64l_t +# define lzo_uintptr_t lzo_uint64l_t +# define LZO_SIZEOF_LZO_INTPTR_T LZO_SIZEOF_LZO_INT64L_T +# define LZO_TYPEOF_LZO_INTPTR_T LZO_TYPEOF_LZO_INT64L_T +#else +# error "lzo_intptr_t" +#endif +#endif +#if 1 + LZO_COMPILE_TIME_ASSERT_HEADER(sizeof(lzo_intptr_t) >= sizeof(void *)) + LZO_COMPILE_TIME_ASSERT_HEADER(sizeof(lzo_intptr_t) == sizeof(lzo_uintptr_t)) +#endif +#if !defined(lzo_word_t) +#if defined(LZO_WORDSIZE) && (LZO_WORDSIZE+0 > 0) +#if (LZO_WORDSIZE == LZO_SIZEOF_LZO_INTPTR_T) && !(__LZO_INTPTR_T_IS_POINTER) +# define lzo_word_t lzo_uintptr_t +# define lzo_sword_t lzo_intptr_t +# define LZO_SIZEOF_LZO_WORD_T LZO_SIZEOF_LZO_INTPTR_T +# define LZO_TYPEOF_LZO_WORD_T LZO_TYPEOF_LZO_INTPTR_T +#elif (LZO_WORDSIZE == LZO_SIZEOF_LONG) +# define lzo_word_t unsigned long +# define lzo_sword_t long +# define LZO_SIZEOF_LZO_WORD_T LZO_SIZEOF_LONG +# define LZO_TYPEOF_LZO_WORD_T LZO_TYPEOF_LONG +#elif (LZO_WORDSIZE == LZO_SIZEOF_INT) +# define lzo_word_t unsigned int +# define lzo_sword_t int +# define LZO_SIZEOF_LZO_WORD_T LZO_SIZEOF_INT +# define LZO_TYPEOF_LZO_WORD_T LZO_TYPEOF_INT +#elif (LZO_WORDSIZE == LZO_SIZEOF_SHORT) +# define lzo_word_t unsigned short +# define lzo_sword_t short +# define LZO_SIZEOF_LZO_WORD_T LZO_SIZEOF_SHORT +# define LZO_TYPEOF_LZO_WORD_T LZO_TYPEOF_SHORT +#elif (LZO_WORDSIZE == 1) +# define lzo_word_t unsigned char +# define lzo_sword_t signed char +# define LZO_SIZEOF_LZO_WORD_T 1 +# define LZO_TYPEOF_LZO_WORD_T LZO_TYPEOF_CHAR +#elif (LZO_WORDSIZE == LZO_SIZEOF_LZO_INT64L_T) +# define lzo_word_t lzo_uint64l_t +# define lzo_sword_t lzo_int64l_t +# define LZO_SIZEOF_LZO_WORD_T 
LZO_SIZEOF_LZO_INT64L_T +# define LZO_TYPEOF_LZO_WORD_T LZO_SIZEOF_LZO_INT64L_T +#elif (LZO_ARCH_SPU) && (LZO_CC_GNUC) +#if 0 +# if !(LZO_LANG_ASSEMBLER) + typedef unsigned lzo_word_t __attribute__((__mode__(__V16QI__))); + typedef int lzo_sword_t __attribute__((__mode__(__V16QI__))); +# endif +# define lzo_word_t lzo_word_t +# define lzo_sword_t lzo_sword_t +# define LZO_SIZEOF_LZO_WORD_T 16 +# define LZO_TYPEOF_LZO_WORD_T LZO_TYPEOF___MODE_V16QI +#endif +#else +# error "lzo_word_t" +#endif +#endif +#endif +#if 1 && defined(lzo_word_t) + LZO_COMPILE_TIME_ASSERT_HEADER(sizeof(lzo_word_t) == LZO_WORDSIZE) + LZO_COMPILE_TIME_ASSERT_HEADER(sizeof(lzo_sword_t) == LZO_WORDSIZE) +#endif +#if 1 +#define lzo_int8_t signed char +#define lzo_uint8_t unsigned char +#define LZO_SIZEOF_LZO_INT8_T 1 +#define LZO_TYPEOF_LZO_INT8_T LZO_TYPEOF_CHAR +LZO_COMPILE_TIME_ASSERT_HEADER(sizeof(lzo_int8_t) == 1) +LZO_COMPILE_TIME_ASSERT_HEADER(sizeof(lzo_int8_t) == sizeof(lzo_uint8_t)) +#endif +#if defined(lzo_int16e_t) +#define lzo_int16_t lzo_int16e_t +#define lzo_uint16_t lzo_uint16e_t +#define LZO_SIZEOF_LZO_INT16_T LZO_SIZEOF_LZO_INT16E_T +#define LZO_TYPEOF_LZO_INT16_T LZO_TYPEOF_LZO_INT16E_T +LZO_COMPILE_TIME_ASSERT_HEADER(sizeof(lzo_int16_t) == 2) +LZO_COMPILE_TIME_ASSERT_HEADER(sizeof(lzo_int16_t) == sizeof(lzo_uint16_t)) +#endif +#if defined(lzo_int32e_t) +#define lzo_int32_t lzo_int32e_t +#define lzo_uint32_t lzo_uint32e_t +#define LZO_SIZEOF_LZO_INT32_T LZO_SIZEOF_LZO_INT32E_T +#define LZO_TYPEOF_LZO_INT32_T LZO_TYPEOF_LZO_INT32E_T +LZO_COMPILE_TIME_ASSERT_HEADER(sizeof(lzo_int32_t) == 4) +LZO_COMPILE_TIME_ASSERT_HEADER(sizeof(lzo_int32_t) == sizeof(lzo_uint32_t)) +#endif +#if defined(lzo_int64e_t) +#define lzo_int64_t lzo_int64e_t +#define lzo_uint64_t lzo_uint64e_t +#define LZO_SIZEOF_LZO_INT64_T LZO_SIZEOF_LZO_INT64E_T +#define LZO_TYPEOF_LZO_INT64_T LZO_TYPEOF_LZO_INT64E_T +LZO_COMPILE_TIME_ASSERT_HEADER(sizeof(lzo_int64_t) == 8) 
+LZO_COMPILE_TIME_ASSERT_HEADER(sizeof(lzo_int64_t) == sizeof(lzo_uint64_t)) +#endif +#if 1 +#define lzo_int_least32_t lzo_int32l_t +#define lzo_uint_least32_t lzo_uint32l_t +#define LZO_SIZEOF_LZO_INT_LEAST32_T LZO_SIZEOF_LZO_INT32L_T +#define LZO_TYPEOF_LZO_INT_LEAST32_T LZO_TYPEOF_LZO_INT32L_T +LZO_COMPILE_TIME_ASSERT_HEADER(sizeof(lzo_int_least32_t) >= 4) +LZO_COMPILE_TIME_ASSERT_HEADER(sizeof(lzo_int_least32_t) == sizeof(lzo_uint_least32_t)) +#endif +#if defined(lzo_int64l_t) +#define lzo_int_least64_t lzo_int64l_t +#define lzo_uint_least64_t lzo_uint64l_t +#define LZO_SIZEOF_LZO_INT_LEAST64_T LZO_SIZEOF_LZO_INT64L_T +#define LZO_TYPEOF_LZO_INT_LEAST64_T LZO_TYPEOF_LZO_INT64L_T +LZO_COMPILE_TIME_ASSERT_HEADER(sizeof(lzo_int_least64_t) >= 8) +LZO_COMPILE_TIME_ASSERT_HEADER(sizeof(lzo_int_least64_t) == sizeof(lzo_uint_least64_t)) +#endif +#if 1 +#define lzo_int_fast32_t lzo_int32f_t +#define lzo_uint_fast32_t lzo_uint32f_t +#define LZO_SIZEOF_LZO_INT_FAST32_T LZO_SIZEOF_LZO_INT32F_T +#define LZO_TYPEOF_LZO_INT_FAST32_T LZO_TYPEOF_LZO_INT32F_T +LZO_COMPILE_TIME_ASSERT_HEADER(sizeof(lzo_int_fast32_t) >= 4) +LZO_COMPILE_TIME_ASSERT_HEADER(sizeof(lzo_int_fast32_t) == sizeof(lzo_uint_fast32_t)) +#endif +#if defined(lzo_int64f_t) +#define lzo_int_fast64_t lzo_int64f_t +#define lzo_uint_fast64_t lzo_uint64f_t +#define LZO_SIZEOF_LZO_INT_FAST64_T LZO_SIZEOF_LZO_INT64F_T +#define LZO_TYPEOF_LZO_INT_FAST64_T LZO_TYPEOF_LZO_INT64F_T +LZO_COMPILE_TIME_ASSERT_HEADER(sizeof(lzo_int_fast64_t) >= 8) +LZO_COMPILE_TIME_ASSERT_HEADER(sizeof(lzo_int_fast64_t) == sizeof(lzo_uint_fast64_t)) +#endif +#if !defined(LZO_INT16_C) +# if (LZO_BROKEN_INTEGRAL_CONSTANTS) && (LZO_SIZEOF_INT >= 2) +# define LZO_INT16_C(c) ((c) + 0) +# define LZO_UINT16_C(c) ((c) + 0U) +# elif (LZO_BROKEN_INTEGRAL_CONSTANTS) && (LZO_SIZEOF_LONG >= 2) +# define LZO_INT16_C(c) ((c) + 0L) +# define LZO_UINT16_C(c) ((c) + 0UL) +# elif (LZO_SIZEOF_INT >= 2) +# define LZO_INT16_C(c) (c) +# define LZO_UINT16_C(c) 
(c##U) +# elif (LZO_SIZEOF_LONG >= 2) +# define LZO_INT16_C(c) (c##L) +# define LZO_UINT16_C(c) (c##UL) +# else +# error "LZO_INT16_C" +# endif +#endif +#if !defined(LZO_INT32_C) +# if (LZO_BROKEN_INTEGRAL_CONSTANTS) && (LZO_SIZEOF_INT >= 4) +# define LZO_INT32_C(c) ((c) + 0) +# define LZO_UINT32_C(c) ((c) + 0U) +# elif (LZO_BROKEN_INTEGRAL_CONSTANTS) && (LZO_SIZEOF_LONG >= 4) +# define LZO_INT32_C(c) ((c) + 0L) +# define LZO_UINT32_C(c) ((c) + 0UL) +# elif (LZO_SIZEOF_INT >= 4) +# define LZO_INT32_C(c) (c) +# define LZO_UINT32_C(c) (c##U) +# elif (LZO_SIZEOF_LONG >= 4) +# define LZO_INT32_C(c) (c##L) +# define LZO_UINT32_C(c) (c##UL) +# elif (LZO_SIZEOF_LONG_LONG >= 4) +# define LZO_INT32_C(c) (c##LL) +# define LZO_UINT32_C(c) (c##ULL) +# else +# error "LZO_INT32_C" +# endif +#endif +#if !defined(LZO_INT64_C) && defined(lzo_int64l_t) +# if (LZO_BROKEN_INTEGRAL_CONSTANTS) && (LZO_SIZEOF_INT >= 8) +# define LZO_INT64_C(c) ((c) + 0) +# define LZO_UINT64_C(c) ((c) + 0U) +# elif (LZO_BROKEN_INTEGRAL_CONSTANTS) && (LZO_SIZEOF_LONG >= 8) +# define LZO_INT64_C(c) ((c) + 0L) +# define LZO_UINT64_C(c) ((c) + 0UL) +# elif (LZO_SIZEOF_INT >= 8) +# define LZO_INT64_C(c) (c) +# define LZO_UINT64_C(c) (c##U) +# elif (LZO_SIZEOF_LONG >= 8) +# define LZO_INT64_C(c) (c##L) +# define LZO_UINT64_C(c) (c##UL) +# else +# error "LZO_INT64_C" +# endif +#endif +#endif + +#endif /* already included */ + +/* vim:set ts=4 sw=4 et: */ diff --git a/tools/z64compress/src/enc/stretchy_buffer.h b/tools/z64compress/src/enc/stretchy_buffer.h new file mode 100644 index 000000000..cbd48a300 --- /dev/null +++ b/tools/z64compress/src/enc/stretchy_buffer.h @@ -0,0 +1,262 @@ +// stretchy_buffer.h - v1.03 - public domain - nothings.org/stb +// a vector<>-like dynamic array for C +// +// version history: +// 1.03 - compile as C++ maybe +// 1.02 - tweaks to syntax for no good reason +// 1.01 - added a "common uses" documentation section +// 1.0 - fixed bug in the version I posted prematurely +// 0.9 - 
rewrite to try to avoid strict-aliasing optimization
+//      issues, but won't compile as C++
+//
+// Will probably not work correctly with strict-aliasing optimizations.
+//
+// The idea:
+//
+//    This implements an approximation to C++ vector<> for C, in that it
+//    provides a generic definition for dynamic arrays which you can
+//    still access in a typesafe way using arr[i] or *(arr+i). However,
+//    it is simply a convenience wrapper around the common idiom
+//    of keeping a set of variables (in a struct or globals) which store
+//       - pointer to array
+//       - the length of the "in-use" part of the array
+//       - the current size of the allocated array
+//
+//    I find it to be the single most useful non-built-in-structure when
+//    programming in C (hash tables a close second), but to be clear
+//    it lacks many of the capabilities of C++ vector<>: there is no
+//    range checking, the object address isn't stable (see next section
+//    for details), the set of methods available is small (although
+//    the file stb.h has another implementation of stretchy buffers
+//    called 'stb_arr' which provides more methods, e.g. for insertion
+//    and deletion).
+//
+// How to use:
+//
+//    Unlike other stb header file libraries, there is no need to
+//    define an _IMPLEMENTATION symbol. Every #include creates as
+//    much implementation as is needed.
+//
+//    stretchy_buffer.h does not define any types, so you do not
+//    need to #include it before defining data types that are
+//    stretchy buffers, only in files that *manipulate* stretchy
+//    buffers.
+//
+//    If you want a stretchy buffer aka dynamic array containing
+//    objects of TYPE, declare such an array as:
+//
+//       TYPE *myarray = NULL;
+//
+//    (There is no typesafe way to distinguish between stretchy
+//    buffers and regular arrays/pointers; this is necessary to
+//    make ordinary array indexing work on these objects.)
+// +// Unlike C++ vector<>, the stretchy_buffer has the same +// semantics as an object that you manually malloc and realloc. +// The pointer may relocate every time you add a new object +// to it, so you: +// +// 1. can't take long-term pointers to elements of the array +// 2. have to return the pointer from functions which might expand it +// (either as a return value or by storing it to a ptr-to-ptr) +// +// Now you can do the following things with this array: +// +// sb_free(TYPE *a) free the array +// sb_count(TYPE *a) the number of elements in the array +// sb_push(TYPE *a, TYPE v) adds v on the end of the array, a la push_back +// sb_add(TYPE *a, int n) adds n uninitialized elements at end of array & returns pointer to first added +// sb_last(TYPE *a) returns an lvalue of the last item in the array +// a[n] access the nth (counting from 0) element of the array +// +// #define STRETCHY_BUFFER_NO_SHORT_NAMES to only export +// names of the form 'stb_sb_' if you have a name that would +// otherwise collide. +// +// Note that these are all macros and many of them evaluate +// their arguments more than once, so the arguments should +// be side-effect-free. +// +// Note that 'TYPE *a' in sb_push and sb_add must be lvalues +// so that the library can overwrite the existing pointer if +// the object has to be reallocated. +// +// In an out-of-memory condition, the code will try to +// set up a null-pointer or otherwise-invalid-pointer +// exception to happen later. It's possible optimizing +// compilers could detect this write-to-null statically +// and optimize away some of the code, but it should only +// be along the failure path. Nevertheless, for more security +// in the face of such compilers, #define STRETCHY_BUFFER_OUT_OF_MEMORY +// to a statement such as assert(0) or exit(1) or something +// to force a failure when out-of-memory occurs. 
+// +// Common use: +// +// The main application for this is when building a list of +// things with an unknown quantity, either due to loading from +// a file or through a process which produces an unpredictable +// number. +// +// My most common idiom is something like: +// +// SomeStruct *arr = NULL; +// while (something) +// { +// SomeStruct new_one; +// new_one.whatever = whatever; +// new_one.whatup = whatup; +// new_one.foobar = barfoo; +// sb_push(arr, new_one); +// } +// +// and various closely-related factorings of that. For example, +// you might have several functions to create/init new SomeStructs, +// and if you use the above idiom, you might prefer to make them +// return structs rather than take non-const-pointers-to-structs, +// so you can do things like: +// +// SomeStruct *arr = NULL; +// while (something) +// { +// if (case_A) { +// sb_push(arr, some_func1()); +// } else if (case_B) { +// sb_push(arr, some_func2()); +// } else { +// sb_push(arr, some_func3()); +// } +// } +// +// Note that the above relies on the fact that sb_push doesn't +// evaluate its second argument more than once. The macros do +// evaluate the *array* argument multiple times, and numeric +// arguments may be evaluated multiple times, but you can rely +// on the second argument of sb_push being evaluated only once. +// +// Of course, you don't have to store bare objects in the array; +// if you need the objects to have stable pointers, store an array +// of pointers instead: +// +// SomeStruct **arr = NULL; +// while (something) +// { +// SomeStruct *new_one = malloc(sizeof(*new_one)); +// new_one->whatever = whatever; +// new_one->whatup = whatup; +// new_one->foobar = barfoo; +// sb_push(arr, new_one); +// } +// +// How it works: +// +// A long-standing tradition in things like malloc implementations +// is to store extra data before the beginning of the block returned +// to the user. 
The stretchy buffer implementation here uses the +// same trick; the current-count and current-allocation-size are +// stored before the beginning of the array returned to the user. +// (This means you can't directly free() the pointer, because the +// allocated pointer is different from the type-safe pointer provided +// to the user.) +// +// The details are trivial and implementation is straightforward; +// the main trick is in realizing in the first place that it's +// possible to do this in a generic, type-safe way in C. +// +// Contributors: +// +// Timothy Wright (github:ZenToad) +// +// LICENSE +// +// See end of file for license information. + +#ifndef STB_STRETCHY_BUFFER_H_INCLUDED +#define STB_STRETCHY_BUFFER_H_INCLUDED + +#ifndef NO_STRETCHY_BUFFER_SHORT_NAMES +#define sb_free stb_sb_free +#define sb_push stb_sb_push +#define sb_count stb_sb_count +#define sb_add stb_sb_add +#define sb_last stb_sb_last +#endif + +#define stb_sb_free(a) ((a) ? free(stb__sbraw(a)),0 : 0) +#define stb_sb_push(a,v) (stb__sbmaybegrow(a,1), (a)[stb__sbn(a)++] = (v)) +#define stb_sb_count(a) ((a) ? stb__sbn(a) : 0) +#define stb_sb_add(a,n) (stb__sbmaybegrow(a,n), stb__sbn(a)+=(n), &(a)[stb__sbn(a)-(n)]) +#define stb_sb_last(a) ((a)[stb__sbn(a)-1]) + +#define stb__sbraw(a) ((int *) (a) - 2) +#define stb__sbm(a) stb__sbraw(a)[0] +#define stb__sbn(a) stb__sbraw(a)[1] + +#define stb__sbneedgrow(a,n) ((a)==0 || stb__sbn(a)+(n) >= stb__sbm(a)) +#define stb__sbmaybegrow(a,n) (stb__sbneedgrow(a,(n)) ? stb__sbgrow(a,n) : 0) +#define stb__sbgrow(a,n) (*((void **)&(a)) = stb__sbgrowf((a), (n), sizeof(*(a)))) + +#include + +static void * stb__sbgrowf(void *arr, int increment, int itemsize) +{ + int dbl_cur = arr ? 2*stb__sbm(arr) : 0; + int min_needed = stb_sb_count(arr) + increment; + int m = dbl_cur > min_needed ? dbl_cur : min_needed; + int *p = (int *) realloc(arr ? 
stb__sbraw(arr) : 0, itemsize * m + sizeof(int)*2); + if (p) { + if (!arr) + p[1] = 0; + p[0] = m; + return p+2; + } else { + #ifdef STRETCHY_BUFFER_OUT_OF_MEMORY + STRETCHY_BUFFER_OUT_OF_MEMORY ; + #endif + return (void *) (2*sizeof(int)); // try to force a NULL pointer exception later + } +} +#endif // STB_STRETCHY_BUFFER_H_INCLUDED + + +/* +------------------------------------------------------------------------------ +This software is available under 2 licenses -- choose whichever you prefer. +------------------------------------------------------------------------------ +ALTERNATIVE A - MIT License +Copyright (c) 2017 Sean Barrett +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +------------------------------------------------------------------------------ +ALTERNATIVE B - Public Domain (www.unlicense.org) +This is free and unencumbered software released into the public domain. 
+Anyone is free to copy, modify, publish, use, compile, sell, or distribute this +software, either in source code form or as a compiled binary, for any purpose, +commercial or non-commercial, and by any means. +In jurisdictions that recognize copyright laws, the author or authors of this +software dedicate any and all copyright interest in the software to the public +domain. We make this dedication for the benefit of the public at large and to +the detriment of our heirs and successors. We intend this dedication to be an +overt act of relinquishment in perpetuity of all present and future rights to +this software under copyright law. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN +ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+------------------------------------------------------------------------------ +*/ diff --git a/tools/z64compress/src/enc/ucl.c b/tools/z64compress/src/enc/ucl.c new file mode 100644 index 000000000..49474af25 --- /dev/null +++ b/tools/z64compress/src/enc/ucl.c @@ -0,0 +1,50 @@ +#include +#include +#include +#include "ucl/ucl.h" + +int +uclenc( + void *_src + , unsigned src_sz + , void *_dst + , unsigned *dst_sz + , void *_ctx +) +{ + unsigned char *src = _src; + unsigned char *dst = _dst; + int r; + int level = 10; + ucl_uint result_sz; + + extern int g_hlen; /* header length */ + memset(dst, 0, g_hlen); + memcpy(dst, "UCL0", 4); + dst[4] = (src_sz >> 24); + dst[5] = (src_sz >> 16); + dst[6] = (src_sz >> 8); + dst[7] = (src_sz >> 0); + + r = ucl_nrv2b_99_compress( + src /* in */ + , src_sz /* in size */ + , dst + g_hlen /* out */ + , &result_sz /* out size */ + , NULL /* callback */ + , level /* level */ + , NULL /* conf */ + , NULL /* result */ + ); + + if (r != UCL_E_OK) + { + fprintf(stderr, "[!] fatal compression error %d\n", r); + exit(EXIT_FAILURE); + } + + *dst_sz = result_sz + g_hlen; + + return 0; +} + diff --git a/tools/z64compress/src/enc/ucl/comp/n2_99.ch b/tools/z64compress/src/enc/ucl/comp/n2_99.ch new file mode 100644 index 000000000..06c5c6467 --- /dev/null +++ b/tools/z64compress/src/enc/ucl/comp/n2_99.ch @@ -0,0 +1,651 @@ +/* n2_99.ch -- implementation of the NRV2[BDE]-99 compression algorithms + + This file is part of the UCL data compression library. + + Copyright (C) 1996-2004 Markus Franz Xaver Johannes Oberhumer + All Rights Reserved. + + The UCL library is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of + the License, or (at your option) any later version. 
+ + The UCL library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with the UCL library; see the file COPYING. + If not, write to the Free Software Foundation, Inc., + 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + + Markus F.X.J. Oberhumer + + http://www.oberhumer.com/opensource/ucl/ + */ + + + +#define HAVE_MEMCMP 1 +#define HAVE_MEMCPY 1 +#define HAVE_MEMMOVE 1 +#define HAVE_MEMSET 1 + + +#include "../ucl_conf.h" +#include "../ucl.h" + +#include +#include +#include +#include + + +/*********************************************************************** +// +************************************************************************/ + +#define SWD_USE_MALLOC 1 +#if (ACC_OS_DOS16) +#define SWD_HMASK (s->hmask) +#define HEAD2_VAR +#define IF_HEAD2(s) if (s->use_head2) +#else +#define SWD_HMASK (UCL_UINT32_C(65535)) +#define IF_HEAD2(s) +#endif +#define SWD_N (8*1024*1024ul) /* max. 
size of ring buffer */ +#define SWD_F 2048 /* upper limit for match length */ +#define SWD_THRESHOLD 1 /* lower limit for match length */ + +#if defined(NRV2B) +# define UCL_COMPRESS_T ucl_nrv2b_t +# define ucl_swd_t ucl_nrv2b_swd_t +# define ucl_nrv_99_compress ucl_nrv2b_99_compress +# define M2_MAX_OFFSET 0xd00 +#elif defined(NRV2D) +# define UCL_COMPRESS_T ucl_nrv2d_t +# define ucl_swd_t ucl_nrv2d_swd_t +# define ucl_nrv_99_compress ucl_nrv2d_99_compress +# define M2_MAX_OFFSET 0x500 +#elif defined(NRV2E) +# define UCL_COMPRESS_T ucl_nrv2e_t +# define ucl_swd_t ucl_nrv2e_swd_t +# define ucl_nrv_99_compress ucl_nrv2e_99_compress +# define M2_MAX_OFFSET 0x500 +#else +# error +#endif +#define ucl_swd_p ucl_swd_t * __UCL_MMODEL + +#include "ucl_mchw.ch" + + +/*********************************************************************** +// start-step-stop prefix coding +************************************************************************/ + +static void code_prefix_ss11(UCL_COMPRESS_T *c, ucl_uint32 i) +{ + if (i >= 2) + { + ucl_uint32 t = 4; + i += 2; + do { + t <<= 1; + } while (i >= t); + t >>= 1; + do { + t >>= 1; + bbPutBit(c, (i & t) ? 1 : 0); + bbPutBit(c, 0); + } while (t > 2); + } + bbPutBit(c, (unsigned)i & 1); + bbPutBit(c, 1); +} + + +#if defined(NRV2D) || defined(NRV2E) +static void code_prefix_ss12(UCL_COMPRESS_T *c, ucl_uint32 i) +{ + if (i >= 2) + { + ucl_uint32 t = 2; + do { + i -= t; + t <<= 2; + } while (i >= t); + do { + t >>= 1; + bbPutBit(c, (i & t) ? 1 : 0); + bbPutBit(c, 0); + t >>= 1; + bbPutBit(c, (i & t) ? 
1 : 0); + } while (t > 2); + } + bbPutBit(c, (unsigned)i & 1); + bbPutBit(c, 1); +} +#endif + + +static void +code_match(UCL_COMPRESS_T *c, ucl_uint m_len, const ucl_uint m_off) +{ + unsigned m_low = 0; + + while (m_len > c->conf.max_match) + { + code_match(c, c->conf.max_match - 3, m_off); + m_len -= c->conf.max_match - 3; + } + + c->match_bytes += m_len; + if (m_len > c->result[3]) + c->result[3] = m_len; + if (m_off > c->result[1]) + c->result[1] = m_off; + + bbPutBit(c, 0); + +#if defined(NRV2B) + if (m_off == c->last_m_off) + { + bbPutBit(c, 0); + bbPutBit(c, 1); + } + else + { + code_prefix_ss11(c, 1 + ((m_off - 1) >> 8)); + bbPutByte(c, (unsigned)m_off - 1); + } + m_len = m_len - 1 - (m_off > M2_MAX_OFFSET); + if (m_len >= 4) + { + bbPutBit(c,0); + bbPutBit(c,0); + code_prefix_ss11(c, m_len - 4); + } + else + { + bbPutBit(c, m_len > 1); + bbPutBit(c, (unsigned)m_len & 1); + } +#elif defined(NRV2D) + m_len = m_len - 1 - (m_off > M2_MAX_OFFSET); + assert(m_len > 0); + m_low = (m_len >= 4) ? 0u : (unsigned) m_len; + if (m_off == c->last_m_off) + { + bbPutBit(c, 0); + bbPutBit(c, 1); + bbPutBit(c, m_low > 1); + bbPutBit(c, m_low & 1); + } + else + { + code_prefix_ss12(c, 1 + ((m_off - 1) >> 7)); + bbPutByte(c, ((((unsigned)m_off - 1) & 0x7f) << 1) | ((m_low > 1) ? 
0 : 1)); + bbPutBit(c, m_low & 1); + } + if (m_len >= 4) + code_prefix_ss11(c, m_len - 4); +#elif defined(NRV2E) + m_len = m_len - 1 - (m_off > M2_MAX_OFFSET); + assert(m_len > 0); + m_low = (m_len <= 2); + if (m_off == c->last_m_off) + { + bbPutBit(c, 0); + bbPutBit(c, 1); + bbPutBit(c, m_low); + } + else + { + code_prefix_ss12(c, 1 + ((m_off - 1) >> 7)); + bbPutByte(c, ((((unsigned)m_off - 1) & 0x7f) << 1) | (m_low ^ 1)); + } + if (m_low) + bbPutBit(c, (unsigned)m_len - 1); + else if (m_len <= 4) + { + bbPutBit(c, 1); + bbPutBit(c, (unsigned)m_len - 3); + } + else + { + bbPutBit(c, 0); + code_prefix_ss11(c, m_len - 5); + } +#else +# error +#endif + + c->last_m_off = m_off; + (void)m_low; +} + + +static void +code_run(UCL_COMPRESS_T *c, const ucl_bytep ii, ucl_uint lit) +{ + if (lit == 0) + return; + c->lit_bytes += lit; + if (lit > c->result[5]) + c->result[5] = lit; + do { + bbPutBit(c, 1); + bbPutByte(c, *ii++); + } while (--lit > 0); +} + + +/*********************************************************************** +// +************************************************************************/ + +static int +len_of_coded_match(UCL_COMPRESS_T *c, ucl_uint m_len, ucl_uint m_off) +{ + int b; + if (m_len < 2 || (m_len == 2 && (m_off > M2_MAX_OFFSET)) + || m_off > c->conf.max_offset) + return -1; + assert(m_off > 0); + + m_len = m_len - 2 - (m_off > M2_MAX_OFFSET); + + if (m_off == c->last_m_off) + b = 1 + 2; + else + { +#if defined(NRV2B) + b = 1 + 10; + m_off = (m_off - 1) >> 8; + while (m_off > 0) + { + b += 2; + m_off >>= 1; + } +#elif defined(NRV2D) || defined(NRV2E) + b = 1 + 9; + m_off = (m_off - 1) >> 7; + while (m_off > 0) + { + b += 3; + m_off >>= 2; + } +#else +# error +#endif + } + +#if defined(NRV2B) || defined(NRV2D) + b += 2; + if (m_len < 3) + return b; + m_len -= 3; +#elif defined(NRV2E) + b += 2; + if (m_len < 2) + return b; + if (m_len < 4) + return b + 1; + m_len -= 4; +#else +# error +#endif + do { + b += 2; + m_len >>= 1; + } while (m_len > 0); + 
+ return b; +} + + +/*********************************************************************** +// +************************************************************************/ + +#if !defined(NDEBUG) +static +void assert_match( const ucl_swd_p swd, ucl_uint m_len, ucl_uint m_off ) +{ + const UCL_COMPRESS_T *c = swd->c; + ucl_uint d_off; + + assert(m_len >= 2); + if (m_off <= (ucl_uint) (c->bp - c->in)) + { + assert(c->bp - m_off + m_len < c->ip); + assert(ucl_memcmp(c->bp, c->bp - m_off, m_len) == 0); + } + else + { + assert(swd->dict != NULL); + d_off = m_off - (ucl_uint) (c->bp - c->in); + assert(d_off <= swd->dict_len); + if (m_len > d_off) + { + assert(ucl_memcmp(c->bp, swd->dict_end - d_off, d_off) == 0); + assert(c->in + m_len - d_off < c->ip); + assert(ucl_memcmp(c->bp + d_off, c->in, m_len - d_off) == 0); + } + else + { + assert(ucl_memcmp(c->bp, swd->dict_end - d_off, m_len) == 0); + } + } +} +#else +# define assert_match(a,b,c) ((void)0) +#endif + + +#if defined(SWD_BEST_OFF) + +static void +better_match ( const ucl_swd_p swd, ucl_uint *m_len, ucl_uint *m_off ) +{ +} + +#endif + + +/*********************************************************************** +// +************************************************************************/ + +UCL_PUBLIC(int) +ucl_nrv_99_compress ( const ucl_bytep in, ucl_uint in_len, + ucl_bytep out, ucl_uintp out_len, + ucl_progress_callback_p cb, + int level, + const struct ucl_compress_config_p conf, + ucl_uintp result) +{ + const ucl_bytep ii; + ucl_uint lit; + ucl_uint m_len, m_off; + UCL_COMPRESS_T c_buffer; + UCL_COMPRESS_T * const c = &c_buffer; +#undef s +#if defined(SWD_USE_MALLOC) + ucl_swd_t the_swd = {0}; +# define s (&the_swd) +#else +// static ucl_swd_p s = 0; +#endif + ucl_uint result_buffer[16]; + int r; + + struct swd_config_t + { + unsigned try_lazy; + ucl_uint good_length; + ucl_uint max_lazy; + ucl_uint nice_length; + ucl_uint max_chain; + ucl_uint32 flags; + ucl_uint32 max_offset; + }; + const struct swd_config_t 
*sc; + static const struct swd_config_t swd_config[10] = { +#define F SWD_F + /* faster compression */ + { 0, 0, 0, 8, 4, 0, 48*1024L }, + { 0, 0, 0, 16, 8, 0, 48*1024L }, + { 0, 0, 0, 32, 16, 0, 48*1024L }, + { 1, 4, 4, 16, 16, 0, 48*1024L }, + { 1, 8, 16, 32, 32, 0, 48*1024L }, + { 1, 8, 16, 128, 128, 0, 48*1024L }, + { 2, 8, 32, 128, 256, 0, 128*1024L }, + { 2, 32, 128, F, 2048, 1, 128*1024L }, + { 2, 32, 128, F, 2048, 1, 256*1024L }, + { 2, F, F, F, 4096, 1, SWD_N } + /* max. compression */ +#undef F + }; + + if (level < 1 || level > 10) + return UCL_E_INVALID_ARGUMENT; + sc = &swd_config[level - 1]; + + memset(c, 0, sizeof(*c)); + memset(&c->conf, 0xff, sizeof(c->conf)); + c->ip = c->in = in; + c->in_end = in + in_len; + c->out = out; + if (cb && cb->callback) + c->cb = cb; + cb = NULL; + c->result = result ? result : (ucl_uintp) result_buffer; + result = NULL; + ucl_memset(c->result, 0, 16*sizeof(*c->result)); + c->result[0] = c->result[2] = c->result[4] = UCL_UINT_MAX; + if (conf) + ucl_memcpy(&c->conf, conf, sizeof(c->conf)); + conf = NULL; + r = bbConfig(c, 0, 8); + if (r == 0) + r = bbConfig(c, c->conf.bb_endian, c->conf.bb_size); + if (r != 0) + return UCL_E_INVALID_ARGUMENT; + c->bb_op = out; + + ii = c->ip; /* point to start of literal run */ + lit = 0; + +#if !defined(s) + if (!s) + s = (ucl_swd_p) ucl_malloc(ucl_sizeof(*s)); + if (!s) + return UCL_E_OUT_OF_MEMORY; + memset(s, 0, ucl_sizeof(*s)); +#endif + s->f = UCL_MIN((ucl_uint)SWD_F, c->conf.max_match); + s->n = UCL_MIN((ucl_uint)SWD_N, sc->max_offset); + s->hmask = UCL_UINT32_C(65535); +#ifdef HEAD2_VAR + s->use_head2 = 1; +#if defined(ACC_MM_AHSHIFT) + if (ACC_MM_AHSHIFT != 3) { + s->hmask = 16 * 1024 - 1; + s->use_head2 = 0; + } +#endif +#endif + if (c->conf.max_offset != UCL_UINT_MAX) + s->n = UCL_MIN(SWD_N, c->conf.max_offset); + if (in_len < s->n) + s->n = UCL_MAX(in_len, 256); + if (s->f < 8 || s->n < 256) + return UCL_E_INVALID_ARGUMENT; + r = init_match(c,s,NULL,0,sc->flags); + if (r == 
UCL_E_OK && (SWD_HSIZE - 1 != s->hmask)) + r = UCL_E_ERROR; + if (r != UCL_E_OK) + { +#if !defined(s) + ucl_free(s); +#endif + return r; + } + if (sc->max_chain > 0) + s->max_chain = sc->max_chain; + if (sc->nice_length > 0) + s->nice_length = sc->nice_length; + if (c->conf.max_match < s->nice_length) + s->nice_length = c->conf.max_match; + + if (c->cb) + (*c->cb->callback)(0,0,-1,c->cb->user); + + c->last_m_off = 1; + r = find_match(c,s,0,0); + if (r != UCL_E_OK) + return r; + while (c->look > 0) + { + ucl_uint ahead; + ucl_uint max_ahead; + int l1, l2; + + c->codesize = (ucl_uint) (c->bb_op - out); + + m_len = c->m_len; + m_off = c->m_off; + + assert(c->bp == c->ip - c->look); + assert(c->bp >= in); + if (lit == 0) + ii = c->bp; + assert(ii + lit == c->bp); + assert(s->b_char == *(c->bp)); + + if (m_len < 2 || (m_len == 2 && (m_off > M2_MAX_OFFSET)) + || m_off > c->conf.max_offset) + { + /* a literal */ + lit++; + s->max_chain = sc->max_chain; + r = find_match(c,s,1,0); + assert(r == 0); + continue; + } + + /* a match */ +#if defined(SWD_BEST_OFF) + if (s->use_best_off) + better_match(s,&m_len,&m_off); +#endif + assert_match(s,m_len,m_off); + + /* shall we try a lazy match ? 
*/ + ahead = 0; + if (sc->try_lazy <= 0 || m_len >= sc->max_lazy || m_off == c->last_m_off) + { + /* no */ + l1 = 0; + max_ahead = 0; + } + else + { + /* yes, try a lazy match */ + l1 = len_of_coded_match(c,m_len,m_off); + assert(l1 > 0); + max_ahead = UCL_MIN((ucl_uint)sc->try_lazy, m_len - 1); + } + + while (ahead < max_ahead && c->look > m_len) + { + if (m_len >= sc->good_length) + s->max_chain = sc->max_chain >> 2; + else + s->max_chain = sc->max_chain; + r = find_match(c,s,1,0); + ahead++; + + assert(r == 0); + assert(c->look > 0); + assert(ii + lit + ahead == c->bp); + + if (c->m_len < 2) + continue; +#if defined(SWD_BEST_OFF) + if (s->use_best_off) + better_match(s,&c->m_len,&c->m_off); +#endif + l2 = len_of_coded_match(c,c->m_len,c->m_off); + if (l2 < 0) + continue; +#if 1 + if (l1 + (int)(ahead + c->m_len - m_len) * 5 > l2 + (int)(ahead) * 9) +#else + if (l1 > l2) +#endif + { + c->lazy++; + assert_match(s,c->m_len,c->m_off); + +#if 0 + if (l3 > 0) + { + /* code previous run */ + code_run(c,ii,lit); + lit = 0; + /* code shortened match */ + code_match(c,ahead,m_off); + } + else +#endif + { + lit += ahead; + assert(ii + lit == c->bp); + } + goto lazy_match_done; + } + } + + assert(ii + lit + ahead == c->bp); + + /* 1 - code run */ + code_run(c,ii,lit); + lit = 0; + + /* 2 - code match */ + code_match(c,m_len,m_off); + s->max_chain = sc->max_chain; + r = find_match(c,s,m_len,1+ahead); + assert(r == 0); + +lazy_match_done: ; + } + + /* store final run */ + code_run(c,ii,lit); + + /* EOF */ + bbPutBit(c, 0); +#if defined(NRV2B) + code_prefix_ss11(c, UCL_UINT32_C(0x1000000)); + bbPutByte(c, 0xff); +#elif defined(NRV2D) || defined(NRV2E) + code_prefix_ss12(c, UCL_UINT32_C(0x1000000)); + bbPutByte(c, 0xff); +#else +# error +#endif + bbFlushBits(c, 0); + + assert(c->textsize == in_len); + c->codesize = (ucl_uint) (c->bb_op - out); + *out_len = (ucl_uint) (c->bb_op - out); + if (c->cb) + (*c->cb->callback)(c->textsize,c->codesize,4,c->cb->user); + +#if 0 + 
printf("%7ld %7ld -> %7ld %7ld %7ld %ld (max: %d %d %d)\n", + (long) c->textsize, (long) in_len, (long) c->codesize, + c->match_bytes, c->lit_bytes, c->lazy, + c->result[1], c->result[3], c->result[5]); +#endif + assert(c->lit_bytes + c->match_bytes == in_len); + + swd_exit(s); +#if !defined(s) + ucl_free(s); +#endif + return UCL_E_OK; +#undef s +} + + +/* +vi:ts=4:et +*/ + diff --git a/tools/z64compress/src/enc/ucl/comp/n2b_99.c b/tools/z64compress/src/enc/ucl/comp/n2b_99.c new file mode 100644 index 000000000..e3b11cc16 --- /dev/null +++ b/tools/z64compress/src/enc/ucl/comp/n2b_99.c @@ -0,0 +1,38 @@ +/* n2b_99.c -- implementation of the NRV2B-99 compression algorithm + + This file is part of the UCL data compression library. + + Copyright (C) 1996-2004 Markus Franz Xaver Johannes Oberhumer + All Rights Reserved. + + The UCL library is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of + the License, or (at your option) any later version. + + The UCL library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with the UCL library; see the file COPYING. + If not, write to the Free Software Foundation, Inc., + 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + + Markus F.X.J. 
Oberhumer + + http://www.oberhumer.com/opensource/ucl/ + */ + + + +#define NRV2B +#include "n2_99.ch" +#undef NRV2B + + +/* +vi:ts=4:et +*/ + diff --git a/tools/z64compress/src/enc/ucl/comp/ucl_mchw.ch b/tools/z64compress/src/enc/ucl/comp/ucl_mchw.ch new file mode 100644 index 000000000..c462576d8 --- /dev/null +++ b/tools/z64compress/src/enc/ucl/comp/ucl_mchw.ch @@ -0,0 +1,312 @@ +/* ucl_mchw.ch -- matching functions using a window + + This file is part of the UCL data compression library. + + Copyright (C) 1996-2004 Markus Franz Xaver Johannes Oberhumer + All Rights Reserved. + + The UCL library is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of + the License, or (at your option) any later version. + + The UCL library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with the UCL library; see the file COPYING. + If not, write to the Free Software Foundation, Inc., + 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + + Markus F.X.J. 
Oberhumer + + http://www.oberhumer.com/opensource/ucl/ + */ + + +/*********************************************************************** +// +************************************************************************/ + +typedef struct +{ + int init; + + ucl_uint look; /* bytes in lookahead buffer */ + + ucl_uint m_len; + ucl_uint m_off; + + ucl_uint last_m_len; + ucl_uint last_m_off; + + const ucl_bytep bp; + const ucl_bytep ip; + const ucl_bytep in; + const ucl_bytep in_end; + ucl_bytep out; + + ucl_uint32 bb_b; + unsigned bb_k; + unsigned bb_c_endian; + unsigned bb_c_s; + unsigned bb_c_s8; + ucl_bytep bb_p; + ucl_bytep bb_op; + + struct ucl_compress_config_t conf; + ucl_uintp result; + + ucl_progress_callback_p cb; + + ucl_uint textsize; /* text size counter */ + ucl_uint codesize; /* code size counter */ + ucl_uint printcount; /* counter for reporting progress every 1K bytes */ + + /* some stats */ + unsigned long lit_bytes; + unsigned long match_bytes; + unsigned long rep_bytes; + unsigned long lazy; +} +UCL_COMPRESS_T; + + + +#if (ACC_OS_TOS && (ACC_CC_PUREC || ACC_CC_TURBOC)) +/* the cast is needed to work around a code generation bug */ +#define getbyte(c) ((c).ip < (c).in_end ? (int) (unsigned) *((c).ip)++ : (-1)) +#else +#define getbyte(c) ((c).ip < (c).in_end ? *((c).ip)++ : (-1)) +#endif + +#include "ucl_swd.ch" + + + +/*********************************************************************** +// +************************************************************************/ + +static int +init_match ( UCL_COMPRESS_T *c, ucl_swd_t *s, + const ucl_bytep dict, ucl_uint dict_len, + ucl_uint32 flags ) +{ + int r; + + assert(!c->init); + c->init = 1; + + s->c = c; + + c->last_m_len = c->last_m_off = 0; + + c->textsize = c->codesize = c->printcount = 0; + c->lit_bytes = c->match_bytes = c->rep_bytes = 0; + c->lazy = 0; + + r = swd_init(s,dict,dict_len); + if (r != UCL_E_OK) + { + swd_exit(s); + return r; + } + + s->use_best_off = (flags & 1) ? 
1 : 0; + return UCL_E_OK; +} + + +/*********************************************************************** +// +************************************************************************/ + +static int +find_match ( UCL_COMPRESS_T *c, ucl_swd_t *s, + ucl_uint this_len, ucl_uint skip ) +{ + assert(c->init); + + if (skip > 0) + { + assert(this_len >= skip); + swd_accept(s, this_len - skip); + c->textsize += this_len - skip + 1; + } + else + { + assert(this_len <= 1); + c->textsize += this_len - skip; + } + + s->m_len = SWD_THRESHOLD; +#ifdef SWD_BEST_OFF + if (s->use_best_off) + memset(s->best_pos,0,sizeof(s->best_pos)); +#endif + swd_findbest(s); + c->m_len = s->m_len; +#if defined(__UCL_CHECKER) + /* s->m_off may be uninitialized if we didn't find a match, + * but then its value will never be used. + */ + c->m_off = (s->m_len == SWD_THRESHOLD) ? 0 : s->m_off; +#else + c->m_off = s->m_off; +#endif + + swd_getbyte(s); + + if (s->b_char < 0) + { + c->look = 0; + c->m_len = 0; + swd_exit(s); + } + else + { + c->look = s->look + 1; + } + c->bp = c->ip - c->look; + +#if 0 + /* brute force match search */ + if (c->m_len > SWD_THRESHOLD && c->m_len + 1 <= c->look) + { + const ucl_bytep ip = c->bp; + const ucl_bytep m = c->bp - c->m_off; + const ucl_bytep in = c->in; + + if (ip - in > s->n) + in = ip - s->n; + for (;;) + { + while (*in != *ip) + in++; + if (in == ip) + break; + if (in != m) + if (memcmp(in,ip,c->m_len+1) == 0) + printf("%p %p %p %5d\n",in,ip,m,c->m_len); + in++; + } + } +#endif + + if (c->cb && c->textsize > c->printcount) + { + (*c->cb->callback)(c->textsize,c->codesize,3,c->cb->user); + c->printcount += 1024; + } + + return UCL_E_OK; +} + + +/*********************************************************************** +// bit buffer +************************************************************************/ + +static int bbConfig(UCL_COMPRESS_T *c, int endian, int bitsize) +{ + if (endian != -1) + { + if (endian != 0) + return UCL_E_ERROR; + c->bb_c_endian = 
endian; + } + if (bitsize != -1) + { + if (bitsize != 8 && bitsize != 16 && bitsize != 32) + return UCL_E_ERROR; + c->bb_c_s = bitsize; + c->bb_c_s8 = bitsize / 8; + } + c->bb_b = 0; c->bb_k = 0; + c->bb_p = NULL; + c->bb_op = NULL; + return UCL_E_OK; +} + + +static void bbWriteBits(UCL_COMPRESS_T *c) +{ + ucl_bytep p = c->bb_p; + ucl_uint32 b = c->bb_b; + + p[0] = UCL_BYTE(b >> 0); + if (c->bb_c_s >= 16) + { + p[1] = UCL_BYTE(b >> 8); + if (c->bb_c_s == 32) + { + p[2] = UCL_BYTE(b >> 16); + p[3] = UCL_BYTE(b >> 24); + } + } +} + + +static void bbPutBit(UCL_COMPRESS_T *c, unsigned bit) +{ + assert(bit == 0 || bit == 1); + assert(c->bb_k <= c->bb_c_s); + + if (c->bb_k < c->bb_c_s) + { + if (c->bb_k == 0) + { + assert(c->bb_p == NULL); + c->bb_p = c->bb_op; + c->bb_op += c->bb_c_s8; + } + assert(c->bb_p != NULL); + assert(c->bb_p + c->bb_c_s8 <= c->bb_op); + + c->bb_b = (c->bb_b << 1) + bit; + c->bb_k++; + } + else + { + assert(c->bb_p != NULL); + assert(c->bb_p + c->bb_c_s8 <= c->bb_op); + + bbWriteBits(c); + c->bb_p = c->bb_op; + c->bb_op += c->bb_c_s8; + c->bb_b = bit; + c->bb_k = 1; + } +} + + +static void bbPutByte(UCL_COMPRESS_T *c, unsigned b) +{ + /**printf("putbyte %p %p %x (%d)\n", op, bb_p, x, bb_k);*/ + assert(c->bb_p == NULL || c->bb_p + c->bb_c_s8 <= c->bb_op); + *c->bb_op++ = UCL_BYTE(b); +} + + +static void bbFlushBits(UCL_COMPRESS_T *c, unsigned filler_bit) +{ + if (c->bb_k > 0) + { + assert(c->bb_k <= c->bb_c_s); + while (c->bb_k != c->bb_c_s) + bbPutBit(c, filler_bit); + bbWriteBits(c); + c->bb_k = 0; + } + c->bb_p = NULL; +} + + + +/* +vi:ts=4:et +*/ + diff --git a/tools/z64compress/src/enc/ucl/comp/ucl_swd.ch b/tools/z64compress/src/enc/ucl/comp/ucl_swd.ch new file mode 100644 index 000000000..8b829415b --- /dev/null +++ b/tools/z64compress/src/enc/ucl/comp/ucl_swd.ch @@ -0,0 +1,686 @@ +/* ucl_swd.c -- sliding window dictionary + + This file is part of the UCL data compression library. 
+ + Copyright (C) 1996-2004 Markus Franz Xaver Johannes Oberhumer + All Rights Reserved. + + The UCL library is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of + the License, or (at your option) any later version. + + The UCL library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with the UCL library; see the file COPYING. + If not, write to the Free Software Foundation, Inc., + 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + + Markus F.X.J. Oberhumer + + http://www.oberhumer.com/opensource/ucl/ + */ + + +#if (UCL_UINT_MAX < UCL_0xffffffffL) +# error "UCL_UINT_MAX" +#endif + + +/*********************************************************************** +// +************************************************************************/ + +/* unsigned type for dictionary access - don't waste memory here */ +#if (0UL + SWD_N + SWD_F + SWD_F < 0UL + USHRT_MAX) + typedef unsigned short swd_uint; +# define SWD_UINT_MAX USHRT_MAX +#else + typedef ucl_uint swd_uint; +# define SWD_UINT_MAX UCL_UINT_MAX +#endif +#define swd_uintp swd_uint __UCL_MMODEL * +#define SWD_UINT(x) ((swd_uint)(x)) + + +#ifndef SWD_MAX_CHAIN +# define SWD_MAX_CHAIN 2048 +#endif +#define SWD_HSIZE (SWD_HMASK + 1) + +#if !defined(HEAD3) +#if 1 +# define HEAD3(b,p) \ + (((0x9f5f*(((((ucl_uint32)b[p]<<5)^b[p+1])<<5)^b[p+2]))>>5) & SWD_HMASK) +#else +# define HEAD3(b,p) \ + (((0x9f5f*(((((ucl_uint32)b[p+2]<<5)^b[p+1])<<5)^b[p]))>>5) & SWD_HMASK) +#endif +#endif + +#if !defined(HEAD2) +#if (SWD_THRESHOLD == 1) +# if 1 && defined(UA_GET2) +# define HEAD2(b,p) UA_GET2(&(b[p])) +# else +# define HEAD2(b,p) (b[p] ^ 
((unsigned)b[p+1]<<8)) +# endif +# define NIL2 SWD_UINT_MAX +#endif +#endif + + +#if defined(__UCL_CHECKER) + /* malloc arrays of the exact size to detect any overrun */ +# ifndef SWD_USE_MALLOC +# define SWD_USE_MALLOC +# endif +#endif + + +typedef struct +{ +/* public - "built-in" */ + ucl_uint n; + ucl_uint f; + ucl_uint threshold; + ucl_uint hmask; + +/* public - configuration */ + ucl_uint max_chain; + ucl_uint nice_length; + ucl_bool use_best_off; + ucl_uint lazy_insert; + +/* public - output */ + ucl_uint m_len; + ucl_uint m_off; + ucl_uint look; + int b_char; +#if defined(SWD_BEST_OFF) + ucl_uint best_off[ SWD_BEST_OFF ]; +#endif + +/* semi public */ + UCL_COMPRESS_T *c; + ucl_uint m_pos; +#if defined(SWD_BEST_OFF) + ucl_uint best_pos[ SWD_BEST_OFF ]; +#endif + +/* private */ + const ucl_bytep dict; + const ucl_bytep dict_end; + ucl_uint dict_len; + +/* private */ + ucl_uint ip; /* input pointer (lookahead) */ + ucl_uint bp; /* buffer pointer */ + ucl_uint rp; /* remove pointer */ + ucl_uint b_size; + + ucl_bytep b_wrap; + + ucl_uint node_count; + ucl_uint first_rp; + +#if defined(SWD_USE_MALLOC) + ucl_bytep b; + swd_uintp head3; + swd_uintp succ3; + swd_uintp best3; + swd_uintp llen3; +#ifdef HEAD2 + swd_uintp head2; +#ifdef HEAD2_VAR + int use_head2; +#endif +#endif +#else + unsigned char b [ SWD_N + SWD_F + SWD_F ]; + swd_uint head3 [ SWD_HSIZE ]; + swd_uint succ3 [ SWD_N + SWD_F ]; + swd_uint best3 [ SWD_N + SWD_F ]; + swd_uint llen3 [ SWD_HSIZE ]; +#ifdef HEAD2 + swd_uint head2 [ UCL_UINT32_C(65536) ]; +#endif +#endif +} +ucl_swd_t; + + +/* Access macro for head3. + * head3[key] may be uninitialized if the list is emtpy, + * but then its value will never be used. + */ +#if defined(__UCL_CHECKER) +# define s_get_head3(s,key) \ + ((s->llen3[key] == 0) ? 
SWD_UINT_MAX : s->head3[key]) +#else +# define s_get_head3(s,key) s->head3[key] +#endif + + +/*********************************************************************** +// +************************************************************************/ + +static +void swd_initdict(ucl_swd_t *s, const ucl_bytep dict, ucl_uint dict_len) +{ + s->dict = s->dict_end = NULL; + s->dict_len = 0; + + if (!dict || dict_len <= 0) + return; + if (dict_len > s->n) + { + dict += dict_len - s->n; + dict_len = s->n; + } + + s->dict = dict; + s->dict_len = dict_len; + s->dict_end = dict + dict_len; + ucl_memcpy(s->b,dict,dict_len); + s->ip = dict_len; +} + + +static +void swd_insertdict(ucl_swd_t *s, ucl_uint node, ucl_uint len) +{ + ucl_uint key; + + s->node_count = s->n - len; + s->first_rp = node; + + while (len-- > 0) + { + key = HEAD3(s->b,node); + s->succ3[node] = s_get_head3(s,key); + s->head3[key] = SWD_UINT(node); + s->best3[node] = SWD_UINT(s->f + 1); + s->llen3[key]++; + assert(s->llen3[key] <= s->n); + +#ifdef HEAD2 + IF_HEAD2(s) { + key = HEAD2(s->b,node); + s->head2[key] = SWD_UINT(node); + } +#endif + + node++; + } +} + + +/*********************************************************************** +// +************************************************************************/ + +static +int swd_init(ucl_swd_t *s, const ucl_bytep dict, ucl_uint dict_len) +{ +#if defined(SWD_USE_MALLOC) + s->b = NULL; + s->head3 = NULL; + s->succ3 = NULL; + s->best3 = NULL; + s->llen3 = NULL; +#ifdef HEAD2 + s->head2 = NULL; +#endif +#endif + + if (s->n == 0) + s->n = SWD_N; + if (s->f == 0) + s->f = SWD_F; + s->threshold = SWD_THRESHOLD; + if (s->n > SWD_N || s->f > SWD_F) + return UCL_E_INVALID_ARGUMENT; + +#if defined(SWD_USE_MALLOC) + s->b = (ucl_bytep) ucl_alloc(1, s->n + s->f + s->f); + s->head3 = (swd_uintp) ucl_alloc(SWD_HSIZE, sizeof(*s->head3)); + s->succ3 = (swd_uintp) ucl_alloc(s->n + s->f, sizeof(*s->succ3)); + s->best3 = (swd_uintp) ucl_alloc(s->n + s->f, sizeof(*s->best3)); + 
s->llen3 = (swd_uintp) ucl_alloc(SWD_HSIZE, sizeof(*s->llen3)); + if (!s->b || !s->head3 || !s->succ3 || !s->best3 || !s->llen3) + return UCL_E_OUT_OF_MEMORY; +#ifdef HEAD2 + IF_HEAD2(s) { + s->head2 = (swd_uintp) ucl_alloc(UCL_UINT32_C(65536), sizeof(*s->head2)); + if (!s->head2) + return UCL_E_OUT_OF_MEMORY; + } +#endif +#endif + + /* defaults */ + s->max_chain = SWD_MAX_CHAIN; + s->nice_length = s->f; + s->use_best_off = 0; + s->lazy_insert = 0; + + s->b_size = s->n + s->f; + if (s->b_size + s->f >= SWD_UINT_MAX) + return UCL_E_ERROR; + s->b_wrap = s->b + s->b_size; + s->node_count = s->n; + + ucl_memset(s->llen3, 0, (ucl_uint)sizeof(s->llen3[0]) * SWD_HSIZE); +#ifdef HEAD2 + IF_HEAD2(s) { +#if 1 + ucl_memset(s->head2, 0xff, (ucl_uint)sizeof(s->head2[0]) * UCL_UINT32_C(65536)); + assert(s->head2[0] == NIL2); +#else + ucl_uint32 i; + for (i = 0; i < UCL_UINT32_C(65536); i++) + s->head2[i] = NIL2; +#endif + } +#endif + + s->ip = 0; + swd_initdict(s,dict,dict_len); + s->bp = s->ip; + s->first_rp = s->ip; + + assert(s->ip + s->f <= s->b_size); +#if 1 + s->look = (ucl_uint) (s->c->in_end - s->c->ip); + if (s->look > 0) + { + if (s->look > s->f) + s->look = s->f; + ucl_memcpy(&s->b[s->ip],s->c->ip,s->look); + s->c->ip += s->look; + s->ip += s->look; + } +#else + s->look = 0; + while (s->look < s->f) + { + int c; + if ((c = getbyte(*(s->c))) < 0) + break; + s->b[s->ip] = UCL_BYTE(c); + s->ip++; + s->look++; + } +#endif + if (s->ip == s->b_size) + s->ip = 0; + + if (s->look >= 2 && s->dict_len > 0) + swd_insertdict(s,0,s->dict_len); + + s->rp = s->first_rp; + if (s->rp >= s->node_count) + s->rp -= s->node_count; + else + s->rp += s->b_size - s->node_count; + +#if defined(__UCL_CHECKER) + /* initialize memory for the first few HEAD3 (if s->ip is not far + * enough ahead to do this job for us). The value doesn't matter. 
*/ + if (s->look < 3) + ucl_memset(&s->b[s->bp+s->look],0,3); +#endif + + return UCL_E_OK; +} + + +static +void swd_exit(ucl_swd_t *s) +{ +#if defined(SWD_USE_MALLOC) + /* free in reverse order of allocations */ +# ifdef HEAD2 + ucl_free(s->head2); s->head2 = NULL; +#endif + ucl_free(s->llen3); s->llen3 = NULL; + ucl_free(s->best3); s->best3 = NULL; + ucl_free(s->succ3); s->succ3 = NULL; + ucl_free(s->head3); s->head3 = NULL; + ucl_free(s->b); s->b = NULL; +#else + //ACC_UNUSED(s); +#endif +} + + +#define swd_pos2off(s,pos) \ + (s->bp > (pos) ? s->bp - (pos) : s->b_size - ((pos) - s->bp)) + + +/*********************************************************************** +// +************************************************************************/ + +static +void swd_getbyte(ucl_swd_t *s) +{ + int c; + + if ((c = getbyte(*(s->c))) < 0) + { + if (s->look > 0) + --s->look; +#if defined(__UCL_CHECKER) + /* initialize memory - value doesn't matter */ + s->b[s->ip] = 0; + if (s->ip < s->f) + s->b_wrap[s->ip] = 0; +#endif + } + else + { + s->b[s->ip] = UCL_BYTE(c); + if (s->ip < s->f) + s->b_wrap[s->ip] = UCL_BYTE(c); + } + if (++s->ip == s->b_size) + s->ip = 0; + if (++s->bp == s->b_size) + s->bp = 0; + if (++s->rp == s->b_size) + s->rp = 0; +} + + +/*********************************************************************** +// remove node from lists +************************************************************************/ + +static +void swd_remove_node(ucl_swd_t *s, ucl_uint node) +{ + if (s->node_count == 0) + { + ucl_uint key; + +#ifdef UCL_DEBUG + if (s->first_rp != UCL_UINT_MAX) + { + if (node != s->first_rp) + printf("Remove %5u: %5u %5u %5u %5u %6u %6u\n", + node, s->rp, s->ip, s->bp, s->first_rp, + s->ip - node, s->ip - s->bp); + assert(node == s->first_rp); + s->first_rp = UCL_UINT_MAX; + } +#endif + + key = HEAD3(s->b,node); + assert(s->llen3[key] > 0); + --s->llen3[key]; + +#ifdef HEAD2 + IF_HEAD2(s) { + key = HEAD2(s->b,node); + assert(s->head2[key] != NIL2); + if 
((ucl_uint) s->head2[key] == node) + s->head2[key] = NIL2; + } +#endif + } + else + --s->node_count; +} + + +/*********************************************************************** +// +************************************************************************/ + +static +void swd_accept(ucl_swd_t *s, ucl_uint n) +{ + assert(n <= s->look); + + if (n > 0) do + { + ucl_uint key; + + swd_remove_node(s,s->rp); + + /* add bp into HEAD3 */ + key = HEAD3(s->b,s->bp); + s->succ3[s->bp] = s_get_head3(s,key); + s->head3[key] = SWD_UINT(s->bp); + s->best3[s->bp] = SWD_UINT(s->f + 1); + s->llen3[key]++; + assert(s->llen3[key] <= s->n); + +#ifdef HEAD2 + IF_HEAD2(s) { + /* add bp into HEAD2 */ + key = HEAD2(s->b,s->bp); + s->head2[key] = SWD_UINT(s->bp); + } +#endif + + swd_getbyte(s); + } while (--n > 0); +} + + +/*********************************************************************** +// +************************************************************************/ + +static +void swd_search(ucl_swd_t *s, ucl_uint node, ucl_uint cnt) +{ + const ucl_bytep p1; + const ucl_bytep p2; + const ucl_bytep px; + ucl_uint m_len = s->m_len; + const ucl_bytep b = s->b; + const ucl_bytep bp = s->b + s->bp; + const ucl_bytep bx = s->b + s->bp + s->look; + unsigned char scan_end1; + + assert(s->m_len > 0); + + scan_end1 = bp[m_len - 1]; + for ( ; cnt-- > 0; node = s->succ3[node]) + { + p1 = bp; + p2 = b + node; + px = bx; + + assert(m_len < s->look); + + if ( +#if 1 + p2[m_len - 1] == scan_end1 && + p2[m_len] == p1[m_len] && +#endif + p2[0] == p1[0] && + p2[1] == p1[1]) + { + ucl_uint i; + assert(ucl_memcmp(bp,&b[node],3) == 0); + +#if 0 && defined(UA_GET4) + p1 += 3; p2 += 3; + while (p1 < px && UA_GET4(p1) == UA_GET4(p2)) + p1 += 4, p2 += 4; + while (p1 < px && *p1 == *p2) + p1 += 1, p2 += 1; +#else + p1 += 2; p2 += 2; + do {} while (++p1 < px && *p1 == *++p2); +#endif + i = (ucl_uint) (p1 - bp); + +#ifdef UCL_DEBUG + if (ucl_memcmp(bp,&b[node],i) != 0) + printf("%5ld %5ld %02x%02x 
%02x%02x\n", + (long)s->bp, (long) node, + bp[0], bp[1], b[node], b[node+1]); +#endif + assert(ucl_memcmp(bp,&b[node],i) == 0); + +#if defined(SWD_BEST_OFF) + if (i < SWD_BEST_OFF) + { + if (s->best_pos[i] == 0) + s->best_pos[i] = node + 1; + } +#endif + if (i > m_len) + { + s->m_len = m_len = i; + s->m_pos = node; + if (m_len == s->look) + return; + if (m_len >= s->nice_length) + return; + if (m_len > (ucl_uint) s->best3[node]) + return; + scan_end1 = bp[m_len - 1]; + } + } + } +} + + +/*********************************************************************** +// +************************************************************************/ + +#ifdef HEAD2 + +static +ucl_bool swd_search2(ucl_swd_t *s) +{ + ucl_uint key; + + assert(s->look >= 2); + assert(s->m_len > 0); + + key = s->head2[ HEAD2(s->b,s->bp) ]; + if (key == NIL2) + return 0; +#ifdef UCL_DEBUG + if (ucl_memcmp(&s->b[s->bp],&s->b[key],2) != 0) + printf("%5ld %5ld %02x%02x %02x%02x\n", (long)s->bp, (long)key, + s->b[s->bp], s->b[s->bp+1], s->b[key], s->b[key+1]); +#endif + assert(ucl_memcmp(&s->b[s->bp],&s->b[key],2) == 0); +#if defined(SWD_BEST_OFF) + if (s->best_pos[2] == 0) + s->best_pos[2] = key + 1; +#endif + + if (s->m_len < 2) + { + s->m_len = 2; + s->m_pos = key; + } + return 1; +} + +#endif + + +/*********************************************************************** +// +************************************************************************/ + +static +void swd_findbest(ucl_swd_t *s) +{ + ucl_uint key; + ucl_uint cnt, node; + ucl_uint len; + + assert(s->m_len > 0); + + /* get current head, add bp into HEAD3 */ + key = HEAD3(s->b,s->bp); + node = s->succ3[s->bp] = s_get_head3(s,key); + cnt = s->llen3[key]++; + assert(s->llen3[key] <= s->n + s->f); + if (cnt > s->max_chain && s->max_chain > 0) + cnt = s->max_chain; + s->head3[key] = SWD_UINT(s->bp); + + s->b_char = s->b[s->bp]; + len = s->m_len; + if (s->m_len >= s->look) + { + if (s->look == 0) + s->b_char = -1; + s->m_off = 0; + s->best3[s->bp] = 
SWD_UINT(s->f + 1); + } + else + { +#if defined(HEAD2_VAR) + if (s->use_head2) { + if (swd_search2(s) && s->look >= 3) + swd_search(s,node,cnt); + } else { + if (s->look >= 3) + swd_search(s,node,cnt); + } +#elif defined(HEAD2) + if (swd_search2(s) && s->look >= 3) + swd_search(s,node,cnt); +#else + if (s->look >= 3) + swd_search(s,node,cnt); +#endif + if (s->m_len > len) + s->m_off = swd_pos2off(s,s->m_pos); + s->best3[s->bp] = SWD_UINT(s->m_len); + +#if defined(SWD_BEST_OFF) + if (s->use_best_off) + { + int i; + for (i = 2; i < SWD_BEST_OFF; i++) + if (s->best_pos[i] > 0) + s->best_off[i] = swd_pos2off(s,s->best_pos[i]-1); + else + s->best_off[i] = 0; + } +#endif + } + + swd_remove_node(s,s->rp); + +#ifdef HEAD2 + /* add bp into HEAD2 */ + IF_HEAD2(s) { + key = HEAD2(s->b,s->bp); + s->head2[key] = SWD_UINT(s->bp); + } +#endif +} + + +#undef HEAD3 +#undef HEAD2 +#undef IF_HEAD2 +#undef s_get_head3 + + +/* +vi:ts=4:et +*/ + diff --git a/tools/z64compress/src/enc/ucl/getbit.h b/tools/z64compress/src/enc/ucl/getbit.h new file mode 100644 index 000000000..36ef48c99 --- /dev/null +++ b/tools/z64compress/src/enc/ucl/getbit.h @@ -0,0 +1,64 @@ +/* getbit.h -- bit-buffer access + + This file is part of the UCL data compression library. + + Copyright (C) 1996-2004 Markus Franz Xaver Johannes Oberhumer + All Rights Reserved. + + The UCL library is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of + the License, or (at your option) any later version. + + The UCL library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with the UCL library; see the file COPYING. 
+ If not, write to the Free Software Foundation, Inc., + 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + + Markus F.X.J. Oberhumer + + http://www.oberhumer.com/opensource/ucl/ + */ + + +/*********************************************************************** +// +************************************************************************/ + +#if 1 +#define getbit_8(bb, src, ilen) \ + (((bb = bb & 0x7f ? bb*2 : ((unsigned)src[ilen++]*2+1)) >> 8) & 1) +#elif 1 +#define getbit_8(bb, src, ilen) \ + (bb*=2,bb&0xff ? (bb>>8)&1 : ((bb=src[ilen++]*2+1)>>8)&1) +#else +#define getbit_8(bb, src, ilen) \ + (((bb*=2, (bb&0xff ? bb : (bb=src[ilen++]*2+1,bb))) >> 8) & 1) +#endif + + +#define getbit_le16(bb, src, ilen) \ + (bb*=2,bb&0xffff ? (bb>>16)&1 : (ilen+=2,((bb=(src[ilen-2]+src[ilen-1]*256u)*2+1)>>16)&1)) + + +#if 1 && (ACC_ENDIAN_LITTLE_ENDIAN) && defined(UA_GET4) +#define getbit_le32(bb, bc, src, ilen) \ + (bc > 0 ? ((bb>>--bc)&1) : (bc=31,\ + bb=UA_GET4((src)+ilen),ilen+=4,(bb>>31)&1)) +#else +#define getbit_le32(bb, bc, src, ilen) \ + (bc > 0 ? ((bb>>--bc)&1) : (bc=31,\ + bb=src[ilen]+src[ilen+1]*0x100+src[ilen+2]*UCL_UINT32_C(0x10000)+src[ilen+3]*UCL_UINT32_C(0x1000000),\ + ilen+=4,(bb>>31)&1)) +#endif + + +/* +vi:ts=4:et +*/ + diff --git a/tools/z64compress/src/enc/ucl/n2b_d.c b/tools/z64compress/src/enc/ucl/n2b_d.c new file mode 100644 index 000000000..0dc359068 --- /dev/null +++ b/tools/z64compress/src/enc/ucl/n2b_d.c @@ -0,0 +1,179 @@ +/* n2b_d.c -- implementation of the NRV2B decompression algorithm + + This file is part of the UCL data compression library. + + Copyright (C) 1996-2004 Markus Franz Xaver Johannes Oberhumer + All Rights Reserved. + + The UCL library is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of + the License, or (at your option) any later version. 
+ + The UCL library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with the UCL library; see the file COPYING. + If not, write to the Free Software Foundation, Inc., + 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + + Markus F.X.J. Oberhumer + + http://www.oberhumer.com/opensource/ucl/ + */ + + +/*********************************************************************** +// actual implementation used by a recursive #include +************************************************************************/ + +#ifdef getbit + +#ifdef SAFE +#define fail(x,r) if (x) { *dst_len = olen; return r; } +#else +#define fail(x,r) +#endif + +{ + ucl_uint32 bb = 0; +#ifdef TEST_OVERLAP + ucl_uint ilen = src_off, olen = 0, last_m_off = 1; +#else + ucl_uint ilen = 0, olen = 0, last_m_off = 1; +#endif +#ifdef SAFE + const ucl_uint oend = *dst_len; +#endif + //ACC_UNUSED(wrkmem); + +#ifdef TEST_OVERLAP + src_len += src_off; + fail(oend >= src_len, UCL_E_OVERLAP_OVERRUN); +#endif + + for (;;) + { + ucl_uint m_off, m_len; + + while (getbit(bb)) + { + fail(ilen >= src_len, UCL_E_INPUT_OVERRUN); + fail(olen >= oend, UCL_E_OUTPUT_OVERRUN); +#ifdef TEST_OVERLAP + fail(olen > ilen, UCL_E_OVERLAP_OVERRUN); + olen++; ilen++; +#else + dst[olen++] = src[ilen++]; +#endif + } + m_off = 1; + do { + m_off = m_off*2 + getbit(bb); + fail(ilen >= src_len, UCL_E_INPUT_OVERRUN); + fail(m_off > UCL_UINT32_C(0xffffff) + 3, UCL_E_LOOKBEHIND_OVERRUN); + } while (!getbit(bb)); + if (m_off == 2) + { + m_off = last_m_off; + } + else + { + fail(ilen >= src_len, UCL_E_INPUT_OVERRUN); + m_off = (m_off-3)*256 + src[ilen++]; + if (m_off == UCL_UINT32_C(0xffffffff)) + break; + last_m_off = ++m_off; + } + m_len = getbit(bb); + m_len = m_len*2 + getbit(bb); + if 
(m_len == 0) + { + m_len++; + do { + m_len = m_len*2 + getbit(bb); + fail(ilen >= src_len, UCL_E_INPUT_OVERRUN); + fail(m_len >= oend, UCL_E_OUTPUT_OVERRUN); + } while (!getbit(bb)); + m_len += 2; + } + m_len += (m_off > 0xd00); + fail(olen + m_len > oend, UCL_E_OUTPUT_OVERRUN); + fail(m_off > olen, UCL_E_LOOKBEHIND_OVERRUN); +#ifdef TEST_OVERLAP + olen += m_len + 1; + fail(olen > ilen, UCL_E_OVERLAP_OVERRUN); +#else + { + const ucl_bytep m_pos; + m_pos = dst + olen - m_off; + dst[olen++] = *m_pos++; + do dst[olen++] = *m_pos++; while (--m_len > 0); + } +#endif + } + *dst_len = olen; + return ilen == src_len ? UCL_E_OK : (ilen < src_len ? UCL_E_INPUT_NOT_CONSUMED : UCL_E_INPUT_OVERRUN); +} + +#undef fail + +#endif /* getbit */ + + +/*********************************************************************** +// decompressor entries for the different bit-buffer sizes +************************************************************************/ + +#ifndef getbit + +#include "ucl_conf.h" +#include "ucl.h" +#include "getbit.h" + + +UCL_PUBLIC(int) +ucl_nrv2b_decompress_8 ( const ucl_bytep src, ucl_uint src_len, + ucl_bytep dst, ucl_uintp dst_len, + ucl_voidp wrkmem ) +{ +#define getbit(bb) getbit_8(bb,src,ilen) +#include "n2b_d.c" +#undef getbit +} + +#if 0 +UCL_PUBLIC(int) +ucl_nrv2b_decompress_le16 ( const ucl_bytep src, ucl_uint src_len, + ucl_bytep dst, ucl_uintp dst_len, + ucl_voidp wrkmem ) +{ +#define getbit(bb) getbit_le16(bb,src,ilen) +#include "n2b_d.c" +#undef getbit +} + + +UCL_PUBLIC(int) +ucl_nrv2b_decompress_le32 ( const ucl_bytep src, ucl_uint src_len, + ucl_bytep dst, ucl_uintp dst_len, + ucl_voidp wrkmem ) +{ + unsigned bc = 0; +#define getbit(bb) getbit_le32(bb,bc,src,ilen) +#include "n2b_d.c" +#undef getbit +} +#endif /* 0 */ + + +#endif /* !getbit */ + + +/* +vi:ts=4:et +*/ + diff --git a/tools/z64compress/src/enc/ucl/ucl.h b/tools/z64compress/src/enc/ucl/ucl.h new file mode 100644 index 000000000..f136d4a2d --- /dev/null +++ 
b/tools/z64compress/src/enc/ucl/ucl.h @@ -0,0 +1,249 @@ +/* ucl.h -- prototypes for the UCL data compression library + + This file is part of the UCL data compression library. + + Copyright (C) 2004 Markus Franz Xaver Johannes Oberhumer + Copyright (C) 2003 Markus Franz Xaver Johannes Oberhumer + Copyright (C) 2002 Markus Franz Xaver Johannes Oberhumer + Copyright (C) 2001 Markus Franz Xaver Johannes Oberhumer + Copyright (C) 2000 Markus Franz Xaver Johannes Oberhumer + Copyright (C) 1999 Markus Franz Xaver Johannes Oberhumer + Copyright (C) 1998 Markus Franz Xaver Johannes Oberhumer + Copyright (C) 1997 Markus Franz Xaver Johannes Oberhumer + Copyright (C) 1996 Markus Franz Xaver Johannes Oberhumer + All Rights Reserved. + + The UCL library is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of + the License, or (at your option) any later version. + + The UCL library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with the UCL library; see the file COPYING. + If not, write to the Free Software Foundation, Inc., + 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + + Markus F.X.J. Oberhumer + + http://www.oberhumer.com/opensource/ucl/ + */ + + +#ifndef __UCL_H_INCLUDED +#define __UCL_H_INCLUDED + +#ifndef __UCLCONF_H_INCLUDED +#include "uclconf.h" +#endif + +#ifdef __cplusplus +extern "C" { +#endif + + +/*********************************************************************** +// Compression fine-tuning configuration. +// +// Pass a NULL pointer to the compression functions for default values. +// Otherwise set all values to -1 [i.e. 
initialize the struct by a +// `memset(x,0xff,sizeof(x))'] and then set the required values. +************************************************************************/ + +struct ucl_compress_config_t +{ + int bb_endian; + int bb_size; + ucl_uint max_offset; + ucl_uint max_match; + int s_level; + int h_level; + int p_level; + int c_flags; + ucl_uint m_size; +}; + +#define ucl_compress_config_p ucl_compress_config_t __UCL_MMODEL * + + +/*********************************************************************** +// compressors +// +// Pass NULL for `cb' (no progress callback), `conf' (default compression +// configuration) and `result' (no statistical result). +************************************************************************/ + +UCL_EXTERN(int) +ucl_nrv2b_99_compress ( const ucl_bytep src, ucl_uint src_len, + ucl_bytep dst, ucl_uintp dst_len, + ucl_progress_callback_p cb, + int level, + const struct ucl_compress_config_p conf, + ucl_uintp result ); + +UCL_EXTERN(int) +ucl_nrv2d_99_compress ( const ucl_bytep src, ucl_uint src_len, + ucl_bytep dst, ucl_uintp dst_len, + ucl_progress_callback_p cb, + int level, + const struct ucl_compress_config_p conf, + ucl_uintp result ); + +UCL_EXTERN(int) +ucl_nrv2e_99_compress ( const ucl_bytep src, ucl_uint src_len, + ucl_bytep dst, ucl_uintp dst_len, + ucl_progress_callback_p cb, + int level, + const struct ucl_compress_config_p conf, + ucl_uintp result ); + + +/*********************************************************************** +// decompressors +// +// Always pass NULL for `wrkmem'. This parameter is for symetry +// with my other compression libaries and is not used in UCL - +// UCL does not need any additional memory (or even local stack space) +// for decompression. 
+************************************************************************/ + +UCL_EXTERN(int) +ucl_nrv2b_decompress_8 ( const ucl_bytep src, ucl_uint src_len, + ucl_bytep dst, ucl_uintp dst_len, + ucl_voidp wrkmem ); +UCL_EXTERN(int) +ucl_nrv2b_decompress_le16 ( const ucl_bytep src, ucl_uint src_len, + ucl_bytep dst, ucl_uintp dst_len, + ucl_voidp wrkmem ); +UCL_EXTERN(int) +ucl_nrv2b_decompress_le32 ( const ucl_bytep src, ucl_uint src_len, + ucl_bytep dst, ucl_uintp dst_len, + ucl_voidp wrkmem ); +UCL_EXTERN(int) +ucl_nrv2b_decompress_safe_8 ( const ucl_bytep src, ucl_uint src_len, + ucl_bytep dst, ucl_uintp dst_len, + ucl_voidp wrkmem ); +UCL_EXTERN(int) +ucl_nrv2b_decompress_safe_le16 ( const ucl_bytep src, ucl_uint src_len, + ucl_bytep dst, ucl_uintp dst_len, + ucl_voidp wrkmem ); +UCL_EXTERN(int) +ucl_nrv2b_decompress_safe_le32 ( const ucl_bytep src, ucl_uint src_len, + ucl_bytep dst, ucl_uintp dst_len, + ucl_voidp wrkmem ); + +UCL_EXTERN(int) +ucl_nrv2d_decompress_8 ( const ucl_bytep src, ucl_uint src_len, + ucl_bytep dst, ucl_uintp dst_len, + ucl_voidp wrkmem ); +UCL_EXTERN(int) +ucl_nrv2d_decompress_le16 ( const ucl_bytep src, ucl_uint src_len, + ucl_bytep dst, ucl_uintp dst_len, + ucl_voidp wrkmem ); +UCL_EXTERN(int) +ucl_nrv2d_decompress_le32 ( const ucl_bytep src, ucl_uint src_len, + ucl_bytep dst, ucl_uintp dst_len, + ucl_voidp wrkmem ); +UCL_EXTERN(int) +ucl_nrv2d_decompress_safe_8 ( const ucl_bytep src, ucl_uint src_len, + ucl_bytep dst, ucl_uintp dst_len, + ucl_voidp wrkmem ); +UCL_EXTERN(int) +ucl_nrv2d_decompress_safe_le16 ( const ucl_bytep src, ucl_uint src_len, + ucl_bytep dst, ucl_uintp dst_len, + ucl_voidp wrkmem ); +UCL_EXTERN(int) +ucl_nrv2d_decompress_safe_le32 ( const ucl_bytep src, ucl_uint src_len, + ucl_bytep dst, ucl_uintp dst_len, + ucl_voidp wrkmem ); + +UCL_EXTERN(int) +ucl_nrv2e_decompress_8 ( const ucl_bytep src, ucl_uint src_len, + ucl_bytep dst, ucl_uintp dst_len, + ucl_voidp wrkmem ); +UCL_EXTERN(int) +ucl_nrv2e_decompress_le16 
( const ucl_bytep src, ucl_uint src_len, + ucl_bytep dst, ucl_uintp dst_len, + ucl_voidp wrkmem ); +UCL_EXTERN(int) +ucl_nrv2e_decompress_le32 ( const ucl_bytep src, ucl_uint src_len, + ucl_bytep dst, ucl_uintp dst_len, + ucl_voidp wrkmem ); +UCL_EXTERN(int) +ucl_nrv2e_decompress_safe_8 ( const ucl_bytep src, ucl_uint src_len, + ucl_bytep dst, ucl_uintp dst_len, + ucl_voidp wrkmem ); +UCL_EXTERN(int) +ucl_nrv2e_decompress_safe_le16 ( const ucl_bytep src, ucl_uint src_len, + ucl_bytep dst, ucl_uintp dst_len, + ucl_voidp wrkmem ); +UCL_EXTERN(int) +ucl_nrv2e_decompress_safe_le32 ( const ucl_bytep src, ucl_uint src_len, + ucl_bytep dst, ucl_uintp dst_len, + ucl_voidp wrkmem ); + + +/*********************************************************************** +// assembler decompressors [TO BE ADDED] +************************************************************************/ + + +/*********************************************************************** +// test an overlapping in-place decompression within a buffer: +// - try a virtual decompression from &buf[src_off] -> &buf[0] +// - no data is actually written +// - only the bytes at buf[src_off..src_off+src_len-1] will get accessed +// +// NOTE: always pass NULL for `wrkmem' - see above. 
+************************************************************************/ + +UCL_EXTERN(int) +ucl_nrv2b_test_overlap_8 ( const ucl_bytep buf, ucl_uint src_off, + ucl_uint src_len, ucl_uintp dst_len, + ucl_voidp wrkmem ); +UCL_EXTERN(int) +ucl_nrv2b_test_overlap_le16 ( const ucl_bytep buf, ucl_uint src_off, + ucl_uint src_len, ucl_uintp dst_len, + ucl_voidp wrkmem ); +UCL_EXTERN(int) +ucl_nrv2b_test_overlap_le32 ( const ucl_bytep buf, ucl_uint src_off, + ucl_uint src_len, ucl_uintp dst_len, + ucl_voidp wrkmem ); + +UCL_EXTERN(int) +ucl_nrv2d_test_overlap_8 ( const ucl_bytep buf, ucl_uint src_off, + ucl_uint src_len, ucl_uintp dst_len, + ucl_voidp wrkmem ); +UCL_EXTERN(int) +ucl_nrv2d_test_overlap_le16 ( const ucl_bytep buf, ucl_uint src_off, + ucl_uint src_len, ucl_uintp dst_len, + ucl_voidp wrkmem ); +UCL_EXTERN(int) +ucl_nrv2d_test_overlap_le32 ( const ucl_bytep buf, ucl_uint src_off, + ucl_uint src_len, ucl_uintp dst_len, + ucl_voidp wrkmem ); + +UCL_EXTERN(int) +ucl_nrv2e_test_overlap_8 ( const ucl_bytep buf, ucl_uint src_off, + ucl_uint src_len, ucl_uintp dst_len, + ucl_voidp wrkmem ); +UCL_EXTERN(int) +ucl_nrv2e_test_overlap_le16 ( const ucl_bytep buf, ucl_uint src_off, + ucl_uint src_len, ucl_uintp dst_len, + ucl_voidp wrkmem ); +UCL_EXTERN(int) +ucl_nrv2e_test_overlap_le32 ( const ucl_bytep buf, ucl_uint src_off, + ucl_uint src_len, ucl_uintp dst_len, + ucl_voidp wrkmem ); + + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* already included */ + diff --git a/tools/z64compress/src/enc/ucl/ucl_conf.h b/tools/z64compress/src/enc/ucl/ucl_conf.h new file mode 100644 index 000000000..79f5c6b61 --- /dev/null +++ b/tools/z64compress/src/enc/ucl/ucl_conf.h @@ -0,0 +1,220 @@ +/* ucl_conf.h -- main internal configuration file for the the UCL library + + This file is part of the UCL data compression library. + + Copyright (C) 1996-2004 Markus Franz Xaver Johannes Oberhumer + All Rights Reserved. 
+ + The UCL library is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of + the License, or (at your option) any later version. + + The UCL library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with the UCL library; see the file COPYING. + If not, write to the Free Software Foundation, Inc., + 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + + Markus F.X.J. Oberhumer + + http://www.oberhumer.com/opensource/ucl/ + */ + + +/* WARNING: this file should *not* be used by applications. It is + part of the implementation of the library and is subject + to change. + */ + + +#ifndef __UCL_CONF_H +#define __UCL_CONF_H + + +/*********************************************************************** +// +************************************************************************/ + +#if defined(__UCLCONF_H_INCLUDED) +# error "include this file first" +#endif +#include "uclconf.h" + +#if !defined(__UCL_MMODEL_HUGE) && defined(HAVE_MEMCMP) +# define ucl_memcmp(a,b,c) memcmp(a,b,c) +#endif +#if !defined(__UCL_MMODEL_HUGE) && defined(HAVE_MEMCPY) +# define ucl_memcpy(a,b,c) memcpy(a,b,c) +#endif +#if !defined(__UCL_MMODEL_HUGE) && defined(HAVE_MEMMOVE) +# define ucl_memmove(a,b,c) memmove(a,b,c) +#endif +#if !defined(__UCL_MMODEL_HUGE) && defined(HAVE_MEMSET) +# define ucl_memset(a,b,c) memset(a,b,c) +#endif +#if 0 /* WANT_ACC */ +#if defined(UCL_HAVE_CONFIG_H) +# define ACC_CONFIG_NO_HEADER 1 +#endif +#define __ACCLIB_FUNCNAME(f) error_do_not_use_acclib +#include "acc/acc.h" + +#if (ACC_CC_MSC && (_MSC_VER >= 1300)) + /* avoid `-Wall' warnings in system header files */ +# pragma warning(disable: 
4820) + /* avoid warnings about inlining */ +# pragma warning(disable: 4710 4711) +#endif + +#if defined(__UCL_MMODEL_HUGE) && (!ACC_HAVE_MM_HUGE_PTR) +# error "this should not happen - check defines for __huge" +#endif + +#if (ACC_OS_DOS16 + 0 != UCL_OS_DOS16 + 0) +# error "DOS16" +#endif +#if (ACC_OS_OS216 + 0 != UCL_OS_OS216 + 0) +# error "OS216" +#endif +#if (ACC_OS_WIN16 + 0 != UCL_OS_WIN16 + 0) +# error "WIN16" +#endif +#if (ACC_OS_DOS32 + 0 != UCL_OS_DOS32 + 0) +# error "DOS32" +#endif +#if (ACC_OS_OS2 + 0 != UCL_OS_OS2 + 0) +# error "DOS32" +#endif +#if (ACC_OS_WIN32 + 0 != UCL_OS_WIN32 + 0) +# error "WIN32" +#endif +#if (ACC_OS_WIN64 + 0 != UCL_OS_WIN64 + 0) +# error "WIN64" +#endif + + +#include "acc/acc_incd.h" +#if (ACC_OS_DOS16 || ACC_OS_OS216 || ACC_OS_WIN16) +# include "acc/acc_ince.h" +# include "acc/acc_inci.h" +#endif + +#undef NDEBUG +#if !defined(UCL_DEBUG) +# define NDEBUG 1 +#endif +#include + + +#if (ACC_OS_DOS16 || ACC_OS_OS216 || ACC_OS_WIN16) && (ACC_CC_BORLANDC) +# if (__BORLANDC__ >= 0x0450) /* v4.00 */ +# pragma option -h /* enable fast huge pointers */ +# else +# pragma option -h- /* disable fast huge pointers - compiler bug */ +# endif +#endif +#endif /* WANT_ACC */ + + +/*********************************************************************** +// +************************************************************************/ + +#if 1 +# define UCL_BYTE(x) ((unsigned char) (x)) +#else +# define UCL_BYTE(x) ((unsigned char) ((x) & 0xff)) +#endif +#if 0 +# define UCL_USHORT(x) ((unsigned short) (x)) +#else +# define UCL_USHORT(x) ((unsigned short) ((x) & 0xffff)) +#endif + +#define UCL_MAX(a,b) ((a) >= (b) ? (a) : (b)) +#define UCL_MIN(a,b) ((a) <= (b) ? (a) : (b)) +#define UCL_MAX3(a,b,c) ((a) >= (b) ? UCL_MAX(a,c) : UCL_MAX(b,c)) +#define UCL_MIN3(a,b,c) ((a) <= (b) ? 
UCL_MIN(a,c) : UCL_MIN(b,c)) + +#define ucl_sizeof(type) ((ucl_uint) (sizeof(type))) + +#define UCL_HIGH(array) ((ucl_uint) (sizeof(array)/sizeof(*(array)))) + +/* this always fits into 16 bits */ +#define UCL_SIZE(bits) (1u << (bits)) +#define UCL_MASK(bits) (UCL_SIZE(bits) - 1) + +#define UCL_LSIZE(bits) (1ul << (bits)) +#define UCL_LMASK(bits) (UCL_LSIZE(bits) - 1) + +#define UCL_USIZE(bits) ((ucl_uint) 1 << (bits)) +#define UCL_UMASK(bits) (UCL_USIZE(bits) - 1) + +/* Maximum value of a signed/unsigned type. + Do not use casts, avoid overflows ! */ +#define UCL_STYPE_MAX(b) (((1l << (8*(b)-2)) - 1l) + (1l << (8*(b)-2))) +#define UCL_UTYPE_MAX(b) (((1ul << (8*(b)-1)) - 1ul) + (1ul << (8*(b)-1))) + + +/*********************************************************************** +// compiler and architecture specific stuff +************************************************************************/ + +/* Some defines that indicate if memory can be accessed at unaligned + * memory addresses. You should also test that this is actually faster + * even if it is allowed by your system. 
+ */ + +#undef UA_GET2 +#undef UA_SET2 +#undef UA_GET4 +#undef UA_SET4 +#if 1 && (ACC_ARCH_AMD64 || ACC_ARCH_IA32) +# define UA_GET2(p) (* (const ucl_ushortp) (p)) +# define UA_SET2(p) (* (ucl_ushortp) (p)) +# define UA_GET4(p) (* (const acc_uint32e_t *) (p)) +# define UA_SET4(p) (* (acc_uint32e_t *) (p)) +#elif 0 && (ACC_ARCH_M68K) && (ACC_CC_GNUC >= 0x020900ul) + typedef struct { unsigned short v; } __ucl_ua2_t __attribute__((__aligned__(1))); + typedef struct { unsigned long v; } __ucl_ua4_t __attribute__((__aligned__(1))); +# define UA_GET2(p) (((const __ucl_ua2_t *)(p))->v) +# define UA_SET2(p) (((__ucl_ua2_t *)(p))->v) +# define UA_GET4(p) (((const __ucl_ua4_t *)(p))->v) +# define UA_SET4(p) (((__ucl_ua4_t *)(p))->v) +#endif + + +/*********************************************************************** +// some globals +************************************************************************/ + +__UCL_EXTERN_C int __ucl_init_done; +UCL_EXTERN(const ucl_bytep) ucl_copyright(void); + + +/*********************************************************************** +// ANSI C preprocessor macros +************************************************************************/ + +#define _UCL_STRINGIZE(x) #x +#define _UCL_MEXPAND(x) _UCL_STRINGIZE(x) + + +/*********************************************************************** +// +************************************************************************/ + +//#include "ucl_ptr.h" + + +#endif /* already included */ + +/* +vi:ts=4:et +*/ + diff --git a/tools/z64compress/src/enc/ucl/uclconf.h b/tools/z64compress/src/enc/ucl/uclconf.h new file mode 100644 index 000000000..ab18ca173 --- /dev/null +++ b/tools/z64compress/src/enc/ucl/uclconf.h @@ -0,0 +1,490 @@ +/* uclconf.h -- configuration for the UCL data compression library + + This file is part of the UCL data compression library. 
+ + Copyright (C) 2004 Markus Franz Xaver Johannes Oberhumer + Copyright (C) 2003 Markus Franz Xaver Johannes Oberhumer + Copyright (C) 2002 Markus Franz Xaver Johannes Oberhumer + Copyright (C) 2001 Markus Franz Xaver Johannes Oberhumer + Copyright (C) 2000 Markus Franz Xaver Johannes Oberhumer + Copyright (C) 1999 Markus Franz Xaver Johannes Oberhumer + Copyright (C) 1998 Markus Franz Xaver Johannes Oberhumer + Copyright (C) 1997 Markus Franz Xaver Johannes Oberhumer + Copyright (C) 1996 Markus Franz Xaver Johannes Oberhumer + All Rights Reserved. + + The UCL library is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of + the License, or (at your option) any later version. + + The UCL library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with the UCL library; see the file COPYING. + If not, write to the Free Software Foundation, Inc., + 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + + Markus F.X.J. 
Oberhumer + + http://www.oberhumer.com/opensource/ucl/ + */ + + +#ifndef __UCLCONF_H_INCLUDED +#define __UCLCONF_H_INCLUDED + +#define UCL_VERSION 0x010300L +#define UCL_VERSION_STRING "1.03" +#define UCL_VERSION_DATE "Jul 20 2004" + +/* internal Autoconf configuration file - only used when building UCL */ +#if defined(UCL_HAVE_CONFIG_H) +# include +#endif +#include + +#ifdef __cplusplus +extern "C" { +#endif + + + +/*********************************************************************** +// UCL requires a conforming +************************************************************************/ + +#if !defined(CHAR_BIT) || (CHAR_BIT != 8) +# error "invalid CHAR_BIT" +#endif +#if !defined(UCHAR_MAX) || !defined(UINT_MAX) || !defined(ULONG_MAX) +# error "check your compiler installation" +#endif +#if (USHRT_MAX < 1) || (UINT_MAX < 1) || (ULONG_MAX < 1) +# error "your limits.h macros are broken" +#endif + +/* workaround a compiler bug under hpux 10.20 */ +#define UCL_0xffffL 65535ul +#define UCL_0xffffffffL 4294967295ul + +#if !defined(UCL_UINT32_C) +# if (UINT_MAX < UCL_0xffffffffL) +# define UCL_UINT32_C(c) c ## UL +# else +# define UCL_UINT32_C(c) ((c) + 0U) +# endif +#endif + + +/*********************************************************************** +// architecture defines +************************************************************************/ + +#if (defined(__CYGWIN__) || defined(__CYGWIN32__)) && defined(__GNUC__) +# define UCL_OS_CYGWIN 1 +#elif defined(__EMX__) && defined(__GNUC__) +# define UCL_OS_EMX 1 +#elif defined(__BORLANDC__) && defined(__DPMI32__) && (__BORLANDC__ >= 0x0460) +# define UCL_OS_DOS32 1 +#elif defined(__BORLANDC__) && defined(__DPMI16__) +# define UCL_OS_DOS16 1 +#elif defined(__ZTC__) && defined(DOS386) +# define UCL_OS_DOS32 1 +#elif defined(__OS2__) || defined(__OS2V2__) +# if (UINT_MAX == UCL_0xffffL) +# define UCL_OS_OS216 1 +# elif (UINT_MAX == UCL_0xffffffffL) +# define UCL_OS_OS2 1 +# else +# error "check your limits.h header" +# 
endif +#elif defined(__WIN64__) || defined(_WIN64) || defined(WIN64) +# define UCL_OS_WIN64 1 +#elif defined(__WIN32__) || defined(_WIN32) || defined(WIN32) || defined(__WINDOWS_386__) +# define UCL_OS_WIN32 1 +#elif defined(__MWERKS__) && defined(__INTEL__) +# define UCL_OS_WIN32 1 +#elif defined(__WINDOWS__) || defined(_WINDOWS) || defined(_Windows) +# if (UINT_MAX == UCL_0xffffL) +# define UCL_OS_WIN16 1 +# elif (UINT_MAX == UCL_0xffffffffL) +# define UCL_OS_WIN32 1 +# else +# error "check your limits.h header" +# endif +#elif defined(__DOS__) || defined(__MSDOS__) || defined(_MSDOS) || defined(MSDOS) || (defined(__PACIFIC__) && defined(DOS)) +# if (UINT_MAX == UCL_0xffffL) +# define UCL_OS_DOS16 1 +# elif (UINT_MAX == UCL_0xffffffffL) +# define UCL_OS_DOS32 1 +# else +# error "check your limits.h header" +# endif +#elif defined(__WATCOMC__) +# if defined(__NT__) && (UINT_MAX == UCL_0xffffL) + /* wcl: NT host defaults to DOS target */ +# define UCL_OS_DOS16 1 +# elif defined(__NT__) && (__WATCOMC__ < 1100) + /* wcl386: Watcom C 11 defines _WIN32 */ +# define UCL_OS_WIN32 1 +# else +# error "please specify a target using the -bt compiler option" +# endif +#elif defined(__palmos__) +# if (UINT_MAX == UCL_0xffffL) +# define UCL_OS_PALMOS 1 +# else +# error "check your limits.h header" +# endif +#elif defined(__TOS__) || defined(__atarist__) +# define UCL_OS_TOS 1 +#elif defined(macintosh) +# define UCL_OS_MACCLASSIC 1 +#elif defined(__VMS) +# define UCL_OS_VMS 1 +#else +# define UCL_OS_POSIX 1 +#endif + +/* memory checkers */ +#if !defined(__UCL_CHECKER) +# if defined(__BOUNDS_CHECKING_ON) +# define __UCL_CHECKER 1 +# elif defined(__CHECKER__) +# define __UCL_CHECKER 1 +# elif defined(__INSURE__) +# define __UCL_CHECKER 1 +# elif defined(__PURIFY__) +# define __UCL_CHECKER 1 +# endif +#endif + +/* fix ancient compiler versions */ +#if (UINT_MAX == UCL_0xffffL) +#if (defined(__MSDOS__) && defined(__TURBOC__) && (__TURBOC__ < 0x0410)) || (defined(MSDOS) && 
defined(_MSC_VER) && (_MSC_VER < 700)) +# if !defined(__cdecl) +# define __cdecl cdecl +# endif +# if !defined(__far) +# define __far far +# endif +# if !defined(__huge) +# define __huge huge +# endif +# if !defined(__near) +# define __near near +# endif +#endif +#endif + + +/*********************************************************************** +// integral and pointer types +************************************************************************/ + +/* Integral types with 32 bits or more */ +#if !defined(UCL_UINT32_MAX) +# if (UINT_MAX >= UCL_0xffffffffL) + typedef unsigned int ucl_uint32; + typedef int ucl_int32; +# define UCL_UINT32_MAX UINT_MAX +# define UCL_INT32_MAX INT_MAX +# define UCL_INT32_MIN INT_MIN +# elif (ULONG_MAX >= UCL_0xffffffffL) + typedef unsigned long ucl_uint32; + typedef long ucl_int32; +# define UCL_UINT32_MAX ULONG_MAX +# define UCL_INT32_MAX LONG_MAX +# define UCL_INT32_MIN LONG_MIN +# else +# error "ucl_uint32" +# endif +#endif + +/* ucl_uint is used like size_t */ +#if !defined(UCL_UINT_MAX) +# if (UINT_MAX >= UCL_0xffffffffL) + typedef unsigned int ucl_uint; + typedef int ucl_int; +# define UCL_UINT_MAX UINT_MAX +# define UCL_INT_MAX INT_MAX +# define UCL_INT_MIN INT_MIN +# elif (ULONG_MAX >= UCL_0xffffffffL) + typedef unsigned long ucl_uint; + typedef long ucl_int; +# define UCL_UINT_MAX ULONG_MAX +# define UCL_INT_MAX LONG_MAX +# define UCL_INT_MIN LONG_MIN +# else +# error "ucl_uint" +# endif +#endif + +/* Memory model that allows to access memory at offsets of ucl_uint. 
*/ +#if !defined(__UCL_MMODEL) +# if (UCL_UINT_MAX <= UINT_MAX) +# define __UCL_MMODEL +# elif defined(UCL_OS_DOS16) || defined(UCL_OS_OS216) || defined(UCL_OS_WIN16) +# define __UCL_MMODEL_HUGE 1 +# define __UCL_MMODEL __huge +# define ucl_uintptr_t unsigned long +# else +# define __UCL_MMODEL +# endif +#endif + +/* no typedef here because of const-pointer issues */ +#define ucl_bytep unsigned char __UCL_MMODEL * +#define ucl_charp char __UCL_MMODEL * +#define ucl_voidp void __UCL_MMODEL * +#define ucl_shortp short __UCL_MMODEL * +#define ucl_ushortp unsigned short __UCL_MMODEL * +#define ucl_uint32p ucl_uint32 __UCL_MMODEL * +#define ucl_int32p ucl_int32 __UCL_MMODEL * +#define ucl_uintp ucl_uint __UCL_MMODEL * +#define ucl_intp ucl_int __UCL_MMODEL * +#define ucl_voidpp ucl_voidp __UCL_MMODEL * +#define ucl_bytepp ucl_bytep __UCL_MMODEL * +/* deprecated - use `ucl_bytep' instead of `ucl_byte *' */ +#define ucl_byte unsigned char __UCL_MMODEL + +typedef int ucl_bool; + + +/*********************************************************************** +// function types +************************************************************************/ + +/* name mangling */ +#if !defined(__UCL_EXTERN_C) +# ifdef __cplusplus +# define __UCL_EXTERN_C extern "C" +# else +# define __UCL_EXTERN_C extern +# endif +#endif + +/* calling convention */ +#if !defined(__UCL_CDECL) +# if defined(__GNUC__) || defined(__HIGHC__) || defined(__NDPC__) +# define __UCL_CDECL +# elif defined(UCL_OS_DOS16) || defined(UCL_OS_OS216) || defined(UCL_OS_WIN16) +# define __UCL_CDECL __far __cdecl +# elif defined(UCL_OS_DOS32) || defined(UCL_OS_OS2) || defined(UCL_OS_WIN32) || defined(UCL_OS_WIN64) +# define __UCL_CDECL __cdecl +# else +# define __UCL_CDECL +# endif +#endif + +/* DLL export information */ +#if !defined(__UCL_EXPORT1) +# define __UCL_EXPORT1 +#endif +#if !defined(__UCL_EXPORT2) +# define __UCL_EXPORT2 +#endif + +/* __cdecl calling convention for public C and assembly functions */ +#if 
!defined(UCL_PUBLIC) +# define UCL_PUBLIC(_rettype) __UCL_EXPORT1 _rettype __UCL_EXPORT2 __UCL_CDECL +#endif +#if !defined(UCL_EXTERN) +# define UCL_EXTERN(_rettype) __UCL_EXTERN_C UCL_PUBLIC(_rettype) +#endif +#if !defined(UCL_PRIVATE) +# define UCL_PRIVATE(_rettype) static _rettype __UCL_CDECL +#endif + +/* C++ exception specification for extern "C" function types */ +#if !defined(__cplusplus) +# undef UCL_NOTHROW +# define UCL_NOTHROW +#elif !defined(UCL_NOTHROW) +# define UCL_NOTHROW +#endif + +/* function types */ +typedef int +(__UCL_CDECL *ucl_compress_t) ( const ucl_bytep src, ucl_uint src_len, + ucl_bytep dst, ucl_uintp dst_len, + ucl_voidp wrkmem ); + +typedef int +(__UCL_CDECL *ucl_decompress_t) ( const ucl_bytep src, ucl_uint src_len, + ucl_bytep dst, ucl_uintp dst_len, + ucl_voidp wrkmem ); + +typedef int +(__UCL_CDECL *ucl_optimize_t) ( ucl_bytep src, ucl_uint src_len, + ucl_bytep dst, ucl_uintp dst_len, + ucl_voidp wrkmem ); + +typedef int +(__UCL_CDECL *ucl_compress_dict_t)(const ucl_bytep src, ucl_uint src_len, + ucl_bytep dst, ucl_uintp dst_len, + ucl_voidp wrkmem, + const ucl_bytep dict, ucl_uint dict_len ); + +typedef int +(__UCL_CDECL *ucl_decompress_dict_t)(const ucl_bytep src, ucl_uint src_len, + ucl_bytep dst, ucl_uintp dst_len, + ucl_voidp wrkmem, + const ucl_bytep dict, ucl_uint dict_len ); + +/* a progress indicator callback function */ +typedef struct +{ + void (__UCL_CDECL *callback) (ucl_uint, ucl_uint, int, ucl_voidp); + ucl_voidp user; +} +ucl_progress_callback_t; +#define ucl_progress_callback_p ucl_progress_callback_t __UCL_MMODEL * + + +/*********************************************************************** +// error codes and prototypes +************************************************************************/ + +/* Error codes for the compression/decompression functions. Negative + * values are errors, positive values will be used for special but + * normal events. 
+ */ +#define UCL_E_OK 0 +#define UCL_E_ERROR (-1) +#define UCL_E_INVALID_ARGUMENT (-2) +#define UCL_E_OUT_OF_MEMORY (-3) +/* compression errors */ +#define UCL_E_NOT_COMPRESSIBLE (-101) +/* decompression errors */ +#define UCL_E_INPUT_OVERRUN (-201) +#define UCL_E_OUTPUT_OVERRUN (-202) +#define UCL_E_LOOKBEHIND_OVERRUN (-203) +#define UCL_E_EOF_NOT_FOUND (-204) +#define UCL_E_INPUT_NOT_CONSUMED (-205) +#define UCL_E_OVERLAP_OVERRUN (-206) + + +/* ucl_init() should be the first function you call. + * Check the return code ! + * + * ucl_init() is a macro to allow checking that the library and the + * compiler's view of various types are consistent. + */ +#define ucl_init() __ucl_init2(UCL_VERSION,(int)sizeof(short),(int)sizeof(int),\ + (int)sizeof(long),(int)sizeof(ucl_uint32),(int)sizeof(ucl_uint),\ + (int)-1,(int)sizeof(char *),(int)sizeof(ucl_voidp),\ + (int)sizeof(ucl_compress_t)) +UCL_EXTERN(int) __ucl_init2(ucl_uint32,int,int,int,int,int,int,int,int,int); + +/* version functions (useful for shared libraries) */ +UCL_EXTERN(ucl_uint32) ucl_version(void); +UCL_EXTERN(const char *) ucl_version_string(void); +UCL_EXTERN(const char *) ucl_version_date(void); +UCL_EXTERN(const ucl_charp) _ucl_version_string(void); +UCL_EXTERN(const ucl_charp) _ucl_version_date(void); + +/* string functions */ +UCL_EXTERN(int) +ucl_memcmp(const ucl_voidp _s1, const ucl_voidp _s2, ucl_uint _len); +UCL_EXTERN(ucl_voidp) +ucl_memcpy(ucl_voidp _dest, const ucl_voidp _src, ucl_uint _len); +UCL_EXTERN(ucl_voidp) +ucl_memmove(ucl_voidp _dest, const ucl_voidp _src, ucl_uint _len); +UCL_EXTERN(ucl_voidp) +ucl_memset(ucl_voidp _s, int _c, ucl_uint _len); + +/* checksum functions */ +UCL_EXTERN(ucl_uint32) +ucl_adler32(ucl_uint32 _adler, const ucl_bytep _buf, ucl_uint _len); +UCL_EXTERN(ucl_uint32) +ucl_crc32(ucl_uint32 _c, const ucl_bytep _buf, ucl_uint _len); +UCL_EXTERN(const ucl_uint32p) +ucl_get_crc32_table(void); + +/* memory allocation hooks */ +typedef ucl_voidp (__UCL_CDECL 
*ucl_malloc_hook_t) (ucl_uint); +typedef void (__UCL_CDECL *ucl_free_hook_t) (ucl_voidp); +UCL_EXTERN(void) +ucl_set_malloc_hooks(ucl_malloc_hook_t, ucl_free_hook_t); +UCL_EXTERN(void) +ucl_get_malloc_hooks(ucl_malloc_hook_t*, ucl_free_hook_t*); + +#ifndef UCL_SAFE_ALLOC +#define UCL_SAFE_ALLOC 1 +#include +#include + +/* safe calloc */ +static +inline +void * +calloc_safe(size_t nmemb, size_t size) +{ + void *result; + + result = calloc(nmemb, size); + + if (!result) + { + fprintf(stderr, "[!] memory error\n"); + exit(EXIT_FAILURE); + } + + return result; +} + +/* safe malloc */ +static +inline +void * +malloc_safe(size_t size) +{ + void *result; + + result = malloc(size); + + if (!result) + { + fprintf(stderr, "[!] memory error\n"); + exit(EXIT_FAILURE); + } + + return result; +} +#endif /* UCL_SAFE_ALLOC */ + +/* memory allocation functions */ +#if 0 +UCL_EXTERN(ucl_voidp) ucl_malloc(ucl_uint); +UCL_EXTERN(ucl_voidp) ucl_alloc(ucl_uint, ucl_uint); +UCL_EXTERN(void) ucl_free(ucl_voidp); +#else +# define ucl_malloc(a) (malloc_safe(a)) +# define ucl_alloc(a, b) (calloc_safe(a, b)) +# define ucl_free(a) (free(a)) +#endif + + +/* misc. 
*/ +UCL_EXTERN(ucl_bool) ucl_assert(int _expr); +UCL_EXTERN(int) _ucl_config_check(void); +typedef union { ucl_bytep p; ucl_uint u; } __ucl_pu_u; +typedef union { ucl_bytep p; ucl_uint32 u32; } __ucl_pu32_u; + +/* align a char pointer on a boundary that is a multiple of `size' */ +UCL_EXTERN(unsigned) __ucl_align_gap(const ucl_voidp _ptr, ucl_uint _size); +#define UCL_PTR_ALIGN_UP(_ptr,_size) \ + ((_ptr) + (ucl_uint) __ucl_align_gap((const ucl_voidp)(_ptr),(ucl_uint)(_size))) + + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* already included */ + diff --git a/tools/z64compress/src/enc/yar.c b/tools/z64compress/src/enc/yar.c new file mode 100644 index 000000000..823b3425d --- /dev/null +++ b/tools/z64compress/src/enc/yar.c @@ -0,0 +1,450 @@ +/* yar.c: decode and encode MM yaz archives */ + +#include +#include +#include +#include + +#define FERR(x) { \ + fprintf(stderr, x); \ + fprintf(stderr, "\n"); \ + exit(EXIT_FAILURE); \ +} + +/* surely an archive won't exceed 64 MB */ +#define YAR_MAX (1024 * 1024 * 64) + +/* align out address before writing compressed file */ +#define FILE_ALIGN \ + while ((outSz % align)) \ + { \ + out[outSz] = 0; \ + outSz += 1; \ + } + +struct yarFile +{ + int idx; /* original index in list */ + int ofs; /* global offset of file */ +}; + +static +unsigned int +u32b(void *src) +{ + unsigned char *arr = src; + + return (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3]; +} + +static +void +u32wr(void *dst, unsigned int src) +{ + unsigned char *arr = dst; + + arr[0] = src >> 24; + arr[1] = src >> 16; + arr[2] = src >> 8; + arr[3] = src; +} + +static +void +progress(const char *name, int progress, int end) +{ + fprintf( + stderr + , "\r""repacking '%s' %d/%d: " + , name + , progress + , end + ); +} + +/* reencode yar archive */ +/* returns 0 on success, pointer to error message otherwise */ +char * +yar_reencode( + unsigned char *src /* source archive */ + , unsigned int sz /* source archive size */ + , unsigned char *dst 
/* destination archive */ + , unsigned int *dst_sz /* destination archive size */ + , int align /* compressed file alignment */ + + , const char *name /* name of archive (0 = hide progress) */ + , const char *codec /* the expected encoding header "Yaz0" */ + , void *imm /* intermediate buffer for conversion */ + , void *ctx /* compression context (if applicable) */ + + /* decompresses file; return non-zero on fail; optional + * if files are already decompressed (up to user to know) + */ + , int decode(void *src, void *dst, unsigned dstSz, unsigned *srcSz) + + /* compress file; returns non-zero on fail; optional if + * files are desired to be left decompressed + */ + , int encode(void *src, unsigned srcSz, void *dst, unsigned *dstSz, void *ctx) + + /* test if file has been previously encoded; optional */ + , int exist(void *src, unsigned srcSz, void *dst, unsigned *dstSz) +) +{ + unsigned char *ss; + unsigned char *out; + unsigned int end; + unsigned int end_out; + unsigned int outSz = 0; + int progress_end; + struct yarFile *list = 0; + struct yarFile *item; + int list_num; + + assert(src); + assert(sz); + assert(dst_sz); + assert(dst); + assert(align >= 0); + assert((align & 3) == 0); /* cannot have alignment smaller than 4 */ + + out = dst; + + end = 0; + + ss = src; + item = list; + do + { + unsigned ofs; + unsigned uncompSz; + unsigned OG_encSz; + unsigned char *b; + + ofs = u32b(ss) + end; + + /* first entry points to end of list, and first file */ + if (!end) + { + end = ofs; + outSz = end; + + /* allocate file list */ + list_num = (end / 4) + 1; + list = calloc(list_num, sizeof(*list)); + if (!list) + return "memory error"; + item = list; + + FILE_ALIGN + + /* output file may be aligned differently */ + end_out = outSz; + + progress_end = end / 4; + } + + /* b now points to compressed file */ + b = src + ofs; + + /* update progress display */ + if (name) + progress(name, (ss - src) / 4, progress_end); + + /* there should be room for 4-byte codec and 4-byte 
size */ + if (b + 4 >= src + sz) + break; + + /* decompressed file size is second word */ + uncompSz = u32b(b + 4); + + /* yaz-encoded file */ + if (!memcmp(b, codec, 4)) + { + unsigned char *fout = out + outSz; + unsigned encSz; + + /* user doesn't want encoded data */ + if (!encode) + { + imm = fout; + encSz = uncompSz; + } + + /* decode 'b' only if user provided decoder */ + if (decode) + { + if (decode(b, imm, uncompSz, &OG_encSz)) + return "decoder error"; + } + /* if no decoder is provided, direct copy */ + else + memcpy(imm, b + 0x10, uncompSz); + + /* encode only if user wants that */ + if (encode) + { + /* if no exist function has been provided, or + * if it hasn't been encoded yet, encode it + */ + if (!exist || !exist(imm, uncompSz, fout, &encSz)) + { + if (encode(imm, uncompSz, fout, &encSz, ctx)) + return "encoder error"; + } + } + + /* point current entry to new file location */ + if (ss > src) + u32wr(out + (ss - src), outSz - end_out); + + /* first entry follows different rules */ + else + u32wr(out + (ss - src), end_out); + + /* advance out_sz to immediately after current file */ + outSz += encSz; + + /* align output */ + FILE_ALIGN + } + + /* end of list */ + else if (u32b(b) == 0) + break; + + /* unknown codec */ + else + { + char *errmsg = (char*)out; + char srep[16]; + sprintf(srep, "%08x", u32b(b)); + sprintf( + errmsg + , "unknown codec 0x%s encountered at %08X!\n" + , srep + , ofs + ); + return errmsg; + } + + ss += 4; + item += 1; + + } while (ss - src < end); + + /* update progress display */ + if (name) + progress(name, progress_end, progress_end); + + /* point final entry to end (four 00 bytes) */ + u32wr(out + (ss - src), outSz - end_out); + memset(out + outSz, 0, 16); + outSz += 4; + + /* in case list end changed due to padding, make multiple * + * end-of-list markers throughout the alignment space */ + if (end_out > end) + { + unsigned i; + unsigned last = u32b(out + (end - 4)); + for (i = 0; i < (end_out - end) / 4; ++i) + { + 
u32wr(out + end + i * 4, last); + } + } + + /* align final output size to 16 */ + if (outSz & 15) + outSz += 16 - (outSz & 15); + + /* if new file was constructed, note its size */ + *dst_sz = outSz; + + /* cleanup */ + free(list); + + /* success */ + return 0; +} + +#ifdef YAR_MAIN_TEST +/* + * + * usage example (writes decompressed archive) + * + */ + +/* yaz decoder, courtesy of spinout182 */ +static +int spinout_yaz_dec(void *_src, void *_dst, int dstSz) +{ + unsigned char *src = _src; + unsigned char *dst = _dst; + + int srcPlace = 0, dstPlace = 0; /*current read/write positions*/ + + unsigned int validBitCount = 0; /*number of valid bits left in "code" byte*/ + unsigned char currCodeByte = 0; + + int uncompressedSize = dstSz; + + src += 0x10; + + while(dstPlace < uncompressedSize) + { + /*read new "code" byte if the current one is used up*/ + if(!validBitCount) + { + currCodeByte = src[srcPlace]; + ++srcPlace; + validBitCount = 8; + } + + if(currCodeByte & 0x80) + { + /*direct copy*/ + dst[dstPlace] = src[srcPlace]; + dstPlace++; + srcPlace++; + } + else + { + /*RLE part*/ + unsigned char byte1 = src[srcPlace]; + unsigned char byte2 = src[srcPlace + 1]; + srcPlace += 2; + + unsigned int dist = ((byte1 & 0xF) << 8) | byte2; + unsigned int copySource = dstPlace - (dist + 1); + + unsigned int numBytes = byte1 >> 4; + if(numBytes) + numBytes += 2; + else + { + numBytes = src[srcPlace] + 0x12; + srcPlace++; + } + + /*copy run*/ + int i; + for(i = 0; i < numBytes; ++i) + { + dst[dstPlace] = dst[copySource]; + copySource++; + dstPlace++; + } + } + + /*use next bit from "code" byte*/ + currCodeByte <<= 1; + validBitCount-=1; + } + + return 0; +} + + +/* encodes decompressed data, storing result in dst */ +static +int encode(void *src, int srcSz, void *_dst, int *dstSz, void *ctx) +{ + unsigned char *dst = _dst; + +/* header */ + /* codec */ + memcpy(dst, "raw0", 4); + + /* decompressed size */ + u32wr(dst + 4, srcSz); + + /* 8 more bytes of padding */ + memset(dst + 
8, 0, 8); + +/* contents */ + /* direct copy (data left unencoded; you could encode here though) */ + memcpy(dst + 0x10, src, srcSz); + *dstSz = srcSz + 0x10; + + return 0; +} + +/* checks if data has already been encoded */ +/* if it does, dst is filled with that data and 1 is returned */ +/* 0 is returned otherwise */ +static +int exist(void *src, int srcSz, void *dst, int *dstSz) +{ + return 0; +} + +/* unsafe but it's a test program so it's fine */ +static +unsigned char * +file_read(char *fn, unsigned *sz) +{ + FILE *fp; + unsigned char *raw; + + assert(fn); + assert(sz); + + fp = fopen(fn, "rb"); + if (!fp) + FERR("failed to open file for reading"); + + fseek(fp, 0, SEEK_END); + *sz = ftell(fp); + + if (!sz) + FERR("read file size == 0"); + + fseek(fp, 0, SEEK_SET); + raw = malloc(*sz); + if (!raw) + FERR("memory error"); + + if (fread(raw, 1, *sz, fp) != *sz) + FERR("file read error"); + + fclose(fp); + return raw; +} + +int main(int argc, char *argv[]) +{ + unsigned char *raw; + unsigned int raw_sz; + + unsigned char *out; + unsigned char *imm; + unsigned int out_sz = 0; + + if (argc != 2) + FERR("args: unyar in.yar > out.yar"); + + raw = file_read(argv[1], &raw_sz); + fprintf(stderr, "input file %s:\n", argv[1]); + + /* surely an archive won't exceed 64 MB */ + out = malloc(1024 * 1024 * 64); + imm = malloc(1024 * 1024 * 64); + + yar_reencode( + raw, raw_sz, out, &out_sz, 12, "Yaz0", imm + , spinout_yaz_dec + , encode + , exist + ); + + /* write output to stdout */ + fwrite(out, 1, out_sz, stdout); + + free(raw); + free(out); + free(imm); +} + +#endif /* YAR_MAIN_TEST */ + diff --git a/tools/z64compress/src/enc/yar.h b/tools/z64compress/src/enc/yar.h new file mode 100644 index 000000000..08890949e --- /dev/null +++ b/tools/z64compress/src/enc/yar.h @@ -0,0 +1,36 @@ +/* yar.c: decode and encode MM yaz archives */ + +#ifndef Z64YAR_H_INCLUDED +#define Z64YAR_H_INCLUDED + +/* reencode yar archive */ +/* returns 0 on success, pointer to error message otherwise 
*/ +char * +yar_reencode( + unsigned char *src /* source archive */ + , unsigned int sz /* source archive size */ + , unsigned char *dst /* destination archive */ + , unsigned int *dst_sz /* destination archive size */ + , int align /* compressed file alignment */ + + , const char *name /* name of archive (0 = hide progress) */ + , const char *codec /* the expected encoding header "Yaz0" */ + , void *imm /* intermediate buffer for conversion */ + , void *ctx /* compression context (if applicable) */ + + /* decompresses file; return non-zero on fail; optional + * if files are already decompressed (up to user to know) + */ + , int decode(void *src, void *dst, unsigned dstSz, unsigned *srcSz) + + /* compress file; returns non-zero on fail; optional if + * files are desired to be left decompressed + */ + , int encode(void *src, unsigned srcSz, void *dst, unsigned *dstSz, void *ctx) + + /* test if file has been previously encoded; optional */ + , int exist(void *src, unsigned srcSz, void *dst, unsigned *dstSz) +); + +#endif /* Z64YAR_H_INCLUDED */ + diff --git a/tools/z64compress/src/enc/yaz.c b/tools/z64compress/src/enc/yaz.c new file mode 100644 index 000000000..1cd9b6892 --- /dev/null +++ b/tools/z64compress/src/enc/yaz.c @@ -0,0 +1,470 @@ +#include +#include +#include +#include +#include "stretchy_buffer.h" + +struct yazCtx +{ + uint16_t *c; + uint32_t *cmds; + uint16_t *ctrl; + uint8_t *raws; + uint8_t *ctl; + uint8_t *back; + int *return_data; +}; + +void yazCtx_free(void *_ctx) +{ + struct yazCtx *ctx = _ctx; + + if (!ctx) + return; + + free(ctx->return_data); + sb_free(ctx->c); + sb_free(ctx->raws); + sb_free(ctx->ctrl); + sb_free(ctx->cmds); + sb_free(ctx->ctl); + sb_free(ctx->back); +} + +void *yazCtx_new(void) +{ + struct yazCtx *ctx = calloc(1, sizeof(*ctx)); + + if (!ctx) + return 0; + + /* allocate everything */ + ctx->c = sb_add(ctx->c, 32); + ctx->return_data = malloc(2 * sizeof(*ctx->return_data)); + ctx->raws = sb_add(ctx->raws, 32); + ctx->ctrl = 
sb_add(ctx->ctrl, 32); + ctx->cmds = sb_add(ctx->cmds, 32); + ctx->ctl = sb_add(ctx->ctl , 32); + ctx->back = sb_add(ctx->back, 32); + + return ctx; +} + +// MIO0 encoding +#define MIx 0 + +#define min(MINA, MINB) ( ( (MINA)<(MINB) ) ? (MINA) : (MINB) ) +#define max(MAXA, MAXB) ( ( (MAXA)>(MAXB) ) ? (MAXA) : (MAXB) ) + +#define U32b(u32X) ((u32X)[0]<<24|(u32X)[1]<<16|(u32X)[2]<<8|(u32X)[3]) +#define U16b(u32X) ((u32X)[0]<<8|(u32X)[1]) +#define U32wr(u32DST,u32SRC) (*(u32DST+0))=((u32SRC)>>24)&0xFF,\ + (*(u32DST+1))=((u32SRC)>>16)&0xFF,\ + (*(u32DST+2))=((u32SRC)>>8)&0xFF,\ + (*(u32DST+3))=((u32SRC)>>0)&0xFF +#define U16wr(u16DST,u16SRC) (*(u16DST+0))=((u16SRC)>>8)&0xFF,\ + (*(u16DST+1))=((u16SRC)>>0)&0xFF + +static uint16_t *_enc_next_cpy(struct yazCtx *ctx, uint8_t *back) { + stb__sbn(ctx->c)=0; // initialize count to 0 + int x; + for (x=0; x < (sb_count(back) & (0xFFFFFFFE)); x += 2) { + sb_push(ctx->c, (back[x]<<8) | back[x+1]); + } + return ctx->c; +} + +static uint32_t _enc_z_from_tables(struct yazCtx *ctx, uint8_t *ctl, uint8_t *back, uint8_t *values, uint8_t *output, int dec_s, const char *mode) { + //_enc_next_cpy(NULL); + uint8_t *b=ctl, *v=values; + uint16_t *c = _enc_next_cpy(ctx, back); + uint32_t bit=0x10000, output_position=0; + // if dec_s declared, will keep accurate track + while (dec_s > 0) { + // get next bit + if (bit > 0xFFFF) { + bit = b[0]; + b++; + output[output_position++] = bit & 0xFF; + bit |= 0x100; + } + // catch end of control commands + if (bit & 0x80) { + output[output_position++] = v[0]; + v++; + dec_s--; + } else { + uint16_t val=c[0]; + c++; + output[output_position++] = ((val>>8)&0xFF); + output[output_position++] = ((val)&0xFF); + + // decrement dec_s accurately with length + val>>=12; + val&=0xF; + if(MIx) + dec_s--; + else if(val==0) { + val = v[0]; + v++; + output[output_position++]=val; + val+=16; + } + dec_s -= val+2; + } + bit <<= 1; + } + return output_position; +} + +static int _enc_find(struct yazCtx *ctx, uint8_t 
*array, uint8_t *needle, int needle_len, int start_index, int source_length) { + while(start_index < (source_length - needle_len + 1)) { + int r, index = -1; + for(r=start_index; r < (source_length - needle_len + 1); r++) { + if(array[r]==needle[0]) { + index=r; + break; + } + } + + // if we did not find even the first element, the search has failed + if (index == -1) + return -1; + + int i, p; + // check for needle + for (i = 0, p = index; i < needle_len; i++, p++) { + if (array[p] != needle[i]) + break; + } + if(i==needle_len) { + // needle was found + return index; + } + // continue to search for needle + start_index = index + 1; + } + return -1; +} + +static int *_enc_search(struct yazCtx *ctx, uint8_t *data, uint32_t pos, uint32_t sz, uint32_t cap/*=0x111*/) { + int *return_data = ctx->return_data; + // this is necessary unless pos is signed, so let's play it safe + int mp = (pos>0x1000)?(pos-0x1000):0; + int ml = min(cap, sz - pos); + if(ml<3) { + return_data[0]=return_data[1]=0; + return return_data; + } + int + hitp = 0, + hitl = 3, + hl = -1 + ; + + if (mp < pos) { + hl = _enc_find(ctx, data+mp, data+pos, hitl, 0, pos + hitl - mp); + while (hl < (pos - mp)) { + while ((hitl < ml) && (data[pos + hitl] == data[mp + hl + hitl]) ) { + hitl += 1; + } + mp += hl; + hitp = mp; + if (hitl == ml) { + return_data[0] = hitp; + return_data[1] = hitl; + return return_data; + } + mp += 1; + hitl += 1; + if (mp >= pos) + break; + hl = _enc_find(ctx, data+mp, data+pos, hitl, 0, pos + hitl - mp); + } + } + + // if length < 4, return miss + if (hitl < 4) + hitl = 1; + + return_data[0] = hitp; + return_data[1] = hitl-1; + return return_data; +} + +static +uint32_t encode(struct yazCtx *ctx, uint8_t *data, uint32_t data_size, uint8_t *output, const char *mode) { + uint32_t + cap=0x111, + sz=data_size, + pos=0, + flag=0x80000000 + ; + // initialize count of each to 0 + stb__sbn(ctx->raws)=0; + stb__sbn(ctx->ctrl)=0; + stb__sbn(ctx->cmds)=0; + + sb_push(ctx->cmds, 0); + + 
if(data_size==0) { + memcpy(output, mode, 4); + int i; + for(i=4; i<16; i++) + output[i]=0x00; + return 16; + } + while(posraws, data[pos]); + ctx->cmds[sb_count(ctx->cmds)-1] |= flag; + pos += 1; + } else { + search_return = _enc_search(ctx, data, pos+1, sz, cap); + int tstp = search_return[0]; + int tstl = search_return[1]; + + if ((hitl + 1) < tstl) { + sb_push(ctx->raws, data[pos]); + ctx->cmds[sb_count(ctx->cmds)-1] |= flag; + pos += 1; + flag >>= 1; + if (flag == 0) { + flag = 0x80000000; + sb_push(ctx->cmds, 0); + } + hitl = tstl; + hitp = tstp; + } + int e = pos - hitp - 1; + pos += hitl; + // handle MIx first, then Yax conditions + if (cap == 0x12) { + hitl -= 3; + sb_push(ctx->ctrl, (hitl<<12) | e); + } else if (hitl < 0x12) { + hitl -= 2; + sb_push(ctx->ctrl, (hitl<<12)|e); + } else { + sb_push(ctx->ctrl, e); + sb_push(ctx->raws, hitl - 0x12); + } + } + // advance the flag and refill if required + flag >>= 1; + if (flag == 0) { + flag = 0x80000000; + sb_push(ctx->cmds, 0);//cmds.push_back(0); + } + } + + // if no cmds in final word, delete it + if (flag == 0x80000000) { + stb__sbn(ctx->cmds) -= 1;//cmds.erase(cmds.end()-1); + } + + // block and stream differentiation + // Yay is block, Yaz is stream + int mode_block=1, mode_stream=1; // temporary, for testing +#ifdef YAZ_MAIN_TEST + int g_hlen = 8; +#else + extern int g_hlen; +#endif + mode_block=!strcmp(mode,"Yay0"); + if (g_hlen) { + memcpy(output, mode, 4); + U32wr(output+4, sz); + } else + output -= 8; /* headerless */ + if (mode_block) { + uint32_t l = (sb_count(ctx->cmds) << 2) + 16; + uint32_t o = (sb_count(ctx->ctrl) << 1) + l; + U32wr(output+8, l); + U32wr(output+12, o); + + uint32_t output_position = g_hlen + 8; + uint32_t x; + for (x=0; xcmds); x++) { + U32wr(output+output_position, ctx->cmds[x]); + output_position+=4; + } + for (x=0; xctrl); x++) { + U16wr(output+output_position, ctx->ctrl[x]); + output_position+=2; + } + for (x=0; xraws); x++) { + output[output_position++] = ctx->raws[x]; + 
} + return output_position; + } else if(mode_stream) { + U32wr(output+8, 0); + U32wr(output+12, 0); + + uint32_t output_position = 0; + stb__sbn(ctx->ctl)=0; // initialize count to 0 + stb__sbn(ctx->back)=0; // initialize count to 0 + uint32_t x; + for (x=0; x < sb_count(ctx->cmds); x++) { + sb_push(ctx->ctl, (ctx->cmds[x]>>24)&0xFF); + sb_push(ctx->ctl, (ctx->cmds[x]>>16)&0xFF); + sb_push(ctx->ctl, (ctx->cmds[x]>>8)&0xFF); + sb_push(ctx->ctl, (ctx->cmds[x])&0xFF); + } + for (x=0; x < sb_count(ctx->ctrl); x++) { + sb_push(ctx->back, (ctx->ctrl[x]>>8)&0xFF); + sb_push(ctx->back, (ctx->ctrl[x])&0xFF); + } + output_position = _enc_z_from_tables(ctx, ctx->ctl, ctx->back, ctx->raws, output+g_hlen+8, data_size, mode); + return output_position + g_hlen + 8; + } + return 0; +} + + +int +yazenc( + void *_src + , unsigned src_sz + , void *_dst + , unsigned *dst_sz + , void *_ctx +) +{ + unsigned char *src = _src; + unsigned char *dst = _dst; + if (!_ctx) + return 1; + *dst_sz = encode(_ctx, src, src_sz, dst, "Yaz0"); + return 0; +} + +/* yaz decoder, courtesy of spinout182 */ +int +yazdec(void *_src, void *_dst, unsigned dstSz, unsigned *srcSz) +{ + unsigned char *src = _src; + unsigned char *dst = _dst; + + int srcPlace = 0, dstPlace = 0; /*current read/write positions*/ + + unsigned int validBitCount = 0; /*number of valid bits left in "code" byte*/ + unsigned char currCodeByte = 0; + + int uncompressedSize = dstSz; + + src += 0x10; + + while(dstPlace < uncompressedSize) + { + /*read new "code" byte if the current one is used up*/ + if(!validBitCount) + { + currCodeByte = src[srcPlace]; + ++srcPlace; + validBitCount = 8; + } + + if(currCodeByte & 0x80) + { + /*direct copy*/ + dst[dstPlace] = src[srcPlace]; + dstPlace++; + srcPlace++; + } + else + { + /*RLE part*/ + unsigned char byte1 = src[srcPlace]; + unsigned char byte2 = src[srcPlace + 1]; + srcPlace += 2; + + unsigned int dist = ((byte1 & 0xF) << 8) | byte2; + unsigned int copySource = dstPlace - (dist + 1); + + 
unsigned int numBytes = byte1 >> 4; + if(numBytes) + numBytes += 2; + else + { + numBytes = src[srcPlace] + 0x12; + srcPlace++; + } + + /*copy run*/ + int i; + for(i = 0; i < numBytes; ++i) + { + dst[dstPlace] = dst[copySource]; + copySource++; + dstPlace++; + } + } + + /*use next bit from "code" byte*/ + currCodeByte <<= 1; + validBitCount-=1; + } + + if (srcSz) + *srcSz = srcPlace; + + return 0; +} + +#ifdef YAZ_MAIN_TEST + +#define FERR(x) { \ + fprintf(stderr, x); \ + fprintf(stderr, "\n"); \ + exit(EXIT_FAILURE); \ +} + +int main(int argc, char* argv[]) +{ + FILE *fp; + struct yazCtx *ctx; + unsigned size; + + if(argc < 2) + FERR("args: yazenc in.raw > out.yaz"); + + fp = fopen(argv[1], "rb"); + if(fp == NULL) + FERR("failed to open file"); + + fseek(fp, 0, SEEK_END); + size = ftell(fp); + fseek(fp, 0, SEEK_SET); + + fprintf(stderr, "input file size: %d\n", size); + + void *buf = malloc(size); + void *outbuf = malloc( (size + 64) * 2); + + if (fread(buf, 1, size, fp) != size) + FERR("failed to read file"); + + fclose(fp); + + ctx = yazCtx_new(); + if (yazenc(buf, size, outbuf, &size, ctx)) + FERR("encoding error"); + + if (fwrite(outbuf, 1, size, stdout) != size) + FERR("failed to write stdout"); + + yazCtx_free(ctx); + free(buf); + free(outbuf); + return EXIT_SUCCESS; +} +#endif /* YAZ_MAIN_TEST */ + + diff --git a/tools/z64compress/src/enc/zlib.c b/tools/z64compress/src/enc/zlib.c new file mode 100644 index 000000000..28b7fd756 --- /dev/null +++ b/tools/z64compress/src/enc/zlib.c @@ -0,0 +1,103 @@ +#include +#include +#include +#include +#include "libdeflate/libdeflate.h" + +#define CAPACITY (1024 * 1024 * 4) /* output buffer max (4 mb) */ + +int +zlibenc( + void *_src + , unsigned src_sz + , void *_dst + , unsigned *dst_sz + , void *_ctx +) +{ + unsigned char *src = _src; + unsigned char *dst = _dst; + unsigned result_sz; + + extern int g_hlen; /* header length */ + memset(dst, 0, g_hlen); + memcpy(dst, "ZLIB", 4); + dst[4] = (src_sz >> 24); + dst[5] = 
(src_sz >> 16); + dst[6] = (src_sz >> 8); + dst[7] = (src_sz >> 0); + + /* zlib and gzip have different header lengths + * https://stackoverflow.com/a/68538037 + */ +#if 1 + + #if 0 /* zlib */ + z_stream stream = {0}; + int r; + stream.avail_in = src_sz; + stream.next_in = src; + stream.avail_out = CAPACITY; + stream.next_out = dst + g_hlen; + #define HEADER_LEN 2 + if ((r = deflateInit(&stream, Z_BEST_COMPRESSION)) != Z_OK) + { + fprintf(stderr, "[!] fatal compression error %d\n", r); + exit(EXIT_FAILURE); + } + if ((r = deflate(&stream, Z_FINISH)) == Z_STREAM_ERROR) + { + fprintf(stderr, "[!] Z_STREAM_ERROR\n"); + exit(EXIT_FAILURE); + } + deflateEnd(&stream); + + result_sz = CAPACITY - stream.avail_out; + #else /* libdeflate */ + #define HEADER_LEN 0 + int level = 12; + struct libdeflate_compressor *compressor; + compressor = libdeflate_alloc_compressor(level); + result_sz = libdeflate_deflate_compress( + compressor + , src, src_sz + , dst + g_hlen + , CAPACITY + ); + libdeflate_free_compressor(compressor); + #endif +#else + /* this gzip code was left in for testing purposes; it may + * be useful if matching ique recompression is ever revisited; + * ique matches (except for one byte...) when compressed using + * gzip 1.2.4 or 1.2.4a (they produce identical results), + * available here: https://ftp.gnu.org/gnu/gzip/ + * this is not a compression error, because decompressing the + * recompressed rom produces a rom identical to the original + * decompressed ique rom; + * TODO: find out why that byte doesn't match on recompression; + * TODO: once that's working, add --codec ique for those wanting + * matching ique recompression; otherwise, modern zlib works great! 
+ */ + #define HEADER_LEN 10 + FILE *fp = fopen("tmp.bin", "wb"); + fwrite(src, 1, src_sz, fp); + fclose(fp); + system("./gzip -c -9 -n tmp.bin > tmp.bin.gzip"); + fp = fopen("tmp.bin.gzip", "rb"); + fseek(fp, 0, SEEK_END); + result_sz = ftell(fp); + fseek(fp, 0, SEEK_SET); + fread(dst, 1, result_sz, fp); + fclose(fp); +#endif + *dst_sz = result_sz + g_hlen; + + /* trim zlib/gzip header */ + memmove(dst + g_hlen, dst + g_hlen + HEADER_LEN, result_sz); + *dst_sz -= HEADER_LEN; + + return 0; + (void)_ctx; /* -Wunused-parameter */ +} + diff --git a/tools/z64compress/src/enc/zx7.c b/tools/z64compress/src/enc/zx7.c new file mode 100644 index 000000000..a2e898233 --- /dev/null +++ b/tools/z64compress/src/enc/zx7.c @@ -0,0 +1,37 @@ +#if 0 +#include "zx7/zx7.h" +#include "zx7/optimize.c" +#include "zx7/compress.c" +#include "zx7/zx7.c" + +int +zx7enc( + void *_src + , unsigned src_sz + , void *_dst + , unsigned *dst_sz + , void *_ctx +) +{ + unsigned char *src = _src; + unsigned char *dst = _dst; + + extern int g_hlen; /* header length */ + memset(dst, 0, g_hlen); + memcpy(dst, "ZX70", 4); + dst[4] = (src_sz >> 24); + dst[5] = (src_sz >> 16); + dst[6] = (src_sz >> 8); + dst[7] = (src_sz >> 0); + + *dst_sz = ZX7Compress(src, src_sz, dst + g_hlen); + + if (!*dst_sz) + return 1; + + *dst_sz += g_hlen; + + return 0; +} +#endif + diff --git a/tools/z64compress/src/enc/zx7/compress.c b/tools/z64compress/src/enc/zx7/compress.c new file mode 100644 index 000000000..1df625e3d --- /dev/null +++ b/tools/z64compress/src/enc/zx7/compress.c @@ -0,0 +1,160 @@ +/* + * (c) Copyright 2012-2016 by Einar Saukas. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * The name of its author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "zx7.h" + +#if !TARGET_PRIZM + +#ifdef __cplusplus +extern "C" { +#endif + +#include + +static unsigned char* c_output_data; +static unsigned int c_output_index; +static unsigned int c_bit_index; +static int c_bit_mask; + +static inline void c_write_byte(int value) { + c_output_data[c_output_index++] = value; +} + +static void c_write_bit(int value) { + if (c_bit_mask == 0) { + c_bit_mask = 128; + c_bit_index = c_output_index; + c_write_byte(0); + } + if (value > 0) { + c_output_data[c_bit_index] |= c_bit_mask; + } + c_bit_mask >>= 1; +} + +static void write_elias_gamma(int value) { + int i; + + for (i = 2; i <= value; i <<= 1) { + c_write_bit(0); + } + while ((i >>= 1) > 0) { + c_write_bit(value & i); + } +} + +unsigned char *compress( + Optimal *optimal + , unsigned char *input_data + , unsigned int input_size + , long skip + , unsigned int *output_size + , unsigned char *dst +) +{ + unsigned int input_index; + unsigned int input_prev; + int offset1; + int mask; + int i; + + /* calculate and allocate output buffer */ + input_index = input_size-1; + *output_size = (optimal[input_index].bits+18+7)/8 + 3; + unsigned char *ret = dst; + if (!ret) { + return 0; + } + + c_output_data = ret + 3; + + /* un-reverse optimal sequence */ + optimal[input_index].bits = 0; + while (input_index != skip) { + input_prev = input_index - (optimal[input_index].len > 0 ? 
optimal[input_index].len : 1); + optimal[input_prev].bits = input_index; + input_index = input_prev; + } + + c_output_index = 0; + c_bit_mask = 0; + + /* first byte is always literal */ + c_write_byte(input_data[input_index]); + + /* process remaining bytes */ + while ((input_index = optimal[input_index].bits) > 0) { + if (optimal[input_index].len == 0) { + + /* literal indicator */ + c_write_bit(0); + + /* literal value */ + c_write_byte(input_data[input_index]); + + } else { + + /* sequence indicator */ + c_write_bit(1); + + /* sequence length */ + write_elias_gamma(optimal[input_index].len-1); + + /* sequence offset */ + offset1 = optimal[input_index].offset-1; + if (offset1 < 128) { + c_write_byte(offset1); + } else { + offset1 -= 128; + c_write_byte((offset1 & 127) | 128); + for (mask = 1024; mask > 127; mask >>= 1) { + c_write_bit(offset1 & mask); + } + } + } + } + + /* sequence indicator */ + c_write_bit(1); + + /* end marker > MAX_LEN */ + for (i = 0; i < 16; i++) { + c_write_bit(0); + } + c_write_bit(1); + + // decompressed size is first three bytes + ret[0] = (input_size & 0xFF0000) >> 16; + ret[1] = (input_size & 0x00FF00) >> 8; + ret[2] = (input_size & 0x0000FF); + + return ret; +} + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/tools/z64compress/src/enc/zx7/dzx7.c b/tools/z64compress/src/enc/zx7/dzx7.c new file mode 100644 index 000000000..5909f31f7 --- /dev/null +++ b/tools/z64compress/src/enc/zx7/dzx7.c @@ -0,0 +1,138 @@ +/* + * (c) Copyright 2015 by Einar Saukas. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * The name of its author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "zx7.h" + +#ifdef __cplusplus +extern "C" { +#endif + +static unsigned char *d_input_data; +static unsigned char *d_output_data; +static unsigned int d_input_index; +static unsigned int d_output_index; +static unsigned int d_input_size; +static int bit_mask; +static int bit_value; + +static +int d_read_byte(void) { + return d_input_data[d_input_index++]; +} + +static +int d_read_bit(void) { + bit_mask >>= 1; + if (bit_mask == 0) { + bit_mask = 128; + bit_value = d_read_byte(); + } + return bit_value & bit_mask ? 
1 : 0; +} + +static +int read_elias_gamma(void) { + int i; + int value; + + i = 0; + while (!d_read_bit()) { + i++; + } + if (i > 15) { + return -1; + } + value = 1; + while (i--) { + value = value << 1 | d_read_bit(); + } + return value; +} + +int read_offset(void) { + int value; + int i; + + value = d_read_byte(); + if (value < 128) { + return value; + } else { + i = d_read_bit(); + i = i << 1 | d_read_bit(); + i = i << 1 | d_read_bit(); + i = i << 1 | d_read_bit(); + return ((value & 127) | (i << 7)) + 128; + } +} + +static +void d_write_byte(int value) { + d_output_data[d_output_index++] = value; +} + +void d_write_bytes(int offset, int length) { + int i; + while (length-- > 0) { + i = d_output_index-offset; + d_write_byte(d_output_data[i]); + } +} + +unsigned int ZX7GetDecompressedSize(unsigned char* compressedData) { + return compressedData[0] * 65536 + compressedData[1] * 256 + compressedData[2]; +} + +int ZX7Decompress(unsigned char* srcData, unsigned char* destData, unsigned int destLength) { + if (destLength < ZX7GetDecompressedSize(srcData) || !srcData || !destData) { + return -1; + } + + int length; + + d_input_data = srcData + 3; + d_output_data = destData; + + d_input_size = 0; + d_input_index = 0; + d_output_index = 0; + bit_mask = 0; + + d_write_byte(d_read_byte()); + while (1) { + if (!d_read_bit()) { + d_write_byte(d_read_byte()); + } else { + length = read_elias_gamma()+1; + if (length == 0) { + return 0; + } + d_write_bytes(read_offset()+1, length); + } + } +} + +#ifdef __cplusplus +} +#endif diff --git a/tools/z64compress/src/enc/zx7/optimize.c b/tools/z64compress/src/enc/zx7/optimize.c new file mode 100644 index 000000000..e0d59396d --- /dev/null +++ b/tools/z64compress/src/enc/zx7/optimize.c @@ -0,0 +1,167 @@ +/* + * (c) Copyright 2012-2016 by Einar Saukas. All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * The name of its author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include <stdlib.h>
#include <string.h> /* memset */
optimal[skip].bits = 8; + + /* process remaining bytes */ + for (; i < input_size; i++) { + + optimal[i].bits = optimal[i-1].bits + 9; + match_index = input_data[i-1] << 8 | input_data[i]; + best_len = 1; + for (match = &matches[match_index]; *match != 0 && best_len < MAX_LEN; match = &match_slots[*match]) { + offset = i - *match; + if (offset > MAX_OFFSET) { + *match = 0; + break; + } + + for (len = 2; len <= MAX_LEN && i >= skip+len; len++) { + if (len > best_len) { + best_len = len; + bits = optimal[i-len].bits + count_bits(offset, len); + if (optimal[i].bits > bits) { + optimal[i].bits = bits; + optimal[i].offset = offset; + optimal[i].len = len; + } + } else if (max[offset] != 0 && i+1 == max[offset]+len) { + len = i-min[offset]; + if (len > best_len) { + len = best_len; + } + } + if (i < offset+len || input_data[i-len] != input_data[i-len-offset]) { + break; + } + } + min[offset] = i+1-len; + max[offset] = i; + } + match_slots[i] = matches[match_index]; + matches[match_index] = i; + } + + return optimal; +} + + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/tools/z64compress/src/enc/zx7/zx7.c b/tools/z64compress/src/enc/zx7/zx7.c new file mode 100644 index 000000000..06f65d374 --- /dev/null +++ b/tools/z64compress/src/enc/zx7/zx7.c @@ -0,0 +1,46 @@ +/* + * (c) Copyright 2012-2016 by Einar Saukas. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * * The name of its author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "zx7.h" + +#if !TARGET_PRIZM + +#ifdef __cplusplus +extern "C" { +#endif + +// ZX7 Compress the given data, outData is malloc'd and the return value is the length (first 3 bytes of data will be 24-bit size result for convenience) +unsigned int ZX7Compress(unsigned char *srcData, unsigned int inLength, unsigned char *outData) { + unsigned int output_size; + compress(optimize(srcData, inLength, 0), srcData, inLength, 0, &output_size, outData); + + return output_size; +} + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/tools/z64compress/src/enc/zx7/zx7.h b/tools/z64compress/src/enc/zx7/zx7.h new file mode 100644 index 000000000..a3a4e6cda --- /dev/null +++ b/tools/z64compress/src/enc/zx7/zx7.h @@ -0,0 +1,66 @@ +/* + * (c) Copyright 2012-2016 by Einar Saukas. All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * The name of its author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#pragma once + +#ifdef __cplusplus +extern "C" { +#endif + +#if !TARGET_PRIZM + +typedef struct optimal_t { + unsigned int bits; + int offset; + int len; +} Optimal; + +#define MAX_OFFSET 2176 /* range 1..2176 */ +#define MAX_LEN 65536 /* range 2..65536 */ + +Optimal *optimize(unsigned char *input_data, unsigned int input_size, unsigned long skip); + +unsigned char *compress(Optimal *optimal, unsigned char *input_data, unsigned int input_size, long skip, unsigned int *output_size, unsigned char *dst); + +// THOMAS : added these for my use: + +// ZX7 Compress the given data, outData is malloc'd and the return value is the length (first 3 bytes of data will be 24-bit size result for convenience) +unsigned int ZX7Compress(unsigned char *srcData, unsigned int inLength, unsigned char *outData); + +#endif + +// Get decompressed size of ZX7Compress'd data +unsigned int ZX7GetDecompressedSize(unsigned char* compressedData); + +// Decompress the given data. Returns 0 with no errors +int ZX7Decompress(unsigned char* srcData, unsigned char* destData, unsigned int destLength); + +/* free reusable buffers */ +void +zx7_shutdown(void); + +#ifdef __cplusplus +} +#endif diff --git a/tools/z64compress/src/main.c b/tools/z64compress/src/main.c new file mode 100644 index 000000000..104be2262 --- /dev/null +++ b/tools/z64compress/src/main.c @@ -0,0 +1,393 @@ +#include +#include +#include +#include +#include + +#include "wow.h" +#include "rom.h" + +FILE* printer; +int g_hlen = 8; + +static void compress(struct rom *rom, int start, int end) +{ + rom_dma_compress(rom, start, end, 1); +} + +static void skip(struct rom *rom, int start, int end) +{ + rom_dma_compress(rom, start, end, 0); +} + +static void repack(struct rom *rom, int start, int end) +{ + rom_dma_repack( + rom + , start + , end + , "yaz" /* old codec */ + , 0 /* new codec */ + ); +} + +static void do_pattern( + struct rom *rom + , const char *str + , void func(struct rom *rom, int start, int end) +) +{ + const char 
*Ostr = str; + int last_int = -1; + int last_op = 0; + int cur; + int len; + + while (*str) + { + if (*str == '\'' || *str == '"') + { + ++str; + continue; + } + + /* calculate length of current token */ + len = strspn(str, "0123456789xXaAbBcCdDeEfF"); /* allow hex */ + if (!len) /* no len, assume it's an operator */ + len = 1; + + /* is a number or variable */ + if (isdigit(*str) || !strncmp(str, "END", 3)) + { + /* 'END' is shorthand for last entry */ + if (!strncmp(str, "END", 3)) + { + cur = rom_dma_num(rom); + str += 2; + } + + /* otherwise, it's a number */ + else + sscanf(str, "%i", &cur); + + if (cur < last_int) + die( + "invalid pattern '%s'; " + "values are not in ascending order" + , Ostr + ); + + /* apply operations on item(s) */ + if (last_op == '-') + func(rom, last_int, cur); + else + func(rom, cur, cur); + + /* prevents processing this item + * again when 'through' is used + */ + cur += 1; + } + + /* 'through' or 'single item', respectively */ + else if (*str == '-' || *str == ',') + { + if (last_int < 0) + die( + "invalid pattern '%s'; " + "pattern does not begin with number" + , Ostr + ); + last_op = *str; + } + + /* unknown character encountered */ + else + { + die( + "invalid pattern '%s'; " + "encountered unknown operator '%c'" + , Ostr + , *str + ); + } + + /* advance */ + str += len; + last_int = cur; + } +} + +static void usage(void) +{ + /* compression examples for users to adapt to their needs */ + fprintf(printer, "\n"); + fprintf(printer, " compressing oot debug\n"); + fprintf(printer, " --in \"path/to/in.z64\"\n"); + fprintf(printer, " --out \"path/to/out.z64\"\n"); + fprintf(printer, " --mb 32\n"); + fprintf(printer, " --codec yaz\n"); + fprintf(printer, " --cache \"path/to/cache\"\n"); + fprintf(printer, " --dma \"0x12F70,1548\"\n"); + fprintf(printer, " --compress \"9-14,28-END\"\n"); + fprintf(printer, " --threads 4\n"); + fprintf(printer, "\n"); + fprintf(printer, " compressing oot ntsc 1.0\n"); + fprintf(printer, " --in 
\"path/to/in.z64\"\n"); + fprintf(printer, " --out \"path/to/out.z64\"\n"); + fprintf(printer, " --mb 32\n"); + fprintf(printer, " --codec yaz\n"); + fprintf(printer, " --cache \"path/to/cache\"\n"); + fprintf(printer, " --dma \"0x7430,1526\"\n"); + fprintf(printer, " --compress \"10-14,27-END\"\n"); + fprintf(printer, " --threads 4\n"); + fprintf(printer, "\n"); + fprintf(printer, " compressing mm usa\n"); + fprintf(printer, " --in \"path/to/in.z64\"\n"); + fprintf(printer, " --out \"path/to/out.z64\"\n"); + fprintf(printer, " --mb 32\n"); + fprintf(printer, " --codec yaz\n"); + fprintf(printer, " --cache \"path/to/cache\"\n"); + fprintf(printer, " --dma \"0x1A500,1568\"\n"); + fprintf(printer, " --compress \"10-14,23,24,31-END\"\n"); + fprintf(printer, " --skip \"1127\"\n"); + fprintf(printer, " --repack \"15-20,22\"\n"); + fprintf(printer, " --threads 4\n"); + fprintf(printer, "\n"); + fprintf(printer, " arguments\n"); + fprintf(printer, " --in uncompressed input rom\n"); + fprintf(printer, "\n"); + fprintf(printer, " --out compressed output rom\n"); + fprintf(printer, "\n"); + fprintf(printer, " --matching attempt matching compression at the cost of\n"); + fprintf(printer, " some optimizations and reduced performance\n"); + fprintf(printer, "\n"); + fprintf(printer, " --mb how many mb the compressed rom should be\n"); + fprintf(printer, "\n"); + fprintf(printer, " --codec currently supported codecs\n"); + fprintf(printer, " yaz\n"); + fprintf(printer, " ucl\n"); + fprintf(printer, " lzo\n"); + fprintf(printer, " zlib\n"); + fprintf(printer, " aplib\n"); + fprintf(printer, " * to use non-yaz codecs, find patches\n"); + fprintf(printer, " and code on my z64enc repo\n"); + fprintf(printer, "\n"); + fprintf(printer, " --cache is optional and won't be created if\n"); + fprintf(printer, " no path is specified (having a cache\n"); + fprintf(printer, " makes subsequent compressions faster)\n"); + fprintf(printer, " * pro-tip: linux users who don't want a\n"); + 
fprintf(printer, " cache to persist across power cycles\n"); + fprintf(printer, " can use the path \"/tmp/z64compress\"\n"); + fprintf(printer, "\n"); + fprintf(printer, " --dma specify dmadata address and count\n"); + fprintf(printer, "\n"); + fprintf(printer, " --compress enable compression on specified files\n"); + fprintf(printer, "\n"); + fprintf(printer, " --skip disable compression on specified files\n"); + fprintf(printer, "\n"); + fprintf(printer, " --headerless don't write file headers (for iQue)\n"); + fprintf(printer, "\n"); + fprintf(printer, " --repack handles Majora's Mask archives\n"); + fprintf(printer, "\n"); + fprintf(printer, " --threads optional multithreading;\n"); + fprintf(printer, " exclude this argument to disable it\n"); + fprintf(printer, "\n"); + fprintf(printer, " --only-stdout reserve stderr for errors and print\n"); + fprintf(printer, " everything else to stdout\n"); + fprintf(printer, "\n"); + fprintf(printer, " arguments are executed as they\n"); + fprintf(printer, " are parsed, so order matters!\n"); + fprintf(printer, "\n"); +} + +wow_main +{ + struct rom *rom = 0; + const char *Ain = 0; + const char *Aout = 0; + const char *Adma = 0; + const char *Acodec = 0; + const char *Acache = 0; + int Amb = 0; + int Athreads = 0; + bool Amatching = false; + bool Aonly_stdout = false; + bool Aheaderless = false; + wow_main_argv; + + printer = stderr; + for (int i = 1; i < argc; ++i) + { + if (!strcmp(argv[i], "--only-stdout")) + { + setvbuf(stdout, NULL, _IONBF, 0); + printer = stdout; + } + } + + fprintf(printer, "welcome to z64compress 1.0.2 \n"); + + if (argc <= 1) + { + usage(); + return EXIT_FAILURE; + } + + /* hacky argument handling */ + for (int i = 1; i < argc; i += 2) + { + const char *arg = argv[i]; + + /* arguments that do not require additional parameters */ + + if(!strcmp(arg, "--only-stdout")) + { + if (Aonly_stdout) + die("--only-stdout arg provided more than once"); + // handled above + Aonly_stdout = true; + i--; + 
continue; + } + else if (!strcmp(arg, "--matching")) + { + if (Amatching) + die("--matching arg provided more than once"); + Amatching = true; + i--; + continue; + } + else if (!strcmp(arg, "--headerless")) + { + if (Aheaderless) + die("--headerless arg provided more than once"); + Aheaderless = true; + g_hlen = 0; + i--; + continue; + } + + /* arguments with additional parameters */ + + const char *next = argv[i + 1]; + + if (!next) + die("%s missing parameter", arg); + + if (!strcmp(arg, "--in")) + { + if (Ain) + die("--in arg provided more than once"); + Ain = next; + rom = rom_new(Ain); + } + else if (!strcmp(arg, "--out")) + { + if (Aout) + die("--out arg provided more than once"); + Aout = next; + } + else if (!strcmp(arg, "--cache")) + { + if (Acache) + die("--cache arg provided more than once"); + Acache = next; + rom_set_cache(rom, Acache); + } + else if (!strcmp(arg, "--codec")) + { + if (Acodec) + die("--codec arg provided more than once"); + if (!Ain) + die("--dma arg provided before --in arg"); + Acodec = next; + rom_set_codec(rom, Acodec); + } + else if (!strcmp(arg, "--dma")) + { + int num; + int start = 0; + + if (!Acodec) + die("--dma arg provided before --codec arg"); + if (!Ain) + die("--dma arg provided before --in arg"); + if (Adma) + die("--dma arg provided more than once"); + Adma = next; + if (sscanf(Adma, "%i,%i", &start, &num) != 2) + die("--dma bad formatting '%s'", Adma); + rom_dma(rom, start, num, Amatching); + } + else if (!strcmp(arg, "--mb")) + { + if (Amb) + die("--mb arg provided more than once"); + if (sscanf(next, "%i", &Amb) != 1) + die("--mb could not get value from string '%s'", next); + if (Amb <= 0) + die("--mb invalid value %d", Amb); + } + else if (!strcmp(arg, "--compress")) + { + if (!Adma) + die("--compress arg provided before --dma arg"); + do_pattern(rom, next, compress); + } + else if (!strcmp(arg, "--skip")) + { + if (!Adma) + die("--skip arg provided before --dma arg"); + do_pattern(rom, next, skip); + } + else if 
(!strcmp(arg, "--repack")) + { + if (!Adma) + die("--repack arg provided before --dma arg"); + if (!Acodec) + die("--repack arg provided before --codec arg"); + do_pattern(rom, next, repack); + } + else if(!strcmp(arg, "--threads")) + { + if (Athreads) + die("--threads arg provided more than once"); + if (sscanf(next, "%i", &Athreads) != 1) + die("--threads could not get value from string '%s'", next); + if (Athreads < 0) + die("--threads invalid value %d", Athreads); + } + else + { + die("unknown argument '%s'", arg); + } + } + + #define ARG_ZERO_TEST(TEST, NAME) \ + if (!(TEST)) \ + die("no " NAME " arg provided") + + ARG_ZERO_TEST(Ain , "--in" ); + ARG_ZERO_TEST(Aout , "--out" ); + ARG_ZERO_TEST(Acodec, "--codec"); + + #undef ARG_ZERO_TEST + + /* finished initializing dma settings */ + rom_dma_ready(rom, Amatching); + + /* compress rom */ + rom_compress(rom, Amb, Athreads, Amatching); + fprintf(printer, "rom compressed successfully!\n"); + + /* write compressed rom */ + rom_save(rom, Aout); + fprintf(printer, "compressed rom written successfully!\n"); + + /* cleanup */ + rom_free(rom); + + return EXIT_SUCCESS; +} + diff --git a/tools/z64compress/src/n64crc.c b/tools/z64compress/src/n64crc.c new file mode 100644 index 000000000..a34d646ea --- /dev/null +++ b/tools/z64compress/src/n64crc.c @@ -0,0 +1,197 @@ +/* snesrc - SNES Recompiler + * + * Mar 23, 2010: addition by spinout to actually fix CRC if it is incorrect + * + * Copyright notice for this file: + * Copyright (C) 2005 Parasyte + * + * Based on uCON64's N64 checksum algorithm by Andreas Sterbenz + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include + +#define ROL(i, b) (((i) << (b)) | ((i) >> (32 - (b)))) +#define BYTES2LONG(b) ( (b)[0] << 24 | \ + (b)[1] << 16 | \ + (b)[2] << 8 | \ + (b)[3] ) + +#define N64_HEADER_SIZE 0x40 +#define N64_BC_SIZE (0x1000 - N64_HEADER_SIZE) + +#define N64_CRC1 0x10 +#define N64_CRC2 0x14 + +#define CHECKSUM_START 0x00001000 +#define CHECKSUM_LENGTH 0x00100000 +#define CHECKSUM_CIC6102 0xF8CA4DDC +#define CHECKSUM_CIC6103 0xA3886759 +#define CHECKSUM_CIC6105 0xDF26F436 +#define CHECKSUM_CIC6106 0x1FEA617A + + +static void gen_table(unsigned int crc_table[256]) +{ + unsigned int crc, poly; + int i, j; + + poly = 0xEDB88320; + for (i = 0; i < 256; i++) { + crc = i; + for (j = 8; j > 0; j--) { + if (crc & 1) crc = (crc >> 1) ^ poly; + else crc >>= 1; + } + crc_table[i] = crc; + } +} + + +static unsigned int crc32( + unsigned int crc_table[256] + , unsigned char *data + , int len +) +{ + unsigned int crc = ~0; + int i; + + for (i = 0; i < len; i++) { + crc = (crc >> 8) ^ crc_table[(crc ^ data[i]) & 0xFF]; + } + + return ~crc; +} + + +static int N64GetCIC(unsigned int crc_table[256], unsigned char *data) +{ + switch (crc32(crc_table, &data[N64_HEADER_SIZE], N64_BC_SIZE)) { + case 0x6170A4A1: return 6101; + case 0x90BB6CB5: return 6102; + case 0x0B050EE0: return 6103; + case 0x98BC2C86: return 6105; + case 0xACC8580A: return 6106; + } + + return 0; +} + + +static int N64CalcCRC( + unsigned int crc_table[256] + , unsigned int *crc + , unsigned char *data +) +{ + int bootcode, i; + unsigned int seed; + unsigned 
int t1, t2, t3; + unsigned int t4, t5, t6; + unsigned int r, d; + + switch ((bootcode = N64GetCIC(crc_table, data))) { + case 6101: + case 6102: + seed = CHECKSUM_CIC6102; + break; + case 6103: + seed = CHECKSUM_CIC6103; + break; + case 6105: + seed = CHECKSUM_CIC6105; + break; + case 6106: + seed = CHECKSUM_CIC6106; + break; + default: + return 1; + } + + t1 = t2 = t3 = t4 = t5 = t6 = seed; + + i = CHECKSUM_START; + while (i < (CHECKSUM_START + CHECKSUM_LENGTH)) { + d = BYTES2LONG(&data[i]); + if ((t6 + d) < t6) + t4++; + t6 += d; + t3 ^= d; + r = ROL(d, (d & 0x1F)); + t5 += r; + if (t2 > d) + t2 ^= r; + else + t2 ^= t6 ^ d; + + if (bootcode == 6105) + t1 += BYTES2LONG(&data[N64_HEADER_SIZE + 0x0710 + (i & 0xFF)]) ^ d; + else + t1 += t5 ^ d; + + i += 4; + } + if (bootcode == 6103) { + crc[0] = (t6 ^ t4) + t3; + crc[1] = (t5 ^ t2) + t1; + } + else if (bootcode == 6106) { + crc[0] = (t6 * t4) + t3; + crc[1] = (t5 * t2) + t1; + } + else { + crc[0] = t6 ^ t4 ^ t3; + crc[1] = t5 ^ t2 ^ t1; + } + + return 0; +} + + +/* recalculate rom crc */ +void n64crc(void *rom) +{ + unsigned int crc_table[256]; + unsigned char CRC1[4]; + unsigned char CRC2[4]; + unsigned int crc[2]; + unsigned char *rom8 = rom; + + assert(rom); + + gen_table(crc_table); + + if (!N64CalcCRC(crc_table, crc, rom)) + { + unsigned int kk1 = crc[0]; + unsigned int kk2 = crc[1]; + int i; + + for (i = 0; i < 4; ++i) + { + CRC1[i] = (kk1 >> (24-8*i))&0xFF; + CRC2[i] = (kk2 >> (24-8*i))&0xFF; + } + + for (i = 0; i < 4; ++i) + *(rom8 + N64_CRC1 + i) = CRC1[i]; + + for (i = 0; i < 4; ++i) + *(rom8 + N64_CRC2 + i) = CRC2[i]; + } +} + diff --git a/tools/z64compress/src/n64crc.h b/tools/z64compress/src/n64crc.h new file mode 100644 index 000000000..b5342dac6 --- /dev/null +++ b/tools/z64compress/src/n64crc.h @@ -0,0 +1,32 @@ +/* snesrc - SNES Recompiler + * + * Mar 23, 2010: addition by spinout to actually fix CRC if it is incorrect + * + * Copyright notice for this file: + * Copyright (C) 2005 Parasyte + * + * 
Based on uCON64's N64 checksum algorithm by Andreas Sterbenz + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef N64CRC_H_INCLUDED +#define N64CRC_H_INCLUDED + +/* recalculate rom crc */ +void n64crc(void *rom); + +#endif /* N64CRC_H_INCLUDED */ + diff --git a/tools/z64compress/src/rom.c b/tools/z64compress/src/rom.c new file mode 100644 index 000000000..50a7a4914 --- /dev/null +++ b/tools/z64compress/src/rom.c @@ -0,0 +1,1714 @@ +/* + * rom.c + * + * functions for compression magic reside herein + * + * z64me + * + */ + +#include +#include +#include +#include +#include +#include +#include + +/* POSIX dependencies */ +#include +#include +#include + +/* threading */ +#include + +#include "enc/enc.h" /* file compression */ +#include "enc/yar.h" /* MM archive tools */ + +#include "sha1.h" /* sha1 helpers */ +#include "n64crc.h" /* n64crc() */ + +#include "wow.h" +#include "wow_dirent.h" /* XXX always #include after dirent.h */ +#undef fopen +#undef fread +#undef fwrite +#undef remove +#define fopen wow_fopen +#define fread wow_fread +#define fwrite wow_fwrite +#define remove wow_remove + +extern FILE* printer; + +#define SIZE_16MB (1024 * 1024 * 16) +#define SIZE_4MB (1024 * 1024 * 4) + +#define DMA_DELETED 0xffffffff /* aka UINT32_MAX */ + +#define DMASORT(ROM, FUNC) \ + qsort( \ + 
ROM->dma \ + , ROM->dma_num \ + , sizeof(*(ROM->dma)) \ + , FUNC \ + ) + +#define DMASORT_N(ROM, FUNC, NUM) \ + qsort( \ + ROM->dma \ + , NUM \ + , sizeof(*(ROM->dma)) \ + , FUNC \ + ) + +#define DMA_FOR_EACH \ +for (dma = rom->dma; (unsigned)(dma - rom->dma) < rom->dma_num; ++dma) + +#define PROGRESS_A_B (int)(dma - rom->dma), rom->dma_num + +#define ALIGN(x, n) (((x) + ((n)-1)) & ~((n)-1)) +#define ALIGN16(x) ALIGN(x, 16) +#define ALIGN8MB(x) ALIGN(x, 8 * 0x100000) + +/* + * + * private types + * + */ + + +struct encoder +{ + int (*encfunc)( + void *src + , unsigned src_sz + , void *dst + , unsigned *dst_sz + , void *_ctx + ); + void *(*ctx_new)(void); + void (*ctx_free)(void *); +}; + + +struct dma +{ + char *compname; /* name of compressed file */ + void *compbuf; /* cache-less compressed data */ + unsigned int index; /* original index location */ + int compress; /* entry can be compressed */ + int deleted; /* points to deleted file */ + unsigned compSz; /* cache-less compressed size */ + unsigned int start; /* start offset */ + unsigned int end; /* end offset */ + unsigned int Pstart; /* start of physical (P) data */ + unsigned int Pend; /* end of physical (P) data */ + unsigned int Ostart; /* original (O) start */ + unsigned int Oend; /* original (O) end */ +}; + + +struct rom +{ + char *fn; /* filename of loaded rom */ + char *codec; /* compression codec */ + char *cache; /* compression cache */ + unsigned char *data; /* raw rom data */ + unsigned int data_sz; /* size of rom data */ + unsigned int ofs; /* offset where rom_write() writes */ + int is_comp; /* non-0 if rom has been compressed */ + struct dma *dma; /* dma array */ + unsigned int dma_num; /* number of entries in dma array */ + unsigned char *dma_raw; /* pointer to raw dmadata */ + int dma_ready; /* non-zero after dma_ready() */ + + /* memory pools for things like compression */ + struct + { + void *mb16; /* 16 mb */ + void *mb4; /* 4 mb */ + } mem; +}; + + +struct fldr_item +{ + char *name; /* 
name */ + void *udata; /* udata */ +}; + + +struct folder +{ + struct fldr_item *item; /* item array */ + int num; /* number of items in array */ + struct fldr_item *active; /* active item */ +}; + + +struct compThread +{ + struct rom *rom; + void *data; + int (*encfunc)( + void *src + , unsigned src_sz + , void *dst + , unsigned *dst_sz + , void *_ctx + ); + const char *codec; + char *dot_codec; + struct folder *list; + int stride; /* number of entries to advance each time */ + int ofs; /* starting entry in list */ + int report; /* report progress to stderr (last thread only) */ + void *ctx; /* compression context */ + bool matching; + pthread_t pt; /* pthread */ +}; + +/* + * + * private functions + * + */ + + +/* get 32-bit value from raw data */ +static int get32(void *_data) +{ + unsigned char *data = _data; + + return (data[0] << 24) | (data[1] << 16) | (data[2] << 8) | data[3]; +} + + +/* get size of a file; returns 0 if fopen fails */ +static unsigned int file_size(const char *fn) +{ + FILE *fp; + unsigned int sz; + + fp = fopen(fn, "rb"); + if (!fp) + return 0; + + fseek(fp, 0, SEEK_END); + sz = ftell(fp); + fclose(fp); + + return sz; +} + +/* load a file into an existing buffer */ +static void *file_load_into( + const char *dir + , const char *fn + , unsigned int *sz + , void *dst +) +{ + FILE *fp; + + assert(fn); + assert(sz); + assert(dst); + + if (!dir) + dir = ""; + + *sz = 0; + + fp = fopen(fn, "rb"); + if (!fp) + die("failed to open '%s%s' for reading", dir, fn); + + fseek(fp, 0, SEEK_END); + *sz = ftell(fp); + + if (!*sz) + die("size of file '%s%s' is zero", dir, fn); + + fseek(fp, 0, SEEK_SET); + + if (fread(dst, 1, *sz, fp) != *sz) + die("failed to read contents of '%s%s'", dir, fn); + + fclose(fp); + + return dst; +} + +/* load a file */ +static void *file_load(const char *fn, unsigned int *sz) +{ + unsigned char *dst; + + assert(fn); + assert(sz); + + *sz = file_size(fn); + if (!*sz) + die("failed to get size of file '%s'", fn); + + dst = 
malloc_safe(*sz);
+
+	return file_load_into(0, fn, sz, dst);
+}
+
+/* write file
+ * returns the number of bytes written: 'data_sz' on success,
+ * 0 if the file could not be opened for writing (fwrite's
+ * result is propagated, so a short write is also detectable)
+ */
+static unsigned int file_write(
+	const char *fn
+	, void *data
+	, unsigned int data_sz
+)
+{
+	FILE *fp;
+
+	assert(fn);
+	assert(data);
+	assert(data_sz);
+
+	fp = fopen(fn, "wb");
+	if (!fp)
+		return 0;
+
+	data_sz = fwrite(data, 1, data_sz, fp);
+
+	fclose(fp);
+
+	return data_sz;
+}
+
+
+/* allocate a folder structure and parse current working directory
+ * dies on any error, so it never returns NULL;
+ * free the result with folder_free()
+ */
+static struct folder *folder_new(void)
+{
+	wow_DIR *dir;
+	struct wow_dirent *ep;
+	struct fldr_item *item;
+	struct folder *folder;
+	char cwd[4096];
+	int count = 0;
+	int recount = 0;
+
+	/* allocate a folder */
+	folder = calloc_safe(1, sizeof(*folder));
+
+	/* get current working directory for error reporting */
+	wow_getcwd_safe(cwd, sizeof(cwd));
+
+	/* first pass: count the contents */
+	dir = wow_opendir(".");
+	if (!dir)
+		die("failed to parse directory '%s'", cwd);
+	while ((ep = wow_readdir(dir)))
+		count += 1;
+	wow_closedir(dir);
+
+	/* folder is empty */
+	if (!count)
+		die("folder '%s' is empty", cwd);
+
+	/* allocate item array
+	 * NOTE(review): 'count' includes entries the second pass
+	 * skips ('.'-prefixed names, directories), so the array can
+	 * be over-allocated; the unused slots stay zeroed (calloc)
+	 * and are ignored by consumers that test item->name
+	 */
+	item = calloc_safe(count, sizeof(*item));
+	folder->item = item;
+	folder->num = count;
+
+	/* second pass: retrieve requested contents; aborts below if
+	 * the directory changed size between the two passes
+	 */
+	dir = wow_opendir(".");
+	if (!dir)
+		die("failed to parse directory '%s'", cwd);
+	for (
+		recount = 0
+		; (ep = wow_readdir(dir)) && recount < count
+		; ++recount, ++item
+	)
+	{
+		const char *dn;
+
+		dn = (const char*)ep->d_name;
+
+		/* skip names starting with '.' (covers both "."
and "..") */
+		if (*dn == '.')
+			continue;
+
+		/* skip directories */
+		if (wow_is_dir(dn))
+			continue;
+
+		/* make a copy of the string (freed by folder_free) */
+		item->name = strdup_safe(dn);
+	}
+
+	/* directory grew or shrank between the two passes */
+	if (recount != count)
+		die("contents of '%s' changed during read, try again", cwd);
+	wow_closedir(dir);
+
+	return folder;
+}
+
+
+/* free a folder structure (NULL is a safe no-op) */
+static void folder_free(struct folder *folder)
+{
+	if (!folder)
+		return;
+
+	/* folder contains items */
+	if (folder->item)
+	{
+		struct fldr_item *item;
+
+		/* walk item list, freeing resources owned by each */
+		for (
+			item = folder->item
+			; item - folder->item < folder->num
+			; ++item
+		)
+		{
+			if (item->name)
+				free(item->name);
+		}
+
+		/* free item list */
+		free(folder->item);
+	}
+
+	/* free folder */
+	free(folder);
+}
+
+
+/* locate folder item by name, ignoring extension (such as '.raw')
+ * returns 0 when no item matches
+ */
+static struct fldr_item *folder_findNameNoExt(
+	struct folder *folder
+	, char *name
+)
+{
+	struct fldr_item *item;
+
+	assert(folder);
+	assert(name);
+
+	for (
+		item = folder->item
+		; item - folder->item < folder->num
+		; ++item
+	)
+	{
+		char *period;
+		int nchar;
+
+		/* item has no name */
+		if (!item->name)
+			continue;
+
+		/* doesn't contain a period */
+		if (!(period = strrchr(item->name, '.')))
+			continue;
+
+		/* number of bytes to compare */
+		nchar = period - item->name;
+
+		/* names match
+		 * NOTE(review): only the first nchar bytes of 'name' are
+		 * compared, so a longer 'name' sharing this prefix would
+		 * also match; fine for the fixed-length sha1-readable
+		 * strings callers pass -- confirm before reusing elsewhere
+		 */
+		if (!memcmp(item->name, name, nchar))
+			return item;
+	}
+
+	return 0;
+}
+
+
+/* retrieve encoder from name; dies on an unknown codec */
+static const struct encoder *encoder(const char *name)
+{
+	if (!strcmp(name, "yaz"))
+	{
+		static const struct encoder yaz = {
+			.encfunc = yazenc
+			, .ctx_new = yazCtx_new
+			, .ctx_free = yazCtx_free
+		};
+
+		return &yaz;
+	}
+	else if (!strcmp(name, "lzo"))
+	{
+		static const struct encoder lzo = {
+			.encfunc = lzoenc
+			, .ctx_new = lzoCtx_new
+			, .ctx_free = lzoCtx_free
+		};
+
+		return &lzo;
+	}
+	else if (!strcmp(name, "ucl"))
+	{
+		static const struct encoder ucl = {
+			.encfunc = uclenc
+		};
+
+		return
&ucl; + } + /*else if (!strcmp(name, "zx7")) + { + static const struct encoder zx7 = { + .encfunc = zx7enc + }; + + return &zx7; + }*/ + else if (!strcmp(name, "zlib")) + { + static const struct encoder zlib = { + .encfunc = zlibenc + }; + + return &zlib; + } + else if (!strcmp(name, "aplib")) + { + static const struct encoder aplib = { + .encfunc = aplenc + }; + + return &aplib; + } + else + die("unknown compression codec '%s'", name); + + return 0; +} + + +/* sort dma array by start, ascending */ +static int sortfunc_dma_start_ascend(const void *_a, const void *_b) +{ + const struct dma *a = _a; + const struct dma *b = _b; + + if (a->start < b->start) + return -1; + + else if (a->start > b->start) + return 1; + + return 0; +} + + +/* sort dma array by size, descending */ +static int sortfunc_dma_size_descend(const void *_a, const void *_b) +{ + const struct dma *a = _a; + const struct dma *b = _b; + + unsigned int a_len = a->end - a->start; + unsigned int b_len = b->end - b->start; + + if (a_len < b_len) + return 1; + + else if (a_len > b_len) + return -1; + + return 0; +} + + +/* enter a directory (will be created if it doesn't exist) */ +static void dir_enter(const char *dir) +{ + /* unable to enter directory */ + if (wow_chdir(dir)) + { + /* attempt to create directory */ + if (wow_mkdir(dir)) + die("failed to create directory '%s'", dir); + + if (wow_chdir(dir)) + die("failed to enter directory '%s'", dir); + } +} + + +static void report_progress( + struct rom *rom + , const char *codec + , int v + , int total +) +{ + /* caching enabled */ + if (rom->cache) + fprintf( + printer + , "\r""updating '%s/%s' %d/%d: " + , rom->cache + , codec + , v + , total + ); + + else + fprintf( + printer + , "\r""compressing file %d/%d: " + , v + , total + ); +} + +/* compress a list of files */ +static void dma_compress( + struct rom *rom + , void *compbuf + , int encfunc( + void *src + , unsigned src_sz + , void *dst + , unsigned *dst_sz + , void *_ctx + ) + , const char 
*codec + , char *dot_codec + , struct folder *list + , int stride /* number of entries to advance each time */ + , int ofs /* starting entry in list */ + , int report /* report progress to stderr (last thread only) */ + , void *ctx /* compression context */ + , bool matching +) +{ + struct dma *dma; + struct fldr_item *item; + + for (dma = rom->dma + ofs + ; (unsigned)(dma - rom->dma) < rom->dma_num + ; dma += stride + ) + { + char *iname = 0; + unsigned char *data = rom->data + dma->start; + unsigned char checksum[64]; + char readable[64]; + int len = dma->end - dma->start; + + /* report the progress */ + if (report) + report_progress(rom, codec, PROGRESS_A_B); + + /* skip files that have a size of 0 */ + if (dma->start == dma->end) + continue; + + /* caching is disabled, just compress */ + if (!rom->cache) + { + int err; + dma->compbuf = compbuf; + + /* don't compress this file */ + if (!dma->compress) + { + dma->compSz = dma->end - dma->start; + dma->compbuf = memdup_safe( + rom->data + dma->start + , dma->compSz + ); + continue; + } + + err = + encfunc( + rom->data + dma->start + , dma->end - dma->start + , dma->compbuf + , &dma->compSz + , ctx + ); + + /* file doesn't benefit from compression */ + if (!matching && dma->compSz >= dma->end - dma->start) + { + dma->compSz = dma->end - dma->start; + dma->compbuf = memdup_safe( + rom->data + dma->start + , dma->compSz + ); + dma->compress = 0; + } + else + dma->compbuf = memdup_safe(dma->compbuf, dma->compSz); + + if (err) + die("compression error"); + + /* the rest of the loop applies only to caches */ + continue; + } + + /* get readable checksum name */ + stb_sha1(checksum, data, len); + stb_sha1_readable(readable, checksum); + + /* see if item already exists in folder */ + item = folder_findNameNoExt(list, readable); + if (item) + { + /* use full file name, including extension */ + iname = item->name; + + /* it exists, so use udata to mark the file as used */ + item->udata = dot_codec; + dma->compSz = 
file_size(iname); + + /* uncompressed file */ + if (strstr(iname, ".raw")) + dma->compress = 0; + } + /* item doesn't exist, so create it */ + else + { + void *out = compbuf; + unsigned out_sz; + int err; + + /* file not marked for compression */ + if (!dma->compress) + { + out = rom->data + dma->start; + out_sz = dma->end - dma->start; + dma->compress = 0; + strcat(readable, ".raw"); + + /* write file */ + if (file_write(readable, out, out_sz) != out_sz) + die("error writing file 'cache/%s/%s'", codec, readable); + + dma->compSz = out_sz; + dma->compname = strdup_safe(readable); + + /* the remaining block applies only to compressed files */ + continue; + } + + err = + encfunc( + rom->data + dma->start + , dma->end - dma->start + , out + , &out_sz + , ctx + ); + + if (err) + die("compression error"); + + /* file doesn't benefit from compression */ + if (!matching && out_sz >= dma->end - dma->start) + { + out = rom->data + dma->start; + out_sz = dma->end - dma->start; + dma->compress = 0; + strcat(readable, ".raw"); + } + /* file benefits from compression */ + else + /* add encoding as extension, ex '.yaz' */ + strcat(readable, dot_codec); + + /* write file */ + if (file_write(readable, out, out_sz) != out_sz) + die("error writing file 'cache/%s/%s'", codec, readable); + + dma->compSz = out_sz; + iname = readable; + } + + /* back up compressed filename to + * avoid having to re-checksum later + */ + dma->compname = strdup_safe(iname); + } +} + + +static void *dma_compress_threadfunc(void *_CT) +{ + struct compThread *CT = _CT; + + dma_compress( + CT->rom + , CT->data + , CT->encfunc + , CT->codec + , CT->dot_codec + , CT->list + , CT->stride + , CT->ofs + , CT->report + , CT->ctx + , CT->matching + ); + + return 0; +} + + +static void dma_compress_thread( + struct compThread *CT + , struct rom *rom + , void *compbuf + , int encfunc( + void *src + , unsigned src_sz + , void *dst + , unsigned *dst_sz + , void *_ctx + ) + , const char *codec + , char *dot_codec + , 
struct folder *list + , int stride /* number of entries to advance each time */ + , int ofs /* starting entry in list */ + , int report /* report progress to stderr (last thread only) */ + , void *ctx /* compression context */ + , bool matching +) +{ + CT->rom = rom; + CT->data = compbuf; + CT->encfunc = encfunc; + CT->codec = codec; + CT->dot_codec = dot_codec; + CT->list = list; + CT->stride = stride; + CT->ofs = ofs; + CT->report = report; + CT->ctx = ctx; + CT->matching = matching; + + if (pthread_create(&CT->pt, 0, dma_compress_threadfunc, CT)) + die("threading error"); +} + + +/* get dma entry by original index (useful after reordering) */ +static struct dma *dma_get_idx(struct rom *rom, unsigned idx) +{ + struct dma *dma; + + assert(idx < rom->dma_num && "dma index too high"); + + /* walk dma list for matching index */ + DMA_FOR_EACH + { + if (dma->index == idx) + break; + } + + return dma; +} + + +/* write 'num' bytes to rom and advance */ +static void rom_write(struct rom *rom, void *data, int sz) +{ + unsigned char *raw; + + assert(rom); + assert(rom->data); + assert(data); + assert(sz); + + if (rom->ofs + sz > rom->data_sz) + die( + "can't write %d bytes at 0x%X b/c it exceeds rom size" + , sz, rom->ofs + ); + + raw = rom->data + rom->ofs; + + memcpy(raw, data, sz); + + rom->ofs += sz; +} + + +/* write 32 bit value to rom and advance */ +static void rom_write32(struct rom *rom, unsigned int value) +{ + unsigned char raw[4]; + + raw[0] = value >> 24; + raw[1] = value >> 16; + raw[2] = value >> 8; + raw[3] = value; + + rom_write(rom, raw, 4); +} + + +/* write dma table into rom */ +static void rom_write_dmadata(struct rom *rom) +{ + struct dma *dma; + int num; + int numUsed; + + assert(rom); + assert(rom->dma); + assert(rom->dma_raw); + + dma = rom->dma; + num = rom->dma_num; + + /* sort all entries by size, descending */ + DMASORT(rom, sortfunc_dma_size_descend); + + /* find first entry where size == 0 (aka first unused entry) */ + for (dma = rom->dma; 
dma - rom->dma < num; ++dma) + if (dma->start == dma->end) + break; + numUsed = dma - rom->dma; + + /* sort all used entries by start address, ascending */ + DMASORT_N(rom, sortfunc_dma_start_ascend, numUsed); + + /* + * at this point, unused entries have been moved to the end + */ + + /* zero the table */ + memset(rom->dma_raw, 0, num * 16); + + /* write every entry */ + rom->ofs = rom->dma_raw - rom->data; + for (dma = rom->dma; dma - rom->dma < num; ++dma) + { + rom_write32(rom, dma->start); + rom_write32(rom, dma->end); + rom_write32(rom, dma->Pstart); + rom_write32(rom, dma->Pend); + + /* early end condition: all entries have been written */ + if (!dma->end) + break; + } +} + + +/* + * + * public functions + * + */ + +/* compress rom using specified algorithm */ +void rom_compress(struct rom *rom, int mb, int numThreads, bool matching) +{ + struct dma *dma; + struct folder *list = 0; + struct fldr_item *item; + char *dot_codec = 0; + const char *codec; + char cwd[4096] = {0}; + char cache_codec[4096] = {0}; + const char *cache; + const struct encoder *enc = 0; + unsigned int compsz = mb * 0x100000; + unsigned int comp_total = 0; + unsigned int largest_compress = 1024; + float total_compressed = 0; + float total_decompressed = 0; + struct compThread *compThread = 0; + int dma_num = rom->dma_num; + int i; + + assert(rom); + assert(rom->dma); + assert(rom->dma_ready); + assert(rom->is_comp == 0 && "rom_compressed called more than once"); + + rom->is_comp = 1; + + if (numThreads <= 0) + numThreads = 1; + + /* default codec = yaz */ + if (!(codec = rom->codec)) + codec = "yaz"; + + cache = rom->cache; + + if (compsz > rom->data_sz || mb < 0) + die("invalid mb argument %d", mb); + + /* get encoding functions */ + enc = encoder(codec); + + /* restore original start/end for nonexistent files */ + DMA_FOR_EACH + { + if (dma->deleted) + { + dma->start = dma->Ostart; + dma->end = dma->Oend; + dma->compress = 0; /* deleted files don't compress */ + } + } + + /* sort dma 
entries by size, descending */ + DMASORT(rom, sortfunc_dma_size_descend); + + /* locate largest file that will be compressed */ + DMA_FOR_EACH + { + if (dma->compress && dma->end - dma->start > largest_compress) + largest_compress = dma->end - dma->start; + } + + /* no file should compress to over 2x its uncompressed size */ + largest_compress *= 2; + + /* allocate compression buffer for each thread */ + compThread = calloc_safe(numThreads, sizeof(*compThread)); + for (i = 0; i < numThreads; ++i) + { + compThread[i].data = malloc_safe(largest_compress); + + /* allocate compression contexts (if applicable) */ + if (enc->ctx_new) + { + compThread[i].ctx = enc->ctx_new(); + if (!compThread[i].ctx) + die("memory error"); + } + } + + /* if using compression cache */ + if (cache) + { + sprintf(cache_codec, "%s/%s/", cache, codec); + + /* store current working directory for later */ + wow_getcwd_safe(cwd, sizeof(cwd)); + + /* create and enter cache folder */ + dir_enter(cache); + + /* create and enter directory for the encoding algorithm */ + dir_enter(codec); + + /* make a '.yaz' string from 'yaz' */ + dot_codec = malloc_safe(strlen(codec) + 1/*'.'*/ + 1/*'\0'*/); + strcpy(dot_codec, "."); + strcat(dot_codec, codec); + + /* get list of all files in current working directory */ + list = folder_new(); + } + + /* now compress every compressible file */ + if (numThreads <= 1) + { + dma_compress( + rom + , compThread[0].data + , enc->encfunc + , codec + , dot_codec + , list + , 1 /* stride */ + , 0 /* ofs */ + , 1 /* report */ + , compThread[0].ctx + , matching + ); + } + else + { + /* spawn threads */ + for (i = 0; i < numThreads; ++i) + { + dma_compress_thread( + &compThread[i] + , rom + , compThread[i].data + , enc->encfunc + , codec + , dot_codec + , list + , numThreads /* stride */ + , i /* ofs */ + , (i+1)==numThreads /* report */ + , compThread[i].ctx + , matching + ); + } + + /* wait for all threads to complete */ + for (i = 0; i < numThreads; ++i) + { + if 
(pthread_join(compThread[i].pt, NULL)) + die("threading error"); + } + } + + /* all files now compressed */ + report_progress(rom, codec, PROGRESS_A_B); + fprintf(printer, "success!\n"); + + /* sort by original start, ascending */ + DMASORT(rom, sortfunc_dma_start_ascend); + + /* determine physical addresses for each segment */ + comp_total = 0; + DMA_FOR_EACH + { + char *fn = dma->compname; + unsigned int sz; + unsigned int sz16; + + if (dma->deleted) + continue; + + /* cached file logic */ + if (cache) + { + /* skip entries that don't reference compressed files */ + if (!fn) + continue; + + sz = dma->compSz; + + /* sz == 0 */ + if (!sz) + die("'%s/%s/%s' file size == 0", cache, codec, fn); + } + + /* in-memory file logic */ + else + { + /* skip entries that don't reference compressed data */ + sz = dma->compSz; + if (!sz) + continue; + } + + /* ensure we remain 16-byte-aligned after advancing */ + sz16 = sz; + if (sz16 & 15) + sz16 += 16 - (sz16 & 15); + + dma->Pstart = comp_total; + if (dma->compress) + { + dma->Pend = dma->Pstart + sz16; + + /* compressed file ratio variables */ + total_compressed += sz16; + total_decompressed += dma->end - dma->start; + } + else + dma->Pend = 0; + comp_total += sz16; + + if (mb != 0 && dma->Pend > compsz) + die("ran out of compressed rom space (try increasing --mb)"); + } + + /* adaptive final size */ + if (mb == 0) + compsz = ALIGN8MB(comp_total); + + if (matching) + { + /* fill the entire (compressed) rom space with 00010203...FF... 
+ in order to match retail rom padding */ + unsigned char n = 0; /* will intentionally overflow */ + for (unsigned int j = 0; j < compsz; j++, n++) + { + rom->data[j] = n; + } + } + else + { + /* zero the entire (compressed) rom space */ + memset(rom->data, 0, compsz); + } + + /* inject compressed files */ + comp_total = 0; + DMA_FOR_EACH + { + unsigned char *dst; + char *fn = dma->compname; + unsigned int sz; + fprintf(printer, "\r""injecting file %d/%d: ", PROGRESS_A_B); + + if (dma->deleted) + continue; + + dst = rom->data + dma->Pstart; + + /* external cached file logic */ + if (cache) + { + /* skip entries that don't reference compressed files */ + if (!fn) + continue; + + /* load file into rom at offset */ + dst = file_load_into(cache_codec, fn, &sz, dst); + } + /* otherwise, a simple memcpy */ + else + { + memcpy(dst, dma->compbuf, dma->compSz); + sz = dma->compSz; + } + + if (matching) + { + /* since matching rom padding is not zero but file padding is zero, + fill file padding space with zeros */ + memset(dst + sz, 0, ALIGN16(sz) - sz); + } + } + fprintf(printer, "\r""injecting file %d/%d: ", dma_num, dma_num); + fprintf(printer, "success!\n"); + + fprintf( + printer + , "compression ratio: %.02f%%\n" + , (total_compressed / total_decompressed) * 100.0f + ); + + /* now free compressed file names */ + DMA_FOR_EACH + { + if (dma->compname) + free(dma->compname); + } + + /* remove unused cache files */ + if (list) + { + for (item = list->item; item - list->item < list->num; ++item) + { + /* udata hasn't been marked, so file is unused */ + if (item->name && !item->udata) + { + if (remove(item->name)) + die("failed to remove '%s/%s/%s'" + , cache, codec, item->name + ); + } + } + } + + /* update rom size for when rom_save() is used */ + rom->data_sz = compsz; + + /* cleanup */ + DMA_FOR_EACH + { + /* zero starts/ends of deleted files */ + if (!matching && dma->deleted) + { + dma->start = 0; + dma->end = 0; + dma->Pstart = 0; + dma->Pend = 0; + } + + /* free any 
compbufs */ + if (dma->compbuf) + free(dma->compbuf); + dma->compSz = 0; + dma->compbuf = 0; + } + if (list) + folder_free(list); + if (dot_codec) + free(dot_codec); + for (i = 0; i < numThreads; ++i) + { + free(compThread[i].data); + + /* free compression contexts (if applicable) */ + if (enc->ctx_free) + { + assert(compThread[i].ctx); + enc->ctx_free(compThread[i].ctx); + } + } + free(compThread); + + /* return to prior working directory */ + if (*cwd) + wow_chdir(cwd); +} + + +/* specify start of dmadata and number of entries */ +void rom_dma(struct rom *rom, unsigned int offset, int num_entries, bool matching) +{ + struct dma *dma; + unsigned char *raw; + + assert(rom); + assert(rom->data); + assert(rom->dma == 0 && "called rom_dma() more than once"); + + if (num_entries <= 0) + die("invalid number of dma entries %d", num_entries); + + dma = calloc_safe(num_entries, sizeof(*dma)); + rom->dma = dma; + rom->dma_num = num_entries; + + raw = rom->data + offset; + rom->dma_raw = raw; + + /* initialize every entry */ + while (dma - rom->dma < num_entries) + { + /* propagate defaults */ + dma->index = dma - rom->dma; + dma->start = get32(raw); + dma->end = get32(raw + 4); + dma->Pstart = get32(raw + 8); + dma->Pend = get32(raw + 12); + dma->Ostart = dma->start; + dma->Oend = dma->end; + dma->compress = 0; /* compression off by default */ + + /* nonexistent file */ + if (dma->Pstart == DMA_DELETED && dma->Pend == DMA_DELETED) + { + dma->deleted = 1; + + if (!matching) + { + dma->start = 0; + dma->end = 0; + dma->Ostart = 0; + dma->Oend = 0; + dma->Pstart = 0; + dma->Pend = 0; + } + } + + /* invalid dma conditions */ + else if ( + (dma->Pend & 3) /* not 4-byte aligned */ + || (dma->Pstart & 3) + || (dma->start & 3) + || (dma->end & 3) + || dma->start > dma->end + || (dma->Pstart > dma->Pend && dma->Pend) + || dma->Pend > rom->data_sz + ) + { + die( + "invalid dma entry encountered: %08X %08X %08X %08X" + , dma->start, dma->end, dma->Pstart, dma->Pend + ); + } + + /* rom 
is compressed */ + if (dma->Pend && dma->Pend != DMA_DELETED) + { + die( + "encountered dma entry %08X %08X %08X %08X" + ", which suggests the rom is already compressed...\n" + "now exiting..." + , dma->start, dma->end, dma->Pstart, dma->Pend + ); + } + + /* advance to next entry */ + raw += 16; + dma += 1; + } +} + +/* call this once dma settings are finalized */ +void rom_dma_ready(struct rom *rom, bool matching) +{ + struct dma *dma; + int num; + unsigned int lowest = 0; + unsigned int highest_end = 0; /* highest end dma offset */ + + assert(rom); + assert(rom->data); + assert(rom->dma); + assert(rom->dma_ready == 0 && "dma_ready called more than once"); + + dma = rom->dma; + num = rom->dma_num; + + /* sort by start offset, ascending */ + DMASORT(rom, sortfunc_dma_start_ascend); + + /* confirm no entries overlap */ + for (dma = rom->dma ; dma - rom->dma < num; ++dma) + { + /* skip blank entries */ + if (!dma->start && !dma->end) + continue; + + /* warn on empty files */ + if (dma->end == dma->start) + { + fprintf( + printer + , "warning: dma entry %d is empty file (%08X == %08X)\n" + , dma->index, dma->start, dma->end + ); + dma->Pstart = dma->Pend = DMA_DELETED; + } + + /* nonexistent file */ + if (dma->Pstart == DMA_DELETED && dma->Pend == DMA_DELETED) + { + dma->deleted = 1; + + if (!matching) + { + dma->Ostart = 0; + dma->Oend = 0; + dma->start = 0; + dma->end = 0; + dma->compress = 0; + } + continue; + } + + /* fatal error on entries where end < start */ + if (dma->end < dma->start) + die( + "dma invalid entry %d (%08X < %08X)" + , dma->index, dma->end, dma->start + ); + + /* fatal error on unaligned entries */ + if ((dma->start & 3) || (dma->end & 3)) + die( + "dma unaligned pointer (%08X %08X)" + , dma->start + , dma->end + ); + + /* fatal error on entries exceeding rom size */ + if (dma->end > rom->data_sz) + die( + "dma entry %d (%08X - %08X) exceeds rom size (%08X)" + , dma->index, dma->start, dma->end, rom->data_sz + ); + + /* if at least one entry 
has been processed, and its + * start is lower than any of the previous ends + */ + if (dma > rom->dma && dma->start < lowest) + die( + "dma table entry %d (%08X - %08X) " + "overlaps entry %d (%08X - %08X)" + , dma->index, dma->start, dma->end + , (dma-1)->index, (dma-1)->start, (dma-1)->end + ); + + /* store highest dma end offset */ + if (dma->end > highest_end) + highest_end = dma->end; + + /* lowest acceptable start for next entry is end of current */ + lowest = dma->end; + } + + /* note dma_ready() has been called */ + rom->dma_ready = 1; +} + +/* reencode existing archives within rom + * NOTE: must be used before dma_ready() + */ +/* TODO optimization opportunities: threading, caching */ +void rom_dma_repack( + struct rom *rom + , unsigned start + , unsigned end + , const char *from /* old codec */ + , const char *to /* new codec */ +) +{ + const struct encoder *enc = 0; + int (*decfunc)( + void *src, void *dst, unsigned dstSz, unsigned *srcSz + ) = 0; + void *ctx = 0; + + assert(rom); + assert(rom->data); + assert(rom->dma); + assert(rom->dma_ready == 0 && "dma_repack must precede dma_ready"); + + /* default codec = yaz */ + if (!from) + from = "yaz"; + if (!(to = rom->codec)) + to = "yaz"; + + /* swap start and end if they are not in ascending order */ + if (end < start) + { + int t = end; + end = start; + start = t; + } + + /* allocate compression buffers, 16 mb */ + if (!rom->mem.mb16) + rom->mem.mb16 = malloc_safe(SIZE_16MB); + if (!rom->mem.mb4) + rom->mem.mb4 = malloc_safe(SIZE_4MB); + + /* no need to reencode when the codec is the same */ + if (!strcmp(from, to)) + return; + + /* get decoding function */ + if (!strcmp(from, "yaz")) + { + from = "Yaz0"; + decfunc = yazdec; + } + else if (!strcmp(from, "raw")) + { + from = "raw0"; + } + else + die("dma_repack from='%s' unsupported", from); + + /* get encoding function */ + enc = encoder(to); + + /* allocate compression context (if applicable) */ + if (enc->ctx_new) + { + ctx = enc->ctx_new(); + if 
(!ctx) + die("memory error"); + } + + /* start <= idx <= end */ + while (start <= end && start < rom->dma_num) + { + struct dma *dma = dma_get_idx(rom, start); + + unsigned char *dst = rom->data + dma->start; + const char *errstr; + unsigned int Osz = dma->end - dma->start; + unsigned int Nsz; + char name[32]; + + dma->compress = 0; + + sprintf(name, "%08X", dma->start); + + errstr = + yar_reencode( + dst + , Osz + , rom->mem.mb16 + , &Nsz + , 4 + + , name + , from + , rom->mem.mb4 + , ctx + + , decfunc + , enc->encfunc + , 0 + ); + + /* fatal error */ + if (errstr) + die("%s", errstr); + + /* repacked archive won't fit in place of original archive */ + if (Nsz > Osz) + die("repacking failed, new archive 0x%X bytes too big" + , Nsz - Osz + ); + + /* copy encoded file into rom */ + memcpy(dst, rom->mem.mb16, Nsz); + + /* file sizes changed */ + fprintf(printer, "%.2f kb saved!\n", ((float)(Osz-Nsz))/1000.0f); + + dma->end = dma->start + Nsz; + + start += 1; + } + + /* free compression context (if applicable) */ + if (enc->ctx_free) + { + assert(ctx); + enc->ctx_free(ctx); + } +} + + +/* set compression flag on indices start <= idx <= end */ +void rom_dma_compress( + struct rom *rom + , unsigned start + , unsigned end + , int comp +) +{ + assert(rom); + assert(rom->data); + assert(rom->dma); + assert(rom->dma_ready == 0 && "dma_compress must precede dma_ready"); + + /* swap start and end if they are not in ascending order */ + if (end < start) + { + int t = end; + end = start; + start = t; + } + + /* start <= idx <= end */ + while (start <= end && start < rom->dma_num) + { + struct dma *dma = rom->dma + start; + + dma->compress = comp; + start += 1; + } +} + + +/* set rom compressed file cache directory */ +void rom_set_cache(struct rom *rom, const char *cache) +{ + assert(rom); + assert(cache); + + if (rom->cache) + free(rom->cache); + + rom->cache = strdup_safe(cache); +} + +/* get number of dma entries */ +int rom_dma_num(struct rom *rom) +{ + assert(rom); + + 
return rom->dma_num; +} + +/* set rom compression codec + * valid options: "yaz", "lzo", "ucl", "aplib" + * NOTE: to use codecs besides yaz, get patches from the z64enc repo + */ +void rom_set_codec(struct rom *rom, const char *codec) +{ + assert(rom); + assert(codec); + + if (rom->codec) + free(rom->codec); + + rom->codec = strdup_safe(codec); +} + +/* save rom to disk using specified filename */ +void rom_save(struct rom *rom, const char *fn) +{ + assert(rom); + assert(rom->data); + + /* updates dmadata */ + rom_write_dmadata(rom); + + /* recalculate crc */ + n64crc(rom->data); + + if (file_write(fn, rom->data, rom->data_sz) != rom->data_sz) + die("failed to write file '%s'", fn); +} + +/* allocate a rom structure */ +struct rom *rom_new(const char *fn) +{ + struct rom *dst; + + assert(fn); + + /* allocate destination rom structure */ + dst = calloc_safe(1, sizeof(*dst)); + + /* propagate rom file */ + dst->data = file_load(fn, &dst->data_sz); + + /* double its bounds just in case compressed rom is larger + * (this can happen if, say, a 23mb rom is provided, + * gets compressed to 17mb, and is rounded up to 24mb) + * (retail rom sizes always use increments of 8) + */ + dst->data_sz *= 2; + dst->data = realloc(dst->data, dst->data_sz); + + /* back up load file name */ + dst->fn = strdup_safe(fn); + + return dst; +} + +/* free a rom structure */ +void rom_free(struct rom *rom) +{ + if (!rom) + return; + + if (rom->codec) + free(rom->codec); + + if (rom->data) + free(rom->data); + + if (rom->dma) + free(rom->dma); + + if (rom->cache) + free(rom->cache); + + if (rom->fn) + free(rom->fn); + + /* free any memory pools that were allocated */ + if (rom->mem.mb16) + free(rom->mem.mb16); + if (rom->mem.mb4) + free(rom->mem.mb4); + + free(rom); +} + diff --git a/tools/z64compress/src/rom.h b/tools/z64compress/src/rom.h new file mode 100644 index 000000000..dbc6eaa43 --- /dev/null +++ b/tools/z64compress/src/rom.h @@ -0,0 +1,62 @@ +/* + * rom.h + * + * functions for 
compression magic reside herein + * + * z64me + * + */ + +#ifndef Z64COMPRESS_ROM_H_INCLUDED +#define Z64COMPRESS_ROM_H_INCLUDED + +/* opaque definition */ +struct rom; + +/* allocate a rom structure and load rom file */ +struct rom *rom_new(const char *fn); + +/* free a rom structure */ +void rom_free(struct rom *rom); + +/* save rom to disk using specified filename */ +void rom_save(struct rom *rom, const char *fn); + +/* compress rom using specified algorithm */ +void rom_compress(struct rom *rom, int mb, int numThreads, bool matching); + +/* specify start of dmadata and number of entries */ +void rom_dma(struct rom *rom, unsigned int offset, int num_entries, bool matching); + +/* call this once dma settings are finalized */ +void rom_dma_ready(struct rom *rom, bool matching); + +/* set compression flag on indices start <= idx <= end */ +void +rom_dma_compress(struct rom *rom, unsigned start, unsigned end, int comp); + +/* reencode existing archives within rom + * NOTE: must be used before dma_ready() + */ +void rom_dma_repack( + struct rom *rom + , unsigned start + , unsigned end + , const char *from /* old codec */ + , const char *to /* new codec */ +); + +/* get number of dma entries */ +int rom_dma_num(struct rom *rom); + +/* set rom compression codec + * valid options: "yaz", "lzo", "ucl", "aplib" + * NOTE: to use codecs besides yaz, get patches from the z64enc repo + */ +void rom_set_codec(struct rom *rom, const char *codec); + +/* set rom compressed file cache directory */ +void rom_set_cache(struct rom *rom, const char *cache); + +#endif /* Z64COMPRESS_ROM_H_INCLUDED */ + diff --git a/tools/z64compress/src/sha1.c b/tools/z64compress/src/sha1.c new file mode 100644 index 000000000..07b068dd6 --- /dev/null +++ b/tools/z64compress/src/sha1.c @@ -0,0 +1,141 @@ +#include +#include +#include + +#define stb_big32(c) (((c)[0]<<24) + (c)[1]*65536 + (c)[2]*256 + (c)[3]) + +static void stb__sha1(unsigned char *chunk, unsigned h[5]) +{ + int i; + unsigned a,b,c,d,e; 
+   unsigned w[80];
+
+   /* message schedule: first 16 words come straight from the chunk */
+   for (i=0; i < 16; ++i)
+      w[i] = stb_big32(&chunk[i*4]);
+   /* remaining words; (t + t) | (t >> 31) is rotl(t, 1) */
+   for (i=16; i < 80; ++i) {
+      unsigned t;
+      t = w[i-3] ^ w[i-8] ^ w[i-14] ^ w[i-16];
+      w[i] = (t + t) | (t >> 31);
+   }
+
+   a = h[0];
+   b = h[1];
+   c = h[2];
+   d = h[3];
+   e = h[4];
+
+   /* one SHA-1 round: k = round constant, f = round function */
+   #define STB__SHA1(k,f) \
+   { \
+      unsigned temp = (a << 5) + (a >> 27) + (f) + e + (k) + w[i]; \
+      e = d; \
+      d = c; \
+      c = (b << 30) + (b >> 2); \
+      b = a; \
+      a = temp; \
+   }
+
+   /* 80 rounds in four groups of 20, per FIPS 180 */
+   i=0;
+   for (; i < 20; ++i) STB__SHA1(0x5a827999, d ^ (b & (c ^ d))       );
+   for (; i < 40; ++i) STB__SHA1(0x6ed9eba1, b ^ c ^ d               );
+   for (; i < 60; ++i) STB__SHA1(0x8f1bbcdc, (b & c) + (d & (b ^ c)) );
+   for (; i < 80; ++i) STB__SHA1(0xca62c1d6, b ^ c ^ d               );
+
+   #undef STB__SHA1
+
+   /* fold chunk result into running state */
+   h[0] += a;
+   h[1] += b;
+   h[2] += c;
+   h[3] += d;
+   h[4] += e;
+}
+
+/* compute the 20-byte SHA-1 digest of buffer[0..len) into output */
+void stb_sha1(unsigned char output[20], unsigned char *buffer, unsigned len)
+{
+   unsigned char final_block[128];
+   unsigned end_start, final_len, j;
+   int i;
+
+   unsigned h[5];
+
+   /* standard SHA-1 initial state */
+   h[0] = 0x67452301;
+   h[1] = 0xefcdab89;
+   h[2] = 0x98badcfe;
+   h[3] = 0x10325476;
+   h[4] = 0xc3d2e1f0;
+
+   // we need to write padding to the last one or two
+   // blocks, so build those first into 'final_block'
+
+   // we have to write one special byte, plus the 8-byte length
+
+   // compute the block where the data runs out
+   end_start = len & ~63;
+
+   // compute the earliest we can encode the length
+   if (((len+9) & ~63) == end_start) {
+      // it all fits in one block, so fill a second-to-last block
+      end_start -= 64;
+   }
+
+   final_len = end_start + 128;
+
+   // now we need to copy the data in
+   assert(end_start + 128 >= len+9);
+   assert(end_start < len || len < 64-9);
+
+   /* when len < 55, end_start wrapped to a huge unsigned value
+    * above; this j offset makes 'end_start + j' start at 0
+    */
+   j = 0;
+   if (end_start > len)
+      j = (unsigned) - (int) end_start;
+
+   for (; end_start + j < len; ++j)
+      final_block[j] = buffer[end_start + j];
+   final_block[j++] = 0x80;
+   while (j < 128-5) // 5 byte length, so write 4 extra padding bytes
+      final_block[j++] = 0;
+   // big-endian size
+   final_block[j++] = len >> 29;
+   final_block[j++] = len >>
21; + final_block[j++] = len >> 13; + final_block[j++] = len >> 5; + final_block[j++] = len << 3; + assert(j == 128 && end_start + j == final_len); + + for (j=0; j < final_len; j += 64) { // 512-bit chunks + if (j+64 >= end_start+64) + stb__sha1(&final_block[j - end_start], h); + else + stb__sha1(&buffer[j], h); + } + + for (i=0; i < 5; ++i) { + output[i*4 + 0] = h[i] >> 24; + output[i*4 + 1] = h[i] >> 16; + output[i*4 + 2] = h[i] >> 8; + output[i*4 + 3] = h[i] >> 0; + } +} + +// client can truncate this wherever they like +void stb_sha1_readable(char display[30], unsigned char sha[20]) +{ + char encoding[65] = "0123456789abcdefghijklmnopqrstuv" + "wxyzABCDEFGHIJKLMNOPQRSTUVWXYZ#$"; + int num_bits = 0, acc=0; + int i=0,o=0; + while (o < 26) { + int v; + // expand the accumulator + if (num_bits < 6) { + assert(i != 20); + acc += sha[i++] << num_bits; + num_bits += 8; + } + v = acc & ((1 << 6) - 1); + display[o++] = encoding[v]; + acc >>= 6; + num_bits -= 6; + } + assert(num_bits == 20*8 - 26*6); + display[o++] = '\0'; +} + diff --git a/tools/z64compress/src/sha1.h b/tools/z64compress/src/sha1.h new file mode 100644 index 000000000..3c7e0dc23 --- /dev/null +++ b/tools/z64compress/src/sha1.h @@ -0,0 +1,8 @@ +#ifndef STB_SHA1_H_INCLUDED +#define STB_SHA1_H_INCLUDED + +void stb_sha1(unsigned char output[20], unsigned char *buffer, unsigned len); +void stb_sha1_readable(char display[30], unsigned char sha[20]); + +#endif /* STB_SHA1_H_INCLUDED */ + diff --git a/tools/z64compress/src/wow.c b/tools/z64compress/src/wow.c new file mode 100644 index 000000000..21b2c671d --- /dev/null +++ b/tools/z64compress/src/wow.c @@ -0,0 +1,3 @@ +#define WOW_IMPLEMENTATION +#include "wow.h" + diff --git a/tools/z64compress/src/wow.h b/tools/z64compress/src/wow.h new file mode 100644 index 000000000..cc2d88cd3 --- /dev/null +++ b/tools/z64compress/src/wow.h @@ -0,0 +1,769 @@ +/* + * wow.h + * + * a small collection of functions + * to make writing software easier + * + * z64me + * + */ + 
+#ifndef WOW_H_INCLUDED
+#define WOW_H_INCLUDED
+
+#include <stddef.h>   /* size_t */
+#include <stdio.h>    /* file ops */
+#include <stdlib.h>   /* alloc */
+#include <sys/stat.h> /* stat */
+#include <string.h>   /* strdup */
+#include <unistd.h>   /* chdir, getcwd */
+#include <stdarg.h>
+#include <locale.h>
+
+#ifdef _WIN32
+	#include <windows.h>
+	#undef near
+	#undef far
+#endif
+
+
+#if (_WIN32 && UNICODE)
+	#define wow_main int wmain(int argc, wchar_t *Wargv[])
+	#define wow_main_argv char **argv = wow_conv_args(argc, (void*)Wargv)
+#else
+	#define wow_main int main(int argc, char *argv[])
+	#define wow_main_argv do{}while(0)
+#endif
+
+
+#ifndef WOW_API_PREFIX
+	#define WOW_API_PREFIX
+#endif
+
+WOW_API_PREFIX
+void *
+wow_utf8_to_wchar(const char *str);
+
+WOW_API_PREFIX
+char *
+wow_wchar_to_utf8(void *wstr);
+
+
+/* converts argv[] from wchar to char win32, in place */
+WOW_API_PREFIX
+void *
+wow_conv_args(int argc, void *argv[]);
+
+
+/* returns non-zero if path is a directory */
+WOW_API_PREFIX
+int
+wow_is_dir_w(void const *path);
+
+
+/* returns non-zero if path is a directory */
+WOW_API_PREFIX
+int
+wow_is_dir(char const *path);
+
+
+/* fread abstraction that falls back to buffer-based fread *
+ * if a big fread fails; if that still fails, returns 0 */
+WOW_API_PREFIX
+size_t
+wow_fread_bytes(void *ptr, size_t bytes, FILE *stream);
+
+
+/* fwrite abstraction that falls back to buffer-based fwrite *
+ * if a big fwrite fails; if that still fails, returns 0 */
+WOW_API_PREFIX
+size_t
+wow_fwrite_bytes(const void *ptr, size_t bytes, FILE *stream);
+
+
+/* fread abstraction that falls back to buffer-based fread *
+ * if a big fread fails; if that still fails, returns 0 */
+WOW_API_PREFIX
+size_t
+wow_fread(void *ptr, size_t size, size_t nmemb, FILE *stream);
+
+
+/* fwrite abstraction that falls back to buffer-based fwrite *
+ * if a big fwrite fails; if that still fails, returns 0 */
+WOW_API_PREFIX
+size_t
+wow_fwrite(const void *ptr, size_t size, size_t nmemb, FILE *stream);
+
+
+/* fopen abstraction for utf8 support on windows win32 */
+WOW_API_PREFIX
+FILE * +wow_fopen(char const *name, char const *mode); + + +/* remove abstraction for utf8 support on windows win32 */ +WOW_API_PREFIX +int +wow_remove(char const *path); + + +/* mkdir */ +WOW_API_PREFIX +int +wow_mkdir(char const *path); + + +/* chdir */ +WOW_API_PREFIX +int +wow_chdir(char const *path); + + +/* getcwd */ +WOW_API_PREFIX +char * +wow_getcwd(char *buf, size_t size); + + +/* getcwd_safe */ +WOW_API_PREFIX +char * +wow_getcwd_safe(char *buf, size_t size); + + +/* system */ +WOW_API_PREFIX +int +wow_system(char const *path); + +WOW_API_PREFIX void die(const char *fmt, ...) + __attribute__ ((format (printf, 1, 2))) +; +WOW_API_PREFIX void *calloc_safe(size_t nmemb, size_t size); +WOW_API_PREFIX void *malloc_safe(size_t size); +WOW_API_PREFIX void *realloc_safe(void *ptr, size_t size); +WOW_API_PREFIX char *strdup_safe(const char *s); +WOW_API_PREFIX void *memdup_safe(void *ptr, size_t size); + +#ifdef WOW_IMPLEMENTATION + +WOW_API_PREFIX void die(const char *fmt, ...) +{ + va_list args; + va_start(args, fmt); +#ifdef UNICODE + char buf[4096]; + vsprintf(buf, fmt, args); + wchar_t *wc = wow_utf8_to_wchar(buf); + setlocale(LC_ALL, ""); + fwprintf(stderr, L"%ls", wc); + free(wc); +#else + vfprintf(stderr, fmt, args); +#endif + va_end(args); + fprintf(stderr, "\n"); + exit(EXIT_FAILURE); +} + +WOW_API_PREFIX void *calloc_safe(size_t nmemb, size_t size) +{ + void *result = calloc(nmemb, size); + + if (!result) + die("memory error"); + + return result; +} + +WOW_API_PREFIX void *malloc_safe(size_t size) +{ + void *result = malloc(size); + + if (!result) + die("memory error"); + + return result; +} + +WOW_API_PREFIX void *realloc_safe(void *ptr, size_t size) +{ + void *result = realloc(ptr, size); + + if (!result) + die("memory error"); + + return result; +} + +WOW_API_PREFIX char *strdup_safe(const char *s) +{ + char *result; + int n; + + if (!s) + return 0; + + n = strlen(s) + 1; + + result = malloc_safe(n); + + strcpy(result, s); + + return result; +} + 
+WOW_API_PREFIX void *memdup_safe(void *ptr, size_t size) +{ + void *result; + + if (!ptr || !size) + return 0; + + result = malloc_safe(size); + + memcpy(result, ptr, size); + + return result; +} + + +WOW_API_PREFIX +void * +wow_utf8_to_wchar(const char *str) +{ +#ifdef UNICODE +extern __declspec(dllimport) int __stdcall MultiByteToWideChar(unsigned int cp, unsigned long flags, const char *str, int cbmb, wchar_t *widestr, int cchwide); + wchar_t *wstr; + int wstr_sz = (strlen(str) + 1) * 16;//sizeof(*wstr); + wstr = calloc_safe(1, wstr_sz); + MultiByteToWideChar(65001/*utf8*/, 0, str, -1, wstr, wstr_sz); + return wstr; +#else + return strdup(str); +#endif +} + +WOW_API_PREFIX +char * +wow_wchar_to_utf8_buf(void *wstr, void *dst, int dst_max) +{ +#ifdef UNICODE +extern __declspec(dllimport) int __stdcall WideCharToMultiByte(unsigned int cp, unsigned long flags, const wchar_t *widestr, int cchwide, char *str, int cbmb, const char *defchar, int *used_default); + WideCharToMultiByte(65001/*utf8*/, 0, wstr, -1, dst, dst_max, NULL, NULL); + return dst; +#else + (void)dst_max; /* unused parameter */ + return strcpy(dst, wstr); +#endif +} + +WOW_API_PREFIX +char * +wow_wchar_to_utf8(void *wstr) +{ +#ifdef UNICODE +extern __declspec(dllimport) int __stdcall WideCharToMultiByte(unsigned int cp, unsigned long flags, const wchar_t *widestr, int cchwide, char *str, int cbmb, const char *defchar, int *used_default); + char *str; + int str_sz = (wcslen(wstr) + 1) * sizeof(*str); + str = calloc_safe(1, str_sz); + WideCharToMultiByte(65001/*utf8*/, 0, wstr, -1, str, str_sz, NULL, NULL); + return str; +#else + return strdup(wstr); +#endif +} + +WOW_API_PREFIX +char * +wow_wchar_to_utf8_inplace(void *wstr) +{ +#ifdef UNICODE +extern __declspec(dllimport) int __stdcall WideCharToMultiByte(unsigned int cp, unsigned long flags, const wchar_t *widestr, int cchwide, char *str, int cbmb, const char *defchar, int *used_default); + char buf[4096]; + char *str; + int wstr_len = wcslen(wstr); 
+ unsigned str_sz = (wstr_len + 1) * sizeof(*str); + if (str_sz >= sizeof(buf)) + str = malloc_safe(str_sz); + else + str = buf; + WideCharToMultiByte(65001/*utf8*/, 0, wstr, -1, str, str_sz, NULL, NULL); + memcpy(wstr, str, wstr_len + 1); + ((char*)wstr)[wstr_len+1] = '\0'; + if (str != buf) + free(str); + return wstr; +#else + return wstr; +#endif +} + + +/* argument abstraction: converts argv[] from wchar to char win32 */ +WOW_API_PREFIX +void * +wow_conv_args(int argc, void *argv[]) +{ +#ifdef UNICODE + int i; + for (i=0; i < argc; ++i) + { + //fprintf(stderr, "[%d]: %s\n", i, argv[i]); + //fwprintf(stderr, L"[%d]: %s\n", i, (wchar_t*)argv[i]); + argv[i] = wow_wchar_to_utf8_inplace(argv[i]); + //fwprintf(stderr, L"[%d]: %s\n", i, wow_utf8_to_wchar(argv[i])); + } +#else + (void)argc; /* unused parameter */ +#endif + return argv; +} + +/* returns non-zero if path is a directory */ +WOW_API_PREFIX +int +wow_is_dir_w(void const *path) +{ + struct stat s; +#if (_WIN32 && UNICODE) + if (wstat(path, &s) == 0) +#else + if (stat(path, &s) == 0) +#endif + { + if (s.st_mode & S_IFDIR) + return 1; + } + + return 0; +} + + +/* returns non-zero if path is a directory */ +WOW_API_PREFIX +int +wow_is_dir(char const *path) +{ + int rv; + void *wpath = 0; + +#if (_WIN32 && UNICODE) + wpath = wow_utf8_to_wchar(path); + rv = wow_is_dir_w(wpath); +#else + rv = wow_is_dir_w(path); +#endif + if (wpath) + free(wpath); + + return rv; +} + + +/* fread abstraction that falls back to buffer-based fread * + * if a big fread fails; if that still fails, returns 0 */ +WOW_API_PREFIX +size_t +wow_fread_bytes(void *ptr, size_t bytes, FILE *stream) +{ + if (!stream || !ptr || !bytes) + return 0; + + unsigned char *ptr8 = ptr; + size_t Oofs = ftell(stream); + size_t bufsz = 1024 * 1024; /* 1 mb at a time */ + size_t Obytes = bytes; + size_t rem; + + fseek(stream, 0, SEEK_END); + rem = ftell(stream) - Oofs; + fseek(stream, Oofs, SEEK_SET); + + if (bytes > rem) + bytes = rem; + + /* everything 
worked */
+	if (fread(ptr, 1, bytes, stream) == bytes)
+		return Obytes;
+
+	/* failed: try falling back to slower buffered read */
+	fseek(stream, Oofs, SEEK_SET);
+	while (bytes)
+	{
+		/* don't read past end */
+		if (bytes < bufsz)
+			bufsz = bytes;
+		if (bufsz > rem)
+		{
+			bytes = rem;
+			bufsz = rem;
+		}
+
+		/* still failed */
+		if (fread(ptr8, 1, bufsz, stream) != bufsz)
+			return 0;
+
+		/* advance */
+		ptr8 += bufsz;
+		bytes -= bufsz;
+		rem -= bufsz;
+	}
+
+	/* success */
+	return Obytes;
+}
+
+
+/* fwrite abstraction that falls back to buffer-based fwrite *
+ * if a big fwrite fails; if that still fails, returns 0 */
+WOW_API_PREFIX
+size_t
+wow_fwrite_bytes(const void *ptr, size_t bytes, FILE *stream)
+{
+	if (!stream || !ptr || !bytes)
+		return 0;
+
+	const unsigned char *ptr8 = ptr;
+	size_t bufsz = 1024 * 1024; /* 1 mb at a time */
+	size_t Obytes = bytes;
+
+	/* everything worked */
+	if (fwrite(ptr, 1, bytes, stream) == bytes)
+		return bytes;
+
+	/* failed: try falling back to slower buffered write */
+	while (bytes)
+	{
+		/* don't write past end */
+		if (bytes < bufsz)
+			bufsz = bytes;
+
+		/* still failed */
+		if (fwrite(ptr8, 1, bufsz, stream) != bufsz)
+			return 0;
+
+		/* advance */
+		ptr8 += bufsz;
+		bytes -= bufsz;
+	}
+
+	/* success */
+	return Obytes;
+}
+
+
+/* fread abstraction that falls back to buffer-based fread *
+ * if a big fread fails; if that still fails, returns 0 */
+WOW_API_PREFIX
+size_t
+wow_fread(void *ptr, size_t size, size_t nmemb, FILE *stream)
+{
+	if (!stream || !ptr || !size || !nmemb)
+		return 0;
+
+	if (wow_fread_bytes(ptr, size * nmemb, stream) == size * nmemb)
+		return nmemb;
+
+	return 0;
+}
+
+
+/* fwrite abstraction that falls back to buffer-based fwrite *
+ * if a big fwrite fails; if that still fails, returns 0 */
+WOW_API_PREFIX
+size_t
+wow_fwrite(const void *ptr, size_t size, size_t nmemb, FILE *stream)
+{
+	if (!stream || !ptr || !size || !nmemb)
+		return 0;
+
+	if (wow_fwrite_bytes(ptr, size * nmemb, stream) == size 
* nmemb) + return nmemb; + + return 0; +} + + +/* fopen abstraction for utf8 support on windows win32 */ +WOW_API_PREFIX +FILE * +wow_fopen(char const *name, char const *mode) +{ +#ifdef UNICODE + void *wname = 0; + void *wmode = 0; + FILE *fp = 0; + + wname = wow_utf8_to_wchar(name); + if (!wname) + goto L_cleanup; + + /* TODO eventually, an error message would be cool */ + if (wow_is_dir_w(wname)) + goto L_cleanup; + + wmode = wow_utf8_to_wchar(mode); + if (!wmode) + goto L_cleanup; + + fp = _wfopen(wname, wmode); + +L_cleanup: + if (wname) free(wname); + if (wmode) free(wmode); + if (fp) + return fp; + return 0; +#else + /* TODO eventually, an error message would be cool */ + if (wow_is_dir_w(name)) + return 0; + return fopen(name, mode); +#endif +} + + +/* remove abstraction for utf8 support on windows win32 */ +WOW_API_PREFIX +int +wow_remove(char const *path) +{ +#ifdef UNICODE + void *wpath = 0; + int rval; + + wpath = wow_utf8_to_wchar(path); + if (!wpath) + return -1; + + rval = _wremove(wpath); + free(wpath); + return rval; +#else + return remove(path); +#endif +} + + +/* mkdir */ +WOW_API_PREFIX +int +wow_mkdir(char const *path) +{ +#if defined(_WIN32) && defined(UNICODE) +extern int _wmkdir(const wchar_t *); + void *wname = 0; + int rval; + + wname = wow_utf8_to_wchar(path); + if (!wname) + return -1; + + rval = _wmkdir(wname); + + if (wname) + free(wname); + + return rval; +#elif defined(_WIN32) /* win32 no unicode */ +extern int _mkdir(const char *); + return _mkdir(path); +#else /* ! 
_WIN32 */ + return mkdir(path, 0777); +#endif +} + + +/* chdir */ +WOW_API_PREFIX +int +wow_chdir(char const *path) +{ +#if defined(_WIN32) && defined(UNICODE) +extern int _wchdir(const wchar_t *); + void *wname = 0; + int rval; + + wname = wow_utf8_to_wchar(path); + if (!wname) + return -1; + + rval = _wchdir(wname); + + if (wname) + free(wname); + + return rval; +#elif defined(_WIN32) /* win32 no unicode */ +extern int _chdir(const char *); + return _chdir(path); +#else /* ! _WIN32 */ + return chdir(path); +#endif +} + + +/* getcwd */ +WOW_API_PREFIX +char * +wow_getcwd(char *buf, size_t size) +{ +#if defined(_WIN32) && defined(UNICODE) +//extern int _wgetcwd(const wchar_t *, int); +extern _CRTIMP wchar_t *__cdecl _wgetcwd(wchar_t *_DstBuf,int _SizeInWords); + wchar_t wname[4096]; + + if (!buf || !size) + return 0; + + if (!_wgetcwd(wname, sizeof(wname) / sizeof(wname[0]))) + return 0; + + return wow_wchar_to_utf8_buf(wname, buf, size); +#elif defined(_WIN32) /* win32 no unicode */ +//extern char *_getcwd(char *, int); + return _getcwd(buf, size); +#else /* ! 
_WIN32 */ + return getcwd(buf, size); +#endif +} + + +/* getcwd_safe */ +WOW_API_PREFIX +char * +wow_getcwd_safe(char *buf, size_t size) +{ + char *result = wow_getcwd(buf, size); + + if (!result) + die("failed to get current working directory"); + + return result; +} + + +/* system */ +WOW_API_PREFIX +int +wow_system(char const *path) +{ +#if defined(_WIN32) && defined(UNICODE) + void *wname = 0; + int rval; + + wname = wow_utf8_to_wchar(path); + if (!wname) + return -1; + + rval = _wsystem(wname); + + if (wname) + free(wname); + + return rval; +#else /* not win32 unicode */ + return system(path); +#endif +} + + +/* system_gui */ +WOW_API_PREFIX +int +wow_system_gui(char const *name, const char *param) +{ +#if defined(_WIN32) + STARTUPINFOW si; + PROCESS_INFORMATION pi; + + ZeroMemory(&si, sizeof(si)); + si.cb = sizeof(si); + ZeroMemory(&pi, sizeof(pi)); + int rval = 0 /*success */; +//extern int ShellExecuteA(void *hwnd, void *op, void *file, void *param, void *dir, int cmd); +//extern int ShellExecuteW(void *hwnd, void *op, void *file, void *param, void *dir, int cmd); +//const int SW_SHOWNORMAL = 1; + #if defined(UNICODE) + void *wname = 0; + void *wparam = 0; + + wname = wow_utf8_to_wchar(name); + if (!wname) + { + return -1; + } + wparam = wow_utf8_to_wchar(param); + if (!wparam) + { + free(wname); + return -1; + } + +#if 0 + if (CreateProcessW( + wname, wparam + , NULL, NULL + , FALSE + , CREATE_NO_WINDOW + , NULL + , NULL + , &si, &pi) + ) + { + //WaitForSingleObject(pi.hProcess, INFINITE); + //CloseHandle(pi.hProcess); + //CloseHandle(pi.hThread); + } + else + rval = 1; +#else + rval = (int)ShellExecuteW(NULL, L"open", wname, wparam, L".", SW_SHOWNORMAL); + rval = rval <= 32; +#endif + + free(wname); + free(wparam); + #else /* win32 non-unicode */ +#if 0 + if (CreateProcessA( + name, x + , NULL, NULL + , FALSE + , CREATE_NO_WINDOW + , NULL + , NULL + , &si, &pi) + ) + { + //WaitForSingleObject(pi.hProcess, INFINITE); + //CloseHandle(pi.hProcess); + 
//CloseHandle(pi.hThread); + } + else + rval = 1; +#else + rval = (int)ShellExecuteA(NULL, "open", name, param, ".", SW_SHOWNORMAL); + rval = rval <= 32; +#endif + #endif + return rval;//rval <= 32; +#else /* not win32 unicode */ + char *x = malloc_safe(strlen(name) + strlen(param) + 128); + if (!x) + return -1; + strcpy(x, "\""); + strcat(x, name); + strcat(x, "\" "); + strcat(x, param); + int rval = system(x); + free(x); + return rval; +#endif +} + +#endif /* WOW_IMPLEMENTATION */ + +#endif /* WOW_H_INCLUDED */ + diff --git a/tools/z64compress/src/wow_dirent.h b/tools/z64compress/src/wow_dirent.h new file mode 100644 index 000000000..e9db5ca0b --- /dev/null +++ b/tools/z64compress/src/wow_dirent.h @@ -0,0 +1,61 @@ +/* + * wow_dirent.h + * + * dirent wrapper that abstracts unicode/utf8 platforms + * + * must be #include'd after dirent.h + * + * z64me + * + */ + +#ifndef WOW_DIRENT_INCLUDED +#define WOW_DIRENT_INCLUDED +#include "wow.h" + +#if defined(_WIN32) && defined(UNICODE) +# define wow_DIR _WDIR +# define wow_dirent _wdirent +static +wow_DIR * +wow_opendir(const char *path) +{ + void *wpath = wow_utf8_to_wchar(path); + if (!wpath) + return NULL; + + wow_DIR *rv = _wopendir(wpath); + + free(wpath); + + return rv; +} +static +struct wow_dirent * +wow_readdir(wow_DIR *dir) +{ + struct wow_dirent *ep = _wreaddir(dir); + if (!ep) + return 0; + + /* convert d_name to utf8 for working on them directly */ + char *str = wow_wchar_to_utf8(ep->d_name); + memcpy(ep->d_name, str, strlen(str) + 1); + free(str); + + return ep; +} +# define wow_closedir _wclosedir +# define wow_dirent_char wchar_t + +#else /* not win32 unicode */ +# define wow_DIR DIR +# define wow_dirent dirent +# define wow_opendir opendir +# define wow_readdir readdir +# define wow_closedir closedir +# define wow_dirent_char char +#endif + +#endif /* WOW_DIRENT_INCLUDED */ +