diff --git a/tools/gzinject/.gitattributes b/tools/gzinject/.gitattributes
new file mode 100644
index 000000000..66cccc58a
--- /dev/null
+++ b/tools/gzinject/.gitattributes
@@ -0,0 +1,65 @@
+###############################################################################
+# Set default behavior to automatically normalize line endings.
+###############################################################################
+* text=auto
+
+###############################################################################
+# Set default behavior for command prompt diff.
+#
+# This is needed for earlier builds of msysgit that do not have it on by
+# default for csharp files.
+# Note: This is only used by command line
+###############################################################################
+#*.cs diff=csharp
+
+###############################################################################
+# Set the merge driver for project and solution files
+#
+# Merging from the command prompt will add diff markers to the files if there
+# are conflicts (Merging from VS is not affected by the settings below, in VS
+# the diff markers are never inserted). Diff markers may cause the following
+# file extensions to fail to load in VS. An alternative would be to treat
+# these files as binary and thus will always conflict and require user
+# intervention with every merge. To do so, just uncomment the entries below
+###############################################################################
+#*.sln merge=binary
+#*.csproj merge=binary
+#*.vbproj merge=binary
+#*.vcxproj merge=binary
+#*.vcproj merge=binary
+#*.dbproj merge=binary
+#*.fsproj merge=binary
+#*.lsproj merge=binary
+#*.wixproj merge=binary
+#*.modelproj merge=binary
+#*.sqlproj merge=binary
+#*.wwaproj merge=binary
+
+###############################################################################
+# behavior for image files
+#
+# image files are treated as binary by default.
+###############################################################################
+#*.jpg binary
+#*.png binary
+#*.gif binary
+
+###############################################################################
+# diff behavior for common document formats
+#
+# Convert binary document formats to text before diffing them. This feature
+# is only available from the command line. Turn it on by uncommenting the
+# entries below.
+###############################################################################
+#*.doc diff=astextplain
+#*.DOC diff=astextplain
+#*.docx diff=astextplain
+#*.DOCX diff=astextplain
+#*.dot diff=astextplain
+#*.DOT diff=astextplain
+#*.pdf diff=astextplain
+#*.PDF diff=astextplain
+#*.rtf diff=astextplain
+#*.RTF diff=astextplain
+*.h linguist-language=C
+*.c linguist-language=C
diff --git a/tools/gzinject/.gitignore b/tools/gzinject/.gitignore
new file mode 100644
index 000000000..9ed774981
--- /dev/null
+++ b/tools/gzinject/.gitignore
@@ -0,0 +1,279 @@
+## Ignore Visual Studio temporary files, build results, and
+## files generated by popular Visual Studio add-ons.
+
+# User-specific files
+*.suo
+*.user
+*.userosscache
+*.sln.docstates
+
+# User-specific files (MonoDevelop/Xamarin Studio)
+*.userprefs
+
+# Build results
+[Dd]ebug/
+[Dd]ebugPublic/
+[Rr]elease/
+[Rr]eleases/
+x64/
+x86/
+bld/
+[Bb]in/
+[Oo]bj/
+[Ll]og/
+
+# Visual Studio 2015 cache/options directory
+.vs/
+# Uncomment if you have tasks that create the project's static files in wwwroot
+#wwwroot/
+
+# MSTest test Results
+[Tt]est[Rr]esult*/
+[Bb]uild[Ll]og.*
+
+# NUNIT
+*.VisualState.xml
+TestResult.xml
+
+# Build Results of an ATL Project
+[Dd]ebugPS/
+[Rr]eleasePS/
+dlldata.c
+
+# DNX
+project.lock.json
+project.fragment.lock.json
+artifacts/
+
+*_i.c
+*_p.c
+*_i.h
+*.ilk
+*.meta
+*.obj
+*.pch
+*.pdb
+*.pgc
+*.pgd
+*.rsp
+*.sbr
+*.tlb
+*.tli
+*.tlh
+*.tmp
+*.tmp_proj
+*.log
+*.vspscc
+*.vssscc
+.builds
+*.pidb
+*.svclog
+*.scc
+
+# Chutzpah Test files
+_Chutzpah*
+
+# Visual C++ cache files
+ipch/
+*.aps
+*.ncb
+*.opendb
+*.opensdf
+*.sdf
+*.cachefile
+*.VC.db
+*.VC.VC.opendb
+
+# Visual Studio profiler
+*.psess
+*.vsp
+*.vspx
+*.sap
+
+# TFS 2012 Local Workspace
+$tf/
+
+# Guidance Automation Toolkit
+*.gpState
+
+# ReSharper is a .NET coding add-in
+_ReSharper*/
+*.[Rr]e[Ss]harper
+*.DotSettings.user
+
+# JustCode is a .NET coding add-in
+.JustCode
+
+# TeamCity is a build add-in
+_TeamCity*
+
+# DotCover is a Code Coverage Tool
+*.dotCover
+
+# NCrunch
+_NCrunch_*
+.*crunch*.local.xml
+nCrunchTemp_*
+
+# MightyMoose
+*.mm.*
+AutoTest.Net/
+
+# Web workbench (sass)
+.sass-cache/
+
+# Installshield output folder
+[Ee]xpress/
+
+# DocProject is a documentation generator add-in
+DocProject/buildhelp/
+DocProject/Help/*.HxT
+DocProject/Help/*.HxC
+DocProject/Help/*.hhc
+DocProject/Help/*.hhk
+DocProject/Help/*.hhp
+DocProject/Help/Html2
+DocProject/Help/html
+
+# Click-Once directory
+publish/
+
+# Publish Web Output
+*.[Pp]ublish.xml
+*.azurePubxml
+# TODO: Comment the next line if you want to checkin your web deploy settings
+# but database connection strings (with potential passwords) will be unencrypted
+#*.pubxml
+*.publishproj
+
+# Microsoft Azure Web App publish settings. Comment the next line if you want to
+# checkin your Azure Web App publish settings, but sensitive information contained
+# in these scripts will be unencrypted
+PublishScripts/
+
+# NuGet Packages
+*.nupkg
+# The packages folder can be ignored because of Package Restore
+**/packages/*
+# except build/, which is used as an MSBuild target.
+!**/packages/build/
+# Uncomment if necessary however generally it will be regenerated when needed
+#!**/packages/repositories.config
+# NuGet v3's project.json files produces more ignoreable files
+*.nuget.props
+*.nuget.targets
+
+# Microsoft Azure Build Output
+csx/
+*.build.csdef
+
+# Microsoft Azure Emulator
+ecf/
+rcf/
+
+# Windows Store app package directories and files
+AppPackages/
+BundleArtifacts/
+Package.StoreAssociation.xml
+_pkginfo.txt
+
+# Visual Studio cache files
+# files ending in .cache can be ignored
+*.[Cc]ache
+# but keep track of directories ending in .cache
+!*.[Cc]ache/
+
+# Others
+ClientBin/
+~$*
+*~
+*.dbmdl
+*.dbproj.schemaview
+*.jfm
+*.pfx
+*.publishsettings
+node_modules/
+orleans.codegen.cs
+
+# Since there are multiple workflows, uncomment next line to ignore bower_components
+# (https://github.com/github/gitignore/pull/1529#issuecomment-104372622)
+#bower_components/
+
+# RIA/Silverlight projects
+Generated_Code/
+
+# Backup & report files from converting an old project file
+# to a newer Visual Studio version. Backup files are not needed,
+# because we have git ;-)
+_UpgradeReport_Files/
+Backup*/
+UpgradeLog*.XML
+UpgradeLog*.htm
+
+# SQL Server files
+*.mdf
+*.ldf
+
+# Business Intelligence projects
+*.rdl.data
+*.bim.layout
+*.bim_*.settings
+
+# Microsoft Fakes
+FakesAssemblies/
+
+# GhostDoc plugin setting file
+*.GhostDoc.xml
+
+# Node.js Tools for Visual Studio
+.ntvs_analysis.dat
+
+# Visual Studio 6 build log
+*.plg
+
+# Visual Studio 6 workspace options file
+*.opt
+
+# Visual Studio LightSwitch build output
+**/*.HTMLClient/GeneratedArtifacts
+**/*.DesktopClient/GeneratedArtifacts
+**/*.DesktopClient/ModelManifest.xml
+**/*.Server/GeneratedArtifacts
+**/*.Server/ModelManifest.xml
+_Pvt_Extensions
+
+# Paket dependency manager
+.paket/paket.exe
+paket-files/
+
+# FAKE - F# Make
+.fake/
+
+# JetBrains Rider
+.idea/
+*.sln.iml
+
+# CodeRush
+.cr/
+
+# Python Tools for Visual Studio (PTVS)
+__pycache__/
+*.pyc
+
+# Testing File
+*.Wad
+TestExtract/
+*.exe
+*.stackdump
+/gzinject/Debug
+/gzinject.zip
+*.bin
+Debug/
+CppProperties.json
+wadextract/
+*.o
+Makefile
+config.*
+gzinject
+*.zip
+autom4te.cache/
\ No newline at end of file
diff --git a/tools/gzinject/.gitrepo b/tools/gzinject/.gitrepo
new file mode 100644
index 000000000..ccddd36d5
--- /dev/null
+++ b/tools/gzinject/.gitrepo
@@ -0,0 +1,12 @@
+; DO NOT EDIT (unless you know what you are doing)
+;
+; This subdirectory is a git "subrepo", and this file is maintained by the
+; git-subrepo command. See https://github.com/ingydotnet/git-subrepo#readme
+;
+[subrepo]
+ remote = https://github.com/krimtonz/gzinject.git
+ branch = master
+ commit = ee44efce5d842e5d4488ee47c16da8b673da5086
+ parent = 53941daac4bb1482a9125f3595642df1bafb5f6d
+ method = merge
+ cmdver = 0.4.5
diff --git a/tools/gzinject/BUILDING.md b/tools/gzinject/BUILDING.md
new file mode 100644
index 000000000..7fb64a1c3
--- /dev/null
+++ b/tools/gzinject/BUILDING.md
@@ -0,0 +1,8 @@
+## Prerequisites
+
+gcc, make
+
+## Building
+Run `./configure`. You can use `--prefix=DIR` to specify the installation directory. Then run `make` to build the executable, and `make install` to install gzinject to `DIR/bin`.
+
+By default gzinject will use the crypto library provided by OpenSSL. To disable this and use the builtin (slower) crypto functions, remove `-D_USE_LIBCRYPTO` from the makefile, and change `SRC = gzinject.c` to `SRC = *.c`.
diff --git a/tools/gzinject/LICENSE b/tools/gzinject/LICENSE
new file mode 100644
index 000000000..94a9ed024
--- /dev/null
+++ b/tools/gzinject/LICENSE
@@ -0,0 +1,674 @@
+ GNU GENERAL PUBLIC LICENSE
+ Version 3, 29 June 2007
+
+ Copyright (C) 2007 Free Software Foundation, Inc.
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+ Preamble
+
+ The GNU General Public License is a free, copyleft license for
+software and other kinds of works.
+
+ The licenses for most software and other practical works are designed
+to take away your freedom to share and change the works. By contrast,
+the GNU General Public License is intended to guarantee your freedom to
+share and change all versions of a program--to make sure it remains free
+software for all its users. We, the Free Software Foundation, use the
+GNU General Public License for most of our software; it applies also to
+any other work released this way by its authors. You can apply it to
+your programs, too.
+
+ When we speak of free software, we are referring to freedom, not
+price. Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+them if you wish), that you receive source code or can get it if you
+want it, that you can change the software or use pieces of it in new
+free programs, and that you know you can do these things.
+
+ To protect your rights, we need to prevent others from denying you
+these rights or asking you to surrender the rights. Therefore, you have
+certain responsibilities if you distribute copies of the software, or if
+you modify it: responsibilities to respect the freedom of others.
+
+ For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must pass on to the recipients the same
+freedoms that you received. You must make sure that they, too, receive
+or can get the source code. And you must show them these terms so they
+know their rights.
+
+ Developers that use the GNU GPL protect your rights with two steps:
+(1) assert copyright on the software, and (2) offer you this License
+giving you legal permission to copy, distribute and/or modify it.
+
+ For the developers' and authors' protection, the GPL clearly explains
+that there is no warranty for this free software. For both users' and
+authors' sake, the GPL requires that modified versions be marked as
+changed, so that their problems will not be attributed erroneously to
+authors of previous versions.
+
+ Some devices are designed to deny users access to install or run
+modified versions of the software inside them, although the manufacturer
+can do so. This is fundamentally incompatible with the aim of
+protecting users' freedom to change the software. The systematic
+pattern of such abuse occurs in the area of products for individuals to
+use, which is precisely where it is most unacceptable. Therefore, we
+have designed this version of the GPL to prohibit the practice for those
+products. If such problems arise substantially in other domains, we
+stand ready to extend this provision to those domains in future versions
+of the GPL, as needed to protect the freedom of users.
+
+ Finally, every program is threatened constantly by software patents.
+States should not allow patents to restrict development and use of
+software on general-purpose computers, but in those that do, we wish to
+avoid the special danger that patents applied to a free program could
+make it effectively proprietary. To prevent this, the GPL assures that
+patents cannot be used to render the program non-free.
+
+ The precise terms and conditions for copying, distribution and
+modification follow.
+
+ TERMS AND CONDITIONS
+
+ 0. Definitions.
+
+ "This License" refers to version 3 of the GNU General Public License.
+
+ "Copyright" also means copyright-like laws that apply to other kinds of
+works, such as semiconductor masks.
+
+ "The Program" refers to any copyrightable work licensed under this
+License. Each licensee is addressed as "you". "Licensees" and
+"recipients" may be individuals or organizations.
+
+ To "modify" a work means to copy from or adapt all or part of the work
+in a fashion requiring copyright permission, other than the making of an
+exact copy. The resulting work is called a "modified version" of the
+earlier work or a work "based on" the earlier work.
+
+ A "covered work" means either the unmodified Program or a work based
+on the Program.
+
+ To "propagate" a work means to do anything with it that, without
+permission, would make you directly or secondarily liable for
+infringement under applicable copyright law, except executing it on a
+computer or modifying a private copy. Propagation includes copying,
+distribution (with or without modification), making available to the
+public, and in some countries other activities as well.
+
+ To "convey" a work means any kind of propagation that enables other
+parties to make or receive copies. Mere interaction with a user through
+a computer network, with no transfer of a copy, is not conveying.
+
+ An interactive user interface displays "Appropriate Legal Notices"
+to the extent that it includes a convenient and prominently visible
+feature that (1) displays an appropriate copyright notice, and (2)
+tells the user that there is no warranty for the work (except to the
+extent that warranties are provided), that licensees may convey the
+work under this License, and how to view a copy of this License. If
+the interface presents a list of user commands or options, such as a
+menu, a prominent item in the list meets this criterion.
+
+ 1. Source Code.
+
+ The "source code" for a work means the preferred form of the work
+for making modifications to it. "Object code" means any non-source
+form of a work.
+
+ A "Standard Interface" means an interface that either is an official
+standard defined by a recognized standards body, or, in the case of
+interfaces specified for a particular programming language, one that
+is widely used among developers working in that language.
+
+ The "System Libraries" of an executable work include anything, other
+than the work as a whole, that (a) is included in the normal form of
+packaging a Major Component, but which is not part of that Major
+Component, and (b) serves only to enable use of the work with that
+Major Component, or to implement a Standard Interface for which an
+implementation is available to the public in source code form. A
+"Major Component", in this context, means a major essential component
+(kernel, window system, and so on) of the specific operating system
+(if any) on which the executable work runs, or a compiler used to
+produce the work, or an object code interpreter used to run it.
+
+ The "Corresponding Source" for a work in object code form means all
+the source code needed to generate, install, and (for an executable
+work) run the object code and to modify the work, including scripts to
+control those activities. However, it does not include the work's
+System Libraries, or general-purpose tools or generally available free
+programs which are used unmodified in performing those activities but
+which are not part of the work. For example, Corresponding Source
+includes interface definition files associated with source files for
+the work, and the source code for shared libraries and dynamically
+linked subprograms that the work is specifically designed to require,
+such as by intimate data communication or control flow between those
+subprograms and other parts of the work.
+
+ The Corresponding Source need not include anything that users
+can regenerate automatically from other parts of the Corresponding
+Source.
+
+ The Corresponding Source for a work in source code form is that
+same work.
+
+ 2. Basic Permissions.
+
+ All rights granted under this License are granted for the term of
+copyright on the Program, and are irrevocable provided the stated
+conditions are met. This License explicitly affirms your unlimited
+permission to run the unmodified Program. The output from running a
+covered work is covered by this License only if the output, given its
+content, constitutes a covered work. This License acknowledges your
+rights of fair use or other equivalent, as provided by copyright law.
+
+ You may make, run and propagate covered works that you do not
+convey, without conditions so long as your license otherwise remains
+in force. You may convey covered works to others for the sole purpose
+of having them make modifications exclusively for you, or provide you
+with facilities for running those works, provided that you comply with
+the terms of this License in conveying all material for which you do
+not control copyright. Those thus making or running the covered works
+for you must do so exclusively on your behalf, under your direction
+and control, on terms that prohibit them from making any copies of
+your copyrighted material outside their relationship with you.
+
+ Conveying under any other circumstances is permitted solely under
+the conditions stated below. Sublicensing is not allowed; section 10
+makes it unnecessary.
+
+ 3. Protecting Users' Legal Rights From Anti-Circumvention Law.
+
+ No covered work shall be deemed part of an effective technological
+measure under any applicable law fulfilling obligations under article
+11 of the WIPO copyright treaty adopted on 20 December 1996, or
+similar laws prohibiting or restricting circumvention of such
+measures.
+
+ When you convey a covered work, you waive any legal power to forbid
+circumvention of technological measures to the extent such circumvention
+is effected by exercising rights under this License with respect to
+the covered work, and you disclaim any intention to limit operation or
+modification of the work as a means of enforcing, against the work's
+users, your or third parties' legal rights to forbid circumvention of
+technological measures.
+
+ 4. Conveying Verbatim Copies.
+
+ You may convey verbatim copies of the Program's source code as you
+receive it, in any medium, provided that you conspicuously and
+appropriately publish on each copy an appropriate copyright notice;
+keep intact all notices stating that this License and any
+non-permissive terms added in accord with section 7 apply to the code;
+keep intact all notices of the absence of any warranty; and give all
+recipients a copy of this License along with the Program.
+
+ You may charge any price or no price for each copy that you convey,
+and you may offer support or warranty protection for a fee.
+
+ 5. Conveying Modified Source Versions.
+
+ You may convey a work based on the Program, or the modifications to
+produce it from the Program, in the form of source code under the
+terms of section 4, provided that you also meet all of these conditions:
+
+ a) The work must carry prominent notices stating that you modified
+ it, and giving a relevant date.
+
+ b) The work must carry prominent notices stating that it is
+ released under this License and any conditions added under section
+ 7. This requirement modifies the requirement in section 4 to
+ "keep intact all notices".
+
+ c) You must license the entire work, as a whole, under this
+ License to anyone who comes into possession of a copy. This
+ License will therefore apply, along with any applicable section 7
+ additional terms, to the whole of the work, and all its parts,
+ regardless of how they are packaged. This License gives no
+ permission to license the work in any other way, but it does not
+ invalidate such permission if you have separately received it.
+
+ d) If the work has interactive user interfaces, each must display
+ Appropriate Legal Notices; however, if the Program has interactive
+ interfaces that do not display Appropriate Legal Notices, your
+ work need not make them do so.
+
+ A compilation of a covered work with other separate and independent
+works, which are not by their nature extensions of the covered work,
+and which are not combined with it such as to form a larger program,
+in or on a volume of a storage or distribution medium, is called an
+"aggregate" if the compilation and its resulting copyright are not
+used to limit the access or legal rights of the compilation's users
+beyond what the individual works permit. Inclusion of a covered work
+in an aggregate does not cause this License to apply to the other
+parts of the aggregate.
+
+ 6. Conveying Non-Source Forms.
+
+ You may convey a covered work in object code form under the terms
+of sections 4 and 5, provided that you also convey the
+machine-readable Corresponding Source under the terms of this License,
+in one of these ways:
+
+ a) Convey the object code in, or embodied in, a physical product
+ (including a physical distribution medium), accompanied by the
+ Corresponding Source fixed on a durable physical medium
+ customarily used for software interchange.
+
+ b) Convey the object code in, or embodied in, a physical product
+ (including a physical distribution medium), accompanied by a
+ written offer, valid for at least three years and valid for as
+ long as you offer spare parts or customer support for that product
+ model, to give anyone who possesses the object code either (1) a
+ copy of the Corresponding Source for all the software in the
+ product that is covered by this License, on a durable physical
+ medium customarily used for software interchange, for a price no
+ more than your reasonable cost of physically performing this
+ conveying of source, or (2) access to copy the
+ Corresponding Source from a network server at no charge.
+
+ c) Convey individual copies of the object code with a copy of the
+ written offer to provide the Corresponding Source. This
+ alternative is allowed only occasionally and noncommercially, and
+ only if you received the object code with such an offer, in accord
+ with subsection 6b.
+
+ d) Convey the object code by offering access from a designated
+ place (gratis or for a charge), and offer equivalent access to the
+ Corresponding Source in the same way through the same place at no
+ further charge. You need not require recipients to copy the
+ Corresponding Source along with the object code. If the place to
+ copy the object code is a network server, the Corresponding Source
+ may be on a different server (operated by you or a third party)
+ that supports equivalent copying facilities, provided you maintain
+ clear directions next to the object code saying where to find the
+ Corresponding Source. Regardless of what server hosts the
+ Corresponding Source, you remain obligated to ensure that it is
+ available for as long as needed to satisfy these requirements.
+
+ e) Convey the object code using peer-to-peer transmission, provided
+ you inform other peers where the object code and Corresponding
+ Source of the work are being offered to the general public at no
+ charge under subsection 6d.
+
+ A separable portion of the object code, whose source code is excluded
+from the Corresponding Source as a System Library, need not be
+included in conveying the object code work.
+
+ A "User Product" is either (1) a "consumer product", which means any
+tangible personal property which is normally used for personal, family,
+or household purposes, or (2) anything designed or sold for incorporation
+into a dwelling. In determining whether a product is a consumer product,
+doubtful cases shall be resolved in favor of coverage. For a particular
+product received by a particular user, "normally used" refers to a
+typical or common use of that class of product, regardless of the status
+of the particular user or of the way in which the particular user
+actually uses, or expects or is expected to use, the product. A product
+is a consumer product regardless of whether the product has substantial
+commercial, industrial or non-consumer uses, unless such uses represent
+the only significant mode of use of the product.
+
+ "Installation Information" for a User Product means any methods,
+procedures, authorization keys, or other information required to install
+and execute modified versions of a covered work in that User Product from
+a modified version of its Corresponding Source. The information must
+suffice to ensure that the continued functioning of the modified object
+code is in no case prevented or interfered with solely because
+modification has been made.
+
+ If you convey an object code work under this section in, or with, or
+specifically for use in, a User Product, and the conveying occurs as
+part of a transaction in which the right of possession and use of the
+User Product is transferred to the recipient in perpetuity or for a
+fixed term (regardless of how the transaction is characterized), the
+Corresponding Source conveyed under this section must be accompanied
+by the Installation Information. But this requirement does not apply
+if neither you nor any third party retains the ability to install
+modified object code on the User Product (for example, the work has
+been installed in ROM).
+
+ The requirement to provide Installation Information does not include a
+requirement to continue to provide support service, warranty, or updates
+for a work that has been modified or installed by the recipient, or for
+the User Product in which it has been modified or installed. Access to a
+network may be denied when the modification itself materially and
+adversely affects the operation of the network or violates the rules and
+protocols for communication across the network.
+
+ Corresponding Source conveyed, and Installation Information provided,
+in accord with this section must be in a format that is publicly
+documented (and with an implementation available to the public in
+source code form), and must require no special password or key for
+unpacking, reading or copying.
+
+ 7. Additional Terms.
+
+ "Additional permissions" are terms that supplement the terms of this
+License by making exceptions from one or more of its conditions.
+Additional permissions that are applicable to the entire Program shall
+be treated as though they were included in this License, to the extent
+that they are valid under applicable law. If additional permissions
+apply only to part of the Program, that part may be used separately
+under those permissions, but the entire Program remains governed by
+this License without regard to the additional permissions.
+
+ When you convey a copy of a covered work, you may at your option
+remove any additional permissions from that copy, or from any part of
+it. (Additional permissions may be written to require their own
+removal in certain cases when you modify the work.) You may place
+additional permissions on material, added by you to a covered work,
+for which you have or can give appropriate copyright permission.
+
+ Notwithstanding any other provision of this License, for material you
+add to a covered work, you may (if authorized by the copyright holders of
+that material) supplement the terms of this License with terms:
+
+ a) Disclaiming warranty or limiting liability differently from the
+ terms of sections 15 and 16 of this License; or
+
+ b) Requiring preservation of specified reasonable legal notices or
+ author attributions in that material or in the Appropriate Legal
+ Notices displayed by works containing it; or
+
+ c) Prohibiting misrepresentation of the origin of that material, or
+ requiring that modified versions of such material be marked in
+ reasonable ways as different from the original version; or
+
+ d) Limiting the use for publicity purposes of names of licensors or
+ authors of the material; or
+
+ e) Declining to grant rights under trademark law for use of some
+ trade names, trademarks, or service marks; or
+
+ f) Requiring indemnification of licensors and authors of that
+ material by anyone who conveys the material (or modified versions of
+ it) with contractual assumptions of liability to the recipient, for
+ any liability that these contractual assumptions directly impose on
+ those licensors and authors.
+
+ All other non-permissive additional terms are considered "further
+restrictions" within the meaning of section 10. If the Program as you
+received it, or any part of it, contains a notice stating that it is
+governed by this License along with a term that is a further
+restriction, you may remove that term. If a license document contains
+a further restriction but permits relicensing or conveying under this
+License, you may add to a covered work material governed by the terms
+of that license document, provided that the further restriction does
+not survive such relicensing or conveying.
+
+ If you add terms to a covered work in accord with this section, you
+must place, in the relevant source files, a statement of the
+additional terms that apply to those files, or a notice indicating
+where to find the applicable terms.
+
+ Additional terms, permissive or non-permissive, may be stated in the
+form of a separately written license, or stated as exceptions;
+the above requirements apply either way.
+
+ 8. Termination.
+
+ You may not propagate or modify a covered work except as expressly
+provided under this License. Any attempt otherwise to propagate or
+modify it is void, and will automatically terminate your rights under
+this License (including any patent licenses granted under the third
+paragraph of section 11).
+
+ However, if you cease all violation of this License, then your
+license from a particular copyright holder is reinstated (a)
+provisionally, unless and until the copyright holder explicitly and
+finally terminates your license, and (b) permanently, if the copyright
+holder fails to notify you of the violation by some reasonable means
+prior to 60 days after the cessation.
+
+ Moreover, your license from a particular copyright holder is
+reinstated permanently if the copyright holder notifies you of the
+violation by some reasonable means, this is the first time you have
+received notice of violation of this License (for any work) from that
+copyright holder, and you cure the violation prior to 30 days after
+your receipt of the notice.
+
+ Termination of your rights under this section does not terminate the
+licenses of parties who have received copies or rights from you under
+this License. If your rights have been terminated and not permanently
+reinstated, you do not qualify to receive new licenses for the same
+material under section 10.
+
+ 9. Acceptance Not Required for Having Copies.
+
+ You are not required to accept this License in order to receive or
+run a copy of the Program. Ancillary propagation of a covered work
+occurring solely as a consequence of using peer-to-peer transmission
+to receive a copy likewise does not require acceptance. However,
+nothing other than this License grants you permission to propagate or
+modify any covered work. These actions infringe copyright if you do
+not accept this License. Therefore, by modifying or propagating a
+covered work, you indicate your acceptance of this License to do so.
+
+ 10. Automatic Licensing of Downstream Recipients.
+
+ Each time you convey a covered work, the recipient automatically
+receives a license from the original licensors, to run, modify and
+propagate that work, subject to this License. You are not responsible
+for enforcing compliance by third parties with this License.
+
+ An "entity transaction" is a transaction transferring control of an
+organization, or substantially all assets of one, or subdividing an
+organization, or merging organizations. If propagation of a covered
+work results from an entity transaction, each party to that
+transaction who receives a copy of the work also receives whatever
+licenses to the work the party's predecessor in interest had or could
+give under the previous paragraph, plus a right to possession of the
+Corresponding Source of the work from the predecessor in interest, if
+the predecessor has it or can get it with reasonable efforts.
+
+ You may not impose any further restrictions on the exercise of the
+rights granted or affirmed under this License. For example, you may
+not impose a license fee, royalty, or other charge for exercise of
+rights granted under this License, and you may not initiate litigation
+(including a cross-claim or counterclaim in a lawsuit) alleging that
+any patent claim is infringed by making, using, selling, offering for
+sale, or importing the Program or any portion of it.
+
+ 11. Patents.
+
+ A "contributor" is a copyright holder who authorizes use under this
+License of the Program or a work on which the Program is based. The
+work thus licensed is called the contributor's "contributor version".
+
+ A contributor's "essential patent claims" are all patent claims
+owned or controlled by the contributor, whether already acquired or
+hereafter acquired, that would be infringed by some manner, permitted
+by this License, of making, using, or selling its contributor version,
+but do not include claims that would be infringed only as a
+consequence of further modification of the contributor version. For
+purposes of this definition, "control" includes the right to grant
+patent sublicenses in a manner consistent with the requirements of
+this License.
+
+ Each contributor grants you a non-exclusive, worldwide, royalty-free
+patent license under the contributor's essential patent claims, to
+make, use, sell, offer for sale, import and otherwise run, modify and
+propagate the contents of its contributor version.
+
+ In the following three paragraphs, a "patent license" is any express
+agreement or commitment, however denominated, not to enforce a patent
+(such as an express permission to practice a patent or covenant not to
+sue for patent infringement). To "grant" such a patent license to a
+party means to make such an agreement or commitment not to enforce a
+patent against the party.
+
+ If you convey a covered work, knowingly relying on a patent license,
+and the Corresponding Source of the work is not available for anyone
+to copy, free of charge and under the terms of this License, through a
+publicly available network server or other readily accessible means,
+then you must either (1) cause the Corresponding Source to be so
+available, or (2) arrange to deprive yourself of the benefit of the
+patent license for this particular work, or (3) arrange, in a manner
+consistent with the requirements of this License, to extend the patent
+license to downstream recipients. "Knowingly relying" means you have
+actual knowledge that, but for the patent license, your conveying the
+covered work in a country, or your recipient's use of the covered work
+in a country, would infringe one or more identifiable patents in that
+country that you have reason to believe are valid.
+
+ If, pursuant to or in connection with a single transaction or
+arrangement, you convey, or propagate by procuring conveyance of, a
+covered work, and grant a patent license to some of the parties
+receiving the covered work authorizing them to use, propagate, modify
+or convey a specific copy of the covered work, then the patent license
+you grant is automatically extended to all recipients of the covered
+work and works based on it.
+
+ A patent license is "discriminatory" if it does not include within
+the scope of its coverage, prohibits the exercise of, or is
+conditioned on the non-exercise of one or more of the rights that are
+specifically granted under this License. You may not convey a covered
+work if you are a party to an arrangement with a third party that is
+in the business of distributing software, under which you make payment
+to the third party based on the extent of your activity of conveying
+the work, and under which the third party grants, to any of the
+parties who would receive the covered work from you, a discriminatory
+patent license (a) in connection with copies of the covered work
+conveyed by you (or copies made from those copies), or (b) primarily
+for and in connection with specific products or compilations that
+contain the covered work, unless you entered into that arrangement,
+or that patent license was granted, prior to 28 March 2007.
+
+ Nothing in this License shall be construed as excluding or limiting
+any implied license or other defenses to infringement that may
+otherwise be available to you under applicable patent law.
+
+ 12. No Surrender of Others' Freedom.
+
+ If conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot convey a
+covered work so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you may
+not convey it at all. For example, if you agree to terms that obligate you
+to collect a royalty for further conveying from those to whom you convey
+the Program, the only way you could satisfy both those terms and this
+License would be to refrain entirely from conveying the Program.
+
+ 13. Use with the GNU Affero General Public License.
+
+ Notwithstanding any other provision of this License, you have
+permission to link or combine any covered work with a work licensed
+under version 3 of the GNU Affero General Public License into a single
+combined work, and to convey the resulting work. The terms of this
+License will continue to apply to the part which is the covered work,
+but the special requirements of the GNU Affero General Public License,
+section 13, concerning interaction through a network will apply to the
+combination as such.
+
+ 14. Revised Versions of this License.
+
+ The Free Software Foundation may publish revised and/or new versions of
+the GNU General Public License from time to time. Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+ Each version is given a distinguishing version number. If the
+Program specifies that a certain numbered version of the GNU General
+Public License "or any later version" applies to it, you have the
+option of following the terms and conditions either of that numbered
+version or of any later version published by the Free Software
+Foundation. If the Program does not specify a version number of the
+GNU General Public License, you may choose any version ever published
+by the Free Software Foundation.
+
+ If the Program specifies that a proxy can decide which future
+versions of the GNU General Public License can be used, that proxy's
+public statement of acceptance of a version permanently authorizes you
+to choose that version for the Program.
+
+ Later license versions may give you additional or different
+permissions. However, no additional obligations are imposed on any
+author or copyright holder as a result of your choosing to follow a
+later version.
+
+ 15. Disclaimer of Warranty.
+
+ THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
+APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
+HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
+OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
+THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
+IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
+ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+
+ 16. Limitation of Liability.
+
+ IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
+THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
+GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
+USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
+DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
+PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
+EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
+SUCH DAMAGES.
+
+ 17. Interpretation of Sections 15 and 16.
+
+ If the disclaimer of warranty and limitation of liability provided
+above cannot be given local legal effect according to their terms,
+reviewing courts shall apply local law that most closely approximates
+an absolute waiver of all civil liability in connection with the
+Program, unless a warranty or assumption of liability accompanies a
+copy of the Program in return for a fee.
+
+ END OF TERMS AND CONDITIONS
+
+ How to Apply These Terms to Your New Programs
+
+ If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+ To do so, attach the following notices to the program. It is safest
+to attach them to the start of each source file to most effectively
+state the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+ <one line to give the program's name and a brief idea of what it does.>
+ Copyright (C) <year>  <name of author>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+Also add information on how to contact you by electronic and paper mail.
+
+ If the program does terminal interaction, make it output a short
+notice like this when it starts in an interactive mode:
+
+ <program>  Copyright (C) <year>  <name of author>
+ This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+ This is free software, and you are welcome to redistribute it
+ under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License. Of course, your program's commands
+might be different; for a GUI interface, you would use an "about box".
+
+ You should also get your employer (if you work as a programmer) or school,
+if any, to sign a "copyright disclaimer" for the program, if necessary.
+For more information on this, and how to apply and follow the GNU GPL, see
+<https://www.gnu.org/licenses/>.
+
+ The GNU General Public License does not permit incorporating your program
+into proprietary programs. If your program is a subroutine library, you
+may consider it more useful to permit linking proprietary applications with
+the library. If this is what you want to do, use the GNU Lesser General
+Public License instead of this License. But first, please read
+<https://www.gnu.org/licenses/why-not-lgpl.html>.
diff --git a/tools/gzinject/Makefile.in b/tools/gzinject/Makefile.in
new file mode 100644
index 000000000..92b57f7ca
--- /dev/null
+++ b/tools/gzinject/Makefile.in
@@ -0,0 +1,39 @@
+CC = @CC@
+LD = @CC@
+INSTALL = @INSTALL@
+CFLAGS = -Wall -Wno-unused-result @CFLAGS@
+CPPFLAGS = @CPPFLAGS@
+prefix = @prefix@
+exec_prefix = @exec_prefix@
+bindir = @bindir@
+PROGNAME = gzinject
+LDFLAGS = -s @LDFLAGS@
+CFILES = *.c
+SRCDIR = src
+CSRC := $(foreach s,$(CFILES),$(wildcard $(SRCDIR)/$(s)))
+COBJ = $(patsubst $(SRCDIR)/%,$(OBJDIR)/%.o,$(CSRC))
+LIBS =
+OBJDIR = obj
+OUTDIR = $(OBJDIR)
+
+.PHONY : all install clean distclean
+
+all : $(PROGNAME)
+
+clean :
+ rm -rf $(PROGNAME) obj
+
+distclean : clean
+ rm -f Makefile
+
+install : $(PROGNAME)
+ $(INSTALL) -p -D --target-directory=$(DESTDIR)$(bindir) $(PROGNAME)
+
+$(PROGNAME) : $(COBJ)
+ $(LD) $(LDFLAGS) $^ -o $@ $(LIBS)
+
+$(OUTDIR) :
+ mkdir -p $@
+
+$(COBJ) : $(OBJDIR)/%.o: $(SRCDIR)/% | $(OBJDIR)
+ $(CC) -c $(CPPFLAGS) $(CFLAGS) $< -o $@
diff --git a/tools/gzinject/README.md b/tools/gzinject/README.md
new file mode 100644
index 000000000..a3893a7d2
--- /dev/null
+++ b/tools/gzinject/README.md
@@ -0,0 +1,74 @@
+## About
+
+gzinject is a wad editing utility, primarily used for patching N64 VC Emulators, and replacing the rom inside. gzinject uses patch files to patch content files within the wad. A description of the patch file format can be seen in the [Patch](#patch) section.
+
+## Executable
+
+To build your own, run ./configure, then make, and make install. See BUILDING for more instructions
+
+Prebuilt Windows executable is contained under releases (https://github.com/krimtonz/gzinject/releases/latest)
+
+## Usage
+```
+Usage:
+ gzinject -a extract -w SOURCEWAD [options]
+ gzinject -a pack -w DESTWAD [options]
+ gzinject -a inject -w SOURCEWAD -m ROM [options]
+ gzinject -a genkey [options]
+ gzinject --help
+ gzinject --version
+
+Actions:
+ extract extracts SOURCEWAD to directory
+ pack packs directory into DESTWAD
+ inject injects rom into SOURCEWAD
+ genkey generates wii common-key
+
+Options:
+ -i, --channelid=ID New Channel ID For Pack and Inject actions (default: none)
+ -t, --title=title New Channel name for pack and inject actions (default: none)
+ -h, --help Prints this help message
+ -k, --key=keyfile Location of the common-key file (default: common-key.bin)
+ -r, --region=1-3 Region to use (default: 3)
+ --verbose Print out verbose program execution information
+ -d, --directory=directory Directory to extract contents to, or directory to read contents from (default: wadextract)
+ --cleanup Remove files before performing actions
+ --version Prints the current version
+ -m, --rom=rom Rom to inject for inject action (default: none)
+ -o, --outputwad=outwad The output wad for inject actions (default: SOURCEWAD-inject.wad)
+ -p, --patch-file=patchfile gzi file to use for applying patches (default: none)
+ -c, --content=contentfile the primary content file (default: 5)
+ --dol-inject Binary data to inject into the emulator program, requires --dol-loading
+ --dol-loading The loading address for the binary specified by --dol-inject
+ --dol-after After which patch file to inject the dol, default: after all patches
+```
+
+## Patch
+gzi files are text files with a command on each line. A # starting the line indicates a comment.
+
+line format:
+ccss oooooooo dddddddd\
+Where c indicates the command, s indicates the data size, o indicates the offset into the current file, and d indicates the data to replace with.
+
+```
+Commands:
+ 00: Begin using content file specified by d, offset and size are not used for this command
+ 01: lz77 decompress the current content file. offset, size, and data are not used for this command
+ 02: lz77 compress the current content file. offset, size, and data are not used for this command
+ 03: apply patch to currently selected file. If offset is higher than the file sizes, or a current file has not been selected, the patch is not applied
+
+Sizes:
+ 01: a one byte value. data & 0x000000FF is applied to content + offset
+ 02: a two byte value. data & 0x0000FFFF is applied to content + offset
+ 04: a four byte value. data is applied to content + offset
+```
+
+
+## Thanks/Authors
+
+gzinject was primarily written by me.\
+Thanks to glankk (https://github.com/glankk) for providing memory/controller fixes for OOT as well as debugging, testing, and providing fixes for various errors\
+The general workflow of extracting/packing the wad was taken from showmiiwads (https://github.com/dnasdw/showmiiwads/)\
+AES encryption/decryption was taken from kokke (https://github.com/kokke/tiny-AES-c)\
+SHA1 taken from clibs (https://github.com/clibs/sha1)\
+MD5 taken from Alexander Peslyak http://openwall.info/wiki/people/solar/software/public-domain-source-code/md5
diff --git a/tools/gzinject/configure b/tools/gzinject/configure
new file mode 100644
index 000000000..b9f5c308f
--- /dev/null
+++ b/tools/gzinject/configure
@@ -0,0 +1,4468 @@
+#! /bin/sh
+# Guess values for system-dependent variables and create Makefiles.
+# Generated by GNU Autoconf 2.69.
+#
+#
+# Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc.
+#
+#
+# This configure script is free software; the Free Software Foundation
+# gives unlimited permission to copy, distribute and modify it.
+## -------------------- ##
+## M4sh Initialization. ##
+## -------------------- ##
+
+# Be more Bourne compatible
+DUALCASE=1; export DUALCASE # for MKS sh
+if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then :
+ emulate sh
+ NULLCMD=:
+ # Pre-4.2 versions of Zsh do word splitting on ${1+"$@"}, which
+ # is contrary to our usage. Disable this feature.
+ alias -g '${1+"$@"}'='"$@"'
+ setopt NO_GLOB_SUBST
+else
+ case `(set -o) 2>/dev/null` in #(
+ *posix*) :
+ set -o posix ;; #(
+ *) :
+ ;;
+esac
+fi
+
+
+as_nl='
+'
+export as_nl
+# Printing a long string crashes Solaris 7 /usr/bin/printf.
+as_echo='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\'
+as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo
+as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo$as_echo
+# Prefer a ksh shell builtin over an external printf program on Solaris,
+# but without wasting forks for bash or zsh.
+if test -z "$BASH_VERSION$ZSH_VERSION" \
+ && (test "X`print -r -- $as_echo`" = "X$as_echo") 2>/dev/null; then
+ as_echo='print -r --'
+ as_echo_n='print -rn --'
+elif (test "X`printf %s $as_echo`" = "X$as_echo") 2>/dev/null; then
+ as_echo='printf %s\n'
+ as_echo_n='printf %s'
+else
+ if test "X`(/usr/ucb/echo -n -n $as_echo) 2>/dev/null`" = "X-n $as_echo"; then
+ as_echo_body='eval /usr/ucb/echo -n "$1$as_nl"'
+ as_echo_n='/usr/ucb/echo -n'
+ else
+ as_echo_body='eval expr "X$1" : "X\\(.*\\)"'
+ as_echo_n_body='eval
+ arg=$1;
+ case $arg in #(
+ *"$as_nl"*)
+ expr "X$arg" : "X\\(.*\\)$as_nl";
+ arg=`expr "X$arg" : ".*$as_nl\\(.*\\)"`;;
+ esac;
+ expr "X$arg" : "X\\(.*\\)" | tr -d "$as_nl"
+ '
+ export as_echo_n_body
+ as_echo_n='sh -c $as_echo_n_body as_echo'
+ fi
+ export as_echo_body
+ as_echo='sh -c $as_echo_body as_echo'
+fi
+
+# The user is always right.
+if test "${PATH_SEPARATOR+set}" != set; then
+ PATH_SEPARATOR=:
+ (PATH='/bin;/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 && {
+ (PATH='/bin:/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 ||
+ PATH_SEPARATOR=';'
+ }
+fi
+
+
+# IFS
+# We need space, tab and new line, in precisely that order. Quoting is
+# there to prevent editors from complaining about space-tab.
+# (If _AS_PATH_WALK were called with IFS unset, it would disable word
+# splitting by setting IFS to empty value.)
+IFS=" "" $as_nl"
+
+# Find who we are. Look in the path if we contain no directory separator.
+as_myself=
+case $0 in #((
+ *[\\/]* ) as_myself=$0 ;;
+ *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break
+ done
+IFS=$as_save_IFS
+
+ ;;
+esac
+# We did not find ourselves, most probably we were run as `sh COMMAND'
+# in which case we are not to be found in the path.
+if test "x$as_myself" = x; then
+ as_myself=$0
+fi
+if test ! -f "$as_myself"; then
+ $as_echo "$as_myself: error: cannot find myself; rerun with an absolute file name" >&2
+ exit 1
+fi
+
+# Unset variables that we do not need and which cause bugs (e.g. in
+# pre-3.0 UWIN ksh). But do not cause bugs in bash 2.01; the "|| exit 1"
+# suppresses any "Segmentation fault" message there. '((' could
+# trigger a bug in pdksh 5.2.14.
+for as_var in BASH_ENV ENV MAIL MAILPATH
+do eval test x\${$as_var+set} = xset \
+ && ( (unset $as_var) || exit 1) >/dev/null 2>&1 && unset $as_var || :
+done
+PS1='$ '
+PS2='> '
+PS4='+ '
+
+# NLS nuisances.
+LC_ALL=C
+export LC_ALL
+LANGUAGE=C
+export LANGUAGE
+
+# CDPATH.
+(unset CDPATH) >/dev/null 2>&1 && unset CDPATH
+
+# Use a proper internal environment variable to ensure we don't fall
+ # into an infinite loop, continuously re-executing ourselves.
+ if test x"${_as_can_reexec}" != xno && test "x$CONFIG_SHELL" != x; then
+ _as_can_reexec=no; export _as_can_reexec;
+ # We cannot yet assume a decent shell, so we have to provide a
+# neutralization value for shells without unset; and this also
+# works around shells that cannot unset nonexistent variables.
+# Preserve -v and -x to the replacement shell.
+BASH_ENV=/dev/null
+ENV=/dev/null
+(unset BASH_ENV) >/dev/null 2>&1 && unset BASH_ENV ENV
+case $- in # ((((
+ *v*x* | *x*v* ) as_opts=-vx ;;
+ *v* ) as_opts=-v ;;
+ *x* ) as_opts=-x ;;
+ * ) as_opts= ;;
+esac
+exec $CONFIG_SHELL $as_opts "$as_myself" ${1+"$@"}
+# Admittedly, this is quite paranoid, since all the known shells bail
+# out after a failed `exec'.
+$as_echo "$0: could not re-execute with $CONFIG_SHELL" >&2
+as_fn_exit 255
+ fi
+ # We don't want this to propagate to other subprocesses.
+ { _as_can_reexec=; unset _as_can_reexec;}
+if test "x$CONFIG_SHELL" = x; then
+ as_bourne_compatible="if test -n \"\${ZSH_VERSION+set}\" && (emulate sh) >/dev/null 2>&1; then :
+ emulate sh
+ NULLCMD=:
+ # Pre-4.2 versions of Zsh do word splitting on \${1+\"\$@\"}, which
+ # is contrary to our usage. Disable this feature.
+ alias -g '\${1+\"\$@\"}'='\"\$@\"'
+ setopt NO_GLOB_SUBST
+else
+ case \`(set -o) 2>/dev/null\` in #(
+ *posix*) :
+ set -o posix ;; #(
+ *) :
+ ;;
+esac
+fi
+"
+ as_required="as_fn_return () { (exit \$1); }
+as_fn_success () { as_fn_return 0; }
+as_fn_failure () { as_fn_return 1; }
+as_fn_ret_success () { return 0; }
+as_fn_ret_failure () { return 1; }
+
+exitcode=0
+as_fn_success || { exitcode=1; echo as_fn_success failed.; }
+as_fn_failure && { exitcode=1; echo as_fn_failure succeeded.; }
+as_fn_ret_success || { exitcode=1; echo as_fn_ret_success failed.; }
+as_fn_ret_failure && { exitcode=1; echo as_fn_ret_failure succeeded.; }
+if ( set x; as_fn_ret_success y && test x = \"\$1\" ); then :
+
+else
+ exitcode=1; echo positional parameters were not saved.
+fi
+test x\$exitcode = x0 || exit 1
+test -x / || exit 1"
+ as_suggested=" as_lineno_1=";as_suggested=$as_suggested$LINENO;as_suggested=$as_suggested" as_lineno_1a=\$LINENO
+ as_lineno_2=";as_suggested=$as_suggested$LINENO;as_suggested=$as_suggested" as_lineno_2a=\$LINENO
+ eval 'test \"x\$as_lineno_1'\$as_run'\" != \"x\$as_lineno_2'\$as_run'\" &&
+ test \"x\`expr \$as_lineno_1'\$as_run' + 1\`\" = \"x\$as_lineno_2'\$as_run'\"' || exit 1
+test \$(( 1 + 1 )) = 2 || exit 1"
+ if (eval "$as_required") 2>/dev/null; then :
+ as_have_required=yes
+else
+ as_have_required=no
+fi
+ if test x$as_have_required = xyes && (eval "$as_suggested") 2>/dev/null; then :
+
+else
+ as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+as_found=false
+for as_dir in /bin$PATH_SEPARATOR/usr/bin$PATH_SEPARATOR$PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ as_found=:
+ case $as_dir in #(
+ /*)
+ for as_base in sh bash ksh sh5; do
+ # Try only shells that exist, to save several forks.
+ as_shell=$as_dir/$as_base
+ if { test -f "$as_shell" || test -f "$as_shell.exe"; } &&
+ { $as_echo "$as_bourne_compatible""$as_required" | as_run=a "$as_shell"; } 2>/dev/null; then :
+ CONFIG_SHELL=$as_shell as_have_required=yes
+ if { $as_echo "$as_bourne_compatible""$as_suggested" | as_run=a "$as_shell"; } 2>/dev/null; then :
+ break 2
+fi
+fi
+ done;;
+ esac
+ as_found=false
+done
+$as_found || { if { test -f "$SHELL" || test -f "$SHELL.exe"; } &&
+ { $as_echo "$as_bourne_compatible""$as_required" | as_run=a "$SHELL"; } 2>/dev/null; then :
+ CONFIG_SHELL=$SHELL as_have_required=yes
+fi; }
+IFS=$as_save_IFS
+
+
+ if test "x$CONFIG_SHELL" != x; then :
+ export CONFIG_SHELL
+ # We cannot yet assume a decent shell, so we have to provide a
+# neutralization value for shells without unset; and this also
+# works around shells that cannot unset nonexistent variables.
+# Preserve -v and -x to the replacement shell.
+BASH_ENV=/dev/null
+ENV=/dev/null
+(unset BASH_ENV) >/dev/null 2>&1 && unset BASH_ENV ENV
+case $- in # ((((
+ *v*x* | *x*v* ) as_opts=-vx ;;
+ *v* ) as_opts=-v ;;
+ *x* ) as_opts=-x ;;
+ * ) as_opts= ;;
+esac
+exec $CONFIG_SHELL $as_opts "$as_myself" ${1+"$@"}
+# Admittedly, this is quite paranoid, since all the known shells bail
+# out after a failed `exec'.
+$as_echo "$0: could not re-execute with $CONFIG_SHELL" >&2
+exit 255
+fi
+
+ if test x$as_have_required = xno; then :
+ $as_echo "$0: This script requires a shell more modern than all"
+ $as_echo "$0: the shells that I found on your system."
+ if test x${ZSH_VERSION+set} = xset ; then
+ $as_echo "$0: In particular, zsh $ZSH_VERSION has bugs and should"
+ $as_echo "$0: be upgraded to zsh 4.3.4 or later."
+ else
+ $as_echo "$0: Please tell bug-autoconf@gnu.org about your system,
+$0: including any error possibly output before this
+$0: message. Then install a modern shell, or manually run
+$0: the script under such a shell if you do have one."
+ fi
+ exit 1
+fi
+fi
+fi
+SHELL=${CONFIG_SHELL-/bin/sh}
+export SHELL
+# Unset more variables known to interfere with behavior of common tools.
+CLICOLOR_FORCE= GREP_OPTIONS=
+unset CLICOLOR_FORCE GREP_OPTIONS
+
+## --------------------- ##
+## M4sh Shell Functions. ##
+## --------------------- ##
+# as_fn_unset VAR
+# ---------------
+# Portably unset VAR.
+as_fn_unset ()
+{
+ { eval $1=; unset $1;}
+}
+as_unset=as_fn_unset
+
+# as_fn_set_status STATUS
+# -----------------------
+# Set $? to STATUS, without forking.
+as_fn_set_status ()
+{
+ return $1
+} # as_fn_set_status
+
+# as_fn_exit STATUS
+# -----------------
+# Exit the shell with STATUS, even in a "trap 0" or "set -e" context.
+as_fn_exit ()
+{
+ set +e
+ as_fn_set_status $1
+ exit $1
+} # as_fn_exit
+
+# as_fn_mkdir_p
+# -------------
+# Create "$as_dir" as a directory, including parents if necessary.
+as_fn_mkdir_p ()
+{
+
+ case $as_dir in #(
+ -*) as_dir=./$as_dir;;
+ esac
+ test -d "$as_dir" || eval $as_mkdir_p || {
+ as_dirs=
+ while :; do
+ case $as_dir in #(
+ *\'*) as_qdir=`$as_echo "$as_dir" | sed "s/'/'\\\\\\\\''/g"`;; #'(
+ *) as_qdir=$as_dir;;
+ esac
+ as_dirs="'$as_qdir' $as_dirs"
+ as_dir=`$as_dirname -- "$as_dir" ||
+$as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
+ X"$as_dir" : 'X\(//\)[^/]' \| \
+ X"$as_dir" : 'X\(//\)$' \| \
+ X"$as_dir" : 'X\(/\)' \| . 2>/dev/null ||
+$as_echo X"$as_dir" |
+ sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{
+ s//\1/
+ q
+ }
+ /^X\(\/\/\)[^/].*/{
+ s//\1/
+ q
+ }
+ /^X\(\/\/\)$/{
+ s//\1/
+ q
+ }
+ /^X\(\/\).*/{
+ s//\1/
+ q
+ }
+ s/.*/./; q'`
+ test -d "$as_dir" && break
+ done
+ test -z "$as_dirs" || eval "mkdir $as_dirs"
+ } || test -d "$as_dir" || as_fn_error $? "cannot create directory $as_dir"
+
+
+} # as_fn_mkdir_p
+
+# as_fn_executable_p FILE
+# -----------------------
+# Test if FILE is an executable regular file.
+as_fn_executable_p ()
+{
+ test -f "$1" && test -x "$1"
+} # as_fn_executable_p
+# as_fn_append VAR VALUE
+# ----------------------
+# Append the text in VALUE to the end of the definition contained in VAR. Take
+# advantage of any shell optimizations that allow amortized linear growth over
+# repeated appends, instead of the typical quadratic growth present in naive
+# implementations.
+if (eval "as_var=1; as_var+=2; test x\$as_var = x12") 2>/dev/null; then :
+ eval 'as_fn_append ()
+ {
+ eval $1+=\$2
+ }'
+else
+ as_fn_append ()
+ {
+ eval $1=\$$1\$2
+ }
+fi # as_fn_append
+
+# as_fn_arith ARG...
+# ------------------
+# Perform arithmetic evaluation on the ARGs, and store the result in the
+# global $as_val. Take advantage of shells that can avoid forks. The arguments
+# must be portable across $(()) and expr.
+if (eval "test \$(( 1 + 1 )) = 2") 2>/dev/null; then :
+ eval 'as_fn_arith ()
+ {
+ as_val=$(( $* ))
+ }'
+else
+ as_fn_arith ()
+ {
+ as_val=`expr "$@" || test $? -eq 1`
+ }
+fi # as_fn_arith
+
+
+# as_fn_error STATUS ERROR [LINENO LOG_FD]
+# ----------------------------------------
+# Output "`basename $0`: error: ERROR" to stderr. If LINENO and LOG_FD are
+# provided, also output the error to LOG_FD, referencing LINENO. Then exit the
+# script with STATUS, using 1 if that was 0.
+as_fn_error ()
+{
+ as_status=$1; test $as_status -eq 0 && as_status=1
+ if test "$4"; then
+ as_lineno=${as_lineno-"$3"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
+ $as_echo "$as_me:${as_lineno-$LINENO}: error: $2" >&$4
+ fi
+ $as_echo "$as_me: error: $2" >&2
+ as_fn_exit $as_status
+} # as_fn_error
+
+if expr a : '\(a\)' >/dev/null 2>&1 &&
+ test "X`expr 00001 : '.*\(...\)'`" = X001; then
+ as_expr=expr
+else
+ as_expr=false
+fi
+
+if (basename -- /) >/dev/null 2>&1 && test "X`basename -- / 2>&1`" = "X/"; then
+ as_basename=basename
+else
+ as_basename=false
+fi
+
+if (as_dir=`dirname -- /` && test "X$as_dir" = X/) >/dev/null 2>&1; then
+ as_dirname=dirname
+else
+ as_dirname=false
+fi
+
+as_me=`$as_basename -- "$0" ||
+$as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \
+ X"$0" : 'X\(//\)$' \| \
+ X"$0" : 'X\(/\)' \| . 2>/dev/null ||
+$as_echo X/"$0" |
+ sed '/^.*\/\([^/][^/]*\)\/*$/{
+ s//\1/
+ q
+ }
+ /^X\/\(\/\/\)$/{
+ s//\1/
+ q
+ }
+ /^X\/\(\/\).*/{
+ s//\1/
+ q
+ }
+ s/.*/./; q'`
+
+# Avoid depending upon Character Ranges.
+as_cr_letters='abcdefghijklmnopqrstuvwxyz'
+as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ'
+as_cr_Letters=$as_cr_letters$as_cr_LETTERS
+as_cr_digits='0123456789'
+as_cr_alnum=$as_cr_Letters$as_cr_digits
+
+
+ as_lineno_1=$LINENO as_lineno_1a=$LINENO
+ as_lineno_2=$LINENO as_lineno_2a=$LINENO
+ eval 'test "x$as_lineno_1'$as_run'" != "x$as_lineno_2'$as_run'" &&
+ test "x`expr $as_lineno_1'$as_run' + 1`" = "x$as_lineno_2'$as_run'"' || {
+ # Blame Lee E. McMahon (1931-1989) for sed's syntax. :-)
+ sed -n '
+ p
+ /[$]LINENO/=
+ ' <$as_myself |
+ sed '
+ s/[$]LINENO.*/&-/
+ t lineno
+ b
+ :lineno
+ N
+ :loop
+ s/[$]LINENO\([^'$as_cr_alnum'_].*\n\)\(.*\)/\2\1\2/
+ t loop
+ s/-\n.*//
+ ' >$as_me.lineno &&
+ chmod +x "$as_me.lineno" ||
+ { $as_echo "$as_me: error: cannot create $as_me.lineno; rerun with a POSIX shell" >&2; as_fn_exit 1; }
+
+ # If we had to re-execute with $CONFIG_SHELL, we're ensured to have
+ # already done that, so ensure we don't try to do so again and fall
+ # in an infinite loop. This has already happened in practice.
+ _as_can_reexec=no; export _as_can_reexec
+ # Don't try to exec as it changes $[0], causing all sort of problems
+ # (the dirname of $[0] is not the place where we might find the
+ # original and so on. Autoconf is especially sensitive to this).
+ . "./$as_me.lineno"
+ # Exit status is that of the last command.
+ exit
+}
+
+ECHO_C= ECHO_N= ECHO_T=
+case `echo -n x` in #(((((
+-n*)
+ case `echo 'xy\c'` in
+ *c*) ECHO_T=' ';; # ECHO_T is single tab character.
+ xy) ECHO_C='\c';;
+ *) echo `echo ksh88 bug on AIX 6.1` > /dev/null
+ ECHO_T=' ';;
+ esac;;
+*)
+ ECHO_N='-n';;
+esac
+
+rm -f conf$$ conf$$.exe conf$$.file
+if test -d conf$$.dir; then
+ rm -f conf$$.dir/conf$$.file
+else
+ rm -f conf$$.dir
+ mkdir conf$$.dir 2>/dev/null
+fi
+if (echo >conf$$.file) 2>/dev/null; then
+ if ln -s conf$$.file conf$$ 2>/dev/null; then
+ as_ln_s='ln -s'
+ # ... but there are two gotchas:
+ # 1) On MSYS, both `ln -s file dir' and `ln file dir' fail.
+ # 2) DJGPP < 2.04 has no symlinks; `ln -s' creates a wrapper executable.
+ # In both cases, we have to default to `cp -pR'.
+ ln -s conf$$.file conf$$.dir 2>/dev/null && test ! -f conf$$.exe ||
+ as_ln_s='cp -pR'
+ elif ln conf$$.file conf$$ 2>/dev/null; then
+ as_ln_s=ln
+ else
+ as_ln_s='cp -pR'
+ fi
+else
+ as_ln_s='cp -pR'
+fi
+rm -f conf$$ conf$$.exe conf$$.dir/conf$$.file conf$$.file
+rmdir conf$$.dir 2>/dev/null
+
+if mkdir -p . 2>/dev/null; then
+ as_mkdir_p='mkdir -p "$as_dir"'
+else
+ test -d ./-p && rmdir ./-p
+ as_mkdir_p=false
+fi
+
+as_test_x='test -x'
+as_executable_p=as_fn_executable_p
+
+# Sed expression to map a string onto a valid CPP name.
+as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'"
+
+# Sed expression to map a string onto a valid variable name.
+as_tr_sh="eval sed 'y%*+%pp%;s%[^_$as_cr_alnum]%_%g'"
+
+
+test -n "$DJDIR" || exec 7<&0 &1
+
+# Name of the host.
+# hostname on some systems (SVR3.2, old GNU/Linux) returns a bogus exit status,
+# so uname gets run too.
+ac_hostname=`(hostname || uname -n) 2>/dev/null | sed 1q`
+
+#
+# Initializations.
+#
+ac_default_prefix=/usr/local
+ac_clean_files=
+ac_config_libobj_dir=.
+LIBOBJS=
+cross_compiling=no
+subdirs=
+MFLAGS=
+MAKEFLAGS=
+
+# Identity of this package.
+PACKAGE_NAME=
+PACKAGE_TARNAME=
+PACKAGE_VERSION=
+PACKAGE_STRING=
+PACKAGE_BUGREPORT=
+PACKAGE_URL=
+
+ac_default_prefix=/usr/local
+# Factoring default headers for most tests.
+ac_includes_default="\
+#include
+#ifdef HAVE_SYS_TYPES_H
+# include
+#endif
+#ifdef HAVE_SYS_STAT_H
+# include
+#endif
+#ifdef STDC_HEADERS
+# include
+# include
+#else
+# ifdef HAVE_STDLIB_H
+# include
+# endif
+#endif
+#ifdef HAVE_STRING_H
+# if !defined STDC_HEADERS && defined HAVE_MEMORY_H
+# include
+# endif
+# include
+#endif
+#ifdef HAVE_STRINGS_H
+# include
+#endif
+#ifdef HAVE_INTTYPES_H
+# include
+#endif
+#ifdef HAVE_STDINT_H
+# include
+#endif
+#ifdef HAVE_UNISTD_H
+# include
+#endif"
+
+ac_subst_vars='LTLIBOBJS
+LIBOBJS
+EGREP
+GREP
+CPP
+INSTALL_DATA
+INSTALL_SCRIPT
+INSTALL_PROGRAM
+OBJEXT
+EXEEXT
+ac_ct_CC
+CPPFLAGS
+LDFLAGS
+CFLAGS
+CC
+target_alias
+host_alias
+build_alias
+LIBS
+ECHO_T
+ECHO_N
+ECHO_C
+DEFS
+mandir
+localedir
+libdir
+psdir
+pdfdir
+dvidir
+htmldir
+infodir
+docdir
+oldincludedir
+includedir
+runstatedir
+localstatedir
+sharedstatedir
+sysconfdir
+datadir
+datarootdir
+libexecdir
+sbindir
+bindir
+program_transform_name
+prefix
+exec_prefix
+PACKAGE_URL
+PACKAGE_BUGREPORT
+PACKAGE_STRING
+PACKAGE_VERSION
+PACKAGE_TARNAME
+PACKAGE_NAME
+PATH_SEPARATOR
+SHELL'
+ac_subst_files=''
+ac_user_opts='
+enable_option_checking
+'
+ ac_precious_vars='build_alias
+host_alias
+target_alias
+CC
+CFLAGS
+LDFLAGS
+LIBS
+CPPFLAGS
+CPP'
+
+
+# Initialize some variables set by options.
+ac_init_help=
+ac_init_version=false
+ac_unrecognized_opts=
+ac_unrecognized_sep=
+# The variables have the same names as the options, with
+# dashes changed to underlines.
+cache_file=/dev/null
+exec_prefix=NONE
+no_create=
+no_recursion=
+prefix=NONE
+program_prefix=NONE
+program_suffix=NONE
+program_transform_name=s,x,x,
+silent=
+site=
+srcdir=
+verbose=
+x_includes=NONE
+x_libraries=NONE
+
+# Installation directory options.
+# These are left unexpanded so users can "make install exec_prefix=/foo"
+# and all the variables that are supposed to be based on exec_prefix
+# by default will actually change.
+# Use braces instead of parens because sh, perl, etc. also accept them.
+# (The list follows the same order as the GNU Coding Standards.)
+bindir='${exec_prefix}/bin'
+sbindir='${exec_prefix}/sbin'
+libexecdir='${exec_prefix}/libexec'
+datarootdir='${prefix}/share'
+datadir='${datarootdir}'
+sysconfdir='${prefix}/etc'
+sharedstatedir='${prefix}/com'
+localstatedir='${prefix}/var'
+runstatedir='${localstatedir}/run'
+includedir='${prefix}/include'
+oldincludedir='/usr/include'
+docdir='${datarootdir}/doc/${PACKAGE}'
+infodir='${datarootdir}/info'
+htmldir='${docdir}'
+dvidir='${docdir}'
+pdfdir='${docdir}'
+psdir='${docdir}'
+libdir='${exec_prefix}/lib'
+localedir='${datarootdir}/locale'
+mandir='${datarootdir}/man'
+
+ac_prev=
+ac_dashdash=
+for ac_option
+do
+ # If the previous option needs an argument, assign it.
+ if test -n "$ac_prev"; then
+ eval $ac_prev=\$ac_option
+ ac_prev=
+ continue
+ fi
+
+ case $ac_option in
+ *=?*) ac_optarg=`expr "X$ac_option" : '[^=]*=\(.*\)'` ;;
+ *=) ac_optarg= ;;
+ *) ac_optarg=yes ;;
+ esac
+
+ # Accept the important Cygnus configure options, so we can diagnose typos.
+
+ case $ac_dashdash$ac_option in
+ --)
+ ac_dashdash=yes ;;
+
+ -bindir | --bindir | --bindi | --bind | --bin | --bi)
+ ac_prev=bindir ;;
+ -bindir=* | --bindir=* | --bindi=* | --bind=* | --bin=* | --bi=*)
+ bindir=$ac_optarg ;;
+
+ -build | --build | --buil | --bui | --bu)
+ ac_prev=build_alias ;;
+ -build=* | --build=* | --buil=* | --bui=* | --bu=*)
+ build_alias=$ac_optarg ;;
+
+ -cache-file | --cache-file | --cache-fil | --cache-fi \
+ | --cache-f | --cache- | --cache | --cach | --cac | --ca | --c)
+ ac_prev=cache_file ;;
+ -cache-file=* | --cache-file=* | --cache-fil=* | --cache-fi=* \
+ | --cache-f=* | --cache-=* | --cache=* | --cach=* | --cac=* | --ca=* | --c=*)
+ cache_file=$ac_optarg ;;
+
+ --config-cache | -C)
+ cache_file=config.cache ;;
+
+ -datadir | --datadir | --datadi | --datad)
+ ac_prev=datadir ;;
+ -datadir=* | --datadir=* | --datadi=* | --datad=*)
+ datadir=$ac_optarg ;;
+
+ -datarootdir | --datarootdir | --datarootdi | --datarootd | --dataroot \
+ | --dataroo | --dataro | --datar)
+ ac_prev=datarootdir ;;
+ -datarootdir=* | --datarootdir=* | --datarootdi=* | --datarootd=* \
+ | --dataroot=* | --dataroo=* | --dataro=* | --datar=*)
+ datarootdir=$ac_optarg ;;
+
+ -disable-* | --disable-*)
+ ac_useropt=`expr "x$ac_option" : 'x-*disable-\(.*\)'`
+ # Reject names that are not valid shell variable names.
+ expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null &&
+ as_fn_error $? "invalid feature name: $ac_useropt"
+ ac_useropt_orig=$ac_useropt
+ ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'`
+ case $ac_user_opts in
+ *"
+"enable_$ac_useropt"
+"*) ;;
+ *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--disable-$ac_useropt_orig"
+ ac_unrecognized_sep=', ';;
+ esac
+ eval enable_$ac_useropt=no ;;
+
+ -docdir | --docdir | --docdi | --doc | --do)
+ ac_prev=docdir ;;
+ -docdir=* | --docdir=* | --docdi=* | --doc=* | --do=*)
+ docdir=$ac_optarg ;;
+
+ -dvidir | --dvidir | --dvidi | --dvid | --dvi | --dv)
+ ac_prev=dvidir ;;
+ -dvidir=* | --dvidir=* | --dvidi=* | --dvid=* | --dvi=* | --dv=*)
+ dvidir=$ac_optarg ;;
+
+ -enable-* | --enable-*)
+ ac_useropt=`expr "x$ac_option" : 'x-*enable-\([^=]*\)'`
+ # Reject names that are not valid shell variable names.
+ expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null &&
+ as_fn_error $? "invalid feature name: $ac_useropt"
+ ac_useropt_orig=$ac_useropt
+ ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'`
+ case $ac_user_opts in
+ *"
+"enable_$ac_useropt"
+"*) ;;
+ *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--enable-$ac_useropt_orig"
+ ac_unrecognized_sep=', ';;
+ esac
+ eval enable_$ac_useropt=\$ac_optarg ;;
+
+ -exec-prefix | --exec_prefix | --exec-prefix | --exec-prefi \
+ | --exec-pref | --exec-pre | --exec-pr | --exec-p | --exec- \
+ | --exec | --exe | --ex)
+ ac_prev=exec_prefix ;;
+ -exec-prefix=* | --exec_prefix=* | --exec-prefix=* | --exec-prefi=* \
+ | --exec-pref=* | --exec-pre=* | --exec-pr=* | --exec-p=* | --exec-=* \
+ | --exec=* | --exe=* | --ex=*)
+ exec_prefix=$ac_optarg ;;
+
+ -gas | --gas | --ga | --g)
+ # Obsolete; use --with-gas.
+ with_gas=yes ;;
+
+ -help | --help | --hel | --he | -h)
+ ac_init_help=long ;;
+ -help=r* | --help=r* | --hel=r* | --he=r* | -hr*)
+ ac_init_help=recursive ;;
+ -help=s* | --help=s* | --hel=s* | --he=s* | -hs*)
+ ac_init_help=short ;;
+
+ -host | --host | --hos | --ho)
+ ac_prev=host_alias ;;
+ -host=* | --host=* | --hos=* | --ho=*)
+ host_alias=$ac_optarg ;;
+
+ -htmldir | --htmldir | --htmldi | --htmld | --html | --htm | --ht)
+ ac_prev=htmldir ;;
+ -htmldir=* | --htmldir=* | --htmldi=* | --htmld=* | --html=* | --htm=* \
+ | --ht=*)
+ htmldir=$ac_optarg ;;
+
+ -includedir | --includedir | --includedi | --included | --include \
+ | --includ | --inclu | --incl | --inc)
+ ac_prev=includedir ;;
+ -includedir=* | --includedir=* | --includedi=* | --included=* | --include=* \
+ | --includ=* | --inclu=* | --incl=* | --inc=*)
+ includedir=$ac_optarg ;;
+
+ -infodir | --infodir | --infodi | --infod | --info | --inf)
+ ac_prev=infodir ;;
+ -infodir=* | --infodir=* | --infodi=* | --infod=* | --info=* | --inf=*)
+ infodir=$ac_optarg ;;
+
+ -libdir | --libdir | --libdi | --libd)
+ ac_prev=libdir ;;
+ -libdir=* | --libdir=* | --libdi=* | --libd=*)
+ libdir=$ac_optarg ;;
+
+ -libexecdir | --libexecdir | --libexecdi | --libexecd | --libexec \
+ | --libexe | --libex | --libe)
+ ac_prev=libexecdir ;;
+ -libexecdir=* | --libexecdir=* | --libexecdi=* | --libexecd=* | --libexec=* \
+ | --libexe=* | --libex=* | --libe=*)
+ libexecdir=$ac_optarg ;;
+
+ -localedir | --localedir | --localedi | --localed | --locale)
+ ac_prev=localedir ;;
+ -localedir=* | --localedir=* | --localedi=* | --localed=* | --locale=*)
+ localedir=$ac_optarg ;;
+
+ -localstatedir | --localstatedir | --localstatedi | --localstated \
+ | --localstate | --localstat | --localsta | --localst | --locals)
+ ac_prev=localstatedir ;;
+ -localstatedir=* | --localstatedir=* | --localstatedi=* | --localstated=* \
+ | --localstate=* | --localstat=* | --localsta=* | --localst=* | --locals=*)
+ localstatedir=$ac_optarg ;;
+
+ -mandir | --mandir | --mandi | --mand | --man | --ma | --m)
+ ac_prev=mandir ;;
+ -mandir=* | --mandir=* | --mandi=* | --mand=* | --man=* | --ma=* | --m=*)
+ mandir=$ac_optarg ;;
+
+ -nfp | --nfp | --nf)
+ # Obsolete; use --without-fp.
+ with_fp=no ;;
+
+ -no-create | --no-create | --no-creat | --no-crea | --no-cre \
+ | --no-cr | --no-c | -n)
+ no_create=yes ;;
+
+ -no-recursion | --no-recursion | --no-recursio | --no-recursi \
+ | --no-recurs | --no-recur | --no-recu | --no-rec | --no-re | --no-r)
+ no_recursion=yes ;;
+
+ -oldincludedir | --oldincludedir | --oldincludedi | --oldincluded \
+ | --oldinclude | --oldinclud | --oldinclu | --oldincl | --oldinc \
+ | --oldin | --oldi | --old | --ol | --o)
+ ac_prev=oldincludedir ;;
+ -oldincludedir=* | --oldincludedir=* | --oldincludedi=* | --oldincluded=* \
+ | --oldinclude=* | --oldinclud=* | --oldinclu=* | --oldincl=* | --oldinc=* \
+ | --oldin=* | --oldi=* | --old=* | --ol=* | --o=*)
+ oldincludedir=$ac_optarg ;;
+
+ -prefix | --prefix | --prefi | --pref | --pre | --pr | --p)
+ ac_prev=prefix ;;
+ -prefix=* | --prefix=* | --prefi=* | --pref=* | --pre=* | --pr=* | --p=*)
+ prefix=$ac_optarg ;;
+
+ -program-prefix | --program-prefix | --program-prefi | --program-pref \
+ | --program-pre | --program-pr | --program-p)
+ ac_prev=program_prefix ;;
+ -program-prefix=* | --program-prefix=* | --program-prefi=* \
+ | --program-pref=* | --program-pre=* | --program-pr=* | --program-p=*)
+ program_prefix=$ac_optarg ;;
+
+ -program-suffix | --program-suffix | --program-suffi | --program-suff \
+ | --program-suf | --program-su | --program-s)
+ ac_prev=program_suffix ;;
+ -program-suffix=* | --program-suffix=* | --program-suffi=* \
+ | --program-suff=* | --program-suf=* | --program-su=* | --program-s=*)
+ program_suffix=$ac_optarg ;;
+
+ -program-transform-name | --program-transform-name \
+ | --program-transform-nam | --program-transform-na \
+ | --program-transform-n | --program-transform- \
+ | --program-transform | --program-transfor \
+ | --program-transfo | --program-transf \
+ | --program-trans | --program-tran \
+ | --progr-tra | --program-tr | --program-t)
+ ac_prev=program_transform_name ;;
+ -program-transform-name=* | --program-transform-name=* \
+ | --program-transform-nam=* | --program-transform-na=* \
+ | --program-transform-n=* | --program-transform-=* \
+ | --program-transform=* | --program-transfor=* \
+ | --program-transfo=* | --program-transf=* \
+ | --program-trans=* | --program-tran=* \
+ | --progr-tra=* | --program-tr=* | --program-t=*)
+ program_transform_name=$ac_optarg ;;
+
+ -pdfdir | --pdfdir | --pdfdi | --pdfd | --pdf | --pd)
+ ac_prev=pdfdir ;;
+ -pdfdir=* | --pdfdir=* | --pdfdi=* | --pdfd=* | --pdf=* | --pd=*)
+ pdfdir=$ac_optarg ;;
+
+ -psdir | --psdir | --psdi | --psd | --ps)
+ ac_prev=psdir ;;
+ -psdir=* | --psdir=* | --psdi=* | --psd=* | --ps=*)
+ psdir=$ac_optarg ;;
+
+ -q | -quiet | --quiet | --quie | --qui | --qu | --q \
+ | -silent | --silent | --silen | --sile | --sil)
+ silent=yes ;;
+
+ -runstatedir | --runstatedir | --runstatedi | --runstated \
+ | --runstate | --runstat | --runsta | --runst | --runs \
+ | --run | --ru | --r)
+ ac_prev=runstatedir ;;
+ -runstatedir=* | --runstatedir=* | --runstatedi=* | --runstated=* \
+ | --runstate=* | --runstat=* | --runsta=* | --runst=* | --runs=* \
+ | --run=* | --ru=* | --r=*)
+ runstatedir=$ac_optarg ;;
+
+ -sbindir | --sbindir | --sbindi | --sbind | --sbin | --sbi | --sb)
+ ac_prev=sbindir ;;
+ -sbindir=* | --sbindir=* | --sbindi=* | --sbind=* | --sbin=* \
+ | --sbi=* | --sb=*)
+ sbindir=$ac_optarg ;;
+
+ -sharedstatedir | --sharedstatedir | --sharedstatedi \
+ | --sharedstated | --sharedstate | --sharedstat | --sharedsta \
+ | --sharedst | --shareds | --shared | --share | --shar \
+ | --sha | --sh)
+ ac_prev=sharedstatedir ;;
+ -sharedstatedir=* | --sharedstatedir=* | --sharedstatedi=* \
+ | --sharedstated=* | --sharedstate=* | --sharedstat=* | --sharedsta=* \
+ | --sharedst=* | --shareds=* | --shared=* | --share=* | --shar=* \
+ | --sha=* | --sh=*)
+ sharedstatedir=$ac_optarg ;;
+
+ -site | --site | --sit)
+ ac_prev=site ;;
+ -site=* | --site=* | --sit=*)
+ site=$ac_optarg ;;
+
+ -srcdir | --srcdir | --srcdi | --srcd | --src | --sr)
+ ac_prev=srcdir ;;
+ -srcdir=* | --srcdir=* | --srcdi=* | --srcd=* | --src=* | --sr=*)
+ srcdir=$ac_optarg ;;
+
+ -sysconfdir | --sysconfdir | --sysconfdi | --sysconfd | --sysconf \
+ | --syscon | --sysco | --sysc | --sys | --sy)
+ ac_prev=sysconfdir ;;
+ -sysconfdir=* | --sysconfdir=* | --sysconfdi=* | --sysconfd=* | --sysconf=* \
+ | --syscon=* | --sysco=* | --sysc=* | --sys=* | --sy=*)
+ sysconfdir=$ac_optarg ;;
+
+ -target | --target | --targe | --targ | --tar | --ta | --t)
+ ac_prev=target_alias ;;
+ -target=* | --target=* | --targe=* | --targ=* | --tar=* | --ta=* | --t=*)
+ target_alias=$ac_optarg ;;
+
+ -v | -verbose | --verbose | --verbos | --verbo | --verb)
+ verbose=yes ;;
+
+ -version | --version | --versio | --versi | --vers | -V)
+ ac_init_version=: ;;
+
+ -with-* | --with-*)
+ ac_useropt=`expr "x$ac_option" : 'x-*with-\([^=]*\)'`
+ # Reject names that are not valid shell variable names.
+ expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null &&
+ as_fn_error $? "invalid package name: $ac_useropt"
+ ac_useropt_orig=$ac_useropt
+ ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'`
+ case $ac_user_opts in
+ *"
+"with_$ac_useropt"
+"*) ;;
+ *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--with-$ac_useropt_orig"
+ ac_unrecognized_sep=', ';;
+ esac
+ eval with_$ac_useropt=\$ac_optarg ;;
+
+ -without-* | --without-*)
+ ac_useropt=`expr "x$ac_option" : 'x-*without-\(.*\)'`
+ # Reject names that are not valid shell variable names.
+ expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null &&
+ as_fn_error $? "invalid package name: $ac_useropt"
+ ac_useropt_orig=$ac_useropt
+ ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'`
+ case $ac_user_opts in
+ *"
+"with_$ac_useropt"
+"*) ;;
+ *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--without-$ac_useropt_orig"
+ ac_unrecognized_sep=', ';;
+ esac
+ eval with_$ac_useropt=no ;;
+
+ --x)
+ # Obsolete; use --with-x.
+ with_x=yes ;;
+
+ -x-includes | --x-includes | --x-include | --x-includ | --x-inclu \
+ | --x-incl | --x-inc | --x-in | --x-i)
+ ac_prev=x_includes ;;
+ -x-includes=* | --x-includes=* | --x-include=* | --x-includ=* | --x-inclu=* \
+ | --x-incl=* | --x-inc=* | --x-in=* | --x-i=*)
+ x_includes=$ac_optarg ;;
+
+ -x-libraries | --x-libraries | --x-librarie | --x-librari \
+ | --x-librar | --x-libra | --x-libr | --x-lib | --x-li | --x-l)
+ ac_prev=x_libraries ;;
+ -x-libraries=* | --x-libraries=* | --x-librarie=* | --x-librari=* \
+ | --x-librar=* | --x-libra=* | --x-libr=* | --x-lib=* | --x-li=* | --x-l=*)
+ x_libraries=$ac_optarg ;;
+
+ -*) as_fn_error $? "unrecognized option: \`$ac_option'
+Try \`$0 --help' for more information"
+ ;;
+
+ *=*)
+ ac_envvar=`expr "x$ac_option" : 'x\([^=]*\)='`
+ # Reject names that are not valid shell variable names.
+ case $ac_envvar in #(
+ '' | [0-9]* | *[!_$as_cr_alnum]* )
+ as_fn_error $? "invalid variable name: \`$ac_envvar'" ;;
+ esac
+ eval $ac_envvar=\$ac_optarg
+ export $ac_envvar ;;
+
+ *)
+ # FIXME: should be removed in autoconf 3.0.
+ $as_echo "$as_me: WARNING: you should use --build, --host, --target" >&2
+ expr "x$ac_option" : ".*[^-._$as_cr_alnum]" >/dev/null &&
+ $as_echo "$as_me: WARNING: invalid host type: $ac_option" >&2
+ : "${build_alias=$ac_option} ${host_alias=$ac_option} ${target_alias=$ac_option}"
+ ;;
+
+ esac
+done
+
+if test -n "$ac_prev"; then
+ ac_option=--`echo $ac_prev | sed 's/_/-/g'`
+ as_fn_error $? "missing argument to $ac_option"
+fi
+
+if test -n "$ac_unrecognized_opts"; then
+ case $enable_option_checking in
+ no) ;;
+ fatal) as_fn_error $? "unrecognized options: $ac_unrecognized_opts" ;;
+ *) $as_echo "$as_me: WARNING: unrecognized options: $ac_unrecognized_opts" >&2 ;;
+ esac
+fi
+
+# Check all directory arguments for consistency.
+for ac_var in exec_prefix prefix bindir sbindir libexecdir datarootdir \
+ datadir sysconfdir sharedstatedir localstatedir includedir \
+ oldincludedir docdir infodir htmldir dvidir pdfdir psdir \
+ libdir localedir mandir runstatedir
+do
+ eval ac_val=\$$ac_var
+ # Remove trailing slashes.
+ case $ac_val in
+ */ )
+ ac_val=`expr "X$ac_val" : 'X\(.*[^/]\)' \| "X$ac_val" : 'X\(.*\)'`
+ eval $ac_var=\$ac_val;;
+ esac
+ # Be sure to have absolute directory names.
+ case $ac_val in
+ [\\/$]* | ?:[\\/]* ) continue;;
+ NONE | '' ) case $ac_var in *prefix ) continue;; esac;;
+ esac
+ as_fn_error $? "expected an absolute directory name for --$ac_var: $ac_val"
+done
+
+# There might be people who depend on the old broken behavior: `$host'
+# used to hold the argument of --host etc.
+# FIXME: To remove some day.
+build=$build_alias
+host=$host_alias
+target=$target_alias
+
+# FIXME: To remove some day.
+if test "x$host_alias" != x; then
+ if test "x$build_alias" = x; then
+ cross_compiling=maybe
+ elif test "x$build_alias" != "x$host_alias"; then
+ cross_compiling=yes
+ fi
+fi
+
+ac_tool_prefix=
+test -n "$host_alias" && ac_tool_prefix=$host_alias-
+
+test "$silent" = yes && exec 6>/dev/null
+
+
+ac_pwd=`pwd` && test -n "$ac_pwd" &&
+ac_ls_di=`ls -di .` &&
+ac_pwd_ls_di=`cd "$ac_pwd" && ls -di .` ||
+ as_fn_error $? "working directory cannot be determined"
+test "X$ac_ls_di" = "X$ac_pwd_ls_di" ||
+ as_fn_error $? "pwd does not report name of working directory"
+
+
+# Find the source files, if location was not specified.
+if test -z "$srcdir"; then
+ ac_srcdir_defaulted=yes
+ # Try the directory containing this script, then the parent directory.
+ ac_confdir=`$as_dirname -- "$as_myself" ||
+$as_expr X"$as_myself" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
+ X"$as_myself" : 'X\(//\)[^/]' \| \
+ X"$as_myself" : 'X\(//\)$' \| \
+ X"$as_myself" : 'X\(/\)' \| . 2>/dev/null ||
+$as_echo X"$as_myself" |
+ sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{
+ s//\1/
+ q
+ }
+ /^X\(\/\/\)[^/].*/{
+ s//\1/
+ q
+ }
+ /^X\(\/\/\)$/{
+ s//\1/
+ q
+ }
+ /^X\(\/\).*/{
+ s//\1/
+ q
+ }
+ s/.*/./; q'`
+ srcdir=$ac_confdir
+ if test ! -r "$srcdir/$ac_unique_file"; then
+ srcdir=..
+ fi
+else
+ ac_srcdir_defaulted=no
+fi
+if test ! -r "$srcdir/$ac_unique_file"; then
+ test "$ac_srcdir_defaulted" = yes && srcdir="$ac_confdir or .."
+ as_fn_error $? "cannot find sources ($ac_unique_file) in $srcdir"
+fi
+ac_msg="sources are in $srcdir, but \`cd $srcdir' does not work"
+ac_abs_confdir=`(
+ cd "$srcdir" && test -r "./$ac_unique_file" || as_fn_error $? "$ac_msg"
+ pwd)`
+# When building in place, set srcdir=.
+if test "$ac_abs_confdir" = "$ac_pwd"; then
+ srcdir=.
+fi
+# Remove unnecessary trailing slashes from srcdir.
+# Double slashes in file names in object file debugging info
+# mess up M-x gdb in Emacs.
+case $srcdir in
+*/) srcdir=`expr "X$srcdir" : 'X\(.*[^/]\)' \| "X$srcdir" : 'X\(.*\)'`;;
+esac
+for ac_var in $ac_precious_vars; do
+ eval ac_env_${ac_var}_set=\${${ac_var}+set}
+ eval ac_env_${ac_var}_value=\$${ac_var}
+ eval ac_cv_env_${ac_var}_set=\${${ac_var}+set}
+ eval ac_cv_env_${ac_var}_value=\$${ac_var}
+done
+
+#
+# Report the --help message.
+#
+if test "$ac_init_help" = "long"; then
+ # Omit some internal or obsolete options to make the list less imposing.
+ # This message is too long to be a string in the A/UX 3.1 sh.
+ cat <<_ACEOF
+\`configure' configures this package to adapt to many kinds of systems.
+
+Usage: $0 [OPTION]... [VAR=VALUE]...
+
+To assign environment variables (e.g., CC, CFLAGS...), specify them as
+VAR=VALUE. See below for descriptions of some of the useful variables.
+
+Defaults for the options are specified in brackets.
+
+Configuration:
+ -h, --help display this help and exit
+ --help=short display options specific to this package
+ --help=recursive display the short help of all the included packages
+ -V, --version display version information and exit
+ -q, --quiet, --silent do not print \`checking ...' messages
+ --cache-file=FILE cache test results in FILE [disabled]
+ -C, --config-cache alias for \`--cache-file=config.cache'
+ -n, --no-create do not create output files
+ --srcdir=DIR find the sources in DIR [configure dir or \`..']
+
+Installation directories:
+ --prefix=PREFIX install architecture-independent files in PREFIX
+ [$ac_default_prefix]
+ --exec-prefix=EPREFIX install architecture-dependent files in EPREFIX
+ [PREFIX]
+
+By default, \`make install' will install all the files in
+\`$ac_default_prefix/bin', \`$ac_default_prefix/lib' etc. You can specify
+an installation prefix other than \`$ac_default_prefix' using \`--prefix',
+for instance \`--prefix=\$HOME'.
+
+For better control, use the options below.
+
+Fine tuning of the installation directories:
+ --bindir=DIR user executables [EPREFIX/bin]
+ --sbindir=DIR system admin executables [EPREFIX/sbin]
+ --libexecdir=DIR program executables [EPREFIX/libexec]
+ --sysconfdir=DIR read-only single-machine data [PREFIX/etc]
+ --sharedstatedir=DIR modifiable architecture-independent data [PREFIX/com]
+ --localstatedir=DIR modifiable single-machine data [PREFIX/var]
+ --runstatedir=DIR modifiable per-process data [LOCALSTATEDIR/run]
+ --libdir=DIR object code libraries [EPREFIX/lib]
+ --includedir=DIR C header files [PREFIX/include]
+ --oldincludedir=DIR C header files for non-gcc [/usr/include]
+ --datarootdir=DIR read-only arch.-independent data root [PREFIX/share]
+ --datadir=DIR read-only architecture-independent data [DATAROOTDIR]
+ --infodir=DIR info documentation [DATAROOTDIR/info]
+ --localedir=DIR locale-dependent data [DATAROOTDIR/locale]
+ --mandir=DIR man documentation [DATAROOTDIR/man]
+ --docdir=DIR documentation root [DATAROOTDIR/doc/PACKAGE]
+ --htmldir=DIR html documentation [DOCDIR]
+ --dvidir=DIR dvi documentation [DOCDIR]
+ --pdfdir=DIR pdf documentation [DOCDIR]
+ --psdir=DIR ps documentation [DOCDIR]
+_ACEOF
+
+ cat <<\_ACEOF
+_ACEOF
+fi
+
+if test -n "$ac_init_help"; then
+
+ cat <<\_ACEOF
+
+Some influential environment variables:
+ CC C compiler command
+ CFLAGS C compiler flags
+ LDFLAGS linker flags, e.g. -L if you have libraries in a
+ nonstandard directory
+ LIBS libraries to pass to the linker, e.g. -l
+ CPPFLAGS (Objective) C/C++ preprocessor flags, e.g. -I if
+ you have headers in a nonstandard directory
+ CPP C preprocessor
+
+Use these variables to override the choices made by `configure' or to help
+it to find libraries and programs with nonstandard names/locations.
+
+Report bugs to the package provider.
+_ACEOF
+ac_status=$?
+fi
+
+if test "$ac_init_help" = "recursive"; then
+ # If there are subdirs, report their specific --help.
+ for ac_dir in : $ac_subdirs_all; do test "x$ac_dir" = x: && continue
+ test -d "$ac_dir" ||
+ { cd "$srcdir" && ac_pwd=`pwd` && srcdir=. && test -d "$ac_dir"; } ||
+ continue
+ ac_builddir=.
+
+case "$ac_dir" in
+.) ac_dir_suffix= ac_top_builddir_sub=. ac_top_build_prefix= ;;
+*)
+ ac_dir_suffix=/`$as_echo "$ac_dir" | sed 's|^\.[\\/]||'`
+ # A ".." for each directory in $ac_dir_suffix.
+ ac_top_builddir_sub=`$as_echo "$ac_dir_suffix" | sed 's|/[^\\/]*|/..|g;s|/||'`
+ case $ac_top_builddir_sub in
+ "") ac_top_builddir_sub=. ac_top_build_prefix= ;;
+ *) ac_top_build_prefix=$ac_top_builddir_sub/ ;;
+ esac ;;
+esac
+ac_abs_top_builddir=$ac_pwd
+ac_abs_builddir=$ac_pwd$ac_dir_suffix
+# for backward compatibility:
+ac_top_builddir=$ac_top_build_prefix
+
+case $srcdir in
+ .) # We are building in place.
+ ac_srcdir=.
+ ac_top_srcdir=$ac_top_builddir_sub
+ ac_abs_top_srcdir=$ac_pwd ;;
+ [\\/]* | ?:[\\/]* ) # Absolute name.
+ ac_srcdir=$srcdir$ac_dir_suffix;
+ ac_top_srcdir=$srcdir
+ ac_abs_top_srcdir=$srcdir ;;
+ *) # Relative name.
+ ac_srcdir=$ac_top_build_prefix$srcdir$ac_dir_suffix
+ ac_top_srcdir=$ac_top_build_prefix$srcdir
+ ac_abs_top_srcdir=$ac_pwd/$srcdir ;;
+esac
+ac_abs_srcdir=$ac_abs_top_srcdir$ac_dir_suffix
+
+ cd "$ac_dir" || { ac_status=$?; continue; }
+ # Check for guested configure.
+ if test -f "$ac_srcdir/configure.gnu"; then
+ echo &&
+ $SHELL "$ac_srcdir/configure.gnu" --help=recursive
+ elif test -f "$ac_srcdir/configure"; then
+ echo &&
+ $SHELL "$ac_srcdir/configure" --help=recursive
+ else
+ $as_echo "$as_me: WARNING: no configuration information is in $ac_dir" >&2
+ fi || ac_status=$?
+ cd "$ac_pwd" || { ac_status=$?; break; }
+ done
+fi
+
+test -n "$ac_init_help" && exit $ac_status
+if $ac_init_version; then
+ cat <<\_ACEOF
+configure
+generated by GNU Autoconf 2.69
+
+Copyright (C) 2012 Free Software Foundation, Inc.
+This configure script is free software; the Free Software Foundation
+gives unlimited permission to copy, distribute and modify it.
+_ACEOF
+ exit
+fi
+
+## ------------------------ ##
+## Autoconf initialization. ##
+## ------------------------ ##
+
+# ac_fn_c_try_compile LINENO
+# --------------------------
+# Try to compile conftest.$ac_ext, and return whether this succeeded.
+ac_fn_c_try_compile ()
+{
+ as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
+ rm -f conftest.$ac_objext
+ if { { ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
+$as_echo "$ac_try_echo"; } >&5
+ (eval "$ac_compile") 2>conftest.err
+ ac_status=$?
+ if test -s conftest.err; then
+ grep -v '^ *+' conftest.err >conftest.er1
+ cat conftest.er1 >&5
+ mv -f conftest.er1 conftest.err
+ fi
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; } && {
+ test -z "$ac_c_werror_flag" ||
+ test ! -s conftest.err
+ } && test -s conftest.$ac_objext; then :
+ ac_retval=0
+else
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_retval=1
+fi
+ eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
+ as_fn_set_status $ac_retval
+
+} # ac_fn_c_try_compile
+
+# ac_fn_c_try_cpp LINENO
+# ----------------------
+# Try to preprocess conftest.$ac_ext, and return whether this succeeded.
+ac_fn_c_try_cpp ()
+{
+ as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
+ if { { ac_try="$ac_cpp conftest.$ac_ext"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
+$as_echo "$ac_try_echo"; } >&5
+ (eval "$ac_cpp conftest.$ac_ext") 2>conftest.err
+ ac_status=$?
+ if test -s conftest.err; then
+ grep -v '^ *+' conftest.err >conftest.er1
+ cat conftest.er1 >&5
+ mv -f conftest.er1 conftest.err
+ fi
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; } > conftest.i && {
+ test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" ||
+ test ! -s conftest.err
+ }; then :
+ ac_retval=0
+else
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_retval=1
+fi
+ eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
+ as_fn_set_status $ac_retval
+
+} # ac_fn_c_try_cpp
+
+# ac_fn_c_check_header_mongrel LINENO HEADER VAR INCLUDES
+# -------------------------------------------------------
+# Tests whether HEADER exists, giving a warning if it cannot be compiled using
+# the include files in INCLUDES and setting the cache variable VAR
+# accordingly.
+ac_fn_c_check_header_mongrel ()
+{
+ as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
+ if eval \${$3+:} false; then :
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5
+$as_echo_n "checking for $2... " >&6; }
+if eval \${$3+:} false; then :
+ $as_echo_n "(cached) " >&6
+fi
+eval ac_res=\$$3
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5
+$as_echo "$ac_res" >&6; }
+else
+ # Is the header compilable?
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking $2 usability" >&5
+$as_echo_n "checking $2 usability... " >&6; }
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+$4
+#include <$2>
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+ ac_header_compiler=yes
+else
+ ac_header_compiler=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_header_compiler" >&5
+$as_echo "$ac_header_compiler" >&6; }
+
+# Is the header present?
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking $2 presence" >&5
+$as_echo_n "checking $2 presence... " >&6; }
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+#include <$2>
+_ACEOF
+if ac_fn_c_try_cpp "$LINENO"; then :
+ ac_header_preproc=yes
+else
+ ac_header_preproc=no
+fi
+rm -f conftest.err conftest.i conftest.$ac_ext
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_header_preproc" >&5
+$as_echo "$ac_header_preproc" >&6; }
+
+# So? What about this header?
+case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in #((
+ yes:no: )
+ { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: accepted by the compiler, rejected by the preprocessor!" >&5
+$as_echo "$as_me: WARNING: $2: accepted by the compiler, rejected by the preprocessor!" >&2;}
+ { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: proceeding with the compiler's result" >&5
+$as_echo "$as_me: WARNING: $2: proceeding with the compiler's result" >&2;}
+ ;;
+ no:yes:* )
+ { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: present but cannot be compiled" >&5
+$as_echo "$as_me: WARNING: $2: present but cannot be compiled" >&2;}
+ { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: check for missing prerequisite headers?" >&5
+$as_echo "$as_me: WARNING: $2: check for missing prerequisite headers?" >&2;}
+ { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: see the Autoconf documentation" >&5
+$as_echo "$as_me: WARNING: $2: see the Autoconf documentation" >&2;}
+ { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: section \"Present But Cannot Be Compiled\"" >&5
+$as_echo "$as_me: WARNING: $2: section \"Present But Cannot Be Compiled\"" >&2;}
+ { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: proceeding with the compiler's result" >&5
+$as_echo "$as_me: WARNING: $2: proceeding with the compiler's result" >&2;}
+ ;;
+esac
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5
+$as_echo_n "checking for $2... " >&6; }
+if eval \${$3+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ eval "$3=\$ac_header_compiler"
+fi
+eval ac_res=\$$3
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5
+$as_echo "$ac_res" >&6; }
+fi
+ eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
+
+} # ac_fn_c_check_header_mongrel
+
+# ac_fn_c_try_run LINENO
+# ----------------------
+# Try to link conftest.$ac_ext, and return whether this succeeded. Assumes
+# that executables *can* be run.
+ac_fn_c_try_run ()
+{
+ as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
+ if { { ac_try="$ac_link"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
+$as_echo "$ac_try_echo"; } >&5
+ (eval "$ac_link") 2>&5
+ ac_status=$?
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; } && { ac_try='./conftest$ac_exeext'
+ { { case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
+$as_echo "$ac_try_echo"; } >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }; }; then :
+ ac_retval=0
+else
+ $as_echo "$as_me: program exited with status $ac_status" >&5
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_retval=$ac_status
+fi
+ rm -rf conftest.dSYM conftest_ipa8_conftest.oo
+ eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
+ as_fn_set_status $ac_retval
+
+} # ac_fn_c_try_run
+
+# ac_fn_c_check_header_compile LINENO HEADER VAR INCLUDES
+# -------------------------------------------------------
+# Tests whether HEADER exists and can be compiled using the include files in
+# INCLUDES, setting the cache variable VAR accordingly.
+ac_fn_c_check_header_compile ()
+{
+ as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5
+$as_echo_n "checking for $2... " >&6; }
+if eval \${$3+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+$4
+#include <$2>
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+ eval "$3=yes"
+else
+ eval "$3=no"
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+eval ac_res=\$$3
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5
+$as_echo "$ac_res" >&6; }
+ eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
+
+} # ac_fn_c_check_header_compile
+cat >config.log <<_ACEOF
+This file contains any messages produced by compilers while
+running configure, to aid debugging if configure makes a mistake.
+
+It was created by $as_me, which was
+generated by GNU Autoconf 2.69. Invocation command line was
+
+ $ $0 $@
+
+_ACEOF
+exec 5>>config.log
+{
+cat <<_ASUNAME
+## --------- ##
+## Platform. ##
+## --------- ##
+
+hostname = `(hostname || uname -n) 2>/dev/null | sed 1q`
+uname -m = `(uname -m) 2>/dev/null || echo unknown`
+uname -r = `(uname -r) 2>/dev/null || echo unknown`
+uname -s = `(uname -s) 2>/dev/null || echo unknown`
+uname -v = `(uname -v) 2>/dev/null || echo unknown`
+
+/usr/bin/uname -p = `(/usr/bin/uname -p) 2>/dev/null || echo unknown`
+/bin/uname -X = `(/bin/uname -X) 2>/dev/null || echo unknown`
+
+/bin/arch = `(/bin/arch) 2>/dev/null || echo unknown`
+/usr/bin/arch -k = `(/usr/bin/arch -k) 2>/dev/null || echo unknown`
+/usr/convex/getsysinfo = `(/usr/convex/getsysinfo) 2>/dev/null || echo unknown`
+/usr/bin/hostinfo = `(/usr/bin/hostinfo) 2>/dev/null || echo unknown`
+/bin/machine = `(/bin/machine) 2>/dev/null || echo unknown`
+/usr/bin/oslevel = `(/usr/bin/oslevel) 2>/dev/null || echo unknown`
+/bin/universe = `(/bin/universe) 2>/dev/null || echo unknown`
+
+_ASUNAME
+
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ $as_echo "PATH: $as_dir"
+ done
+IFS=$as_save_IFS
+
+} >&5
+
+cat >&5 <<_ACEOF
+
+
+## ----------- ##
+## Core tests. ##
+## ----------- ##
+
+_ACEOF
+
+
+# Keep a trace of the command line.
+# Strip out --no-create and --no-recursion so they do not pile up.
+# Strip out --silent because we don't want to record it for future runs.
+# Also quote any args containing shell meta-characters.
+# Make two passes to allow for proper duplicate-argument suppression.
+ac_configure_args=
+ac_configure_args0=
+ac_configure_args1=
+ac_must_keep_next=false
+for ac_pass in 1 2
+do
+ for ac_arg
+ do
+ case $ac_arg in
+ -no-create | --no-c* | -n | -no-recursion | --no-r*) continue ;;
+ -q | -quiet | --quiet | --quie | --qui | --qu | --q \
+ | -silent | --silent | --silen | --sile | --sil)
+ continue ;;
+ *\'*)
+ ac_arg=`$as_echo "$ac_arg" | sed "s/'/'\\\\\\\\''/g"` ;;
+ esac
+ case $ac_pass in
+ 1) as_fn_append ac_configure_args0 " '$ac_arg'" ;;
+ 2)
+ as_fn_append ac_configure_args1 " '$ac_arg'"
+ if test $ac_must_keep_next = true; then
+ ac_must_keep_next=false # Got value, back to normal.
+ else
+ case $ac_arg in
+ *=* | --config-cache | -C | -disable-* | --disable-* \
+ | -enable-* | --enable-* | -gas | --g* | -nfp | --nf* \
+ | -q | -quiet | --q* | -silent | --sil* | -v | -verb* \
+ | -with-* | --with-* | -without-* | --without-* | --x)
+ case "$ac_configure_args0 " in
+ "$ac_configure_args1"*" '$ac_arg' "* ) continue ;;
+ esac
+ ;;
+ -* ) ac_must_keep_next=true ;;
+ esac
+ fi
+ as_fn_append ac_configure_args " '$ac_arg'"
+ ;;
+ esac
+ done
+done
+{ ac_configure_args0=; unset ac_configure_args0;}
+{ ac_configure_args1=; unset ac_configure_args1;}
+
+# When interrupted or exit'd, cleanup temporary files, and complete
+# config.log. We remove comments because anyway the quotes in there
+# would cause problems or look ugly.
+# WARNING: Use '\'' to represent an apostrophe within the trap.
+# WARNING: Do not start the trap code with a newline, due to a FreeBSD 4.0 bug.
+trap 'exit_status=$?
+ # Save into config.log some information that might help in debugging.
+ {
+ echo
+
+ $as_echo "## ---------------- ##
+## Cache variables. ##
+## ---------------- ##"
+ echo
+ # The following way of writing the cache mishandles newlines in values,
+(
+ for ac_var in `(set) 2>&1 | sed -n '\''s/^\([a-zA-Z_][a-zA-Z0-9_]*\)=.*/\1/p'\''`; do
+ eval ac_val=\$$ac_var
+ case $ac_val in #(
+ *${as_nl}*)
+ case $ac_var in #(
+ *_cv_*) { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: cache variable $ac_var contains a newline" >&5
+$as_echo "$as_me: WARNING: cache variable $ac_var contains a newline" >&2;} ;;
+ esac
+ case $ac_var in #(
+ _ | IFS | as_nl) ;; #(
+ BASH_ARGV | BASH_SOURCE) eval $ac_var= ;; #(
+ *) { eval $ac_var=; unset $ac_var;} ;;
+ esac ;;
+ esac
+ done
+ (set) 2>&1 |
+ case $as_nl`(ac_space='\'' '\''; set) 2>&1` in #(
+ *${as_nl}ac_space=\ *)
+ sed -n \
+ "s/'\''/'\''\\\\'\'''\''/g;
+ s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1='\''\\2'\''/p"
+ ;; #(
+ *)
+ sed -n "/^[_$as_cr_alnum]*_cv_[_$as_cr_alnum]*=/p"
+ ;;
+ esac |
+ sort
+)
+ echo
+
+ $as_echo "## ----------------- ##
+## Output variables. ##
+## ----------------- ##"
+ echo
+ for ac_var in $ac_subst_vars
+ do
+ eval ac_val=\$$ac_var
+ case $ac_val in
+ *\'\''*) ac_val=`$as_echo "$ac_val" | sed "s/'\''/'\''\\\\\\\\'\'''\''/g"`;;
+ esac
+ $as_echo "$ac_var='\''$ac_val'\''"
+ done | sort
+ echo
+
+ if test -n "$ac_subst_files"; then
+ $as_echo "## ------------------- ##
+## File substitutions. ##
+## ------------------- ##"
+ echo
+ for ac_var in $ac_subst_files
+ do
+ eval ac_val=\$$ac_var
+ case $ac_val in
+ *\'\''*) ac_val=`$as_echo "$ac_val" | sed "s/'\''/'\''\\\\\\\\'\'''\''/g"`;;
+ esac
+ $as_echo "$ac_var='\''$ac_val'\''"
+ done | sort
+ echo
+ fi
+
+ if test -s confdefs.h; then
+ $as_echo "## ----------- ##
+## confdefs.h. ##
+## ----------- ##"
+ echo
+ cat confdefs.h
+ echo
+ fi
+ test "$ac_signal" != 0 &&
+ $as_echo "$as_me: caught signal $ac_signal"
+ $as_echo "$as_me: exit $exit_status"
+ } >&5
+ rm -f core *.core core.conftest.* &&
+ rm -f -r conftest* confdefs* conf$$* $ac_clean_files &&
+ exit $exit_status
+' 0
+for ac_signal in 1 2 13 15; do
+ trap 'ac_signal='$ac_signal'; as_fn_exit 1' $ac_signal
+done
+ac_signal=0
+
+# confdefs.h avoids OS command line length limits that DEFS can exceed.
+rm -f -r conftest* confdefs.h
+
+$as_echo "/* confdefs.h */" > confdefs.h
+
+# Predefined preprocessor variables.
+
+cat >>confdefs.h <<_ACEOF
+#define PACKAGE_NAME "$PACKAGE_NAME"
+_ACEOF
+
+cat >>confdefs.h <<_ACEOF
+#define PACKAGE_TARNAME "$PACKAGE_TARNAME"
+_ACEOF
+
+cat >>confdefs.h <<_ACEOF
+#define PACKAGE_VERSION "$PACKAGE_VERSION"
+_ACEOF
+
+cat >>confdefs.h <<_ACEOF
+#define PACKAGE_STRING "$PACKAGE_STRING"
+_ACEOF
+
+cat >>confdefs.h <<_ACEOF
+#define PACKAGE_BUGREPORT "$PACKAGE_BUGREPORT"
+_ACEOF
+
+cat >>confdefs.h <<_ACEOF
+#define PACKAGE_URL "$PACKAGE_URL"
+_ACEOF
+
+
+# Let the site file select an alternate cache file if it wants to.
+# Prefer an explicitly selected file to automatically selected ones.
+ac_site_file1=NONE
+ac_site_file2=NONE
+if test -n "$CONFIG_SITE"; then
+ # We do not want a PATH search for config.site.
+ case $CONFIG_SITE in #((
+ -*) ac_site_file1=./$CONFIG_SITE;;
+ */*) ac_site_file1=$CONFIG_SITE;;
+ *) ac_site_file1=./$CONFIG_SITE;;
+ esac
+elif test "x$prefix" != xNONE; then
+ ac_site_file1=$prefix/share/config.site
+ ac_site_file2=$prefix/etc/config.site
+else
+ ac_site_file1=$ac_default_prefix/share/config.site
+ ac_site_file2=$ac_default_prefix/etc/config.site
+fi
+for ac_site_file in "$ac_site_file1" "$ac_site_file2"
+do
+ test "x$ac_site_file" = xNONE && continue
+ if test /dev/null != "$ac_site_file" && test -r "$ac_site_file"; then
+ { $as_echo "$as_me:${as_lineno-$LINENO}: loading site script $ac_site_file" >&5
+$as_echo "$as_me: loading site script $ac_site_file" >&6;}
+ sed 's/^/| /' "$ac_site_file" >&5
+ . "$ac_site_file" \
+ || { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
+as_fn_error $? "failed to load site script $ac_site_file
+See \`config.log' for more details" "$LINENO" 5; }
+ fi
+done
+
+if test -r "$cache_file"; then
+ # Some versions of bash will fail to source /dev/null (special files
+ # actually), so we avoid doing that. DJGPP emulates it as a regular file.
+ if test /dev/null != "$cache_file" && test -f "$cache_file"; then
+ { $as_echo "$as_me:${as_lineno-$LINENO}: loading cache $cache_file" >&5
+$as_echo "$as_me: loading cache $cache_file" >&6;}
+ case $cache_file in
+ [\\/]* | ?:[\\/]* ) . "$cache_file";;
+ *) . "./$cache_file";;
+ esac
+ fi
+else
+ { $as_echo "$as_me:${as_lineno-$LINENO}: creating cache $cache_file" >&5
+$as_echo "$as_me: creating cache $cache_file" >&6;}
+ >$cache_file
+fi
+
+# Check that the precious variables saved in the cache have kept the same
+# value.
+ac_cache_corrupted=false
+for ac_var in $ac_precious_vars; do
+ eval ac_old_set=\$ac_cv_env_${ac_var}_set
+ eval ac_new_set=\$ac_env_${ac_var}_set
+ eval ac_old_val=\$ac_cv_env_${ac_var}_value
+ eval ac_new_val=\$ac_env_${ac_var}_value
+ case $ac_old_set,$ac_new_set in
+ set,)
+ { $as_echo "$as_me:${as_lineno-$LINENO}: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&5
+$as_echo "$as_me: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&2;}
+ ac_cache_corrupted=: ;;
+ ,set)
+ { $as_echo "$as_me:${as_lineno-$LINENO}: error: \`$ac_var' was not set in the previous run" >&5
+$as_echo "$as_me: error: \`$ac_var' was not set in the previous run" >&2;}
+ ac_cache_corrupted=: ;;
+ ,);;
+ *)
+ if test "x$ac_old_val" != "x$ac_new_val"; then
+ # differences in whitespace do not lead to failure.
+ ac_old_val_w=`echo x $ac_old_val`
+ ac_new_val_w=`echo x $ac_new_val`
+ if test "$ac_old_val_w" != "$ac_new_val_w"; then
+ { $as_echo "$as_me:${as_lineno-$LINENO}: error: \`$ac_var' has changed since the previous run:" >&5
+$as_echo "$as_me: error: \`$ac_var' has changed since the previous run:" >&2;}
+ ac_cache_corrupted=:
+ else
+ { $as_echo "$as_me:${as_lineno-$LINENO}: warning: ignoring whitespace changes in \`$ac_var' since the previous run:" >&5
+$as_echo "$as_me: warning: ignoring whitespace changes in \`$ac_var' since the previous run:" >&2;}
+ eval $ac_var=\$ac_old_val
+ fi
+ { $as_echo "$as_me:${as_lineno-$LINENO}: former value: \`$ac_old_val'" >&5
+$as_echo "$as_me: former value: \`$ac_old_val'" >&2;}
+ { $as_echo "$as_me:${as_lineno-$LINENO}: current value: \`$ac_new_val'" >&5
+$as_echo "$as_me: current value: \`$ac_new_val'" >&2;}
+ fi;;
+ esac
+ # Pass precious variables to config.status.
+ if test "$ac_new_set" = set; then
+ case $ac_new_val in
+ *\'*) ac_arg=$ac_var=`$as_echo "$ac_new_val" | sed "s/'/'\\\\\\\\''/g"` ;;
+ *) ac_arg=$ac_var=$ac_new_val ;;
+ esac
+ case " $ac_configure_args " in
+ *" '$ac_arg' "*) ;; # Avoid dups. Use of quotes ensures accuracy.
+ *) as_fn_append ac_configure_args " '$ac_arg'" ;;
+ esac
+ fi
+done
+if $ac_cache_corrupted; then
+ { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
+ { $as_echo "$as_me:${as_lineno-$LINENO}: error: changes in the environment can compromise the build" >&5
+$as_echo "$as_me: error: changes in the environment can compromise the build" >&2;}
+ as_fn_error $? "run \`make distclean' and/or \`rm $cache_file' and start over" "$LINENO" 5
+fi
+## -------------------- ##
+## Main body of script. ##
+## -------------------- ##
+
+ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+
+
+ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+if test -n "$ac_tool_prefix"; then
+ # Extract the first word of "${ac_tool_prefix}gcc", so it can be a program name with args.
+set dummy ${ac_tool_prefix}gcc; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_prog_CC+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ if test -n "$CC"; then
+ ac_cv_prog_CC="$CC" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+ ac_cv_prog_CC="${ac_tool_prefix}gcc"
+ $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+ done
+IFS=$as_save_IFS
+
+fi
+fi
+CC=$ac_cv_prog_CC
+if test -n "$CC"; then
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5
+$as_echo "$CC" >&6; }
+else
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+fi
+if test -z "$ac_cv_prog_CC"; then
+ ac_ct_CC=$CC
+ # Extract the first word of "gcc", so it can be a program name with args.
+set dummy gcc; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_prog_ac_ct_CC+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ if test -n "$ac_ct_CC"; then
+ ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+ ac_cv_prog_ac_ct_CC="gcc"
+ $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+ done
+IFS=$as_save_IFS
+
+fi
+fi
+ac_ct_CC=$ac_cv_prog_ac_ct_CC
+if test -n "$ac_ct_CC"; then
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CC" >&5
+$as_echo "$ac_ct_CC" >&6; }
+else
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+ if test "x$ac_ct_CC" = x; then
+ CC=""
+ else
+ case $cross_compiling:$ac_tool_warned in
+yes:)
+{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5
+$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;}
+ac_tool_warned=yes ;;
+esac
+ CC=$ac_ct_CC
+ fi
+else
+ CC="$ac_cv_prog_CC"
+fi
+
+if test -z "$CC"; then
+ if test -n "$ac_tool_prefix"; then
+ # Extract the first word of "${ac_tool_prefix}cc", so it can be a program name with args.
+set dummy ${ac_tool_prefix}cc; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_prog_CC+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ if test -n "$CC"; then
+ ac_cv_prog_CC="$CC" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+ ac_cv_prog_CC="${ac_tool_prefix}cc"
+ $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+ done
+IFS=$as_save_IFS
+
+fi
+fi
+CC=$ac_cv_prog_CC
+if test -n "$CC"; then
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5
+$as_echo "$CC" >&6; }
+else
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+ fi
+fi
+if test -z "$CC"; then
+ # Extract the first word of "cc", so it can be a program name with args.
+set dummy cc; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_prog_CC+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ if test -n "$CC"; then
+ ac_cv_prog_CC="$CC" # Let the user override the test.
+else
+ ac_prog_rejected=no
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+ if test "$as_dir/$ac_word$ac_exec_ext" = "/usr/ucb/cc"; then
+ ac_prog_rejected=yes
+ continue
+ fi
+ ac_cv_prog_CC="cc"
+ $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+ done
+IFS=$as_save_IFS
+
+if test $ac_prog_rejected = yes; then
+ # We found a bogon in the path, so make sure we never use it.
+ set dummy $ac_cv_prog_CC
+ shift
+ if test $# != 0; then
+ # We chose a different compiler from the bogus one.
+ # However, it has the same basename, so the bogon will be chosen
+ # first if we set CC to just the basename; use the full file name.
+ shift
+ ac_cv_prog_CC="$as_dir/$ac_word${1+' '}$@"
+ fi
+fi
+fi
+fi
+CC=$ac_cv_prog_CC
+if test -n "$CC"; then
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5
+$as_echo "$CC" >&6; }
+else
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+fi
+if test -z "$CC"; then
+ if test -n "$ac_tool_prefix"; then
+ for ac_prog in cl.exe
+ do
+ # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args.
+set dummy $ac_tool_prefix$ac_prog; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_prog_CC+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ if test -n "$CC"; then
+ ac_cv_prog_CC="$CC" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+ ac_cv_prog_CC="$ac_tool_prefix$ac_prog"
+ $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+ done
+IFS=$as_save_IFS
+
+fi
+fi
+CC=$ac_cv_prog_CC
+if test -n "$CC"; then
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5
+$as_echo "$CC" >&6; }
+else
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+ test -n "$CC" && break
+ done
+fi
+if test -z "$CC"; then
+ ac_ct_CC=$CC
+ for ac_prog in cl.exe
+do
+ # Extract the first word of "$ac_prog", so it can be a program name with args.
+set dummy $ac_prog; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_prog_ac_ct_CC+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ if test -n "$ac_ct_CC"; then
+ ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+ ac_cv_prog_ac_ct_CC="$ac_prog"
+ $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+ done
+IFS=$as_save_IFS
+
+fi
+fi
+ac_ct_CC=$ac_cv_prog_ac_ct_CC
+if test -n "$ac_ct_CC"; then
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CC" >&5
+$as_echo "$ac_ct_CC" >&6; }
+else
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+ test -n "$ac_ct_CC" && break
+done
+
+ if test "x$ac_ct_CC" = x; then
+ CC=""
+ else
+ case $cross_compiling:$ac_tool_warned in
+yes:)
+{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5
+$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;}
+ac_tool_warned=yes ;;
+esac
+ CC=$ac_ct_CC
+ fi
+fi
+
+fi
+
+
+test -z "$CC" && { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
+as_fn_error $? "no acceptable C compiler found in \$PATH
+See \`config.log' for more details" "$LINENO" 5; }
+
+# Provide some information about the compiler.
+$as_echo "$as_me:${as_lineno-$LINENO}: checking for C compiler version" >&5
+set X $ac_compile
+ac_compiler=$2
+for ac_option in --version -v -V -qversion; do
+ { { ac_try="$ac_compiler $ac_option >&5"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
+$as_echo "$ac_try_echo"; } >&5
+ (eval "$ac_compiler $ac_option >&5") 2>conftest.err
+ ac_status=$?
+ if test -s conftest.err; then
+ sed '10a\
+... rest of stderr output deleted ...
+ 10q' conftest.err >conftest.er1
+ cat conftest.er1 >&5
+ fi
+ rm -f conftest.er1 conftest.err
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }
+done
+
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+int
+main ()
+{
+
+ ;
+ return 0;
+}
+_ACEOF
+ac_clean_files_save=$ac_clean_files
+ac_clean_files="$ac_clean_files a.out a.out.dSYM a.exe b.out"
+# Try to create an executable without -o first, disregard a.out.
+# It will help us diagnose broken compilers, and finding out an intuition
+# of exeext.
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether the C compiler works" >&5
+$as_echo_n "checking whether the C compiler works... " >&6; }
+ac_link_default=`$as_echo "$ac_link" | sed 's/ -o *conftest[^ ]*//'`
+
+# The possible output files:
+ac_files="a.out conftest.exe conftest a.exe a_out.exe b.out conftest.*"
+
+ac_rmfiles=
+for ac_file in $ac_files
+do
+ case $ac_file in
+ *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM | *.o | *.obj ) ;;
+ * ) ac_rmfiles="$ac_rmfiles $ac_file";;
+ esac
+done
+rm -f $ac_rmfiles
+
+if { { ac_try="$ac_link_default"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
+$as_echo "$ac_try_echo"; } >&5
+ (eval "$ac_link_default") 2>&5
+ ac_status=$?
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }; then :
+ # Autoconf-2.13 could set the ac_cv_exeext variable to `no'.
+# So ignore a value of `no', otherwise this would lead to `EXEEXT = no'
+# in a Makefile. We should not override ac_cv_exeext if it was cached,
+# so that the user can short-circuit this test for compilers unknown to
+# Autoconf.
+for ac_file in $ac_files ''
+do
+ test -f "$ac_file" || continue
+ case $ac_file in
+ *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM | *.o | *.obj )
+ ;;
+ [ab].out )
+ # We found the default executable, but exeext='' is most
+ # certainly right.
+ break;;
+ *.* )
+ if test "${ac_cv_exeext+set}" = set && test "$ac_cv_exeext" != no;
+ then :; else
+ ac_cv_exeext=`expr "$ac_file" : '[^.]*\(\..*\)'`
+ fi
+ # We set ac_cv_exeext here because the later test for it is not
+ # safe: cross compilers may not add the suffix if given an `-o'
+ # argument, so we may need to know it at that point already.
+ # Even if this section looks crufty: it has the advantage of
+ # actually working.
+ break;;
+ * )
+ break;;
+ esac
+done
+test "$ac_cv_exeext" = no && ac_cv_exeext=
+
+else
+ ac_file=''
+fi
+if test -z "$ac_file"; then :
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+$as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+{ { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
+as_fn_error 77 "C compiler cannot create executables
+See \`config.log' for more details" "$LINENO" 5; }
+else
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for C compiler default output file name" >&5
+$as_echo_n "checking for C compiler default output file name... " >&6; }
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_file" >&5
+$as_echo "$ac_file" >&6; }
+ac_exeext=$ac_cv_exeext
+
+rm -f -r a.out a.out.dSYM a.exe conftest$ac_cv_exeext b.out
+ac_clean_files=$ac_clean_files_save
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for suffix of executables" >&5
+$as_echo_n "checking for suffix of executables... " >&6; }
+if { { ac_try="$ac_link"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
+$as_echo "$ac_try_echo"; } >&5
+ (eval "$ac_link") 2>&5
+ ac_status=$?
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }; then :
+ # If both `conftest.exe' and `conftest' are `present' (well, observable)
+# catch `conftest.exe'. For instance with Cygwin, `ls conftest' will
+# work properly (i.e., refer to `conftest.exe'), while it won't with
+# `rm'.
+for ac_file in conftest.exe conftest conftest.*; do
+ test -f "$ac_file" || continue
+ case $ac_file in
+ *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM | *.o | *.obj ) ;;
+ *.* ) ac_cv_exeext=`expr "$ac_file" : '[^.]*\(\..*\)'`
+ break;;
+ * ) break;;
+ esac
+done
+else
+ { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
+as_fn_error $? "cannot compute suffix of executables: cannot compile and link
+See \`config.log' for more details" "$LINENO" 5; }
+fi
+rm -f conftest conftest$ac_cv_exeext
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_exeext" >&5
+$as_echo "$ac_cv_exeext" >&6; }
+
+rm -f conftest.$ac_ext
+EXEEXT=$ac_cv_exeext
+ac_exeext=$EXEEXT
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+#include
+int
+main ()
+{
+FILE *f = fopen ("conftest.out", "w");
+ return ferror (f) || fclose (f) != 0;
+
+ ;
+ return 0;
+}
+_ACEOF
+ac_clean_files="$ac_clean_files conftest.out"
+# Check that the compiler produces executables we can run. If not, either
+# the compiler is broken, or we cross compile.
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether we are cross compiling" >&5
+$as_echo_n "checking whether we are cross compiling... " >&6; }
+if test "$cross_compiling" != yes; then
+ { { ac_try="$ac_link"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
+$as_echo "$ac_try_echo"; } >&5
+ (eval "$ac_link") 2>&5
+ ac_status=$?
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }
+ if { ac_try='./conftest$ac_cv_exeext'
+ { { case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
+$as_echo "$ac_try_echo"; } >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }; }; then
+ cross_compiling=no
+ else
+ if test "$cross_compiling" = maybe; then
+ cross_compiling=yes
+ else
+ { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
+as_fn_error $? "cannot run C compiled programs.
+If you meant to cross compile, use \`--host'.
+See \`config.log' for more details" "$LINENO" 5; }
+ fi
+ fi
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $cross_compiling" >&5
+$as_echo "$cross_compiling" >&6; }
+
+rm -f conftest.$ac_ext conftest$ac_cv_exeext conftest.out
+ac_clean_files=$ac_clean_files_save
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for suffix of object files" >&5
+$as_echo_n "checking for suffix of object files... " >&6; }
+if ${ac_cv_objext+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+int
+main ()
+{
+
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.o conftest.obj
+if { { ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
+$as_echo "$ac_try_echo"; } >&5
+ (eval "$ac_compile") 2>&5
+ ac_status=$?
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }; then :
+ for ac_file in conftest.o conftest.obj conftest.*; do
+ test -f "$ac_file" || continue;
+ case $ac_file in
+ *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM ) ;;
+ *) ac_cv_objext=`expr "$ac_file" : '.*\.\(.*\)'`
+ break;;
+ esac
+done
+else
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+{ { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
+as_fn_error $? "cannot compute suffix of object files: cannot compile
+See \`config.log' for more details" "$LINENO" 5; }
+fi
+rm -f conftest.$ac_cv_objext conftest.$ac_ext
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_objext" >&5
+$as_echo "$ac_cv_objext" >&6; }
+OBJEXT=$ac_cv_objext
+ac_objext=$OBJEXT
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether we are using the GNU C compiler" >&5
+$as_echo_n "checking whether we are using the GNU C compiler... " >&6; }
+if ${ac_cv_c_compiler_gnu+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+int
+main ()
+{
+#ifndef __GNUC__
+ choke me
+#endif
+
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+ ac_compiler_gnu=yes
+else
+ ac_compiler_gnu=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+ac_cv_c_compiler_gnu=$ac_compiler_gnu
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_c_compiler_gnu" >&5
+$as_echo "$ac_cv_c_compiler_gnu" >&6; }
+if test $ac_compiler_gnu = yes; then
+ GCC=yes
+else
+ GCC=
+fi
+ac_test_CFLAGS=${CFLAGS+set}
+ac_save_CFLAGS=$CFLAGS
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $CC accepts -g" >&5
+$as_echo_n "checking whether $CC accepts -g... " >&6; }
+if ${ac_cv_prog_cc_g+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ ac_save_c_werror_flag=$ac_c_werror_flag
+ ac_c_werror_flag=yes
+ ac_cv_prog_cc_g=no
+ CFLAGS="-g"
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+int
+main ()
+{
+
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+ ac_cv_prog_cc_g=yes
+else
+ CFLAGS=""
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+int
+main ()
+{
+
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+
+else
+ ac_c_werror_flag=$ac_save_c_werror_flag
+ CFLAGS="-g"
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+int
+main ()
+{
+
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+ ac_cv_prog_cc_g=yes
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+ ac_c_werror_flag=$ac_save_c_werror_flag
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_g" >&5
+$as_echo "$ac_cv_prog_cc_g" >&6; }
+if test "$ac_test_CFLAGS" = set; then
+ CFLAGS=$ac_save_CFLAGS
+elif test $ac_cv_prog_cc_g = yes; then
+ if test "$GCC" = yes; then
+ CFLAGS="-g -O2"
+ else
+ CFLAGS="-g"
+ fi
+else
+ if test "$GCC" = yes; then
+ CFLAGS="-O2"
+ else
+ CFLAGS=
+ fi
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $CC option to accept ISO C89" >&5
+$as_echo_n "checking for $CC option to accept ISO C89... " >&6; }
+if ${ac_cv_prog_cc_c89+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ ac_cv_prog_cc_c89=no
+ac_save_CC=$CC
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+#include
+#include
+struct stat;
+/* Most of the following tests are stolen from RCS 5.7's src/conf.sh. */
+struct buf { int x; };
+FILE * (*rcsopen) (struct buf *, struct stat *, int);
+static char *e (p, i)
+ char **p;
+ int i;
+{
+ return p[i];
+}
+static char *f (char * (*g) (char **, int), char **p, ...)
+{
+ char *s;
+ va_list v;
+ va_start (v,p);
+ s = g (p, va_arg (v,int));
+ va_end (v);
+ return s;
+}
+
+/* OSF 4.0 Compaq cc is some sort of almost-ANSI by default. It has
+ function prototypes and stuff, but not '\xHH' hex character constants.
+ These don't provoke an error unfortunately, instead are silently treated
+ as 'x'. The following induces an error, until -std is added to get
+ proper ANSI mode. Curiously '\x00'!='x' always comes out true, for an
+ array size at least. It's necessary to write '\x00'==0 to get something
+ that's true only with -std. */
+int osf4_cc_array ['\x00' == 0 ? 1 : -1];
+
+/* IBM C 6 for AIX is almost-ANSI by default, but it replaces macro parameters
+ inside strings and character constants. */
+#define FOO(x) 'x'
+int xlc6_cc_array[FOO(a) == 'x' ? 1 : -1];
+
+int test (int i, double x);
+struct s1 {int (*f) (int a);};
+struct s2 {int (*f) (double a);};
+int pairnames (int, char **, FILE *(*)(struct buf *, struct stat *, int), int, int);
+int argc;
+char **argv;
+int
+main ()
+{
+return f (e, argv, 0) != argv[0] || f (e, argv, 1) != argv[1];
+ ;
+ return 0;
+}
+_ACEOF
+for ac_arg in '' -qlanglvl=extc89 -qlanglvl=ansi -std \
+ -Ae "-Aa -D_HPUX_SOURCE" "-Xc -D__EXTENSIONS__"
+do
+ CC="$ac_save_CC $ac_arg"
+ if ac_fn_c_try_compile "$LINENO"; then :
+ ac_cv_prog_cc_c89=$ac_arg
+fi
+rm -f core conftest.err conftest.$ac_objext
+ test "x$ac_cv_prog_cc_c89" != "xno" && break
+done
+rm -f conftest.$ac_ext
+CC=$ac_save_CC
+
+fi
+# AC_CACHE_VAL
+case "x$ac_cv_prog_cc_c89" in
+ x)
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: none needed" >&5
+$as_echo "none needed" >&6; } ;;
+ xno)
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: unsupported" >&5
+$as_echo "unsupported" >&6; } ;;
+ *)
+ CC="$CC $ac_cv_prog_cc_c89"
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_c89" >&5
+$as_echo "$ac_cv_prog_cc_c89" >&6; } ;;
+esac
+if test "x$ac_cv_prog_cc_c89" != xno; then :
+
+fi
+
+ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+ac_aux_dir=
+for ac_dir in "$srcdir" "$srcdir/.." "$srcdir/../.."; do
+ if test -f "$ac_dir/install-sh"; then
+ ac_aux_dir=$ac_dir
+ ac_install_sh="$ac_aux_dir/install-sh -c"
+ break
+ elif test -f "$ac_dir/install.sh"; then
+ ac_aux_dir=$ac_dir
+ ac_install_sh="$ac_aux_dir/install.sh -c"
+ break
+ elif test -f "$ac_dir/shtool"; then
+ ac_aux_dir=$ac_dir
+ ac_install_sh="$ac_aux_dir/shtool install -c"
+ break
+ fi
+done
+if test -z "$ac_aux_dir"; then
+ as_fn_error $? "cannot find install-sh, install.sh, or shtool in \"$srcdir\" \"$srcdir/..\" \"$srcdir/../..\"" "$LINENO" 5
+fi
+
+# These three variables are undocumented and unsupported,
+# and are intended to be withdrawn in a future Autoconf release.
+# They can cause serious problems if a builder's source tree is in a directory
+# whose full name contains unusual characters.
+ac_config_guess="$SHELL $ac_aux_dir/config.guess" # Please don't use this var.
+ac_config_sub="$SHELL $ac_aux_dir/config.sub" # Please don't use this var.
+ac_configure="$SHELL $ac_aux_dir/configure" # Please don't use this var.
+
+
+# Find a good install program. We prefer a C program (faster),
+# so one script is as good as another. But avoid the broken or
+# incompatible versions:
+# SysV /etc/install, /usr/sbin/install
+# SunOS /usr/etc/install
+# IRIX /sbin/install
+# AIX /bin/install
+# AmigaOS /C/install, which installs bootblocks on floppy discs
+# AIX 4 /usr/bin/installbsd, which doesn't work without a -g flag
+# AFS /usr/afsws/bin/install, which mishandles nonexistent args
+# SVR4 /usr/ucb/install, which tries to use the nonexistent group "staff"
+# OS/2's system install, which has a completely different semantic
+# ./install, which can be erroneously created by make from ./install.sh.
+# Reject install programs that cannot install multiple files.
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for a BSD-compatible install" >&5
+$as_echo_n "checking for a BSD-compatible install... " >&6; }
+if test -z "$INSTALL"; then
+if ${ac_cv_path_install+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ # Account for people who put trailing slashes in PATH elements.
+case $as_dir/ in #((
+ ./ | .// | /[cC]/* | \
+ /etc/* | /usr/sbin/* | /usr/etc/* | /sbin/* | /usr/afsws/bin/* | \
+ ?:[\\/]os2[\\/]install[\\/]* | ?:[\\/]OS2[\\/]INSTALL[\\/]* | \
+ /usr/ucb/* ) ;;
+ *)
+ # OSF1 and SCO ODT 3.0 have their own names for install.
+ # Don't use installbsd from OSF since it installs stuff as root
+ # by default.
+ for ac_prog in ginstall scoinst install; do
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if as_fn_executable_p "$as_dir/$ac_prog$ac_exec_ext"; then
+ if test $ac_prog = install &&
+ grep dspmsg "$as_dir/$ac_prog$ac_exec_ext" >/dev/null 2>&1; then
+ # AIX install. It has an incompatible calling convention.
+ :
+ elif test $ac_prog = install &&
+ grep pwplus "$as_dir/$ac_prog$ac_exec_ext" >/dev/null 2>&1; then
+ # program-specific install script used by HP pwplus--don't use.
+ :
+ else
+ rm -rf conftest.one conftest.two conftest.dir
+ echo one > conftest.one
+ echo two > conftest.two
+ mkdir conftest.dir
+ if "$as_dir/$ac_prog$ac_exec_ext" -c conftest.one conftest.two "`pwd`/conftest.dir" &&
+ test -s conftest.one && test -s conftest.two &&
+ test -s conftest.dir/conftest.one &&
+ test -s conftest.dir/conftest.two
+ then
+ ac_cv_path_install="$as_dir/$ac_prog$ac_exec_ext -c"
+ break 3
+ fi
+ fi
+ fi
+ done
+ done
+ ;;
+esac
+
+ done
+IFS=$as_save_IFS
+
+rm -rf conftest.one conftest.two conftest.dir
+
+fi
+ if test "${ac_cv_path_install+set}" = set; then
+ INSTALL=$ac_cv_path_install
+ else
+ # As a last resort, use the slow shell script. Don't cache a
+ # value for INSTALL within a source directory, because that will
+ # break other packages using the cache if that directory is
+ # removed, or if the value is a relative name.
+ INSTALL=$ac_install_sh
+ fi
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $INSTALL" >&5
+$as_echo "$INSTALL" >&6; }
+
+# Use test -z because SunOS4 sh mishandles braces in ${var-val}.
+# It thinks the first close brace ends the variable substitution.
+test -z "$INSTALL_PROGRAM" && INSTALL_PROGRAM='${INSTALL}'
+
+test -z "$INSTALL_SCRIPT" && INSTALL_SCRIPT='${INSTALL}'
+
+test -z "$INSTALL_DATA" && INSTALL_DATA='${INSTALL} -m 644'
+
+CFLAGS="-O3"
+
+ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking how to run the C preprocessor" >&5
+$as_echo_n "checking how to run the C preprocessor... " >&6; }
+# On Suns, sometimes $CPP names a directory.
+if test -n "$CPP" && test -d "$CPP"; then
+ CPP=
+fi
+if test -z "$CPP"; then
+ if ${ac_cv_prog_CPP+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ # Double quotes because CPP needs to be expanded
+ for CPP in "$CC -E" "$CC -E -traditional-cpp" "/lib/cpp"
+ do
+ ac_preproc_ok=false
+for ac_c_preproc_warn_flag in '' yes
+do
+ # Use a header file that comes with gcc, so configuring glibc
+ # with a fresh cross-compiler works.
+ # Prefer to if __STDC__ is defined, since
+ # exists even on freestanding compilers.
+ # On the NeXT, cc -E runs the code through the compiler's parser,
+ # not just through cpp. "Syntax error" is here to catch this case.
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+#ifdef __STDC__
+# include
+#else
+# include
+#endif
+ Syntax error
+_ACEOF
+if ac_fn_c_try_cpp "$LINENO"; then :
+
+else
+ # Broken: fails on valid input.
+continue
+fi
+rm -f conftest.err conftest.i conftest.$ac_ext
+
+ # OK, works on sane cases. Now check whether nonexistent headers
+ # can be detected and how.
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+#include
+_ACEOF
+if ac_fn_c_try_cpp "$LINENO"; then :
+ # Broken: success on invalid input.
+continue
+else
+ # Passes both tests.
+ac_preproc_ok=:
+break
+fi
+rm -f conftest.err conftest.i conftest.$ac_ext
+
+done
+# Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped.
+rm -f conftest.i conftest.err conftest.$ac_ext
+if $ac_preproc_ok; then :
+ break
+fi
+
+ done
+ ac_cv_prog_CPP=$CPP
+
+fi
+ CPP=$ac_cv_prog_CPP
+else
+ ac_cv_prog_CPP=$CPP
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $CPP" >&5
+$as_echo "$CPP" >&6; }
+ac_preproc_ok=false
+for ac_c_preproc_warn_flag in '' yes
+do
+ # Use a header file that comes with gcc, so configuring glibc
+ # with a fresh cross-compiler works.
+ # Prefer to if __STDC__ is defined, since
+ # exists even on freestanding compilers.
+ # On the NeXT, cc -E runs the code through the compiler's parser,
+ # not just through cpp. "Syntax error" is here to catch this case.
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+#ifdef __STDC__
+# include
+#else
+# include
+#endif
+ Syntax error
+_ACEOF
+if ac_fn_c_try_cpp "$LINENO"; then :
+
+else
+ # Broken: fails on valid input.
+continue
+fi
+rm -f conftest.err conftest.i conftest.$ac_ext
+
+ # OK, works on sane cases. Now check whether nonexistent headers
+ # can be detected and how.
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+#include
+_ACEOF
+if ac_fn_c_try_cpp "$LINENO"; then :
+ # Broken: success on invalid input.
+continue
+else
+ # Passes both tests.
+ac_preproc_ok=:
+break
+fi
+rm -f conftest.err conftest.i conftest.$ac_ext
+
+done
+# Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped.
+rm -f conftest.i conftest.err conftest.$ac_ext
+if $ac_preproc_ok; then :
+
+else
+ { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
+as_fn_error $? "C preprocessor \"$CPP\" fails sanity check
+See \`config.log' for more details" "$LINENO" 5; }
+fi
+
+ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for grep that handles long lines and -e" >&5
+$as_echo_n "checking for grep that handles long lines and -e... " >&6; }
+if ${ac_cv_path_GREP+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ if test -z "$GREP"; then
+ ac_path_GREP_found=false
+ # Loop through the user's path and test for each of PROGNAME-LIST
+ as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH$PATH_SEPARATOR/usr/xpg4/bin
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_prog in grep ggrep; do
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ ac_path_GREP="$as_dir/$ac_prog$ac_exec_ext"
+ as_fn_executable_p "$ac_path_GREP" || continue
+# Check for GNU ac_path_GREP and select it if it is found.
+ # Check for GNU $ac_path_GREP
+case `"$ac_path_GREP" --version 2>&1` in
+*GNU*)
+ ac_cv_path_GREP="$ac_path_GREP" ac_path_GREP_found=:;;
+*)
+ ac_count=0
+ $as_echo_n 0123456789 >"conftest.in"
+ while :
+ do
+ cat "conftest.in" "conftest.in" >"conftest.tmp"
+ mv "conftest.tmp" "conftest.in"
+ cp "conftest.in" "conftest.nl"
+ $as_echo 'GREP' >> "conftest.nl"
+ "$ac_path_GREP" -e 'GREP$' -e '-(cannot match)-' < "conftest.nl" >"conftest.out" 2>/dev/null || break
+ diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break
+ as_fn_arith $ac_count + 1 && ac_count=$as_val
+ if test $ac_count -gt ${ac_path_GREP_max-0}; then
+ # Best one so far, save it but keep looking for a better one
+ ac_cv_path_GREP="$ac_path_GREP"
+ ac_path_GREP_max=$ac_count
+ fi
+ # 10*(2^10) chars as input seems more than enough
+ test $ac_count -gt 10 && break
+ done
+ rm -f conftest.in conftest.tmp conftest.nl conftest.out;;
+esac
+
+ $ac_path_GREP_found && break 3
+ done
+ done
+ done
+IFS=$as_save_IFS
+ if test -z "$ac_cv_path_GREP"; then
+ as_fn_error $? "no acceptable grep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" "$LINENO" 5
+ fi
+else
+ ac_cv_path_GREP=$GREP
+fi
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_GREP" >&5
+$as_echo "$ac_cv_path_GREP" >&6; }
+ GREP="$ac_cv_path_GREP"
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for egrep" >&5
+$as_echo_n "checking for egrep... " >&6; }
+if ${ac_cv_path_EGREP+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ if echo a | $GREP -E '(a|b)' >/dev/null 2>&1
+ then ac_cv_path_EGREP="$GREP -E"
+ else
+ if test -z "$EGREP"; then
+ ac_path_EGREP_found=false
+ # Loop through the user's path and test for each of PROGNAME-LIST
+ as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH$PATH_SEPARATOR/usr/xpg4/bin
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_prog in egrep; do
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ ac_path_EGREP="$as_dir/$ac_prog$ac_exec_ext"
+ as_fn_executable_p "$ac_path_EGREP" || continue
+# Check for GNU ac_path_EGREP and select it if it is found.
+ # Check for GNU $ac_path_EGREP
+case `"$ac_path_EGREP" --version 2>&1` in
+*GNU*)
+ ac_cv_path_EGREP="$ac_path_EGREP" ac_path_EGREP_found=:;;
+*)
+ ac_count=0
+ $as_echo_n 0123456789 >"conftest.in"
+ while :
+ do
+ cat "conftest.in" "conftest.in" >"conftest.tmp"
+ mv "conftest.tmp" "conftest.in"
+ cp "conftest.in" "conftest.nl"
+ $as_echo 'EGREP' >> "conftest.nl"
+ "$ac_path_EGREP" 'EGREP$' < "conftest.nl" >"conftest.out" 2>/dev/null || break
+ diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break
+ as_fn_arith $ac_count + 1 && ac_count=$as_val
+ if test $ac_count -gt ${ac_path_EGREP_max-0}; then
+ # Best one so far, save it but keep looking for a better one
+ ac_cv_path_EGREP="$ac_path_EGREP"
+ ac_path_EGREP_max=$ac_count
+ fi
+ # 10*(2^10) chars as input seems more than enough
+ test $ac_count -gt 10 && break
+ done
+ rm -f conftest.in conftest.tmp conftest.nl conftest.out;;
+esac
+
+ $ac_path_EGREP_found && break 3
+ done
+ done
+ done
+IFS=$as_save_IFS
+ if test -z "$ac_cv_path_EGREP"; then
+ as_fn_error $? "no acceptable egrep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" "$LINENO" 5
+ fi
+else
+ ac_cv_path_EGREP=$EGREP
+fi
+
+ fi
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_EGREP" >&5
+$as_echo "$ac_cv_path_EGREP" >&6; }
+ EGREP="$ac_cv_path_EGREP"
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for ANSI C header files" >&5
+$as_echo_n "checking for ANSI C header files... " >&6; }
+if ${ac_cv_header_stdc+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+#include
+#include
+#include
+#include
+
+int
+main ()
+{
+
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+ ac_cv_header_stdc=yes
+else
+ ac_cv_header_stdc=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+
+if test $ac_cv_header_stdc = yes; then
+ # SunOS 4.x string.h does not declare mem*, contrary to ANSI.
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+#include
+
+_ACEOF
+if (eval "$ac_cpp conftest.$ac_ext") 2>&5 |
+ $EGREP "memchr" >/dev/null 2>&1; then :
+
+else
+ ac_cv_header_stdc=no
+fi
+rm -f conftest*
+
+fi
+
+if test $ac_cv_header_stdc = yes; then
+ # ISC 2.0.2 stdlib.h does not declare free, contrary to ANSI.
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+#include
+
+_ACEOF
+if (eval "$ac_cpp conftest.$ac_ext") 2>&5 |
+ $EGREP "free" >/dev/null 2>&1; then :
+
+else
+ ac_cv_header_stdc=no
+fi
+rm -f conftest*
+
+fi
+
+if test $ac_cv_header_stdc = yes; then
+ # /bin/cc in Irix-4.0.5 gets non-ANSI ctype macros unless using -ansi.
+ if test "$cross_compiling" = yes; then :
+ :
+else
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+#include
+#include
+#if ((' ' & 0x0FF) == 0x020)
+# define ISLOWER(c) ('a' <= (c) && (c) <= 'z')
+# define TOUPPER(c) (ISLOWER(c) ? 'A' + ((c) - 'a') : (c))
+#else
+# define ISLOWER(c) \
+ (('a' <= (c) && (c) <= 'i') \
+ || ('j' <= (c) && (c) <= 'r') \
+ || ('s' <= (c) && (c) <= 'z'))
+# define TOUPPER(c) (ISLOWER(c) ? ((c) | 0x40) : (c))
+#endif
+
+#define XOR(e, f) (((e) && !(f)) || (!(e) && (f)))
+int
+main ()
+{
+ int i;
+ for (i = 0; i < 256; i++)
+ if (XOR (islower (i), ISLOWER (i))
+ || toupper (i) != TOUPPER (i))
+ return 2;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_run "$LINENO"; then :
+
+else
+ ac_cv_header_stdc=no
+fi
+rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \
+ conftest.$ac_objext conftest.beam conftest.$ac_ext
+fi
+
+fi
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_header_stdc" >&5
+$as_echo "$ac_cv_header_stdc" >&6; }
+if test $ac_cv_header_stdc = yes; then
+
+$as_echo "#define STDC_HEADERS 1" >>confdefs.h
+
+fi
+
+# On IRIX 5.3, sys/types and inttypes.h are conflicting.
+for ac_header in sys/types.h sys/stat.h stdlib.h string.h memory.h strings.h \
+ inttypes.h stdint.h unistd.h
+do :
+ as_ac_Header=`$as_echo "ac_cv_header_$ac_header" | $as_tr_sh`
+ac_fn_c_check_header_compile "$LINENO" "$ac_header" "$as_ac_Header" "$ac_includes_default
+"
+if eval test \"x\$"$as_ac_Header"\" = x"yes"; then :
+ cat >>confdefs.h <<_ACEOF
+#define `$as_echo "HAVE_$ac_header" | $as_tr_cpp` 1
+_ACEOF
+
+fi
+
+done
+
+
+for ac_header in wmmintrin.h
+do :
+ ac_fn_c_check_header_mongrel "$LINENO" "wmmintrin.h" "ac_cv_header_wmmintrin_h" "$ac_includes_default"
+if test "x$ac_cv_header_wmmintrin_h" = xyes; then :
+ cat >>confdefs.h <<_ACEOF
+#define HAVE_WMMINTRIN_H 1
+_ACEOF
+
+ CFLAGS+=" -maes -msse -msse2"
+ CPPFLAGS="-DFASTAES"
+
+
+fi
+
+done
+
+ac_config_files="$ac_config_files Makefile"
+
+cat >confcache <<\_ACEOF
+# This file is a shell script that caches the results of configure
+# tests run on this system so they can be shared between configure
+# scripts and configure runs, see configure's option --config-cache.
+# It is not useful on other systems. If it contains results you don't
+# want to keep, you may remove or edit it.
+#
+# config.status only pays attention to the cache file if you give it
+# the --recheck option to rerun configure.
+#
+# `ac_cv_env_foo' variables (set or unset) will be overridden when
+# loading this file, other *unset* `ac_cv_foo' will be assigned the
+# following values.
+
+_ACEOF
+
+# The following way of writing the cache mishandles newlines in values,
+# but we know of no workaround that is simple, portable, and efficient.
+# So, we kill variables containing newlines.
+# Ultrix sh set writes to stderr and can't be redirected directly,
+# and sets the high bit in the cache file unless we assign to the vars.
+(
+ for ac_var in `(set) 2>&1 | sed -n 's/^\([a-zA-Z_][a-zA-Z0-9_]*\)=.*/\1/p'`; do
+ eval ac_val=\$$ac_var
+ case $ac_val in #(
+ *${as_nl}*)
+ case $ac_var in #(
+ *_cv_*) { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: cache variable $ac_var contains a newline" >&5
+$as_echo "$as_me: WARNING: cache variable $ac_var contains a newline" >&2;} ;;
+ esac
+ case $ac_var in #(
+ _ | IFS | as_nl) ;; #(
+ BASH_ARGV | BASH_SOURCE) eval $ac_var= ;; #(
+ *) { eval $ac_var=; unset $ac_var;} ;;
+ esac ;;
+ esac
+ done
+
+ (set) 2>&1 |
+ case $as_nl`(ac_space=' '; set) 2>&1` in #(
+ *${as_nl}ac_space=\ *)
+ # `set' does not quote correctly, so add quotes: double-quote
+ # substitution turns \\\\ into \\, and sed turns \\ into \.
+ sed -n \
+ "s/'/'\\\\''/g;
+ s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1='\\2'/p"
+ ;; #(
+ *)
+ # `set' quotes correctly as required by POSIX, so do not add quotes.
+ sed -n "/^[_$as_cr_alnum]*_cv_[_$as_cr_alnum]*=/p"
+ ;;
+ esac |
+ sort
+) |
+ sed '
+ /^ac_cv_env_/b end
+ t clear
+ :clear
+ s/^\([^=]*\)=\(.*[{}].*\)$/test "${\1+set}" = set || &/
+ t end
+ s/^\([^=]*\)=\(.*\)$/\1=${\1=\2}/
+ :end' >>confcache
+if diff "$cache_file" confcache >/dev/null 2>&1; then :; else
+ if test -w "$cache_file"; then
+ if test "x$cache_file" != "x/dev/null"; then
+ { $as_echo "$as_me:${as_lineno-$LINENO}: updating cache $cache_file" >&5
+$as_echo "$as_me: updating cache $cache_file" >&6;}
+ if test ! -f "$cache_file" || test -h "$cache_file"; then
+ cat confcache >"$cache_file"
+ else
+ case $cache_file in #(
+ */* | ?:*)
+ mv -f confcache "$cache_file"$$ &&
+ mv -f "$cache_file"$$ "$cache_file" ;; #(
+ *)
+ mv -f confcache "$cache_file" ;;
+ esac
+ fi
+ fi
+ else
+ { $as_echo "$as_me:${as_lineno-$LINENO}: not updating unwritable cache $cache_file" >&5
+$as_echo "$as_me: not updating unwritable cache $cache_file" >&6;}
+ fi
+fi
+rm -f confcache
+
+test "x$prefix" = xNONE && prefix=$ac_default_prefix
+# Let make expand exec_prefix.
+test "x$exec_prefix" = xNONE && exec_prefix='${prefix}'
+
+# Transform confdefs.h into DEFS.
+# Protect against shell expansion while executing Makefile rules.
+# Protect against Makefile macro expansion.
+#
+# If the first sed substitution is executed (which looks for macros that
+# take arguments), then branch to the quote section. Otherwise,
+# look for a macro that doesn't take arguments.
+ac_script='
+:mline
+/\\$/{
+ N
+ s,\\\n,,
+ b mline
+}
+t clear
+:clear
+s/^[ ]*#[ ]*define[ ][ ]*\([^ (][^ (]*([^)]*)\)[ ]*\(.*\)/-D\1=\2/g
+t quote
+s/^[ ]*#[ ]*define[ ][ ]*\([^ ][^ ]*\)[ ]*\(.*\)/-D\1=\2/g
+t quote
+b any
+:quote
+s/[ `~#$^&*(){}\\|;'\''"<>?]/\\&/g
+s/\[/\\&/g
+s/\]/\\&/g
+s/\$/$$/g
+H
+:any
+${
+ g
+ s/^\n//
+ s/\n/ /g
+ p
+}
+'
+DEFS=`sed -n "$ac_script" confdefs.h`
+
+
+ac_libobjs=
+ac_ltlibobjs=
+U=
+for ac_i in : $LIBOBJS; do test "x$ac_i" = x: && continue
+ # 1. Remove the extension, and $U if already installed.
+ ac_script='s/\$U\././;s/\.o$//;s/\.obj$//'
+ ac_i=`$as_echo "$ac_i" | sed "$ac_script"`
+ # 2. Prepend LIBOBJDIR. When used with automake>=1.10 LIBOBJDIR
+ # will be set to the directory where LIBOBJS objects are built.
+ as_fn_append ac_libobjs " \${LIBOBJDIR}$ac_i\$U.$ac_objext"
+ as_fn_append ac_ltlibobjs " \${LIBOBJDIR}$ac_i"'$U.lo'
+done
+LIBOBJS=$ac_libobjs
+
+LTLIBOBJS=$ac_ltlibobjs
+
+
+
+: "${CONFIG_STATUS=./config.status}"
+ac_write_fail=0
+ac_clean_files_save=$ac_clean_files
+ac_clean_files="$ac_clean_files $CONFIG_STATUS"
+{ $as_echo "$as_me:${as_lineno-$LINENO}: creating $CONFIG_STATUS" >&5
+$as_echo "$as_me: creating $CONFIG_STATUS" >&6;}
+as_write_fail=0
+cat >$CONFIG_STATUS <<_ASEOF || as_write_fail=1
+#! $SHELL
+# Generated by $as_me.
+# Run this file to recreate the current configuration.
+# Compiler output produced by configure, useful for debugging
+# configure, is in config.log if it exists.
+
+debug=false
+ac_cs_recheck=false
+ac_cs_silent=false
+
+SHELL=\${CONFIG_SHELL-$SHELL}
+export SHELL
+_ASEOF
+cat >>$CONFIG_STATUS <<\_ASEOF || as_write_fail=1
+## -------------------- ##
+## M4sh Initialization. ##
+## -------------------- ##
+
+# Be more Bourne compatible
+DUALCASE=1; export DUALCASE # for MKS sh
+if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then :
+ emulate sh
+ NULLCMD=:
+ # Pre-4.2 versions of Zsh do word splitting on ${1+"$@"}, which
+ # is contrary to our usage. Disable this feature.
+ alias -g '${1+"$@"}'='"$@"'
+ setopt NO_GLOB_SUBST
+else
+ case `(set -o) 2>/dev/null` in #(
+ *posix*) :
+ set -o posix ;; #(
+ *) :
+ ;;
+esac
+fi
+
+
+as_nl='
+'
+export as_nl
+# Printing a long string crashes Solaris 7 /usr/bin/printf.
+as_echo='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\'
+as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo
+as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo$as_echo
+# Prefer a ksh shell builtin over an external printf program on Solaris,
+# but without wasting forks for bash or zsh.
+if test -z "$BASH_VERSION$ZSH_VERSION" \
+ && (test "X`print -r -- $as_echo`" = "X$as_echo") 2>/dev/null; then
+ as_echo='print -r --'
+ as_echo_n='print -rn --'
+elif (test "X`printf %s $as_echo`" = "X$as_echo") 2>/dev/null; then
+ as_echo='printf %s\n'
+ as_echo_n='printf %s'
+else
+ if test "X`(/usr/ucb/echo -n -n $as_echo) 2>/dev/null`" = "X-n $as_echo"; then
+ as_echo_body='eval /usr/ucb/echo -n "$1$as_nl"'
+ as_echo_n='/usr/ucb/echo -n'
+ else
+ as_echo_body='eval expr "X$1" : "X\\(.*\\)"'
+ as_echo_n_body='eval
+ arg=$1;
+ case $arg in #(
+ *"$as_nl"*)
+ expr "X$arg" : "X\\(.*\\)$as_nl";
+ arg=`expr "X$arg" : ".*$as_nl\\(.*\\)"`;;
+ esac;
+ expr "X$arg" : "X\\(.*\\)" | tr -d "$as_nl"
+ '
+ export as_echo_n_body
+ as_echo_n='sh -c $as_echo_n_body as_echo'
+ fi
+ export as_echo_body
+ as_echo='sh -c $as_echo_body as_echo'
+fi
+
+# The user is always right.
+if test "${PATH_SEPARATOR+set}" != set; then
+ PATH_SEPARATOR=:
+ (PATH='/bin;/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 && {
+ (PATH='/bin:/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 ||
+ PATH_SEPARATOR=';'
+ }
+fi
+
+
+# IFS
+# We need space, tab and new line, in precisely that order. Quoting is
+# there to prevent editors from complaining about space-tab.
+# (If _AS_PATH_WALK were called with IFS unset, it would disable word
+# splitting by setting IFS to empty value.)
+IFS=" "" $as_nl"
+
+# Find who we are. Look in the path if we contain no directory separator.
+as_myself=
+case $0 in #((
+ *[\\/]* ) as_myself=$0 ;;
+ *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break
+ done
+IFS=$as_save_IFS
+
+ ;;
+esac
+# We did not find ourselves, most probably we were run as `sh COMMAND'
+# in which case we are not to be found in the path.
+if test "x$as_myself" = x; then
+ as_myself=$0
+fi
+if test ! -f "$as_myself"; then
+ $as_echo "$as_myself: error: cannot find myself; rerun with an absolute file name" >&2
+ exit 1
+fi
+
+# Unset variables that we do not need and which cause bugs (e.g. in
+# pre-3.0 UWIN ksh). But do not cause bugs in bash 2.01; the "|| exit 1"
+# suppresses any "Segmentation fault" message there. '((' could
+# trigger a bug in pdksh 5.2.14.
+for as_var in BASH_ENV ENV MAIL MAILPATH
+do eval test x\${$as_var+set} = xset \
+ && ( (unset $as_var) || exit 1) >/dev/null 2>&1 && unset $as_var || :
+done
+PS1='$ '
+PS2='> '
+PS4='+ '
+
+# NLS nuisances.
+LC_ALL=C
+export LC_ALL
+LANGUAGE=C
+export LANGUAGE
+
+# CDPATH.
+(unset CDPATH) >/dev/null 2>&1 && unset CDPATH
+
+
+# as_fn_error STATUS ERROR [LINENO LOG_FD]
+# ----------------------------------------
+# Output "`basename $0`: error: ERROR" to stderr. If LINENO and LOG_FD are
+# provided, also output the error to LOG_FD, referencing LINENO. Then exit the
+# script with STATUS, using 1 if that was 0.
+as_fn_error ()
+{
+ as_status=$1; test $as_status -eq 0 && as_status=1
+ if test "$4"; then
+ as_lineno=${as_lineno-"$3"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
+ $as_echo "$as_me:${as_lineno-$LINENO}: error: $2" >&$4
+ fi
+ $as_echo "$as_me: error: $2" >&2
+ as_fn_exit $as_status
+} # as_fn_error
+
+
+# as_fn_set_status STATUS
+# -----------------------
+# Set $? to STATUS, without forking.
+as_fn_set_status ()
+{
+ return $1
+} # as_fn_set_status
+
+# as_fn_exit STATUS
+# -----------------
+# Exit the shell with STATUS, even in a "trap 0" or "set -e" context.
+as_fn_exit ()
+{
+ set +e
+ as_fn_set_status $1
+ exit $1
+} # as_fn_exit
+
+# as_fn_unset VAR
+# ---------------
+# Portably unset VAR.
+as_fn_unset ()
+{
+ { eval $1=; unset $1;}
+}
+as_unset=as_fn_unset
+# as_fn_append VAR VALUE
+# ----------------------
+# Append the text in VALUE to the end of the definition contained in VAR. Take
+# advantage of any shell optimizations that allow amortized linear growth over
+# repeated appends, instead of the typical quadratic growth present in naive
+# implementations.
+if (eval "as_var=1; as_var+=2; test x\$as_var = x12") 2>/dev/null; then :
+ eval 'as_fn_append ()
+ {
+ eval $1+=\$2
+ }'
+else
+ as_fn_append ()
+ {
+ eval $1=\$$1\$2
+ }
+fi # as_fn_append
+
+# as_fn_arith ARG...
+# ------------------
+# Perform arithmetic evaluation on the ARGs, and store the result in the
+# global $as_val. Take advantage of shells that can avoid forks. The arguments
+# must be portable across $(()) and expr.
+if (eval "test \$(( 1 + 1 )) = 2") 2>/dev/null; then :
+ eval 'as_fn_arith ()
+ {
+ as_val=$(( $* ))
+ }'
+else
+ as_fn_arith ()
+ {
+ as_val=`expr "$@" || test $? -eq 1`
+ }
+fi # as_fn_arith
+
+
+if expr a : '\(a\)' >/dev/null 2>&1 &&
+ test "X`expr 00001 : '.*\(...\)'`" = X001; then
+ as_expr=expr
+else
+ as_expr=false
+fi
+
+if (basename -- /) >/dev/null 2>&1 && test "X`basename -- / 2>&1`" = "X/"; then
+ as_basename=basename
+else
+ as_basename=false
+fi
+
+if (as_dir=`dirname -- /` && test "X$as_dir" = X/) >/dev/null 2>&1; then
+ as_dirname=dirname
+else
+ as_dirname=false
+fi
+
+as_me=`$as_basename -- "$0" ||
+$as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \
+ X"$0" : 'X\(//\)$' \| \
+ X"$0" : 'X\(/\)' \| . 2>/dev/null ||
+$as_echo X/"$0" |
+ sed '/^.*\/\([^/][^/]*\)\/*$/{
+ s//\1/
+ q
+ }
+ /^X\/\(\/\/\)$/{
+ s//\1/
+ q
+ }
+ /^X\/\(\/\).*/{
+ s//\1/
+ q
+ }
+ s/.*/./; q'`
+
+# Avoid depending upon Character Ranges.
+as_cr_letters='abcdefghijklmnopqrstuvwxyz'
+as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ'
+as_cr_Letters=$as_cr_letters$as_cr_LETTERS
+as_cr_digits='0123456789'
+as_cr_alnum=$as_cr_Letters$as_cr_digits
+
+ECHO_C= ECHO_N= ECHO_T=
+case `echo -n x` in #(((((
+-n*)
+ case `echo 'xy\c'` in
+ *c*) ECHO_T=' ';; # ECHO_T is single tab character.
+ xy) ECHO_C='\c';;
+ *) echo `echo ksh88 bug on AIX 6.1` > /dev/null
+ ECHO_T=' ';;
+ esac;;
+*)
+ ECHO_N='-n';;
+esac
+
+rm -f conf$$ conf$$.exe conf$$.file
+if test -d conf$$.dir; then
+ rm -f conf$$.dir/conf$$.file
+else
+ rm -f conf$$.dir
+ mkdir conf$$.dir 2>/dev/null
+fi
+if (echo >conf$$.file) 2>/dev/null; then
+ if ln -s conf$$.file conf$$ 2>/dev/null; then
+ as_ln_s='ln -s'
+ # ... but there are two gotchas:
+ # 1) On MSYS, both `ln -s file dir' and `ln file dir' fail.
+ # 2) DJGPP < 2.04 has no symlinks; `ln -s' creates a wrapper executable.
+ # In both cases, we have to default to `cp -pR'.
+ ln -s conf$$.file conf$$.dir 2>/dev/null && test ! -f conf$$.exe ||
+ as_ln_s='cp -pR'
+ elif ln conf$$.file conf$$ 2>/dev/null; then
+ as_ln_s=ln
+ else
+ as_ln_s='cp -pR'
+ fi
+else
+ as_ln_s='cp -pR'
+fi
+rm -f conf$$ conf$$.exe conf$$.dir/conf$$.file conf$$.file
+rmdir conf$$.dir 2>/dev/null
+
+
+# as_fn_mkdir_p
+# -------------
+# Create "$as_dir" as a directory, including parents if necessary.
+as_fn_mkdir_p ()
+{
+
+ case $as_dir in #(
+ -*) as_dir=./$as_dir;;
+ esac
+ test -d "$as_dir" || eval $as_mkdir_p || {
+ as_dirs=
+ while :; do
+ case $as_dir in #(
+ *\'*) as_qdir=`$as_echo "$as_dir" | sed "s/'/'\\\\\\\\''/g"`;; #'(
+ *) as_qdir=$as_dir;;
+ esac
+ as_dirs="'$as_qdir' $as_dirs"
+ as_dir=`$as_dirname -- "$as_dir" ||
+$as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
+ X"$as_dir" : 'X\(//\)[^/]' \| \
+ X"$as_dir" : 'X\(//\)$' \| \
+ X"$as_dir" : 'X\(/\)' \| . 2>/dev/null ||
+$as_echo X"$as_dir" |
+ sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{
+ s//\1/
+ q
+ }
+ /^X\(\/\/\)[^/].*/{
+ s//\1/
+ q
+ }
+ /^X\(\/\/\)$/{
+ s//\1/
+ q
+ }
+ /^X\(\/\).*/{
+ s//\1/
+ q
+ }
+ s/.*/./; q'`
+ test -d "$as_dir" && break
+ done
+ test -z "$as_dirs" || eval "mkdir $as_dirs"
+ } || test -d "$as_dir" || as_fn_error $? "cannot create directory $as_dir"
+
+
+} # as_fn_mkdir_p
+if mkdir -p . 2>/dev/null; then
+ as_mkdir_p='mkdir -p "$as_dir"'
+else
+ test -d ./-p && rmdir ./-p
+ as_mkdir_p=false
+fi
+
+
+# as_fn_executable_p FILE
+# -----------------------
+# Test if FILE is an executable regular file.
+as_fn_executable_p ()
+{
+ test -f "$1" && test -x "$1"
+} # as_fn_executable_p
+as_test_x='test -x'
+as_executable_p=as_fn_executable_p
+
+# Sed expression to map a string onto a valid CPP name.
+as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'"
+
+# Sed expression to map a string onto a valid variable name.
+as_tr_sh="eval sed 'y%*+%pp%;s%[^_$as_cr_alnum]%_%g'"
+
+
+exec 6>&1
+## ----------------------------------- ##
+## Main body of $CONFIG_STATUS script. ##
+## ----------------------------------- ##
+_ASEOF
+test $as_write_fail = 0 && chmod +x $CONFIG_STATUS || ac_write_fail=1
+
+cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
+# Save the log message, to keep $0 and so on meaningful, and to
+# report actual input values of CONFIG_FILES etc. instead of their
+# values after options handling.
+ac_log="
+This file was extended by $as_me, which was
+generated by GNU Autoconf 2.69. Invocation command line was
+
+ CONFIG_FILES = $CONFIG_FILES
+ CONFIG_HEADERS = $CONFIG_HEADERS
+ CONFIG_LINKS = $CONFIG_LINKS
+ CONFIG_COMMANDS = $CONFIG_COMMANDS
+ $ $0 $@
+
+on `(hostname || uname -n) 2>/dev/null | sed 1q`
+"
+
+_ACEOF
+
+case $ac_config_files in *"
+"*) set x $ac_config_files; shift; ac_config_files=$*;;
+esac
+
+
+
+cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
+# Files that config.status was made for.
+config_files="$ac_config_files"
+
+_ACEOF
+
+cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
+ac_cs_usage="\
+\`$as_me' instantiates files and other configuration actions
+from templates according to the current configuration. Unless the files
+and actions are specified as TAGs, all are instantiated by default.
+
+Usage: $0 [OPTION]... [TAG]...
+
+ -h, --help print this help, then exit
+ -V, --version print version number and configuration settings, then exit
+ --config print configuration, then exit
+ -q, --quiet, --silent
+ do not print progress messages
+ -d, --debug don't remove temporary files
+ --recheck update $as_me by reconfiguring in the same conditions
+ --file=FILE[:TEMPLATE]
+ instantiate the configuration file FILE
+
+Configuration files:
+$config_files
+
+Report bugs to the package provider."
+
+_ACEOF
+cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
+ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
+ac_cs_version="\\
+config.status
+configured by $0, generated by GNU Autoconf 2.69,
+ with options \\"\$ac_cs_config\\"
+
+Copyright (C) 2012 Free Software Foundation, Inc.
+This config.status script is free software; the Free Software Foundation
+gives unlimited permission to copy, distribute and modify it."
+
+ac_pwd='$ac_pwd'
+srcdir='$srcdir'
+INSTALL='$INSTALL'
+test -n "\$AWK" || AWK=awk
+_ACEOF
+
+cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
+# The default lists apply if the user does not specify any file.
+ac_need_defaults=:
+while test $# != 0
+do
+ case $1 in
+ --*=?*)
+ ac_option=`expr "X$1" : 'X\([^=]*\)='`
+ ac_optarg=`expr "X$1" : 'X[^=]*=\(.*\)'`
+ ac_shift=:
+ ;;
+ --*=)
+ ac_option=`expr "X$1" : 'X\([^=]*\)='`
+ ac_optarg=
+ ac_shift=:
+ ;;
+ *)
+ ac_option=$1
+ ac_optarg=$2
+ ac_shift=shift
+ ;;
+ esac
+
+ case $ac_option in
+ # Handling of the options.
+ -recheck | --recheck | --rechec | --reche | --rech | --rec | --re | --r)
+ ac_cs_recheck=: ;;
+ --version | --versio | --versi | --vers | --ver | --ve | --v | -V )
+ $as_echo "$ac_cs_version"; exit ;;
+ --config | --confi | --conf | --con | --co | --c )
+ $as_echo "$ac_cs_config"; exit ;;
+ --debug | --debu | --deb | --de | --d | -d )
+ debug=: ;;
+ --file | --fil | --fi | --f )
+ $ac_shift
+ case $ac_optarg in
+ *\'*) ac_optarg=`$as_echo "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"` ;;
+ '') as_fn_error $? "missing file argument" ;;
+ esac
+ as_fn_append CONFIG_FILES " '$ac_optarg'"
+ ac_need_defaults=false;;
+ --he | --h | --help | --hel | -h )
+ $as_echo "$ac_cs_usage"; exit ;;
+ -q | -quiet | --quiet | --quie | --qui | --qu | --q \
+ | -silent | --silent | --silen | --sile | --sil | --si | --s)
+ ac_cs_silent=: ;;
+
+ # This is an error.
+ -*) as_fn_error $? "unrecognized option: \`$1'
+Try \`$0 --help' for more information." ;;
+
+ *) as_fn_append ac_config_targets " $1"
+ ac_need_defaults=false ;;
+
+ esac
+ shift
+done
+
+ac_configure_extra_args=
+
+if $ac_cs_silent; then
+ exec 6>/dev/null
+ ac_configure_extra_args="$ac_configure_extra_args --silent"
+fi
+
+_ACEOF
+cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
+if \$ac_cs_recheck; then
+ set X $SHELL '$0' $ac_configure_args \$ac_configure_extra_args --no-create --no-recursion
+ shift
+ \$as_echo "running CONFIG_SHELL=$SHELL \$*" >&6
+ CONFIG_SHELL='$SHELL'
+ export CONFIG_SHELL
+ exec "\$@"
+fi
+
+_ACEOF
+cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
+exec 5>>config.log
+{
+ echo
+ sed 'h;s/./-/g;s/^.../## /;s/...$/ ##/;p;x;p;x' <<_ASBOX
+## Running $as_me. ##
+_ASBOX
+ $as_echo "$ac_log"
+} >&5
+
+_ACEOF
+cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
+_ACEOF
+
+cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
+
+# Handling of arguments.
+for ac_config_target in $ac_config_targets
+do
+ case $ac_config_target in
+ "Makefile") CONFIG_FILES="$CONFIG_FILES Makefile" ;;
+
+ *) as_fn_error $? "invalid argument: \`$ac_config_target'" "$LINENO" 5;;
+ esac
+done
+
+
+# If the user did not use the arguments to specify the items to instantiate,
+# then the envvar interface is used. Set only those that are not.
+# We use the long form for the default assignment because of an extremely
+# bizarre bug on SunOS 4.1.3.
+if $ac_need_defaults; then
+ test "${CONFIG_FILES+set}" = set || CONFIG_FILES=$config_files
+fi
+
+# Have a temporary directory for convenience. Make it in the build tree
+# simply because there is no reason against having it here, and in addition,
+# creating and moving files from /tmp can sometimes cause problems.
+# Hook for its removal unless debugging.
+# Note that there is a small window in which the directory will not be cleaned:
+# after its creation but before its name has been assigned to `$tmp'.
+$debug ||
+{
+ tmp= ac_tmp=
+ trap 'exit_status=$?
+ : "${ac_tmp:=$tmp}"
+ { test ! -d "$ac_tmp" || rm -fr "$ac_tmp"; } && exit $exit_status
+' 0
+ trap 'as_fn_exit 1' 1 2 13 15
+}
+# Create a (secure) tmp directory for tmp files.
+
+{
+ tmp=`(umask 077 && mktemp -d "./confXXXXXX") 2>/dev/null` &&
+ test -d "$tmp"
+} ||
+{
+ tmp=./conf$$-$RANDOM
+ (umask 077 && mkdir "$tmp")
+} || as_fn_error $? "cannot create a temporary directory in ." "$LINENO" 5
+ac_tmp=$tmp
+
+# Set up the scripts for CONFIG_FILES section.
+# No need to generate them if there are no CONFIG_FILES.
+# This happens for instance with `./config.status config.h'.
+if test -n "$CONFIG_FILES"; then
+
+
+ac_cr=`echo X | tr X '\015'`
+# On cygwin, bash can eat \r inside `` if the user requested igncr.
+# But we know of no other shell where ac_cr would be empty at this
+# point, so we can use a bashism as a fallback.
+if test "x$ac_cr" = x; then
+ eval ac_cr=\$\'\\r\'
+fi
+ac_cs_awk_cr=`$AWK 'BEGIN { print "a\rb" }' /dev/null`
+if test "$ac_cs_awk_cr" = "a${ac_cr}b"; then
+ ac_cs_awk_cr='\\r'
+else
+ ac_cs_awk_cr=$ac_cr
+fi
+
+echo 'BEGIN {' >"$ac_tmp/subs1.awk" &&
+_ACEOF
+
+
+{
+ echo "cat >conf$$subs.awk <<_ACEOF" &&
+ echo "$ac_subst_vars" | sed 's/.*/&!$&$ac_delim/' &&
+ echo "_ACEOF"
+} >conf$$subs.sh ||
+ as_fn_error $? "could not make $CONFIG_STATUS" "$LINENO" 5
+ac_delim_num=`echo "$ac_subst_vars" | grep -c '^'`
+ac_delim='%!_!# '
+for ac_last_try in false false false false false :; do
+ . ./conf$$subs.sh ||
+ as_fn_error $? "could not make $CONFIG_STATUS" "$LINENO" 5
+
+ ac_delim_n=`sed -n "s/.*$ac_delim\$/X/p" conf$$subs.awk | grep -c X`
+ if test $ac_delim_n = $ac_delim_num; then
+ break
+ elif $ac_last_try; then
+ as_fn_error $? "could not make $CONFIG_STATUS" "$LINENO" 5
+ else
+ ac_delim="$ac_delim!$ac_delim _$ac_delim!! "
+ fi
+done
+rm -f conf$$subs.sh
+
+cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
+cat >>"\$ac_tmp/subs1.awk" <<\\_ACAWK &&
+_ACEOF
+sed -n '
+h
+s/^/S["/; s/!.*/"]=/
+p
+g
+s/^[^!]*!//
+:repl
+t repl
+s/'"$ac_delim"'$//
+t delim
+:nl
+h
+s/\(.\{148\}\)..*/\1/
+t more1
+s/["\\]/\\&/g; s/^/"/; s/$/\\n"\\/
+p
+n
+b repl
+:more1
+s/["\\]/\\&/g; s/^/"/; s/$/"\\/
+p
+g
+s/.\{148\}//
+t nl
+:delim
+h
+s/\(.\{148\}\)..*/\1/
+t more2
+s/["\\]/\\&/g; s/^/"/; s/$/"/
+p
+b
+:more2
+s/["\\]/\\&/g; s/^/"/; s/$/"\\/
+p
+g
+s/.\{148\}//
+t delim
+' >$CONFIG_STATUS || ac_write_fail=1
+rm -f conf$$subs.awk
+cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
+_ACAWK
+cat >>"\$ac_tmp/subs1.awk" <<_ACAWK &&
+ for (key in S) S_is_set[key] = 1
+ FS = ""
+
+}
+{
+ line = $ 0
+ nfields = split(line, field, "@")
+ substed = 0
+ len = length(field[1])
+ for (i = 2; i < nfields; i++) {
+ key = field[i]
+ keylen = length(key)
+ if (S_is_set[key]) {
+ value = S[key]
+ line = substr(line, 1, len) "" value "" substr(line, len + keylen + 3)
+ len += length(value) + length(field[++i])
+ substed = 1
+ } else
+ len += 1 + keylen
+ }
+
+ print line
+}
+
+_ACAWK
+_ACEOF
+cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
+if sed "s/$ac_cr//" < /dev/null > /dev/null 2>&1; then
+ sed "s/$ac_cr\$//; s/$ac_cr/$ac_cs_awk_cr/g"
+else
+ cat
+fi < "$ac_tmp/subs1.awk" > "$ac_tmp/subs.awk" \
+ || as_fn_error $? "could not setup config files machinery" "$LINENO" 5
+_ACEOF
+
+# VPATH may cause trouble with some makes, so we remove sole $(srcdir),
+# ${srcdir} and @srcdir@ entries from VPATH if srcdir is ".", strip leading and
+# trailing colons and then remove the whole line if VPATH becomes empty
+# (actually we leave an empty line to preserve line numbers).
+if test "x$srcdir" = x.; then
+ ac_vpsub='/^[ ]*VPATH[ ]*=[ ]*/{
+h
+s///
+s/^/:/
+s/[ ]*$/:/
+s/:\$(srcdir):/:/g
+s/:\${srcdir}:/:/g
+s/:@srcdir@:/:/g
+s/^:*//
+s/:*$//
+x
+s/\(=[ ]*\).*/\1/
+G
+s/\n//
+s/^[^=]*=[ ]*$//
+}'
+fi
+
+cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
+fi # test -n "$CONFIG_FILES"
+
+
+eval set X " :F $CONFIG_FILES "
+shift
+for ac_tag
+do
+ case $ac_tag in
+ :[FHLC]) ac_mode=$ac_tag; continue;;
+ esac
+ case $ac_mode$ac_tag in
+ :[FHL]*:*);;
+ :L* | :C*:*) as_fn_error $? "invalid tag \`$ac_tag'" "$LINENO" 5;;
+ :[FH]-) ac_tag=-:-;;
+ :[FH]*) ac_tag=$ac_tag:$ac_tag.in;;
+ esac
+ ac_save_IFS=$IFS
+ IFS=:
+ set x $ac_tag
+ IFS=$ac_save_IFS
+ shift
+ ac_file=$1
+ shift
+
+ case $ac_mode in
+ :L) ac_source=$1;;
+ :[FH])
+ ac_file_inputs=
+ for ac_f
+ do
+ case $ac_f in
+ -) ac_f="$ac_tmp/stdin";;
+ *) # Look for the file first in the build tree, then in the source tree
+ # (if the path is not absolute). The absolute path cannot be DOS-style,
+ # because $ac_f cannot contain `:'.
+ test -f "$ac_f" ||
+ case $ac_f in
+ [\\/$]*) false;;
+ *) test -f "$srcdir/$ac_f" && ac_f="$srcdir/$ac_f";;
+ esac ||
+ as_fn_error 1 "cannot find input file: \`$ac_f'" "$LINENO" 5;;
+ esac
+ case $ac_f in *\'*) ac_f=`$as_echo "$ac_f" | sed "s/'/'\\\\\\\\''/g"`;; esac
+ as_fn_append ac_file_inputs " '$ac_f'"
+ done
+
+ # Let's still pretend it is `configure' which instantiates (i.e., don't
+ # use $as_me), people would be surprised to read:
+ # /* config.h. Generated by config.status. */
+ configure_input='Generated from '`
+ $as_echo "$*" | sed 's|^[^:]*/||;s|:[^:]*/|, |g'
+ `' by configure.'
+ if test x"$ac_file" != x-; then
+ configure_input="$ac_file. $configure_input"
+ { $as_echo "$as_me:${as_lineno-$LINENO}: creating $ac_file" >&5
+$as_echo "$as_me: creating $ac_file" >&6;}
+ fi
+ # Neutralize special characters interpreted by sed in replacement strings.
+ case $configure_input in #(
+ *\&* | *\|* | *\\* )
+ ac_sed_conf_input=`$as_echo "$configure_input" |
+ sed 's/[\\\\&|]/\\\\&/g'`;; #(
+ *) ac_sed_conf_input=$configure_input;;
+ esac
+
+ case $ac_tag in
+ *:-:* | *:-) cat >"$ac_tmp/stdin" \
+ || as_fn_error $? "could not create $ac_file" "$LINENO" 5 ;;
+ esac
+ ;;
+ esac
+
+ ac_dir=`$as_dirname -- "$ac_file" ||
+$as_expr X"$ac_file" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
+ X"$ac_file" : 'X\(//\)[^/]' \| \
+ X"$ac_file" : 'X\(//\)$' \| \
+ X"$ac_file" : 'X\(/\)' \| . 2>/dev/null ||
+$as_echo X"$ac_file" |
+ sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{
+ s//\1/
+ q
+ }
+ /^X\(\/\/\)[^/].*/{
+ s//\1/
+ q
+ }
+ /^X\(\/\/\)$/{
+ s//\1/
+ q
+ }
+ /^X\(\/\).*/{
+ s//\1/
+ q
+ }
+ s/.*/./; q'`
+ as_dir="$ac_dir"; as_fn_mkdir_p
+ ac_builddir=.
+
+case "$ac_dir" in
+.) ac_dir_suffix= ac_top_builddir_sub=. ac_top_build_prefix= ;;
+*)
+ ac_dir_suffix=/`$as_echo "$ac_dir" | sed 's|^\.[\\/]||'`
+ # A ".." for each directory in $ac_dir_suffix.
+ ac_top_builddir_sub=`$as_echo "$ac_dir_suffix" | sed 's|/[^\\/]*|/..|g;s|/||'`
+ case $ac_top_builddir_sub in
+ "") ac_top_builddir_sub=. ac_top_build_prefix= ;;
+ *) ac_top_build_prefix=$ac_top_builddir_sub/ ;;
+ esac ;;
+esac
+ac_abs_top_builddir=$ac_pwd
+ac_abs_builddir=$ac_pwd$ac_dir_suffix
+# for backward compatibility:
+ac_top_builddir=$ac_top_build_prefix
+
+case $srcdir in
+ .) # We are building in place.
+ ac_srcdir=.
+ ac_top_srcdir=$ac_top_builddir_sub
+ ac_abs_top_srcdir=$ac_pwd ;;
+ [\\/]* | ?:[\\/]* ) # Absolute name.
+ ac_srcdir=$srcdir$ac_dir_suffix;
+ ac_top_srcdir=$srcdir
+ ac_abs_top_srcdir=$srcdir ;;
+ *) # Relative name.
+ ac_srcdir=$ac_top_build_prefix$srcdir$ac_dir_suffix
+ ac_top_srcdir=$ac_top_build_prefix$srcdir
+ ac_abs_top_srcdir=$ac_pwd/$srcdir ;;
+esac
+ac_abs_srcdir=$ac_abs_top_srcdir$ac_dir_suffix
+
+
+ case $ac_mode in
+ :F)
+ #
+ # CONFIG_FILE
+ #
+
+ case $INSTALL in
+ [\\/$]* | ?:[\\/]* ) ac_INSTALL=$INSTALL ;;
+ *) ac_INSTALL=$ac_top_build_prefix$INSTALL ;;
+ esac
+_ACEOF
+
+cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
+# If the template does not know about datarootdir, expand it.
+# FIXME: This hack should be removed a few years after 2.60.
+ac_datarootdir_hack=; ac_datarootdir_seen=
+ac_sed_dataroot='
+/datarootdir/ {
+ p
+ q
+}
+/@datadir@/p
+/@docdir@/p
+/@infodir@/p
+/@localedir@/p
+/@mandir@/p'
+case `eval "sed -n \"\$ac_sed_dataroot\" $ac_file_inputs"` in
+*datarootdir*) ac_datarootdir_seen=yes;;
+*@datadir@*|*@docdir@*|*@infodir@*|*@localedir@*|*@mandir@*)
+ { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $ac_file_inputs seems to ignore the --datarootdir setting" >&5
+$as_echo "$as_me: WARNING: $ac_file_inputs seems to ignore the --datarootdir setting" >&2;}
+_ACEOF
+cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
+ ac_datarootdir_hack='
+ s&@datadir@&$datadir&g
+ s&@docdir@&$docdir&g
+ s&@infodir@&$infodir&g
+ s&@localedir@&$localedir&g
+ s&@mandir@&$mandir&g
+ s&\\\${datarootdir}&$datarootdir&g' ;;
+esac
+_ACEOF
+
+# Neutralize VPATH when `$srcdir' = `.'.
+# Shell code in configure.ac might set extrasub.
+# FIXME: do we really want to maintain this feature?
+cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
+ac_sed_extra="$ac_vpsub
+$extrasub
+_ACEOF
+cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
+:t
+/@[a-zA-Z_][a-zA-Z_0-9]*@/!b
+s|@configure_input@|$ac_sed_conf_input|;t t
+s&@top_builddir@&$ac_top_builddir_sub&;t t
+s&@top_build_prefix@&$ac_top_build_prefix&;t t
+s&@srcdir@&$ac_srcdir&;t t
+s&@abs_srcdir@&$ac_abs_srcdir&;t t
+s&@top_srcdir@&$ac_top_srcdir&;t t
+s&@abs_top_srcdir@&$ac_abs_top_srcdir&;t t
+s&@builddir@&$ac_builddir&;t t
+s&@abs_builddir@&$ac_abs_builddir&;t t
+s&@abs_top_builddir@&$ac_abs_top_builddir&;t t
+s&@INSTALL@&$ac_INSTALL&;t t
+$ac_datarootdir_hack
+"
+eval sed \"\$ac_sed_extra\" "$ac_file_inputs" | $AWK -f "$ac_tmp/subs.awk" \
+ >$ac_tmp/out || as_fn_error $? "could not create $ac_file" "$LINENO" 5
+
+test -z "$ac_datarootdir_hack$ac_datarootdir_seen" &&
+ { ac_out=`sed -n '/\${datarootdir}/p' "$ac_tmp/out"`; test -n "$ac_out"; } &&
+ { ac_out=`sed -n '/^[ ]*datarootdir[ ]*:*=/p' \
+ "$ac_tmp/out"`; test -z "$ac_out"; } &&
+ { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $ac_file contains a reference to the variable \`datarootdir'
+which seems to be undefined. Please make sure it is defined" >&5
+$as_echo "$as_me: WARNING: $ac_file contains a reference to the variable \`datarootdir'
+which seems to be undefined. Please make sure it is defined" >&2;}
+
+ rm -f "$ac_tmp/stdin"
+ case $ac_file in
+ -) cat "$ac_tmp/out" && rm -f "$ac_tmp/out";;
+ *) rm -f "$ac_file" && mv "$ac_tmp/out" "$ac_file";;
+ esac \
+ || as_fn_error $? "could not create $ac_file" "$LINENO" 5
+ ;;
+
+
+
+ esac
+
+done # for ac_tag
+
+
+as_fn_exit 0
+_ACEOF
+ac_clean_files=$ac_clean_files_save
+
+test $ac_write_fail = 0 ||
+ as_fn_error $? "write failure creating $CONFIG_STATUS" "$LINENO" 5
+
+
+# configure is writing to config.log, and then calls config.status.
+# config.status does its own redirection, appending to config.log.
+# Unfortunately, on DOS this fails, as config.log is still kept open
+# by configure, so config.status won't be able to write to it; its
+# output is simply discarded. So we exec the FD to /dev/null,
+# effectively closing config.log, so it can be properly (re)opened and
+# appended to by config.status. When coming back to configure, we
+# need to make the FD available again.
+if test "$no_create" != yes; then
+ ac_cs_success=:
+ ac_config_status_args=
+ test "$silent" = yes &&
+ ac_config_status_args="$ac_config_status_args --quiet"
+ exec 5>/dev/null
+ $SHELL $CONFIG_STATUS $ac_config_status_args || ac_cs_success=false
+ exec 5>>config.log
+ # Use ||, not &&, to avoid exiting from the if with $? = 1, which
+ # would make configure fail if this is the last instruction.
+ $ac_cs_success || as_fn_exit 1
+fi
+if test -n "$ac_unrecognized_opts" && test "$enable_option_checking" != no; then
+ { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: unrecognized options: $ac_unrecognized_opts" >&5
+$as_echo "$as_me: WARNING: unrecognized options: $ac_unrecognized_opts" >&2;}
+fi
+
diff --git a/tools/gzinject/configure.ac b/tools/gzinject/configure.ac
new file mode 100644
index 000000000..bf8d7008d
--- /dev/null
+++ b/tools/gzinject/configure.ac
@@ -0,0 +1,14 @@
+AC_PREREQ([2.69])
+AC_INIT
+AC_PREFIX_DEFAULT([/usr/local])
+AC_PROG_CC
+AC_PROG_INSTALL
+CFLAGS="$CFLAGS -O3"
+AC_CHECK_HEADERS([wmmintrin.h],
+ [
+    CFLAGS="$CFLAGS -maes -msse -msse2"
+    CPPFLAGS="$CPPFLAGS -DFASTAES"
+ ]
+)
+AC_CONFIG_FILES([Makefile])
+AC_OUTPUT
diff --git a/tools/gzinject/install-sh b/tools/gzinject/install-sh
new file mode 100644
index 000000000..0360b79e7
--- /dev/null
+++ b/tools/gzinject/install-sh
@@ -0,0 +1,501 @@
+#!/bin/sh
+# install - install a program, script, or datafile
+
+scriptversion=2016-01-11.22; # UTC
+
+# This originates from X11R5 (mit/util/scripts/install.sh), which was
+# later released in X11R6 (xc/config/util/install.sh) with the
+# following copyright and license.
+#
+# Copyright (C) 1994 X Consortium
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to
+# deal in the Software without restriction, including without limitation the
+# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+# sell copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# X CONSORTIUM BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+# AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNEC-
+# TION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+#
+# Except as contained in this notice, the name of the X Consortium shall not
+# be used in advertising or otherwise to promote the sale, use or other deal-
+# ings in this Software without prior written authorization from the X Consor-
+# tium.
+#
+#
+# FSF changes to this file are in the public domain.
+#
+# Calling this script install-sh is preferred over install.sh, to prevent
+# 'make' implicit rules from creating a file called install from it
+# when there is no Makefile.
+#
+# This script is compatible with the BSD install script, but was written
+# from scratch.
+
+tab=' '
+nl='
+'
+IFS=" $tab$nl"
+
+# Set DOITPROG to "echo" to test this script.
+
+doit=${DOITPROG-}
+doit_exec=${doit:-exec}
+
+# Put in absolute file names if you don't have them in your path;
+# or use environment vars.
+
+chgrpprog=${CHGRPPROG-chgrp}
+chmodprog=${CHMODPROG-chmod}
+chownprog=${CHOWNPROG-chown}
+cmpprog=${CMPPROG-cmp}
+cpprog=${CPPROG-cp}
+mkdirprog=${MKDIRPROG-mkdir}
+mvprog=${MVPROG-mv}
+rmprog=${RMPROG-rm}
+stripprog=${STRIPPROG-strip}
+
+posix_mkdir=
+
+# Desired mode of installed file.
+mode=0755
+
+chgrpcmd=
+chmodcmd=$chmodprog
+chowncmd=
+mvcmd=$mvprog
+rmcmd="$rmprog -f"
+stripcmd=
+
+src=
+dst=
+dir_arg=
+dst_arg=
+
+copy_on_change=false
+is_target_a_directory=possibly
+
+usage="\
+Usage: $0 [OPTION]... [-T] SRCFILE DSTFILE
+ or: $0 [OPTION]... SRCFILES... DIRECTORY
+ or: $0 [OPTION]... -t DIRECTORY SRCFILES...
+ or: $0 [OPTION]... -d DIRECTORIES...
+
+In the 1st form, copy SRCFILE to DSTFILE.
+In the 2nd and 3rd, copy all SRCFILES to DIRECTORY.
+In the 4th, create DIRECTORIES.
+
+Options:
+ --help display this help and exit.
+ --version display version info and exit.
+
+ -c (ignored)
+ -C install only if different (preserve the last data modification time)
+ -d create directories instead of installing files.
+ -g GROUP $chgrpprog installed files to GROUP.
+ -m MODE $chmodprog installed files to MODE.
+ -o USER $chownprog installed files to USER.
+ -s $stripprog installed files.
+ -t DIRECTORY install into DIRECTORY.
+ -T report an error if DSTFILE is a directory.
+
+Environment variables override the default commands:
+ CHGRPPROG CHMODPROG CHOWNPROG CMPPROG CPPROG MKDIRPROG MVPROG
+ RMPROG STRIPPROG
+"
+
+while test $# -ne 0; do
+ case $1 in
+ -c) ;;
+
+ -C) copy_on_change=true;;
+
+ -d) dir_arg=true;;
+
+ -g) chgrpcmd="$chgrpprog $2"
+ shift;;
+
+ --help) echo "$usage"; exit $?;;
+
+ -m) mode=$2
+ case $mode in
+ *' '* | *"$tab"* | *"$nl"* | *'*'* | *'?'* | *'['*)
+ echo "$0: invalid mode: $mode" >&2
+ exit 1;;
+ esac
+ shift;;
+
+ -o) chowncmd="$chownprog $2"
+ shift;;
+
+ -s) stripcmd=$stripprog;;
+
+ -t)
+ is_target_a_directory=always
+ dst_arg=$2
+ # Protect names problematic for 'test' and other utilities.
+ case $dst_arg in
+ -* | [=\(\)!]) dst_arg=./$dst_arg;;
+ esac
+ shift;;
+
+ -T) is_target_a_directory=never;;
+
+ --version) echo "$0 $scriptversion"; exit $?;;
+
+ --) shift
+ break;;
+
+ -*) echo "$0: invalid option: $1" >&2
+ exit 1;;
+
+ *) break;;
+ esac
+ shift
+done
+
+# We allow the use of options -d and -T together, by making -d
+# take the precedence; this is for compatibility with GNU install.
+
+if test -n "$dir_arg"; then
+ if test -n "$dst_arg"; then
+ echo "$0: target directory not allowed when installing a directory." >&2
+ exit 1
+ fi
+fi
+
+if test $# -ne 0 && test -z "$dir_arg$dst_arg"; then
+ # When -d is used, all remaining arguments are directories to create.
+ # When -t is used, the destination is already specified.
+ # Otherwise, the last argument is the destination. Remove it from $@.
+ for arg
+ do
+ if test -n "$dst_arg"; then
+ # $@ is not empty: it contains at least $arg.
+ set fnord "$@" "$dst_arg"
+ shift # fnord
+ fi
+ shift # arg
+ dst_arg=$arg
+ # Protect names problematic for 'test' and other utilities.
+ case $dst_arg in
+ -* | [=\(\)!]) dst_arg=./$dst_arg;;
+ esac
+ done
+fi
+
+if test $# -eq 0; then
+ if test -z "$dir_arg"; then
+ echo "$0: no input file specified." >&2
+ exit 1
+ fi
+ # It's OK to call 'install-sh -d' without argument.
+ # This can happen when creating conditional directories.
+ exit 0
+fi
+
+if test -z "$dir_arg"; then
+ if test $# -gt 1 || test "$is_target_a_directory" = always; then
+ if test ! -d "$dst_arg"; then
+ echo "$0: $dst_arg: Is not a directory." >&2
+ exit 1
+ fi
+ fi
+fi
+
+if test -z "$dir_arg"; then
+ do_exit='(exit $ret); exit $ret'
+ trap "ret=129; $do_exit" 1
+ trap "ret=130; $do_exit" 2
+ trap "ret=141; $do_exit" 13
+ trap "ret=143; $do_exit" 15
+
+ # Set umask so as not to create temps with too-generous modes.
+ # However, 'strip' requires both read and write access to temps.
+ case $mode in
+ # Optimize common cases.
+ *644) cp_umask=133;;
+ *755) cp_umask=22;;
+
+ *[0-7])
+ if test -z "$stripcmd"; then
+ u_plus_rw=
+ else
+ u_plus_rw='% 200'
+ fi
+ cp_umask=`expr '(' 777 - $mode % 1000 ')' $u_plus_rw`;;
+ *)
+ if test -z "$stripcmd"; then
+ u_plus_rw=
+ else
+ u_plus_rw=,u+rw
+ fi
+ cp_umask=$mode$u_plus_rw;;
+ esac
+fi
+
+for src
+do
+ # Protect names problematic for 'test' and other utilities.
+ case $src in
+ -* | [=\(\)!]) src=./$src;;
+ esac
+
+ if test -n "$dir_arg"; then
+ dst=$src
+ dstdir=$dst
+ test -d "$dstdir"
+ dstdir_status=$?
+ else
+
+ # Waiting for this to be detected by the "$cpprog $src $dsttmp" command
+ # might cause directories to be created, which would be especially bad
+ # if $src (and thus $dsttmp) contains '*'.
+ if test ! -f "$src" && test ! -d "$src"; then
+ echo "$0: $src does not exist." >&2
+ exit 1
+ fi
+
+ if test -z "$dst_arg"; then
+ echo "$0: no destination specified." >&2
+ exit 1
+ fi
+ dst=$dst_arg
+
+ # If destination is a directory, append the input filename; won't work
+ # if double slashes aren't ignored.
+ if test -d "$dst"; then
+ if test "$is_target_a_directory" = never; then
+ echo "$0: $dst_arg: Is a directory" >&2
+ exit 1
+ fi
+ dstdir=$dst
+ dst=$dstdir/`basename "$src"`
+ dstdir_status=0
+ else
+ dstdir=`dirname "$dst"`
+ test -d "$dstdir"
+ dstdir_status=$?
+ fi
+ fi
+
+ obsolete_mkdir_used=false
+
+ if test $dstdir_status != 0; then
+ case $posix_mkdir in
+ '')
+ # Create intermediate dirs using mode 755 as modified by the umask.
+ # This is like FreeBSD 'install' as of 1997-10-28.
+ umask=`umask`
+ case $stripcmd.$umask in
+ # Optimize common cases.
+ *[2367][2367]) mkdir_umask=$umask;;
+ .*0[02][02] | .[02][02] | .[02]) mkdir_umask=22;;
+
+ *[0-7])
+ mkdir_umask=`expr $umask + 22 \
+ - $umask % 100 % 40 + $umask % 20 \
+ - $umask % 10 % 4 + $umask % 2
+ `;;
+ *) mkdir_umask=$umask,go-w;;
+ esac
+
+ # With -d, create the new directory with the user-specified mode.
+ # Otherwise, rely on $mkdir_umask.
+ if test -n "$dir_arg"; then
+ mkdir_mode=-m$mode
+ else
+ mkdir_mode=
+ fi
+
+ posix_mkdir=false
+ case $umask in
+ *[123567][0-7][0-7])
+ # POSIX mkdir -p sets u+wx bits regardless of umask, which
+ # is incompatible with FreeBSD 'install' when (umask & 300) != 0.
+ ;;
+ *)
+ tmpdir=${TMPDIR-/tmp}/ins$RANDOM-$$
+ trap 'ret=$?; rmdir "$tmpdir/d" "$tmpdir" 2>/dev/null; exit $ret' 0
+
+ if (umask $mkdir_umask &&
+ exec $mkdirprog $mkdir_mode -p -- "$tmpdir/d") >/dev/null 2>&1
+ then
+ if test -z "$dir_arg" || {
+ # Check for POSIX incompatibilities with -m.
+ # HP-UX 11.23 and IRIX 6.5 mkdir -m -p sets group- or
+ # other-writable bit of parent directory when it shouldn't.
+ # FreeBSD 6.1 mkdir -m -p sets mode of existing directory.
+ ls_ld_tmpdir=`ls -ld "$tmpdir"`
+ case $ls_ld_tmpdir in
+ d????-?r-*) different_mode=700;;
+ d????-?--*) different_mode=755;;
+ *) false;;
+ esac &&
+ $mkdirprog -m$different_mode -p -- "$tmpdir" && {
+ ls_ld_tmpdir_1=`ls -ld "$tmpdir"`
+ test "$ls_ld_tmpdir" = "$ls_ld_tmpdir_1"
+ }
+ }
+ then posix_mkdir=:
+ fi
+ rmdir "$tmpdir/d" "$tmpdir"
+ else
+ # Remove any dirs left behind by ancient mkdir implementations.
+ rmdir ./$mkdir_mode ./-p ./-- 2>/dev/null
+ fi
+ trap '' 0;;
+ esac;;
+ esac
+
+ if
+ $posix_mkdir && (
+ umask $mkdir_umask &&
+ $doit_exec $mkdirprog $mkdir_mode -p -- "$dstdir"
+ )
+ then :
+ else
+
+ # The umask is ridiculous, or mkdir does not conform to POSIX,
+ # or it failed possibly due to a race condition. Create the
+ # directory the slow way, step by step, checking for races as we go.
+
+ case $dstdir in
+ /*) prefix='/';;
+ [-=\(\)!]*) prefix='./';;
+ *) prefix='';;
+ esac
+
+ oIFS=$IFS
+ IFS=/
+ set -f
+ set fnord $dstdir
+ shift
+ set +f
+ IFS=$oIFS
+
+ prefixes=
+
+ for d
+ do
+ test X"$d" = X && continue
+
+ prefix=$prefix$d
+ if test -d "$prefix"; then
+ prefixes=
+ else
+ if $posix_mkdir; then
+ (umask=$mkdir_umask &&
+ $doit_exec $mkdirprog $mkdir_mode -p -- "$dstdir") && break
+ # Don't fail if two instances are running concurrently.
+ test -d "$prefix" || exit 1
+ else
+ case $prefix in
+ *\'*) qprefix=`echo "$prefix" | sed "s/'/'\\\\\\\\''/g"`;;
+ *) qprefix=$prefix;;
+ esac
+ prefixes="$prefixes '$qprefix'"
+ fi
+ fi
+ prefix=$prefix/
+ done
+
+ if test -n "$prefixes"; then
+ # Don't fail if two instances are running concurrently.
+ (umask $mkdir_umask &&
+ eval "\$doit_exec \$mkdirprog $prefixes") ||
+ test -d "$dstdir" || exit 1
+ obsolete_mkdir_used=true
+ fi
+ fi
+ fi
+
+ if test -n "$dir_arg"; then
+ { test -z "$chowncmd" || $doit $chowncmd "$dst"; } &&
+ { test -z "$chgrpcmd" || $doit $chgrpcmd "$dst"; } &&
+ { test "$obsolete_mkdir_used$chowncmd$chgrpcmd" = false ||
+ test -z "$chmodcmd" || $doit $chmodcmd $mode "$dst"; } || exit 1
+ else
+
+ # Make a couple of temp file names in the proper directory.
+ dsttmp=$dstdir/_inst.$$_
+ rmtmp=$dstdir/_rm.$$_
+
+ # Trap to clean up those temp files at exit.
+ trap 'ret=$?; rm -f "$dsttmp" "$rmtmp" && exit $ret' 0
+
+ # Copy the file name to the temp name.
+ (umask $cp_umask && $doit_exec $cpprog "$src" "$dsttmp") &&
+
+ # and set any options; do chmod last to preserve setuid bits.
+ #
+ # If any of these fail, we abort the whole thing. If we want to
+ # ignore errors from any of these, just make sure not to ignore
+ # errors from the above "$doit $cpprog $src $dsttmp" command.
+ #
+ { test -z "$chowncmd" || $doit $chowncmd "$dsttmp"; } &&
+ { test -z "$chgrpcmd" || $doit $chgrpcmd "$dsttmp"; } &&
+ { test -z "$stripcmd" || $doit $stripcmd "$dsttmp"; } &&
+ { test -z "$chmodcmd" || $doit $chmodcmd $mode "$dsttmp"; } &&
+
+ # If -C, don't bother to copy if it wouldn't change the file.
+ if $copy_on_change &&
+ old=`LC_ALL=C ls -dlL "$dst" 2>/dev/null` &&
+ new=`LC_ALL=C ls -dlL "$dsttmp" 2>/dev/null` &&
+ set -f &&
+ set X $old && old=:$2:$4:$5:$6 &&
+ set X $new && new=:$2:$4:$5:$6 &&
+ set +f &&
+ test "$old" = "$new" &&
+ $cmpprog "$dst" "$dsttmp" >/dev/null 2>&1
+ then
+ rm -f "$dsttmp"
+ else
+ # Rename the file to the real destination.
+ $doit $mvcmd -f "$dsttmp" "$dst" 2>/dev/null ||
+
+ # The rename failed, perhaps because mv can't rename something else
+ # to itself, or perhaps because mv is so ancient that it does not
+ # support -f.
+ {
+ # Now remove or move aside any old file at destination location.
+ # We try this two ways since rm can't unlink itself on some
+ # systems and the destination file might be busy for other
+ # reasons. In this case, the final cleanup might fail but the new
+ # file should still install successfully.
+ {
+ test ! -f "$dst" ||
+ $doit $rmcmd -f "$dst" 2>/dev/null ||
+ { $doit $mvcmd -f "$dst" "$rmtmp" 2>/dev/null &&
+ { $doit $rmcmd -f "$rmtmp" 2>/dev/null; :; }
+ } ||
+ { echo "$0: cannot unlink or rename $dst" >&2
+ (exit 1); exit 1
+ }
+ } &&
+
+ # Now rename the file to the real destination.
+ $doit $mvcmd "$dsttmp" "$dst"
+ }
+ fi || exit 1
+
+ trap '' 0
+ fi
+done
+
+# Local variables:
+# eval: (add-hook 'write-file-hooks 'time-stamp)
+# time-stamp-start: "scriptversion="
+# time-stamp-format: "%:y-%02m-%02d.%02H"
+# time-stamp-time-zone: "UTC0"
+# time-stamp-end: "; # UTC"
+# End:
diff --git a/tools/gzinject/patches/NACE.gzi b/tools/gzinject/patches/NACE.gzi
new file mode 100644
index 000000000..30ab26f55
--- /dev/null
+++ b/tools/gzinject/patches/NACE.gzi
@@ -0,0 +1,6 @@
+# default gz patches for NACE
+0000 00000000 00000001
+# use 8MB memory
+0304 00002EB0 60000000
+# allocate 32MB for rom
+0304 0005BFD4 3C807200
diff --git a/tools/gzinject/patches/NACJ.gzi b/tools/gzinject/patches/NACJ.gzi
new file mode 100644
index 000000000..28415cb8f
--- /dev/null
+++ b/tools/gzinject/patches/NACJ.gzi
@@ -0,0 +1,6 @@
+# default gz patches for NACJ
+0000 00000000 00000001
+# use 8MB memory
+0304 00002EB0 60000000
+# allocate 32MB for rom
+0304 0005BF44 3C807200
diff --git a/tools/gzinject/patches/NKZE.gzi b/tools/gzinject/patches/NKZE.gzi
new file mode 100644
index 000000000..440cbee08
--- /dev/null
+++ b/tools/gzinject/patches/NKZE.gzi
@@ -0,0 +1,16 @@
+# NKZE kz-NZSE
+0000 00000000 00000001
+# decompress content1
+0100 00000000 00000000
+# apply 12MB fixes
+0304 00010B58 3C8000C0
+0304 0004BD20 67E47000
+0304 0004BC80 3CA00100
+# apply controller remappings
+0302 00148514 00000800
+0302 00148518 00000400
+0302 0014851C 00000200
+0302 00148520 00000100
+0302 00148528 00000020
+# compress content1
+0200 00000000 00000000
\ No newline at end of file
diff --git a/tools/gzinject/patches/NKZJ.gzi b/tools/gzinject/patches/NKZJ.gzi
new file mode 100644
index 000000000..d2ee665dd
--- /dev/null
+++ b/tools/gzinject/patches/NKZJ.gzi
@@ -0,0 +1,16 @@
+# NKZJ kz-NZSJ
+0000 00000000 00000001
+# decompress content1
+0100 00000000 00000000
+# apply 12MB fixes
+0304 00010B58 3C8000C0
+0304 0004BD94 67E47000
+0304 0004BCF4 3CA00100
+# apply controller remappings
+0302 0014AA54 00000800
+0302 0014AA58 00000400
+0302 0014AA5C 00000200
+0302 0014AA60 00000100
+0302 0014AA68 00000020
+# compress content1
+0200 00000000 00000000
\ No newline at end of file
diff --git a/tools/gzinject/patches/gz_default_remap.gzi b/tools/gzinject/patches/gz_default_remap.gzi
new file mode 100644
index 000000000..22b857ac4
--- /dev/null
+++ b/tools/gzinject/patches/gz_default_remap.gzi
@@ -0,0 +1,9 @@
+# gz standard remapping for NACE and NACJ
+0000 00000000 00000001
+# apply d-pad remappings
+0302 0016BAF0 00000800
+0302 0016BAF4 00000400
+0302 0016BAF8 00000200
+0302 0016BAFC 00000100
+# apply c-stick remapping
+0302 0016BB04 00000020
diff --git a/tools/gzinject/patches/gz_raphnet_remap.gzi b/tools/gzinject/patches/gz_raphnet_remap.gzi
new file mode 100644
index 000000000..61e1a968c
--- /dev/null
+++ b/tools/gzinject/patches/gz_raphnet_remap.gzi
@@ -0,0 +1,9 @@
+# gz raphnet remapping for NACE and NACJ
+0000 00000000 00000001
+# apply d-pad remappings
+0302 0016BAF0 00000800
+0302 0016BAF4 00000400
+0302 0016BAF8 00000200
+0302 0016BAFC 00000100
+# apply z-trigger remapping
+0302 0016BAD8 00000020
diff --git a/tools/gzinject/patches/hb_NACE.gzi b/tools/gzinject/patches/hb_NACE.gzi
new file mode 100644
index 000000000..dccad2e18
--- /dev/null
+++ b/tools/gzinject/patches/hb_NACE.gzi
@@ -0,0 +1,15 @@
+# homeboy patches for NACE
+0000 00000000 00000001
+# resize MEM2 heap for homeboy
+0302 00085732 00009010
+0304 00085738 60000000
+0304 00085744 60000000
+# homeboy hook
+0304 00002EA8 3c809000
+0304 00002EAC 38840800
+0304 00002EB0 7c8903a6
+0304 00002EB4 80630018
+0304 00002EB8 4e800421
+# Change iOS to 61
+0000 00000000 00000064
+0301 0000018B 0000003D
\ No newline at end of file
diff --git a/tools/gzinject/patches/hb_NACJ.gzi b/tools/gzinject/patches/hb_NACJ.gzi
new file mode 100644
index 000000000..0e852695d
--- /dev/null
+++ b/tools/gzinject/patches/hb_NACJ.gzi
@@ -0,0 +1,15 @@
+# homeboy patches for NACJ
+0000 00000000 00000001
+# resize MEM2 heap for homeboy
+0302 00085726 00009010
+0304 0008572C 60000000
+0304 00085738 60000000
+# homeboy hook
+0304 00002EA8 3c809000
+0304 00002EAC 38840800
+0304 00002EB0 7c8903a6
+0304 00002EB4 80630018
+0304 00002EB8 4e800421
+# Change iOS to 61
+0000 00000000 00000064
+0301 0000018B 0000003D
\ No newline at end of file
diff --git a/tools/gzinject/patches/ootr_dpad_remap.gzi b/tools/gzinject/patches/ootr_dpad_remap.gzi
new file mode 100644
index 000000000..57fcb9256
--- /dev/null
+++ b/tools/gzinject/patches/ootr_dpad_remap.gzi
@@ -0,0 +1,6 @@
+# ootr remapping for NACE and NACJ
+0000 00000000 00000001
+# apply d-pad remappings
+0302 0016BAF4 00000400
+0302 0016BAF8 00000200
+0302 0016BAFC 00000100
diff --git a/tools/gzinject/src/aes.c b/tools/gzinject/src/aes.c
new file mode 100644
index 000000000..f7701f0a8
--- /dev/null
+++ b/tools/gzinject/src/aes.c
@@ -0,0 +1,567 @@
+/*
+
+This is an implementation of the AES algorithm, specifically ECB, CTR and CBC mode.
+Block size can be chosen in aes.h - available choices are AES128, AES192, AES256.
+
+The implementation is verified against the test vectors in:
+National Institute of Standards and Technology Special Publication 800-38A 2001 ED
+
+ECB-AES128
+----------
+
+plain-text:
+6bc1bee22e409f96e93d7e117393172a
+ae2d8a571e03ac9c9eb76fac45af8e51
+30c81c46a35ce411e5fbc1191a0a52ef
+f69f2445df4f9b17ad2b417be66c3710
+
+key:
+2b7e151628aed2a6abf7158809cf4f3c
+
+resulting cipher
+3ad77bb40d7a3660a89ecaf32466ef97
+f5d3d58503b9699de785895a96fdbaaf
+43b1cd7f598ece23881b00e3ed030688
+7b0c785e27e8ad3f8223207104725dd4
+
+
+NOTE: String length must be evenly divisible by 16byte (str_len % 16 == 0)
+You should pad the end of the string with zeros if this is not the case.
+For AES192/256 the key size is proportionally larger.
+
+*/
+
+
+/*****************************************************************************/
+/* Includes: */
+/*****************************************************************************/
+#include <stdint.h>
+#include <string.h> // CBC mode, for memset
+#include "aes.h"
+
+/*****************************************************************************/
+/* Defines: */
+/*****************************************************************************/
+// The number of columns comprising a state in AES. This is a constant in AES. Value=4
+#define Nb 4
+
+#if defined(AES256) && (AES256 == 1)
+#define Nk 8
+#define Nr 14
+#elif defined(AES192) && (AES192 == 1)
+#define Nk 6
+#define Nr 12
+#else
+#define Nk 4 // The number of 32 bit words in a key.
+#define Nr 10 // The number of rounds in AES Cipher.
+#endif
+
+// jcallan@github points out that declaring Multiply as a function
+// reduces code size considerably with the Keil ARM compiler.
+// See this link for more information: https://github.com/kokke/tiny-AES-C/pull/3
+#ifndef MULTIPLY_AS_A_FUNCTION
+#define MULTIPLY_AS_A_FUNCTION 0
+#endif
+
+
+
+
+/*****************************************************************************/
+/* Private variables: */
+/*****************************************************************************/
+// state - array holding the intermediate results during decryption.
+typedef uint8_t state_t[4][4];
+
+
+
+// The lookup-tables are marked const so they can be placed in read-only storage instead of RAM
+// The numbers below can be computed dynamically trading ROM for RAM -
+// This can be useful in (embedded) bootloader applications, where ROM is often limited.
+static const uint8_t sbox[256] = {
+ //0 1 2 3 4 5 6 7 8 9 A B C D E F
+ 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76,
+ 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0,
+ 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15,
+ 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75,
+ 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84,
+ 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf,
+ 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8,
+ 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2,
+ 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73,
+ 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb,
+ 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79,
+ 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08,
+ 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a,
+ 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e,
+ 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf,
+ 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16 };
+
+static const uint8_t rsbox[256] = {
+ 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb,
+ 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb,
+ 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d, 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e,
+ 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2, 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25,
+ 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16, 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92,
+ 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda, 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84,
+ 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a, 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06,
+ 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02, 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b,
+ 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea, 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73,
+ 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85, 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e,
+ 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89, 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b,
+ 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20, 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4,
+ 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31, 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f,
+ 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d, 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef,
+ 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0, 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61,
+ 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26, 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d };
+
+// The round constant word array, Rcon[i], contains the values given by
+// x to the power (i-1) being powers of x (x is denoted as {02}) in the field GF(2^8)
+static const uint8_t Rcon[11] = {
+ 0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36 };
+
+/*
+* Jordan Goulder points out in PR #12 (https://github.com/kokke/tiny-AES-C/pull/12),
+* that you can remove most of the elements in the Rcon array, because they are unused.
+*
+* From Wikipedia's article on the Rijndael key schedule @ https://en.wikipedia.org/wiki/Rijndael_key_schedule#Rcon
+*
+* "Only the first some of these constants are actually used – up to rcon[10] for AES-128 (as 11 round keys are needed),
+* up to rcon[8] for AES-192, up to rcon[7] for AES-256. rcon[0] is not used in AES algorithm."
+*/
+
+
+/*****************************************************************************/
+/* Private functions: */
+/*****************************************************************************/
+/*
+static uint8_t getSBoxValue(uint8_t num)
+{
+return sbox[num];
+}
+*/
+#define getSBoxValue(num) (sbox[(num)])
+/*
+static uint8_t getSBoxInvert(uint8_t num)
+{
+return rsbox[num];
+}
+*/
+#define getSBoxInvert(num) (rsbox[(num)])
+
+// This function produces Nb(Nr+1) round keys. The round keys are used in each round to decrypt the states.
+static void KeyExpansion(uint8_t* RoundKey, const uint8_t* Key)
+{
+ unsigned i, j, k;
+ uint8_t tempa[4]; // Used for the column/row operations
+
+ // The first round key is the key itself.
+ for (i = 0; i < Nk; ++i)
+ {
+ RoundKey[(i * 4) + 0] = Key[(i * 4) + 0];
+ RoundKey[(i * 4) + 1] = Key[(i * 4) + 1];
+ RoundKey[(i * 4) + 2] = Key[(i * 4) + 2];
+ RoundKey[(i * 4) + 3] = Key[(i * 4) + 3];
+ }
+
+ // All other round keys are found from the previous round keys.
+ for (i = Nk; i < Nb * (Nr + 1); ++i)
+ {
+ {
+ k = (i - 1) * 4;
+ tempa[0] = RoundKey[k + 0];
+ tempa[1] = RoundKey[k + 1];
+ tempa[2] = RoundKey[k + 2];
+ tempa[3] = RoundKey[k + 3];
+
+ }
+
+ if (i % Nk == 0)
+ {
+ // This function shifts the 4 bytes in a word to the left once.
+ // [a0,a1,a2,a3] becomes [a1,a2,a3,a0]
+
+ // Function RotWord()
+ {
+ k = tempa[0];
+ tempa[0] = tempa[1];
+ tempa[1] = tempa[2];
+ tempa[2] = tempa[3];
+ tempa[3] = k;
+ }
+
+ // SubWord() is a function that takes a four-byte input word and
+ // applies the S-box to each of the four bytes to produce an output word.
+
+ // Function Subword()
+ {
+ tempa[0] = getSBoxValue(tempa[0]);
+ tempa[1] = getSBoxValue(tempa[1]);
+ tempa[2] = getSBoxValue(tempa[2]);
+ tempa[3] = getSBoxValue(tempa[3]);
+ }
+
+ tempa[0] = tempa[0] ^ Rcon[i / Nk];
+ }
+#if defined(AES256) && (AES256 == 1)
+ if (i % Nk == 4)
+ {
+ // Function Subword()
+ {
+ tempa[0] = getSBoxValue(tempa[0]);
+ tempa[1] = getSBoxValue(tempa[1]);
+ tempa[2] = getSBoxValue(tempa[2]);
+ tempa[3] = getSBoxValue(tempa[3]);
+ }
+ }
+#endif
+ j = i * 4; k = (i - Nk) * 4;
+ RoundKey[j + 0] = RoundKey[k + 0] ^ tempa[0];
+ RoundKey[j + 1] = RoundKey[k + 1] ^ tempa[1];
+ RoundKey[j + 2] = RoundKey[k + 2] ^ tempa[2];
+ RoundKey[j + 3] = RoundKey[k + 3] ^ tempa[3];
+ }
+}
+
+void AES_init_ctx(struct AES_ctx* ctx, const uint8_t* key)
+{
+ KeyExpansion(ctx->RoundKey, key);
+}
+#if defined(CBC) && (CBC == 1)
+void AES_init_ctx_iv(struct AES_ctx* ctx, const uint8_t* key, const uint8_t* iv)
+{
+ KeyExpansion(ctx->RoundKey, key);
+ memcpy(ctx->Iv, iv, AES_BLOCKLEN);
+}
+void AES_ctx_set_iv(struct AES_ctx* ctx, const uint8_t* iv)
+{
+ memcpy(ctx->Iv, iv, AES_BLOCKLEN);
+}
+#endif
+
+// This function adds the round key to state.
+// The round key is added to the state by an XOR function.
+static void AddRoundKey(uint8_t round, state_t* state, uint8_t* RoundKey)
+{
+ uint8_t i, j;
+ for (i = 0; i < 4; ++i)
+ {
+ for (j = 0; j < 4; ++j)
+ {
+ (*state)[i][j] ^= RoundKey[(round * Nb * 4) + (i * Nb) + j];
+ }
+ }
+}
+
+// The SubBytes Function Substitutes the values in the
+// state matrix with values in an S-box.
+static void SubBytes(state_t* state)
+{
+ uint8_t i, j;
+ for (i = 0; i < 4; ++i)
+ {
+ for (j = 0; j < 4; ++j)
+ {
+ (*state)[j][i] = getSBoxValue((*state)[j][i]);
+ }
+ }
+}
+
+// The ShiftRows() function shifts the rows in the state to the left.
+// Each row is shifted with different offset.
+// Offset = Row number. So the first row is not shifted.
+static void ShiftRows(state_t* state)
+{
+ uint8_t temp;
+
+ // Rotate first row 1 columns to left
+ temp = (*state)[0][1];
+ (*state)[0][1] = (*state)[1][1];
+ (*state)[1][1] = (*state)[2][1];
+ (*state)[2][1] = (*state)[3][1];
+ (*state)[3][1] = temp;
+
+ // Rotate second row 2 columns to left
+ temp = (*state)[0][2];
+ (*state)[0][2] = (*state)[2][2];
+ (*state)[2][2] = temp;
+
+ temp = (*state)[1][2];
+ (*state)[1][2] = (*state)[3][2];
+ (*state)[3][2] = temp;
+
+ // Rotate third row 3 columns to left
+ temp = (*state)[0][3];
+ (*state)[0][3] = (*state)[3][3];
+ (*state)[3][3] = (*state)[2][3];
+ (*state)[2][3] = (*state)[1][3];
+ (*state)[1][3] = temp;
+}
+
+static uint8_t xtime(uint8_t x)
+{
+ return ((x << 1) ^ (((x >> 7) & 1) * 0x1b));
+}
+
+// MixColumns function mixes the columns of the state matrix
+static void MixColumns(state_t* state)
+{
+ uint8_t i;
+ uint8_t Tmp, Tm, t;
+ for (i = 0; i < 4; ++i)
+ {
+ t = (*state)[i][0];
+ Tmp = (*state)[i][0] ^ (*state)[i][1] ^ (*state)[i][2] ^ (*state)[i][3];
+ Tm = (*state)[i][0] ^ (*state)[i][1]; Tm = xtime(Tm); (*state)[i][0] ^= Tm ^ Tmp;
+ Tm = (*state)[i][1] ^ (*state)[i][2]; Tm = xtime(Tm); (*state)[i][1] ^= Tm ^ Tmp;
+ Tm = (*state)[i][2] ^ (*state)[i][3]; Tm = xtime(Tm); (*state)[i][2] ^= Tm ^ Tmp;
+ Tm = (*state)[i][3] ^ t; Tm = xtime(Tm); (*state)[i][3] ^= Tm ^ Tmp;
+ }
+}
+
+// Multiply is used to multiply numbers in the field GF(2^8)
+#if MULTIPLY_AS_A_FUNCTION
+static uint8_t Multiply(uint8_t x, uint8_t y)
+{
+ return (((y & 1) * x) ^
+ ((y >> 1 & 1) * xtime(x)) ^
+ ((y >> 2 & 1) * xtime(xtime(x))) ^
+ ((y >> 3 & 1) * xtime(xtime(xtime(x)))) ^
+ ((y >> 4 & 1) * xtime(xtime(xtime(xtime(x))))));
+}
+#else
+#define Multiply(x, y) \
+ ( ((y & 1) * x) ^ \
+ ((y>>1 & 1) * xtime(x)) ^ \
+ ((y>>2 & 1) * xtime(xtime(x))) ^ \
+ ((y>>3 & 1) * xtime(xtime(xtime(x)))) ^ \
+ ((y>>4 & 1) * xtime(xtime(xtime(xtime(x)))))) \
+
+#endif
+
+// MixColumns function mixes the columns of the state matrix.
+// The method used to multiply may be difficult to understand for the inexperienced.
+// Please use the references to gain more information.
+static void InvMixColumns(state_t* state)
+{
+ int i;
+ uint8_t a, b, c, d;
+ for (i = 0; i < 4; ++i)
+ {
+ a = (*state)[i][0];
+ b = (*state)[i][1];
+ c = (*state)[i][2];
+ d = (*state)[i][3];
+
+ (*state)[i][0] = Multiply(a, 0x0e) ^ Multiply(b, 0x0b) ^ Multiply(c, 0x0d) ^ Multiply(d, 0x09);
+ (*state)[i][1] = Multiply(a, 0x09) ^ Multiply(b, 0x0e) ^ Multiply(c, 0x0b) ^ Multiply(d, 0x0d);
+ (*state)[i][2] = Multiply(a, 0x0d) ^ Multiply(b, 0x09) ^ Multiply(c, 0x0e) ^ Multiply(d, 0x0b);
+ (*state)[i][3] = Multiply(a, 0x0b) ^ Multiply(b, 0x0d) ^ Multiply(c, 0x09) ^ Multiply(d, 0x0e);
+ }
+}
+
+
+// The SubBytes Function Substitutes the values in the
+// state matrix with values in an S-box.
+static void InvSubBytes(state_t* state)
+{
+ uint8_t i, j;
+ for (i = 0; i < 4; ++i)
+ {
+ for (j = 0; j < 4; ++j)
+ {
+ (*state)[j][i] = getSBoxInvert((*state)[j][i]);
+ }
+ }
+}
+
+static void InvShiftRows(state_t* state)
+{
+ uint8_t temp;
+
+ // Rotate first row 1 columns to right
+ temp = (*state)[3][1];
+ (*state)[3][1] = (*state)[2][1];
+ (*state)[2][1] = (*state)[1][1];
+ (*state)[1][1] = (*state)[0][1];
+ (*state)[0][1] = temp;
+
+ // Rotate second row 2 columns to right
+ temp = (*state)[0][2];
+ (*state)[0][2] = (*state)[2][2];
+ (*state)[2][2] = temp;
+
+ temp = (*state)[1][2];
+ (*state)[1][2] = (*state)[3][2];
+ (*state)[3][2] = temp;
+
+ // Rotate third row 3 columns to right
+ temp = (*state)[0][3];
+ (*state)[0][3] = (*state)[1][3];
+ (*state)[1][3] = (*state)[2][3];
+ (*state)[2][3] = (*state)[3][3];
+ (*state)[3][3] = temp;
+}
+
+
+// Cipher is the main function that encrypts the PlainText.
+static void Cipher(state_t* state, uint8_t* RoundKey)
+{
+ uint8_t round = 0;
+
+ // Add the First round key to the state before starting the rounds.
+ AddRoundKey(0, state, RoundKey);
+
+ // There will be Nr rounds.
+ // The first Nr-1 rounds are identical.
+ // These Nr-1 rounds are executed in the loop below.
+ for (round = 1; round < Nr; ++round)
+ {
+ SubBytes(state);
+ ShiftRows(state);
+ MixColumns(state);
+ AddRoundKey(round, state, RoundKey);
+ }
+
+ // The last round is given below.
+ // The MixColumns function is not here in the last round.
+ SubBytes(state);
+ ShiftRows(state);
+ AddRoundKey(Nr, state, RoundKey);
+}
+
+static void InvCipher(state_t* state, uint8_t* RoundKey)
+{
+ uint8_t round = 0;
+
+ // Add the First round key to the state before starting the rounds.
+ AddRoundKey(Nr, state, RoundKey);
+
+ // There will be Nr rounds.
+ // The first Nr-1 rounds are identical.
+ // These Nr-1 rounds are executed in the loop below.
+ for (round = (Nr - 1); round > 0; --round)
+ {
+ InvShiftRows(state);
+ InvSubBytes(state);
+ AddRoundKey(round, state, RoundKey);
+ InvMixColumns(state);
+ }
+
+ // The last round is given below.
+ // The MixColumns function is not here in the last round.
+ InvShiftRows(state);
+ InvSubBytes(state);
+ AddRoundKey(0, state, RoundKey);
+}
+
+
+/*****************************************************************************/
+/* Public functions: */
+/*****************************************************************************/
+#if defined(ECB) && (ECB == 1)
+
+
+void AES_ECB_encrypt(struct AES_ctx *ctx, const uint8_t* buf)
+{
+ // The next function call encrypts the PlainText with the Key using AES algorithm.
+ Cipher((state_t*)buf, ctx->RoundKey);
+}
+
+void AES_ECB_decrypt(struct AES_ctx* ctx, const uint8_t* buf)
+{
+ // The next function call decrypts the PlainText with the Key using AES algorithm.
+ InvCipher((state_t*)buf, ctx->RoundKey);
+}
+
+
+#endif // #if defined(ECB) && (ECB == 1)
+
+
+
+
+
+#if defined(CBC) && (CBC == 1)
+
+
+static void XorWithIv(uint8_t* buf, uint8_t* Iv)
+{
+ uint8_t i;
+ for (i = 0; i < AES_BLOCKLEN; ++i) // The block in AES is always 128bit no matter the key size
+ {
+ buf[i] ^= Iv[i];
+ }
+}
+
+void AES_CBC_encrypt_buffer(struct AES_ctx *ctx, uint8_t* buf, uint32_t length)
+{
+ uintptr_t i;
+ uint8_t *Iv = ctx->Iv;
+ for (i = 0; i < length; i += AES_BLOCKLEN)
+ {
+ XorWithIv(buf, Iv);
+ Cipher((state_t*)buf, ctx->RoundKey);
+ Iv = buf;
+ buf += AES_BLOCKLEN;
+ //printf("Step %d - %d", i/16, i);
+ }
+ /* store Iv in ctx for next call */
+ memcpy(ctx->Iv, Iv, AES_BLOCKLEN);
+}
+
+void AES_CBC_decrypt_buffer(struct AES_ctx* ctx, uint8_t* buf, uint32_t length)
+{
+ uintptr_t i;
+ uint8_t storeNextIv[AES_BLOCKLEN];
+ for (i = 0; i < length; i += AES_BLOCKLEN)
+ {
+ memcpy(storeNextIv, buf, AES_BLOCKLEN);
+ InvCipher((state_t*)buf, ctx->RoundKey);
+ XorWithIv(buf, ctx->Iv);
+ memcpy(ctx->Iv, storeNextIv, AES_BLOCKLEN);
+ buf += AES_BLOCKLEN;
+ }
+
+}
+
+#endif // #if defined(CBC) && (CBC == 1)
+
+
+
+#if defined(CTR) && (CTR == 1)
+
+/* Symmetrical operation: same function for encrypting as for decrypting. Note any IV/nonce should never be reused with the same key */
+void AES_CTR_xcrypt_buffer(struct AES_ctx* ctx, uint8_t* buf, uint32_t length)
+{
+ uint8_t buffer[AES_BLOCKLEN];
+
+ unsigned i;
+ int bi;
+ for (i = 0, bi = AES_BLOCKLEN; i < length; ++i, ++bi)
+ {
+ if (bi == AES_BLOCKLEN) /* we need to regen xor compliment in buffer */
+ {
+
+ memcpy(buffer, ctx->Iv, AES_BLOCKLEN);
+ Cipher((state_t*)buffer, ctx->RoundKey);
+
+ /* Increment Iv and handle overflow */
+ for (bi = (AES_BLOCKLEN - 1); bi >= 0; --bi)
+ {
+ /* inc will owerflow */
+ if (ctx->Iv[bi] == 255)
+ {
+ ctx->Iv[bi] = 0;
+ continue;
+ }
+ ctx->Iv[bi] += 1;
+ break;
+ }
+ bi = 0;
+ }
+
+ buf[i] = (buf[i] ^ buffer[bi]);
+ }
+}
+
+#endif // #if defined(CTR) && (CTR == 1)
+
diff --git a/tools/gzinject/src/aes.h b/tools/gzinject/src/aes.h
new file mode 100644
index 000000000..d1a468630
--- /dev/null
+++ b/tools/gzinject/src/aes.h
@@ -0,0 +1,90 @@
+#ifndef _AES_H_
+#define _AES_H_
+
+#include <stdint.h>
+
+// #define the macros below to 1/0 to enable/disable the mode of operation.
+//
+// CBC enables AES encryption in CBC-mode of operation.
+// CTR enables encryption in counter-mode.
+// ECB enables the basic ECB 16-byte block algorithm. All can be enabled simultaneously.
+
+// The #ifndef-guard allows it to be configured before #include'ing or at compile time.
+#ifndef CBC
+#define CBC 1
+#endif
+
+#ifndef ECB
+#define ECB 1
+#endif
+
+#ifndef CTR
+#define CTR 1
+#endif
+
+
+#define AES128 1
+//#define AES192 1
+//#define AES256 1
+
+#define AES_BLOCKLEN 16 //Block length in bytes AES is 128b block only
+
+#if defined(AES256) && (AES256 == 1)
+#define AES_KEYLEN 32
+#define AES_keyExpSize 240
+#elif defined(AES192) && (AES192 == 1)
+#define AES_KEYLEN 24
+#define AES_keyExpSize 208
+#else
+#define AES_KEYLEN 16 // Key length in bytes
+#define AES_keyExpSize 176
+#endif
+
+struct AES_ctx
+{
+ uint8_t RoundKey[AES_keyExpSize];
+#if (defined(CBC) && (CBC == 1)) || (defined(CTR) && (CTR == 1))
+ uint8_t Iv[AES_BLOCKLEN];
+#endif
+};
+
+void AES_init_ctx(struct AES_ctx* ctx, const uint8_t* key);
+#if defined(CBC) && (CBC == 1)
+void AES_init_ctx_iv(struct AES_ctx* ctx, const uint8_t* key, const uint8_t* iv);
+void AES_ctx_set_iv(struct AES_ctx* ctx, const uint8_t* iv);
+#endif
+
+#if defined(ECB) && (ECB == 1)
+// buffer size is exactly AES_BLOCKLEN bytes;
+// you need only AES_init_ctx as IV is not used in ECB
+// NB: ECB is considered insecure for most uses
+void AES_ECB_encrypt(struct AES_ctx* ctx, const uint8_t* buf);
+void AES_ECB_decrypt(struct AES_ctx* ctx, const uint8_t* buf);
+
+#endif // #if defined(ECB) && (ECB == 1)
+
+
+#if defined(CBC) && (CBC == 1)
+// buffer size MUST be multiple of AES_BLOCKLEN;
+// Suggest https://en.wikipedia.org/wiki/Padding_(cryptography)#PKCS7 for padding scheme
+// NOTES: you need to set IV in ctx via AES_init_ctx_iv() or AES_ctx_set_iv()
+// no IV should ever be reused with the same key
+void AES_CBC_encrypt_buffer(struct AES_ctx* ctx, uint8_t* buf, uint32_t length);
+void AES_CBC_decrypt_buffer(struct AES_ctx* ctx, uint8_t* buf, uint32_t length);
+
+#endif // #if defined(CBC) && (CBC == 1)
+
+
+#if defined(CTR) && (CTR == 1)
+
+// Same function for encrypting as for decrypting.
+// IV is incremented for every block, and used after encryption as XOR-compliment for output
+// Suggesting https://en.wikipedia.org/wiki/Padding_(cryptography)#PKCS7 for padding scheme
+// NOTES: you need to set IV in ctx with AES_init_ctx_iv() or AES_ctx_set_iv()
+// no IV should ever be reused with the same key
+void AES_CTR_xcrypt_buffer(struct AES_ctx* ctx, uint8_t* buf, uint32_t length);
+
+#endif // #if defined(CTR) && (CTR == 1)
+
+
+#endif //_AES_H_
\ No newline at end of file
diff --git a/tools/gzinject/src/doltool.c b/tools/gzinject/src/doltool.c
new file mode 100644
index 000000000..dbc48e451
--- /dev/null
+++ b/tools/gzinject/src/doltool.c
@@ -0,0 +1,99 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdint.h>
+
+#include "doltool.h"
+#include "gzinject.h"
+
+void dol_load(doltool_ctxt_t *ctxt, uint8_t **file_data, uint32_t *file_size){
+
+ ctxt->file_data = file_data;
+ ctxt->file_size = file_size;
+
+ memcpy(&ctxt->hdr,*(ctxt->file_data),sizeof(ctxt->hdr));
+
+ for(int i=0;i<7;i++){
+ ctxt->hdr.text_size[i] = REVERSEENDIAN32(ctxt->hdr.text_size[i]);
+ ctxt->hdr.text_offset[i] = REVERSEENDIAN32(ctxt->hdr.text_offset[i]);
+ ctxt->hdr.text_loading[i] = REVERSEENDIAN32(ctxt->hdr.text_loading[i]);
+ if(ctxt->hdr.text_size[i]>0){
+ ctxt->text_sections[i] = *(ctxt->file_data) + ctxt->hdr.text_offset[i];
+ }
+ }
+ for(int i=0;i<11;i++){
+ ctxt->hdr.data_size[i] = REVERSEENDIAN32(ctxt->hdr.data_size[i]);
+ ctxt->hdr.data_offset[i] = REVERSEENDIAN32(ctxt->hdr.data_offset[i]);
+ ctxt->hdr.data_loading[i] = REVERSEENDIAN32(ctxt->hdr.data_loading[i]);
+ if(ctxt->hdr.data_size[i]>0){
+ ctxt->data_sections[i] = *(ctxt->file_data) + ctxt->hdr.data_offset[i];
+ }
+ }
+}
+
+void dol_inject(doltool_ctxt_t *ctxt, uint8_t *text, size_t size, uint32_t loading_addr){
+ int injection_idx = 0;
+ size = addpadding(size,16);
+ for(int i = 0;i<7;i++){
+ if(ctxt->text_sections[i]) continue;
+ injection_idx = i;
+ ctxt->text_sections[i] = text;
+ ctxt->hdr.text_loading[i] = loading_addr;
+ ctxt->hdr.text_offset[i] = ctxt->hdr.data_offset[0];
+ ctxt->hdr.text_size[i] = size;
+ break;
+ }
+ for(int i=0;i<11;i++){
+ if(ctxt->data_sections[i]){
+ ctxt->hdr.data_offset[i] += ctxt->hdr.text_size[injection_idx];
+ }else{
+ break;
+ }
+ }
+}
+
+size_t dol_save(doltool_ctxt_t *ctxt){
+ uint32_t text_sizes[7];
+ uint32_t data_sizes[11];
+ memcpy(text_sizes,ctxt->hdr.text_size,sizeof(ctxt->hdr.text_size));
+ memcpy(data_sizes,ctxt->hdr.data_size,sizeof(ctxt->hdr.data_size));
+ uint32_t totalsize = 0x100;
+ for(int i=0;i<7;i++){
+ totalsize += ctxt->hdr.text_size[i];
+ ctxt->hdr.text_size[i] = REVERSEENDIAN32(ctxt->hdr.text_size[i]);
+ ctxt->hdr.text_offset[i] = REVERSEENDIAN32(ctxt->hdr.text_offset[i]);
+ ctxt->hdr.text_loading[i] = REVERSEENDIAN32(ctxt->hdr.text_loading[i]);
+ }
+ for(int i=0;i<11;i++){
+ totalsize += ctxt->hdr.data_size[i];
+ ctxt->hdr.data_size[i] = REVERSEENDIAN32(ctxt->hdr.data_size[i]);
+ ctxt->hdr.data_offset[i] = REVERSEENDIAN32(ctxt->hdr.data_offset[i]);
+ ctxt->hdr.data_loading[i] = REVERSEENDIAN32(ctxt->hdr.data_loading[i]);
+ }
+
+ uint8_t *new_data = malloc(totalsize);
+ if(!new_data){
+ perror("Could not allocate new dol");
+ return 0;
+ }
+ memcpy(new_data,&ctxt->hdr,sizeof(ctxt->hdr));
+ uint8_t *p = new_data + sizeof(ctxt->hdr);
+ for(int i=0;i<7;i++){
+ if(ctxt->text_sections[i]){
+ memcpy(p,ctxt->text_sections[i],text_sizes[i]);
+ p += text_sizes[i];
+ }
+ }
+ for(int i=0;i<11;i++){
+ if(ctxt->data_sections[i]){
+ memcpy(p,ctxt->data_sections[i],data_sizes[i]);
+ p += data_sizes[i];
+ }
+ }
+ free(*(ctxt->file_data));
+ *(ctxt->file_data) = new_data;
+ if(ctxt->file_size){
+ *(ctxt->file_size) = totalsize;
+ }
+ return totalsize;
+}
\ No newline at end of file
diff --git a/tools/gzinject/src/doltool.h b/tools/gzinject/src/doltool.h
new file mode 100644
index 000000000..3c30194e9
--- /dev/null
+++ b/tools/gzinject/src/doltool.h
@@ -0,0 +1,31 @@
+#ifndef _DOLTOOL_H
+#define _DOLTOOL_H
+
+#include <stdint.h>
+
+typedef struct {
+ uint32_t text_offset[7]; /* 0x000 */
+ uint32_t data_offset[11]; /* 0x01C */
+ uint32_t text_loading[7]; /* 0x048 */
+ uint32_t data_loading[11]; /* 0x064 */
+ uint32_t text_size[7]; /* 0x090 */
+ uint32_t data_size[11]; /* 0x0AC */
+ uint32_t bss_loading; /* 0x0D8 */
+ uint32_t bss_size; /* 0x0DC */
+ uint32_t entry; /* 0x0E0 */
+ char padding[0x1C]; /* 0x0E4 */
+} dol_hdr_t; /* 0x100 */
+
+typedef struct{
+ dol_hdr_t hdr;
+ uint8_t *text_sections[7];
+ uint8_t *data_sections[11];
+ uint8_t **file_data;
+ uint32_t *file_size;
+} doltool_ctxt_t;
+
+void dol_load(doltool_ctxt_t *ctxt, uint8_t **file_data, uint32_t *file_size);
+void dol_inject(doltool_ctxt_t *ctxt, uint8_t *text, size_t size, uint32_t loading_addr);
+size_t dol_save(doltool_ctxt_t *ctxt);
+
+#endif
\ No newline at end of file
diff --git a/tools/gzinject/src/fastaes.c b/tools/gzinject/src/fastaes.c
new file mode 100644
index 000000000..ed6e9ae4b
--- /dev/null
+++ b/tools/gzinject/src/fastaes.c
@@ -0,0 +1,115 @@
+#ifdef FASTAES
+#include <string.h>
+#include "fastaes.h"
+
+static __m128i do_key_exp(__m128i a, __m128i b) {
+ __m128i tmp;
+
+ b = _mm_shuffle_epi32(b, 0xFF);
+ tmp = _mm_slli_si128(a, 4);
+ a = _mm_xor_si128(a, tmp);
+ tmp = _mm_slli_si128(a, 4);
+ a = _mm_xor_si128(a, tmp);
+ tmp = _mm_slli_si128(a, 4);
+ a = _mm_xor_si128(a, tmp);
+ a = _mm_xor_si128(a, b);
+
+ return a;
+}
+
+static void key_expansion(const uint8_t *key, __m128i *key_sched, __m128i *dkey_sched) {
+ key_sched[0] = _mm_loadu_si128((const __m128i_u*)key);
+ key_sched[1] = do_key_exp(key_sched[0], _mm_aeskeygenassist_si128(key_sched[0], 0x01));
+ key_sched[2] = do_key_exp(key_sched[1], _mm_aeskeygenassist_si128(key_sched[1], 0x02));
+ key_sched[3] = do_key_exp(key_sched[2], _mm_aeskeygenassist_si128(key_sched[2], 0x04));
+ key_sched[4] = do_key_exp(key_sched[3], _mm_aeskeygenassist_si128(key_sched[3], 0x08));
+ key_sched[5] = do_key_exp(key_sched[4], _mm_aeskeygenassist_si128(key_sched[4], 0x10));
+ key_sched[6] = do_key_exp(key_sched[5], _mm_aeskeygenassist_si128(key_sched[5], 0x20));
+ key_sched[7] = do_key_exp(key_sched[6], _mm_aeskeygenassist_si128(key_sched[6], 0x40));
+ key_sched[8] = do_key_exp(key_sched[7], _mm_aeskeygenassist_si128(key_sched[7], 0x80));
+ key_sched[9] = do_key_exp(key_sched[8], _mm_aeskeygenassist_si128(key_sched[8], 0x1B));
+ key_sched[10] = do_key_exp(key_sched[9], _mm_aeskeygenassist_si128(key_sched[9], 0x36));
+
+ dkey_sched[0] = key_sched[0];
+ dkey_sched[1] = _mm_aesimc_si128(key_sched[1]);
+ dkey_sched[2] = _mm_aesimc_si128(key_sched[2]);
+ dkey_sched[3] = _mm_aesimc_si128(key_sched[3]);
+ dkey_sched[4] = _mm_aesimc_si128(key_sched[4]);
+ dkey_sched[5] = _mm_aesimc_si128(key_sched[5]);
+ dkey_sched[6] = _mm_aesimc_si128(key_sched[6]);
+ dkey_sched[7] = _mm_aesimc_si128(key_sched[7]);
+ dkey_sched[8] = _mm_aesimc_si128(key_sched[8]);
+ dkey_sched[9] = _mm_aesimc_si128(key_sched[9]);
+ dkey_sched[10] = key_sched[10];
+
+}
+
+void aes_ctx_init(aes_ctxt_t *ctx, const uint8_t *key, const uint8_t *iv) {
+ memcpy(ctx->iv, iv, sizeof(ctx->iv));
+ key_expansion(key, ctx->key_schedule, ctx->dkey_schedule);
+}
+
+static __m128i cipher(__m128i state, __m128i *key_sched) {
+ state = _mm_xor_si128(state, key_sched[0]);
+
+ for(int i = 1; i < 10; i++) {
+ state = _mm_aesenc_si128(state, key_sched[i]);
+ }
+
+ return _mm_aesenclast_si128(state, key_sched[10]);
+}
+
+static __m128i inv_cipher(__m128i state, __m128i *key_sched) {
+ state = _mm_xor_si128(state, key_sched[10]);
+ state = _mm_aesdec_si128(state, key_sched[9]);
+ state = _mm_aesdec_si128(state, key_sched[8]);
+ state = _mm_aesdec_si128(state, key_sched[7]);
+ state = _mm_aesdec_si128(state, key_sched[6]);
+ state = _mm_aesdec_si128(state, key_sched[5]);
+ state = _mm_aesdec_si128(state, key_sched[4]);
+ state = _mm_aesdec_si128(state, key_sched[3]);
+ state = _mm_aesdec_si128(state, key_sched[2]);
+ state = _mm_aesdec_si128(state, key_sched[1]);
+
+ state = _mm_aesdeclast_si128(state, key_sched[0]);
+
+ return state;
+}
+
+void aes_encrypt_buffer(aes_ctxt_t *ctx, uint8_t *buffer, size_t len) {
+ __m128i iv = _mm_loadu_si128((const __m128i*)ctx->iv);
+ __m128i state;
+
+ for(int i = 0; i < len; i += 16) {
+ state = _mm_loadu_si128((const __m128i*)buffer);
+ state = _mm_xor_si128(state, iv);
+ state = cipher(state, ctx->key_schedule);
+ _mm_storeu_si128((__m128i_u*)buffer, state);
+ iv = state;
+
+ buffer += 16;
+ }
+
+ _mm_storeu_si128((__m128i_u*)&ctx->state, state);
+ _mm_storeu_si128((__m128i_u*)ctx->iv, iv);
+}
+
+void aes_decrypt_buffer(aes_ctxt_t *ctx, uint8_t *buffer, size_t len) {
+ __m128i state;
+ __m128i iv = _mm_loadu_si128((const __m128i_u*)ctx->iv);
+ __m128i next_iv;
+
+ for(int i = 0; i < len; i += 16) {
+ state = _mm_loadu_si128((const __m128i_u*)buffer);
+ next_iv = state;
+ state = inv_cipher(state, ctx->dkey_schedule);
+ state = _mm_xor_si128(state, iv);
+ iv = next_iv;
+ _mm_storeu_si128((__m128i_u*)buffer, state);
+ buffer += 16;
+ }
+
+ _mm_storeu_si128((__m128i_u*)&ctx->state, state);
+ _mm_storeu_si128((__m128i_u*)ctx->iv, iv);
+}
+#endif
diff --git a/tools/gzinject/src/fastaes.h b/tools/gzinject/src/fastaes.h
new file mode 100644
index 000000000..3080751fb
--- /dev/null
+++ b/tools/gzinject/src/fastaes.h
@@ -0,0 +1,27 @@
#ifdef FASTAES
#ifndef _FASTAES_H
#define _FASTAES_H

/* Include targets reconstructed — the angle-bracket names were lost during
 * text extraction: fixed-width ints, size_t, and the AES-NI __m128i type. */
#include <stdint.h>
#include <stddef.h>
#include <wmmintrin.h>

/**
 * fast aes for x86/x86-64 processors.
 */

/* 4x4 byte AES state block. */
typedef uint8_t state_t[4][4];

typedef struct {
    state_t state;               /* state of the last processed block */
    uint8_t iv[16];              /* CBC chaining IV, updated per call */
    // gzinject only cares about aes128
    __m128i key_schedule[11];    /* encryption round keys */
    __m128i dkey_schedule[11];   /* decryption round keys */
} aes_ctxt_t;

/* Derive the round keys from `key` and load `iv` into the context. */
void aes_ctx_init(aes_ctxt_t *ctx, const uint8_t *key, const uint8_t *iv);
/* AES-128-CBC encrypt/decrypt `len` bytes of `buffer` in place. */
void aes_encrypt_buffer(aes_ctxt_t *ctx, uint8_t *buffer, size_t len);
void aes_decrypt_buffer(aes_ctxt_t *ctx, uint8_t *buffer, size_t len);

#endif
#endif
diff --git a/tools/gzinject/src/gzi.c b/tools/gzinject/src/gzi.c
new file mode 100644
index 000000000..95768c217
--- /dev/null
+++ b/tools/gzinject/src/gzi.c
@@ -0,0 +1,218 @@
/* Reconstructed include targets (angle-bracket names were stripped during
 * extraction). Chosen for what gzi.c visibly uses: printf/fprintf, calloc/
 * realloc/free, memcpy, fixed-width ints, SCNx16/SCNx32, isxdigit. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <inttypes.h>
#include <ctype.h>

#include "gzi.h"
#include "lz77.h"
#include "gzinject.h"
+
+typedef int (*gzi_action_t)(gzi_ctxt_t *ctxt, int pos);
+
+static int gzi_cmd_file(gzi_ctxt_t *ctxt, int pos){
+ ctxt->curfile = ctxt->codes[pos].data & 0xFF;
+ if(verbose){
+ printf("Setting current file to %d\n",ctxt->curfile);
+ }
+ return 1;
+}
+
+static int gzi_cmd_lz77_decomp(gzi_ctxt_t *ctxt, int pos){
+ int32_t curfile = ctxt->curfile;
+ if(curfile<0){
+ printf("Warning: No file Selected, not decompressing.\n");
+ return 0;
+ }
+ if(verbose){
+ printf("LZ77 Decompressing %d\n",curfile);
+ }
+ int decompsize = addpadding(lz77_decompressed_size(ctxt->file_ptrs[curfile]),16);
+ uint8_t *decomp = calloc(decompsize,1);
+ lz77_decompress(ctxt->file_ptrs[curfile],decomp);
+ free(ctxt->file_ptrs[curfile]);
+ ctxt->file_ptrs[curfile] = decomp;
+ ctxt->file_sizes[curfile] = decompsize;
+ return 1;
+}
+
/* gzi command 2: LZ77-compress the currently selected content file in
 * place, replacing the stored buffer and size. Returns 1 on success,
 * 0 if no file has been selected with command 0. */
static int gzi_cmd_lz77_comp(gzi_ctxt_t *ctxt, int pos){
    int32_t curfile = ctxt->curfile;
    if(curfile<0){
        printf("Warning: No file selected, not compressing.\n");
        return 0;
    }
    if(verbose){
        printf("LZ77 Compressing %d\n",curfile);
    }
    uint8_t *comp = NULL;
    uint32_t len = ctxt->file_sizes[curfile];

    // I hate this, but it works for now.
    // NOTE(review): (len & 0x8) is only ever 0 or 8, so this subtracts
    // either 8 or 0 from the length. If the intent was to trim to an
    // 8-byte boundary the mask should likely be 0x7 — confirm against
    // lz77_compress's expectations before changing.
    len -= (8 - (len & 0x8));
    int complen = lz77_compress(ctxt->file_ptrs[curfile],&comp,len,&len);
    free(ctxt->file_ptrs[curfile]);
    ctxt->file_ptrs[curfile] = comp;
    ctxt->file_sizes[curfile] = complen;
    return 1;
}
+
+static int gzi_cmd_apply_patch(gzi_ctxt_t *ctxt, int pos){
+ int32_t curfile = ctxt->curfile;
+ if(curfile<0){
+ printf("Warning: No file selected, not applying patch.\n");
+ }
+ gzi_code_t code = ctxt->codes[pos];
+ uint32_t val = code.data;
+ if(verbose){
+ printf("Apply patch to %d. offset 0x%x = 0x%x\n",curfile,code.offset,code.data);
+ }
+ uint8_t *p;
+ switch(curfile){
+ case GZI_FILE_TMD:
+ p = ctxt->tmd;
+ break;
+ case GZI_FILE_TIK:
+ p = ctxt->tik;
+ break;
+ case GZI_FILE_CERT:
+ p = ctxt->cert;
+ break;
+ default:
+ if(curfile>ctxt->filecnt-1){
+ return -1;
+ }
+ p = ctxt->file_ptrs[curfile];
+ break;
+ }
+ switch(code.len){
+ case 1:
+ *((uint8_t*)(p + code.offset)) = (uint8_t)val;
+ break;
+ case 2:
+ *((uint16_t*)(p + code.offset)) = REVERSEENDIAN16((uint16_t)val);
+ break;
+ case 4:
+ default:
+ *((uint32_t*)(p + code.offset)) = REVERSEENDIAN32(val);
+ break;
+ }
+ return 1;
+}
+
/* Dispatch table indexed by the gzi command byte (high byte of the CCLL
 * field): 0 = select file, 1 = LZ77 decompress, 2 = LZ77 compress,
 * 3 = patch bytes. gzi_run indexes this array directly. */
static gzi_action_t commands[] = {
    gzi_cmd_file,
    gzi_cmd_lz77_decomp,
    gzi_cmd_lz77_comp,
    gzi_cmd_apply_patch,
};
+
/* Read one line (terminated by '\n' or EOF) from `fle` into a freshly
 * allocated NUL-terminated buffer, grown in 256-byte steps. The
 * terminator is not stored. Returns NULL on allocation failure; the
 * caller owns and frees the result. */
static char *readline(FILE *fle){
    char *line = NULL;
    const int buflen = 256;
    for(int i = 0;; ++i){
        int c = fgetc(fle);

        if(i % buflen == 0){
            /* fix: realloc was unchecked — on failure the old code
             * dereferenced NULL and leaked the previous buffer */
            char *grown = realloc(line, i + buflen);
            if(!grown){
                free(line);
                return NULL;
            }
            line = grown;
        }
        if(c == EOF || c == '\n'){
            line[i] = 0;
            return line;
        }
        line[i] = (char)c;
    }
}
+
/* Return nonzero iff `string` consists of exactly `len` hexadecimal
 * digits (so an empty string only matches len == 0). */
int ishexstring(const char *string, size_t len){
    const char *s;
    for(s = string; *s!=0;s++){
        /* fix: isxdigit() has undefined behavior for negative char
         * values — cast through unsigned char first */
        if(!isxdigit((unsigned char)*s)){
            return 0;
        }
    }
    return (size_t)(s - string) == len;
}
+
+void parseline(gzi_ctxt_t *ctxt, const char *line){
+ char command[6]={0};
+ char offset[10]={0};
+ char data[10]={0};
+ sscanf(line,"%5s %9s %9s",command,offset,data);
+ if(!ishexstring(command,4) || !ishexstring(offset,8) || !ishexstring(offset,8))
+ return;
+ ctxt->codecnt++;
+ gzi_code_t *new_codes = realloc(ctxt->codes,sizeof(gzi_code_t) * ctxt->codecnt);
+ if(new_codes){
+ ctxt->codes = new_codes;
+ }
+ gzi_code_t code;
+ uint16_t cmd;
+ sscanf(command,"%"SCNx16,&cmd);
+ code.command = (cmd & 0xFF00) >> 8;
+ code.len = cmd & 0xFF;
+ sscanf(offset,"%"SCNx32,&code.offset);
+ sscanf(data,"%"SCNx32,&code.data);
+ memcpy(ctxt->codes + (ctxt->codecnt - 1),&code,sizeof(code));
+}
+
+int gzi_parse_file(gzi_ctxt_t *ctxt, const char *file){
+ FILE *fle = fopen(file,"r");
+ if(!fle){
+ fprintf(stderr,"Could not open %s, cannot parse file.\n",file);
+ }
+ if(verbose){
+ printf("Parsing gzi file %s\n",file);
+ }
+ while(!feof(fle)){
+ char *line = readline(fle);
+ if(!line){
+ fprintf(stderr,"Could not readline from gzi file %s.\n",file);
+ return 0;
+ }
+ if(line[0]=='#' || line[0]==0){
+ free(line);
+ continue;
+ }
+ parseline(ctxt,line);
+ free(line);
+ }
+ fclose(fle);
+ return 1;
+}
+
+int gzi_run(gzi_ctxt_t *ctxt){
+ if(verbose){
+ printf("Running gzi commands\n");
+ }
+ for(int i=0;icodecnt;i++){
+ commands[ctxt->codes[i].command](ctxt,i);
+ }
+ return 1;
+}
+
+int gzi_init(gzi_ctxt_t *ctxt, uint8_t **files, uint32_t *filesizes, int filecnt,
+ uint8_t *tmd, uint8_t *tik, uint8_t *cert,
+ uint32_t *tmd_size, uint32_t *tik_size, uint32_t *cert_size){
+ ctxt->codes = NULL;
+ ctxt->codecnt=0;
+ ctxt->curfile=-1;
+ ctxt->file_ptrs = files;
+ ctxt->file_sizes = filesizes;
+ ctxt->filecnt = filecnt;
+ ctxt->tmd = tmd;
+ ctxt->tik = tik;
+ ctxt->cert = cert;
+ ctxt->tmd_size = tmd_size;
+ ctxt->tik_size = tik_size;
+ ctxt->cert_size = cert_size;
+ return 1;
+}
+
+int gzi_destroy(gzi_ctxt_t *ctxt){
+ if(ctxt->codes) free(ctxt->codes);
+ return 1;
+}
\ No newline at end of file
diff --git a/tools/gzinject/src/gzi.h b/tools/gzinject/src/gzi.h
new file mode 100644
index 000000000..1b49fcfa9
--- /dev/null
+++ b/tools/gzinject/src/gzi.h
@@ -0,0 +1,39 @@
#ifndef _PATCH_H_
#define _PATCH_H_

/* Restored include target (the header name was lost during extraction). */
#include <stdint.h>

/* Pseudo file indices that select wad metadata blobs instead of contents. */
#define GZI_FILE_TMD 100
#define GZI_FILE_TIK 101
#define GZI_FILE_CERT 102

/* One parsed gzi line: "CCLL OOOOOOOO DDDDDDDD" in hex. */
typedef struct {
    uint8_t command;   /* command id (high byte of CCLL) */
    uint8_t len;       /* operand width in bytes (low byte of CCLL) */
    uint32_t offset;   /* patch offset within the selected file */
    uint32_t data;     /* operand / patch value */
} gzi_code_t;

typedef struct{
    gzi_code_t *codes;     /* parsed codes; owned, freed by gzi_destroy */
    int codecnt;
    int8_t curfile;        /* currently selected file index, -1 = none */
    uint8_t **file_ptrs;   /* caller-owned content buffers */
    uint32_t *file_sizes;
    uint8_t filecnt;
    uint8_t *tmd;          /* caller-owned metadata blobs */
    uint8_t *tik;
    uint8_t *cert;
    uint32_t *tmd_size;
    uint32_t *tik_size;
    uint32_t *cert_size;
} gzi_ctxt_t;

int gzi_parse_file(gzi_ctxt_t *ctxt, const char *file);
int gzi_run(gzi_ctxt_t *ctxt);
int gzi_init(gzi_ctxt_t *ctxt, uint8_t **files, uint32_t *filesizes, int filecnt,
    uint8_t *tmd, uint8_t *tik, uint8_t *cert,
    uint32_t *tmd_size, uint32_t *tik_size, uint32_t *cert_size);
int gzi_destroy(gzi_ctxt_t *ctxt);

#endif
\ No newline at end of file
diff --git a/tools/gzinject/src/gzinject.c b/tools/gzinject/src/gzinject.c
new file mode 100644
index 000000000..1b15397ae
--- /dev/null
+++ b/tools/gzinject/src/gzinject.c
@@ -0,0 +1,1329 @@
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "gzinject.h"
+#include "lz77.h"
+#include "u8.h"
+#include "gzi.h"
+#include "aes.h"
+#include "sha1.h"
+#include "md5.h"
+#include "romchu.h"
+#include "doltool.h"
+#include "fastaes.h"
+
/* AES-128 common key, filled from the key file in main(). */
static uint8_t key[16] = {0};
/* Region byte written into the TMD at offset 0x19d (default 0x03;
 * NOTE(review): region code semantics follow the Wii TMD field — confirm). */
static uint8_t region = 0x03;

static int cleanup = 0;        /* --cleanup: remove outputs before acting */
static int content_num = 5;    /* index of the main (U8 archive) content */

int verbose = 0;               /* --verbose flag, read across all modules */
int dol_after = -1;            /* --dol-after: patch index after which dols apply */

/* Command-line state shared by the action implementations. */
static char *wad = NULL;                 /* input (or output) wad path */
static char *directory = NULL;           /* extract/pack directory */
static char *keyfile = NULL;             /* common-key file path */
static char *workingdirectory = NULL;    /* cwd saved before chdir'ing around */
static char *rom = NULL;                 /* rom to inject / romc input */
static char *outwad = NULL;              /* output wad for inject action */
static patch_list_t *patch = NULL;       /* --patch-file list, command-line order */
static patch_list_t **patch_link = &patch;
static dol_list_t *dol = NULL;           /* --dol-inject list */
static dol_list_t **dol_link = &dol;
static dol_loading_list_t *dol_loading = NULL;   /* load addresses, paired 1:1 with dol */
static dol_loading_list_t **dol_loading_link = &dol_loading;
static char *titleid = NULL;             /* new 4-char channel id */
static char *channelname = NULL;         /* new channel title */
+
/* Decode a 16-bit big-endian value from two bytes. */
uint16_t be16(const uint8_t *p)
{
    uint16_t hi = p[0];
    uint16_t lo = p[1];
    return (uint16_t)((hi << 8) | lo);
}
+
/* Decode a 32-bit big-endian value from four bytes. */
uint32_t be32(const uint8_t *p)
{
    /* fix: the old `p[0] << 24` shifted an int into its sign bit for
     * p[0] >= 0x80, which is undefined behavior; accumulate in uint32_t */
    uint32_t v = 0;
    for (int i = 0; i < 4; i++)
        v = (v << 8) | p[i];
    return v;
}
+
/* getopt_long option table. The flag-style entries (verbose, cleanup)
 * write their int directly; all others dispatch through the short option
 * character in main()'s switch. */
static const struct option cmdoptions[] = {
    { "action",required_argument,0,'a' },
    { "wad",required_argument,0,'w' },
    { "channelid",required_argument,0,'i' },
    { "channeltitle",required_argument,0,'t' },
    { "help",no_argument,0,'h' },
    { "key",required_argument,0,'k' },
    { "region",required_argument,0,'r' },
    { "verbose",no_argument,&verbose,1 },
    { "directory",required_argument,0,'d' },
    { "cleanup", no_argument,&cleanup,1},
    { "version",no_argument,0,'v'},
    { "rom",required_argument,0,'m'},
    { "outputwad",required_argument,0,'o'},
    { "patch-file",required_argument,0,'p'},
    { "content-num",required_argument,0,'c'},
    { "dol-inject",required_argument,0,'f'},
    { "dol-loading",required_argument,0,'l'},
    { "dol-after", required_argument,0,'e'},
    { 0,0,0,0}
};
+
/* Replacement title key written into packed tickets; the bytes spell the
 * ASCII string "GZIsLifeAndBeer!". */
const uint8_t newkey[16] = {
    0x47, 0x5a, 0x49, 0x73, 0x4c, 0x69, 0x66, 0x65, 0x41, 0x6e, 0x64, 0x42, 0x65, 0x65, 0x72, 0x21
};

/* Shared one-shot hash contexts reused by do_sha1 / do_md5. */
static SHA1_CTX sha1;
static MD5_CTX md5;
+
#ifdef FASTAES
/* AES-NI backed AES-128-CBC helpers (fastaes.c). One shared context is
 * reused; every call re-initializes it with the given key and IV. */
static aes_ctxt_t aes;
static void do_encrypt(uint8_t *input, size_t size, const uint8_t *key, const uint8_t *iv) {
    aes_ctx_init(&aes, key, iv);
    aes_encrypt_buffer(&aes, input, size);
}

static void do_decrypt(uint8_t *input, size_t size, const uint8_t *key, const uint8_t *iv) {
    aes_ctx_init(&aes, key, iv);
    aes_decrypt_buffer(&aes, input, size);
}

#else

/* Software fallback — the AES_ctx / AES_init_ctx_iv API matches the
 * tiny-AES-c library (presumably vendored; confirm in aes.h/aes.c). */
static struct AES_ctx aes;
static void do_encrypt(uint8_t *input, size_t size, const uint8_t *key, const uint8_t *iv) {
    AES_init_ctx_iv(&aes, key, iv);
    AES_CBC_encrypt_buffer(&aes, input, size);
}

static void do_decrypt(uint8_t *input, size_t size, const uint8_t *key, const uint8_t *iv) {
    AES_init_ctx_iv(&aes, key, iv);
    AES_CBC_decrypt_buffer(&aes, input, size);
}
#endif
+
/* One-shot SHA-1 of `size` bytes of `input`; writes 20 bytes to `output`. */
 static void do_sha1(uint8_t *input, uint8_t *output, size_t size) {
    SHA1Init(&sha1);
    SHA1Update(&sha1, input, size);
    SHA1Final(output, &sha1);
}

/* One-shot MD5 of `size` bytes of `input`; writes 16 bytes to `output`. */
 static void do_md5(uint8_t *input, uint8_t *output, size_t size) {
    MD5_Init(&md5);
    MD5_Update(&md5, input, size);
    MD5_Final(output, &md5);
}
+
/* Round `inp` up to the next multiple of `padding` (unchanged when already
 * aligned). `padding` must be nonzero. */
uint32_t addpadding(uint32_t inp, uint32_t padding) {
    /* fix: the intermediate was a signed int, which truncated/overflowed
     * for inputs above INT_MAX */
    uint32_t rem = inp % padding;
    return rem ? inp + (padding - rem) : inp;
}
+
/* Read the big-endian 32-bit content length for entry `contentnum` of a
 * TMD blob: records are 36 bytes each starting at 0x1ec, with the size
 * field 4 bytes into the record. */
static uint32_t getcontentlength(uint8_t *tmd, uint32_t contentnum) {
    const uint8_t *rec = tmd + 0x1ec + (36 * contentnum) + 4;
    uint32_t len = 0;
    for (int i = 0; i < 4; i++)
        len = (len << 8) | rec[i];
    return len;
}
+
/* Write `size` big-endian into TMD content record `contentnum` (36-byte
 * records from 0x1ec, size at offset +4).
 * fix: byte stores replace the old casted uint32_t write, which was
 * unaligned (UB on strict-alignment targets) and relied on a
 * little-endian host for the REVERSEENDIAN32 trick. */
static void setcontentlength(uint8_t *tmd, uint32_t contentnum, uint32_t size){
    uint8_t *rec = tmd + 0x1ec + (36 * contentnum) + 4;
    rec[0] = (uint8_t)(size >> 24);
    rec[1] = (uint8_t)(size >> 16);
    rec[2] = (uint8_t)(size >> 8);
    rec[3] = (uint8_t)size;
}
+
static void removedir(const char *file);

/* Remove `file` from disk: directories are deleted recursively via
 * removedir (which recurses back here per entry), regular files via
 * remove(). Other file types and missing paths are silently skipped. */
static void removefile(const char* file) {
    struct stat sbuffer;
    if (stat(file, &sbuffer) == 0) {
        if ((sbuffer.st_mode & S_IFMT) == S_IFDIR) {
            removedir(file);
        }
        else if ((sbuffer.st_mode & S_IFMT) == S_IFREG) {
            if (verbose) {
                printf("Removing %s\n", file);
            }
            remove(file);
        }

    }
}
+
+static void removedir(const char *file) {
+ DIR *dir;
+ struct dirent *ent;
+ if ((dir = opendir(file)) != NULL) {
+ while ((ent = readdir(dir)) != NULL) {
+ if (strcmp(ent->d_name, ".") == 0 || strcmp(ent->d_name, "..") == 0)
+ continue;
+ char *path = malloc(1000);
+ snprintf(path, 1000, "%s/%s", file, ent->d_name);
+ removefile(path);
+ free(path);
+ }
+ if (verbose) {
+ printf("Removing %s\n", file);
+ }
+ rmdir(file);
+ }
+
+}
+
/* Return a newly allocated copy of `mystr` with the final ".ext" stripped
 * (a plain copy when there is no dot). Returns NULL for NULL input or on
 * allocation failure; the caller frees the result. */
static char *removeext(char* mystr) {
    if (mystr == NULL)
        return NULL;
    size_t n = strlen(mystr);
    char *copy = malloc(n + 1);
    if (copy == NULL)
        return NULL;
    memcpy(copy, mystr, n + 1);
    char *dot = strrchr(copy, '.');
    if (dot != NULL)
        *dot = '\0';
    return copy;
}
+
/* Print the command-line help text to stdout. */
static void print_usage() {
    puts("Usage:\n"
        "  gzinject -a extract -w SOURCEWAD [options]\n"
        "  gzinject -a pack -w DESTWAD [options]\n"
        "  gzinject -a inject -w SOURCEWAD -m ROM [options]\n"
        "  gzinject -a romc -m INROM -o OUTROM [options]\n"
        "  gzinject -a genkey [options]\n"
        "  gzinject --help\n"
        "  gzinject --version\n\n"
        "Actions:\n"
        "  extract  extracts SOURCEWAD to directory\n"
        "  pack  packs directory into DESTWAD\n"
        "  inject  injects rom into SOURCEWAD\n"
        "  romc  decompresses a romc compressed rom\n"
        "  genkey  generates wii common-key\n\n"
        "Options:\n"
        "  -i, --channelid=ID  New Channel ID For Pack and Inject actions (default: none)\n"
        "  -t, --title=title  New Channel name for pack and inject actions (default: none)\n"
        "  -h, --help  Prints this help message\n"
        "  -k, --key=keyfile  Location of the common-key file (default: ./common-key.bin)\n"
        "  -r, --region=1-3  Region to use (default: 3)\n"
        "  --verbose  Print out verbose program execution information\n"
        "  -d, --directory=directory  Directory to extract contents to, or directory to read contents from (default: ./wadextract)\n"
        "  --cleanup  Remove files before performing actions\n"
        "  --version  Prints the current version\n"
        "  -m, --rom=rom  Rom to inject for inject action (default: none), also rom to romc decompress\n"
        "  -o, --outputwad=outwad  The output wad for inject actions (default: SOURCEWAD-inject.wad), also output for romc decompression\n"
        "  -p, --patch-file=patchfile  gzi file to use for applying patches (default: none)\n"
        "  -c, --content=contentfile  the primary content file (default: 5)\n"
        "  --dol-inject  Binary data to inject into the emulator program, requires --dol-loading\n"
        "  --dol-loading  The loading address for the binary specified by --dol-inject\n"
        "  --dol-after  After which patch file to inject the dol, default: after all patches\n"
    );
}
+
+static void print_version(const char* prog) {
+ printf("%s Version ", prog);
+ printf(GZINJECT_VERSION);
+ printf("\n");
+}
+
/* "Trucha"/fakesign the ticket (W_TIK) or TMD (W_TMD) blob: zero the RSA
 * signature body, then brute-force a 16-bit filler word placed in unused
 * padding (`pos`) until the SHA-1 of the signed area begins with a 0x00
 * byte — the condition the old broken IOS signature check accepted. */
static void truchasign(uint8_t *data, uint8_t type, size_t len) {
    /* offset of writable padding inside each structure */
    uint16_t pos = 0x1f2;
    if (type == W_TMD) {
        pos = 0x1d4;
    }

    uint8_t digest[20];
    /* the signed area starts at 0x140, just past the signature block */
    do_sha1(data + 0x140, digest, len - 0x140);

    uint16_t i;
    if (digest[0] != 0x00) {
        /* blank the signature body */
        for (i = 4; i < 260; i++) {
            data[i] = 0x00;
        }
        /* try successive filler values until the leading hash byte is 0 */
        for (i = 0; i < 0xFFFF; i++) {
            uint16_t revi = REVERSEENDIAN16(i);
            memcpy(data + pos, &revi, 2);

            do_sha1(data + 0x140, digest, len - 0x140);

            if (digest[0] == 0x00) {
                break;
            }
        }
    }
}
+
/* Extract the WAD at `wad` into `directory`: split out cert/ticket/TMD/
 * footer, decrypt the title key from the ticket with the common key, then
 * decrypt every content, unpacking the main content's U8 archive.
 * Returns 1 on success, 0 on any error.
 * NOTE(review): several error paths below return without fclose(outfile);
 * the leaks are process-lifetime only but worth tightening. */
static int do_extract() {
    struct stat sbuffer;

    if (stat(wad, &sbuffer) != 0) {
        printf("Could not open %s\n", wad);
        return 0;
    }

    if (verbose) {
        printf("Extracting %s to %s\n", wad, directory);
    }

    /* slurp the whole wad into memory */
    uint8_t *data = (uint8_t*)malloc(sbuffer.st_size);
    if(!data){
        fprintf(stderr,"Could not allocate %ld bytes for wad\n",sbuffer.st_size);
        return 0;
    }
    FILE *wadfile = fopen(wad, "rb");
    if(!wadfile){
        fprintf(stderr,"Could not open %s wad file\n",wad);
        free(data);
        return 0;
    }
    int bytesread = fread(data, 1, sbuffer.st_size, wadfile);
    if(bytesread!=sbuffer.st_size || ferror(wadfile)){
        fprintf(stderr,"Could not read total wad, or file error occured");
        free(data);
        fclose(wadfile);
        return 0;
    }
    fclose(wadfile);
    /* wad magic check (" Is\0" tag at offset 3) */
    if (be32(&data[3]) != 0x20497300) {
        fprintf(stderr,"%s is an invalid wad file!\n",wad);
        free(data);
        return 0;
    }

    /* section sizes from the 0x40-byte wad header */
    uint32_t certsize = be32(data + 0x08);
    uint32_t tiksize = be32(data + 0x10);
    uint32_t tmdsize = be32(data + 0x14);
    uint32_t datasize = be32(data + 0x18);
    uint32_t footersize = be32(data + 0x1C);

    /* each section is padded to a 64-byte boundary */
    uint32_t certpos = 0x40;
    uint32_t tikpos = 0x40 + addpadding(certsize, 64);
    uint32_t tmdpos = tikpos + addpadding(tiksize, 64);
    uint32_t datapos = tmdpos + addpadding(tmdsize, 64);
    uint32_t footerpos = datapos + addpadding(datasize,64);

    if (cleanup == 1) removedir(directory);

    /* create the output directory if it does not already exist */
    stat(directory,&sbuffer);
    if(S_ISDIR(sbuffer.st_mode)){
        if(verbose){
            printf("%s exists, not creating.\n",directory);
        }
    }else{
        if(verbose)
            printf("Creating %s\n",directory);
        if(mkdir(directory, 0755)==-1){
            fprintf(stderr,"Could not mkdir %s\n",directory);
            free(data);
            return 0;
        }
    }

    if(chdir(directory)==-1){
        fprintf(stderr,"Could not chdir to %s\n",directory);
        free(data);
        return 0;
    }

    uint16_t contentcount = be16(data + tmdpos + 0x1de);

    /* dump the raw metadata sections */
    if (verbose) {
        printf("Writing cert.cert.\n");
    }
    FILE* outfile = fopen("cert.cert", "wb");
    if(!outfile){
        perror("Could not open cert.cert for writing\n");
        free(data);
        return 0;
    }

    fwrite(data + certpos, 1, certsize, outfile);
    if(ferror(outfile)){
        perror("Could not write to cert.cert\n");
        free(data);
        return 0;
    }
    fclose(outfile);

    if (verbose) {
        printf("Writing ticket.tik.\n");
    }
    outfile = fopen("ticket.tik", "wb");
    if(!outfile){
        perror("Could not open ticket.tik for writing.\n");
        free(data);
        return 0;
    }
    fwrite(data + tikpos, 1, tiksize, outfile);
    if(ferror(outfile)){
        perror("Could not write to ticket.tik\n");
        free(data);
        return 0;
    }
    fclose(outfile);

    if (verbose) {
        printf("Writing metadata.tmd.\n");
    }
    outfile = fopen("metadata.tmd", "wb");
    if(!outfile){
        perror("Could not open metadata.tmd for writing\n");
        free(data);
        return 0;
    }
    fwrite(data + tmdpos, 1, tmdsize, outfile);
    if(ferror(outfile)){
        perror("Could not write to metadata.tmd\n");
        free(data);
        return 0;
    }
    fclose(outfile);

    if(verbose){
        printf("Writing footer.bin\n");
    }
    outfile = fopen("footer.bin","wb");
    if(!outfile){
        perror("Could not open footer.bin for writing.\n");
        free(data);
        return 0;
    }
    fwrite(data + footerpos, 1, footersize, outfile);
    if(ferror(outfile)){
        perror("Could not write to footer.bin\n");
        free(data);
        return 0;
    }
    fclose(outfile);

    /* decrypt the title key: encrypted key at ticket+0x1bf, IV is the
     * 8-byte title id at ticket+0x1dc padded with zeros */
    uint8_t encryptedkey[16], iv[16];

    uint8_t i, j;
    for (i = 0; i < 16; i++) {
        encryptedkey[i] = data[tikpos + 0x1bf + i];
    }
    for (i = 0; i < 8; i++) {
        iv[i] = data[tikpos + 0x1dc + i];
        iv[i + 8] = 0x00;
    }
    ;
    do_decrypt(encryptedkey, 16, key, iv);

    /* per-content IV is the content index in the first two bytes, rest 0 */
    for (j = 2; j < 16; j++) iv[j] = 0x00;

    uint8_t *contentpos = data + datapos;

    /* NOTE(review): `i` is uint8_t while contentcount is uint16_t; counts
     * above 255 would loop forever — confirm wads never exceed that. */
    for (i = 0; i < contentcount; i++) {

        iv[0] = data[tmdpos + 0x1e8 + (0x24 * i)];
        iv[1] = data[tmdpos + 0x1e9 + (0x24 * i)];

        uint32_t size = addpadding(getcontentlength(data + tmdpos, i), 16);

        if (verbose) {
            printf("Decrypting contents %d.\n", i);
        }

        do_decrypt(contentpos, size, encryptedkey, iv);

        // Main rom content file
        if (i == content_num) {
            if (verbose) {
                printf("Extracting content %d uint8_t Archive.\n",content_num);
            }
            char dbuf[100];
            snprintf(dbuf,100,"content%d",content_num);
            if(!extract_u8_archive(contentpos,dbuf)){
                perror("Could not extract u8 archive");
                free(data);
                return 0;
            }
        }

        char contentname[100];
        snprintf(contentname, 100, "content%d.app", i);
        if (verbose) {
            printf("Writing %s.\n", contentname);
        }
        outfile = fopen(contentname, "wb");
        if(!outfile){
            fprintf(stderr,"Could not open %s for writing\n",contentname);
            free(data);
            return 0;
        }
        /* write the true (unpadded) content length */
        fwrite(contentpos, 1, getcontentlength(data + tmdpos, i), outfile);
        if(ferror(outfile)){
            fprintf(stderr,"Could not write to %s\n",contentname);
            free(data);
            return 0;
        }
        fclose(outfile);
        /* contents are aligned to 64 bytes inside the data section */
        contentpos += addpadding(size, 64);
    }
    chdir("..");
    free(data);
    return 1;
}
+
+static int apply_dol_patch(const char *dol_file, uint32_t loading_address, uint8_t **data, uint32_t *size){
+ if(verbose){
+ printf("Injecting dol file %s\n",dol_file);
+ }
+ struct stat sbuffer;
+ chdir(workingdirectory);
+ doltool_ctxt_t *dolctxt = calloc(1,sizeof(*dolctxt));
+ if(!dolctxt){
+ perror("Could not create dol ctxt");
+ errno = ENOMEM;
+ return -1;
+ }
+ dol_load(dolctxt,data,size);
+ FILE *inject_file = fopen(dol_file,"rb");
+ if(!inject_file){
+ free(dolctxt);
+ perror(dol_file);
+ errno = ENOENT;
+ return -1;
+ }
+ stat(dol_file,&sbuffer);
+ uint8_t *inject_data = malloc(sbuffer.st_size);
+ fread(inject_data,1,sbuffer.st_size,inject_file);
+ fclose(inject_file);
+ dol_inject(dolctxt,inject_data,sbuffer.st_size,loading_address);
+ dol_save(dolctxt);
+ free(dolctxt);
+ free(inject_data);
+ chdir(directory);
+ return 0;
+}
+
+static int do_pack() {
+ DIR *testdir = opendir(directory);
+ if (testdir) {
+ closedir(testdir);
+ }
+ else {
+ fprintf(stderr,"%s doesn't exist, or is not a directory!\n", directory);
+ return 0;
+ }
+
+ if (verbose) {
+ printf("Packing %s into %s\n", directory, wad);
+ }
+ if(chdir(directory)==-1){
+ fprintf(stderr,"Could not change directory to %s",directory);
+ return 0;
+ }
+
+ if (verbose) {
+ printf("Gathering WAD Header Information\n");
+ }
+
+ struct stat sbuffer;
+ if(stat("cert.cert", &sbuffer)!=0){
+ perror("Could not stat cert.cert\n");
+ return 0;
+ }
+ uint32_t certsize = sbuffer.st_size;
+
+ if(stat("ticket.tik", &sbuffer)!=0){
+ perror("Could not stat ticket.tik\n");
+ return 0;
+ }
+ uint32_t tiksize = sbuffer.st_size;
+
+ if(stat("metadata.tmd", &sbuffer)!=0){
+ perror("Could not stat metadata.tmd\n");
+ return 0;
+ }
+ uint32_t tmdsize = sbuffer.st_size;
+
+ if (verbose) {
+ printf("Reading cert.cert\n");
+ }
+ FILE *infile = fopen("cert.cert", "rb");
+ if(!infile){
+ perror("Could not open cert.cert for reading\n");
+ return 0;
+ }
+ uint8_t *cert = calloc(addpadding(certsize, 64), sizeof(uint8_t));
+ if(!cert){
+ fprintf(stderr,"Could not allocate %d bytes for cert\n",certsize);
+ return 0;
+ }
+ int bytesread = fread(cert, 1, certsize, infile);
+ if(bytesread!=certsize || ferror(infile)){
+ perror("Error reading from cert.cert\n");
+ free(cert);
+ return 0;
+ }
+ fclose(infile);
+
+ if (verbose) {
+ printf("Reading ticket.cert\n");
+ }
+ infile = fopen("ticket.tik", "rb");
+ if(!infile){
+ perror("Could not open ticket.tik for reading\n");
+ free(cert);
+ return 0;
+ }
+ uint8_t *tik = calloc(addpadding(tiksize, 64), sizeof(uint8_t));
+ if(!tik){
+ fprintf(stderr,"Could not allocate %d bytes for ticket\n",tiksize);
+ free(cert);
+ return 0;
+ }
+ bytesread = fread(tik, 1, tiksize, infile);
+ if(bytesread!=tiksize || ferror(infile)){
+ perror("Error reading from ticket.tik\n");
+ free(cert);
+ free(tik);
+ return 0;
+ }
+ fclose(infile);
+
+ if (verbose) {
+ printf("Reading metadata.tmd\n");
+ }
+ infile = fopen("metadata.tmd", "rb");
+ if(!infile){
+ perror("Could not open metadata.tmd for reading\n");
+ free(cert);
+ free(tik);
+ return 0;
+ }
+ uint8_t *tmd = calloc(addpadding(tmdsize, 64), sizeof(uint8_t));
+ if(!tmd){
+ fprintf(stderr,"Could not allocate %d bytes for tmd\n",tmdsize);
+ free(cert);
+ free(tik);
+ return 0;
+ }
+ bytesread = fread(tmd, 1, tmdsize, infile);
+ if(bytesread!=tmdsize || ferror(infile)){
+ perror("Error reading from tmddata.tmd\n");
+ free(cert);
+ free(tik);
+ free(tmd);
+ return 0;
+ }
+ fclose(infile);
+
+ if (verbose) {
+ printf("Generating Footer signature\n");
+ }
+ char footer[0x40] = {0};
+ sprintf(footer,"gzinject v%s https://github.com/krimtonz/gzinject", GZINJECT_VERSION);
+ uint32_t footersize = 0x40;
+
+ // Build Content5 into a .app file first
+ char dbuf[100], nbuf[100] = {0};
+ snprintf(dbuf,100,"content%d",content_num);
+ strcpy(nbuf,dbuf);
+ strcat(nbuf,".app");
+ if(verbose){
+ printf("Generating %s u8 archive\n",nbuf);
+ }
+
+ int content5len = create_u8_archive(dbuf,nbuf);
+ if(!content5len){
+ fprintf(stderr,"Could not create u8 archive from %s into %s\n",dbuf,nbuf);
+ free(cert);
+ free(tik);
+ free(tmd);
+ return 0;
+ }
+ chdir(workingdirectory);
+ chdir(directory);
+ if (verbose) {
+ printf("Modifying content metadata in the TMD\n");
+ }
+ uint16_t contentsc = be16(tmd + 0x1DE);
+ int i;
+
+ char cfname[100];
+ uint8_t **fileptrs = malloc(sizeof(*fileptrs) * contentsc);
+ if(!fileptrs){
+ perror("Could not allocate filepointers.\n");
+ free(cert);
+ free(tik);
+ free(tmd);
+ return 0;
+ }
+ uint32_t *filesizes = malloc(sizeof(*filesizes) * contentsc);
+ if(!filesizes){
+ perror("Could not allocate filesizes\n");
+ free(cert);
+ free(tik);
+ free(tmd);
+ free(fileptrs);
+ return 0;
+ }
+
+ for (i = 0; i < contentsc; i++) {
+ snprintf(cfname, 30, "content%d.app", i);
+ stat(cfname, &sbuffer);
+ filesizes[i] = addpadding(sbuffer.st_size,16);
+ fileptrs[i] = calloc(filesizes[i],1);
+ if(!fileptrs[i]){
+ fprintf(stderr,"Could not allocate %ld bytes for %s\n",sbuffer.st_size,cfname);
+ goto error;
+ }
+ infile = fopen(cfname,"rb");
+ if(!infile){
+ fprintf(stderr,"Could not open %s for reading\n",cfname);
+ goto error;
+ }
+ bytesread = fread(fileptrs[i],1,sbuffer.st_size,infile);
+ if(bytesread!=sbuffer.st_size || ferror(infile)){
+ fprintf(stderr,"Error reading from %s\n",cfname);
+ goto error;
+ }
+ fclose(infile);
+ setcontentlength(tmd,i,filesizes[i]);
+ }
+
+ int patch_idx = 0;
+ int dol_applied = 0;
+ if(dol_after>=101) dol_after-=101;
+
+ while(patch){
+ if(verbose){
+ printf("Applying %s gzi patches\n",patch->filename);
+ }
+
+ if(chdir(workingdirectory)!=0){
+ fprintf(stderr,"Could not change directory to %s",workingdirectory);
+ }
+ gzi_ctxt_t gzi;
+ if(!gzi_init(&gzi,fileptrs,filesizes,contentsc,tmd,tik,cert,&tmdsize,&tiksize,&certsize)){
+ perror("Could not initialize patch file");
+ goto error;
+
+ }
+ if(!gzi_parse_file(&gzi,patch->filename)){
+ perror("Could not parse gzi patch file");
+ goto error;
+ }
+ if(!gzi_run(&gzi)){
+ perror("Could not run gzi patch file");
+ goto error;
+ }
+ if(chdir(directory)!=0){
+ fprintf(stderr,"Could not change directory to %s",directory);
+ goto error;
+ }
+
+ for(int i=0;inext;
+ free(old_patch);
+ if(dol_after == patch_idx){
+ while(dol && dol_loading){
+ if (apply_dol_patch(dol->filename,dol_loading->loading_address,&fileptrs[1],&filesizes[1]) != 0) {
+ fprintf(stderr, "Could not inject dol patch\n");
+ goto error;
+ }
+ dol_list_t *old_dol = dol;
+ dol = dol->next;
+ free(old_dol);
+ dol_loading_list_t *old_loading = dol_loading;
+ dol_loading = dol_loading->next;
+ free(old_loading);
+ }
+ dol_applied = 1;
+ setcontentlength(tmd,1,filesizes[1]);
+ }
+ patch_idx++;
+ }
+
+ if(!dol_applied && dol && dol_loading){
+ while(dol && dol_loading){
+ if (apply_dol_patch(dol->filename,dol_loading->loading_address,&fileptrs[1],&filesizes[1]) != 0) {
+ fprintf(stderr, "Could not inject dol patch\n");
+ goto error;
+ }
+ dol_list_t *old_dol = dol;
+ dol = dol->next;
+ free(old_dol);
+ dol_loading_list_t *old_loading = dol_loading;
+ dol_loading = dol_loading->next;
+ free(old_loading);
+ }
+ setcontentlength(tmd,1,filesizes[1]);
+ }
+
+ // Change Title ID
+ if (titleid != NULL) {
+ if (verbose) {
+ printf("Changing Channel ID\n");
+ }
+ memcpy(tik + 0x1e0, titleid, 4);
+ memcpy(tmd + 0x190, titleid, 4);
+ }
+
+ if (verbose) {
+ printf("Changing region in the TMD\n");
+ }
+ // Change the Region
+ tmd[0x19d] = region;
+
+ if (verbose) {
+ printf("Changing encryption key in the ticket\n");
+ }
+ // New key
+ memcpy(tik + 0x1bf, &newkey, 16);
+
+ //Decrypt the new key
+ uint8_t newenc[16];
+ uint8_t iv[16];
+
+ for (i = 0; i < 16; i++) {
+ newenc[i] = *(tik + 0x1bf + i);
+ }
+ for (i = 0; i < 8; i++) {
+ iv[i] = *(tik + 0x1dc + i);
+ iv[i + 8] = 0x00;
+ }
+
+ do_decrypt(newenc, 16, key, iv);
+
+ int j;
+
+ for (j = 2; j < 15; j++) {
+ iv[j] = 0x00;
+ }
+
+ for (i = 0; i < contentsc; i++) {
+ uint8_t *contents = fileptrs[i];
+
+ if (i == 0) {
+ if (channelname != NULL) {
+ if (verbose) {
+ printf("Changing the Channel Name in content0.app\n");
+ }
+
+ uint16_t imetpos = -1;
+ for (j = 0; j < 400; j++) {
+ if (strcmp((char*)(contents + j),"IMET")==0) {
+ imetpos = j;
+ break;
+ }
+ }
+ if(imetpos!=-1){
+ uint16_t count = 0;
+ size_t cnamelen = strlen(channelname);
+ char namebuf[40] = {0};
+ for(j=0,count=0;count0){
+ fwrite(&padding,1,padcnt,outwadfile);
+ if(ferror(outwadfile)){
+ perror("Could not write write content padding\n");
+ goto error;
+ }
+ }
+ }
+ if (verbose) {
+ printf("Writing footer\n");
+ }
+ fwrite(footer, 1, 0x40, outwadfile);
+ if(ferror(outwadfile)){
+ perror("Could not write footer\n");
+ goto error;
+ }
+ fclose(outwadfile);
+
+
+ free(cert);
+ free(tik);
+ free(tmd);
+ for(i=0;ifilename = optarg;
+ new_patch->next = NULL;
+ *patch_link = new_patch;
+ patch_link = &new_patch->next;
+ break;
+ }
+ case 'c':
+ content_num = optarg[0] - 0x30;
+ if(content_num<0 || content_num>9) content_num=5;
+ break;
+ case 'f':
+ {
+ dol_list_t *new_dol = malloc(sizeof(*new_dol));
+ if(new_dol == NULL){
+ perror("Could not allocate dol list");
+ exit(1);
+ }
+ new_dol->filename = optarg;
+ new_dol->next = NULL;
+ *dol_link = new_dol;
+ dol_link = &new_dol->next;
+ break;
+ }
+ case 'l':{
+ char loading_address[10];
+ sscanf(optarg,"%9s",loading_address);
+ uint32_t addr;
+ sscanf(loading_address,"%"SCNx32,&addr);
+ dol_loading_list_t *new_dol_loading = malloc(sizeof(*new_dol_loading));
+ if(new_dol_loading == NULL){
+ perror("Could not allocate dol loading address.");
+ exit(1);
+ }
+ new_dol_loading->loading_address = addr;
+ new_dol_loading->next = NULL;
+ *dol_loading_link = new_dol_loading;
+ dol_loading_link = &new_dol_loading->next;
+ break;
+ }
+ case 'e': {
+ char dol_after_str[10];
+ sscanf(optarg, "%s", dol_after_str);
+ sscanf(dol_after_str, "%"SCNu32, &dol_after);
+ break;
+ }
+ default:
+ break;
+ }
+
+ }
+
+ if (action == NULL) {
+ print_usage();
+ exit(1);
+ }
+
+ if(strcmp(action, "romc") == 0){
+ romc();
+ return 0;
+ }
+
+ if (strcmp(action, "genkey") == 0){
+ genkey();
+ return 0;
+ }
+
+ if (strcmp(action, "extract") != 0 && strcmp(action, "pack") != 0 && strcmp(action, "inject") != 0) {
+ print_usage();
+ exit(1);
+ }
+
+ if (wad == NULL) {
+ print_usage();
+ exit(1);
+ }
+
+ if (directory == NULL) directory = "wadextract";
+
+ struct stat sbuffer;
+ if (keyfile == NULL) {
+ if (stat("key.bin", &sbuffer) == 0) {
+ keyfile = "key.bin";
+ }
+ else if (stat("common-key.bin", &sbuffer) == 0) {
+ keyfile = "common-key.bin";
+ }
+ else {
+ printf("Cannot find key.bin or common-key.bin.\n");
+ exit(1);
+ }
+ }
+ else {
+ if (stat(keyfile, &sbuffer) != 0) {
+ printf("Cannot find keyfile specified.\n");
+ exit(1);
+ }
+ }
+
+ FILE *fkeyfile = fopen(keyfile, "rb");
+ if(!fkeyfile){
+ perror("Could not open keyfile");
+ exit(1);
+ }
+
+ fread(&key, 1, 16, fkeyfile);
+ if(ferror(fkeyfile)){
+ perror("Could not read from keyfile.");
+ exit(1);
+ }
+ fclose(fkeyfile);
+
+ workingdirectory = malloc(200);
+ if(!workingdirectory){
+ perror("Could not allocate for working directory");
+ exit(1);
+ }
+ workingdirectory = getcwd(workingdirectory, 200);
+
+ if (strcmp(action, "extract") == 0) {
+ if(!do_extract()){
+ exit(1);
+ }
+ }
+ else if (strcmp(action, "pack") == 0) {
+ if(!do_pack()){
+ exit(1);
+ }
+ }
+ else if (strcmp(action, "inject") == 0) {
+ if (rom == NULL) {
+ printf("-a inject specified, but no rom to inject\n");
+ free(workingdirectory);
+ exit(1);
+
+ }
+ if(!do_extract()){
+ perror("Could not extract wad\n");
+ free(workingdirectory);
+ exit(1);
+ }
+
+ if (verbose) {
+ printf("Copying %s to %s/content%d/rom\n", rom, directory,content_num);
+ }
+ FILE *from = fopen(rom, "rb");
+ fseek(from, 0, SEEK_END);
+ size_t fromlen = ftell(from);
+ fseek(from, 0, SEEK_SET);
+ uint8_t *inrom = malloc(fromlen);
+ if(!inrom){
+ perror("could not allocate input rom\n");
+ free(workingdirectory);
+ exit(1);
+ }
+ fread(inrom, 1, fromlen, from);
+ fclose(from);
+
+ char *orom = malloc(200);
+ if(!orom){
+ perror("Could not allocate output rom name\n");
+ free(workingdirectory);
+ free(inrom);
+ exit(1);
+ }
+ snprintf(orom, 200, "%s/content%d/rom", directory,content_num);
+ from = fopen(orom, "wb");
+ fwrite(inrom, 1, fromlen, from);
+ fclose(from);
+ free(inrom);
+ free(orom);
+
+
+ char *wadname = removeext(wad),
+ *outname = malloc(strlen(wadname) + 12);
+ if(!outname){
+ perror("could not allocate for output wad name\n");
+ free(workingdirectory);
+ exit(1);
+ }
+ sprintf(outname, "%s-inject.wad", wadname);
+ free(wadname);
+ if (outwad == NULL) {
+ wad = outname;
+ }
+ else {
+ wad = outwad;
+ }
+
+ if(!do_pack()){
+ perror("Could not pack wad\n");
+ free(outname);
+ free(workingdirectory);
+ exit(1);
+ }
+ free(outname);
+ }
+
+ free(workingdirectory);
+ return 0;
+}
diff --git a/tools/gzinject/src/gzinject.h b/tools/gzinject/src/gzinject.h
new file mode 100644
index 000000000..493557caf
--- /dev/null
+++ b/tools/gzinject/src/gzinject.h
@@ -0,0 +1,50 @@
#ifndef _GZINJECT_H_
#define _GZINJECT_H_

/* NOTE(review): the original two #include lines were stripped in extraction;
 * stdint.h is required for the uint*_t types below — confirm the second
 * header against upstream gzinject 0.3.3. */
#include <stdint.h>
#include <stdio.h>

/* Byte-swap helpers.  Fixed: the whole expansion is now parenthesized so the
 * macros compose safely inside larger expressions (the original expanded to a
 * bare `a | b | c | d`, which mis-binds next to operators of higher
 * precedence, e.g. `2 * REVERSEENDIAN16(x)`). */
#define REVERSEENDIAN32(X) ((((X) >> 24) & 0xff) | (((X) << 8) & 0xFF0000) | (((X) >> 8) & 0xff00) | (((X) << 24) & 0xff000000))
#define REVERSEENDIAN16(X) ((((X) >> 8) & 0xff) | (((X) << 8) & 0xFF00))

/* Indices of the ticket (tik) and TMD sections — presumably into a wad
 * section table; TODO confirm against gzinject.c. */
#define W_TIK 0x00
#define W_TMD 0x01
#define GZINJECT_VERSION "0.3.3"

#ifdef _WIN32
/* Windows variants: mkdir() takes no mode argument, getcwd() is _getcwd(). */
#define mkdir(X,Y) mkdir(X)
#define getcwd(X,Y) _getcwd(X,Y)
#endif

/* Kind of entry inside a U8 archive. */
typedef enum{
	FILE_DIRECTORY,
	FILE_NORMAL
}filetype_t;

/* Singly linked list of patch file names. */
typedef struct patch_list patch_list_t;
struct patch_list {
	const char *filename;
	patch_list_t *next;
};

/* Singly linked list of dol file names. */
typedef struct dol_list dol_list_t;
struct dol_list{
	const char *filename;
	dol_list_t *next;
};

/* Loading addresses paired (positionally) with dol_list entries. */
typedef struct dol_loading_list dol_loading_list_t;
struct dol_loading_list{
	uint32_t loading_address;
	dol_loading_list_t *next;
};

/* Read a big-endian 16/32-bit value from a byte pointer. */
uint16_t be16(const uint8_t *p);
uint32_t be32(const uint8_t *p);

/* Round inp up to the next multiple of padding. */
uint32_t addpadding(uint32_t inp, uint32_t padding);

/* Global verbosity flag (defined in gzinject.c). */
extern int verbose;

#endif
diff --git a/tools/gzinject/src/lz77.c b/tools/gzinject/src/lz77.c
new file mode 100644
index 000000000..5f9063ab3
--- /dev/null
+++ b/tools/gzinject/src/lz77.c
@@ -0,0 +1,175 @@
+#include
+#include
+#include
+#include "lz77.h"
+#include "gzinject.h"
+
/*
 * Walk a GBA-style LZ77 stream (0x10 magic byte, 24-bit little-endian
 * decompressed size, then flag-byte-prefixed groups of 8 tokens) and return
 * the number of compressed bytes it occupies, rounded up to the 4-byte
 * alignment the streams are stored with.  Returns -1 if the magic byte is
 * wrong or a back-reference reaches before the start of the output (i.e.
 * the data is not a valid stream).
 *
 * NOTE(review): the original body was damaged in extraction (all loop
 * conditions between '<' and '>' were lost); this is a reconstruction from
 * the 0x10/LZ77 format semantics and the surviving fragments (the "+ 3"
 * length bias and the "disp <= pos" validity check).  Verify against
 * upstream gzinject's lz77.c.
 */
int lz77_compressed_length(uint8_t *src){
	if(*src!=0x10){
		return -1;
	}
	/* Decompressed size lives in bytes 1-3, little-endian. */
	uint32_t size = (uint32_t)src[1] | ((uint32_t)src[2] << 8) | ((uint32_t)src[3] << 16);
	uint32_t pos = 0;	/* bytes of output accounted for so far */
	int idx = 4;		/* bytes of compressed input consumed */
	while(pos < size){
		uint8_t flags = src[idx++];
		/* Each flag byte describes up to 8 tokens, MSB first:
		 * 1 = back-reference (2 bytes), 0 = literal (1 byte). */
		for(int bit = 0; bit < 8 && pos < size; bit++){
			if(flags & 0x80){
				/* 4-bit length (+3), 12-bit displacement (+1). */
				int n = ((src[idx] >> 4) & 0xF) + 3;
				uint32_t disp = (uint32_t)(((src[idx] & 0xF) << 8) + src[idx + 1] + 1);
				if(disp > pos){
					/* Reference before start of output: invalid stream. */
					return -1;
				}
				pos += (uint32_t)n;
				idx += 2;
			}else{
				/* Literal byte. */
				pos++;
				idx++;
			}
			flags <<= 1;
		}
	}
	/* Streams are padded to a 4-byte boundary. */
	if(idx % 4 != 0){
		idx += 4 - idx % 4;
	}
	return idx;
}
+
/*
 * Return the decompressed size recorded in an 0x10-type LZ77 header:
 * the 24-bit little-endian value in bytes 1-3.
 *
 * Fixed: the original performed `*(uint32_t*)(source + 1)` — an unaligned,
 * strict-aliasing-violating 32-bit load that also folds the first flag byte
 * (offset 4) into bits 24-31 of a size that is only 24 bits wide.
 */
int lz77_decompressed_size(uint8_t *source){
	return source[1] | (source[2] << 8) | (source[3] << 16);
}
+
+/*
+ * NOTE(review): EXTRACTION CORRUPTION.  Everything from here to the end of
+ * lz77.c was damaged when this patch was extracted: text between '<' and '>'
+ * characters was stripped, fusing loop headers with later statements and
+ * destroying the boundaries of THREE units — lz77_decompress(), an unnamed
+ * longest-match search helper (writes {length, distance} into dest[0..1]),
+ * and lz77_compress() — plus the start of lz77.h.  The code below is left
+ * byte-identical; reconstructing the compressor would be speculative.
+ * Recover the real text from upstream gzinject before building.
+ */
+int lz77_decompress(uint8_t *src, uint8_t *dest){
+ if(*src++ != 0x10){
+ return -1;
+ }
+ int index1 = 0;
+ int num1 = src[0] + (src[1] << 8) + (src[2]<<16);
+ src+=3;
+ /* NOTE(review): loop header lost here — likely "while(index1 < num1){ ... }"
+  * with the flag-byte/token handling stripped; "src[0]>>4" survives fused: */
+ while(index1>4);
+ int num4 = 1 + ((src[0] & 0xF) << 8) + src[1];
+ src+=2;
+ if(num4>num1){
+ return -1;
+ }
+ /* NOTE(review): the copy loop below is fused with the START of the search
+  * helper function (its signature and "if(pos >= len)" guard were eaten): */
+ for(int index3 = 0;index3=len){
+ dest[0] = -1;
+ dest[1] = 0;
+ return;
+ }
+ if(pos<2 || len-pos<2){
+ dest[0] = 0;
+ dest[1] = 0;
+ return;
+ }
+
+ int didx = 0;
+ /* NOTE(review): candidate-scan loop header lost ("index<0x1000 && index..."): */
+ for(int index = 1; index<0x1000 && index=0;--index){
+ if(source[pos+num]!=source[pos-d[index] + num % d[index]]){
+ if(didx>1){
+ memmove((void*)d + (sizeof(int) * index),(void*)d + (sizeof(int) * (index+1)),sizeof(int) * (didx - index - 1));
+ didx--;
+ }else{
+ flag = 0;
+ }
+ }
+ }
+ }
+ dest[0] = num;
+ dest[1] = d[0];
+}
+
+int lz77_compress(uint8_t *src, uint8_t **dest, uint32_t len, uint32_t *lenp){
+ int pos = 0;
+ int cpos = 0;
+ uint8_t *comp = calloc(len,1);
+ comp[cpos++] = 0x10;
+ uint8_t *cp = (uint8_t*)lenp;
+ for(int index=0;index<3;++index){
+ comp[cpos++]=*(uint8_t*)cp++;
+ }
+ int d[2];
+ int dbuf[0x4000];
+ /* NOTE(review): the main compression loop header, the per-group setup
+  * (flag byte, staging buffer) and the search-helper call were lost here: */
+ while(pos 2){
+ uint8_t num2 = ((((d[0] - 3) & 0xF) << 4) + ((d[1] - 1) >> 8 & 0xF));
+ comp2[bpos++] = num2;
+ uint8_t num3 = (d[1] - 1) & 0xFF;
+ comp2[bpos++] = num3;
+ pos+=d[0];
+ num1 |= 1 << (8 - (index+1));
+ }else if(d[0]>=0){
+ comp2[bpos++] = src[pos++];
+ }else{
+ break;
+ }
+ }
+ comp[cpos++] = num1;
+ /* NOTE(review): the tail of lz77_compress and the lz77.h diff header were
+  * fused into the line below; the #include targets were also stripped. */
+ for(int i=0;i
+#include
+#include
+
+int lz77_compressed_length(uint8_t *source);
+int lz77_decompress(uint8_t *source, uint8_t *dest);
+int lz77_decompressed_size(uint8_t *source);
+int lz77_compress(uint8_t *src, uint8_t **dest, uint32_t len, uint32_t *intp);
+
+#endif
\ No newline at end of file
diff --git a/tools/gzinject/src/md5.c b/tools/gzinject/src/md5.c
new file mode 100644
index 000000000..e0affaaf0
--- /dev/null
+++ b/tools/gzinject/src/md5.c
@@ -0,0 +1,291 @@
+/*
+* This is an OpenSSL-compatible implementation of the RSA Data Security, Inc.
+* MD5 Message-Digest Algorithm (RFC 1321).
+*
+* Homepage:
+* http://openwall.info/wiki/people/solar/software/public-domain-source-code/md5
+*
+* Author:
+* Alexander Peslyak, better known as Solar Designer
+*
+* This software was written by Alexander Peslyak in 2001. No copyright is
+* claimed, and the software is hereby placed in the public domain.
+* In case this attempt to disclaim copyright and place the software in the
+* public domain is deemed null and void, then the software is
+* Copyright (c) 2001 Alexander Peslyak and it is hereby released to the
+* general public under the following terms:
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted.
+*
+* There's ABSOLUTELY NO WARRANTY, express or implied.
+*
+* (This is a heavily cut-down "BSD license".)
+*
+* This differs from Colin Plumb's older public domain implementation in that
+* no exactly 32-bit integer data type is required (any 32-bit or wider
+* unsigned integer data type will do), there's no compile-time endianness
+* configuration, and the function prototypes match OpenSSL's. No code from
+* Colin Plumb's implementation has been reused; this comment merely compares
+* the properties of the two independent implementations.
+*
+* The primary goals of this implementation are portability and ease of use.
+* It is meant to be fast, but not as fast as possible. Some known
+* optimizations are not included to reduce source code size and avoid
+* compile-time configuration.
+*/
+
+#ifndef HAVE_OPENSSL
+
+#include
+
+#include "md5.h"
+
+/*
+* The basic MD5 functions.
+*
+* F and G are optimized compared to their RFC 1321 definitions for
+* architectures that lack an AND-NOT instruction, just like in Colin Plumb's
+* implementation.
+*/
+#define F(x, y, z) ((z) ^ ((x) & ((y) ^ (z))))
+#define G(x, y, z) ((y) ^ ((z) & ((x) ^ (y))))
+#define H(x, y, z) (((x) ^ (y)) ^ (z))
+#define H2(x, y, z) ((x) ^ ((y) ^ (z)))
+#define I(x, y, z) ((y) ^ ((x) | ~(z)))
+
+/*
+* The MD5 transformation for all four rounds.
+*/
+#define STEP(f, a, b, c, d, x, t, s) \
+ (a) += f((b), (c), (d)) + (x) + (t); \
+ (a) = (((a) << (s)) | (((a) & 0xffffffff) >> (32 - (s)))); \
+ (a) += (b);
+
+/*
+* SET reads 4 input bytes in little-endian byte order and stores them in a
+* properly aligned word in host byte order.
+*
+* The check for little-endian architectures that tolerate unaligned memory
+* accesses is just an optimization. Nothing will break if it fails to detect
+* a suitable architecture.
+*
+* Unfortunately, this optimization may be a C strict aliasing rules violation
+* if the caller's data buffer has effective type that cannot be aliased by
+* MD5_u32plus. In practice, this problem may occur if these MD5 routines are
+* inlined into a calling function, or with future and dangerously advanced
+* link-time optimizations. For the time being, keeping these MD5 routines in
+* their own translation unit avoids the problem.
+*/
+#if defined(__i386__) || defined(__x86_64__) || defined(__vax__)
+#define SET(n) \
+ (*(MD5_u32plus *)&ptr[(n) * 4])
+#define GET(n) \
+ SET(n)
+#else
+#define SET(n) \
+ (ctx->block[(n)] = \
+ (MD5_u32plus)ptr[(n) * 4] | \
+ ((MD5_u32plus)ptr[(n) * 4 + 1] << 8) | \
+ ((MD5_u32plus)ptr[(n) * 4 + 2] << 16) | \
+ ((MD5_u32plus)ptr[(n) * 4 + 3] << 24))
+#define GET(n) \
+ (ctx->block[(n)])
+#endif
+
+/*
+* This processes one or more 64-byte data blocks, but does NOT update the bit
+* counters. There are no alignment requirements.
+*/
+/*
+ * MD5 core: compress one or more complete 64-byte blocks (size must be a
+ * multiple of 64) into the a/b/c/d chaining state.  Does NOT update the
+ * bit counters (MD5_Update does that); returns a pointer just past the
+ * consumed input.  Verbatim Solar Designer public-domain reference code.
+ */
+static const void *body(MD5_CTX *ctx, const void *data, unsigned long size)
+{
+ const unsigned char *ptr;
+ MD5_u32plus a, b, c, d;
+ MD5_u32plus saved_a, saved_b, saved_c, saved_d;
+
+ ptr = (const unsigned char *)data;
+
+ a = ctx->a;
+ b = ctx->b;
+ c = ctx->c;
+ d = ctx->d;
+
+ /* One iteration per 64-byte block. */
+ do {
+ saved_a = a;
+ saved_b = b;
+ saved_c = c;
+ saved_d = d;
+
+ /* Round 1 */
+ STEP(F, a, b, c, d, SET(0), 0xd76aa478, 7)
+ STEP(F, d, a, b, c, SET(1), 0xe8c7b756, 12)
+ STEP(F, c, d, a, b, SET(2), 0x242070db, 17)
+ STEP(F, b, c, d, a, SET(3), 0xc1bdceee, 22)
+ STEP(F, a, b, c, d, SET(4), 0xf57c0faf, 7)
+ STEP(F, d, a, b, c, SET(5), 0x4787c62a, 12)
+ STEP(F, c, d, a, b, SET(6), 0xa8304613, 17)
+ STEP(F, b, c, d, a, SET(7), 0xfd469501, 22)
+ STEP(F, a, b, c, d, SET(8), 0x698098d8, 7)
+ STEP(F, d, a, b, c, SET(9), 0x8b44f7af, 12)
+ STEP(F, c, d, a, b, SET(10), 0xffff5bb1, 17)
+ STEP(F, b, c, d, a, SET(11), 0x895cd7be, 22)
+ STEP(F, a, b, c, d, SET(12), 0x6b901122, 7)
+ STEP(F, d, a, b, c, SET(13), 0xfd987193, 12)
+ STEP(F, c, d, a, b, SET(14), 0xa679438e, 17)
+ STEP(F, b, c, d, a, SET(15), 0x49b40821, 22)
+
+ /* Round 2 */
+ STEP(G, a, b, c, d, GET(1), 0xf61e2562, 5)
+ STEP(G, d, a, b, c, GET(6), 0xc040b340, 9)
+ STEP(G, c, d, a, b, GET(11), 0x265e5a51, 14)
+ STEP(G, b, c, d, a, GET(0), 0xe9b6c7aa, 20)
+ STEP(G, a, b, c, d, GET(5), 0xd62f105d, 5)
+ STEP(G, d, a, b, c, GET(10), 0x02441453, 9)
+ STEP(G, c, d, a, b, GET(15), 0xd8a1e681, 14)
+ STEP(G, b, c, d, a, GET(4), 0xe7d3fbc8, 20)
+ STEP(G, a, b, c, d, GET(9), 0x21e1cde6, 5)
+ STEP(G, d, a, b, c, GET(14), 0xc33707d6, 9)
+ STEP(G, c, d, a, b, GET(3), 0xf4d50d87, 14)
+ STEP(G, b, c, d, a, GET(8), 0x455a14ed, 20)
+ STEP(G, a, b, c, d, GET(13), 0xa9e3e905, 5)
+ STEP(G, d, a, b, c, GET(2), 0xfcefa3f8, 9)
+ STEP(G, c, d, a, b, GET(7), 0x676f02d9, 14)
+ STEP(G, b, c, d, a, GET(12), 0x8d2a4c8a, 20)
+
+ /* Round 3 */
+ STEP(H, a, b, c, d, GET(5), 0xfffa3942, 4)
+ STEP(H2, d, a, b, c, GET(8), 0x8771f681, 11)
+ STEP(H, c, d, a, b, GET(11), 0x6d9d6122, 16)
+ STEP(H2, b, c, d, a, GET(14), 0xfde5380c, 23)
+ STEP(H, a, b, c, d, GET(1), 0xa4beea44, 4)
+ STEP(H2, d, a, b, c, GET(4), 0x4bdecfa9, 11)
+ STEP(H, c, d, a, b, GET(7), 0xf6bb4b60, 16)
+ STEP(H2, b, c, d, a, GET(10), 0xbebfbc70, 23)
+ STEP(H, a, b, c, d, GET(13), 0x289b7ec6, 4)
+ STEP(H2, d, a, b, c, GET(0), 0xeaa127fa, 11)
+ STEP(H, c, d, a, b, GET(3), 0xd4ef3085, 16)
+ STEP(H2, b, c, d, a, GET(6), 0x04881d05, 23)
+ STEP(H, a, b, c, d, GET(9), 0xd9d4d039, 4)
+ STEP(H2, d, a, b, c, GET(12), 0xe6db99e5, 11)
+ STEP(H, c, d, a, b, GET(15), 0x1fa27cf8, 16)
+ STEP(H2, b, c, d, a, GET(2), 0xc4ac5665, 23)
+
+ /* Round 4 */
+ STEP(I, a, b, c, d, GET(0), 0xf4292244, 6)
+ STEP(I, d, a, b, c, GET(7), 0x432aff97, 10)
+ STEP(I, c, d, a, b, GET(14), 0xab9423a7, 15)
+ STEP(I, b, c, d, a, GET(5), 0xfc93a039, 21)
+ STEP(I, a, b, c, d, GET(12), 0x655b59c3, 6)
+ STEP(I, d, a, b, c, GET(3), 0x8f0ccc92, 10)
+ STEP(I, c, d, a, b, GET(10), 0xffeff47d, 15)
+ STEP(I, b, c, d, a, GET(1), 0x85845dd1, 21)
+ STEP(I, a, b, c, d, GET(8), 0x6fa87e4f, 6)
+ STEP(I, d, a, b, c, GET(15), 0xfe2ce6e0, 10)
+ STEP(I, c, d, a, b, GET(6), 0xa3014314, 15)
+ STEP(I, b, c, d, a, GET(13), 0x4e0811a1, 21)
+ STEP(I, a, b, c, d, GET(4), 0xf7537e82, 6)
+ STEP(I, d, a, b, c, GET(11), 0xbd3af235, 10)
+ STEP(I, c, d, a, b, GET(2), 0x2ad7d2bb, 15)
+ STEP(I, b, c, d, a, GET(9), 0xeb86d391, 21)
+
+ a += saved_a;
+ b += saved_b;
+ c += saved_c;
+ d += saved_d;
+
+ ptr += 64;
+ } while (size -= 64);
+
+ ctx->a = a;
+ ctx->b = b;
+ ctx->c = c;
+ ctx->d = d;
+
+ return ptr;
+}
+
+void MD5_Init(MD5_CTX *ctx)
+{
+ ctx->a = 0x67452301;
+ ctx->b = 0xefcdab89;
+ ctx->c = 0x98badcfe;
+ ctx->d = 0x10325476;
+
+ ctx->lo = 0;
+ ctx->hi = 0;
+}
+
+/*
+ * Absorb `size` bytes of `data` into the hash.  ctx->lo holds the message
+ * length in bytes modulo 2^29 (shifted to a bit count in MD5_Final);
+ * ctx->hi collects the overflow.  A partial trailing block is buffered in
+ * ctx->buffer between calls.  Verbatim reference implementation.
+ */
+void MD5_Update(MD5_CTX *ctx, const void *data, unsigned long size)
+{
+ MD5_u32plus saved_lo;
+ unsigned long used, available;
+
+ saved_lo = ctx->lo;
+ /* Advance the 29-bit byte counter; carry into hi on wraparound. */
+ if ((ctx->lo = (saved_lo + size) & 0x1fffffff) < saved_lo)
+ ctx->hi++;
+ ctx->hi += size >> 29;
+
+ /* Bytes already sitting in the partial-block buffer. */
+ used = saved_lo & 0x3f;
+
+ if (used) {
+ available = 64 - used;
+
+ if (size < available) {
+ memcpy(&ctx->buffer[used], data, size);
+ return;
+ }
+
+ /* Complete the buffered block and compress it. */
+ memcpy(&ctx->buffer[used], data, available);
+ data = (const unsigned char *)data + available;
+ size -= available;
+ body(ctx, ctx->buffer, 64);
+ }
+
+ /* Compress whole blocks straight from the caller's buffer. */
+ if (size >= 64) {
+ data = body(ctx, data, size & ~(unsigned long)0x3f);
+ size &= 0x3f;
+ }
+
+ /* Stash the remaining partial block (possibly 0 bytes). */
+ memcpy(ctx->buffer, data, size);
+}
+
+#define OUT(dst, src) \
+ (dst)[0] = (unsigned char)(src); \
+ (dst)[1] = (unsigned char)((src) >> 8); \
+ (dst)[2] = (unsigned char)((src) >> 16); \
+ (dst)[3] = (unsigned char)((src) >> 24);
+
+/*
+ * Finish the hash: append the 0x80 pad byte, zero-fill to 56 bytes mod 64,
+ * append the 64-bit little-endian bit length, compress the last block(s),
+ * emit the 16-byte digest little-endian word by word, and wipe the context.
+ */
+void MD5_Final(unsigned char *result, MD5_CTX *ctx)
+{
+ unsigned long used, available;
+
+ used = ctx->lo & 0x3f;
+
+ ctx->buffer[used++] = 0x80;
+
+ available = 64 - used;
+
+ /* Not enough room for the 8-byte length: pad out this block and start
+  * a fresh one. */
+ if (available < 8) {
+ memset(&ctx->buffer[used], 0, available);
+ body(ctx, ctx->buffer, 64);
+ used = 0;
+ available = 64;
+ }
+
+ memset(&ctx->buffer[used], 0, available - 8);
+
+ /* Convert the byte count to a bit count before serializing it. */
+ ctx->lo <<= 3;
+ OUT(&ctx->buffer[56], ctx->lo)
+ OUT(&ctx->buffer[60], ctx->hi)
+
+ body(ctx, ctx->buffer, 64);
+
+ OUT(&result[0], ctx->a)
+ OUT(&result[4], ctx->b)
+ OUT(&result[8], ctx->c)
+ OUT(&result[12], ctx->d)
+
+ /* Don't leave key material / state behind. */
+ memset(ctx, 0, sizeof(*ctx));
+}
+
+#endif
\ No newline at end of file
diff --git a/tools/gzinject/src/md5.h b/tools/gzinject/src/md5.h
new file mode 100644
index 000000000..f51d33e6d
--- /dev/null
+++ b/tools/gzinject/src/md5.h
@@ -0,0 +1,45 @@
+/*
+* This is an OpenSSL-compatible implementation of the RSA Data Security, Inc.
+* MD5 Message-Digest Algorithm (RFC 1321).
+*
+* Homepage:
+* http://openwall.info/wiki/people/solar/software/public-domain-source-code/md5
+*
+* Author:
+* Alexander Peslyak, better known as Solar Designer
+*
+* This software was written by Alexander Peslyak in 2001. No copyright is
+* claimed, and the software is hereby placed in the public domain.
+* In case this attempt to disclaim copyright and place the software in the
+* public domain is deemed null and void, then the software is
+* Copyright (c) 2001 Alexander Peslyak and it is hereby released to the
+* general public under the following terms:
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted.
+*
+* There's ABSOLUTELY NO WARRANTY, express or implied.
+*
+* See md5.c for more information.
+*/
+
+#ifdef HAVE_OPENSSL
+#include
+#elif !defined(_MD5_H)
+#define _MD5_H
+
+/* Any 32-bit or wider unsigned integer data type will do */
+typedef unsigned int MD5_u32plus;
+
+typedef struct {
+ MD5_u32plus lo, hi;
+ MD5_u32plus a, b, c, d;
+ unsigned char buffer[64];
+ MD5_u32plus block[16];
+} MD5_CTX;
+
+extern void MD5_Init(MD5_CTX *ctx);
+extern void MD5_Update(MD5_CTX *ctx, const void *data, unsigned long size);
+extern void MD5_Final(unsigned char *result, MD5_CTX *ctx);
+
+#endif
\ No newline at end of file
diff --git a/tools/gzinject/src/romchu.c b/tools/gzinject/src/romchu.c
new file mode 100644
index 000000000..aa37bb9cc
--- /dev/null
+++ b/tools/gzinject/src/romchu.c
@@ -0,0 +1,543 @@
+#include
+#include
+#include
+#include
+#include
+
+/* romchu 0.6 */
+/* a decompressor for type 2 romc */
+/* reversed by hcs from the Wii VC wad for Super Smash Bros EU. */
+/* this code is public domain, have at it */
+/* Taken from https://forum.xentax.com/viewtopic.php?t=5364 */
+
+#define VERSION "0.6"
+
+struct bitstream;
+
+struct bitstream *init_bitstream(const unsigned char *pool, unsigned long pool_size);
+uint32_t get_bits(struct bitstream *bs, int bits);
+int bitstream_eof(struct bitstream *bs);
+void free_bitstream(struct bitstream *bs);
+
+struct huftable;
+
+struct huftable *load_table(struct bitstream *bs, int symbols);
+int huf_lookup(struct bitstream *bs, struct huftable *ht);
+void free_table(struct huftable *);
+
+/* Backreference decode tables, one entry per Huffman symbol: `bits` extra
+ * bits are read from the stream and added to `base` to form a match length
+ * (backref_len) or displacement (backref_disp).  Filled in by
+ * romchu_decompress before the block loop. */
+struct {
+ unsigned int bits;
+ unsigned int base;
+} backref_len[0x1D], backref_disp[0x1E];
+
+/*
+ * Decompress a type-0/type-2 "romc" blob (Wii VC N64 rom container).
+ * Header: 34-bit big-endian nominal size packed into 4 bytes, low 2 bits =
+ * romc type.  Type 0 is stored; type 2 is Huffman-coded LZ with per-64KB
+ * payload blocks.  Returns a malloc'd buffer (caller frees) and stores the
+ * nominal size via decomp_size; returns NULL on error.
+ *
+ * NOTE(review): this function was damaged in extraction around the
+ * backreference-table setup / block-loop header (see inline note below);
+ * left byte-identical — recover the missing text from upstream romchu 0.6.
+ */
+uint8_t *romchu_decompress(uint8_t *compressed, size_t comp_size, size_t *decomp_size){
+
+ unsigned char head_buf[4];
+ unsigned char payload_buf[0x10000];
+ int block_count = 0;
+ long out_offset = 0;
+ uint8_t *decompressed;
+
+ uint64_t nominal_size;
+ int romc_type;
+ uint8_t *comp = compressed;
+ // read header
+ {
+ memcpy(head_buf,compressed,4);
+ /* 34-bit size: 8+8+8 bits then the top 6 bits of the last byte. */
+ nominal_size = head_buf[0];
+ nominal_size *= 0x100;
+ nominal_size |= head_buf[1];
+ nominal_size *= 0x100;
+ nominal_size |= head_buf[2];
+ nominal_size *= 0x40;
+ nominal_size |= head_buf[3]>>2;
+ romc_type = head_buf[3]&0x3;
+ decompressed = malloc(nominal_size);
+ if(decomp_size) *decomp_size = nominal_size;
+ if (!decompressed)
+ {
+ perror("malloc big outbuf buffer");
+ return NULL;
+ }
+
+ switch(romc_type) {
+ case 0:
+ /* Stored: payload follows the header verbatim.
+  * NOTE(review): dereferences decomp_size without the NULL guard used
+  * above — crashes if the caller passed NULL; TODO use nominal_size. */
+ memcpy(decompressed, compressed + 4, *decomp_size);
+ return decompressed;
+ case 2:
+ break;
+ default:
+ fprintf(stderr, "Unsupported romc type. %d\n", romc_type);
+ return NULL;
+
+ }
+ }
+
+ // initialize backreference lookup tables
+ {
+ for (unsigned int i = 0; i < 8; i++)
+ {
+ backref_len[i].bits = 0;
+ backref_len[i].base = i;
+ }
+
+ for (unsigned int i = 8, scale = 1; scale < 6; scale++)
+ {
+ for (unsigned int k = (1<<(scale+2));
+ k < (1<<(scale+3));
+ /* NOTE(review): EXTRACTION CORRUPTION — the rest of the table
+  * initialization, the per-block read loop header, and the
+  * declarations/reads of compression_flag, payload_bytes,
+  * payload_bits and read_size were lost; the next line is fused
+  * from two distant statements. */
+ k += (1< 0)
+ {
+ read_size ++;
+ }
+
+ if (read_size > sizeof(payload_buf))
+ {
+ fprintf(stderr, "payload too large\n");
+ free(decompressed);
+ return NULL;
+ }
+ memcpy(payload_buf,compressed,read_size);
+ compressed+=read_size;
+
+ /* attempt to parse... */
+
+ if (compression_flag)
+ {
+ uint16_t tab1_size, tab2_size;
+ uint32_t body_size;
+ unsigned long tab1_offset, tab2_offset, body_offset;
+ struct bitstream *bs;
+ struct huftable *table1, *table2;
+
+ /* read table 1 size */
+ tab1_offset = 0;
+ bs = init_bitstream(payload_buf + tab1_offset, payload_bytes*8+payload_bits);
+ tab1_size = get_bits(bs, 16);
+ free_bitstream(bs);
+
+ /* load table 1 (0x11D symbols: 256 literals + length codes) */
+ bs = init_bitstream(payload_buf + tab1_offset + 2, tab1_size);
+ table1 = load_table(bs, 0x11D);
+ free_bitstream(bs);
+
+ /* read table 2 size */
+ tab2_offset = tab1_offset + 2 + (tab1_size+7) / 8;
+ bs = init_bitstream(payload_buf + tab2_offset, 2*8);
+ tab2_size = get_bits(bs, 16);
+ free_bitstream(bs);
+
+ /* load table 2 (0x1E displacement symbols) */
+ bs = init_bitstream(payload_buf + tab2_offset + 2, tab2_size);
+ table2 = load_table(bs, 0x1E);
+ free_bitstream(bs);
+
+ /* decode body */
+ body_offset = tab2_offset + 2 + (tab2_size+7) / 8;
+ body_size = payload_bytes*8 + payload_bits - body_offset*8;
+ bs = init_bitstream(payload_buf + body_offset, body_size);
+
+ while (!bitstream_eof(bs))
+ {
+ int symbol = huf_lookup(bs, table1);
+
+ if (symbol < 0x100)
+ {
+ /* byte literal */
+ unsigned char b = symbol;
+ if (out_offset >= nominal_size)
+ {
+ fprintf(stderr, "generated too many bytes\n");
+ free(decompressed);
+ return NULL;
+ }
+ decompressed[out_offset++] = b;
+ }
+ else
+ {
+ /* backreference */
+ unsigned int len_bits = backref_len[symbol-0x100].bits;
+ unsigned int len = backref_len[symbol-0x100].base;
+ if (len_bits > 0)
+ {
+ len += get_bits(bs, len_bits);
+ }
+ len += 3;
+
+ int symbol2 = huf_lookup(bs, table2);
+
+ unsigned int disp_bits = backref_disp[symbol2].bits;
+ unsigned int disp = backref_disp[symbol2].base;
+ if (disp_bits > 0)
+ {
+ disp += get_bits(bs, disp_bits);
+ }
+ disp ++;
+
+ if (disp > out_offset)
+ {
+ fprintf(stderr, "backreference too far\n");
+ free(decompressed);
+ return NULL;
+ }
+ if (out_offset+len > nominal_size)
+ {
+ fprintf(stderr, "generated too many bytes\n");
+ free(decompressed);
+ return NULL;
+ }
+ /* byte-by-byte so overlapping references self-extend */
+ for (unsigned int i = 0; i < len; i++, out_offset++)
+ {
+ decompressed[out_offset] = decompressed[out_offset-disp];
+ }
+ }
+ }
+
+ free_table(table1);
+ free_table(table2);
+ free_bitstream(bs);
+ }
+ else
+ {
+ /* uncompressed block: copy the payload straight through */
+ if (out_offset + payload_bytes > nominal_size)
+ {
+ fprintf(stderr, "generated too many bytes\n");
+ free(decompressed);
+ return NULL;
+ }
+ memcpy(decompressed+out_offset, payload_buf, payload_bytes);
+ out_offset += payload_bytes;
+ }
+
+ block_count ++;
+ }
+ return decompressed;
+}
+
+/* bitstream reader */
+struct bitstream
+{
+ /* next unread byte of the caller-owned bit pool */
+ const unsigned char *pool;
+ /* bits remaining in pool (excluding first_byte) */
+ long bits_left;
+ /* staging byte that bits are shifted out of, LSB first */
+ uint8_t first_byte;
+ /* valid bits still left in first_byte */
+ int first_byte_bits;
+};
+
+struct bitstream *init_bitstream(const unsigned char *pool, unsigned long pool_size)
+{
+ struct bitstream *bs = malloc(sizeof(struct bitstream));
+ if (!bs)
+ {
+ perror("bitstream malloc");
+ exit(EXIT_FAILURE);
+ }
+
+ bs->pool = pool;
+ bs->bits_left = pool_size;
+ bs->first_byte_bits = 0;
+
+ /* check that padding bits are 0 (to ensure we aren't ignoring anything) */
+ if (pool_size%8)
+ {
+ if (pool[pool_size/8] & ~((1<<(pool_size%8))-1))
+ {
+ fprintf(stderr, "nonzero padding at end of bitstream\n");
+ exit(EXIT_FAILURE);
+ }
+ }
+
+ return bs;
+}
+
+/*
+ * Read `bits` (1..32) from the stream.  Bits are consumed LSB-first within
+ * each byte, and the first bit read ends up as bit 0 of the result (the
+ * accumulator is filled from bit 31 down, then shifted right).  Aborts on
+ * a request wider than 32 bits or past the end of the stream.
+ */
+uint32_t get_bits(struct bitstream *bs, int bits)
+{
+ uint32_t accum = 0;
+
+ if (bits > 32)
+ {
+ fprintf(stderr, "get_bits() supports max 32\n");
+ exit(EXIT_FAILURE);
+ }
+ if (bits > bs->bits_left + bs->first_byte_bits)
+ {
+ fprintf(stderr, "get_bits() underflow\n");
+ exit(EXIT_FAILURE);
+ }
+
+ for (int i = 0; i < bits; i++)
+ {
+ /* Staging byte exhausted: pull the next byte from the pool. */
+ if (bs->first_byte_bits == 0)
+ {
+ bs->first_byte = *bs->pool;
+ bs->pool ++;
+ if (bs->bits_left >= 8)
+ {
+ bs->first_byte_bits = 8;
+ bs->bits_left -= 8;
+ }
+ else
+ {
+ /* Final partial byte of the stream. */
+ bs->first_byte_bits = bs->bits_left;
+ bs->bits_left = 0;
+ }
+ }
+
+ accum >>= 1;
+ accum |= (bs->first_byte & 1)<<31;
+ bs->first_byte >>= 1;
+ bs->first_byte_bits --;
+ }
+
+ /* Align the collected bits down to bit 0. */
+ return accum>>(32-bits);
+}
+
+int bitstream_eof(struct bitstream *bs)
+{
+ return (bs->bits_left + bs->first_byte_bits == 0);
+}
+
+/* Release a reader from init_bitstream; the underlying pool is not owned
+ * by the reader and is left untouched. */
+void free_bitstream(struct bitstream *bs)
+{
+ free(bs);
+}
+
+/* Huffman code handling */
+struct hufnode {
+ int is_leaf;
+ union {
+ struct {
+ /* child indices into huftable.t; 0 means "no child yet" (node 0 is
+  * the root, so it can never legitimately appear as a child) */
+ int left, right;
+ } inner;
+ struct {
+ int symbol;
+ } leaf;
+ } u;
+};
+struct huftable {
+ /* number of symbols the code covers */
+ int symbols;
+ /* node pool (up to 2*symbols entries); t[0] is the root */
+ struct hufnode *t;
+};
+
+struct huftable *load_table(struct bitstream *bs, int symbols)
+{
+ int len_count[32] = {0};
+ uint32_t codes[32];
+ int *length_of = malloc(sizeof(*length_of) * symbols);
+ struct huftable *ht;
+ int next_free_node;
+
+ for (int i = 0; i < symbols; )
+ {
+ if (get_bits(bs, 1))
+ {
+ /* run of equal lengths */
+ int count = get_bits(bs, 7) + 2;
+ int length = get_bits(bs, 5);
+
+ len_count[length] += count;
+ for (int j = 0; j < count; j++, i++)
+ {
+ length_of[i] = length;
+ }
+ }
+ else
+ {
+ /* set of inequal lengths */
+ int count = get_bits(bs, 7) + 1;
+
+ for (int j = 0; j < count; j++, i++)
+ {
+ int length = get_bits(bs, 5);
+ length_of[i] = length;
+ len_count[length] ++;
+ }
+ }
+ }
+
+ if (!bitstream_eof(bs))
+ {
+ fprintf(stderr, "did not exhaust bitstream reading table\n");
+ exit(EXIT_FAILURE);
+ }
+
+ /* compute the first canonical Huffman code for each length */
+ len_count[0] = 0; // not strictly necessary
+ for (uint32_t i = 1, accum = 0; i < 32; i++)
+ {
+ accum = codes[i] = (accum + len_count[i-1]) << 1;
+ }
+
+ /* allocate space for the tree */
+ ht = malloc(sizeof(struct huftable));
+ if (!ht)
+ {
+ perror("malloc of huftable");
+ exit(EXIT_FAILURE);
+ }
+ ht->symbols = symbols;
+ ht->t = malloc(sizeof(struct hufnode) * symbols * 2);
+ if (!ht->t)
+ {
+ perror("malloc of hufnodes");
+ exit(EXIT_FAILURE);
+ }
+
+ /* determine codes and build a tree */
+ for (int i = 0; i < symbols*2; i++)
+ {
+ ht->t[i].is_leaf = 0;
+ ht->t[i].u.inner.left = ht->t[i].u.inner.right = 0;
+ }
+ next_free_node = 1;
+ for (int i = 0; i < symbols; i++)
+ {
+ int cur = 0;
+ if (0 == length_of[i])
+ {
+ // 0 length indicates absent symbol
+ continue;
+ }
+
+ for (int j = length_of[i]-1; j >= 0; j --)
+ {
+ int next;
+ if (ht->t[cur].is_leaf)
+ {
+ fprintf(stderr, "oops, walked onto a leaf\n");
+ exit(EXIT_FAILURE);
+ }
+
+ if (codes[length_of[i]]&(1<t[cur].u.inner.right;
+ if (0 == next)
+ {
+ next = ht->t[cur].u.inner.right = next_free_node ++;
+ }
+ }
+ else
+ {
+ // 0 == left
+ next = ht->t[cur].u.inner.left ;
+ if (0 == next)
+ {
+ next = ht->t[cur].u.inner.left = next_free_node ++;
+ }
+ }
+
+ cur = next;
+ }
+
+ ht->t[cur].is_leaf = 1;
+ ht->t[cur].u.leaf.symbol = i;
+
+ codes[length_of[i]] ++;
+ }
+ free(length_of);
+ return ht;
+}
+
+int huf_lookup(struct bitstream *bs, struct huftable *ht)
+{
+ int cur = 0;
+ while (!ht->t[cur].is_leaf)
+ {
+ if (get_bits(bs, 1))
+ {
+ // 1 == right
+ cur = ht->t[cur].u.inner.right;
+ }
+ else
+ {
+ // 0 == left
+ cur = ht->t[cur].u.inner.left;
+ }
+ }
+
+ return ht->t[cur].u.leaf.symbol;
+}
+
+void free_table(struct huftable *ht)
+{
+ if (ht)
+ {
+ free(ht->t);
+ }
+ free(ht);
+}
diff --git a/tools/gzinject/src/romchu.h b/tools/gzinject/src/romchu.h
new file mode 100644
index 000000000..1c2f8b838
--- /dev/null
+++ b/tools/gzinject/src/romchu.h
@@ -0,0 +1,8 @@
+#ifndef _ROMCHU_H
+#define _ROMCHU_H
+
+#include
+
+uint8_t *romchu_decompress(uint8_t *compressed, size_t comp_size, size_t *decomp_size);
+
+#endif
\ No newline at end of file
diff --git a/tools/gzinject/src/sha1.c b/tools/gzinject/src/sha1.c
new file mode 100644
index 000000000..73794062a
--- /dev/null
+++ b/tools/gzinject/src/sha1.c
@@ -0,0 +1,296 @@
+/*
+SHA-1 in C
+By Steve Reid
+100% Public Domain
+
+Test Vectors (from FIPS PUB 180-1)
+"abc"
+A9993E36 4706816A BA3E2571 7850C26C 9CD0D89D
+"abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq"
+84983E44 1C3BD26E BAAE4AA1 F95129E5 E54670F1
+A million repetitions of "a"
+34AA973C D4C4DAA4 F61EEB2B DBAD2731 6534016F
+*/
+
+/* #define LITTLE_ENDIAN * This should be #define'd already, if true. */
+/* #define SHA1HANDSOFF * Copies data before messing with it. */
+
+#define SHA1HANDSOFF
+
+#include
+#include
+
+/* for uint32_t */
+#include
+
+#include "sha1.h"
+
+
+#define rol(value, bits) (((value) << (bits)) | ((value) >> (32 - (bits))))
+
+/* blk0() and blk() perform the initial expand. */
+/* I got the idea of expanding during the round function from SSLeay */
+#if BYTE_ORDER == LITTLE_ENDIAN
+#define blk0(i) (block->l[i] = (rol(block->l[i],24)&0xFF00FF00) \
+ |(rol(block->l[i],8)&0x00FF00FF))
+#elif BYTE_ORDER == BIG_ENDIAN
+#define blk0(i) block->l[i]
+#else
+#error "Endianness not defined!"
+#endif
+#define blk(i) (block->l[i&15] = rol(block->l[(i+13)&15]^block->l[(i+8)&15] \
+ ^block->l[(i+2)&15]^block->l[i&15],1))
+
+/* (R0+R1), R2, R3, R4 are the different operations used in SHA1 */
+#define R0(v,w,x,y,z,i) z+=((w&(x^y))^y)+blk0(i)+0x5A827999+rol(v,5);w=rol(w,30);
+#define R1(v,w,x,y,z,i) z+=((w&(x^y))^y)+blk(i)+0x5A827999+rol(v,5);w=rol(w,30);
+#define R2(v,w,x,y,z,i) z+=(w^x^y)+blk(i)+0x6ED9EBA1+rol(v,5);w=rol(w,30);
+#define R3(v,w,x,y,z,i) z+=(((w|x)&y)|(w&x))+blk(i)+0x8F1BBCDC+rol(v,5);w=rol(w,30);
+#define R4(v,w,x,y,z,i) z+=(w^x^y)+blk(i)+0xCA62C1D6+rol(v,5);w=rol(w,30);
+
+
+/* Hash a single 512-bit block. This is the core of the algorithm. */
+
+/*
+ * Compress exactly one 512-bit block into the five-word chaining state.
+ * Verbatim Steve Reid public-domain reference code; the 80 rounds are
+ * fully unrolled with the working variables rotated by argument order.
+ */
+void SHA1Transform(
+ uint32_t state[5],
+ const unsigned char buffer[64]
+)
+{
+ uint32_t a, b, c, d, e;
+
+ typedef union
+ {
+ unsigned char c[64];
+ uint32_t l[16];
+ } CHAR64LONG16;
+
+#ifdef SHA1HANDSOFF
+ CHAR64LONG16 block[1]; /* use array to appear as a pointer */
+
+ /* Work on a copy so blk0()'s in-place byte swapping never touches the
+  * caller's buffer. */
+ memcpy(block, buffer, 64);
+#else
+ /* The following had better never be used because it causes the
+ * pointer-to-const buffer to be cast into a pointer to non-const.
+ * And the result is written through. I threw a "const" in, hoping
+ * this will cause a diagnostic.
+ */
+ CHAR64LONG16 *block = (const CHAR64LONG16 *)buffer;
+#endif
+ /* Copy context->state[] to working vars */
+ a = state[0];
+ b = state[1];
+ c = state[2];
+ d = state[3];
+ e = state[4];
+ /* 4 rounds of 20 operations each. Loop unrolled. */
+ R0(a, b, c, d, e, 0);
+ R0(e, a, b, c, d, 1);
+ R0(d, e, a, b, c, 2);
+ R0(c, d, e, a, b, 3);
+ R0(b, c, d, e, a, 4);
+ R0(a, b, c, d, e, 5);
+ R0(e, a, b, c, d, 6);
+ R0(d, e, a, b, c, 7);
+ R0(c, d, e, a, b, 8);
+ R0(b, c, d, e, a, 9);
+ R0(a, b, c, d, e, 10);
+ R0(e, a, b, c, d, 11);
+ R0(d, e, a, b, c, 12);
+ R0(c, d, e, a, b, 13);
+ R0(b, c, d, e, a, 14);
+ R0(a, b, c, d, e, 15);
+ R1(e, a, b, c, d, 16);
+ R1(d, e, a, b, c, 17);
+ R1(c, d, e, a, b, 18);
+ R1(b, c, d, e, a, 19);
+ R2(a, b, c, d, e, 20);
+ R2(e, a, b, c, d, 21);
+ R2(d, e, a, b, c, 22);
+ R2(c, d, e, a, b, 23);
+ R2(b, c, d, e, a, 24);
+ R2(a, b, c, d, e, 25);
+ R2(e, a, b, c, d, 26);
+ R2(d, e, a, b, c, 27);
+ R2(c, d, e, a, b, 28);
+ R2(b, c, d, e, a, 29);
+ R2(a, b, c, d, e, 30);
+ R2(e, a, b, c, d, 31);
+ R2(d, e, a, b, c, 32);
+ R2(c, d, e, a, b, 33);
+ R2(b, c, d, e, a, 34);
+ R2(a, b, c, d, e, 35);
+ R2(e, a, b, c, d, 36);
+ R2(d, e, a, b, c, 37);
+ R2(c, d, e, a, b, 38);
+ R2(b, c, d, e, a, 39);
+ R3(a, b, c, d, e, 40);
+ R3(e, a, b, c, d, 41);
+ R3(d, e, a, b, c, 42);
+ R3(c, d, e, a, b, 43);
+ R3(b, c, d, e, a, 44);
+ R3(a, b, c, d, e, 45);
+ R3(e, a, b, c, d, 46);
+ R3(d, e, a, b, c, 47);
+ R3(c, d, e, a, b, 48);
+ R3(b, c, d, e, a, 49);
+ R3(a, b, c, d, e, 50);
+ R3(e, a, b, c, d, 51);
+ R3(d, e, a, b, c, 52);
+ R3(c, d, e, a, b, 53);
+ R3(b, c, d, e, a, 54);
+ R3(a, b, c, d, e, 55);
+ R3(e, a, b, c, d, 56);
+ R3(d, e, a, b, c, 57);
+ R3(c, d, e, a, b, 58);
+ R3(b, c, d, e, a, 59);
+ R4(a, b, c, d, e, 60);
+ R4(e, a, b, c, d, 61);
+ R4(d, e, a, b, c, 62);
+ R4(c, d, e, a, b, 63);
+ R4(b, c, d, e, a, 64);
+ R4(a, b, c, d, e, 65);
+ R4(e, a, b, c, d, 66);
+ R4(d, e, a, b, c, 67);
+ R4(c, d, e, a, b, 68);
+ R4(b, c, d, e, a, 69);
+ R4(a, b, c, d, e, 70);
+ R4(e, a, b, c, d, 71);
+ R4(d, e, a, b, c, 72);
+ R4(c, d, e, a, b, 73);
+ R4(b, c, d, e, a, 74);
+ R4(a, b, c, d, e, 75);
+ R4(e, a, b, c, d, 76);
+ R4(d, e, a, b, c, 77);
+ R4(c, d, e, a, b, 78);
+ R4(b, c, d, e, a, 79);
+ /* Add the working vars back into context.state[] */
+ state[0] += a;
+ state[1] += b;
+ state[2] += c;
+ state[3] += d;
+ state[4] += e;
+ /* Wipe variables */
+ a = b = c = d = e = 0;
+#ifdef SHA1HANDSOFF
+ memset(block, '\0', sizeof(block));
+#endif
+}
+
+
+/* SHA1Init - Initialize new context */
+
+void SHA1Init(
+ SHA1_CTX * context
+)
+{
+ /* SHA1 initialization constants */
+ context->state[0] = 0x67452301;
+ context->state[1] = 0xEFCDAB89;
+ context->state[2] = 0x98BADCFE;
+ context->state[3] = 0x10325476;
+ context->state[4] = 0xC3D2E1F0;
+ context->count[0] = context->count[1] = 0;
+}
+
+
+/* Run your data through this. */
+
+/*
+ * Absorb len bytes into the hash.  count[] holds the total message length
+ * in BITS (count[0] low word, count[1] high word); a partial block is
+ * carried in context->buffer between calls.
+ */
+void SHA1Update(
+ SHA1_CTX * context,
+ const unsigned char *data,
+ uint32_t len
+)
+{
+ uint32_t i;
+
+ uint32_t j;
+
+ j = context->count[0];
+ /* Advance the bit counter; carry into the high word on overflow. */
+ if ((context->count[0] += len << 3) < j)
+ context->count[1]++;
+ context->count[1] += (len >> 29);
+ /* Bytes already buffered from a previous call. */
+ j = (j >> 3) & 63;
+ if ((j + len) > 63)
+ {
+ /* Fill and compress the buffered block, then whole input blocks. */
+ memcpy(&context->buffer[j], data, (i = 64 - j));
+ SHA1Transform(context->state, context->buffer);
+ for (; i + 63 < len; i += 64)
+ {
+ SHA1Transform(context->state, &data[i]);
+ }
+ j = 0;
+ }
+ else
+ i = 0;
+ /* Stash the remainder (possibly 0 bytes) for the next call. */
+ memcpy(&context->buffer[j], &data[i], len - i);
+}
+
+
+/* Add padding and return the message digest. */
+
+/*
+ * Finish the hash: pad with 0x80 (0200 octal) then zeros to 56 bytes mod
+ * 64 (the 504/448 masks below test the bit counter), append the 64-bit
+ * big-endian bit length, and emit the 20-byte big-endian digest.  Wipes
+ * the context and the length scratch afterwards.
+ */
+void SHA1Final(
+ unsigned char digest[20],
+ SHA1_CTX * context
+)
+{
+ unsigned i;
+
+ unsigned char finalcount[8];
+
+ unsigned char c;
+
+/* NOTE(review): the #if 0 block below is disabled upstream code kept
+ * verbatim; it even lacks a semicolon, which is harmless only because it
+ * is compiled out. */
+#if 0 /* untested "improvement" by DHR */
+ /* Convert context->count to a sequence of bytes
+ * in finalcount. Second element first, but
+ * big-endian order within element.
+ * But we do it all backwards.
+ */
+ unsigned char *fcp = &finalcount[8];
+
+ for (i = 0; i < 2; i++)
+ {
+ uint32_t t = context->count[i];
+
+ int j;
+
+ for (j = 0; j < 4; t >>= 8, j++)
+ *--fcp = (unsigned char)t
+ }
+#else
+ for (i = 0; i < 8; i++)
+ {
+ finalcount[i] = (unsigned char)((context->count[(i >= 4 ? 0 : 1)] >> ((3 - (i & 3)) * 8)) & 255); /* Endian independent */
+ }
+#endif
+ c = 0200;
+ SHA1Update(context, &c, 1);
+ /* Pad until the length field will exactly fill the block. */
+ while ((context->count[0] & 504) != 448)
+ {
+ c = 0000;
+ SHA1Update(context, &c, 1);
+ }
+ SHA1Update(context, finalcount, 8); /* Should cause a SHA1Transform() */
+ for (i = 0; i < 20; i++)
+ {
+ digest[i] = (unsigned char)
+ ((context->state[i >> 2] >> ((3 - (i & 3)) * 8)) & 255);
+ }
+ /* Wipe variables */
+ memset(context, '\0', sizeof(*context));
+ memset(&finalcount, '\0', sizeof(finalcount));
+}
+
+/*
+ * Convenience one-shot SHA-1 of a byte string into hash_out.
+ * NOTE(review): EXTRACTION CORRUPTION — the tail of this function (the
+ * "for (ii = 0; ii < len; ...)" update loop, the SHA1Final call and the
+ * closing brace) and the start of sha1.h were lost; the last line below is
+ * fused mid-statement.  Left byte-identical; recover from upstream.
+ */
+void SHA1(
+ char *hash_out,
+ const char *str,
+ int len)
+{
+ SHA1_CTX ctx;
+ unsigned int ii;
+
+ SHA1Init(&ctx);
+ for (ii = 0; ii
+100% Public Domain
+*/
+
+#include "stdint.h"
+
+typedef struct
+{
+ uint32_t state[5];
+ uint32_t count[2];
+ unsigned char buffer[64];
+} SHA1_CTX;
+
+void SHA1Transform(
+ uint32_t state[5],
+ const unsigned char buffer[64]
+);
+
+void SHA1Init(
+ SHA1_CTX * context
+);
+
+void SHA1Update(
+ SHA1_CTX * context,
+ const unsigned char *data,
+ uint32_t len
+);
+
+void SHA1Final(
+ unsigned char digest[20],
+ SHA1_CTX * context
+);
+
+void SHA1(
+ char *hash_out,
+ const char *str,
+ int len);
+
+#endif /* SHA1_H */
\ No newline at end of file
diff --git a/tools/gzinject/src/u8.c b/tools/gzinject/src/u8.c
new file mode 100644
index 000000000..fec215538
--- /dev/null
+++ b/tools/gzinject/src/u8.c
@@ -0,0 +1,248 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdint.h>
+#include <dirent.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include "u8.h"
+
+/* Release `nodec` heap-allocated node entries and their owned filename
+ * strings.  `nodec` is uint8_t, matching the 255-node cap used elsewhere.
+ * NOTE(review): reconstructed -- the patch text lost the span
+ * "<nodec;i++){ if(nodes[i]!=NULL){ if(nodes[i]->" to <...> stripping; the
+ * surviving brace count implies an inner null-check block as restored here. */
+void free_nodes(node_entry_t **nodes, uint8_t nodec){
+	for(int i=0;i<nodec;i++){
+		if(nodes[i]!=NULL){
+			if(nodes[i]->filename) free(nodes[i]->filename); /* free(NULL) is safe; guard kept from original shape */
+			free(nodes[i]);
+		}
+	}
+}
+
+/* Walk `dirname` recursively, appending one heap-allocated node_entry_t per
+ * directory entry to the array at *nodes (grown via realloc); *idx is the
+ * next free slot and total node count.  `directory` is the parent entry,
+ * `recursion` the current depth.  Side effect: the process working directory
+ * is changed into `dirname` and then back up one level -- callers rely on
+ * this chdir discipline, so the net cwd is restored only when `dirname` is a
+ * direct child of the cwd.
+ * NOTE(review): *idx is uint8_t, so archives are capped at 255 nodes and
+ * overflow would wrap silently -- confirm this limit is intended.
+ * NOTE(review): realloc/malloc/stat results are not checked for failure. */
+void get_dir_contents_recursive(const char *dirname, node_entry_t ***nodes, uint8_t *idx, node_entry_t *directory, int recursion){
+ struct stat sbuffer;
+ node_entry_t **node_array = *nodes;
+ DIR *dir;
+ struct dirent *ent;
+ chdir(dirname);
+ if ((dir = opendir(".")) != NULL) {
+ while ((ent = readdir(dir)) != NULL) {
+ /* Skips "." and ".." -- but also any hidden dot-file. */
+ if(ent->d_name[0]=='.') continue;
+ uint8_t this_idx = *idx;
+ node_entry_t **new_nodes = realloc(node_array, ((this_idx+1) * sizeof(*node_array)));
+ node_array = new_nodes;
+ size_t len = strlen(ent->d_name);
+ char *name = malloc(strlen(ent->d_name) + 1);
+ strcpy(name,ent->d_name);
+ name[len] = 0;
+ node_entry_t *node = malloc(sizeof(node_entry_t));
+ node->filename = name; /* ownership transfers to the node; freed by free_nodes */
+ node->directory = directory;
+ stat(name, &sbuffer);
+ node->node.size = sbuffer.st_size;
+ (*idx)++;
+ directory->node.size++; /* parent tracks its child count during the walk */
+ if ((sbuffer.st_mode & S_IFMT) == S_IFDIR) {
+ node->node.type = 0x0001;
+ /* For directories, data_offset temporarily stores the recursion depth;
+ * size restarts at 0 and is incremented by the recursive walk below. */
+ node->node.data_offset=recursion;
+ node->node.size = 0;
+ get_dir_contents_recursive(name,&node_array,idx,node,recursion+1);
+
+ }else{
+ node->node.type = 0x0000;
+ }
+ node_array[this_idx] = node;
+ }
+ closedir(dir);
+ }
+ chdir("..");
+ *nodes = node_array;
+}
+
+/* Arrange src[start..total_cnt) into dest in U8 serialization order for the
+ * parent `dir`: all of dir's plain files first, then each subdirectory
+ * immediately followed (recursively) by its own contents.  *pos is the next
+ * free slot in dest; after a subdirectory's subtree is emitted, its
+ * node.size is set to the index one past its last descendant (the U8
+ * directory "end index" convention, consumed when the node table is written).
+ * NOTE(review): reconstructed -- both loop headers lost the span
+ * "<total_cnt;i++){ node_entry_t *node = src[i]; if(node->" to <...>
+ * stripping in this patch; restored from the surviving remnants. */
+void sort_dir(node_entry_t **src, node_entry_t **dest, node_entry_t *dir, size_t total_cnt,int start, int *pos){
+	/* Pass 1: files belonging directly to `dir`. */
+	for(int i=start;i<total_cnt;i++){
+		node_entry_t *node = src[i];
+		if(node->directory == dir && node->node.type==0x0000){
+			dest[(*pos)++] = node;
+		}
+	}
+	/* Pass 2: subdirectories of `dir`, each followed by its subtree. */
+	for(int i=start;i<total_cnt;i++){
+		node_entry_t *node = src[i];
+		if(node->directory == dir && node->node.type==0x0001){
+			dest[(*pos)++] = node;
+			sort_dir(src,dest,node,total_cnt,start,pos);
+			node->node.size = *pos;
+		}
+	}
+}
+
+/* Build a U8 archive from the contents of directory `dir`, presumably
+ * writing it to `output` via `foutfile` -- the fopen/header-writing section
+ * is missing from this patch text (see corruption notes below).
+ * Returns 1 unconditionally. */
+int create_u8_archive(const char *dir, const char *output){
+ // Root Directory node.
+ node_entry_t rootdirnode;
+ rootdirnode.node.data_offset = 0;
+ rootdirnode.node.type=0x0001;
+ rootdirnode.node.name_offset=0;
+ rootdirnode.node.size=0;
+ rootdirnode.directory=NULL;
+ rootdirnode.filename=".";
+
+ /* Node count starts at 1 for the root itself. */
+ uint8_t nodec = 1;
+
+ node_entry_t **dirnodes = malloc(sizeof(*dirnodes));
+ dirnodes[0] = &rootdirnode;
+ get_dir_contents_recursive(dir,&dirnodes,&nodec,dirnodes[0],0);
+ /* Root's size holds the total node count (U8 convention). */
+ dirnodes[0]->node.size = nodec+1;
+ node_entry_t **sorted = malloc(sizeof(*sorted) * nodec);
+ sorted[0] = dirnodes[0];
+ int pos = 1;
+
+ /* Reorder into serialization order: files before subdirectories. */
+ sort_dir(dirnodes,sorted,sorted[0],nodec,pos,&pos);
+ free(dirnodes);
+ /* String table starts with a single NUL (root's empty name). */
+ uint8_t *string_table = malloc(1);
+ string_table[0] = 0;
+
+ int npos = 1 , dpos = 0, dirdepth = 0;
+
+ uint8_t *data = NULL;
+ chdir(dir);
+
+ /* NOTE(review): the next line is corrupted -- <...> stripping ate the loop
+ * header and start of the first statement, presumably
+ * "i<nodec;i++){ sorted[i]->node.name_offset = npos;". */
+ for(int i=0;inode.name_offset = npos;
+ size_t nlen = strlen(sorted[i]->filename) + 1;
+ uint8_t *new_table = realloc(string_table,npos + nlen);
+ if(new_table!=NULL){
+ string_table = new_table;
+ }
+
+ memcpy(string_table + npos,sorted[i]->filename,nlen);
+ string_table[npos+nlen-1]=0;
+ npos+=nlen;
+ if(sorted[i]->node.type==0x0001){
+ /* Directories: descend so subsequent file reads resolve relative paths. */
+ chdir(sorted[i]->filename);
+ dirdepth++;
+ }else{
+ /* Files: read contents into the data blob, padded to a 32-byte boundary. */
+ uint32_t padlen = addpadding(sorted[i]->node.size,32);
+ uint8_t *new_data = realloc(data,dpos + padlen);
+ if(new_data!=NULL){
+ data = new_data;
+ }
+ memset(data + dpos,0,padlen);
+ FILE *fle = fopen(sorted[i]->filename, "rb");
+ fread(data + dpos, 1, sorted[i]->node.size, fle);
+ fclose(fle);
+ /* data_offset is blob-relative here; rebased by `dataoffset` below. */
+ sorted[i]->node.data_offset = dpos;
+ dpos+=padlen;
+ }
+ }
+
+ /* NOTE(review): a whole section is missing before/inside the next line --
+ * the code that opens `output` as `foutfile`, writes the u8_header, and
+ * computes `dataoffset` and `padcount` is not present in this patch text
+ * (likely eaten by the same <...> stripping).  The surviving remnant is
+ * presumably "i<nodec;i++){ u8_node node = sorted[i]->node;". */
+ for(int i=0;inode;
+ if(node.type==0x0000){
+ node.data_offset+=dataoffset;
+ }
+ /* Node fields are stored big-endian on disk. */
+ node.data_offset = REVERSEENDIAN32(node.data_offset);
+ node.size = REVERSEENDIAN32(node.size);
+ node.name_offset = REVERSEENDIAN16(node.name_offset);
+ fwrite(&node, 1, sizeof(u8_node), foutfile);
+ }
+ /* sorted[0] is the stack-allocated root; only heap nodes are freed. */
+ free_nodes(sorted + 1,nodec - 1);
+ free(sorted);
+ fwrite(string_table, 1, npos, foutfile);
+ free(string_table);
+
+ /* Zero padding between string table and file data. */
+ uint8_t *padding = calloc(padcount, sizeof(uint8_t));
+ fwrite(padding, 1, padcount, foutfile);
+ free(padding);
+
+ fwrite(data, 1, dpos, foutfile);
+ free(data);
+
+ fclose(foutfile);
+
+ return 1;
+}
+
+/* Unpack the in-memory U8 archive image `data` into `outdir`, recreating its
+ * directory tree on disk.  All multi-byte node/header fields are big-endian
+ * in the image and converted with be16/be32.  The process working directory
+ * is moved around during extraction and walked back up at the end.
+ * Always returns 1. */
+int extract_u8_archive(uint8_t *data, const char *outdir){
+	u8_header hdr;
+	u8_node root;
+
+	mkdir(outdir, 0755);
+	chdir(outdir);
+
+	/* Read the header, then the root node, tracking a cursor into `data`. */
+	size_t cursor = 0;
+	memcpy(&hdr, data + cursor, sizeof(hdr));
+	cursor += sizeof(hdr);
+	memcpy(&root, data + cursor, sizeof(u8_node));
+	cursor += sizeof(u8_node);
+
+	/* The root's size field is the total node count, root included. */
+	uint32_t count = be32((uint8_t*)&root.size) - 1;
+	u8_node *table = malloc(sizeof(u8_node)*count);
+	memcpy(table, data + cursor, sizeof(u8_node)*count);
+	cursor += sizeof(u8_node)*count;
+
+	/* Everything between the node table and the payload is the string table. */
+	uint32_t payload_off = be32((uint8_t*)&hdr.data_offset);
+	size_t names_len = payload_off - sizeof(hdr) - (count + 1) * sizeof(u8_node);
+	uint8_t *names = malloc(names_len);
+	memcpy(names, data + cursor, names_len);
+
+	int depth = 0;
+	for (int j = 0; j < count; j++) {
+		u8_node *cur = &table[j];
+		uint32_t off = be32((uint8_t*)&cur->data_offset);
+		uint32_t len = be32((uint8_t*)&cur->size);
+		uint16_t noff = be16((uint8_t*)&cur->name_offset);
+		uint16_t ntype = be16((uint8_t*)&cur->type);
+		char *fname = (char*)&names[noff];
+		if (ntype == 0x0000) {
+			/* Plain file: copy its payload bytes straight out of the image. */
+			FILE *out = fopen(fname, "wb");
+			fwrite(data + off, 1, len, out);
+			fclose(out);
+		} else if (ntype == 0x0100) {
+			/* Directory: its data_offset holds the parent depth, so climb back
+			 * up until we sit at that depth before creating and entering it. */
+			while (depth > off + 1) {
+				chdir("..");
+				depth--;
+			}
+			mkdir(fname, 0755);
+			chdir(fname);
+			depth++;
+		}
+	}
+	/* Walk back out of whatever directory we ended in (runs at least once,
+	 * stepping out of outdir itself when depth is already 0 -- matches the
+	 * original's do/while behavior). */
+	do {
+		chdir("..");
+		depth--;
+	} while (depth > 0);
+	free(names);
+	free(table);
+	return 1;
+}
\ No newline at end of file
diff --git a/tools/gzinject/src/u8.h b/tools/gzinject/src/u8.h
new file mode 100644
index 000000000..7519c692a
--- /dev/null
+++ b/tools/gzinject/src/u8.h
@@ -0,0 +1,34 @@
+#ifndef U8_H_
+#define U8_H_
+
+#include
+#include "gzinject.h"
+
+/* On-disk U8 node table entry (all fields big-endian when serialized). */
+typedef struct {
+ uint16_t type; /* 0x0000 = file, 0x0001 = directory (reads back as 0x0100 from raw big-endian bytes in extract_u8_archive) */
+ uint16_t name_offset; /* byte offset of the entry's name in the string table */
+ uint32_t data_offset; /* file: payload offset; directory: parent depth (see u8.c) */
+ uint32_t size; /* file: byte length; directory: index past its last descendant node */
+}u8_node;
+
+/* U8 archive header. */
+typedef struct
+{
+ uint32_t tag; /* format magic -- presumably 0x55AA382D ("U.8-"); TODO confirm */
+ uint32_t rootnode_offset; /* offset of the root node; presumably sizeof(u8_header) -- verify against writer */
+ uint32_t header_size; /* NOTE(review): presumably node table + string table size; not read by the visible code */
+ uint32_t data_offset; /* offset where file payload data begins (used to locate the string table's end) */
+ uint8_t padding[16];
+} u8_header;
+
+typedef struct node_entry_s node_entry_t;
+
+/* In-memory wrapper pairing an on-disk node with its name and parent link,
+ * used while building an archive from a directory tree. */
+struct node_entry_s {
+ u8_node node; /* the raw node as it will be serialized */
+ char *filename; /* heap-owned copy of the entry's name (freed by free_nodes) */
+ node_entry_t *directory; /* parent directory entry, NULL for the root */
+};
+
+int create_u8_archive(const char *dir, const char *output);
+int extract_u8_archive(uint8_t *data, const char *outdir);
+
+#endif
\ No newline at end of file
diff --git a/tools/z64compress/.editorconfig b/tools/z64compress/.editorconfig
new file mode 100644
index 000000000..342ff359c
--- /dev/null
+++ b/tools/z64compress/.editorconfig
@@ -0,0 +1,15 @@
+root = true
+
+[*]
+end_of_line = lf
+insert_final_newline = true
+
+# Matches multiple files with brace expansion notation
+[*.{c,h,ch}]
+charset = utf-8
+indent_style = tab
+indent_size = 3
+trim_trailing_whitespace = false
+
+[*.md]
+trim_trailing_whitespace = false
diff --git a/tools/z64compress/.gitignore b/tools/z64compress/.gitignore
new file mode 100644
index 000000000..6a47e0517
--- /dev/null
+++ b/tools/z64compress/.gitignore
@@ -0,0 +1,3 @@
+bin/
+o/
+z64compress
diff --git a/tools/z64compress/.gitrepo b/tools/z64compress/.gitrepo
new file mode 100644
index 000000000..0165907a9
--- /dev/null
+++ b/tools/z64compress/.gitrepo
@@ -0,0 +1,12 @@
+; DO NOT EDIT (unless you know what you are doing)
+;
+; This subdirectory is a git "subrepo", and this file is maintained by the
+; git-subrepo command. See https://github.com/ingydotnet/git-subrepo#readme
+;
+[subrepo]
+ remote = https://github.com/z64tools/z64compress.git
+ branch = main
+ commit = 331039828b0e9c995b8727a64b5bc083c78d1476
+ parent = ce3fe6d65dd1b46509f3bbcb538e9bcc56f2cfa3
+ method = merge
+ cmdver = 0.4.5
diff --git a/tools/z64compress/LICENSE b/tools/z64compress/LICENSE
new file mode 100644
index 000000000..f288702d2
--- /dev/null
+++ b/tools/z64compress/LICENSE
@@ -0,0 +1,674 @@
+ GNU GENERAL PUBLIC LICENSE
+ Version 3, 29 June 2007
+
+ Copyright (C) 2007 Free Software Foundation, Inc.
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+ Preamble
+
+ The GNU General Public License is a free, copyleft license for
+software and other kinds of works.
+
+ The licenses for most software and other practical works are designed
+to take away your freedom to share and change the works. By contrast,
+the GNU General Public License is intended to guarantee your freedom to
+share and change all versions of a program--to make sure it remains free
+software for all its users. We, the Free Software Foundation, use the
+GNU General Public License for most of our software; it applies also to
+any other work released this way by its authors. You can apply it to
+your programs, too.
+
+ When we speak of free software, we are referring to freedom, not
+price. Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+them if you wish), that you receive source code or can get it if you
+want it, that you can change the software or use pieces of it in new
+free programs, and that you know you can do these things.
+
+ To protect your rights, we need to prevent others from denying you
+these rights or asking you to surrender the rights. Therefore, you have
+certain responsibilities if you distribute copies of the software, or if
+you modify it: responsibilities to respect the freedom of others.
+
+ For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must pass on to the recipients the same
+freedoms that you received. You must make sure that they, too, receive
+or can get the source code. And you must show them these terms so they
+know their rights.
+
+ Developers that use the GNU GPL protect your rights with two steps:
+(1) assert copyright on the software, and (2) offer you this License
+giving you legal permission to copy, distribute and/or modify it.
+
+ For the developers' and authors' protection, the GPL clearly explains
+that there is no warranty for this free software. For both users' and
+authors' sake, the GPL requires that modified versions be marked as
+changed, so that their problems will not be attributed erroneously to
+authors of previous versions.
+
+ Some devices are designed to deny users access to install or run
+modified versions of the software inside them, although the manufacturer
+can do so. This is fundamentally incompatible with the aim of
+protecting users' freedom to change the software. The systematic
+pattern of such abuse occurs in the area of products for individuals to
+use, which is precisely where it is most unacceptable. Therefore, we
+have designed this version of the GPL to prohibit the practice for those
+products. If such problems arise substantially in other domains, we
+stand ready to extend this provision to those domains in future versions
+of the GPL, as needed to protect the freedom of users.
+
+ Finally, every program is threatened constantly by software patents.
+States should not allow patents to restrict development and use of
+software on general-purpose computers, but in those that do, we wish to
+avoid the special danger that patents applied to a free program could
+make it effectively proprietary. To prevent this, the GPL assures that
+patents cannot be used to render the program non-free.
+
+ The precise terms and conditions for copying, distribution and
+modification follow.
+
+ TERMS AND CONDITIONS
+
+ 0. Definitions.
+
+ "This License" refers to version 3 of the GNU General Public License.
+
+ "Copyright" also means copyright-like laws that apply to other kinds of
+works, such as semiconductor masks.
+
+ "The Program" refers to any copyrightable work licensed under this
+License. Each licensee is addressed as "you". "Licensees" and
+"recipients" may be individuals or organizations.
+
+ To "modify" a work means to copy from or adapt all or part of the work
+in a fashion requiring copyright permission, other than the making of an
+exact copy. The resulting work is called a "modified version" of the
+earlier work or a work "based on" the earlier work.
+
+ A "covered work" means either the unmodified Program or a work based
+on the Program.
+
+ To "propagate" a work means to do anything with it that, without
+permission, would make you directly or secondarily liable for
+infringement under applicable copyright law, except executing it on a
+computer or modifying a private copy. Propagation includes copying,
+distribution (with or without modification), making available to the
+public, and in some countries other activities as well.
+
+ To "convey" a work means any kind of propagation that enables other
+parties to make or receive copies. Mere interaction with a user through
+a computer network, with no transfer of a copy, is not conveying.
+
+ An interactive user interface displays "Appropriate Legal Notices"
+to the extent that it includes a convenient and prominently visible
+feature that (1) displays an appropriate copyright notice, and (2)
+tells the user that there is no warranty for the work (except to the
+extent that warranties are provided), that licensees may convey the
+work under this License, and how to view a copy of this License. If
+the interface presents a list of user commands or options, such as a
+menu, a prominent item in the list meets this criterion.
+
+ 1. Source Code.
+
+ The "source code" for a work means the preferred form of the work
+for making modifications to it. "Object code" means any non-source
+form of a work.
+
+ A "Standard Interface" means an interface that either is an official
+standard defined by a recognized standards body, or, in the case of
+interfaces specified for a particular programming language, one that
+is widely used among developers working in that language.
+
+ The "System Libraries" of an executable work include anything, other
+than the work as a whole, that (a) is included in the normal form of
+packaging a Major Component, but which is not part of that Major
+Component, and (b) serves only to enable use of the work with that
+Major Component, or to implement a Standard Interface for which an
+implementation is available to the public in source code form. A
+"Major Component", in this context, means a major essential component
+(kernel, window system, and so on) of the specific operating system
+(if any) on which the executable work runs, or a compiler used to
+produce the work, or an object code interpreter used to run it.
+
+ The "Corresponding Source" for a work in object code form means all
+the source code needed to generate, install, and (for an executable
+work) run the object code and to modify the work, including scripts to
+control those activities. However, it does not include the work's
+System Libraries, or general-purpose tools or generally available free
+programs which are used unmodified in performing those activities but
+which are not part of the work. For example, Corresponding Source
+includes interface definition files associated with source files for
+the work, and the source code for shared libraries and dynamically
+linked subprograms that the work is specifically designed to require,
+such as by intimate data communication or control flow between those
+subprograms and other parts of the work.
+
+ The Corresponding Source need not include anything that users
+can regenerate automatically from other parts of the Corresponding
+Source.
+
+ The Corresponding Source for a work in source code form is that
+same work.
+
+ 2. Basic Permissions.
+
+ All rights granted under this License are granted for the term of
+copyright on the Program, and are irrevocable provided the stated
+conditions are met. This License explicitly affirms your unlimited
+permission to run the unmodified Program. The output from running a
+covered work is covered by this License only if the output, given its
+content, constitutes a covered work. This License acknowledges your
+rights of fair use or other equivalent, as provided by copyright law.
+
+ You may make, run and propagate covered works that you do not
+convey, without conditions so long as your license otherwise remains
+in force. You may convey covered works to others for the sole purpose
+of having them make modifications exclusively for you, or provide you
+with facilities for running those works, provided that you comply with
+the terms of this License in conveying all material for which you do
+not control copyright. Those thus making or running the covered works
+for you must do so exclusively on your behalf, under your direction
+and control, on terms that prohibit them from making any copies of
+your copyrighted material outside their relationship with you.
+
+ Conveying under any other circumstances is permitted solely under
+the conditions stated below. Sublicensing is not allowed; section 10
+makes it unnecessary.
+
+ 3. Protecting Users' Legal Rights From Anti-Circumvention Law.
+
+ No covered work shall be deemed part of an effective technological
+measure under any applicable law fulfilling obligations under article
+11 of the WIPO copyright treaty adopted on 20 December 1996, or
+similar laws prohibiting or restricting circumvention of such
+measures.
+
+ When you convey a covered work, you waive any legal power to forbid
+circumvention of technological measures to the extent such circumvention
+is effected by exercising rights under this License with respect to
+the covered work, and you disclaim any intention to limit operation or
+modification of the work as a means of enforcing, against the work's
+users, your or third parties' legal rights to forbid circumvention of
+technological measures.
+
+ 4. Conveying Verbatim Copies.
+
+ You may convey verbatim copies of the Program's source code as you
+receive it, in any medium, provided that you conspicuously and
+appropriately publish on each copy an appropriate copyright notice;
+keep intact all notices stating that this License and any
+non-permissive terms added in accord with section 7 apply to the code;
+keep intact all notices of the absence of any warranty; and give all
+recipients a copy of this License along with the Program.
+
+ You may charge any price or no price for each copy that you convey,
+and you may offer support or warranty protection for a fee.
+
+ 5. Conveying Modified Source Versions.
+
+ You may convey a work based on the Program, or the modifications to
+produce it from the Program, in the form of source code under the
+terms of section 4, provided that you also meet all of these conditions:
+
+ a) The work must carry prominent notices stating that you modified
+ it, and giving a relevant date.
+
+ b) The work must carry prominent notices stating that it is
+ released under this License and any conditions added under section
+ 7. This requirement modifies the requirement in section 4 to
+ "keep intact all notices".
+
+ c) You must license the entire work, as a whole, under this
+ License to anyone who comes into possession of a copy. This
+ License will therefore apply, along with any applicable section 7
+ additional terms, to the whole of the work, and all its parts,
+ regardless of how they are packaged. This License gives no
+ permission to license the work in any other way, but it does not
+ invalidate such permission if you have separately received it.
+
+ d) If the work has interactive user interfaces, each must display
+ Appropriate Legal Notices; however, if the Program has interactive
+ interfaces that do not display Appropriate Legal Notices, your
+ work need not make them do so.
+
+ A compilation of a covered work with other separate and independent
+works, which are not by their nature extensions of the covered work,
+and which are not combined with it such as to form a larger program,
+in or on a volume of a storage or distribution medium, is called an
+"aggregate" if the compilation and its resulting copyright are not
+used to limit the access or legal rights of the compilation's users
+beyond what the individual works permit. Inclusion of a covered work
+in an aggregate does not cause this License to apply to the other
+parts of the aggregate.
+
+ 6. Conveying Non-Source Forms.
+
+ You may convey a covered work in object code form under the terms
+of sections 4 and 5, provided that you also convey the
+machine-readable Corresponding Source under the terms of this License,
+in one of these ways:
+
+ a) Convey the object code in, or embodied in, a physical product
+ (including a physical distribution medium), accompanied by the
+ Corresponding Source fixed on a durable physical medium
+ customarily used for software interchange.
+
+ b) Convey the object code in, or embodied in, a physical product
+ (including a physical distribution medium), accompanied by a
+ written offer, valid for at least three years and valid for as
+ long as you offer spare parts or customer support for that product
+ model, to give anyone who possesses the object code either (1) a
+ copy of the Corresponding Source for all the software in the
+ product that is covered by this License, on a durable physical
+ medium customarily used for software interchange, for a price no
+ more than your reasonable cost of physically performing this
+ conveying of source, or (2) access to copy the
+ Corresponding Source from a network server at no charge.
+
+ c) Convey individual copies of the object code with a copy of the
+ written offer to provide the Corresponding Source. This
+ alternative is allowed only occasionally and noncommercially, and
+ only if you received the object code with such an offer, in accord
+ with subsection 6b.
+
+ d) Convey the object code by offering access from a designated
+ place (gratis or for a charge), and offer equivalent access to the
+ Corresponding Source in the same way through the same place at no
+ further charge. You need not require recipients to copy the
+ Corresponding Source along with the object code. If the place to
+ copy the object code is a network server, the Corresponding Source
+ may be on a different server (operated by you or a third party)
+ that supports equivalent copying facilities, provided you maintain
+ clear directions next to the object code saying where to find the
+ Corresponding Source. Regardless of what server hosts the
+ Corresponding Source, you remain obligated to ensure that it is
+ available for as long as needed to satisfy these requirements.
+
+ e) Convey the object code using peer-to-peer transmission, provided
+ you inform other peers where the object code and Corresponding
+ Source of the work are being offered to the general public at no
+ charge under subsection 6d.
+
+ A separable portion of the object code, whose source code is excluded
+from the Corresponding Source as a System Library, need not be
+included in conveying the object code work.
+
+ A "User Product" is either (1) a "consumer product", which means any
+tangible personal property which is normally used for personal, family,
+or household purposes, or (2) anything designed or sold for incorporation
+into a dwelling. In determining whether a product is a consumer product,
+doubtful cases shall be resolved in favor of coverage. For a particular
+product received by a particular user, "normally used" refers to a
+typical or common use of that class of product, regardless of the status
+of the particular user or of the way in which the particular user
+actually uses, or expects or is expected to use, the product. A product
+is a consumer product regardless of whether the product has substantial
+commercial, industrial or non-consumer uses, unless such uses represent
+the only significant mode of use of the product.
+
+ "Installation Information" for a User Product means any methods,
+procedures, authorization keys, or other information required to install
+and execute modified versions of a covered work in that User Product from
+a modified version of its Corresponding Source. The information must
+suffice to ensure that the continued functioning of the modified object
+code is in no case prevented or interfered with solely because
+modification has been made.
+
+ If you convey an object code work under this section in, or with, or
+specifically for use in, a User Product, and the conveying occurs as
+part of a transaction in which the right of possession and use of the
+User Product is transferred to the recipient in perpetuity or for a
+fixed term (regardless of how the transaction is characterized), the
+Corresponding Source conveyed under this section must be accompanied
+by the Installation Information. But this requirement does not apply
+if neither you nor any third party retains the ability to install
+modified object code on the User Product (for example, the work has
+been installed in ROM).
+
+ The requirement to provide Installation Information does not include a
+requirement to continue to provide support service, warranty, or updates
+for a work that has been modified or installed by the recipient, or for
+the User Product in which it has been modified or installed. Access to a
+network may be denied when the modification itself materially and
+adversely affects the operation of the network or violates the rules and
+protocols for communication across the network.
+
+ Corresponding Source conveyed, and Installation Information provided,
+in accord with this section must be in a format that is publicly
+documented (and with an implementation available to the public in
+source code form), and must require no special password or key for
+unpacking, reading or copying.
+
+ 7. Additional Terms.
+
+ "Additional permissions" are terms that supplement the terms of this
+License by making exceptions from one or more of its conditions.
+Additional permissions that are applicable to the entire Program shall
+be treated as though they were included in this License, to the extent
+that they are valid under applicable law. If additional permissions
+apply only to part of the Program, that part may be used separately
+under those permissions, but the entire Program remains governed by
+this License without regard to the additional permissions.
+
+ When you convey a copy of a covered work, you may at your option
+remove any additional permissions from that copy, or from any part of
+it. (Additional permissions may be written to require their own
+removal in certain cases when you modify the work.) You may place
+additional permissions on material, added by you to a covered work,
+for which you have or can give appropriate copyright permission.
+
+ Notwithstanding any other provision of this License, for material you
+add to a covered work, you may (if authorized by the copyright holders of
+that material) supplement the terms of this License with terms:
+
+ a) Disclaiming warranty or limiting liability differently from the
+ terms of sections 15 and 16 of this License; or
+
+ b) Requiring preservation of specified reasonable legal notices or
+ author attributions in that material or in the Appropriate Legal
+ Notices displayed by works containing it; or
+
+ c) Prohibiting misrepresentation of the origin of that material, or
+ requiring that modified versions of such material be marked in
+ reasonable ways as different from the original version; or
+
+ d) Limiting the use for publicity purposes of names of licensors or
+ authors of the material; or
+
+ e) Declining to grant rights under trademark law for use of some
+ trade names, trademarks, or service marks; or
+
+ f) Requiring indemnification of licensors and authors of that
+ material by anyone who conveys the material (or modified versions of
+ it) with contractual assumptions of liability to the recipient, for
+ any liability that these contractual assumptions directly impose on
+ those licensors and authors.
+
+ All other non-permissive additional terms are considered "further
+restrictions" within the meaning of section 10. If the Program as you
+received it, or any part of it, contains a notice stating that it is
+governed by this License along with a term that is a further
+restriction, you may remove that term. If a license document contains
+a further restriction but permits relicensing or conveying under this
+License, you may add to a covered work material governed by the terms
+of that license document, provided that the further restriction does
+not survive such relicensing or conveying.
+
+ If you add terms to a covered work in accord with this section, you
+must place, in the relevant source files, a statement of the
+additional terms that apply to those files, or a notice indicating
+where to find the applicable terms.
+
+ Additional terms, permissive or non-permissive, may be stated in the
+form of a separately written license, or stated as exceptions;
+the above requirements apply either way.
+
+ 8. Termination.
+
+ You may not propagate or modify a covered work except as expressly
+provided under this License. Any attempt otherwise to propagate or
+modify it is void, and will automatically terminate your rights under
+this License (including any patent licenses granted under the third
+paragraph of section 11).
+
+ However, if you cease all violation of this License, then your
+license from a particular copyright holder is reinstated (a)
+provisionally, unless and until the copyright holder explicitly and
+finally terminates your license, and (b) permanently, if the copyright
+holder fails to notify you of the violation by some reasonable means
+prior to 60 days after the cessation.
+
+ Moreover, your license from a particular copyright holder is
+reinstated permanently if the copyright holder notifies you of the
+violation by some reasonable means, this is the first time you have
+received notice of violation of this License (for any work) from that
+copyright holder, and you cure the violation prior to 30 days after
+your receipt of the notice.
+
+ Termination of your rights under this section does not terminate the
+licenses of parties who have received copies or rights from you under
+this License. If your rights have been terminated and not permanently
+reinstated, you do not qualify to receive new licenses for the same
+material under section 10.
+
+ 9. Acceptance Not Required for Having Copies.
+
+ You are not required to accept this License in order to receive or
+run a copy of the Program. Ancillary propagation of a covered work
+occurring solely as a consequence of using peer-to-peer transmission
+to receive a copy likewise does not require acceptance. However,
+nothing other than this License grants you permission to propagate or
+modify any covered work. These actions infringe copyright if you do
+not accept this License. Therefore, by modifying or propagating a
+covered work, you indicate your acceptance of this License to do so.
+
+ 10. Automatic Licensing of Downstream Recipients.
+
+ Each time you convey a covered work, the recipient automatically
+receives a license from the original licensors, to run, modify and
+propagate that work, subject to this License. You are not responsible
+for enforcing compliance by third parties with this License.
+
+ An "entity transaction" is a transaction transferring control of an
+organization, or substantially all assets of one, or subdividing an
+organization, or merging organizations. If propagation of a covered
+work results from an entity transaction, each party to that
+transaction who receives a copy of the work also receives whatever
+licenses to the work the party's predecessor in interest had or could
+give under the previous paragraph, plus a right to possession of the
+Corresponding Source of the work from the predecessor in interest, if
+the predecessor has it or can get it with reasonable efforts.
+
+ You may not impose any further restrictions on the exercise of the
+rights granted or affirmed under this License. For example, you may
+not impose a license fee, royalty, or other charge for exercise of
+rights granted under this License, and you may not initiate litigation
+(including a cross-claim or counterclaim in a lawsuit) alleging that
+any patent claim is infringed by making, using, selling, offering for
+sale, or importing the Program or any portion of it.
+
+ 11. Patents.
+
+ A "contributor" is a copyright holder who authorizes use under this
+License of the Program or a work on which the Program is based. The
+work thus licensed is called the contributor's "contributor version".
+
+ A contributor's "essential patent claims" are all patent claims
+owned or controlled by the contributor, whether already acquired or
+hereafter acquired, that would be infringed by some manner, permitted
+by this License, of making, using, or selling its contributor version,
+but do not include claims that would be infringed only as a
+consequence of further modification of the contributor version. For
+purposes of this definition, "control" includes the right to grant
+patent sublicenses in a manner consistent with the requirements of
+this License.
+
+ Each contributor grants you a non-exclusive, worldwide, royalty-free
+patent license under the contributor's essential patent claims, to
+make, use, sell, offer for sale, import and otherwise run, modify and
+propagate the contents of its contributor version.
+
+ In the following three paragraphs, a "patent license" is any express
+agreement or commitment, however denominated, not to enforce a patent
+(such as an express permission to practice a patent or covenant not to
+sue for patent infringement). To "grant" such a patent license to a
+party means to make such an agreement or commitment not to enforce a
+patent against the party.
+
+ If you convey a covered work, knowingly relying on a patent license,
+and the Corresponding Source of the work is not available for anyone
+to copy, free of charge and under the terms of this License, through a
+publicly available network server or other readily accessible means,
+then you must either (1) cause the Corresponding Source to be so
+available, or (2) arrange to deprive yourself of the benefit of the
+patent license for this particular work, or (3) arrange, in a manner
+consistent with the requirements of this License, to extend the patent
+license to downstream recipients. "Knowingly relying" means you have
+actual knowledge that, but for the patent license, your conveying the
+covered work in a country, or your recipient's use of the covered work
+in a country, would infringe one or more identifiable patents in that
+country that you have reason to believe are valid.
+
+ If, pursuant to or in connection with a single transaction or
+arrangement, you convey, or propagate by procuring conveyance of, a
+covered work, and grant a patent license to some of the parties
+receiving the covered work authorizing them to use, propagate, modify
+or convey a specific copy of the covered work, then the patent license
+you grant is automatically extended to all recipients of the covered
+work and works based on it.
+
+ A patent license is "discriminatory" if it does not include within
+the scope of its coverage, prohibits the exercise of, or is
+conditioned on the non-exercise of one or more of the rights that are
+specifically granted under this License. You may not convey a covered
+work if you are a party to an arrangement with a third party that is
+in the business of distributing software, under which you make payment
+to the third party based on the extent of your activity of conveying
+the work, and under which the third party grants, to any of the
+parties who would receive the covered work from you, a discriminatory
+patent license (a) in connection with copies of the covered work
+conveyed by you (or copies made from those copies), or (b) primarily
+for and in connection with specific products or compilations that
+contain the covered work, unless you entered into that arrangement,
+or that patent license was granted, prior to 28 March 2007.
+
+ Nothing in this License shall be construed as excluding or limiting
+any implied license or other defenses to infringement that may
+otherwise be available to you under applicable patent law.
+
+ 12. No Surrender of Others' Freedom.
+
+ If conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot convey a
+covered work so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you may
+not convey it at all. For example, if you agree to terms that obligate you
+to collect a royalty for further conveying from those to whom you convey
+the Program, the only way you could satisfy both those terms and this
+License would be to refrain entirely from conveying the Program.
+
+ 13. Use with the GNU Affero General Public License.
+
+ Notwithstanding any other provision of this License, you have
+permission to link or combine any covered work with a work licensed
+under version 3 of the GNU Affero General Public License into a single
+combined work, and to convey the resulting work. The terms of this
+License will continue to apply to the part which is the covered work,
+but the special requirements of the GNU Affero General Public License,
+section 13, concerning interaction through a network will apply to the
+combination as such.
+
+ 14. Revised Versions of this License.
+
+ The Free Software Foundation may publish revised and/or new versions of
+the GNU General Public License from time to time. Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+ Each version is given a distinguishing version number. If the
+Program specifies that a certain numbered version of the GNU General
+Public License "or any later version" applies to it, you have the
+option of following the terms and conditions either of that numbered
+version or of any later version published by the Free Software
+Foundation. If the Program does not specify a version number of the
+GNU General Public License, you may choose any version ever published
+by the Free Software Foundation.
+
+ If the Program specifies that a proxy can decide which future
+versions of the GNU General Public License can be used, that proxy's
+public statement of acceptance of a version permanently authorizes you
+to choose that version for the Program.
+
+ Later license versions may give you additional or different
+permissions. However, no additional obligations are imposed on any
+author or copyright holder as a result of your choosing to follow a
+later version.
+
+ 15. Disclaimer of Warranty.
+
+ THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
+APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
+HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
+OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
+THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
+IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
+ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+
+ 16. Limitation of Liability.
+
+ IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
+THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
+GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
+USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
+DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
+PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
+EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
+SUCH DAMAGES.
+
+ 17. Interpretation of Sections 15 and 16.
+
+ If the disclaimer of warranty and limitation of liability provided
+above cannot be given local legal effect according to their terms,
+reviewing courts shall apply local law that most closely approximates
+an absolute waiver of all civil liability in connection with the
+Program, unless a warranty or assumption of liability accompanies a
+copy of the Program in return for a fee.
+
+ END OF TERMS AND CONDITIONS
+
+ How to Apply These Terms to Your New Programs
+
+ If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+ To do so, attach the following notices to the program. It is safest
+to attach them to the start of each source file to most effectively
+state the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+    <one line to give the program's name and a brief idea of what it does.>
+    Copyright (C) <year>  <name of author>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <https://www.gnu.org/licenses/>.
+
+Also add information on how to contact you by electronic and paper mail.
+
+ If the program does terminal interaction, make it output a short
+notice like this when it starts in an interactive mode:
+
+    <program>  Copyright (C) <year>  <name of author>
+ This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+ This is free software, and you are welcome to redistribute it
+ under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License. Of course, your program's commands
+might be different; for a GUI interface, you would use an "about box".
+
+ You should also get your employer (if you work as a programmer) or school,
+if any, to sign a "copyright disclaimer" for the program, if necessary.
+For more information on this, and how to apply and follow the GNU GPL, see
+<https://www.gnu.org/licenses/>.
+
+ The GNU General Public License does not permit incorporating your program
+into proprietary programs. If your program is a subroutine library, you
+may consider it more useful to permit linking proprietary applications with
+the library. If this is what you want to do, use the GNU Lesser General
+Public License instead of this License. But first, please read
+<https://www.gnu.org/licenses/why-not-lgpl.html>.
diff --git a/tools/z64compress/Makefile b/tools/z64compress/Makefile
new file mode 100644
index 000000000..7a83c6190
--- /dev/null
+++ b/tools/z64compress/Makefile
@@ -0,0 +1,49 @@
+CC := gcc
+CFLAGS := -DNDEBUG -s -Os -flto -Wall -Wextra
+
+# Target platform, specify with TARGET= on the command line, linux64 is default.
+# Currently supported: linux64, linux32, win32
+TARGET ?= linux64
+
+ifeq ($(TARGET),linux32)
+ TARGET_CFLAGS := -m32
+else ifeq ($(TARGET),win32)
+# If using a cross compiler, specify the compiler executable on the command line.
+# make TARGET=win32 CC=~/c/mxe/usr/bin/i686-w64-mingw32.static-gcc
+ TARGET_LIBS := -mconsole -municode
+else ifneq ($(TARGET),linux64)
+ $(error Supported targets: linux64, linux32, win32)
+endif
+
+# Whether to use native optimizations, specify with NATIVE_OPT=0/1 on the command line, default is 0.
+# This is not supported by all compilers which is particularly an issue on Mac, and may inhibit tests.
+NATIVE_OPT ?= 0
+ifeq ($(NATIVE_OPT),1)
+ TARGET_CFLAGS += -march=native -mtune=native
+endif
+
+OBJ_DIR := o/$(TARGET)
+
+$(OBJ_DIR)/src/enc/%.o: CFLAGS := -DNDEBUG -s -Ofast -flto -Wall -Isrc/enc/libdeflate
+
+SRC_DIRS := $(shell find src -type d)
+C_DIRS := $(shell find src -type d -not -path "src/enc/libdeflate/*")
+C_FILES := $(foreach dir,$(C_DIRS),$(wildcard $(dir)/*.c))
+C_FILES += src/enc/libdeflate/lib/deflate_compress.c src/enc/libdeflate/lib/utils.c
+O_FILES := $(foreach f,$(C_FILES:.c=.o),$(OBJ_DIR)/$f)
+
+# Make build directories
+$(shell mkdir -p $(foreach dir,$(SRC_DIRS),$(OBJ_DIR)/$(dir)))
+
+.PHONY: all clean
+
+all: z64compress
+
+z64compress: $(O_FILES)
+ $(CC) $(TARGET_CFLAGS) $(CFLAGS) $(O_FILES) -lm -lpthread -lz $(TARGET_LIBS) -o z64compress
+
+$(OBJ_DIR)/%.o: %.c
+ $(CC) -c $(TARGET_CFLAGS) $(CFLAGS) $< -o $@
+
+clean:
+ $(RM) -rf z64compress bin o
diff --git a/tools/z64compress/README.md b/tools/z64compress/README.md
new file mode 100644
index 000000000..4e9a6ba86
--- /dev/null
+++ b/tools/z64compress/README.md
@@ -0,0 +1,102 @@
+# z64compress
+
+`z64compress` is a program for compressing Zelda 64 roms: be they retail, hacked traditionally, or custom-built from the [`Ocarina of Time`](https://github.com/zeldaret/oot) or [`Majora's Mask`](https://github.com/zeldaret/mm) reverse engineering projects. It is written in highly efficient C and leverages the power of multithreading to make compression as fast as possible. To reduce overhead on subsequent compressions, an optional cache directory can be specified.
+
+In addition to the default `yaz`, it supports some faster and more compact algorithms such as `DEFLATE`, `lzo`, `ucl`, and `aplib`. In order to use these, grab patches or code from my [`z64enc` repository](https://github.com/z64me/z64enc).
+
+If you add an algorithm, please make sure `valgrind` reports no memory leaks or other errors before making a pull request. Thank you!
+
+(By the way, `valgrind` works better without the `-march=native -mtune=native` optimizations, so turn those off when testing `valgrind`.)
+
+## Usage
+This is a command line application. Learn from these common examples and adapt the arguments to your needs:
+```
+ compressing oot debug
+ --in "path/to/in.z64"
+ --out "path/to/out.z64"
+ --mb 32
+ --codec yaz
+ --cache "path/to/cache"
+ --dma "0x12F70,1548"
+ --compress "9-14,28-END"
+ --threads 4
+
+ compressing oot ntsc 1.0
+ --in "path/to/in.z64"
+ --out "path/to/out.z64"
+ --mb 32
+ --codec yaz
+ --cache "path/to/cache"
+ --dma "0x7430,1526"
+ --compress "10-14,27-END"
+ --threads 4
+
+ compressing mm usa
+ --in "path/to/in.z64"
+ --out "path/to/out.z64"
+ --mb 32
+ --codec yaz
+ --cache "path/to/cache"
+ --dma "0x1A500,1568"
+ --compress "10-14,23,24,31-END"
+ --skip "1127"
+ --repack "15-20,22"
+ --threads 4
+```
+
+## Arguments
+```
+ --in uncompressed input rom
+
+ --out compressed output rom
+
+ --matching attempt matching compression at the cost of
+ some optimizations and reduced performance
+
+ --mb how many mb the compressed rom should be
+
+ --codec currently supported codecs
+ yaz
+ ucl
+ lzo
+ zlib
+ aplib
+ * to use non-yaz codecs, find patches
+ and code on my z64enc repo
+
+ --cache is optional and won't be created if
+ no path is specified (having a cache
+ makes subsequent compressions faster)
+ * pro-tip: linux users who don't want a
+ cache to persist across power cycles
+ can use the path "/tmp/z64compress"
+
+ --dma specify dmadata address and count
+
+ --compress enable compression on specified files
+
+ --skip disable compression on specified files
+
+ --repack handles Majora's Mask archives
+
+ --threads optional multithreading;
+ exclude this argument to disable it
+
+ --only-stdout reserve stderr for errors and print
+ everything else to stdout
+
+ arguments are executed as they
+ are parsed, so order matters!
+```
+
+## Building
+First, clone the repository and initialize its submodules:
+```
+git clone https://github.com/z64me/z64compress.git
+cd z64compress
+git submodule update --init
+```
+
+A Makefile-based build system is provided. Choose the target platform with `make TARGET=linux64|linux32|win32`, default is linux64. If building for windows with a cross compiler, specify the compiler executable with `make TARGET=win32 CC=/path/to/executable`.
+
+Alternatively, I have included shell scripts for building Linux and Windows binaries. Windows binaries are built using a cross compiler ([I recommend `MXE`](https://mxe.cc/)).
diff --git a/tools/z64compress/release-linux.sh b/tools/z64compress/release-linux.sh
new file mode 100644
index 000000000..bdac70dcc
--- /dev/null
+++ b/tools/z64compress/release-linux.sh
@@ -0,0 +1,14 @@
+# build compression functions (slow)
+gcc -DNDEBUG -s -Ofast -flto -lm -c -Wall -march=native -mtune=native src/enc/*.c src/enc/lzo/*.c src/enc/ucl/comp/*.c src/enc/apultra/*.c
+mkdir -p o
+mv *.o o
+
+# build everything else
+gcc -o z64compress -DNDEBUG src/*.c o/*.o src/enc/libdeflate/lib/deflate_compress.c src/enc/libdeflate/lib/utils.c -Isrc/enc/libdeflate -Wall -Wextra -s -Os -flto -lpthread -lz -march=native -mtune=native
+
+# move to bin directory
+mkdir -p bin/linux64
+mv z64compress bin/linux64
+
+
+
diff --git a/tools/z64compress/release-linux32.sh b/tools/z64compress/release-linux32.sh
new file mode 100644
index 000000000..06d829a7d
--- /dev/null
+++ b/tools/z64compress/release-linux32.sh
@@ -0,0 +1,14 @@
+# build compression functions (slow)
+gcc -m32 -DNDEBUG -s -Ofast -flto -lm -c -Wall -march=native -mtune=native src/enc/*.c src/enc/lzo/*.c src/enc/ucl/comp/*.c src/enc/apultra/*.c
+mkdir -p o
+mv *.o o
+
+# build everything else
+gcc -m32 -o z64compress -DNDEBUG src/*.c o/*.o src/enc/libdeflate/lib/deflate_compress.c src/enc/libdeflate/lib/utils.c -Isrc/enc/libdeflate -Wall -Wextra -s -Os -flto -lpthread -lz -march=native -mtune=native
+
+# move to bin directory
+mkdir -p bin/linux32
+mv z64compress bin/linux32
+
+
+
diff --git a/tools/z64compress/release-win32.sh b/tools/z64compress/release-win32.sh
new file mode 100644
index 000000000..94fc245c9
--- /dev/null
+++ b/tools/z64compress/release-win32.sh
@@ -0,0 +1,12 @@
+# build compression functions (slow)
+i686-w64-mingw32.static-gcc -DNDEBUG -s -Ofast -flto -lm -c -Wall src/enc/*.c src/enc/lzo/*.c src/enc/ucl/comp/*.c src/enc/apultra/*.c
+mkdir -p o
+mv *.o o
+
+# build everything else
+i686-w64-mingw32.static-gcc -o z64compress.exe -DNDEBUG src/*.c o/*.o src/enc/libdeflate/lib/deflate_compress.c src/enc/libdeflate/lib/utils.c -Isrc/enc/libdeflate -Wall -Wextra -s -Os -flto -lpthread -lz -mconsole -municode
+
+# move to bin directory
+mkdir -p bin/win32
+mv z64compress.exe bin/win32
+
diff --git a/tools/z64compress/src/enc/aplib.c b/tools/z64compress/src/enc/aplib.c
new file mode 100644
index 000000000..c2e720a7b
--- /dev/null
+++ b/tools/z64compress/src/enc/aplib.c
@@ -0,0 +1,48 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "apultra/libapultra.h"
+
+static void compression_progress(long long nOriginalSize, long long nCompressedSize) {
+ /* do nothing */
+}
+
+int
+aplenc(
+ void *_src
+ , unsigned src_sz
+ , void *_dst
+ , unsigned *dst_sz
+ , void *_ctx
+)
+{
+ unsigned char *src = _src;
+ unsigned char *dst = _dst;
+ int nMaxCompressedSize = apultra_get_max_compressed_size(src_sz);
+ apultra_stats stats;
+
+ extern int g_hlen; /* header length */
+ memset(dst, 0, g_hlen);
+ memcpy(dst, "APL0", 4);
+ dst[4] = (src_sz >> 24);
+ dst[5] = (src_sz >> 16);
+ dst[6] = (src_sz >> 8);
+ dst[7] = (src_sz >> 0);
+
+ *dst_sz = apultra_compress(
+ src
+ , dst + g_hlen
+ , src_sz
+ , nMaxCompressedSize
+ , 0 /* flags */
+ , 0 /* nMaxWindowSize */
+ , 0 /* nDictionarySize */
+ , compression_progress
+ , &stats
+ );
+
+ *dst_sz = *dst_sz + g_hlen;
+
+ return 0;
+}
+
diff --git a/tools/z64compress/src/enc/apultra/apultra.c b/tools/z64compress/src/enc/apultra/apultra.c
new file mode 100644
index 000000000..24dc2b692
--- /dev/null
+++ b/tools/z64compress/src/enc/apultra/apultra.c
@@ -0,0 +1,1225 @@
+#if 0
+/*
+ * apultra.c - command line compression utility for the apultra library
+ *
+ * Copyright (C) 2019 Emmanuel Marty
+ *
+ * This software is provided 'as-is', without any express or implied
+ * warranty. In no event will the authors be held liable for any damages
+ * arising from the use of this software.
+ *
+ * Permission is granted to anyone to use this software for any purpose,
+ * including commercial applications, and to alter it and redistribute it
+ * freely, subject to the following restrictions:
+ *
+ * 1. The origin of this software must not be misrepresented; you must not
+ * claim that you wrote the original software. If you use this software
+ * in a product, an acknowledgment in the product documentation would be
+ * appreciated but is not required.
+ * 2. Altered source versions must be plainly marked as such, and must not be
+ * misrepresented as being the original software.
+ * 3. This notice may not be removed or altered from any source distribution.
+ */
+
+/*
+ * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
+ *
+ * Inspired by cap by Sven-Åke Dahl. https://github.com/svendahl/cap
+ * Also inspired by Charles Bloom's compression blog. http://cbloomrants.blogspot.com/
+ * With ideas from LZ4 by Yann Collet. https://github.com/lz4/lz4
+ * With help and support from spke
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#ifdef _WIN32
+#include <windows.h>
+#include <sys/timeb.h>
+#else
+#include <sys/time.h>
+#endif
+#include "libapultra.h"
+
+#define OPT_VERBOSE 1
+#define OPT_STATS 2
+#define OPT_BACKWARD 4
+
+#define TOOL_VERSION "1.4.1"
+
+/*---------------------------------------------------------------------------*/
+
+#ifdef _WIN32
+LARGE_INTEGER hpc_frequency;
+BOOL hpc_available = FALSE;
+#endif
+
+static void do_init_time() {
+#ifdef _WIN32
+ hpc_frequency.QuadPart = 0;
+ hpc_available = QueryPerformanceFrequency(&hpc_frequency);
+#endif
+}
+
+static long long do_get_time() {
+ long long nTime;
+
+#ifdef _WIN32
+ if (hpc_available) {
+ LARGE_INTEGER nCurTime;
+
+ /* Use HPC hardware for best precision */
+ QueryPerformanceCounter(&nCurTime);
+ nTime = (long long)(nCurTime.QuadPart * 1000000LL / hpc_frequency.QuadPart);
+ }
+ else {
+ struct _timeb tb;
+ _ftime(&tb);
+
+ nTime = ((long long)tb.time * 1000LL + (long long)tb.millitm) * 1000LL;
+ }
+#else
+ struct timeval tm;
+ gettimeofday(&tm, NULL);
+
+ nTime = (long long)tm.tv_sec * 1000000LL + (long long)tm.tv_usec;
+#endif
+ return nTime;
+}
+
+static void do_reverse_buffer(unsigned char *pBuffer, size_t nBufferSize) {
+ size_t nMidPoint = nBufferSize / 2;
+ size_t i, j;
+
+ for (i = 0, j = nBufferSize - 1; i < nMidPoint; i++, j--) {
+ unsigned char c = pBuffer[i];
+ pBuffer[i] = pBuffer[j];
+ pBuffer[j] = c;
+ }
+}
+
+/*---------------------------------------------------------------------------*/
+
+static void compression_progress(long long nOriginalSize, long long nCompressedSize) {
+ if (nOriginalSize >= 512 * 1024) {
+ fprintf(stdout, "\r%lld => %lld (%g %%) \b\b\b\b\b", nOriginalSize, nCompressedSize, (double)(nCompressedSize * 100.0 / nOriginalSize));
+ fflush(stdout);
+ }
+}
+
+static int do_compress(const char *pszInFilename, const char *pszOutFilename, const char *pszDictionaryFilename, const unsigned int nOptions, const unsigned int nMaxWindowSize) {
+ long long nStartTime = 0LL, nEndTime = 0LL;
+ size_t nOriginalSize = 0L, nCompressedSize = 0L, nMaxCompressedSize;
+ int nFlags = 0;
+ apultra_stats stats;
+ unsigned char *pDecompressedData;
+ unsigned char *pCompressedData;
+
+ if (nOptions & OPT_VERBOSE) {
+ nStartTime = do_get_time();
+ }
+
+ FILE* f_dict = NULL;
+ size_t nDictionarySize = 0;
+ if (pszDictionaryFilename) {
+ /* Open the dictionary */
+ f_dict = fopen(pszDictionaryFilename, "rb");
+ if (!f_dict) {
+ fprintf(stderr, "error opening dictionary '%s' for reading\n", pszDictionaryFilename);
+ return 100;
+ }
+
+ /* Get dictionary size */
+ fseek(f_dict, 0, SEEK_END);
+ nDictionarySize = (size_t)ftell(f_dict);
+ fseek(f_dict, 0, SEEK_SET);
+
+ if (nDictionarySize > BLOCK_SIZE) nDictionarySize = BLOCK_SIZE;
+ }
+
+ /* Read the whole original file in memory */
+
+ FILE *f_in = fopen(pszInFilename, "rb");
+ if (!f_in) {
+ if (f_dict) fclose(f_dict);
+ fprintf(stderr, "error opening '%s' for reading\n", pszInFilename);
+ return 100;
+ }
+
+ fseek(f_in, 0, SEEK_END);
+ nOriginalSize = (size_t)ftell(f_in);
+ fseek(f_in, 0, SEEK_SET);
+
+ pDecompressedData = (unsigned char*)malloc(nDictionarySize + nOriginalSize);
+ if (!pDecompressedData) {
+ fclose(f_in);
+ if (f_dict) fclose(f_dict);
+ fprintf(stderr, "out of memory for reading '%s', %zd bytes needed\n", pszInFilename, nOriginalSize);
+ return 100;
+ }
+
+ if (f_dict) {
+ /* Read dictionary data */
+ if (fread(pDecompressedData + ((nOptions & OPT_BACKWARD) ? nOriginalSize : 0), 1, nDictionarySize, f_dict) != nDictionarySize) {
+ free(pDecompressedData);
+ fclose(f_in);
+ fclose(f_dict);
+ fprintf(stderr, "I/O error while reading dictionary '%s'\n", pszDictionaryFilename);
+ return 100;
+ }
+
+ fclose(f_dict);
+ f_dict = NULL;
+ }
+
+ /* Read input file data */
+ if (fread(pDecompressedData + ((nOptions & OPT_BACKWARD) ? 0 : nDictionarySize), 1, nOriginalSize, f_in) != nOriginalSize) {
+ free(pDecompressedData);
+ fclose(f_in);
+ fprintf(stderr, "I/O error while reading '%s'\n", pszInFilename);
+ return 100;
+ }
+
+ fclose(f_in);
+
+ if (nOptions & OPT_BACKWARD)
+ do_reverse_buffer(pDecompressedData, nDictionarySize + nOriginalSize);
+
+ /* Allocate max compressed size */
+
+ nMaxCompressedSize = apultra_get_max_compressed_size(nDictionarySize + nOriginalSize);
+
+ pCompressedData = (unsigned char*)malloc(nMaxCompressedSize);
+ if (!pCompressedData) {
+ free(pDecompressedData);
+ fprintf(stderr, "out of memory for compressing '%s', %zd bytes needed\n", pszInFilename, nMaxCompressedSize);
+ return 100;
+ }
+
+ memset(pCompressedData, 0, nMaxCompressedSize);
+
+ nCompressedSize = apultra_compress(pDecompressedData, pCompressedData, nDictionarySize + nOriginalSize, nMaxCompressedSize, nFlags, nMaxWindowSize, nDictionarySize, compression_progress, &stats);
+
+ if ((nOptions & OPT_VERBOSE)) {
+ nEndTime = do_get_time();
+ }
+
+ if (nCompressedSize == -1) {
+ free(pCompressedData);
+ free(pDecompressedData);
+ fprintf(stderr, "compression error for '%s'\n", pszInFilename);
+ return 100;
+ }
+
+ if (nOptions & OPT_BACKWARD)
+ do_reverse_buffer(pCompressedData, nCompressedSize);
+
+ if (pszOutFilename) {
+ FILE *f_out;
+
+ /* Write whole compressed file out */
+
+ f_out = fopen(pszOutFilename, "wb");
+ if (f_out) {
+ fwrite(pCompressedData, 1, nCompressedSize, f_out);
+ fclose(f_out);
+ }
+ }
+
+ free(pCompressedData);
+ free(pDecompressedData);
+
+ if ((nOptions & OPT_VERBOSE)) {
+ double fDelta = ((double)(nEndTime - nStartTime)) / 1000000.0;
+ double fSpeed = ((double)nOriginalSize / 1048576.0) / fDelta;
+ fprintf(stdout, "\rCompressed '%s' in %g seconds, %.02g Mb/s, %d tokens (%g bytes/token), %d into %d bytes ==> %g %%\n",
+ pszInFilename, fDelta, fSpeed, stats.commands_divisor, (double)nOriginalSize / (double)stats.commands_divisor,
+ (int)nOriginalSize, (int)nCompressedSize, (double)(nCompressedSize * 100.0 / nOriginalSize));
+ }
+
+ if (nOptions & OPT_STATS) {
+ fprintf(stdout, "Tokens: literals: %d short matches: %d normal matches: %d large matches: %d rep matches: %d EOD: %d\n",
+ stats.num_literals, stats.num_4bit_matches, stats.num_7bit_matches, stats.num_variable_matches, stats.num_rep_matches, stats.num_eod);
+ if (stats.match_divisor > 0) {
+ fprintf(stdout, "Offsets: min: %d avg: %d max: %d count: %d\n", stats.min_offset, (int)(stats.total_offsets / (long long)stats.match_divisor), stats.max_offset, stats.match_divisor);
+ fprintf(stdout, "Match lens: min: %d avg: %d max: %d count: %d\n", stats.min_match_len, stats.total_match_lens / stats.match_divisor, stats.max_match_len, stats.match_divisor);
+ }
+ else {
+ fprintf(stdout, "Offsets: none\n");
+ fprintf(stdout, "Match lens: none\n");
+ }
+ if (stats.rle1_divisor > 0) {
+ fprintf(stdout, "RLE1 lens: min: %d avg: %d max: %d count: %d\n", stats.min_rle1_len, stats.total_rle1_lens / stats.rle1_divisor, stats.max_rle1_len, stats.rle1_divisor);
+ }
+ else {
+ fprintf(stdout, "RLE1 lens: none\n");
+ }
+ if (stats.rle2_divisor > 0) {
+ fprintf(stdout, "RLE2 lens: min: %d avg: %d max: %d count: %d\n", stats.min_rle2_len, stats.total_rle2_lens / stats.rle2_divisor, stats.max_rle2_len, stats.rle2_divisor);
+ }
+ else {
+ fprintf(stdout, "RLE2 lens: none\n");
+ }
+ fprintf(stdout, "Safe distance: %d (0x%X)\n", stats.safe_dist, stats.safe_dist);
+ }
+ return 0;
+}
+
+/*---------------------------------------------------------------------------*/
+
+static int do_decompress(const char *pszInFilename, const char *pszOutFilename, const char *pszDictionaryFilename, const unsigned int nOptions) {
+ long long nStartTime = 0LL, nEndTime = 0LL;
+ size_t nCompressedSize, nMaxDecompressedSize, nOriginalSize;
+ unsigned char *pCompressedData;
+ unsigned char *pDecompressedData;
+ int nFlags = 0;
+
+ /* Read the whole compressed file in memory */
+
+ FILE *f_in = fopen(pszInFilename, "rb");
+ if (!f_in) {
+ fprintf(stderr, "error opening '%s' for reading\n", pszInFilename);
+ return 100;
+ }
+
+ fseek(f_in, 0, SEEK_END);
+ nCompressedSize = (size_t)ftell(f_in);
+ fseek(f_in, 0, SEEK_SET);
+
+ pCompressedData = (unsigned char*)malloc(nCompressedSize);
+ if (!pCompressedData) {
+ fclose(f_in);
+ fprintf(stderr, "out of memory for reading '%s', %zd bytes needed\n", pszInFilename, nCompressedSize);
+ return 100;
+ }
+
+ if (fread(pCompressedData, 1, nCompressedSize, f_in) != nCompressedSize) {
+ free(pCompressedData);
+ fclose(f_in);
+ fprintf(stderr, "I/O error while reading '%s'\n", pszInFilename);
+ return 100;
+ }
+
+ fclose(f_in);
+
+ if (nOptions & OPT_BACKWARD)
+ do_reverse_buffer(pCompressedData, nCompressedSize);
+
+ /* Get max decompressed size */
+
+ nMaxDecompressedSize = apultra_get_max_decompressed_size(pCompressedData, nCompressedSize, nFlags);
+ if (nMaxDecompressedSize == -1) {
+ free(pCompressedData);
+ fprintf(stderr, "invalid compressed format for file '%s'\n", pszInFilename);
+ return 100;
+ }
+
+ FILE* f_dict = NULL;
+ size_t nDictionarySize = 0;
+ if (pszDictionaryFilename) {
+ /* Open the dictionary */
+ f_dict = fopen(pszDictionaryFilename, "rb");
+ if (!f_dict) {
+ fprintf(stderr, "error opening dictionary '%s' for reading\n", pszDictionaryFilename);
+ return 100;
+ }
+
+ /* Get dictionary size */
+ fseek(f_dict, 0, SEEK_END);
+ nDictionarySize = (size_t)ftell(f_dict);
+ fseek(f_dict, 0, SEEK_SET);
+
+ if (nDictionarySize > BLOCK_SIZE) nDictionarySize = BLOCK_SIZE;
+ }
+
+ /* Allocate max decompressed size */
+
+ pDecompressedData = (unsigned char*)malloc(nDictionarySize + nMaxDecompressedSize);
+ if (!pDecompressedData) {
+ free(pCompressedData);
+ if (f_dict) fclose(f_dict);
+ fprintf(stderr, "out of memory for decompressing '%s', %zd bytes needed\n", pszInFilename, nMaxDecompressedSize);
+ return 100;
+ }
+
+ memset(pDecompressedData, 0, nDictionarySize + nMaxDecompressedSize);
+
+ if (f_dict) {
+ /* Read dictionary data */
+ if (fread(pDecompressedData, 1, nDictionarySize, f_dict) != nDictionarySize) {
+ free(pDecompressedData);
+ fclose(f_in);
+ fclose(f_dict);
+ fprintf(stderr, "I/O error while reading dictionary '%s'\n", pszDictionaryFilename);
+ return 100;
+ }
+
+ fclose(f_dict);
+ f_dict = NULL;
+
+ if (nOptions & OPT_BACKWARD)
+ do_reverse_buffer(pDecompressedData, nDictionarySize);
+ }
+
+ if (nOptions & OPT_VERBOSE) {
+ nStartTime = do_get_time();
+ }
+
+ nOriginalSize = apultra_decompress(pCompressedData, pDecompressedData, nCompressedSize, nMaxDecompressedSize, nDictionarySize, nFlags);
+ if (nOriginalSize == -1) {
+ free(pDecompressedData);
+ free(pCompressedData);
+
+ fprintf(stderr, "decompression error for '%s'\n", pszInFilename);
+ return 100;
+ }
+
+ if (nOptions & OPT_BACKWARD)
+ do_reverse_buffer(pDecompressedData + nDictionarySize, nOriginalSize);
+
+ if (pszOutFilename) {
+ FILE *f_out;
+
+ /* Write whole decompressed file out */
+
+ f_out = fopen(pszOutFilename, "wb");
+ if (f_out) {
+ fwrite(pDecompressedData + nDictionarySize, 1, nOriginalSize, f_out);
+ fclose(f_out);
+ }
+ }
+
+ free(pDecompressedData);
+ free(pCompressedData);
+
+ if (nOptions & OPT_VERBOSE) {
+ nEndTime = do_get_time();
+ double fDelta = ((double)(nEndTime - nStartTime)) / 1000000.0;
+ double fSpeed = ((double)nOriginalSize / 1048576.0) / fDelta;
+ fprintf(stdout, "Decompressed '%s' in %g seconds, %g Mb/s\n",
+ pszInFilename, fDelta, fSpeed);
+ }
+
+ return 0;
+}
+
+/*---------------------------------------------------------------------------*/
+
+static int do_compare(const char *pszInFilename, const char *pszOutFilename, const char *pszDictionaryFilename, const unsigned int nOptions) {
+ long long nStartTime = 0LL, nEndTime = 0LL;
+ size_t nCompressedSize, nMaxDecompressedSize, nOriginalSize, nDecompressedSize;
+ unsigned char *pCompressedData = NULL;
+ unsigned char *pOriginalData = NULL;
+ unsigned char *pDecompressedData = NULL;
+ int nFlags = 0;
+
+ /* Read the whole compressed file in memory */
+
+ FILE *f_in = fopen(pszInFilename, "rb");
+ if (!f_in) {
+ fprintf(stderr, "error opening '%s' for reading\n", pszInFilename);
+ return 100;
+ }
+
+ fseek(f_in, 0, SEEK_END);
+ nCompressedSize = (size_t)ftell(f_in);
+ fseek(f_in, 0, SEEK_SET);
+
+ pCompressedData = (unsigned char*)malloc(nCompressedSize);
+ if (!pCompressedData) {
+ fclose(f_in);
+ fprintf(stderr, "out of memory for reading '%s', %zd bytes needed\n", pszInFilename, nCompressedSize);
+ return 100;
+ }
+
+ if (fread(pCompressedData, 1, nCompressedSize, f_in) != nCompressedSize) {
+ free(pCompressedData);
+ fclose(f_in);
+ fprintf(stderr, "I/O error while reading '%s'\n", pszInFilename);
+ return 100;
+ }
+
+ fclose(f_in);
+
+ if (nOptions & OPT_BACKWARD)
+ do_reverse_buffer(pCompressedData, nCompressedSize);
+
+ /* Read the whole original file in memory */
+
+ f_in = fopen(pszOutFilename, "rb");
+ if (!f_in) {
+ free(pCompressedData);
+ fprintf(stderr, "error opening '%s' for reading\n", pszInFilename);
+ return 100;
+ }
+
+ fseek(f_in, 0, SEEK_END);
+ nOriginalSize = (size_t)ftell(f_in);
+ fseek(f_in, 0, SEEK_SET);
+
+ pOriginalData = (unsigned char*)malloc(nOriginalSize);
+ if (!pOriginalData) {
+ fclose(f_in);
+ free(pCompressedData);
+ fprintf(stderr, "out of memory for reading '%s', %zd bytes needed\n", pszInFilename, nOriginalSize);
+ return 100;
+ }
+
+ if (fread(pOriginalData, 1, nOriginalSize, f_in) != nOriginalSize) {
+ free(pOriginalData);
+ fclose(f_in);
+ free(pCompressedData);
+ fprintf(stderr, "I/O error while reading '%s'\n", pszInFilename);
+ return 100;
+ }
+
+ fclose(f_in);
+
+ /* Get max decompressed size */
+
+ nMaxDecompressedSize = apultra_get_max_decompressed_size(pCompressedData, nCompressedSize, nFlags);
+ if (nMaxDecompressedSize == -1) {
+ free(pOriginalData);
+ free(pCompressedData);
+ fprintf(stderr, "invalid compressed format for file '%s'\n", pszInFilename);
+ return 100;
+ }
+
+ FILE* f_dict = NULL;
+ size_t nDictionarySize = 0;
+ if (pszDictionaryFilename) {
+ /* Open the dictionary */
+ f_dict = fopen(pszDictionaryFilename, "rb");
+ if (!f_dict) {
+ fprintf(stderr, "error opening dictionary '%s' for reading\n", pszDictionaryFilename);
+ return 100;
+ }
+
+ /* Get dictionary size */
+ fseek(f_dict, 0, SEEK_END);
+ nDictionarySize = (size_t)ftell(f_dict);
+ fseek(f_dict, 0, SEEK_SET);
+
+ if (nDictionarySize > BLOCK_SIZE) nDictionarySize = BLOCK_SIZE;
+ }
+
+ /* Allocate max decompressed size */
+
+ pDecompressedData = (unsigned char*)malloc(nDictionarySize + nMaxDecompressedSize);
+ if (!pDecompressedData) {
+ free(pOriginalData);
+ free(pCompressedData);
+ if (f_dict) fclose(f_dict);
+ fprintf(stderr, "out of memory for decompressing '%s', %zd bytes needed\n", pszInFilename, nMaxDecompressedSize);
+ return 100;
+ }
+
+ memset(pDecompressedData, 0, nDictionarySize + nMaxDecompressedSize);
+
+ if (f_dict) {
+ /* Read dictionary data */
+ if (fread(pDecompressedData, 1, nDictionarySize, f_dict) != nDictionarySize) {
+ free(pDecompressedData);
+ fclose(f_in);
+ fclose(f_dict);
+ fprintf(stderr, "I/O error while reading dictionary '%s'\n", pszDictionaryFilename);
+ return 100;
+ }
+
+ fclose(f_dict);
+ f_dict = NULL;
+
+ if (nOptions & OPT_BACKWARD)
+ do_reverse_buffer(pDecompressedData, nDictionarySize);
+ }
+
+ if (nOptions & OPT_VERBOSE) {
+ nStartTime = do_get_time();
+ }
+
+ nDecompressedSize = apultra_decompress(pCompressedData, pDecompressedData, nCompressedSize, nMaxDecompressedSize, nDictionarySize, nFlags);
+ if (nDecompressedSize == -1) {
+ free(pDecompressedData);
+ free(pOriginalData);
+ free(pCompressedData);
+
+ fprintf(stderr, "decompression error for '%s'\n", pszInFilename);
+ return 100;
+ }
+
+ if (nOptions & OPT_BACKWARD)
+ do_reverse_buffer(pDecompressedData + nDictionarySize, nDecompressedSize);
+
+ if (nDecompressedSize != nOriginalSize || memcmp(pDecompressedData + nDictionarySize, pOriginalData, nOriginalSize)) {
+ fprintf(stderr, "error comparing compressed file '%s' with original '%s'\n", pszInFilename, pszOutFilename);
+ return 100;
+ }
+
+ free(pDecompressedData);
+ free(pOriginalData);
+ free(pCompressedData);
+
+ if (nOptions & OPT_VERBOSE) {
+ nEndTime = do_get_time();
+ double fDelta = ((double)(nEndTime - nStartTime)) / 1000000.0;
+ double fSpeed = ((double)nOriginalSize / 1048576.0) / fDelta;
+ fprintf(stdout, "Compared '%s' in %g seconds, %g Mb/s\n",
+ pszInFilename, fDelta, fSpeed);
+ }
+
+ return 0;
+}
+
+/*---------------------------------------------------------------------------*/
+
+static void generate_compressible_data(unsigned char *pBuffer, size_t nBufferSize, unsigned int nSeed, int nNumLiteralValues, float fMatchProbability) {
+ size_t nIndex = 0;
+ int nMatchProbability = (int)(fMatchProbability * 1023.0f);
+
+ srand(nSeed);
+
+ if (nIndex >= nBufferSize) return;
+ pBuffer[nIndex++] = rand() % nNumLiteralValues;
+
+ while (nIndex < nBufferSize) {
+ if ((rand() & 1023) >= nMatchProbability) {
+ size_t nLiteralCount = rand() & 127;
+ if (nLiteralCount > (nBufferSize - nIndex))
+ nLiteralCount = nBufferSize - nIndex;
+
+ while (nLiteralCount--)
+ pBuffer[nIndex++] = rand() % nNumLiteralValues;
+ }
+ else {
+ size_t nMatchLength = MIN_MATCH_SIZE + (rand() & 1023);
+ size_t nMatchOffset;
+
+ if (nMatchLength > (nBufferSize - nIndex))
+ nMatchLength = nBufferSize - nIndex;
+ if (nMatchLength > nIndex)
+ nMatchLength = nIndex;
+
+ if (nMatchLength < nIndex)
+ nMatchOffset = rand() % (nIndex - nMatchLength);
+ else
+ nMatchOffset = 0;
+
+ while (nMatchLength--) {
+ pBuffer[nIndex] = pBuffer[nIndex - nMatchOffset];
+ nIndex++;
+ }
+ }
+ }
+}
+
/* Corrupt a buffer for fuzzing: flip all bits of each byte with probability
 * fXorProbability (0..1). Deterministic for a given nSeed. */
static void xor_data(unsigned char *pBuffer, size_t nBufferSize, unsigned int nSeed, float fXorProbability) {
   const int nFlipThreshold = (int)(fXorProbability * 1023.0f);
   size_t i;

   srand(nSeed);

   /* One rand() draw per byte decides whether that byte gets inverted */
   for (i = 0; i < nBufferSize; i++) {
      if ((rand() & 1023) < nFlipThreshold)
         pBuffer[i] ^= 0xff;
   }
}
+
+static int do_self_test(const unsigned int nOptions, const unsigned int nMaxWindowSize, const int nIsQuickTest) {
+ unsigned char *pGeneratedData;
+ unsigned char *pCompressedData;
+ unsigned char *pTmpCompressedData;
+ unsigned char *pTmpDecompressedData;
+ size_t nGeneratedDataSize;
+ size_t nMaxCompressedDataSize;
+ unsigned int nSeed = 123;
+ int nFlags = 0;
+ int i;
+
+ pGeneratedData = (unsigned char*)malloc(4 * BLOCK_SIZE);
+ if (!pGeneratedData) {
+ fprintf(stderr, "out of memory, %d bytes needed\n", 4 * BLOCK_SIZE);
+ return 100;
+ }
+
+ nMaxCompressedDataSize = apultra_get_max_compressed_size(4 * BLOCK_SIZE);
+ pCompressedData = (unsigned char*)malloc(nMaxCompressedDataSize);
+ if (!pCompressedData) {
+ free(pGeneratedData);
+ pGeneratedData = NULL;
+
+ fprintf(stderr, "out of memory, %zd bytes needed\n", nMaxCompressedDataSize);
+ return 100;
+ }
+
+ pTmpCompressedData = (unsigned char*)malloc(nMaxCompressedDataSize);
+ if (!pTmpCompressedData) {
+ free(pCompressedData);
+ pCompressedData = NULL;
+ free(pGeneratedData);
+ pGeneratedData = NULL;
+
+ fprintf(stderr, "out of memory, %zd bytes needed\n", nMaxCompressedDataSize);
+ return 100;
+ }
+
+ pTmpDecompressedData = (unsigned char*)malloc(4 * BLOCK_SIZE);
+ if (!pTmpDecompressedData) {
+ free(pTmpCompressedData);
+ pTmpCompressedData = NULL;
+ free(pCompressedData);
+ pCompressedData = NULL;
+ free(pGeneratedData);
+ pGeneratedData = NULL;
+
+ fprintf(stderr, "out of memory, %d bytes needed\n", 4 * BLOCK_SIZE);
+ return 100;
+ }
+
+ memset(pGeneratedData, 0, 4 * BLOCK_SIZE);
+ memset(pCompressedData, 0, nMaxCompressedDataSize);
+ memset(pTmpCompressedData, 0, nMaxCompressedDataSize);
+
+ /* Test compressing with a too small buffer to do anything, expect to fail cleanly */
+ for (i = 0; i < 12; i++) {
+ generate_compressible_data(pGeneratedData, i, nSeed, 256, 0.5f);
+ apultra_compress(pGeneratedData, pCompressedData, i, i, nFlags, nMaxWindowSize, 0 /* dictionary size */, NULL, NULL);
+ }
+
+ size_t nDataSizeStep = 128;
+ float fProbabilitySizeStep = nIsQuickTest ? 0.005f : 0.0005f;
+
+ for (nGeneratedDataSize = 1024; nGeneratedDataSize <= (nIsQuickTest ? 1024U : (4U * BLOCK_SIZE)); nGeneratedDataSize += nDataSizeStep) {
+ float fMatchProbability;
+
+ fprintf(stdout, "size %zd", nGeneratedDataSize);
+ for (fMatchProbability = 0; fMatchProbability <= 0.995f; fMatchProbability += fProbabilitySizeStep) {
+ int nNumLiteralValues[12] = { 1, 2, 3, 15, 30, 56, 96, 137, 178, 191, 255, 256 };
+ float fXorProbability;
+
+ fputc('.', stdout);
+ fflush(stdout);
+
+ for (i = 0; i < 12; i++) {
+ /* Generate data to compress */
+ generate_compressible_data(pGeneratedData, nGeneratedDataSize, nSeed, nNumLiteralValues[i], fMatchProbability);
+
+ /* Try to compress it, expected to succeed */
+ size_t nActualCompressedSize = apultra_compress(pGeneratedData, pCompressedData, nGeneratedDataSize, apultra_get_max_compressed_size(nGeneratedDataSize),
+ nFlags, nMaxWindowSize, 0 /* dictionary size */, NULL, NULL);
+ if (nActualCompressedSize == -1 || nActualCompressedSize < (1 + 1 + 1 /* footer */)) {
+ free(pTmpDecompressedData);
+ pTmpDecompressedData = NULL;
+ free(pTmpCompressedData);
+ pTmpCompressedData = NULL;
+ free(pCompressedData);
+ pCompressedData = NULL;
+ free(pGeneratedData);
+ pGeneratedData = NULL;
+
+ fprintf(stderr, "\nself-test: error compressing size %zd, seed %d, match probability %f, literals range %d\n", nGeneratedDataSize, nSeed, fMatchProbability, nNumLiteralValues[i]);
+ return 100;
+ }
+
+ /* Try to decompress it, expected to succeed */
+ size_t nActualDecompressedSize;
+ nActualDecompressedSize = apultra_decompress(pCompressedData, pTmpDecompressedData, nActualCompressedSize, nGeneratedDataSize, 0 /* dictionary size */, nFlags);
+ if (nActualDecompressedSize == -1) {
+ free(pTmpDecompressedData);
+ pTmpDecompressedData = NULL;
+ free(pTmpCompressedData);
+ pTmpCompressedData = NULL;
+ free(pCompressedData);
+ pCompressedData = NULL;
+ free(pGeneratedData);
+ pGeneratedData = NULL;
+
+ fprintf(stderr, "\nself-test: error decompressing size %zd, seed %d, match probability %f, literals range %d\n", nGeneratedDataSize, nSeed, fMatchProbability, nNumLiteralValues[i]);
+ return 100;
+ }
+
+ if (memcmp(pGeneratedData, pTmpDecompressedData, nGeneratedDataSize)) {
+ free(pTmpDecompressedData);
+ pTmpDecompressedData = NULL;
+ free(pTmpCompressedData);
+ pTmpCompressedData = NULL;
+ free(pCompressedData);
+ pCompressedData = NULL;
+ free(pGeneratedData);
+ pGeneratedData = NULL;
+
+ fprintf(stderr, "\nself-test: error comparing decompressed and original data, size %zd, seed %d, match probability %f, literals range %d\n", nGeneratedDataSize, nSeed, fMatchProbability, nNumLiteralValues[i]);
+ return 100;
+ }
+
+ /* Try to decompress corrupted data, expected to fail cleanly, without crashing or corrupting memory outside the output buffer */
+ for (fXorProbability = 0.05f; fXorProbability <= 0.5f; fXorProbability += 0.05f) {
+ memcpy(pTmpCompressedData, pCompressedData, nActualCompressedSize);
+ xor_data(pTmpCompressedData, nActualCompressedSize, nSeed, fXorProbability);
+ apultra_decompress(pTmpCompressedData, pGeneratedData, nActualCompressedSize, nGeneratedDataSize, 0 /* dictionary size */, nFlags);
+ }
+ }
+
+ nSeed++;
+ }
+
+ fputc(10, stdout);
+ fflush(stdout);
+
+ nDataSizeStep <<= 1;
+ if (nDataSizeStep > (128 * 4096))
+ nDataSizeStep = 128 * 4096;
+ fProbabilitySizeStep *= 1.25;
+ if (fProbabilitySizeStep > (0.0005f * 4096))
+ fProbabilitySizeStep = 0.0005f * 4096;
+ }
+
+ free(pTmpDecompressedData);
+ pTmpDecompressedData = NULL;
+
+ free(pTmpCompressedData);
+ pTmpCompressedData = NULL;
+
+ free(pCompressedData);
+ pCompressedData = NULL;
+
+ free(pGeneratedData);
+ pGeneratedData = NULL;
+
+ fprintf(stdout, "All tests passed.\n");
+ return 0;
+}
+
+/*---------------------------------------------------------------------------*/
+
/**
 * Benchmark in-memory compression: read the whole input file, compress it
 * five times into a guard-banded output buffer (keeping the fastest run),
 * optionally write the compressed result out, and print size and timing.
 *
 * @param pszInFilename name of file to compress
 * @param pszOutFilename name of file to write compressed data to, or NULL
 * @param pszDictionaryFilename must be NULL (dictionaries unsupported here)
 * @param nOptions bitmask of OPT_... options
 * @param nMaxWindowSize maximum window size to compress with, 0 for default
 *
 * @return 0 for success, 100 in case of an error
 */
static int do_compr_benchmark(const char *pszInFilename, const char *pszOutFilename, const char *pszDictionaryFilename, const unsigned int nOptions, const unsigned int nMaxWindowSize) {
   size_t nFileSize, nMaxCompressedSize;
   unsigned char *pFileData;
   unsigned char *pCompressedData;
   int nFlags = 0;
   int i;

   if (pszDictionaryFilename) {
      fprintf(stderr, "in-memory benchmarking does not support dictionaries\n");
      return 100;
   }

   /* Read the whole original file in memory */

   FILE *f_in = fopen(pszInFilename, "rb");
   if (!f_in) {
      fprintf(stderr, "error opening '%s' for reading\n", pszInFilename);
      return 100;
   }

   /* Determine the file size by seeking to the end */
   fseek(f_in, 0, SEEK_END);
   nFileSize = (size_t)ftell(f_in);
   fseek(f_in, 0, SEEK_SET);

   pFileData = (unsigned char*)malloc(nFileSize);
   if (!pFileData) {
      fclose(f_in);
      fprintf(stderr, "out of memory for reading '%s', %zd bytes needed\n", pszInFilename, nFileSize);
      return 100;
   }

   if (fread(pFileData, 1, nFileSize, f_in) != nFileSize) {
      free(pFileData);
      fclose(f_in);
      fprintf(stderr, "I/O error while reading '%s'\n", pszInFilename);
      return 100;
   }

   fclose(f_in);

   if (nOptions & OPT_BACKWARD)
      do_reverse_buffer(pFileData, nFileSize);

   /* Allocate max compressed size, plus 2 x 1024-byte guard bands around the payload */

   nMaxCompressedSize = apultra_get_max_compressed_size(nFileSize);

   pCompressedData = (unsigned char*)malloc(nMaxCompressedSize + 2048);
   if (!pCompressedData) {
      free(pFileData);
      fprintf(stderr, "out of memory for compressing '%s', %zd bytes needed\n", pszInFilename, nMaxCompressedSize);
      return 100;
   }

   /* The payload area starts after the 1024-byte left guard band */
   memset(pCompressedData + 1024, 0, nMaxCompressedSize);

   long long nBestCompTime = -1;

   size_t nActualCompressedSize = 0;
   size_t nRightGuardPos = nMaxCompressedSize;

   for (i = 0; i < 5; i++) {
      /* Use a different guard value each pass so stale guards can't mask a write */
      unsigned char nGuard = 0x33 + i;
      int j;

      /* Write guard bytes around the output buffer, to help check for writes outside of it by the compressor */
      memset(pCompressedData, nGuard, 1024);
      memset(pCompressedData + 1024 + nRightGuardPos, nGuard, 1024);

      long long t0 = do_get_time();
      nActualCompressedSize = apultra_compress(pFileData, pCompressedData + 1024, nFileSize, nRightGuardPos, nFlags, nMaxWindowSize, 0 /* dictionary size */, NULL, NULL);
      long long t1 = do_get_time();
      if (nActualCompressedSize == -1) {
         free(pCompressedData);
         free(pFileData);
         fprintf(stderr, "compression error\n");
         return 100;
      }

      /* Keep the best (lowest) time across all passes */
      long long nCurDecTime = t1 - t0;
      if (nBestCompTime == -1 || nBestCompTime > nCurDecTime)
         nBestCompTime = nCurDecTime;

      /* Check guard bytes before the output buffer */
      for (j = 0; j < 1024; j++) {
         if (pCompressedData[j] != nGuard) {
            free(pCompressedData);
            free(pFileData);
            fprintf(stderr, "error, wrote outside of output buffer at %d!\n", j - 1024);
            return 100;
         }
      }

      /* Check guard bytes after the output buffer */
      for (j = 0; j < 1024; j++) {
         if (pCompressedData[1024 + nRightGuardPos + j] != nGuard) {
            free(pCompressedData);
            free(pFileData);
            fprintf(stderr, "error, wrote outside of output buffer at %d!\n", j);
            return 100;
         }
      }

      /* Next pass: shrink the usable output area to the previous compressed
       * size, moving the right guard band up against the actual output */
      nRightGuardPos = nActualCompressedSize;
   }

   if (nOptions & OPT_BACKWARD)
      do_reverse_buffer(pCompressedData + 1024, nActualCompressedSize);

   if (pszOutFilename) {
      FILE *f_out;

      /* Write whole compressed file out */

      f_out = fopen(pszOutFilename, "wb");
      if (f_out) {
         fwrite(pCompressedData + 1024, 1, nActualCompressedSize, f_out);
         fclose(f_out);
      }
   }

   free(pCompressedData);
   free(pFileData);

   fprintf(stdout, "compressed size: %zd bytes\n", nActualCompressedSize);
   /* NOTE(review): speed prints as (KiB) / (ms), which approximates Mb/s */
   fprintf(stdout, "compression time: %lld microseconds (%g Mb/s)\n", nBestCompTime, ((double)nActualCompressedSize / 1024.0) / ((double)nBestCompTime / 1000.0));

   return 0;
}
+
+/*---------------------------------------------------------------------------*/
+
+static int do_dec_benchmark(const char *pszInFilename, const char *pszOutFilename, const char *pszDictionaryFilename, const unsigned int nOptions) {
+ size_t nFileSize, nMaxDecompressedSize;
+ unsigned char *pFileData;
+ unsigned char *pDecompressedData;
+ int nFlags = 0;
+ int i;
+
+ if (pszDictionaryFilename) {
+ fprintf(stderr, "in-memory benchmarking does not support dictionaries\n");
+ return 100;
+ }
+
+ /* Read the whole compressed file in memory */
+
+ FILE *f_in = fopen(pszInFilename, "rb");
+ if (!f_in) {
+ fprintf(stderr, "error opening '%s' for reading\n", pszInFilename);
+ return 100;
+ }
+
+ fseek(f_in, 0, SEEK_END);
+ nFileSize = (size_t)ftell(f_in);
+ fseek(f_in, 0, SEEK_SET);
+
+ pFileData = (unsigned char*)malloc(nFileSize);
+ if (!pFileData) {
+ fclose(f_in);
+ fprintf(stderr, "out of memory for reading '%s', %zd bytes needed\n", pszInFilename, nFileSize);
+ return 100;
+ }
+
+ if (fread(pFileData, 1, nFileSize, f_in) != nFileSize) {
+ free(pFileData);
+ fclose(f_in);
+ fprintf(stderr, "I/O error while reading '%s'\n", pszInFilename);
+ return 100;
+ }
+
+ fclose(f_in);
+
+ if (nOptions & OPT_BACKWARD)
+ do_reverse_buffer(pFileData, nFileSize);
+
+ /* Allocate max decompressed size */
+
+ nMaxDecompressedSize = apultra_get_max_decompressed_size(pFileData, nFileSize, nFlags);
+ if (nMaxDecompressedSize == -1) {
+ free(pFileData);
+ fprintf(stderr, "invalid compressed format for file '%s'\n", pszInFilename);
+ return 100;
+ }
+
+ pDecompressedData = (unsigned char*)malloc(nMaxDecompressedSize);
+ if (!pDecompressedData) {
+ free(pFileData);
+ fprintf(stderr, "out of memory for decompressing '%s', %zd bytes needed\n", pszInFilename, nMaxDecompressedSize);
+ return 100;
+ }
+
+ memset(pDecompressedData, 0, nMaxDecompressedSize);
+
+ long long nBestDecTime = -1;
+
+ size_t nActualDecompressedSize = 0;
+ for (i = 0; i < 50; i++) {
+ long long t0 = do_get_time();
+ nActualDecompressedSize = apultra_decompress(pFileData, pDecompressedData, nFileSize, nMaxDecompressedSize, 0 /* dictionary size */, nFlags);
+ long long t1 = do_get_time();
+ if (nActualDecompressedSize == -1) {
+ free(pDecompressedData);
+ free(pFileData);
+ fprintf(stderr, "decompression error\n");
+ return 100;
+ }
+
+ long long nCurDecTime = t1 - t0;
+ if (nBestDecTime == -1 || nBestDecTime > nCurDecTime)
+ nBestDecTime = nCurDecTime;
+ }
+
+ if (nOptions & OPT_BACKWARD)
+ do_reverse_buffer(pDecompressedData, nActualDecompressedSize);
+
+ if (pszOutFilename) {
+ FILE *f_out;
+
+ /* Write whole decompressed file out */
+
+ f_out = fopen(pszOutFilename, "wb");
+ if (f_out) {
+ fwrite(pDecompressedData, 1, nActualDecompressedSize, f_out);
+ fclose(f_out);
+ }
+ }
+
+ free(pDecompressedData);
+ free(pFileData);
+
+ fprintf(stdout, "decompressed size: %zd bytes\n", nActualDecompressedSize);
+ fprintf(stdout, "decompression time: %lld microseconds (%g Mb/s)\n", nBestDecTime, ((double)nActualDecompressedSize / 1024.0) / ((double)nBestDecTime / 1000.0));
+
+ return 0;
+}
+
+/*---------------------------------------------------------------------------*/
+
+int main(int argc, char **argv) {
+ int i;
+ const char *pszInFilename = NULL;
+ const char *pszOutFilename = NULL;
+ const char *pszDictionaryFilename = NULL;
+ int nArgsError = 0;
+ int nCommandDefined = 0;
+ int nVerifyCompression = 0;
+ char cCommand = 'z';
+ unsigned int nOptions = 0;
+ unsigned int nMaxWindowSize = 0;
+
+ for (i = 1; i < argc; i++) {
+ if (!strcmp(argv[i], "-d")) {
+ if (!nCommandDefined) {
+ nCommandDefined = 1;
+ cCommand = 'd';
+ }
+ else
+ nArgsError = 1;
+ }
+ else if (!strcmp(argv[i], "-z")) {
+ if (!nCommandDefined) {
+ nCommandDefined = 1;
+ cCommand = 'z';
+ }
+ else
+ nArgsError = 1;
+ }
+ else if (!strcmp(argv[i], "-c")) {
+ if (!nVerifyCompression) {
+ nVerifyCompression = 1;
+ }
+ else
+ nArgsError = 1;
+ }
+ else if (!strcmp(argv[i], "-cbench")) {
+ if (!nCommandDefined) {
+ nCommandDefined = 1;
+ cCommand = 'B';
+ }
+ else
+ nArgsError = 1;
+ }
+ else if (!strcmp(argv[i], "-dbench")) {
+ if (!nCommandDefined) {
+ nCommandDefined = 1;
+ cCommand = 'b';
+ }
+ else
+ nArgsError = 1;
+ }
+ else if (!strcmp(argv[i], "-test")) {
+ if (!nCommandDefined) {
+ nCommandDefined = 1;
+ cCommand = 't';
+ }
+ else
+ nArgsError = 1;
+ }
+ else if (!strcmp(argv[i], "-quicktest")) {
+ if (!nCommandDefined) {
+ nCommandDefined = 1;
+ cCommand = 'T';
+ }
+ else
+ nArgsError = 1;
+ }
+ else if (!strcmp(argv[i], "-D")) {
+ if (!pszDictionaryFilename && (i + 1) < argc) {
+ pszDictionaryFilename = argv[i + 1];
+ i++;
+ }
+ else
+ nArgsError = 1;
+ }
+ else if (!strncmp(argv[i], "-D", 2)) {
+ if (!pszDictionaryFilename) {
+ pszDictionaryFilename = argv[i] + 2;
+ }
+ else
+ nArgsError = 1;
+ }
+ else if (!strcmp(argv[i], "-v")) {
+ if ((nOptions & OPT_VERBOSE) == 0) {
+ nOptions |= OPT_VERBOSE;
+ }
+ else
+ nArgsError = 1;
+ }
+ else if (!strcmp(argv[i], "-w")) {
+ if (!nMaxWindowSize && (i + 1) < argc) {
+ char *pEnd = NULL;
+ nMaxWindowSize = (int)strtol(argv[i + 1], &pEnd, 10);
+ if (pEnd && pEnd != argv[i + 1] && (nMaxWindowSize >= 16 && nMaxWindowSize <= 0x200000)) {
+ i++;
+ }
+ else {
+ nArgsError = 1;
+ }
+ }
+ else
+ nArgsError = 1;
+ }
+ else if (!strncmp(argv[i], "-w", 2)) {
+ if (!nMaxWindowSize) {
+ char *pEnd = NULL;
+ nMaxWindowSize = (int)strtol(argv[i] + 2, &pEnd, 10);
+ if (!(pEnd && pEnd != (argv[i] + 2) && (nMaxWindowSize >= 16 && nMaxWindowSize <= 0x200000))) {
+ nArgsError = 1;
+ }
+ }
+ else
+ nArgsError = 1;
+ }
+ else if (!strcmp(argv[i], "-stats")) {
+ if ((nOptions & OPT_STATS) == 0) {
+ nOptions |= OPT_STATS;
+ }
+ else
+ nArgsError = 1;
+ }
+ else if (!strcmp(argv[i], "-b")) {
+ if ((nOptions & OPT_BACKWARD) == 0) {
+ nOptions |= OPT_BACKWARD;
+ }
+ else
+ nArgsError = 1;
+ }
+ else {
+ if (!pszInFilename)
+ pszInFilename = argv[i];
+ else {
+ if (!pszOutFilename)
+ pszOutFilename = argv[i];
+ else
+ nArgsError = 1;
+ }
+ }
+ }
+
+ if (!nArgsError && cCommand == 't') {
+ return do_self_test(nOptions, nMaxWindowSize, 0);
+ }
+ else if (!nArgsError && cCommand == 'T') {
+ return do_self_test(nOptions, nMaxWindowSize, 1);
+ }
+
+ if (nArgsError || !pszInFilename || !pszOutFilename) {
+ fprintf(stderr, "apultra command-line tool v" TOOL_VERSION " by Emmanuel Marty and spke\n");
+ fprintf(stderr, "usage: %s [-c] [-d] [-v] [-b] \n", argv[0]);
+ fprintf(stderr, " -c: check resulting stream after compressing\n");
+ fprintf(stderr, " -d: decompress (default: compress)\n");
+ fprintf(stderr, " -b: backwards compression or decompression\n");
+ fprintf(stderr, " -w : maximum window size, in bytes (16..2097152), defaults to maximum\n");
+ fprintf(stderr, " -D : use dictionary file\n");
+ fprintf(stderr, " -cbench: benchmark in-memory compression\n");
+ fprintf(stderr, " -dbench: benchmark in-memory decompression\n");
+ fprintf(stderr, " -test: run full automated self-tests\n");
+ fprintf(stderr, "-quicktest: run quick automated self-tests\n");
+ fprintf(stderr, " -stats: show compressed data stats\n");
+ fprintf(stderr, " -v: be verbose\n");
+ return 100;
+ }
+
+ do_init_time();
+
+ if (cCommand == 'z') {
+ int nResult = do_compress(pszInFilename, pszOutFilename, pszDictionaryFilename, nOptions, nMaxWindowSize);
+ if (nResult == 0 && nVerifyCompression) {
+ return do_compare(pszOutFilename, pszInFilename, pszDictionaryFilename, nOptions);
+ } else {
+ return nResult;
+ }
+ }
+ else if (cCommand == 'd') {
+ return do_decompress(pszInFilename, pszOutFilename, pszDictionaryFilename, nOptions);
+ }
+ else if (cCommand == 'B') {
+ return do_compr_benchmark(pszInFilename, pszOutFilename, pszDictionaryFilename, nOptions, nMaxWindowSize);
+ }
+ else if (cCommand == 'b') {
+ return do_dec_benchmark(pszInFilename, pszOutFilename, pszDictionaryFilename, nOptions);
+ }
+ else {
+ return 100;
+ }
+}
+#endif
diff --git a/tools/z64compress/src/enc/apultra/divsufsort.c b/tools/z64compress/src/enc/apultra/divsufsort.c
new file mode 100644
index 000000000..3a1c75304
--- /dev/null
+++ b/tools/z64compress/src/enc/apultra/divsufsort.c
@@ -0,0 +1,460 @@
+/*
+ * divsufsort.c for libdivsufsort
+ * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
#include "divsufsort_private.h"
#ifdef _OPENMP
# include <omp.h>
#endif
+
+
+/*- Private Functions -*/
+
+/* Sorts suffixes of type B*. */
/* Sorts suffixes of type B*.
 *
 * T: input text (n bytes); SA: suffix-array workspace (n entries);
 * bucket_A: per-character counts/offsets for type A suffixes;
 * bucket_B: per-character-pair counts/offsets for type B / B* suffixes;
 * n: length of T.
 *
 * Returns m, the number of type B* suffixes found in T.
 */
static
saidx_t
sort_typeBstar(const sauchar_t *T, saidx_t *SA,
               saidx_t *bucket_A, saidx_t *bucket_B,
               saidx_t n) {
  saidx_t *PAb, *ISAb, *buf;
#ifdef _OPENMP
  saidx_t *curbuf;
  saidx_t l;
#endif
  saidx_t i, j, k, t, m, bufsize;
  saint_t c0, c1;
#ifdef _OPENMP
  saint_t d0, d1;
  int tmp;
#endif

  /* Initialize bucket arrays. */
  for(i = 0; i < BUCKET_A_SIZE; ++i) { bucket_A[i] = 0; }
  for(i = 0; i < BUCKET_B_SIZE; ++i) { bucket_B[i] = 0; }

  /* Count the number of occurrences of the first one or two characters of each
     type A, B and B* suffix. Moreover, store the beginning position of all
     type B* suffixes into the array SA. */
  for(i = n - 1, m = n, c0 = T[n - 1]; 0 <= i;) {
    /* type A suffix. */
    do { ++BUCKET_A(c1 = c0); } while((0 <= --i) && ((c0 = T[i]) >= c1));
    if(0 <= i) {
      /* type B* suffix. */
      ++BUCKET_BSTAR(c0, c1);
      SA[--m] = i;
      /* type B suffix. */
      for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) <= c1); --i, c1 = c0) {
        ++BUCKET_B(c0, c1);
      }
    }
  }
  m = n - m;
/*
note:
  A type B* suffix is lexicographically smaller than a type B suffix that
  begins with the same first two characters.
*/

  /* Calculate the index of start/end point of each bucket. */
  for(c0 = 0, i = 0, j = 0; c0 < ALPHABET_SIZE; ++c0) {
    t = i + BUCKET_A(c0);
    BUCKET_A(c0) = i + j; /* start point */
    i = t + BUCKET_B(c0, c0);
    for(c1 = c0 + 1; c1 < ALPHABET_SIZE; ++c1) {
      j += BUCKET_BSTAR(c0, c1);
      BUCKET_BSTAR(c0, c1) = j; /* end point */
      i += BUCKET_B(c0, c1);
    }
  }

  if(0 < m) {
    /* Sort the type B* suffixes by their first two characters. */
    PAb = SA + n - m; ISAb = SA + m;
    for(i = m - 2; 0 <= i; --i) {
      t = PAb[i], c0 = T[t], c1 = T[t + 1];
      SA[--BUCKET_BSTAR(c0, c1)] = i;
    }
    t = PAb[m - 1], c0 = T[t], c1 = T[t + 1];
    SA[--BUCKET_BSTAR(c0, c1)] = m - 1;

    /* Sort the type B* substrings using sssort. */
#ifdef _OPENMP
    /* Parallel variant: each thread claims two-character buckets under a
       critical section and sorts them with a per-thread scratch buffer. */
    tmp = omp_get_max_threads();
    buf = SA + m, bufsize = (n - (2 * m)) / tmp;
    c0 = ALPHABET_SIZE - 2, c1 = ALPHABET_SIZE - 1, j = m;
#pragma omp parallel default(shared) private(curbuf, k, l, d0, d1, tmp)
    {
      tmp = omp_get_thread_num();
      curbuf = buf + tmp * bufsize;
      k = 0;
      for(;;) {
        #pragma omp critical(sssort_lock)
        {
          if(0 < (l = j)) {
            d0 = c0, d1 = c1;
            do {
              k = BUCKET_BSTAR(d0, d1);
              if(--d1 <= d0) {
                d1 = ALPHABET_SIZE - 1;
                if(--d0 < 0) { break; }
              }
            } while(((l - k) <= 1) && (0 < (l = k)));
            c0 = d0, c1 = d1, j = k;
          }
        }
        if(l == 0) { break; }
        sssort(T, PAb, SA + k, SA + l,
               curbuf, bufsize, 2, n, *(SA + k) == (m - 1));
      }
    }
#else
    /* Serial variant: walk the two-character buckets from last to first. */
    buf = SA + m, bufsize = n - (2 * m);
    for(c0 = ALPHABET_SIZE - 2, j = m; 0 < j; --c0) {
      for(c1 = ALPHABET_SIZE - 1; c0 < c1; j = i, --c1) {
        i = BUCKET_BSTAR(c0, c1);
        if(1 < (j - i)) {
          sssort(T, PAb, SA + i, SA + j,
                 buf, bufsize, 2, n, *(SA + i) == (m - 1));
        }
      }
    }
#endif

    /* Compute ranks of type B* substrings. */
    for(i = m - 1; 0 <= i; --i) {
      if(0 <= SA[i]) {
        j = i;
        do { ISAb[SA[i]] = i; } while((0 <= --i) && (0 <= SA[i]));
        SA[i + 1] = i - j;
        if(i <= 0) { break; }
      }
      j = i;
      do { ISAb[SA[i] = ~SA[i]] = j; } while(SA[--i] < 0);
      ISAb[SA[i]] = j;
    }

    /* Construct the inverse suffix array of type B* suffixes using trsort. */
    trsort(ISAb, SA, m, 1);

    /* Set the sorted order of type B* suffixes. */
    for(i = n - 1, j = m, c0 = T[n - 1]; 0 <= i;) {
      for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) >= c1); --i, c1 = c0) { }
      if(0 <= i) {
        t = i;
        for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) <= c1); --i, c1 = c0) { }
        SA[ISAb[--j]] = ((t == 0) || (1 < (t - i))) ? t : ~t;
      }
    }

    /* Calculate the index of start/end point of each bucket. */
    BUCKET_B(ALPHABET_SIZE - 1, ALPHABET_SIZE - 1) = n; /* end point */
    for(c0 = ALPHABET_SIZE - 2, k = m - 1; 0 <= c0; --c0) {
      i = BUCKET_A(c0 + 1) - 1;
      for(c1 = ALPHABET_SIZE - 1; c0 < c1; --c1) {
        t = i - BUCKET_B(c0, c1);
        BUCKET_B(c0, c1) = i; /* end point */

        /* Move all type B* suffixes to the correct position. */
        for(i = t, j = BUCKET_BSTAR(c0, c1);
            j <= k;
            --i, --k) { SA[i] = SA[k]; }
      }
      BUCKET_BSTAR(c0, c0 + 1) = i - BUCKET_B(c0, c0) + 1; /* start point */
      BUCKET_B(c0, c0) = i; /* end point */
    }
  }

  return m;
}
+
+/* Constructs the suffix array by using the sorted order of type B* suffixes. */
+static
+void
+construct_SA(const sauchar_t *T, saidx_t *SA,
+ saidx_t *bucket_A, saidx_t *bucket_B,
+ saidx_t n, saidx_t m) {
+ saidx_t *i, *j, *k;
+ saidx_t s;
+ saint_t c0, c1, c2;
+
+ if(0 < m) {
+ /* Construct the sorted order of type B suffixes by using
+ the sorted order of type B* suffixes. */
+ for(c1 = ALPHABET_SIZE - 2; 0 <= c1; --c1) {
+ /* Scan the suffix array from right to left. */
+ for(i = SA + BUCKET_BSTAR(c1, c1 + 1),
+ j = SA + BUCKET_A(c1 + 1) - 1, k = NULL, c2 = -1;
+ i <= j;
+ --j) {
+ if(0 < (s = *j)) {
+ assert(T[s] == c1);
+ assert(((s + 1) < n) && (T[s] <= T[s + 1]));
+ assert(T[s - 1] <= T[s]);
+ *j = ~s;
+ c0 = T[--s];
+ if((0 < s) && (T[s - 1] > c0)) { s = ~s; }
+ if(c0 != c2) {
+ if(0 <= c2) { BUCKET_B(c2, c1) = k - SA; }
+ k = SA + BUCKET_B(c2 = c0, c1);
+ }
+ assert(k < j);
+ *k-- = s;
+ } else {
+ assert(((s == 0) && (T[s] == c1)) || (s < 0));
+ *j = ~s;
+ }
+ }
+ }
+ }
+
+ /* Construct the suffix array by using
+ the sorted order of type B suffixes. */
+ k = SA + BUCKET_A(c2 = T[n - 1]);
+ *k++ = (T[n - 2] < c2) ? ~(n - 1) : (n - 1);
+ /* Scan the suffix array from left to right. */
+ for(i = SA, j = SA + n; i < j; ++i) {
+ if(0 < (s = *i)) {
+ assert(T[s - 1] >= T[s]);
+ c0 = T[--s];
+ if((s == 0) || (T[s - 1] < c0)) { s = ~s; }
+ if(c0 != c2) {
+ BUCKET_A(c2) = k - SA;
+ k = SA + BUCKET_A(c2 = c0);
+ }
+ assert(i < k);
+ *k++ = s;
+ } else {
+ assert(s < 0);
+ *i = ~s;
+ }
+ }
+}
+
#if 0
/* NOTE(review): compiled out (#if 0); retained unmodified from upstream
   libdivsufsort for reference. */
/* Constructs the burrows-wheeler transformed string directly
   by using the sorted order of type B* suffixes. */
static
saidx_t
construct_BWT(const sauchar_t *T, saidx_t *SA,
              saidx_t *bucket_A, saidx_t *bucket_B,
              saidx_t n, saidx_t m) {
  saidx_t *i, *j, *k, *orig;
  saidx_t s;
  saint_t c0, c1, c2;

  if(0 < m) {
    /* Construct the sorted order of type B suffixes by using
       the sorted order of type B* suffixes. */
    for(c1 = ALPHABET_SIZE - 2; 0 <= c1; --c1) {
      /* Scan the suffix array from right to left. */
      for(i = SA + BUCKET_BSTAR(c1, c1 + 1),
          j = SA + BUCKET_A(c1 + 1) - 1, k = NULL, c2 = -1;
          i <= j;
          --j) {
        if(0 < (s = *j)) {
          assert(T[s] == c1);
          assert(((s + 1) < n) && (T[s] <= T[s + 1]));
          assert(T[s - 1] <= T[s]);
          c0 = T[--s];
          *j = ~((saidx_t)c0);
          if((0 < s) && (T[s - 1] > c0)) { s = ~s; }
          if(c0 != c2) {
            if(0 <= c2) { BUCKET_B(c2, c1) = k - SA; }
            k = SA + BUCKET_B(c2 = c0, c1);
          }
          assert(k < j);
          *k-- = s;
        } else if(s != 0) {
          *j = ~s;
#ifndef NDEBUG
        } else {
          assert(T[s] == c1);
#endif
        }
      }
    }
  }

  /* Construct the BWTed string by using
     the sorted order of type B suffixes. */
  k = SA + BUCKET_A(c2 = T[n - 1]);
  *k++ = (T[n - 2] < c2) ? ~((saidx_t)T[n - 2]) : (n - 1);
  /* Scan the suffix array from left to right. */
  for(i = SA, j = SA + n, orig = SA; i < j; ++i) {
    if(0 < (s = *i)) {
      assert(T[s - 1] >= T[s]);
      c0 = T[--s];
      *i = c0;
      if((0 < s) && (T[s - 1] < c0)) { s = ~((saidx_t)T[s - 1]); }
      if(c0 != c2) {
        BUCKET_A(c2) = k - SA;
        k = SA + BUCKET_A(c2 = c0);
      }
      assert(i < k);
      *k++ = s;
    } else if(s != 0) {
      *i = ~s;
    } else {
      orig = i;
    }
  }

  return orig - SA;
}
#endif
+
+/*---------------------------------------------------------------------------*/
+
+/**
+ * Initialize suffix array context
+ *
+ * Allocates the two bucket tables used by divsufsort_build_array().
+ * On partial failure, anything already allocated is released via
+ * divsufsort_destroy().
+ *
+ * @param ctx suffix array context to initialize
+ *
+ * @return 0 for success, or non-zero in case of an error
+ */
+int divsufsort_init(divsufsort_ctx_t *ctx) {
+   ctx->bucket_B = NULL;
+   ctx->bucket_A = (saidx_t *)malloc(BUCKET_A_SIZE * sizeof(saidx_t));
+
+   if (ctx->bucket_A != NULL) {
+      ctx->bucket_B = (saidx_t *)malloc(BUCKET_B_SIZE * sizeof(saidx_t));
+      if (ctx->bucket_B != NULL)
+         return 0;
+   }
+
+   divsufsort_destroy(ctx);
+   return -1;
+}
+
+/**
+ * Destroy suffix array context
+ *
+ * Releases both bucket tables and resets the pointers, so the context can be
+ * safely destroyed again or re-initialized.
+ *
+ * @param ctx suffix array context to destroy
+ */
+void divsufsort_destroy(divsufsort_ctx_t *ctx) {
+   /* free(NULL) is a no-op, so no NULL guards are needed. */
+   free(ctx->bucket_B);
+   ctx->bucket_B = NULL;
+
+   free(ctx->bucket_A);
+   ctx->bucket_A = NULL;
+}
+
+/*- Function -*/
+
+/**
+ * Constructs the suffix array of T using the context's pre-allocated buckets.
+ *
+ * @param ctx suffix array context (initialized via divsufsort_init())
+ * @param T input string of length n
+ * @param SA output array of suffix indices
+ * @param n length of T
+ *
+ * @return 0 if no error occurred, -1 on bad arguments, -2 if the context
+ *         bucket tables are not allocated
+ */
+saint_t
+divsufsort_build_array(divsufsort_ctx_t *ctx, const sauchar_t *T, saidx_t *SA, saidx_t n) {
+   saidx_t m;
+
+   /* Check arguments. */
+   if ((T == NULL) || (SA == NULL) || (n < 0))
+      return -1;
+
+   /* Trivial lengths are resolved directly. */
+   if (n == 0)
+      return 0;
+   if (n == 1) {
+      SA[0] = 0;
+      return 0;
+   }
+   if (n == 2) {
+      m = (T[0] < T[1]);
+      SA[m ^ 1] = 0;
+      SA[m] = 1;
+      return 0;
+   }
+
+   /* Suffixsort. */
+   if ((ctx->bucket_A == NULL) || (ctx->bucket_B == NULL))
+      return -2;
+
+   m = sort_typeBstar(T, SA, ctx->bucket_A, ctx->bucket_B, n);
+   construct_SA(T, SA, ctx->bucket_A, ctx->bucket_B, n, m);
+   return 0;
+}
+
+#if 0
+/* NOTE: compiled out (#if 0) — standalone BWT entry point and version string,
+   unused by this build. */
+saidx_t
+divbwt(const sauchar_t *T, sauchar_t *U, saidx_t *A, saidx_t n) {
+  saidx_t *B;
+  saidx_t *bucket_A, *bucket_B;
+  saidx_t m, pidx, i;
+
+  /* Check arguments. */
+  if((T == NULL) || (U == NULL) || (n < 0)) { return -1; }
+  else if(n <= 1) { if(n == 1) { U[0] = T[0]; } return n; }
+
+  /* Use the caller's temporary array if provided, else allocate one. */
+  if((B = A) == NULL) { B = (saidx_t *)malloc((size_t)(n + 1) * sizeof(saidx_t)); }
+  bucket_A = (saidx_t *)malloc(BUCKET_A_SIZE * sizeof(saidx_t));
+  bucket_B = (saidx_t *)malloc(BUCKET_B_SIZE * sizeof(saidx_t));
+
+  /* Burrows-Wheeler Transform. */
+  if((B != NULL) && (bucket_A != NULL) && (bucket_B != NULL)) {
+    m = sort_typeBstar(T, B, bucket_A, bucket_B, n);
+    pidx = construct_BWT(T, B, bucket_A, bucket_B, n, m);
+
+    /* Copy to output string. */
+    U[0] = T[n - 1];
+    for(i = 0; i < pidx; ++i) { U[i + 1] = (sauchar_t)B[i]; }
+    for(i += 1; i < n; ++i) { U[i] = (sauchar_t)B[i]; }
+    pidx += 1;
+  } else {
+    pidx = -2;   /* allocation failure */
+  }
+
+  free(bucket_B);
+  free(bucket_A);
+  if(A == NULL) { free(B); }
+
+  return pidx;
+}
+
+const char *
+divsufsort_version(void) {
+  return PROJECT_VERSION_FULL;
+}
+#endif
+
+/**
+ * Constructs the suffix array of T with locally allocated bucket tables.
+ *
+ * @param T input string of length n
+ * @param SA output array of suffix indices
+ * @param n length of T
+ *
+ * @return 0 if no error occurred, -1 on bad arguments, -2 on allocation
+ *         failure
+ */
+saint_t
+divsufsort(const sauchar_t *T, saidx_t *SA, saidx_t n) {
+   saidx_t *bucket_A, *bucket_B;
+   saidx_t m;
+   saint_t err;
+
+   /* Check arguments. */
+   if ((T == NULL) || (SA == NULL) || (n < 0))
+      return -1;
+
+   /* Trivial lengths are resolved directly. */
+   if (n == 0)
+      return 0;
+   if (n == 1) {
+      SA[0] = 0;
+      return 0;
+   }
+   if (n == 2) {
+      m = (T[0] < T[1]);
+      SA[m ^ 1] = 0;
+      SA[m] = 1;
+      return 0;
+   }
+
+   bucket_A = (saidx_t *)malloc(BUCKET_A_SIZE * sizeof(saidx_t));
+   bucket_B = (saidx_t *)malloc(BUCKET_B_SIZE * sizeof(saidx_t));
+
+   /* Suffixsort. */
+   if ((bucket_A != NULL) && (bucket_B != NULL)) {
+      m = sort_typeBstar(T, SA, bucket_A, bucket_B, n);
+      construct_SA(T, SA, bucket_A, bucket_B, n, m);
+      err = 0;
+   } else {
+      err = -2;
+   }
+
+   free(bucket_B);
+   free(bucket_A);
+
+   return err;
+}
diff --git a/tools/z64compress/src/enc/apultra/divsufsort.h b/tools/z64compress/src/enc/apultra/divsufsort.h
new file mode 100644
index 000000000..5c617ee73
--- /dev/null
+++ b/tools/z64compress/src/enc/apultra/divsufsort.h
@@ -0,0 +1,192 @@
+/*
+ * divsufsort.h for libdivsufsort
+ * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef _DIVSUFSORT_H
+#define _DIVSUFSORT_H 1
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+#define DIVSUFSORT_API
+
+/*- Datatypes -*/
+#ifndef SAUCHAR_T
+#define SAUCHAR_T
+typedef unsigned char sauchar_t;
+#endif /* SAUCHAR_T */
+#ifndef SAINT_T
+#define SAINT_T
+typedef int saint_t;
+#endif /* SAINT_T */
+#ifndef SAIDX_T
+#define SAIDX_T
+typedef int saidx_t;
+#endif /* SAIDX_T */
+#ifndef PRIdSAIDX_T
+#define PRIdSAIDX_T "d"
+#endif
+
+/*- divsufsort context */
+/* Holds the two bucket tables reused across calls to
+   divsufsort_build_array(); allocated by divsufsort_init() and released by
+   divsufsort_destroy(). */
+typedef struct _divsufsort_ctx_t {
+   saidx_t *bucket_A;   /* BUCKET_A_SIZE entries (per first character) */
+   saidx_t *bucket_B;   /* BUCKET_B_SIZE entries (per character pair) */
+} divsufsort_ctx_t;
+
+/*- Prototypes -*/
+
+/**
+ * Initialize suffix array context
+ *
+ * @return 0 for success, or non-zero in case of an error
+ */
+int divsufsort_init(divsufsort_ctx_t *ctx);
+
+/**
+ * Destroy suffix array context
+ *
+ * @param ctx suffix array context to destroy
+ */
+void divsufsort_destroy(divsufsort_ctx_t *ctx);
+
+/**
+ * Constructs the suffix array of a given string.
+ * @param ctx suffix array context
+ * @param T[0..n-1] The input string.
+ * @param SA[0..n-1] The output array of suffixes.
+ * @param n The length of the given string.
+ * @return 0 if no error occurred, -1 or -2 otherwise.
+ */
+DIVSUFSORT_API
+saint_t divsufsort_build_array(divsufsort_ctx_t *ctx, const sauchar_t *T, saidx_t *SA, saidx_t n);
+
+#if 0
+/**
+ * Constructs the burrows-wheeler transformed string of a given string.
+ * @param T[0..n-1] The input string.
+ * @param U[0..n-1] The output string. (can be T)
+ * @param A[0..n-1] The temporary array. (can be NULL)
+ * @param n The length of the given string.
+ * @return The primary index if no error occurred, -1 or -2 otherwise.
+ */
+DIVSUFSORT_API
+saidx_t
+divbwt(const sauchar_t *T, sauchar_t *U, saidx_t *A, saidx_t n);
+
+/**
+ * Returns the version of the divsufsort library.
+ * @return The version number string.
+ */
+DIVSUFSORT_API
+const char *
+divsufsort_version(void);
+
+
+/**
+ * Constructs the burrows-wheeler transformed string of a given string and suffix array.
+ * @param T[0..n-1] The input string.
+ * @param U[0..n-1] The output string. (can be T)
+ * @param SA[0..n-1] The suffix array. (can be NULL)
+ * @param n The length of the given string.
+ * @param idx The output primary index.
+ * @return 0 if no error occurred, -1 or -2 otherwise.
+ */
+DIVSUFSORT_API
+saint_t
+bw_transform(const sauchar_t *T, sauchar_t *U,
+ saidx_t *SA /* can NULL */,
+ saidx_t n, saidx_t *idx);
+
+/**
+ * Inverse BW-transforms a given BWTed string.
+ * @param T[0..n-1] The input string.
+ * @param U[0..n-1] The output string. (can be T)
+ * @param A[0..n-1] The temporary array. (can be NULL)
+ * @param n The length of the given string.
+ * @param idx The primary index.
+ * @return 0 if no error occurred, -1 or -2 otherwise.
+ */
+DIVSUFSORT_API
+saint_t
+inverse_bw_transform(const sauchar_t *T, sauchar_t *U,
+ saidx_t *A /* can NULL */,
+ saidx_t n, saidx_t idx);
+
+/**
+ * Checks the correctness of a given suffix array.
+ * @param T[0..n-1] The input string.
+ * @param SA[0..n-1] The input suffix array.
+ * @param n The length of the given string.
+ * @param verbose The verbose mode.
+ * @return 0 if no error occurred.
+ */
+DIVSUFSORT_API
+saint_t
+sufcheck(const sauchar_t *T, const saidx_t *SA, saidx_t n, saint_t verbose);
+
+/**
+ * Search for the pattern P in the string T.
+ * @param T[0..Tsize-1] The input string.
+ * @param Tsize The length of the given string.
+ * @param P[0..Psize-1] The input pattern string.
+ * @param Psize The length of the given pattern string.
+ * @param SA[0..SAsize-1] The input suffix array.
+ * @param SAsize The length of the given suffix array.
+ * @param idx The output index.
+ * @return The count of matches if no error occurred, -1 otherwise.
+ */
+DIVSUFSORT_API
+saidx_t
+sa_search(const sauchar_t *T, saidx_t Tsize,
+ const sauchar_t *P, saidx_t Psize,
+ const saidx_t *SA, saidx_t SAsize,
+ saidx_t *left);
+
+/**
+ * Search for the character c in the string T.
+ * @param T[0..Tsize-1] The input string.
+ * @param Tsize The length of the given string.
+ * @param SA[0..SAsize-1] The input suffix array.
+ * @param SAsize The length of the given suffix array.
+ * @param c The input character.
+ * @param idx The output index.
+ * @return The count of matches if no error occurred, -1 otherwise.
+ */
+DIVSUFSORT_API
+saidx_t
+sa_simplesearch(const sauchar_t *T, saidx_t Tsize,
+ const saidx_t *SA, saidx_t SAsize,
+ saint_t c, saidx_t *left);
+#endif
+
+saint_t
+divsufsort(const sauchar_t *T, saidx_t *SA, saidx_t n);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif /* __cplusplus */
+
+#endif /* _DIVSUFSORT_H */
diff --git a/tools/z64compress/src/enc/apultra/divsufsort_config.h b/tools/z64compress/src/enc/apultra/divsufsort_config.h
new file mode 100644
index 000000000..f112983cf
--- /dev/null
+++ b/tools/z64compress/src/enc/apultra/divsufsort_config.h
@@ -0,0 +1,9 @@
+#define HAVE_STRING_H 1
+#define HAVE_STDLIB_H 1
+#define HAVE_MEMORY_H 1
+#define HAVE_STDINT_H 1
+#define INLINE inline
+
+#ifdef _MSC_VER
+#pragma warning( disable : 4244 )
+#endif /* _MSC_VER */
diff --git a/tools/z64compress/src/enc/apultra/divsufsort_private.h b/tools/z64compress/src/enc/apultra/divsufsort_private.h
new file mode 100644
index 000000000..b4d97ad4b
--- /dev/null
+++ b/tools/z64compress/src/enc/apultra/divsufsort_private.h
@@ -0,0 +1,205 @@
+/*
+ * divsufsort_private.h for libdivsufsort
+ * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef _DIVSUFSORT_PRIVATE_H
+#define _DIVSUFSORT_PRIVATE_H 1
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+#include "divsufsort_config.h"
+#include <assert.h>
+#include <stdio.h>
+#if HAVE_STRING_H
+# include <string.h>
+#endif
+#if HAVE_STDLIB_H
+# include <stdlib.h>
+#endif
+#if HAVE_MEMORY_H
+# include <memory.h>
+#endif
+#if HAVE_STDDEF_H
+# include <stddef.h>
+#endif
+#if HAVE_STRINGS_H
+# include <strings.h>
+#endif
+#if HAVE_INTTYPES_H
+# include <inttypes.h>
+#else
+# if HAVE_STDINT_H
+#  include <stdint.h>
+# endif
+#endif
+#if defined(BUILD_DIVSUFSORT64)
+# include "divsufsort64.h"
+# ifndef SAIDX_T
+# define SAIDX_T
+# define saidx_t saidx64_t
+# endif /* SAIDX_T */
+# ifndef PRIdSAIDX_T
+# define PRIdSAIDX_T PRIdSAIDX64_T
+# endif /* PRIdSAIDX_T */
+# define divsufsort divsufsort64
+# define divbwt divbwt64
+# define divsufsort_version divsufsort64_version
+# define bw_transform bw_transform64
+# define inverse_bw_transform inverse_bw_transform64
+# define sufcheck sufcheck64
+# define sa_search sa_search64
+# define sa_simplesearch sa_simplesearch64
+# define sssort sssort64
+# define trsort trsort64
+#else
+# include "divsufsort.h"
+#endif
+
+
+/*- Constants -*/
+#if !defined(UINT8_MAX)
+# define UINT8_MAX (255)
+#endif /* UINT8_MAX */
+#if defined(ALPHABET_SIZE) && (ALPHABET_SIZE < 1)
+# undef ALPHABET_SIZE
+#endif
+#if !defined(ALPHABET_SIZE)
+# define ALPHABET_SIZE (UINT8_MAX + 1)
+#endif
+/* for divsufsort.c */
+#define BUCKET_A_SIZE (ALPHABET_SIZE)
+#define BUCKET_B_SIZE (ALPHABET_SIZE * ALPHABET_SIZE)
+/* for sssort.c */
+#if defined(SS_INSERTIONSORT_THRESHOLD)
+# if SS_INSERTIONSORT_THRESHOLD < 1
+# undef SS_INSERTIONSORT_THRESHOLD
+# define SS_INSERTIONSORT_THRESHOLD (1)
+# endif
+#else
+# define SS_INSERTIONSORT_THRESHOLD (8)
+#endif
+#if defined(SS_BLOCKSIZE)
+# if SS_BLOCKSIZE < 0
+# undef SS_BLOCKSIZE
+# define SS_BLOCKSIZE (0)
+# elif 32768 <= SS_BLOCKSIZE
+# undef SS_BLOCKSIZE
+# define SS_BLOCKSIZE (32767)
+# endif
+#else
+# define SS_BLOCKSIZE (1024)
+#endif
+/* minstacksize = log(SS_BLOCKSIZE) / log(3) * 2 */
+#if SS_BLOCKSIZE == 0
+# if defined(BUILD_DIVSUFSORT64)
+# define SS_MISORT_STACKSIZE (96)
+# else
+# define SS_MISORT_STACKSIZE (64)
+# endif
+#elif SS_BLOCKSIZE <= 4096
+# define SS_MISORT_STACKSIZE (16)
+#else
+# define SS_MISORT_STACKSIZE (24)
+#endif
+#if defined(BUILD_DIVSUFSORT64)
+# define SS_SMERGE_STACKSIZE (64)
+#else
+# define SS_SMERGE_STACKSIZE (32)
+#endif
+/* for trsort.c */
+#define TR_INSERTIONSORT_THRESHOLD (8)
+#if defined(BUILD_DIVSUFSORT64)
+# define TR_STACKSIZE (96)
+#else
+# define TR_STACKSIZE (64)
+#endif
+
+
+/*- Macros -*/
+/* NOTE: SWAP expands to use a temporary named 't' that must be declared at
+   the call site. */
+#ifndef SWAP
+# define SWAP(_a, _b) do { t = (_a); (_a) = (_b); (_b) = t; } while(0)
+#endif /* SWAP */
+#ifndef MIN
+# define MIN(_a, _b) (((_a) < (_b)) ? (_a) : (_b))
+#endif /* MIN */
+#ifndef MAX
+# define MAX(_a, _b) (((_a) > (_b)) ? (_a) : (_b))
+#endif /* MAX */
+/* Explicit work-stack helpers: 'stack', 'ssize' and STACK_SIZE must be in
+   scope at the expansion site.  STACK_POP/STACK_POP5 return from the
+   enclosing (void) function when the stack is empty. */
+#define STACK_PUSH(_a, _b, _c, _d)\
+  do {\
+    assert(ssize < STACK_SIZE);\
+    stack[ssize].a = (_a), stack[ssize].b = (_b),\
+    stack[ssize].c = (_c), stack[ssize++].d = (_d);\
+  } while(0)
+#define STACK_PUSH5(_a, _b, _c, _d, _e)\
+  do {\
+    assert(ssize < STACK_SIZE);\
+    stack[ssize].a = (_a), stack[ssize].b = (_b),\
+    stack[ssize].c = (_c), stack[ssize].d = (_d), stack[ssize++].e = (_e);\
+  } while(0)
+#define STACK_POP(_a, _b, _c, _d)\
+  do {\
+    assert(0 <= ssize);\
+    if(ssize == 0) { return; }\
+    (_a) = stack[--ssize].a, (_b) = stack[ssize].b,\
+    (_c) = stack[ssize].c, (_d) = stack[ssize].d;\
+  } while(0)
+#define STACK_POP5(_a, _b, _c, _d, _e)\
+  do {\
+    assert(0 <= ssize);\
+    if(ssize == 0) { return; }\
+    (_a) = stack[--ssize].a, (_b) = stack[ssize].b,\
+    (_c) = stack[ssize].c, (_d) = stack[ssize].d, (_e) = stack[ssize].e;\
+  } while(0)
+/* for divsufsort.c */
+/* B and B* entries share the one bucket_B table, distinguished only by index
+   order ((c1,c0) vs (c0,c1)); the 256-character case uses shifts in place of
+   multiplies. */
+#define BUCKET_A(_c0) bucket_A[(_c0)]
+#if ALPHABET_SIZE == 256
+#define BUCKET_B(_c0, _c1) (bucket_B[((_c1) << 8) | (_c0)])
+#define BUCKET_BSTAR(_c0, _c1) (bucket_B[((_c0) << 8) | (_c1)])
+#else
+#define BUCKET_B(_c0, _c1) (bucket_B[(_c1) * ALPHABET_SIZE + (_c0)])
+#define BUCKET_BSTAR(_c0, _c1) (bucket_B[(_c0) * ALPHABET_SIZE + (_c1)])
+#endif
+
+
+/*- Private Prototypes -*/
+/* sssort.c */
+void
+sssort(const sauchar_t *Td, const saidx_t *PA,
+ saidx_t *first, saidx_t *last,
+ saidx_t *buf, saidx_t bufsize,
+ saidx_t depth, saidx_t n, saint_t lastsuffix);
+/* trsort.c */
+void
+trsort(saidx_t *ISA, saidx_t *SA, saidx_t n, saidx_t depth);
+
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif /* __cplusplus */
+
+#endif /* _DIVSUFSORT_PRIVATE_H */
diff --git a/tools/z64compress/src/enc/apultra/expand.c b/tools/z64compress/src/enc/apultra/expand.c
new file mode 100644
index 000000000..c5ad18229
--- /dev/null
+++ b/tools/z64compress/src/enc/apultra/expand.c
@@ -0,0 +1,396 @@
+/*
+ * expand.c - decompressor implementation
+ *
+ * Copyright (C) 2019 Emmanuel Marty
+ *
+ * This software is provided 'as-is', without any express or implied
+ * warranty. In no event will the authors be held liable for any damages
+ * arising from the use of this software.
+ *
+ * Permission is granted to anyone to use this software for any purpose,
+ * including commercial applications, and to alter it and redistribute it
+ * freely, subject to the following restrictions:
+ *
+ * 1. The origin of this software must not be misrepresented; you must not
+ * claim that you wrote the original software. If you use this software
+ * in a product, an acknowledgment in the product documentation would be
+ * appreciated but is not required.
+ * 2. Altered source versions must be plainly marked as such, and must not be
+ * misrepresented as being the original software.
+ * 3. This notice may not be removed or altered from any source distribution.
+ */
+
+/*
+ * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
+ *
+ * Inspired by cap by Sven-Åke Dahl. https://github.com/svendahl/cap
+ * Also inspired by Charles Bloom's compression blog. http://cbloomrants.blogspot.com/
+ * With ideas from LZ4 by Yann Collet. https://github.com/lz4/lz4
+ * With help and support from spke
+ *
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include "format.h"
+#include "expand.h"
+#include "libapultra.h"
+
+#ifdef _MSC_VER
+#define FORCE_INLINE __forceinline
+#else /* _MSC_VER */
+#define FORCE_INLINE __attribute__((always_inline))
+#endif /* _MSC_VER */
+
+/* Reads the next bit from the compressed bitstream (MSB-first within a byte).
+ *
+ * ppInBlock   in/out: current input pointer, advanced when a byte is fetched
+ * pDataEnd    one past the end of the input buffer
+ * nCurBitMask in/out: countdown mask (128, 64, ..., 0); 0 = cache empty
+ * bits        in/out: cached byte, left-shifted so the next bit is bit 7
+ *
+ * Returns 0 or 1, or -1 if the input is exhausted.  On -1 the state is left
+ * unchanged, so subsequent calls keep returning -1. */
+static inline FORCE_INLINE int apultra_read_bit(const unsigned char **ppInBlock, const unsigned char *pDataEnd, int *nCurBitMask, unsigned char *bits) {
+   const unsigned char *pInBlock = *ppInBlock;
+   int nBit;
+
+   if ((*nCurBitMask) == 0) {
+      /* Byte cache exhausted: fetch the next input byte. */
+      if (pInBlock >= pDataEnd) return -1;
+      (*bits) = *pInBlock++;
+      (*nCurBitMask) = 128;
+   }
+
+   nBit = ((*bits) & 128) ? 1 : 0;
+
+   (*bits) <<= 1;
+   (*nCurBitMask) >>= 1;
+
+   *ppInBlock = pInBlock;
+   return nBit;
+}
+
+/* Reads an interleaved gamma-coded value ("gamma2"): starting from 1, each
+ * iteration shifts in one data bit, then a continuation bit decides whether
+ * to keep going; decoded values are therefore >= 2.
+ *
+ * Returns the decoded value, or a negative value if the input ran out.
+ * A failed data-bit read folds its -1 into v, but because apultra_read_bit()
+ * leaves its state unchanged on failure, the following continuation-bit read
+ * also fails and the error is returned. */
+static inline FORCE_INLINE int apultra_read_gamma2(const unsigned char **ppInBlock, const unsigned char *pDataEnd, int *nCurBitMask, unsigned char *bits) {
+   int bit;
+   unsigned int v = 1;
+
+   do {
+      v = (v << 1) + apultra_read_bit(ppInBlock, pDataEnd, nCurBitMask, bits);
+      bit = apultra_read_bit(ppInBlock, pDataEnd, nCurBitMask, bits);
+      if (bit < 0) return bit;
+   } while (bit);
+
+   return v;
+}
+
+/**
+ * Get maximum decompressed size of compressed data
+ *
+ * Walks the token stream without producing any output, accumulating the
+ * number of bytes a real decompression would emit.
+ *
+ * @param pInputData compressed data
+ * @param nInputSize compressed size in bytes
+ * @param nFlags compression flags (set to 0)
+ *
+ * @return maximum decompressed size, or (size_t)-1 on truncated/invalid input
+ */
+size_t apultra_get_max_decompressed_size(const unsigned char *pInputData, size_t nInputSize, const unsigned int nFlags) {
+   const unsigned char *pInputDataEnd = pInputData + nInputSize;
+   int nCurBitMask = 0;
+   unsigned char bits = 0;
+   int nMatchOffset = -1;
+   int nFollowsLiteral = 3;
+   size_t nDecompressedSize = 0;
+
+   /* The first byte is always stored verbatim. */
+   if (pInputData >= pInputDataEnd)
+      return -1;
+   pInputData++;
+   nDecompressedSize++;
+
+   while (1) {
+      int nResult;
+
+      nResult = apultra_read_bit(&pInputData, pInputDataEnd, &nCurBitMask, &bits);
+      if (nResult < 0) return -1;
+
+      if (!nResult) {
+         /* '0': literal */
+         if (pInputData < pInputDataEnd) {
+            pInputData++;
+            nDecompressedSize++;
+            nFollowsLiteral = 3;
+         }
+         else {
+            return -1;
+         }
+      }
+      else {
+         nResult = apultra_read_bit(&pInputData, pInputDataEnd, &nCurBitMask, &bits);
+         if (nResult < 0) return -1;
+
+         if (nResult == 0) {
+            unsigned int nMatchLen;
+
+            /* '10': 8+n bits offset */
+            int nMatchOffsetHi = apultra_read_gamma2(&pInputData, pInputDataEnd, &nCurBitMask, &bits);
+            nMatchOffsetHi -= nFollowsLiteral;
+            if (nMatchOffsetHi >= 0) {
+               nMatchOffset = ((unsigned int) nMatchOffsetHi) << 8;
+               /* NOTE(review): this byte fetch is not bounds-checked against
+                  pInputDataEnd — confirm inputs are framed/validated upstream. */
+               nMatchOffset |= (unsigned int)(*pInputData++);
+
+               nMatchLen = apultra_read_gamma2(&pInputData, pInputDataEnd, &nCurBitMask, &bits);
+
+               /* Minimum encodable match length depends on the offset range. */
+               if (nMatchOffset < 128 || nMatchOffset >= MINMATCH4_OFFSET)
+                  nMatchLen += 2;
+               else if (nMatchOffset >= MINMATCH3_OFFSET)
+                  nMatchLen++;
+            }
+            else {
+               /* else rep-match: previous offset is reused */
+               nMatchLen = apultra_read_gamma2(&pInputData, pInputDataEnd, &nCurBitMask, &bits);
+            }
+
+            nFollowsLiteral = 2;
+
+            nDecompressedSize += nMatchLen;
+         }
+         else {
+            nResult = apultra_read_bit(&pInputData, pInputDataEnd, &nCurBitMask, &bits);
+            if (nResult < 0) return -1;
+
+            if (nResult == 0) {
+               unsigned int nCommand;
+               unsigned int nMatchLen;
+
+               /* '110': 7 bits offset + 1 bit length */
+               /* NOTE(review): unchecked byte fetch, same as above. */
+               nCommand = (unsigned int)(*pInputData++);
+               if (nCommand == 0x00) {
+                  /* EOD. No match len follows. */
+                  break;
+               }
+
+               /* Bits 7-1: offset; bit 0: length */
+               nMatchOffset = (nCommand >> 1);
+               nMatchLen = (nCommand & 1) + 2;
+
+               nFollowsLiteral = 2;
+               nDecompressedSize += nMatchLen;
+            }
+            else {
+               unsigned int nShortMatchOffset;
+
+               /* '111': 4 bit offset */
+               nResult = apultra_read_bit(&pInputData, pInputDataEnd, &nCurBitMask, &bits);
+               if (nResult < 0) return -1;
+               nShortMatchOffset = nResult << 3;
+
+               nResult = apultra_read_bit(&pInputData, pInputDataEnd, &nCurBitMask, &bits);
+               if (nResult < 0) return -1;
+               nShortMatchOffset |= nResult << 2;
+
+               nResult = apultra_read_bit(&pInputData, pInputDataEnd, &nCurBitMask, &bits);
+               if (nResult < 0) return -1;
+               nShortMatchOffset |= nResult << 1;
+
+               nResult = apultra_read_bit(&pInputData, pInputDataEnd, &nCurBitMask, &bits);
+               if (nResult < 0) return -1;
+               nShortMatchOffset |= nResult << 0;
+
+               /* The offset bits are consumed only to keep the stream in
+                  sync; a '111' token always expands to exactly one byte. */
+               nFollowsLiteral = 3;
+               nDecompressedSize++;
+            }
+         }
+      }
+   }
+
+   return nDecompressedSize;
+}
+
+/**
+ * Decompress data in memory
+ *
+ * @param pInputData compressed data
+ * @param pOutData buffer for decompressed data
+ * @param nInputSize compressed size in bytes
+ * @param nMaxOutBufferSize maximum capacity of decompression buffer
+ * @param nDictionarySize size of dictionary in front of input data (0 for none)
+ * @param nFlags compression flags (set to 0)
+ *
+ * @return actual decompressed size, or (size_t)-1 for error
+ */
+size_t apultra_decompress(const unsigned char *pInputData, unsigned char *pOutData, size_t nInputSize, size_t nMaxOutBufferSize, size_t nDictionarySize, const unsigned int nFlags) {
+   const unsigned char *pInputDataEnd = pInputData + nInputSize;
+   unsigned char *pCurOutData = pOutData + nDictionarySize;
+   const unsigned char *pOutDataEnd = pCurOutData + nMaxOutBufferSize;
+   /* NOTE(review): assumes the output buffer is at least 20 bytes — confirm. */
+   const unsigned char *pOutDataFastEnd = pOutDataEnd - 20;
+   int nCurBitMask = 0;
+   unsigned char bits = 0;
+   int nMatchOffset = -1;
+   int nFollowsLiteral = 3;
+
+   /* The first byte is always stored verbatim.
+      NOTE(review): the guard uses '&&', so an empty output buffer with
+      non-empty input falls through to the write below — confirm callers
+      guarantee nMaxOutBufferSize >= 1. */
+   if (pInputData >= pInputDataEnd && pCurOutData < pOutDataEnd)
+      return -1;
+   *pCurOutData++ = *pInputData++;
+
+   while (1) {
+      int nResult;
+
+      nResult = apultra_read_bit(&pInputData, pInputDataEnd, &nCurBitMask, &bits);
+      if (nResult < 0) return -1;
+
+      if (!nResult) {
+         /* '0': literal */
+         if (pInputData < pInputDataEnd && pCurOutData < pOutDataEnd) {
+            *pCurOutData++ = *pInputData++;
+            nFollowsLiteral = 3;
+         }
+         else {
+            return -1;
+         }
+      }
+      else {
+         nResult = apultra_read_bit(&pInputData, pInputDataEnd, &nCurBitMask, &bits);
+         if (nResult < 0) return -1;
+
+         if (nResult == 0) {
+            unsigned int nMatchLen;
+
+            /* '10': 8+n bits offset */
+            int nMatchOffsetHi = apultra_read_gamma2(&pInputData, pInputDataEnd, &nCurBitMask, &bits);
+            nMatchOffsetHi -= nFollowsLiteral;
+            if (nMatchOffsetHi >= 0) {
+               nMatchOffset = ((unsigned int) nMatchOffsetHi) << 8;
+               nMatchOffset |= (unsigned int)(*pInputData++);
+
+               nMatchLen = apultra_read_gamma2(&pInputData, pInputDataEnd, &nCurBitMask, &bits);
+
+               /* Minimum encodable match length depends on the offset range. */
+               if (nMatchOffset < 128 || nMatchOffset >= MINMATCH4_OFFSET)
+                  nMatchLen += 2;
+               else if (nMatchOffset >= MINMATCH3_OFFSET)
+                  nMatchLen++;
+            }
+            else {
+               /* else rep-match: the previous nMatchOffset is reused */
+               nMatchLen = apultra_read_gamma2(&pInputData, pInputDataEnd, &nCurBitMask, &bits);
+            }
+
+            nFollowsLiteral = 2;
+            const unsigned char *pSrc = pCurOutData - nMatchOffset;
+            if (pSrc >= pOutData && (pSrc + nMatchLen) <= pOutDataEnd) {
+               /* Fast path: short match, no overlap hazard, room to over-copy. */
+               if (nMatchLen < 11 && nMatchOffset >= 8 && pCurOutData < pOutDataFastEnd) {
+                  memcpy(pCurOutData, pSrc, 8);
+                  memcpy(pCurOutData + 8, pSrc + 8, 2);
+                  pCurOutData += nMatchLen;
+               }
+               else {
+                  if ((pCurOutData + nMatchLen) <= pOutDataEnd) {
+                     /* Do a deterministic, left to right byte copy instead of memcpy() so as to handle overlaps */
+
+                     if (nMatchOffset >= 16 && (pCurOutData + nMatchLen) < (pOutDataFastEnd - 15)) {
+                        const unsigned char *pCopySrc = pSrc;
+                        unsigned char *pCopyDst = pCurOutData;
+                        const unsigned char *pCopyEndDst = pCurOutData + nMatchLen;
+
+                        do {
+                           memcpy(pCopyDst, pCopySrc, 16);
+                           pCopySrc += 16;
+                           pCopyDst += 16;
+                        } while (pCopyDst < pCopyEndDst);
+
+                        pCurOutData += nMatchLen;
+                     }
+                     else {
+                        while (nMatchLen) {
+                           *pCurOutData++ = *pSrc++;
+                           nMatchLen--;
+                        }
+                     }
+                  }
+                  else {
+                     return -1;
+                  }
+               }
+            }
+            else {
+               return -1;
+            }
+         }
+         else {
+            nResult = apultra_read_bit(&pInputData, pInputDataEnd, &nCurBitMask, &bits);
+            if (nResult < 0) return -1;
+
+            if (nResult == 0) {
+               unsigned int nCommand;
+               unsigned int nMatchLen;
+
+               /* '110': 7 bits offset + 1 bit length */
+               nCommand = (unsigned int)(*pInputData++);
+               if (nCommand == 0x00) {
+                  /* EOD. No match len follows. */
+                  break;
+               }
+
+               /* Bits 7-1: offset; bit 0: length */
+               nMatchOffset = (nCommand >> 1);
+               nMatchLen = (nCommand & 1) + 2;
+
+               nFollowsLiteral = 2;
+               const unsigned char *pSrc = pCurOutData - nMatchOffset;
+               if (pSrc >= pOutData && (pSrc + nMatchLen) <= pOutDataEnd) {
+                  if (nMatchOffset >= 8 && pCurOutData < pOutDataFastEnd) {
+                     memcpy(pCurOutData, pSrc, 8);
+                     memcpy(pCurOutData + 8, pSrc + 8, 2);
+                     pCurOutData += nMatchLen;
+                  }
+                  else {
+                     if ((pCurOutData + nMatchLen) <= pOutDataEnd) {
+                        while (nMatchLen) {
+                           *pCurOutData++ = *pSrc++;
+                           nMatchLen--;
+                        }
+                     }
+                     else {
+                        return -1;
+                     }
+                  }
+               }
+               else {
+                  return -1;
+               }
+            }
+            else {
+               unsigned int nShortMatchOffset;
+
+               /* '111': 4 bit offset */
+               nResult = apultra_read_bit(&pInputData, pInputDataEnd, &nCurBitMask, &bits);
+               if (nResult < 0) return -1;
+               nShortMatchOffset = nResult << 3;
+
+               nResult = apultra_read_bit(&pInputData, pInputDataEnd, &nCurBitMask, &bits);
+               if (nResult < 0) return -1;
+               nShortMatchOffset |= nResult << 2;
+
+               nResult = apultra_read_bit(&pInputData, pInputDataEnd, &nCurBitMask, &bits);
+               if (nResult < 0) return -1;
+               nShortMatchOffset |= nResult << 1;
+
+               nResult = apultra_read_bit(&pInputData, pInputDataEnd, &nCurBitMask, &bits);
+               if (nResult < 0) return -1;
+               nShortMatchOffset |= nResult << 0;
+
+               nFollowsLiteral = 3;
+               if (nShortMatchOffset) {
+                  /* Short offset, 1-15: copy one byte from the window */
+                  const unsigned char *pSrc = pCurOutData - nShortMatchOffset;
+                  if (pSrc >= pOutData && (pCurOutData + 1) <= pOutDataEnd && (pSrc + 1) <= pOutDataEnd) {
+                     *pCurOutData++ = *pSrc++;
+                  }
+                  else {
+                     return -1;
+                  }
+               }
+               else {
+                  /* Write zero */
+                  if ((pCurOutData + 1) <= pOutDataEnd) {
+                     *pCurOutData++ = 0;
+                  }
+                  else {
+                     return -1;
+                  }
+               }
+            }
+         }
+      }
+   }
+
+   /* Bytes newly produced, excluding the leading dictionary prefix. */
+   return (size_t)(pCurOutData - pOutData) - nDictionarySize;
+}
diff --git a/tools/z64compress/src/enc/apultra/expand.h b/tools/z64compress/src/enc/apultra/expand.h
new file mode 100644
index 000000000..9cd658ad8
--- /dev/null
+++ b/tools/z64compress/src/enc/apultra/expand.h
@@ -0,0 +1,71 @@
+/*
+ * expand.h - decompressor definitions
+ *
+ * Copyright (C) 2019 Emmanuel Marty
+ *
+ * This software is provided 'as-is', without any express or implied
+ * warranty. In no event will the authors be held liable for any damages
+ * arising from the use of this software.
+ *
+ * Permission is granted to anyone to use this software for any purpose,
+ * including commercial applications, and to alter it and redistribute it
+ * freely, subject to the following restrictions:
+ *
+ * 1. The origin of this software must not be misrepresented; you must not
+ * claim that you wrote the original software. If you use this software
+ * in a product, an acknowledgment in the product documentation would be
+ * appreciated but is not required.
+ * 2. Altered source versions must be plainly marked as such, and must not be
+ * misrepresented as being the original software.
+ * 3. This notice may not be removed or altered from any source distribution.
+ */
+
+/*
+ * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
+ *
+ * Inspired by cap by Sven-Åke Dahl. https://github.com/svendahl/cap
+ * Also inspired by Charles Bloom's compression blog. http://cbloomrants.blogspot.com/
+ * With ideas from LZ4 by Yann Collet. https://github.com/lz4/lz4
+ * With help and support from spke
+ *
+ */
+
+#ifndef _EXPAND_H
+#define _EXPAND_H
+
+#include <stddef.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Get maximum decompressed size of compressed data
+ *
+ * @param pInputData compressed data
+ * @param nInputSize compressed size in bytes
+ * @param nFlags compression flags (set to 0)
+ *
+ * @return maximum decompressed size
+ */
+size_t apultra_get_max_decompressed_size(const unsigned char *pInputData, size_t nInputSize, const unsigned int nFlags);
+
+/**
+ * Decompress data in memory
+ *
+ * @param pInputData compressed data
+ * @param pOutBuffer buffer for decompressed data
+ * @param nInputSize compressed size in bytes
+ * @param nMaxOutBufferSize maximum capacity of decompression buffer
+ * @param nDictionarySize size of dictionary in front of input data (0 for none)
+ * @param nFlags compression flags (set to 0)
+ *
+ * @return actual decompressed size, or -1 for error
+ */
+size_t apultra_decompress(const unsigned char *pInputData, unsigned char *pOutBuffer, size_t nInputSize, size_t nMaxOutBufferSize, size_t nDictionarySize, const unsigned int nFlags);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _EXPAND_H */
diff --git a/tools/z64compress/src/enc/apultra/format.h b/tools/z64compress/src/enc/apultra/format.h
new file mode 100644
index 000000000..1e280c1b3
--- /dev/null
+++ b/tools/z64compress/src/enc/apultra/format.h
@@ -0,0 +1,47 @@
+/*
+ * format.h - byte stream format definitions
+ *
+ * Copyright (C) 2019 Emmanuel Marty
+ *
+ * This software is provided 'as-is', without any express or implied
+ * warranty. In no event will the authors be held liable for any damages
+ * arising from the use of this software.
+ *
+ * Permission is granted to anyone to use this software for any purpose,
+ * including commercial applications, and to alter it and redistribute it
+ * freely, subject to the following restrictions:
+ *
+ * 1. The origin of this software must not be misrepresented; you must not
+ * claim that you wrote the original software. If you use this software
+ * in a product, an acknowledgment in the product documentation would be
+ * appreciated but is not required.
+ * 2. Altered source versions must be plainly marked as such, and must not be
+ * misrepresented as being the original software.
+ * 3. This notice may not be removed or altered from any source distribution.
+ */
+
+/*
+ * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
+ *
+ * Inspired by cap by Sven-Åke Dahl. https://github.com/svendahl/cap
+ * Also inspired by Charles Bloom's compression blog. http://cbloomrants.blogspot.com/
+ * With ideas from LZ4 by Yann Collet. https://github.com/lz4/lz4
+ * With help and support from spke
+ *
+ */
+
+#ifndef _FORMAT_H
+#define _FORMAT_H
+
+#define MIN_OFFSET 1
+#define MAX_OFFSET 0x1fffff
+
+#define MAX_VARLEN 0x1fffff
+
+#define BLOCK_SIZE 0x100000
+
+#define MIN_MATCH_SIZE 1
+#define MINMATCH3_OFFSET 1280
+#define MINMATCH4_OFFSET 32000
+
+#endif /* _FORMAT_H */
diff --git a/tools/z64compress/src/enc/apultra/libapultra.h b/tools/z64compress/src/enc/apultra/libapultra.h
new file mode 100644
index 000000000..36fd29555
--- /dev/null
+++ b/tools/z64compress/src/enc/apultra/libapultra.h
@@ -0,0 +1,40 @@
+/*
+ * libapultra.h - library definitions
+ *
+ * Copyright (C) 2019 Emmanuel Marty
+ *
+ * This software is provided 'as-is', without any express or implied
+ * warranty. In no event will the authors be held liable for any damages
+ * arising from the use of this software.
+ *
+ * Permission is granted to anyone to use this software for any purpose,
+ * including commercial applications, and to alter it and redistribute it
+ * freely, subject to the following restrictions:
+ *
+ * 1. The origin of this software must not be misrepresented; you must not
+ * claim that you wrote the original software. If you use this software
+ * in a product, an acknowledgment in the product documentation would be
+ * appreciated but is not required.
+ * 2. Altered source versions must be plainly marked as such, and must not be
+ * misrepresented as being the original software.
+ * 3. This notice may not be removed or altered from any source distribution.
+ */
+
+/*
+ * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
+ *
+ * Inspired by cap by Sven-Åke Dahl. https://github.com/svendahl/cap
+ * Also inspired by Charles Bloom's compression blog. http://cbloomrants.blogspot.com/
+ * With ideas from LZ4 by Yann Collet. https://github.com/lz4/lz4
+ * With help and support from spke
+ *
+ */
+
+#ifndef _LIB_APULTRA_H
+#define _LIB_APULTRA_H
+
+#include "format.h"
+#include "shrink.h"
+#include "expand.h"
+
+#endif /* _LIB_APULTRA_H */
diff --git a/tools/z64compress/src/enc/apultra/matchfinder.c b/tools/z64compress/src/enc/apultra/matchfinder.c
new file mode 100644
index 000000000..8d7802a52
--- /dev/null
+++ b/tools/z64compress/src/enc/apultra/matchfinder.c
@@ -0,0 +1,449 @@
+/*
+ * matchfinder.c - LZ match finder implementation
+ *
+ * The following copying information applies to this specific source code file:
+ *
+ * Written in 2019 by Emmanuel Marty
+ * Portions written in 2014-2015 by Eric Biggers
+ *
+ * To the extent possible under law, the author(s) have dedicated all copyright
+ * and related and neighboring rights to this software to the public domain
+ * worldwide via the Creative Commons Zero 1.0 Universal Public Domain
+ * Dedication (the "CC0").
+ *
+ * This software is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the CC0 for more details.
+ *
+ * You should have received a copy of the CC0 along with this software; if not
+ * see <http://creativecommons.org/publicdomain/zero/1.0/>.
+ */
+
+/*
+ * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
+ *
+ * Inspired by cap by Sven-Åke Dahl. https://github.com/svendahl/cap
+ * Also inspired by Charles Bloom's compression blog. http://cbloomrants.blogspot.com/
+ * With ideas from LZ4 by Yann Collet. https://github.com/lz4/lz4
+ * With help and support from spke
+ *
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include "matchfinder.h"
+#include "format.h"
+#include "libapultra.h"
+
+/**
+ * Hash index into TAG_BITS
+ *
+ * @param nIndex index value
+ *
+ * @return hash
+ */
+static inline int apultra_get_index_tag(unsigned int nIndex) {
+ return (int)(((unsigned long long)nIndex * 11400714819323198485ULL) >> (64ULL - TAG_BITS));
+}
+
+/**
+ * Parse input data, build suffix array and overlaid data structures to speed up match finding
+ *
+ * @param pCompressor compression context
+ * @param pInWindow pointer to input data window (previously compressed bytes + bytes to compress)
+ * @param nInWindowSize total input size in bytes (previously compressed bytes + bytes to compress)
+ *
+ * @return 0 for success, non-zero for failure
+ */
+int apultra_build_suffix_array(apultra_compressor *pCompressor, const unsigned char *pInWindow, const int nInWindowSize) {
+ unsigned long long *intervals = pCompressor->intervals;
+
+ /* Build suffix array from input data */
+ saidx_t *suffixArray = (saidx_t*)intervals;
+ if (divsufsort_build_array(&pCompressor->divsufsort_context, pInWindow, suffixArray, nInWindowSize) != 0) {
+ return 100;
+ }
+
+ int i, r;
+
+ for (i = nInWindowSize - 1; i >= 0; i--) {
+ intervals[i] = suffixArray[i];
+ }
+
+ int *PLCP = (int*)pCompressor->pos_data; /* Use temporarily */
+ int *Phi = PLCP;
+ int nCurLen = 0;
+
+ /* Compute the permuted LCP first (Kärkkäinen method) */
+ Phi[intervals[0]] = -1;
+ for (i = 1; i < nInWindowSize; i++)
+ Phi[intervals[i]] = (unsigned int)intervals[i - 1];
+ for (i = 0; i < nInWindowSize; i++) {
+ if (Phi[i] == -1) {
+ PLCP[i] = 0;
+ continue;
+ }
+ int nMaxLen = (i > Phi[i]) ? (nInWindowSize - i) : (nInWindowSize - Phi[i]);
+ while (nCurLen < nMaxLen && pInWindow[i + nCurLen] == pInWindow[Phi[i] + nCurLen]) nCurLen++;
+ PLCP[i] = nCurLen;
+ if (nCurLen > 0)
+ nCurLen--;
+ }
+
+ /* Rotate permuted LCP into the LCP. This has better cache locality than the direct Kasai LCP method. This also
+ * saves us from having to build the inverse suffix array index, as the LCP is calculated without it using this method,
+ * and the interval builder below doesn't need it either. */
+ intervals[0] &= POS_MASK;
+
+ for (i = 1; i < nInWindowSize; i++) {
+ int nIndex = (int)(intervals[i] & POS_MASK);
+ int nLen = PLCP[nIndex];
+ if (nLen < MIN_MATCH_SIZE)
+ nLen = 0;
+ if (nLen > LCP_MAX)
+ nLen = LCP_MAX;
+ int nTaggedLen = 0;
+ if (nLen)
+ nTaggedLen = (nLen << TAG_BITS) | (apultra_get_index_tag((unsigned int)nIndex) & ((1 << TAG_BITS) - 1));
+ intervals[i] = ((unsigned long long)nIndex) | (((unsigned long long)nTaggedLen) << LCP_SHIFT);
+ }
+
+ /**
+ * Build intervals for finding matches
+ *
+ * Methodology and code fragment taken from wimlib (CC0 license):
+ * https://wimlib.net/git/?p=wimlib;a=blob_plain;f=src/lcpit_matchfinder.c;h=a2d6a1e0cd95200d1f3a5464d8359d5736b14cbe;hb=HEAD
+ */
+ unsigned long long * const SA_and_LCP = intervals;
+ unsigned long long *pos_data = pCompressor->pos_data;
+ unsigned long long next_interval_idx;
+ unsigned long long *top = pCompressor->open_intervals;
+ unsigned long long prev_pos = SA_and_LCP[0] & POS_MASK;
+
+ *top = 0;
+ intervals[0] = 0;
+ next_interval_idx = 1;
+
+ for (r = 1; r < nInWindowSize; r++) {
+ const unsigned long long next_pos = SA_and_LCP[r] & POS_MASK;
+ const unsigned long long next_lcp = SA_and_LCP[r] & LCP_MASK;
+ const unsigned long long top_lcp = *top & LCP_MASK;
+
+ if (next_lcp == top_lcp) {
+ /* Continuing the deepest open interval */
+ pos_data[prev_pos] = *top;
+ }
+ else if (next_lcp > top_lcp) {
+ /* Opening a new interval */
+ *++top = next_lcp | next_interval_idx++;
+ pos_data[prev_pos] = *top;
+ }
+ else {
+ /* Closing the deepest open interval */
+ pos_data[prev_pos] = *top;
+ for (;;) {
+ const unsigned long long closed_interval_idx = *top-- & POS_MASK;
+ const unsigned long long superinterval_lcp = *top & LCP_MASK;
+
+ if (next_lcp == superinterval_lcp) {
+ /* Continuing the superinterval */
+ intervals[closed_interval_idx] = *top;
+ break;
+ }
+ else if (next_lcp > superinterval_lcp) {
+ /* Creating a new interval that is a
+ * superinterval of the one being
+ * closed, but still a subinterval of
+ * its superinterval */
+ *++top = next_lcp | next_interval_idx++;
+ intervals[closed_interval_idx] = *top;
+ break;
+ }
+ else {
+ /* Also closing the superinterval */
+ intervals[closed_interval_idx] = *top;
+ }
+ }
+ }
+ prev_pos = next_pos;
+ }
+
+ /* Close any still-open intervals. */
+ pos_data[prev_pos] = *top;
+ for (; top > pCompressor->open_intervals; top--)
+ intervals[*top & POS_MASK] = *(top - 1);
+
+ /* Success */
+ return 0;
+}
+
+/**
+ * Find matches at the specified offset in the input window
+ *
+ * @param pCompressor compression context
+ * @param nOffset offset to find matches at, in the input window
+ * @param pMatches pointer to returned matches
+ * @param pMatchDepth pointer to returned match depths
+ * @param pMatch1 pointer to 1-byte length, 4 bit offset match
+ * @param nMaxMatches maximum number of matches to return (0 for none)
+ * @param nBlockFlags bit 0: 1 for first block, 0 otherwise; bit 1: 1 for last block, 0 otherwise
+ *
+ * @return number of matches
+ */
+int apultra_find_matches_at(apultra_compressor *pCompressor, const int nOffset, apultra_match *pMatches, unsigned short *pMatchDepth, unsigned char *pMatch1, const int nMaxMatches, const int nBlockFlags) {
+ unsigned long long *intervals = pCompressor->intervals;
+ unsigned long long *pos_data = pCompressor->pos_data;
+ unsigned long long ref;
+ unsigned long long super_ref;
+ unsigned long long match_pos;
+ apultra_match *matchptr;
+ unsigned short *depthptr;
+ const int nMaxOffset = pCompressor->max_offset;
+
+ *pMatch1 = 0;
+
+ /**
+ * Find matches using intervals
+ *
+ * Taken from wimlib (CC0 license):
+ * https://wimlib.net/git/?p=wimlib;a=blob_plain;f=src/lcpit_matchfinder.c;h=a2d6a1e0cd95200d1f3a5464d8359d5736b14cbe;hb=HEAD
+ */
+
+ /* Get the deepest lcp-interval containing the current suffix. */
+ ref = pos_data[nOffset];
+
+ pos_data[nOffset] = 0;
+
+ /* Ascend until we reach a visited interval, the root, or a child of the
+ * root. Link unvisited intervals to the current suffix as we go. */
+ while ((super_ref = intervals[ref & POS_MASK]) & LCP_MASK) {
+ intervals[ref & POS_MASK] = nOffset | VISITED_FLAG;
+ ref = super_ref;
+ }
+
+ if (super_ref == 0) {
+ /* In this case, the current interval may be any of:
+ * (1) the root;
+ * (2) an unvisited child of the root */
+
+ if (ref != 0) /* Not the root? */
+ intervals[ref & POS_MASK] = nOffset | VISITED_FLAG;
+ return 0;
+ }
+
+ /* Ascend indirectly via pos_data[] links. */
+ match_pos = super_ref & EXCL_VISITED_MASK;
+ matchptr = pMatches;
+ depthptr = pMatchDepth;
+ int nPrevOffset = 0;
+ int nPrevLen = 0;
+ int nCurDepth = 0;
+ unsigned short *cur_depth = NULL;
+
+ if (nOffset >= match_pos && (nBlockFlags & 3) == 3) {
+ int nMatchOffset = (int)(nOffset - match_pos);
+ int nMatchLen = (int)(ref >> (LCP_SHIFT + TAG_BITS));
+
+ if ((matchptr - pMatches) < nMaxMatches) {
+ if (nMatchOffset <= nMaxOffset) {
+ if (nPrevOffset && nPrevLen > 2 && nMatchOffset == (nPrevOffset - 1) && nMatchLen == (nPrevLen - 1) && cur_depth && nCurDepth < LCP_MAX) {
+ nCurDepth++;
+ *cur_depth = nCurDepth;
+ }
+ else {
+ nCurDepth = 0;
+
+ cur_depth = depthptr;
+ matchptr->length = nMatchLen;
+ matchptr->offset = nMatchOffset;
+ *depthptr = 0;
+ matchptr++;
+ depthptr++;
+ }
+
+ nPrevLen = nMatchLen;
+ nPrevOffset = nMatchOffset;
+ }
+ }
+ }
+
+ for (;;) {
+ if ((super_ref = pos_data[match_pos]) > ref) {
+ match_pos = intervals[super_ref & POS_MASK] & EXCL_VISITED_MASK;
+
+ if (nOffset >= match_pos && (nBlockFlags & 3) == 3) {
+ int nMatchOffset = (int)(nOffset - match_pos);
+ int nMatchLen = (int)(ref >> (LCP_SHIFT + TAG_BITS));
+
+ if ((matchptr - pMatches) < nMaxMatches) {
+ if (nMatchOffset <= nMaxOffset && abs(nMatchOffset - nPrevOffset) >= 128) {
+ if (nPrevOffset && nPrevLen > 2 && nMatchOffset == (nPrevOffset - 1) && nMatchLen == (nPrevLen - 1) && cur_depth && nCurDepth < LCP_MAX) {
+ nCurDepth++;
+ *cur_depth = nCurDepth | 0x8000;
+ }
+ else {
+ nCurDepth = 0;
+
+ cur_depth = depthptr;
+ matchptr->length = nMatchLen;
+ matchptr->offset = nMatchOffset;
+ *depthptr = 0x8000;
+ matchptr++;
+ depthptr++;
+ }
+
+ nPrevLen = nMatchLen;
+ nPrevOffset = nMatchOffset;
+ }
+ }
+ }
+ }
+
+ while ((super_ref = pos_data[match_pos]) > ref) {
+ match_pos = intervals[super_ref & POS_MASK] & EXCL_VISITED_MASK;
+
+ if (nOffset > match_pos && (nBlockFlags & 3) == 3) {
+ int nMatchOffset = (int)(nOffset - match_pos);
+ int nMatchLen = (int)(ref >> (LCP_SHIFT + TAG_BITS));
+
+ if ((matchptr - pMatches) < nMaxMatches) {
+ if (nMatchOffset <= nMaxOffset && (nMatchLen >= 3 || (nMatchLen >= 2 && (matchptr - pMatches) < (nMaxMatches - 1))) && nMatchLen < 1280 && abs(nMatchOffset - nPrevOffset) >= 128) {
+ if (nPrevOffset && nPrevLen > 2 && nMatchOffset == (nPrevOffset - 1) && nMatchLen == (nPrevLen - 1) && cur_depth && nCurDepth < LCP_MAX) {
+ nCurDepth++;
+ *cur_depth = nCurDepth | 0x8000;
+ }
+ else {
+ nCurDepth = 0;
+
+ cur_depth = depthptr;
+ matchptr->length = nMatchLen;
+ matchptr->offset = nMatchOffset;
+ *depthptr = 0x8000;
+ matchptr++;
+ depthptr++;
+ }
+
+ nPrevLen = nMatchLen;
+ nPrevOffset = nMatchOffset;
+ }
+ }
+ }
+ }
+
+ intervals[ref & POS_MASK] = nOffset | VISITED_FLAG;
+ pos_data[match_pos] = (unsigned long long)ref;
+
+ int nMatchOffset = (int)(nOffset - match_pos);
+ int nMatchLen = (int)(ref >> (LCP_SHIFT + TAG_BITS));
+
+ if ((matchptr - pMatches) < nMaxMatches) {
+ if (nMatchOffset <= nMaxOffset && nMatchOffset != nPrevOffset) {
+ if (nPrevOffset && nPrevLen > 2 && nMatchOffset == (nPrevOffset - 1) && nMatchLen == (nPrevLen - 1) && cur_depth && nCurDepth < LCP_MAX) {
+ nCurDepth++;
+ *cur_depth = nCurDepth;
+ }
+ else {
+ nCurDepth = 0;
+
+ cur_depth = depthptr;
+ matchptr->length = nMatchLen;
+ matchptr->offset = nMatchOffset;
+ *depthptr = 0;
+ matchptr++;
+ depthptr++;
+ }
+
+ nPrevLen = nMatchLen;
+ nPrevOffset = nMatchOffset;
+ }
+ }
+
+ if (nMatchOffset && nMatchOffset < 16 && nMatchLen)
+ *pMatch1 = nMatchOffset;
+
+ if (super_ref == 0)
+ break;
+ ref = super_ref;
+ match_pos = intervals[ref & POS_MASK] & EXCL_VISITED_MASK;
+
+ if (nOffset > match_pos && (nBlockFlags & 3) == 3) {
+ int nMatchOffset = (int)(nOffset - match_pos);
+ int nMatchLen = (int)(ref >> (LCP_SHIFT + TAG_BITS));
+
+ if ((matchptr - pMatches) < nMaxMatches) {
+ if (nMatchOffset <= nMaxOffset && nMatchLen >= 2 && abs(nMatchOffset - nPrevOffset) >= 128) {
+ if (nPrevOffset && nPrevLen > 2 && nMatchOffset == (nPrevOffset - 1) && nMatchLen == (nPrevLen - 1) && cur_depth && nCurDepth < LCP_MAX) {
+ nCurDepth++;
+ *cur_depth = nCurDepth | 0x8000;
+ }
+ else {
+ nCurDepth = 0;
+
+ cur_depth = depthptr;
+ matchptr->length = nMatchLen;
+ matchptr->offset = nMatchOffset;
+ *depthptr = 0x8000;
+ matchptr++;
+ depthptr++;
+ }
+
+ nPrevLen = nMatchLen;
+ nPrevOffset = nMatchOffset;
+ }
+ }
+ }
+ }
+
+ return (int)(matchptr - pMatches);
+}
+
+/**
+ * Skip previously compressed bytes
+ *
+ * @param pCompressor compression context
+ * @param nStartOffset current offset in input window (typically 0)
+ * @param nEndOffset offset to skip to in input window (typically the number of previously compressed bytes)
+ */
+void apultra_skip_matches(apultra_compressor *pCompressor, const int nStartOffset, const int nEndOffset) {
+ apultra_match match;
+ unsigned short depth;
+ unsigned char match1;
+ int i;
+
+ /* Skipping still requires scanning for matches, as this also performs a lazy update of the intervals. However,
+ * we don't store the matches. */
+ for (i = nStartOffset; i < nEndOffset; i++) {
+ apultra_find_matches_at(pCompressor, i, &match, &depth, &match1, 0, 0);
+ }
+}
+
+/**
+ * Find all matches for the data to be compressed
+ *
+ * @param pCompressor compression context
+ * @param nMatchesPerOffset maximum number of matches to store for each offset
+ * @param nStartOffset current offset in input window (typically the number of previously compressed bytes)
+ * @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes
+ * @param nBlockFlags bit 0: 1 for first block, 0 otherwise; bit 1: 1 for last block, 0 otherwise
+ */
+void apultra_find_all_matches(apultra_compressor *pCompressor, const int nMatchesPerOffset, const int nStartOffset, const int nEndOffset, const int nBlockFlags) {
+ apultra_match *pMatch = pCompressor->match;
+ unsigned short *pMatchDepth = pCompressor->match_depth;
+ unsigned char *pMatch1 = pCompressor->match1;
+ int i;
+
+ for (i = nStartOffset; i < nEndOffset; i++) {
+ int nMatches = apultra_find_matches_at(pCompressor, i, pMatch, pMatchDepth, pMatch1, nMatchesPerOffset, nBlockFlags);
+
+ while (nMatches < nMatchesPerOffset) {
+ pMatch[nMatches].length = 0;
+ pMatch[nMatches].offset = 0;
+ pMatchDepth[nMatches] = 0;
+ nMatches++;
+ }
+
+ pMatch += nMatchesPerOffset;
+ pMatchDepth += nMatchesPerOffset;
+ pMatch1++;
+ }
+}
diff --git a/tools/z64compress/src/enc/apultra/matchfinder.h b/tools/z64compress/src/enc/apultra/matchfinder.h
new file mode 100644
index 000000000..4a6935435
--- /dev/null
+++ b/tools/z64compress/src/enc/apultra/matchfinder.h
@@ -0,0 +1,94 @@
+/*
+ * matchfinder.h - LZ match finder definitions
+ *
+ * Copyright (C) 2019 Emmanuel Marty
+ *
+ * This software is provided 'as-is', without any express or implied
+ * warranty. In no event will the authors be held liable for any damages
+ * arising from the use of this software.
+ *
+ * Permission is granted to anyone to use this software for any purpose,
+ * including commercial applications, and to alter it and redistribute it
+ * freely, subject to the following restrictions:
+ *
+ * 1. The origin of this software must not be misrepresented; you must not
+ * claim that you wrote the original software. If you use this software
+ * in a product, an acknowledgment in the product documentation would be
+ * appreciated but is not required.
+ * 2. Altered source versions must be plainly marked as such, and must not be
+ * misrepresented as being the original software.
+ * 3. This notice may not be removed or altered from any source distribution.
+ */
+
+/*
+ * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
+ *
+ * Inspired by cap by Sven-Åke Dahl. https://github.com/svendahl/cap
+ * Also inspired by Charles Bloom's compression blog. http://cbloomrants.blogspot.com/
+ * With ideas from LZ4 by Yann Collet. https://github.com/lz4/lz4
+ * With help and support from spke
+ *
+ */
+
+#ifndef _MATCHFINDER_H
+#define _MATCHFINDER_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Forward declarations */
+typedef struct _apultra_match apultra_match;
+typedef struct _apultra_compressor apultra_compressor;
+
+/**
+ * Parse input data, build suffix array and overlaid data structures to speed up match finding
+ *
+ * @param pCompressor compression context
+ * @param pInWindow pointer to input data window (previously compressed bytes + bytes to compress)
+ * @param nInWindowSize total input size in bytes (previously compressed bytes + bytes to compress)
+ *
+ * @return 0 for success, non-zero for failure
+ */
+int apultra_build_suffix_array(apultra_compressor *pCompressor, const unsigned char *pInWindow, const int nInWindowSize);
+
+/**
+ * Find matches at the specified offset in the input window
+ *
+ * @param pCompressor compression context
+ * @param nOffset offset to find matches at, in the input window
+ * @param pMatches pointer to returned matches
+ * @param pMatchDepth pointer to returned match depths
+ * @param pMatch1 pointer to 1-byte length, 4 bit offset match
+ * @param nMaxMatches maximum number of matches to return (0 for none)
+ * @param nBlockFlags bit 0: 1 for first block, 0 otherwise; bit 1: 1 for last block, 0 otherwise
+ *
+ * @return number of matches
+ */
+int apultra_find_matches_at(apultra_compressor *pCompressor, const int nOffset, apultra_match *pMatches, unsigned short *pMatchDepth, unsigned char *pMatch1, const int nMaxMatches, const int nBlockFlags);
+
+/**
+ * Skip previously compressed bytes
+ *
+ * @param pCompressor compression context
+ * @param nStartOffset current offset in input window (typically 0)
+ * @param nEndOffset offset to skip to in input window (typically the number of previously compressed bytes)
+ */
+void apultra_skip_matches(apultra_compressor *pCompressor, const int nStartOffset, const int nEndOffset);
+
+/**
+ * Find all matches for the data to be compressed
+ *
+ * @param pCompressor compression context
+ * @param nMatchesPerOffset maximum number of matches to store for each offset
+ * @param nStartOffset current offset in input window (typically the number of previously compressed bytes)
+ * @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes
+ * @param nBlockFlags bit 0: 1 for first block, 0 otherwise; bit 1: 1 for last block, 0 otherwise
+ */
+void apultra_find_all_matches(apultra_compressor *pCompressor, const int nMatchesPerOffset, const int nStartOffset, const int nEndOffset, const int nBlockFlags);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _MATCHFINDER_H */
diff --git a/tools/z64compress/src/enc/apultra/shrink.c b/tools/z64compress/src/enc/apultra/shrink.c
new file mode 100644
index 000000000..ece2144e8
--- /dev/null
+++ b/tools/z64compress/src/enc/apultra/shrink.c
@@ -0,0 +1,1731 @@
+/*
+ * shrink.c - compressor implementation
+ *
+ * Copyright (C) 2019 Emmanuel Marty
+ *
+ * This software is provided 'as-is', without any express or implied
+ * warranty. In no event will the authors be held liable for any damages
+ * arising from the use of this software.
+ *
+ * Permission is granted to anyone to use this software for any purpose,
+ * including commercial applications, and to alter it and redistribute it
+ * freely, subject to the following restrictions:
+ *
+ * 1. The origin of this software must not be misrepresented; you must not
+ * claim that you wrote the original software. If you use this software
+ * in a product, an acknowledgment in the product documentation would be
+ * appreciated but is not required.
+ * 2. Altered source versions must be plainly marked as such, and must not be
+ * misrepresented as being the original software.
+ * 3. This notice may not be removed or altered from any source distribution.
+ */
+
+/*
+ * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
+ *
+ * Inspired by cap by Sven-Åke Dahl. https://github.com/svendahl/cap
+ * Also inspired by Charles Bloom's compression blog. http://cbloomrants.blogspot.com/
+ * With ideas from LZ4 by Yann Collet. https://github.com/lz4/lz4
+ * With help and support from spke
+ *
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <limits.h>
+#include "libapultra.h"
+#include "matchfinder.h"
+#include "shrink.h"
+#include "format.h"
+
+#define TOKEN_CODE_LARGE_MATCH 2 /* 10 */
+#define TOKEN_SIZE_LARGE_MATCH 2
+
+#define TOKEN_CODE_7BIT_MATCH 6 /* 110 */
+#define TOKEN_SIZE_7BIT_MATCH 3
+
+#define TOKEN_CODE_4BIT_MATCH 7 /* 111 */
+#define TOKEN_SIZE_4BIT_MATCH 3
+
+#define CountShift(N,bits) if ((N)>>(bits)) { (N)>>=(bits); (n) += (bits); }
+
+/** Gamma2 bit counts for common values, up to 255 */
+static char _gamma2_size[256] = {
+ 0, 0, 2, 2, 4, 4, 4, 4, 6, 6, 6, 6, 6, 6, 6, 6, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+};
+
+/**
+ * Write bitpacked value to output (compressed) buffer
+ *
+ * @param pOutData pointer to output buffer
+ * @param nOutOffset current write index into output buffer
+ * @param nMaxOutDataSize maximum size of output buffer, in bytes
+ * @param nValue value to write
+ * @param nBits number of least significant bits to write in value
+ * @param nCurBitsOffset write index into output buffer, of current byte being filled with bits
+ * @param nCurBitShift bit shift count
+ *
+ * @return updated write index into output buffer, or -1 in case of an error
+ */
+static int apultra_write_bits(unsigned char *pOutData, int nOutOffset, const int nMaxOutDataSize, const int nValue, const int nBits, int *nCurBitsOffset, int *nCurBitShift) {
+ int i;
+
+ if (nOutOffset < 0) return -1;
+
+ for (i = nBits - 1; i >= 0; i--) {
+ if ((*nCurBitsOffset) == INT_MIN) {
+ /* Allocate a new byte in the stream to pack bits in */
+ if (nOutOffset >= nMaxOutDataSize) return -1;
+ (*nCurBitsOffset) = nOutOffset;
+ (*nCurBitShift) = 7;
+ pOutData[nOutOffset++] = 0;
+ }
+
+ pOutData[(*nCurBitsOffset)] |= ((nValue >> i) & 1) << (*nCurBitShift);
+
+ (*nCurBitShift) --;
+ if ((*nCurBitShift) == -1) {
+ /* Current byte is full */
+ (*nCurBitsOffset) = INT_MIN;
+ }
+ }
+
+ return nOutOffset;
+}
+
+/**
+ * Get size of gamma2 encoded value
+ *
+ * @param nValue value of evaluate (2..n)
+ *
+ * @return number of bits required
+ */
+static int apultra_get_gamma2_size(int nValue) {
+ if (nValue >= 0 && nValue < 256)
+ return _gamma2_size[nValue];
+ else {
+ unsigned int n = 0;
+ CountShift(nValue, 16);
+ CountShift(nValue, 8);
+ CountShift(nValue, 4);
+ CountShift(nValue, 2);
+ CountShift(nValue, 1);
+
+ return n << 1;
+ }
+}
+
+/**
+ * Write gamma2 encoded value to output (compressed) buffer
+ *
+ * @param pOutData pointer to output buffer
+ * @param nOutOffset current write index into output buffer
+ * @param nMaxOutDataSize maximum size of output buffer, in bytes
+ * @param nValue value of write (2..n)
+ * @param nCurBitsOffset write index into output buffer, of current byte being filled with bits
+ * @param nCurBitShift bit shift count
+ *
+ * @return updated write index into output buffer, or -1 in case of an error
+ */
+static int apultra_write_gamma2_value(unsigned char *pOutData, int nOutOffset, const int nMaxOutDataSize, int nValue, int *nCurBitsOffset, int *nCurBitShift) {
+ int msb = 30;
+ while ((nValue >> msb--) == 0);
+
+ while (msb > 0) {
+ int bit = (nValue >> msb) & 1;
+
+ nOutOffset = apultra_write_bits(pOutData, nOutOffset, nMaxOutDataSize, bit, 1, nCurBitsOffset, nCurBitShift);
+ msb--;
+ nOutOffset = apultra_write_bits(pOutData, nOutOffset, nMaxOutDataSize, 1, 1, nCurBitsOffset, nCurBitShift);
+ }
+
+ nOutOffset = apultra_write_bits(pOutData, nOutOffset, nMaxOutDataSize, nValue & 1, 1, nCurBitsOffset, nCurBitShift);
+ nOutOffset = apultra_write_bits(pOutData, nOutOffset, nMaxOutDataSize, 0, 1, nCurBitsOffset, nCurBitShift);
+ return nOutOffset;
+}
+
+/**
+ * Get the number of extra bits required to represent a match offset
+ *
+ * @param nLength match length
+ * @param nMatchOffset match offset
+ * @param nFollowsLiteral non-zero if the match follows a literal, zero if it immediately follows another match
+ *
+ * @return number of extra bits required
+ */
+static inline int apultra_get_offset_varlen_size(const int nLength, const int nMatchOffset, const int nFollowsLiteral) {
+ if (nLength <= 3 && nMatchOffset < 128)
+ return 8 + TOKEN_SIZE_7BIT_MATCH;
+ else {
+ if (nFollowsLiteral)
+ return 8 + TOKEN_SIZE_LARGE_MATCH + apultra_get_gamma2_size((nMatchOffset >> 8) + 3);
+ else
+ return 8 + TOKEN_SIZE_LARGE_MATCH + apultra_get_gamma2_size((nMatchOffset >> 8) + 2);
+ }
+}
+
+/**
+ * Get the number of extra bits required to represent a match length
+ *
+ * @param nLength match length
+ * @param nMatchOffset match offset
+ *
+ * @return number of extra bits required
+ */
+static inline int apultra_get_match_varlen_size(int nLength, const int nMatchOffset) {
+ if (nLength <= 3 && nMatchOffset < 128)
+ return 0;
+ else {
+ if (nMatchOffset < 128 || nMatchOffset >= MINMATCH4_OFFSET)
+ return apultra_get_gamma2_size(nLength - 2);
+ else if (nMatchOffset < MINMATCH3_OFFSET)
+ return apultra_get_gamma2_size(nLength);
+ else
+ return apultra_get_gamma2_size(nLength - 1);
+ }
+}
+
+/**
+ * Insert forward rep candidate
+ *
+ * @param pCompressor compression context
+ * @param pInWindow pointer to input data window (previously compressed bytes + bytes to compress)
+ * @param i input data window position whose matches are being considered
+ * @param nMatchOffset match offset to use as rep candidate
+ * @param nStartOffset current offset in input window (typically the number of previously compressed bytes)
+ * @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes
+ * @param nArrivalsPerPosition maximum number of arrivals per input buffer position
+ * @param nDepth current insertion depth
+ */
+/**
+ * Propagate a match offset forward as a rep-match candidate: for each arrival
+ * recorded at position i that follows a literal, check whether nMatchOffset also
+ * matches the data at that arrival's rep position, and if so record it in the
+ * match table there so the forward optimizer can later consider it as a rep-match.
+ *
+ * @param pCompressor compression context
+ * @param pInWindow pointer to input data window (previously compressed bytes + bytes to compress)
+ * @param i input data window position the match is being propagated from
+ * @param nMatchOffset offset of the match being propagated
+ * @param nStartOffset current offset in input window (typically the number of previously compressed bytes)
+ * @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes)
+ * @param nArrivalsPerPosition maximum number of arrivals per input buffer position
+ * @param nDepth current recursion depth; recursion stops past depth 9
+ */
+static void apultra_insert_forward_match(apultra_compressor *pCompressor, const unsigned char *pInWindow, const int i, const int nMatchOffset, const int nStartOffset, const int nEndOffset, const int nArrivalsPerPosition, int nDepth) {
+   const apultra_arrival *arrival = pCompressor->arrival + ((i - nStartOffset) * nArrivalsPerPosition);
+   const int *rle_len = (int*)pCompressor->intervals /* reuse */;
+   /* pos_data is reused as two int arrays recording the last offset handled at each
+    * position, so the same (position, offset) pair isn't processed twice */
+   int* visited = ((int*)pCompressor->pos_data) - nStartOffset /* reuse */;
+   int* visited2 = visited + (nEndOffset - nStartOffset) /* reuse */;
+   int j;
+
+   /* Examine every valid arrival recorded at position i */
+   for (j = 0; j < nArrivalsPerPosition && arrival[j].from_slot; j++) {
+      if (arrival[j].follows_literal) {
+         int nRepOffset = arrival[j].rep_offset;
+
+         if (nMatchOffset != nRepOffset && nRepOffset) {
+            int nRepPos = arrival[j].rep_pos;
+
+            if (nRepPos >= nStartOffset &&
+               nRepPos < nEndOffset &&
+               visited[nRepPos] != nMatchOffset) {
+
+               visited[nRepPos] = nMatchOffset;
+
+               /* Only proceed if this offset wasn't already inserted at nRepPos, the
+                * offset is reachable there, and the match slot list isn't full */
+               if (visited2[nRepPos] != nMatchOffset && nRepPos >= nMatchOffset && pCompressor->match[((nRepPos - nStartOffset) << MATCHES_PER_INDEX_SHIFT) + NMATCHES_PER_INDEX - 1].length == 0) {
+                  const unsigned char* pInWindowAtRepOffset = pInWindow + nRepPos;
+
+                  if (pInWindowAtRepOffset[0] == pInWindowAtRepOffset[-nMatchOffset]) {
+                     /* Use the precomputed run lengths to skip a guaranteed-equal prefix */
+                     int nLen0 = rle_len[nRepPos - nMatchOffset];
+                     int nLen1 = rle_len[nRepPos];
+                     int nMinLen = (nLen0 < nLen1) ? nLen0 : nLen1;
+
+                     int nMaxRepLen = nEndOffset - nRepPos;
+                     if (nMaxRepLen > LCP_MAX)
+                        nMaxRepLen = LCP_MAX;
+
+                     if (nMinLen > nMaxRepLen)
+                        nMinLen = nMaxRepLen;
+
+                     const unsigned char* pInWindowMax = pInWindowAtRepOffset + nMaxRepLen;
+                     pInWindowAtRepOffset += nMinLen;
+
+                     /* Extend the match 8, then 4, then 1 byte(s) at a time */
+                     while ((pInWindowAtRepOffset + 8) < pInWindowMax && !memcmp(pInWindowAtRepOffset, pInWindowAtRepOffset - nMatchOffset, 8))
+                        pInWindowAtRepOffset += 8;
+                     while ((pInWindowAtRepOffset + 4) < pInWindowMax && !memcmp(pInWindowAtRepOffset, pInWindowAtRepOffset - nMatchOffset, 4))
+                        pInWindowAtRepOffset += 4;
+                     while (pInWindowAtRepOffset < pInWindowMax && pInWindowAtRepOffset[0] == pInWindowAtRepOffset[-nMatchOffset])
+                        pInWindowAtRepOffset++;
+
+                     int nCurRepLen = (int)(pInWindowAtRepOffset - (pInWindow + nRepPos));
+
+                     if (nCurRepLen >= 2) {
+                        apultra_match* fwd_match = pCompressor->match + ((nRepPos - nStartOffset) << MATCHES_PER_INDEX_SHIFT);
+                        unsigned short* fwd_depth = pCompressor->match_depth + ((nRepPos - nStartOffset) << MATCHES_PER_INDEX_SHIFT);
+                        int r;
+
+                        /* If the offset is already recorded at nRepPos, just grow its length if needed */
+                        for (r = 0; fwd_match[r].length >= MIN_MATCH_SIZE; r++) {
+                           if (fwd_match[r].offset == nMatchOffset && (fwd_depth[r] & 0x3fff) == 0) {
+                              if ((int)fwd_match[r].length < nCurRepLen) {
+                                 fwd_match[r].length = nCurRepLen;
+                                 fwd_depth[r] = 0;
+                              }
+                              r = NMATCHES_PER_INDEX;
+                              break;
+                           }
+                        }
+
+                        if (r < NMATCHES_PER_INDEX) {
+                           visited2[nRepPos] = nMatchOffset;
+
+                           fwd_match[r].offset = nMatchOffset;
+                           fwd_match[r].length = nCurRepLen;
+                           fwd_depth[r] = 0;
+
+                           /* Recurse so the newly inserted candidate is itself propagated,
+                            * up to a bounded depth */
+                           if (nDepth < 9)
+                              apultra_insert_forward_match(pCompressor, pInWindow, nRepPos, nMatchOffset, nStartOffset, nEndOffset, nArrivalsPerPosition, nDepth + 1);
+                        }
+                     }
+                  }
+               }
+            }
+         }
+      }
+   }
+}
+
+/**
+ * Attempt to pick optimal matches, so as to produce the smallest possible output that decompresses to the same input
+ *
+ * @param pCompressor compression context
+ * @param pInWindow pointer to input data window (previously compressed bytes + bytes to compress)
+ * @param nStartOffset current offset in input window (typically the number of previously compressed bytes)
+ * @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes)
+ * @param nInsertForwardReps non-zero to insert forward repmatch candidates, zero to use the previously inserted candidates
+ * @param nCurRepMatchOffset starting rep offset for this block
+ * @param nBlockFlags bit 0: 1 for first block, 0 otherwise; bit 1: 1 for last block, 0 otherwise
+ * @param nArrivalsPerPosition maximum number of arrivals per input buffer position
+ */
+static void apultra_optimize_forward(apultra_compressor *pCompressor, const unsigned char *pInWindow, const int nStartOffset, const int nEndOffset, const int nInsertForwardReps, const int *nCurRepMatchOffset, const int nBlockFlags, const int nArrivalsPerPosition) {
+   apultra_arrival *arrival = pCompressor->arrival - (nStartOffset * nArrivalsPerPosition);
+   const int* rle_len = (int*)pCompressor->intervals /* reuse */;
+   int* visited = ((int*)pCompressor->pos_data) - nStartOffset /* reuse */;
+   int i, j, n;
+
+   if ((nEndOffset - nStartOffset) > pCompressor->block_size) return;
+
+   /* Reset all arrivals for this block, then seed the starting position: slot 0
+    * carries the incoming rep match offset */
+   memset(arrival + (nStartOffset * nArrivalsPerPosition), 0, sizeof(apultra_arrival) * ((nEndOffset - nStartOffset + 1) * nArrivalsPerPosition));
+
+   arrival[nStartOffset * nArrivalsPerPosition].from_slot = -1;
+   arrival[nStartOffset * nArrivalsPerPosition].rep_offset = *nCurRepMatchOffset;
+
+   /* Seed every arrival slot with a very large cost so any real candidate wins */
+   for (i = (nStartOffset * nArrivalsPerPosition); i != ((nEndOffset+1) * nArrivalsPerPosition); i++) {
+      arrival[i].cost = 0x40000000;
+   }
+
+   if (nInsertForwardReps) {
+      /* Clear the two visited[] arrays used by apultra_insert_forward_match() */
+      memset(visited + nStartOffset, 0, 2 * (nEndOffset - nStartOffset) * sizeof(int));
+   }
+
+   /* Forward pass: extend the arrivals at each position with literal/short-match
+    * and match candidates */
+   for (i = nStartOffset; i != nEndOffset; i++) {
+      apultra_arrival *cur_arrival = &arrival[i * nArrivalsPerPosition];
+      int m;
+
+      const unsigned char nMatch1Offs = pCompressor->match1[i - nStartOffset];
+      int nShortOffset;
+      int nShortLen;
+      int nLiteralScore;
+      int nLiteralCost;
+
+      /* Encode this byte either as a plain literal (1 control bit + 8 data bits), or
+       * as a 4-bit short match when the byte is zero or a 1-byte match offset exists */
+      if ((pInWindow[i] != 0 && nMatch1Offs == 0) || (i == nStartOffset && (nBlockFlags & 1))) {
+         nShortOffset = 0;
+         nShortLen = 0;
+         nLiteralCost = 9 /* literal bit + literal byte */;
+      }
+      else {
+         nShortOffset = (pInWindow[i] == 0) ? 0 : nMatch1Offs;
+         nShortLen = 1;
+         nLiteralCost = 4 + TOKEN_SIZE_4BIT_MATCH /* command and offset cost; no length cost */;
+      }
+
+      nLiteralScore = nShortOffset ? 3 : 1;
+
+      /* Propagate every arrival one position forward with the literal cost added */
+      if (cur_arrival[nArrivalsPerPosition].from_slot) {
+         /* Destination already has arrivals: insert in cost/score order,
+          * de-duplicating by rep offset */
+         for (j = 0; j < nArrivalsPerPosition && cur_arrival[j].from_slot; j++) {
+            int nPrevCost = cur_arrival[j].cost & 0x3fffffff;
+            int nCodingChoiceCost = nPrevCost + nLiteralCost;
+            int nScore = cur_arrival[j].score + nLiteralScore;
+
+            apultra_arrival* pDestSlots = &cur_arrival[nArrivalsPerPosition];
+            if (nCodingChoiceCost < pDestSlots[nArrivalsPerPosition - 1].cost ||
+               (nCodingChoiceCost == pDestSlots[nArrivalsPerPosition - 1].cost && nScore < pDestSlots[nArrivalsPerPosition - 1].score)) {
+               int nRepOffset = cur_arrival[j].rep_offset;
+               int exists = 0;
+
+               /* Skip past strictly cheaper slots; bail if the same rep offset
+                * is already present at a lower cost */
+               for (n = 0;
+                  pDestSlots[n].cost < nCodingChoiceCost;
+                  n++) {
+                  if (pDestSlots[n].rep_offset == nRepOffset) {
+                     exists = 1;
+                     break;
+                  }
+               }
+
+               if (!exists) {
+                  for (;
+                     n < nArrivalsPerPosition && pDestSlots[n].cost == nCodingChoiceCost && nScore >= pDestSlots[n].score;
+                     n++) {
+                     if (pDestSlots[n].rep_offset == nRepOffset) {
+                        exists = 1;
+                        break;
+                     }
+                  }
+
+                  if (!exists) {
+                     if (n < nArrivalsPerPosition) {
+                        int nn;
+
+                        for (nn = n;
+                           nn < nArrivalsPerPosition && pDestSlots[nn].cost == nCodingChoiceCost;
+                           nn++) {
+                           if (pDestSlots[nn].rep_offset == nRepOffset) {
+                              exists = 1;
+                              break;
+                           }
+                        }
+
+                        if (!exists) {
+                           int z;
+
+                           /* Shift worse slots down (up to a same-offset slot) and insert */
+                           for (z = n; z < nArrivalsPerPosition - 1 && pDestSlots[z].from_slot; z++) {
+                              if (pDestSlots[z].rep_offset == nRepOffset)
+                                 break;
+                           }
+
+                           apultra_arrival* pDestArrival = &pDestSlots[n];
+                           memmove(&pDestSlots[n + 1],
+                              &pDestSlots[n],
+                              sizeof(apultra_arrival) * (z - n));
+
+                           pDestArrival->cost = nCodingChoiceCost;
+                           pDestArrival->from_pos = i;
+                           pDestArrival->from_slot = j + 1;
+                           pDestArrival->follows_literal = 1;
+                           pDestArrival->rep_offset = nRepOffset;
+                           pDestArrival->short_offset = nShortOffset;
+                           pDestArrival->rep_pos = cur_arrival[j].rep_pos;
+                           pDestArrival->match_len = nShortLen;
+                           pDestArrival->score = nScore;
+                        }
+                     }
+                  }
+               }
+            }
+         }
+      }
+      else {
+         /* Destination is still empty: copy all arrivals over with the literal cost added */
+         for (j = 0; j < nArrivalsPerPosition && cur_arrival[j].from_slot; j++) {
+            int nPrevCost = cur_arrival[j].cost & 0x3fffffff;
+            int nCodingChoiceCost = nPrevCost + nLiteralCost;
+            int nScore = cur_arrival[j].score + nLiteralScore;
+
+            apultra_arrival* pDestArrival = &cur_arrival[nArrivalsPerPosition + j];
+
+            pDestArrival->cost = nCodingChoiceCost;
+            pDestArrival->from_pos = i;
+            pDestArrival->from_slot = j + 1;
+            pDestArrival->follows_literal = 1;
+            pDestArrival->rep_offset = cur_arrival[j].rep_offset;
+            pDestArrival->short_offset = nShortOffset;
+            pDestArrival->rep_pos = cur_arrival[j].rep_pos;
+            pDestArrival->match_len = nShortLen;
+            pDestArrival->score = nScore;
+         }
+      }
+
+      /* The first byte of the first block is always stored verbatim (see the block
+       * writer), so no matches are considered at it */
+      if (i == nStartOffset && (nBlockFlags & 1)) continue;
+
+      const apultra_match *match = pCompressor->match + ((i - nStartOffset) << MATCHES_PER_INDEX_SHIFT);
+      const unsigned short *match_depth = pCompressor->match_depth + ((i - nStartOffset) << MATCHES_PER_INDEX_SHIFT);
+      int nNumArrivalsForThisPos = j, nOverallMinRepLen = 0, nOverallMaxRepLen = 0;
+
+      int nRepLenForArrival[NARRIVALS_PER_POSITION_MAX];
+      memset(nRepLenForArrival, 0, nArrivalsPerPosition * sizeof(int));
+
+      int nRepMatchArrivalIdx[NARRIVALS_PER_POSITION_MAX + 1];
+      int nNumRepMatchArrivals = 0;
+
+      int nMaxRepLenForPos = nEndOffset - i;
+      if (nMaxRepLenForPos > LCP_MAX)
+         nMaxRepLenForPos = LCP_MAX;
+      const unsigned char* pInWindowStart = pInWindow + i;
+      const unsigned char* pInWindowMax = pInWindowStart + nMaxRepLenForPos;
+      const int nLen1 = rle_len[i];
+
+      /* For each arrival that follows a literal, measure how far its rep offset
+       * matches at this position; remember the arrivals that can rep-match */
+      for (j = 0; j < nNumArrivalsForThisPos && (i + 2) <= nEndOffset; j++) {
+         if (cur_arrival[j].follows_literal) {
+            int nRepOffset = cur_arrival[j].rep_offset;
+
+            if (nRepOffset && i >= nRepOffset) {
+               if (pInWindowStart[0] == pInWindowStart[-nRepOffset]) {
+                  int nLen0 = rle_len[i - nRepOffset];
+                  int nMinLen = (nLen0 < nLen1) ? nLen0 : nLen1;
+
+                  if (nMinLen > nMaxRepLenForPos)
+                     nMinLen = nMaxRepLenForPos;
+
+                  /* Extend the rep-match 8, then 4, then 1 byte(s) at a time */
+                  const unsigned char* pInWindowAtRepOffset = pInWindowStart + nMinLen;
+                  while ((pInWindowAtRepOffset + 8) < pInWindowMax && !memcmp(pInWindowAtRepOffset, pInWindowAtRepOffset - nRepOffset, 8))
+                     pInWindowAtRepOffset += 8;
+                  while ((pInWindowAtRepOffset + 4) < pInWindowMax && !memcmp(pInWindowAtRepOffset, pInWindowAtRepOffset - nRepOffset, 4))
+                     pInWindowAtRepOffset += 4;
+                  while (pInWindowAtRepOffset < pInWindowMax && pInWindowAtRepOffset[0] == pInWindowAtRepOffset[-nRepOffset])
+                     pInWindowAtRepOffset++;
+
+                  int nCurMaxLen = (int)(pInWindowAtRepOffset - pInWindowStart);
+
+                  if (nCurMaxLen >= 2) {
+                     nRepLenForArrival[j] = nCurMaxLen;
+                     nRepMatchArrivalIdx[nNumRepMatchArrivals++] = j;
+
+                     if (nOverallMaxRepLen < nCurMaxLen)
+                        nOverallMaxRepLen = nCurMaxLen;
+                  }
+               }
+            }
+         }
+      }
+      nRepMatchArrivalIdx[nNumRepMatchArrivals] = -1;
+
+      /* Evaluate every match candidate recorded for this position */
+      for (m = 0; m < NMATCHES_PER_INDEX && match[m].length; m++) {
+         const int nOrigMatchLen = match[m].length;
+         const int nOrigMatchOffset = match[m].offset;
+         const unsigned int nOrigMatchDepth = match_depth[m] & 0x3fff;
+         const int nScorePenalty = 3 + ((match_depth[m] & 0x8000) >> 15);
+         unsigned int d;
+
+         /* Try the match as found (d == 0) and, when it has a non-zero depth, also
+          * the version with the depth subtracted from its offset and length */
+         for (d = 0; d <= nOrigMatchDepth; d += (nOrigMatchDepth ? nOrigMatchDepth : 1)) {
+            const int nMatchOffset = nOrigMatchOffset - d;
+            int nMatchLen = nOrigMatchLen - d;
+
+            if ((i + nMatchLen) > nEndOffset)
+               nMatchLen = nEndOffset - i;
+
+            if (nInsertForwardReps) {
+               apultra_insert_forward_match(pCompressor, pInWindow, i, nMatchOffset, nStartOffset, nEndOffset, nArrivalsPerPosition, 0);
+            }
+
+            if (nMatchLen >= 2) {
+               int nStartingMatchLen, nJumpMatchLen, k;
+               int nNoRepMatchOffsetCostForLit[2], nNoRepMatchOffsetCostDelta;
+               int nMinMatchLenForOffset;
+               int nNoRepCostAdjusment = (nMatchLen >= LCP_MAX) ? 1 : 0;
+
+               /* Larger offsets need longer minimum match lengths to be worth encoding */
+               if (nMatchOffset < MINMATCH3_OFFSET)
+                  nMinMatchLenForOffset = 2;
+               else {
+                  if (nMatchOffset < MINMATCH4_OFFSET)
+                     nMinMatchLenForOffset = 3;
+                  else
+                     nMinMatchLenForOffset = 4;
+               }
+
+               if (nMatchLen >= LEAVE_ALONE_MATCH_SIZE && i >= nMatchLen)
+                  nStartingMatchLen = nMatchLen;
+               else
+                  nStartingMatchLen = 2;
+
+               /* In the single-block case, long matches skip the lengths between 90
+                * and the longest (see the jump at the bottom of the k loop) */
+               if ((nBlockFlags & 3) == 3 && nMatchLen > 90 && i >= 90)
+                  nJumpMatchLen = 90;
+               else
+                  nJumpMatchLen = nMatchLen + 1;
+
+               /* Offset cost differs with whether the command follows a literal
+                * (gamma base +2 vs +3) */
+               if (nStartingMatchLen <= 3 && nMatchOffset < 128) {
+                  nNoRepMatchOffsetCostForLit[0] = 8 + TOKEN_SIZE_7BIT_MATCH;
+                  nNoRepMatchOffsetCostForLit[1] = 8 + TOKEN_SIZE_7BIT_MATCH;
+               }
+               else {
+                  nNoRepMatchOffsetCostForLit[0] = 8 + TOKEN_SIZE_LARGE_MATCH + apultra_get_gamma2_size((nMatchOffset >> 8) + 2);
+                  nNoRepMatchOffsetCostForLit[1] = 8 + TOKEN_SIZE_LARGE_MATCH + apultra_get_gamma2_size((nMatchOffset >> 8) + 3);
+               }
+               nNoRepMatchOffsetCostDelta = nNoRepMatchOffsetCostForLit[1] - nNoRepMatchOffsetCostForLit[0];
+
+               /* Insert candidates for every encodable length k of this match */
+               for (k = nStartingMatchLen; k <= nMatchLen; k++) {
+                  int nRepMatchMatchLenCost = apultra_get_gamma2_size(k);
+                  apultra_arrival *pDestSlots = &cur_arrival[k * nArrivalsPerPosition];
+
+                  /* Insert non-repmatch candidate */
+
+                  if (k >= nMinMatchLenForOffset) {
+                     int nNoRepMatchMatchLenCost;
+
+                     if (k <= 3 && nMatchOffset < 128)
+                        nNoRepMatchMatchLenCost = 0;
+                     else {
+                        if (nMatchOffset < 128 || nMatchOffset >= MINMATCH4_OFFSET)
+                           nNoRepMatchMatchLenCost = apultra_get_gamma2_size(k - 2);
+                        else if (nMatchOffset < MINMATCH3_OFFSET)
+                           nNoRepMatchMatchLenCost = nRepMatchMatchLenCost;
+                        else
+                           nNoRepMatchMatchLenCost = apultra_get_gamma2_size(k - 1);
+                     }
+
+                     for (j = 0; j < nNumArrivalsForThisPos; j++) {
+                        if (nMatchOffset != cur_arrival[j].rep_offset || cur_arrival[j].follows_literal == 0) {
+                           int nPrevCost = cur_arrival[j].cost & 0x3fffffff;
+                           int nMatchCmdCost = nNoRepMatchMatchLenCost + nNoRepMatchOffsetCostForLit[cur_arrival[j].follows_literal];
+                           int nCodingChoiceCost = nPrevCost + nMatchCmdCost;
+
+                           if (nCodingChoiceCost <= (pDestSlots[nArrivalsPerPosition - 1].cost + 1)) {
+                              int nScore = cur_arrival[j].score + nScorePenalty;
+
+                              if (nCodingChoiceCost < pDestSlots[nArrivalsPerPosition - 2].cost ||
+                                 (nCodingChoiceCost == pDestSlots[nArrivalsPerPosition - 2].cost && nScore < pDestSlots[nArrivalsPerPosition - 2].score)) {
+                                 int exists = 0;
+
+                                 for (n = 0;
+                                    pDestSlots[n].cost < nCodingChoiceCost;
+                                    n++) {
+                                    if (pDestSlots[n].rep_offset == nMatchOffset) {
+                                       exists = 1;
+                                       break;
+                                    }
+                                 }
+
+                                 if (!exists) {
+                                    int nRevisedCodingChoiceCost = nCodingChoiceCost - nNoRepCostAdjusment;
+
+                                    for (;
+                                       n < nArrivalsPerPosition - 1 && pDestSlots[n].cost == nRevisedCodingChoiceCost && nScore >= pDestSlots[n].score;
+                                       n++) {
+                                       if (pDestSlots[n].rep_offset == nMatchOffset) {
+                                          exists = 1;
+                                          break;
+                                       }
+                                    }
+
+                                    if (!exists) {
+                                       if (n < nArrivalsPerPosition - 1) {
+                                          int nn;
+
+                                          for (nn = n;
+                                             nn < nArrivalsPerPosition && pDestSlots[nn].cost == nCodingChoiceCost;
+                                             nn++) {
+                                             if (pDestSlots[nn].rep_offset == nMatchOffset) {
+                                                exists = 1;
+                                                break;
+                                             }
+                                          }
+
+                                          if (!exists) {
+                                             int z;
+
+                                             for (z = n; z < nArrivalsPerPosition - 1 && pDestSlots[z].from_slot; z++) {
+                                                if (pDestSlots[z].rep_offset == nMatchOffset)
+                                                   break;
+                                             }
+
+                                             apultra_arrival* pDestArrival = &pDestSlots[n];
+                                             memmove(&pDestSlots[n + 1],
+                                                &pDestSlots[n],
+                                                sizeof(apultra_arrival) * (z - n));
+
+                                             pDestArrival->cost = nRevisedCodingChoiceCost;
+                                             pDestArrival->from_pos = i;
+                                             pDestArrival->from_slot = j + 1;
+                                             pDestArrival->follows_literal = 0;
+                                             pDestArrival->rep_offset = nMatchOffset;
+                                             pDestArrival->short_offset = 0;
+                                             pDestArrival->rep_pos = i;
+                                             pDestArrival->match_len = k;
+                                             pDestArrival->score = nScore;
+                                          }
+                                       }
+                                    }
+                                 }
+                                 else {
+                                    if ((nCodingChoiceCost - pDestSlots[n].cost) >= nNoRepMatchOffsetCostDelta)
+                                       break;
+                                 }
+                              }
+                              if (cur_arrival[j].follows_literal == 0 || nNoRepMatchOffsetCostDelta == 0)
+                                 break;
+                           }
+                           else {
+                              break;
+                           }
+                        }
+                     }
+                  }
+
+                  /* Insert repmatch candidate */
+
+                  if (k > nOverallMinRepLen && k <= nOverallMaxRepLen) {
+                     int nRepMatchCmdCost = TOKEN_SIZE_LARGE_MATCH + 2 /* apultra_get_gamma2_size(2) */ + nRepMatchMatchLenCost;
+                     int nCurRepMatchArrival;
+
+                     if (k <= 90)
+                        nOverallMinRepLen = k;
+                     else if (nOverallMaxRepLen == k)
+                        nOverallMaxRepLen--;
+
+                     for (nCurRepMatchArrival = 0; (j = nRepMatchArrivalIdx[nCurRepMatchArrival]) >= 0; nCurRepMatchArrival++) {
+                        if (nRepLenForArrival[j] >= k) {
+                           int nPrevCost = cur_arrival[j].cost & 0x3fffffff;
+                           int nRepCodingChoiceCost = nPrevCost + nRepMatchCmdCost;
+                           int nScore = cur_arrival[j].score + 2;
+
+                           if (nRepCodingChoiceCost < pDestSlots[nArrivalsPerPosition - 1].cost ||
+                              (nRepCodingChoiceCost == pDestSlots[nArrivalsPerPosition - 1].cost && nScore < pDestSlots[nArrivalsPerPosition - 1].score)) {
+                              int nRepOffset = cur_arrival[j].rep_offset;
+                              int exists = 0;
+
+                              for (n = 0;
+                                 pDestSlots[n].cost < nRepCodingChoiceCost;
+                                 n++) {
+                                 if (pDestSlots[n].rep_offset == nRepOffset) {
+                                    exists = 1;
+                                    break;
+                                 }
+                              }
+
+                              if (!exists) {
+                                 for (;
+                                    n < nArrivalsPerPosition && pDestSlots[n].cost == nRepCodingChoiceCost && nScore >= pDestSlots[n].score;
+                                    n++) {
+                                    if (pDestSlots[n].rep_offset == nRepOffset) {
+                                       exists = 1;
+                                       break;
+                                    }
+                                 }
+
+                                 if (!exists) {
+                                    if (n < nArrivalsPerPosition) {
+                                       int nn;
+
+                                       for (nn = n;
+                                          nn < nArrivalsPerPosition && pDestSlots[nn].cost == nRepCodingChoiceCost;
+                                          nn++) {
+                                          if (pDestSlots[nn].rep_offset == nRepOffset) {
+                                             exists = 1;
+                                             break;
+                                          }
+                                       }
+
+                                       if (!exists) {
+                                          int z;
+
+                                          for (z = n; z < nArrivalsPerPosition - 1 && pDestSlots[z].from_slot; z++) {
+                                             if (pDestSlots[z].rep_offset == nRepOffset)
+                                                break;
+                                          }
+
+                                          apultra_arrival* pDestArrival = &pDestSlots[n];
+                                          memmove(&pDestSlots[n + 1],
+                                             &pDestSlots[n],
+                                             sizeof(apultra_arrival) * (z - n));
+
+                                          pDestArrival->cost = nRepCodingChoiceCost;
+                                          pDestArrival->from_pos = i;
+                                          pDestArrival->from_slot = j + 1;
+                                          pDestArrival->follows_literal = 0;
+                                          pDestArrival->rep_offset = nRepOffset;
+                                          pDestArrival->short_offset = 0;
+                                          pDestArrival->rep_pos = i;
+                                          pDestArrival->match_len = k;
+                                          pDestArrival->score = nScore;
+                                       }
+                                    }
+                                 }
+                              }
+                           }
+                        }
+                        else {
+                           break;
+                        }
+                     }
+                  }
+
+                  if (k == 3 && nMatchOffset < 128) {
+                     nNoRepMatchOffsetCostForLit[0] = 8 + TOKEN_SIZE_LARGE_MATCH + 2 /* apultra_get_gamma2_size((nMatchOffset >> 8) + 2) */;
+                     nNoRepMatchOffsetCostForLit[1] = 8 + TOKEN_SIZE_LARGE_MATCH + 2 /* apultra_get_gamma2_size((nMatchOffset >> 8) + 3) */;
+                  }
+
+                  if (k == nJumpMatchLen)
+                     k = nMatchLen - 1;
+               }
+            }
+         }
+
+         if (nOrigMatchLen >= 512)
+            break;
+      }
+   }
+
+   if (!nInsertForwardReps) {
+      /* Second pass only: walk the cheapest arrival chain backwards from the end
+       * of the block and store the chosen matches */
+      const apultra_arrival* end_arrival = &arrival[(i * nArrivalsPerPosition) + 0];
+      apultra_final_match* pBestMatch = pCompressor->best_match - nStartOffset;
+
+      while (end_arrival->from_slot > 0 && end_arrival->from_pos >= 0 && (int)end_arrival->from_pos < nEndOffset) {
+         pBestMatch[end_arrival->from_pos].length = end_arrival->match_len;
+         if (end_arrival->match_len >= 2)
+            pBestMatch[end_arrival->from_pos].offset = end_arrival->rep_offset;
+         else
+            pBestMatch[end_arrival->from_pos].offset = end_arrival->short_offset;
+
+         end_arrival = &arrival[(end_arrival->from_pos * nArrivalsPerPosition) + (end_arrival->from_slot - 1)];
+      }
+   }
+}
+
+/**
+ * Attempt to replace matches by literals when it makes the final bitstream smaller, and merge large matches
+ *
+ * @param pCompressor compression context
+ * @param pInWindow pointer to input data window (previously compressed bytes + bytes to compress)
+ * @param pBestMatch optimal matches to evaluate and update
+ * @param nStartOffset current offset in input window (typically the number of previously compressed bytes)
+ * @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes)
+ * @param nCurRepMatchOffset starting rep offset for this block
+ * @param nBlockFlags bit 0: 1 for first block, 0 otherwise; bit 1: 1 for last block, 0 otherwise
+ *
+ * @return non-zero if the number of tokens was reduced, 0 if it wasn't
+ */
+static int apultra_reduce_commands(apultra_compressor *pCompressor, const unsigned char *pInWindow, apultra_final_match *pBestMatch, const int nStartOffset, const int nEndOffset, const int *nCurRepMatchOffset, const int nBlockFlags) {
+   int i;
+   int nRepMatchOffset = *nCurRepMatchOffset;
+   int nFollowsLiteral = 0;
+   int nDidReduce = 0;
+   int nLastMatchLen = 0;
+   const unsigned char *match1 = pCompressor->match1 - nStartOffset;
+
+   for (i = nStartOffset + ((nBlockFlags & 1) ? 1 : 0); i < nEndOffset; ) {
+      apultra_final_match *pMatch = pBestMatch + i;
+
+      /* If this position holds a literal/1-byte match and the following match's
+       * bytes also match one byte earlier, try extending that match left by one */
+      if (pMatch->length <= 1 &&
+         (i + 1) < nEndOffset &&
+         pBestMatch[i + 1].length >= 2 &&
+         pBestMatch[i + 1].length < MAX_VARLEN &&
+         pBestMatch[i + 1].offset &&
+         i >= pBestMatch[i + 1].offset &&
+         (i + pBestMatch[i + 1].length + 1) <= nEndOffset &&
+         !memcmp(pInWindow + i - (pBestMatch[i + 1].offset), pInWindow + i, pBestMatch[i + 1].length + 1)) {
+         if ((pBestMatch[i + 1].offset < MINMATCH3_OFFSET || (pBestMatch[i + 1].length + 1) >= 3 || (pBestMatch[i + 1].offset == nRepMatchOffset && nFollowsLiteral)) &&
+            (pBestMatch[i + 1].offset < MINMATCH4_OFFSET || (pBestMatch[i + 1].length + 1) >= 4 || (pBestMatch[i + 1].offset == nRepMatchOffset && nFollowsLiteral))) {
+
+            /* Cost of the current pair: the literal/4-bit match plus the following match */
+            int nCurPartialCommandSize = (pMatch->length == 1) ? (TOKEN_SIZE_4BIT_MATCH + 4) : (1 /* literal bit */ + 8 /* literal size */);
+            if (pBestMatch[i + 1].offset == nRepMatchOffset /* always follows a literal, the one at the current position */) {
+               nCurPartialCommandSize += TOKEN_SIZE_LARGE_MATCH + 2 /* apultra_get_gamma2_size(2) */ + apultra_get_gamma2_size(pBestMatch[i + 1].length);
+            }
+            else {
+               nCurPartialCommandSize += apultra_get_offset_varlen_size(pBestMatch[i + 1].length, pBestMatch[i + 1].offset, 1) + apultra_get_match_varlen_size(pBestMatch[i + 1].length, pBestMatch[i + 1].offset);
+            }
+
+            /* Cost of the single merged, one-byte-longer match */
+            int nReducedPartialCommandSize;
+            if (pBestMatch[i + 1].offset == nRepMatchOffset && nFollowsLiteral) {
+               nReducedPartialCommandSize = TOKEN_SIZE_LARGE_MATCH + 2 /* apultra_get_gamma2_size(2) */ + apultra_get_gamma2_size(pBestMatch[i + 1].length);
+            }
+            else {
+               nReducedPartialCommandSize = apultra_get_offset_varlen_size(pBestMatch[i + 1].length, pBestMatch[i + 1].offset, nFollowsLiteral) + apultra_get_match_varlen_size(pBestMatch[i + 1].length, pBestMatch[i + 1].offset);
+            }
+
+            if (nReducedPartialCommandSize < nCurPartialCommandSize || (nFollowsLiteral == 0 && nLastMatchLen >= LCP_MAX)) {
+               /* Merge */
+               pBestMatch[i].length = pBestMatch[i + 1].length + 1;
+               pBestMatch[i].offset = pBestMatch[i + 1].offset;
+               pBestMatch[i + 1].length = 0;
+               pBestMatch[i + 1].offset = 0;
+               nDidReduce = 1;
+               continue;
+            }
+         }
+      }
+
+      if (pMatch->length >= 2) {
+         if (pMatch->length < 32 && /* Don't waste time considering large matches, they will always win over literals */
+            (i + pMatch->length) < nEndOffset /* Don't consider the last match in the block, we can only reduce a match inbetween other tokens */) {
+            int nNextIndex = i + pMatch->length;
+            int nNextFollowsLiteral = 0;
+            int nCannotEncode = 0;
+
+            /* Find the next match command, noting whether literals precede it */
+            while (nNextIndex < nEndOffset && pBestMatch[nNextIndex].length < 2) {
+               nNextIndex++;
+               nNextFollowsLiteral = 1;
+            }
+
+            if (nNextIndex < nEndOffset && pBestMatch[nNextIndex].length >= 2) {
+               if (nRepMatchOffset && nRepMatchOffset != pMatch->offset && pBestMatch[nNextIndex].offset && pMatch->offset != pBestMatch[nNextIndex].offset &&
+                  nNextFollowsLiteral) {
+                  /* Try to gain a match forward */
+                  if (i >= pBestMatch[nNextIndex].offset && (i - pBestMatch[nNextIndex].offset + pMatch->length) <= nEndOffset) {
+                     if ((pBestMatch[nNextIndex].offset < MINMATCH3_OFFSET || pMatch->length >= 3) &&
+                        (pBestMatch[nNextIndex].offset < MINMATCH4_OFFSET || pMatch->length >= 4)) {
+                        int nMaxLen = 0;
+                        const unsigned char* pInWindowAtPos = pInWindow + i;
+                        while (nMaxLen < pMatch->length && pInWindowAtPos[nMaxLen - pBestMatch[nNextIndex].offset] == pInWindowAtPos[nMaxLen])
+                           nMaxLen++;
+
+                        if (nMaxLen >= pMatch->length) {
+                           /* Replace */
+                           pMatch->offset = pBestMatch[nNextIndex].offset;
+                           nDidReduce = 1;
+                        }
+                        else if (nMaxLen >= 2) {
+                           if ((nFollowsLiteral && nRepMatchOffset == pBestMatch[nNextIndex].offset) ||
+                              ((pBestMatch[nNextIndex].offset < MINMATCH3_OFFSET || nMaxLen >= 3) &&
+                                 (pBestMatch[nNextIndex].offset < MINMATCH4_OFFSET || nMaxLen >= 4))) {
+
+                              int nPartialSizeBefore, nPartialSizeAfter, j;
+
+                              nPartialSizeBefore = apultra_get_offset_varlen_size(pMatch->length, pMatch->offset, nFollowsLiteral);
+                              nPartialSizeBefore += apultra_get_match_varlen_size(pMatch->length, pMatch->offset);
+
+                              nPartialSizeBefore += apultra_get_offset_varlen_size(pBestMatch[nNextIndex].length, pBestMatch[nNextIndex].offset, 1);
+                              nPartialSizeBefore += apultra_get_match_varlen_size(pBestMatch[nNextIndex].length, pBestMatch[nNextIndex].offset);
+
+                              nPartialSizeAfter = apultra_get_offset_varlen_size(nMaxLen, pBestMatch[nNextIndex].offset, nFollowsLiteral);
+                              if (nFollowsLiteral && nRepMatchOffset == pBestMatch[nNextIndex].offset)
+                                 nPartialSizeAfter += apultra_get_gamma2_size(nMaxLen);
+                              else
+                                 nPartialSizeAfter += apultra_get_match_varlen_size(nMaxLen, pBestMatch[nNextIndex].offset);
+
+                              nPartialSizeAfter += TOKEN_SIZE_LARGE_MATCH + 2 /* apultra_get_gamma2_size(2) */;
+                              nPartialSizeAfter += apultra_get_gamma2_size(pBestMatch[nNextIndex].length);
+
+                              for (j = nMaxLen; j < pMatch->length; j++) {
+                                 if (pInWindow[i + j] == 0 || match1[i + j])
+                                    nPartialSizeAfter += TOKEN_SIZE_4BIT_MATCH + 4;
+                                 else
+                                    nPartialSizeAfter += 1 /* literal bit */ + 8 /* literal byte */;
+                              }
+
+                              if (nPartialSizeAfter < nPartialSizeBefore) {
+                                 /* We gain a repmatch that is shorter than the original match as this is the best we can do, so it is followed by extra literals, but
+                                  * we have calculated that this is shorter */
+
+                                 int nOrigLen = pMatch->length;
+                                 int j;
+
+                                 pMatch->offset = pBestMatch[nNextIndex].offset;
+                                 pMatch->length = nMaxLen;
+
+                                 for (j = nMaxLen; j < nOrigLen; j++) {
+                                    pBestMatch[i + j].offset = match1[i + j];
+                                    pBestMatch[i + j].length = (pInWindow[i + j] && match1[i+j] == 0) ? 0 : 1;
+                                 }
+
+                                 nDidReduce = 1;
+                                 continue;
+                              }
+                           }
+                        }
+                     }
+                  }
+               }
+
+               /* Calculate this command's current cost */
+
+               int nCurCommandSize;
+               if (pMatch->offset == nRepMatchOffset && nFollowsLiteral) {
+                  nCurCommandSize = TOKEN_SIZE_LARGE_MATCH + 2 /* apultra_get_gamma2_size(2) */ + apultra_get_gamma2_size(pMatch->length);
+               }
+               else {
+                  nCurCommandSize = apultra_get_offset_varlen_size(pMatch->length, pMatch->offset, nFollowsLiteral) + apultra_get_match_varlen_size(pMatch->length, pMatch->offset);
+               }
+
+               /* Calculate the next command's current cost */
+               int nNextCommandSize;
+               if (pBestMatch[nNextIndex].offset == pMatch->offset && nNextFollowsLiteral && pBestMatch[nNextIndex].length >= 2) {
+                  nNextCommandSize = TOKEN_SIZE_LARGE_MATCH + 2 /* apultra_get_gamma2_size(2) */ + apultra_get_gamma2_size(pBestMatch[nNextIndex].length);
+               }
+               else {
+                  nNextCommandSize = apultra_get_offset_varlen_size(pBestMatch[nNextIndex].length, pBestMatch[nNextIndex].offset, nNextFollowsLiteral) + apultra_get_match_varlen_size(pBestMatch[nNextIndex].length, pBestMatch[nNextIndex].offset);
+               }
+
+               int nOriginalCombinedCommandSize = nCurCommandSize + nNextCommandSize;
+
+               /* Calculate the cost of replacing this match command by literals + the effect on the cost of the next command */
+               int nReducedCommandSize = 0;
+               int j;
+
+               for (j = 0; j < pMatch->length; j++) {
+                  if (pInWindow[i + j] == 0 || match1[i + j])
+                     nReducedCommandSize += TOKEN_SIZE_4BIT_MATCH + 4;
+                  else
+                     nReducedCommandSize += 1 /* literal bit */ + 8;
+               }
+
+               if (pBestMatch[nNextIndex].offset == nRepMatchOffset /* the new command would always follow literals, the ones we create */ && pBestMatch[nNextIndex].length >= 2) {
+                  nReducedCommandSize += TOKEN_SIZE_LARGE_MATCH + 2 /* apultra_get_gamma2_size(2) */ + apultra_get_gamma2_size(pBestMatch[nNextIndex].length);
+               }
+               else {
+                  if ((pBestMatch[nNextIndex].length < 3 && pBestMatch[nNextIndex].offset >= MINMATCH3_OFFSET) ||
+                     (pBestMatch[nNextIndex].length < 4 && pBestMatch[nNextIndex].offset >= MINMATCH4_OFFSET)) {
+                     /* This match length can only be encoded with a rep-match */
+                     nCannotEncode = 1;
+                  }
+                  else {
+                     nReducedCommandSize += apultra_get_offset_varlen_size(pBestMatch[nNextIndex].length, pBestMatch[nNextIndex].offset, 1 /* follows literals */) + apultra_get_match_varlen_size(pBestMatch[nNextIndex].length, pBestMatch[nNextIndex].offset);
+               }
+               }
+
+               if (!nCannotEncode && nOriginalCombinedCommandSize > nReducedCommandSize) {
+                  /* Reduce */
+                  int nMatchLen = pMatch->length;
+                  int j;
+
+                  for (j = 0; j < nMatchLen; j++) {
+                     pBestMatch[i + j].offset = match1[i + j];
+                     pBestMatch[i + j].length = (pInWindow[i + j] && match1[i + j] == 0) ? 0 : 1;
+                  }
+
+                  nDidReduce = 1;
+                  continue;
+               }
+            }
+         }
+
+         if ((i + pMatch->length) < nEndOffset && pMatch->offset > 0 &&
+            pBestMatch[i + pMatch->length].offset > 0 &&
+            pBestMatch[i + pMatch->length].length >= 2 &&
+            (pMatch->length + pBestMatch[i + pMatch->length].length) >= LEAVE_ALONE_MATCH_SIZE &&
+            (pMatch->length + pBestMatch[i + pMatch->length].length) <= MAX_VARLEN &&
+            (i + pMatch->length) >= pMatch->offset &&
+            (i + pMatch->length) >= pBestMatch[i + pMatch->length].offset &&
+            (i + pMatch->length + pBestMatch[i + pMatch->length].length) <= nEndOffset &&
+            !memcmp(pInWindow + i + pMatch->length - pMatch->offset,
+               pInWindow + i + pMatch->length - pBestMatch[i + pMatch->length].offset,
+               pBestMatch[i + pMatch->length].length)) {
+            int nMatchLen = pMatch->length;
+
+            /* Join large matches */
+
+            int nNextIndex = i + pMatch->length + pBestMatch[i + pMatch->length].length;
+            int nNextFollowsLiteral = 0;
+            int nCannotEncode = 0;
+
+            while (nNextIndex < nEndOffset && pBestMatch[nNextIndex].length < 2) {
+               nNextIndex++;
+               nNextFollowsLiteral = 1;
+            }
+
+            /* Don't join if the following command relies on the rep offset the
+             * absorbed match would have established */
+            if (nNextIndex < nEndOffset && nNextFollowsLiteral && pBestMatch[nNextIndex].length >= 2 &&
+               pBestMatch[nNextIndex].offset == pBestMatch[i + pMatch->length].offset) {
+               if ((pBestMatch[nNextIndex].offset >= MINMATCH3_OFFSET && pBestMatch[nNextIndex].length < 3) ||
+                  (pBestMatch[nNextIndex].offset >= MINMATCH4_OFFSET && pBestMatch[nNextIndex].length < 4)) {
+                  nCannotEncode = 1;
+               }
+            }
+
+            if (!nCannotEncode) {
+               pMatch->length += pBestMatch[i + nMatchLen].length;
+               pBestMatch[i + nMatchLen].offset = 0;
+               pBestMatch[i + nMatchLen].length = -1;
+               nDidReduce = 1;
+               continue;
+            }
+         }
+
+         /* Keep this match: it becomes the new rep offset */
+         nRepMatchOffset = pMatch->offset;
+         nFollowsLiteral = 0;
+         nLastMatchLen = pMatch->length;
+
+         i += pMatch->length;
+      }
+      else {
+         /* 4 bits offset (1 byte match) or literal */
+         i++;
+         nFollowsLiteral = 1;
+         nLastMatchLen = 0;
+      }
+   }
+
+   return nDidReduce;
+}
+
+/**
+ * Emit a block of compressed data
+ *
+ * @param pCompressor compression context
+ * @param pBestMatch optimal matches to emit
+ * @param pInWindow pointer to input data window (previously compressed bytes + bytes to compress)
+ * @param nStartOffset current offset in input window (typically the number of previously compressed bytes)
+ * @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes)
+ * @param pOutData pointer to output buffer
+ * @param nMaxOutDataSize maximum size of output buffer, in bytes
+ * @param nCurBitsOffset write index into output buffer, of current byte being filled with bits
+ * @param nCurBitShift bit shift count
+ * @param nFollowsLiteral non-zero if the next command to be issued follows a literal, 0 if not
+ * @param nCurRepMatchOffset starting rep offset for this block, updated after the block is compressed successfully
+ * @param nBlockFlags bit 0: 1 for first block, 0 otherwise; bit 1: 1 for last block, 0 otherwise
+ *
+ * @return size of compressed data in output buffer, or -1 if the data is uncompressible
+ */
+static int apultra_write_block(apultra_compressor *pCompressor, apultra_final_match *pBestMatch, const unsigned char *pInWindow, const int nStartOffset, const int nEndOffset, unsigned char *pOutData, int nOutOffset, const int nMaxOutDataSize, int *nCurBitsOffset, int *nCurBitShift, int *nFollowsLiteral, int *nCurRepMatchOffset, const int nBlockFlags) {
+ int i;
+ int nRepMatchOffset = *nCurRepMatchOffset;
+ const int nMaxOffset = pCompressor->max_offset;
+
+ if (nBlockFlags & 1) {
+ if (nOutOffset < 0 || nOutOffset >= nMaxOutDataSize)
+ return -1;
+ pOutData[nOutOffset++] = pInWindow[nStartOffset];
+ *nFollowsLiteral = 1;
+ }
+
+ for (i = nStartOffset + ((nBlockFlags & 1) ? 1 : 0); i < nEndOffset; ) {
+ const apultra_final_match *pMatch = pBestMatch + i;
+
+ if (pMatch->length >= 2) {
+ int nMatchOffset = pMatch->offset;
+ int nMatchLen = pMatch->length;
+
+ if (nMatchOffset < MIN_OFFSET || nMatchOffset > nMaxOffset)
+ return -1;
+
+ if (nMatchOffset == nRepMatchOffset && *nFollowsLiteral) {
+ /* Rep-match */
+ nOutOffset = apultra_write_bits(pOutData, nOutOffset, nMaxOutDataSize, TOKEN_CODE_LARGE_MATCH, TOKEN_SIZE_LARGE_MATCH, nCurBitsOffset, nCurBitShift);
+ nOutOffset = apultra_write_bits(pOutData, nOutOffset, nMaxOutDataSize, 0 /* length of 2 encoded as gamma 2 */, 2, nCurBitsOffset, nCurBitShift);
+
+ /* The match length isn't encoded in the command, emit elias gamma value */
+ nOutOffset = apultra_write_gamma2_value(pOutData, nOutOffset, nMaxOutDataSize, nMatchLen, nCurBitsOffset, nCurBitShift);
+ if (nOutOffset < 0) return -1;
+
+ *nFollowsLiteral = 0;
+
+ pCompressor->stats.num_rep_matches++;
+ }
+ else {
+ if (nMatchLen <= 3 && nMatchOffset < 128) {
+ /* 7 bits offset + 1 bit length */
+ nOutOffset = apultra_write_bits(pOutData, nOutOffset, nMaxOutDataSize, TOKEN_CODE_7BIT_MATCH, TOKEN_SIZE_7BIT_MATCH, nCurBitsOffset, nCurBitShift);
+
+ if (nOutOffset < 0 || nOutOffset >= nMaxOutDataSize)
+ return -1;
+ pOutData[nOutOffset++] = ((nMatchOffset) & 0x7f) << 1 | (nMatchLen - 2);
+
+ *nFollowsLiteral = 0;
+ nRepMatchOffset = nMatchOffset;
+
+ pCompressor->stats.num_7bit_matches++;
+ }
+ else {
+ /* 8+n bits offset */
+ nOutOffset = apultra_write_bits(pOutData, nOutOffset, nMaxOutDataSize, TOKEN_CODE_LARGE_MATCH, TOKEN_SIZE_LARGE_MATCH, nCurBitsOffset, nCurBitShift);
+
+ if (nOutOffset < 0 || nOutOffset >= nMaxOutDataSize)
+ return -1;
+ if (*nFollowsLiteral)
+ nOutOffset = apultra_write_gamma2_value(pOutData, nOutOffset, nMaxOutDataSize, (nMatchOffset >> 8) + 3, nCurBitsOffset, nCurBitShift);
+ else
+ nOutOffset = apultra_write_gamma2_value(pOutData, nOutOffset, nMaxOutDataSize, (nMatchOffset >> 8) + 2, nCurBitsOffset, nCurBitShift);
+ pOutData[nOutOffset++] = nMatchOffset & 0xff;
+
+ /* The match length isn't encoded in the command, emit elias gamma value */
+
+ if (nMatchOffset < 128 || nMatchOffset >= MINMATCH4_OFFSET)
+ nOutOffset = apultra_write_gamma2_value(pOutData, nOutOffset, nMaxOutDataSize, nMatchLen - 2, nCurBitsOffset, nCurBitShift);
+ else if (nMatchOffset < MINMATCH3_OFFSET)
+ nOutOffset = apultra_write_gamma2_value(pOutData, nOutOffset, nMaxOutDataSize, nMatchLen, nCurBitsOffset, nCurBitShift);
+ else
+ nOutOffset = apultra_write_gamma2_value(pOutData, nOutOffset, nMaxOutDataSize, nMatchLen - 1, nCurBitsOffset, nCurBitShift);
+ if (nOutOffset < 0) return -1;
+
+ *nFollowsLiteral = 0;
+ nRepMatchOffset = nMatchOffset;
+
+ pCompressor->stats.num_variable_matches++;
+ }
+ }
+
+ if (nMatchOffset < pCompressor->stats.min_offset || pCompressor->stats.min_offset == -1)
+ pCompressor->stats.min_offset = nMatchOffset;
+ if (nMatchOffset > pCompressor->stats.max_offset)
+ pCompressor->stats.max_offset = nMatchOffset;
+ pCompressor->stats.total_offsets += (long long)nMatchOffset;
+
+ if (nMatchLen < pCompressor->stats.min_match_len || pCompressor->stats.min_match_len == -1)
+ pCompressor->stats.min_match_len = nMatchLen;
+ if (nMatchLen > pCompressor->stats.max_match_len)
+ pCompressor->stats.max_match_len = nMatchLen;
+ pCompressor->stats.total_match_lens += nMatchLen;
+ pCompressor->stats.match_divisor++;
+
+ if (nMatchOffset == 1) {
+ if (nMatchLen < pCompressor->stats.min_rle1_len || pCompressor->stats.min_rle1_len == -1)
+ pCompressor->stats.min_rle1_len = nMatchLen;
+ if (nMatchLen > pCompressor->stats.max_rle1_len)
+ pCompressor->stats.max_rle1_len = nMatchLen;
+ pCompressor->stats.total_rle1_lens += nMatchLen;
+ pCompressor->stats.rle1_divisor++;
+ }
+ else if (nMatchOffset == 2) {
+ if (nMatchLen < pCompressor->stats.min_rle2_len || pCompressor->stats.min_rle2_len == -1)
+ pCompressor->stats.min_rle2_len = nMatchLen;
+ if (nMatchLen > pCompressor->stats.max_rle2_len)
+ pCompressor->stats.max_rle2_len = nMatchLen;
+ pCompressor->stats.total_rle2_lens += nMatchLen;
+ pCompressor->stats.rle2_divisor++;
+ }
+
+ i += nMatchLen;
+
+ pCompressor->stats.commands_divisor++;
+ }
+ else if (pMatch->length == 1) {
+ int nMatchOffset = pMatch->offset;
+
+ /* 4 bits offset */
+
+ if (nMatchOffset < 0 || nMatchOffset > 15)
+ return -1;
+
+ nOutOffset = apultra_write_bits(pOutData, nOutOffset, nMaxOutDataSize, TOKEN_CODE_4BIT_MATCH, TOKEN_SIZE_4BIT_MATCH, nCurBitsOffset, nCurBitShift);
+ nOutOffset = apultra_write_bits(pOutData, nOutOffset, nMaxOutDataSize, nMatchOffset, 4, nCurBitsOffset, nCurBitShift);
+ if (nOutOffset < 0) return -1;
+
+ pCompressor->stats.num_4bit_matches++;
+ pCompressor->stats.commands_divisor++;
+
+ i++;
+ *nFollowsLiteral = 1;
+ }
+ else {
+ /* Literal */
+
+ nOutOffset = apultra_write_bits(pOutData, nOutOffset, nMaxOutDataSize, 0 /* literal */, 1, nCurBitsOffset, nCurBitShift);
+
+ if (nOutOffset < 0 || nOutOffset >= nMaxOutDataSize)
+ return -1;
+ pOutData[nOutOffset++] = pInWindow[i];
+
+ pCompressor->stats.num_literals++;
+ pCompressor->stats.commands_divisor++;
+ i++;
+ *nFollowsLiteral = 1;
+ }
+
+ int nCurSafeDist = (i - nStartOffset) - nOutOffset;
+ if (nCurSafeDist >= 0 && pCompressor->stats.safe_dist < nCurSafeDist)
+ pCompressor->stats.safe_dist = nCurSafeDist;
+ }
+
+ if (nBlockFlags & 2) {
+ /* 8 bits offset */
+
+ nOutOffset = apultra_write_bits(pOutData, nOutOffset, nMaxOutDataSize, TOKEN_CODE_7BIT_MATCH, TOKEN_SIZE_7BIT_MATCH, nCurBitsOffset, nCurBitShift);
+
+ if (nOutOffset < 0 || nOutOffset >= nMaxOutDataSize)
+ return -1;
+ pOutData[nOutOffset++] = 0x00; /* Offset: EOD */
+ pCompressor->stats.num_eod++;
+ pCompressor->stats.commands_divisor++;
+
+ int nCurSafeDist = (i - nStartOffset) - nOutOffset;
+ if (nCurSafeDist >= 0 && pCompressor->stats.safe_dist < nCurSafeDist)
+ pCompressor->stats.safe_dist = nCurSafeDist;
+ }
+
+ *nCurRepMatchOffset = nRepMatchOffset;
+ return nOutOffset;
+}
+
/**
 * Select the most optimal matches, reduce the token count if possible, and then emit a block of compressed data
 *
 * @param pCompressor compression context
 * @param pInWindow pointer to input data window (previously compressed bytes + bytes to compress)
 * @param nPreviousBlockSize number of previously compressed bytes (or 0 for none)
 * @param nInDataSize number of input bytes to compress
 * @param pOutData pointer to output buffer
 * @param nMaxOutDataSize maximum size of output buffer, in bytes
 * @param nCurBitsOffset write index into output buffer, of current byte being filled with bits
 * @param nCurBitShift bit shift count
 * @param nCurFollowsLiteral non-zero if the next command to be issued follows a literal, 0 if not
 * @param nCurRepMatchOffset starting rep offset for this block, updated after the block is compressed successfully
 * @param nBlockFlags bit 0: 1 for first block, 0 otherwise; bit 1: 1 for last block, 0 otherwise
 *
 * @return size of compressed data in output buffer, or -1 if the data is uncompressible
 */
static int apultra_optimize_and_write_block(apultra_compressor *pCompressor, const unsigned char *pInWindow, const int nPreviousBlockSize, const int nInDataSize, unsigned char *pOutData, const int nMaxOutDataSize, int *nCurBitsOffset, int *nCurBitShift, int *nCurFollowsLiteral, int *nCurRepMatchOffset, const int nBlockFlags) {
   int nOutOffset = 0;
   const int nEndOffset = nPreviousBlockSize + nInDataSize;
   const int nArrivalsPerPosition = pCompressor->max_arrivals;
   /* The intervals buffer is no longer needed at this stage, so it is reused
    * as per-position run-length storage. */
   int *rle_len = (int*)pCompressor->intervals /* reuse */;
   int i, nPosition;

   memset(pCompressor->best_match, 0, pCompressor->block_size * sizeof(apultra_final_match));

   /* Extra match supplementation only runs for a block that is both first and
    * last, i.e. when the whole input fits in a single block. */
   if ((nBlockFlags & 3) == 3) {
      int *first_offset_for_byte = pCompressor->first_offset_for_byte;
      int *next_offset_for_pos = pCompressor->next_offset_for_pos;

      /* Supplement 2 and 3-byte matches */

      memset(first_offset_for_byte, 0xff, sizeof(int) * 65536);
      memset(next_offset_for_pos, 0xff, sizeof(int) * nInDataSize);

      /* Build, for every 16-bit byte pair, a chain of positions where that
       * pair occurs (most recent first). */
      for (nPosition = nPreviousBlockSize; nPosition < (nEndOffset - 1); nPosition++) {
         next_offset_for_pos[nPosition - nPreviousBlockSize] = first_offset_for_byte[((unsigned int)pInWindow[nPosition]) | (((unsigned int)pInWindow[nPosition + 1]) << 8)];
         first_offset_for_byte[((unsigned int)pInWindow[nPosition]) | (((unsigned int)pInWindow[nPosition + 1]) << 8)] = nPosition;
      }

      /* Walk each position's chain and append short (2/3 byte) matches that
       * the main match finder didn't record, up to 15 slots / 6 insertions. */
      for (nPosition = nPreviousBlockSize + 1; nPosition < (nEndOffset - 1); nPosition++) {
         apultra_match *match = pCompressor->match + ((nPosition - nPreviousBlockSize) << MATCHES_PER_INDEX_SHIFT);
         unsigned short *match_depth = pCompressor->match_depth + ((nPosition - nPreviousBlockSize) << MATCHES_PER_INDEX_SHIFT);
         int m = 0, nInserted = 0;
         int nMatchPos;

         /* Skip past slots already filled by the match finder */
         while (m < 15 && match[m].length)
            m++;

         for (nMatchPos = next_offset_for_pos[nPosition - nPreviousBlockSize]; m < 15 && nMatchPos >= 0; nMatchPos = next_offset_for_pos[nMatchPos - nPreviousBlockSize]) {
            int nMatchOffset = nPosition - nMatchPos;

            if (nMatchOffset <= pCompressor->max_offset) {
               int nExistingMatchIdx;
               int nAlreadyExists = 0;

               /* Reject offsets already present, directly or via depth chain */
               for (nExistingMatchIdx = 0; nExistingMatchIdx < m; nExistingMatchIdx++) {
                  if (match[nExistingMatchIdx].offset == nMatchOffset ||
                     (match[nExistingMatchIdx].offset - (match_depth[nExistingMatchIdx] & 0x3fff)) == nMatchOffset) {
                     nAlreadyExists = 1;
                     break;
                  }
               }

               if (!nAlreadyExists) {
                  /* 2-byte match by construction; extend to 3 if the next byte also matches */
                  match[m].length = (nPosition < (nEndOffset - 2) && pInWindow[nMatchPos + 2] == pInWindow[nPosition + 2]) ? 3 : 2;
                  match[m].offset = nMatchOffset;
                  match_depth[m] = 0x4000;   /* flag: supplemental short match */
                  m++;
                  nInserted++;
                  if (nInserted >= 6)
                     break;
               }
            }
            else {
               /* Chains are ordered nearest-first; past max_offset, stop */
               break;
            }
         }
      }
   }

   /* Precompute, for every position, the length of the run of identical bytes
    * starting there (used by the cost model). */
   i = 0;
   while (i < nEndOffset) {
      int nRangeStartIdx = i;
      unsigned char c = pInWindow[nRangeStartIdx];
      do {
         i++;
      }
      while (i < nEndOffset && pInWindow[i] == c);
      while (nRangeStartIdx < i) {
         rle_len[nRangeStartIdx] = i - nRangeStartIdx;
         nRangeStartIdx++;
      }
   }

   /* First forward pass: also inserts forward rep-match candidates */
   apultra_optimize_forward(pCompressor, pInWindow, nPreviousBlockSize, nEndOffset, 1 /* nInsertForwardReps */, nCurRepMatchOffset, nBlockFlags, nArrivalsPerPosition);

   /* Second supplementation round, only at the maximum arrivals setting
    * (single-block input small enough to afford it). */
   if ((nBlockFlags & 3) == 3 && nArrivalsPerPosition == NARRIVALS_PER_POSITION_MAX) {
      const int* next_offset_for_pos = pCompressor->next_offset_for_pos;
      int* offset_cache = pCompressor->offset_cache;

      /* Supplement matches further */

      memset(offset_cache, 0xff, sizeof(int) * 2048);

      for (nPosition = nPreviousBlockSize + 1; nPosition < (nEndOffset - 1); nPosition++) {
         apultra_match* match = pCompressor->match + ((nPosition - nPreviousBlockSize) << MATCHES_PER_INDEX_SHIFT);

         /* Only bother where the best known match is short */
         if (match[0].length < 8) {
            unsigned short* match_depth = pCompressor->match_depth + ((nPosition - nPreviousBlockSize) << MATCHES_PER_INDEX_SHIFT);
            int m = 0, nInserted = 0;
            int nMatchPos;

            /* Seed the hashed offset cache with the offsets already recorded
             * for this position, so duplicates can be rejected cheaply. */
            while (m < 46 && match[m].length) {
               offset_cache[match[m].offset & 2047] = nPosition;
               offset_cache[(match[m].offset - (match_depth[m] & 0x3fff)) & 2047] = nPosition;
               m++;
            }

            for (nMatchPos = next_offset_for_pos[nPosition - nPreviousBlockSize]; m < 46 && nMatchPos >= 0; nMatchPos = next_offset_for_pos[nMatchPos - nPreviousBlockSize]) {
               int nMatchOffset = nPosition - nMatchPos;

               if (nMatchOffset <= pCompressor->max_offset) {
                  int nAlreadyExists = 0;

                  /* Cache hit only means "possibly present" (hash is mod 2048);
                   * confirm with a linear scan. */
                  if (offset_cache[nMatchOffset & 2047] == nPosition) {
                     int nExistingMatchIdx;

                     for (nExistingMatchIdx = 0; nExistingMatchIdx < m; nExistingMatchIdx++) {
                        if (match[nExistingMatchIdx].offset == nMatchOffset ||
                           (match[nExistingMatchIdx].offset - (match_depth[nExistingMatchIdx] & 0x3fff)) == nMatchOffset) {
                           nAlreadyExists = 1;

                           /* Try to lengthen matches inserted by round one */
                           if (match_depth[nExistingMatchIdx] == 0x4000) {
                              int nMatchLen = 2;
                              while (nMatchLen < 16 && nPosition < (nEndOffset - nMatchLen) && pInWindow[nMatchPos + nMatchLen] == pInWindow[nPosition + nMatchLen])
                                 nMatchLen++;
                              if (nMatchLen > (int)match[nExistingMatchIdx].length)
                                 match[nExistingMatchIdx].length = nMatchLen;
                           }

                           break;
                        }
                     }
                  }

                  if (!nAlreadyExists) {
                     /* Only insert if the same offset re-matches again within
                      * the next few positions (i.e. it may pay off as a rep). */
                     int nForwardPos = nPosition + 2 + 1;
                     int nGotMatch = 0;

                     while (nForwardPos >= nMatchOffset && (nForwardPos + 2) < nEndOffset && nForwardPos < (nPosition + 2 + 1 + 5)) {
                        if (!memcmp(pInWindow + nForwardPos, pInWindow + nForwardPos - nMatchOffset, 2)) {
                           nGotMatch = 1;
                           break;
                        }
                        nForwardPos++;
                     }

                     if (nGotMatch) {
                        int nMatchLen = 2;
                        while (nMatchLen < 16 && nPosition < (nEndOffset - nMatchLen) && pInWindow[nMatchPos + nMatchLen] == pInWindow[nPosition + nMatchLen])
                           nMatchLen++;
                        match[m].length = nMatchLen;
                        match[m].offset = nMatchOffset;
                        match_depth[m] = 0;
                        m++;

                        apultra_insert_forward_match(pCompressor, pInWindow, nPosition, nMatchOffset, nPreviousBlockSize, nEndOffset, nArrivalsPerPosition, 8);

                        nInserted++;
                        if (nInserted >= 18 || (nInserted >= 15 && m >= 38))
                           break;
                     }
                  }
               }
               else {
                  break;
               }
            }
         }
      }
   }

   /* Pick optimal matches */
   apultra_optimize_forward(pCompressor, pInWindow, nPreviousBlockSize, nEndOffset, 0 /* nInsertForwardReps */, nCurRepMatchOffset, nBlockFlags, nArrivalsPerPosition);

   /* Apply reduction and merge pass; iterate until a fixed point is reached
    * (capped at 20 passes). */
   int nDidReduce;
   int nPasses = 0;
   do {
      nDidReduce = apultra_reduce_commands(pCompressor, pInWindow, pCompressor->best_match - nPreviousBlockSize, nPreviousBlockSize, nEndOffset, nCurRepMatchOffset, nBlockFlags);
      nPasses++;
   } while (nDidReduce && nPasses < 20);

   /* Write compressed block */

   return apultra_write_block(pCompressor, pCompressor->best_match - nPreviousBlockSize, pInWindow, nPreviousBlockSize, nEndOffset, pOutData, nOutOffset, nMaxOutDataSize, nCurBitsOffset, nCurBitShift, nCurFollowsLiteral, nCurRepMatchOffset, nBlockFlags);
}
+
+/* Forward declaration */
+static void apultra_compressor_destroy(apultra_compressor *pCompressor);
+
+/**
+ * Initialize compression context
+ *
+ * @param pCompressor compression context to initialize
+ * @param nBlockSize maximum size of input data (bytes to compress only)
+ * @param nMaxWindowSize maximum size of input data window (previously compressed bytes + bytes to compress)
+ * @param nMaxArrivals maximum number of arrivals per position
+ * @param nFlags compression flags
+ *
+ * @return 0 for success, non-zero for failure
+ */
+static int apultra_compressor_init(apultra_compressor *pCompressor, const int nBlockSize, const int nMaxWindowSize, const int nMaxArrivals, const int nFlags) {
+ int nResult;
+
+ nResult = divsufsort_init(&pCompressor->divsufsort_context);
+ pCompressor->intervals = NULL;
+ pCompressor->pos_data = NULL;
+ pCompressor->open_intervals = NULL;
+ pCompressor->match = NULL;
+ pCompressor->match_depth = NULL;
+ pCompressor->match1 = NULL;
+ pCompressor->best_match = NULL;
+ pCompressor->arrival = NULL;
+ pCompressor->first_offset_for_byte = NULL;
+ pCompressor->next_offset_for_pos = NULL;
+ pCompressor->offset_cache = NULL;
+ pCompressor->flags = nFlags;
+ pCompressor->block_size = nBlockSize;
+ pCompressor->max_arrivals = nMaxArrivals;
+
+ memset(&pCompressor->stats, 0, sizeof(pCompressor->stats));
+ pCompressor->stats.min_match_len = -1;
+ pCompressor->stats.min_offset = -1;
+ pCompressor->stats.min_rle1_len = -1;
+ pCompressor->stats.min_rle2_len = -1;
+
+ if (!nResult) {
+ pCompressor->intervals = (unsigned long long *)malloc(nMaxWindowSize * sizeof(unsigned long long));
+
+ if (pCompressor->intervals) {
+ pCompressor->pos_data = (unsigned long long *)malloc(nMaxWindowSize * sizeof(unsigned long long));
+
+ if (pCompressor->pos_data) {
+ pCompressor->open_intervals = (unsigned long long *)malloc((LCP_AND_TAG_MAX + 1) * sizeof(unsigned long long));
+
+ if (pCompressor->open_intervals) {
+ pCompressor->arrival = (apultra_arrival *)malloc((nBlockSize + 1) * nMaxArrivals * sizeof(apultra_arrival));
+
+ if (pCompressor->arrival) {
+ pCompressor->best_match = (apultra_final_match *)malloc(nBlockSize * sizeof(apultra_final_match));
+
+ if (pCompressor->best_match) {
+ pCompressor->match = (apultra_match *)malloc(nBlockSize * NMATCHES_PER_INDEX * sizeof(apultra_match));
+ if (pCompressor->match) {
+ pCompressor->match_depth = (unsigned short *)malloc(nBlockSize * NMATCHES_PER_INDEX * sizeof(unsigned short));
+ if (pCompressor->match_depth) {
+ pCompressor->match1 = (unsigned char *)malloc(nBlockSize * sizeof(unsigned char));
+ if (pCompressor->match1) {
+ pCompressor->first_offset_for_byte = (int*)malloc(65536 * sizeof(int));
+ if (pCompressor->first_offset_for_byte) {
+ pCompressor->next_offset_for_pos = (int*)malloc(nBlockSize * sizeof(int));
+ if (pCompressor->next_offset_for_pos) {
+ if (nMaxArrivals == NARRIVALS_PER_POSITION_MAX) {
+ pCompressor->offset_cache = (int*)malloc(2048 * sizeof(int));
+ if (pCompressor->offset_cache) {
+ return 0;
+ }
+ }
+ else {
+ return 0;
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+
+ apultra_compressor_destroy(pCompressor);
+ return 100;
+}
+
+/**
+ * Clean up compression context and free up any associated resources
+ *
+ * @param pCompressor compression context to clean up
+ */
+static void apultra_compressor_destroy(apultra_compressor *pCompressor) {
+ divsufsort_destroy(&pCompressor->divsufsort_context);
+
+ if (pCompressor->offset_cache) {
+ free(pCompressor->offset_cache);
+ pCompressor->offset_cache = NULL;
+ }
+
+ if (pCompressor->next_offset_for_pos) {
+ free(pCompressor->next_offset_for_pos);
+ pCompressor->next_offset_for_pos = NULL;
+ }
+
+ if (pCompressor->first_offset_for_byte) {
+ free(pCompressor->first_offset_for_byte);
+ pCompressor->first_offset_for_byte = NULL;
+ }
+
+ if (pCompressor->match1) {
+ free(pCompressor->match1);
+ pCompressor->match1 = NULL;
+ }
+
+ if (pCompressor->match_depth) {
+ free(pCompressor->match_depth);
+ pCompressor->match_depth = NULL;
+ }
+
+ if (pCompressor->match) {
+ free(pCompressor->match);
+ pCompressor->match = NULL;
+ }
+
+ if (pCompressor->arrival) {
+ free(pCompressor->arrival);
+ pCompressor->arrival = NULL;
+ }
+
+ if (pCompressor->best_match) {
+ free(pCompressor->best_match);
+ pCompressor->best_match = NULL;
+ }
+
+ if (pCompressor->open_intervals) {
+ free(pCompressor->open_intervals);
+ pCompressor->open_intervals = NULL;
+ }
+
+ if (pCompressor->pos_data) {
+ free(pCompressor->pos_data);
+ pCompressor->pos_data = NULL;
+ }
+
+ if (pCompressor->intervals) {
+ free(pCompressor->intervals);
+ pCompressor->intervals = NULL;
+ }
+}
+
+/**
+ * Compress one block of data
+ *
+ * @param pCompressor compression context
+ * @param pInWindow pointer to input data window (previously compressed bytes + bytes to compress)
+ * @param nPreviousBlockSize number of previously compressed bytes (or 0 for none)
+ * @param nInDataSize number of input bytes to compress
+ * @param pOutData pointer to output buffer
+ * @param nMaxOutDataSize maximum size of output buffer, in bytes
+ * @param nCurBitsOffset write index into output buffer, of current byte being filled with bits
+ * @param nCurBitShift bit shift count
+ * @param nCurFollowsLiteral non-zero if the next command to be issued follows a literal, 0 if not
+ * @param nCurRepMatchOffset starting rep offset for this block, updated after the block is compressed successfully
+ * @param nBlockFlags bit 0: 1 for first block, 0 otherwise; bit 1: 1 for last block, 0 otherwise
+ *
+ * @return size of compressed data in output buffer, or -1 if the data is uncompressible
+ */
+static int apultra_compressor_shrink_block(apultra_compressor *pCompressor, const unsigned char *pInWindow, const int nPreviousBlockSize, const int nInDataSize, unsigned char *pOutData, const int nMaxOutDataSize, int *nCurBitsOffset, int *nCurBitShift, int *nCurFollowsLiteral, int *nCurRepMatchOffset, const int nBlockFlags) {
+ int nCompressedSize;
+
+ if (apultra_build_suffix_array(pCompressor, pInWindow, nPreviousBlockSize + nInDataSize))
+ nCompressedSize = -1;
+ else {
+ if (nPreviousBlockSize) {
+ apultra_skip_matches(pCompressor, 0, nPreviousBlockSize);
+ }
+ apultra_find_all_matches(pCompressor, NMATCHES_PER_INDEX, nPreviousBlockSize, nPreviousBlockSize + nInDataSize, nBlockFlags);
+
+ nCompressedSize = apultra_optimize_and_write_block(pCompressor, pInWindow, nPreviousBlockSize, nInDataSize, pOutData, nMaxOutDataSize, nCurBitsOffset, nCurBitShift, nCurFollowsLiteral, nCurRepMatchOffset, nBlockFlags);
+ }
+
+ return nCompressedSize;
+}
+
+/**
+ * Get maximum compressed size of input(source) data
+ *
+ * @param nInputSize input(source) size in bytes
+ *
+ * @return maximum compressed size
+ */
size_t apultra_get_max_compressed_size(size_t nInputSize) {
   /* Worst case: every input byte is emitted as a literal (1 command bit +
    * 8 data bits), plus the end-of-data marker (1 match bit + 2 command bits
    * + 8 offset bits), rounded up to whole bytes. */
   const size_t nWorstCaseBits = nInputSize * 9 /* literals + literal bits */
                               + 1 /* match bit */
                               + 2 /* 7+1 command bits */
                               + 8 /* EOD offset bits */;
   return (nWorstCaseBits + 7) / 8;
}
+
+/**
+ * Compress memory
+ *
+ * @param pInputData pointer to input(source) data to compress
+ * @param pOutBuffer buffer for compressed data
+ * @param nInputSize input(source) size in bytes
+ * @param nMaxOutBufferSize maximum capacity of compression buffer
+ * @param nFlags compression flags (set to 0)
+ * @param nMaxWindowSize maximum window size to use (0 for default)
+ * @param nDictionarySize size of dictionary in front of input data (0 for none)
+ * @param progress progress function, called after compressing each block, or NULL for none
+ * @param pStats pointer to compression stats that are filled if this function is successful, or NULL
+ *
+ * @return actual compressed size, or -1 for error
+ */
size_t apultra_compress(const unsigned char *pInputData, unsigned char *pOutBuffer, size_t nInputSize, size_t nMaxOutBufferSize,
   const unsigned int nFlags, size_t nMaxWindowSize, size_t nDictionarySize, void(*progress)(long long nOriginalSize, long long nCompressedSize), apultra_stats *pStats) {
   apultra_compressor compressor;
   size_t nOriginalSize = 0;
   size_t nCompressedSize = 0L;
   int nResult;
   int nMaxArrivals = NARRIVALS_PER_POSITION_SMALL;
   int nError = 0;
   /* Block size: whole input if it fits under BLOCK_SIZE (minimum 1024),
    * otherwise split into BLOCK_SIZE chunks. */
   const int nBlockSize = (nInputSize < BLOCK_SIZE) ? ((nInputSize < 1024) ? 1024 : (int)nInputSize) : BLOCK_SIZE;
   const int nMaxOutBlockSize = (int)apultra_get_max_compressed_size(nBlockSize);

   /* If the payload (input minus dictionary prefix) fits in a single block,
    * afford a higher arrivals count: maximum up to 256 KB, normal above. */
   if (nDictionarySize < nInputSize) {
      int nInDataSize = (int)(nInputSize - nDictionarySize);
      if (nInDataSize > nBlockSize)
         nInDataSize = nBlockSize;

      if (nInDataSize > 0 && (nDictionarySize + nInDataSize) >= nInputSize) {
         if (nInputSize <= 262144)
            nMaxArrivals = NARRIVALS_PER_POSITION_MAX;
         else
            nMaxArrivals = NARRIVALS_PER_POSITION_NORMAL;
      }
   }

   /* Window is two blocks: previous block as context + current block. */
   nResult = apultra_compressor_init(&compressor, nBlockSize, nBlockSize * 2, nMaxArrivals, nFlags);
   if (nResult != 0) {
      return -1;   /* reported as (size_t)-1, i.e. SIZE_MAX */
   }

   compressor.max_offset = nMaxWindowSize ? (int)nMaxWindowSize : MAX_OFFSET;

   int nPreviousBlockSize = 0;
   int nNumBlocks = 0;
   /* INT_MIN marks "no bit-holding byte opened yet" for the bit writer —
    * NOTE(review): sentinel semantics inferred from the != INT_MIN check
    * below; confirm against apultra_write_bits. */
   int nCurBitsOffset = INT_MIN, nCurBitShift = 0, nCurFollowsLiteral = 0;
   int nBlockFlags = 1;   /* bit 0 set: first block */
   int nCurRepMatchOffset = 0;

   /* A dictionary is treated as an already-"compressed" prefix block. */
   if (nDictionarySize) {
      nOriginalSize = (int)nDictionarySize;
      nPreviousBlockSize = (int)nDictionarySize;
   }

   while (nOriginalSize < nInputSize && !nError) {
      int nInDataSize;

      nInDataSize = (int)(nInputSize - nOriginalSize);
      if (nInDataSize > nBlockSize)
         nInDataSize = nBlockSize;

      if (nInDataSize > 0) {
         int nOutDataSize;
         int nOutDataEnd = (int)(nMaxOutBufferSize - nCompressedSize);

         if (nOutDataEnd > nMaxOutBlockSize)
            nOutDataEnd = nMaxOutBlockSize;

         /* Set "last block" flag when this chunk finishes the input */
         if ((nOriginalSize + nInDataSize) >= nInputSize)
            nBlockFlags |= 2;
         /* The window passed down starts nPreviousBlockSize bytes before the
          * current chunk, so earlier data can be matched against. */
         nOutDataSize = apultra_compressor_shrink_block(&compressor, pInputData + nOriginalSize - nPreviousBlockSize, nPreviousBlockSize, nInDataSize, pOutBuffer + nCompressedSize, nOutDataEnd,
            &nCurBitsOffset, &nCurBitShift, &nCurFollowsLiteral, &nCurRepMatchOffset, nBlockFlags);
         nBlockFlags &= (~1);   /* no longer the first block */

         if (nOutDataSize >= 0) {
            /* Write compressed block */

            if (!nError) {
               nOriginalSize += nInDataSize;
               nCompressedSize += nOutDataSize;
               /* Rebase the bit writer's byte index: the next block writes
                * into a buffer pointer advanced by nOutDataSize. */
               if (nCurBitsOffset != INT_MIN)
                  nCurBitsOffset -= nOutDataSize;
            }
         }
         else {
            nError = -1;
         }

         nPreviousBlockSize = nInDataSize;
         nNumBlocks++;
      }

      if (!nError && nOriginalSize < nInputSize) {
         if (progress)
            progress(nOriginalSize, nCompressedSize);
      }
   }

   /* Final progress callback covers the last block */
   if (progress)
      progress(nOriginalSize, nCompressedSize);
   if (pStats)
      *pStats = compressor.stats;

   apultra_compressor_destroy(&compressor);

   if (nError) {
      return -1;   /* reported as (size_t)-1, i.e. SIZE_MAX */
   }
   else {
      return nCompressedSize;
   }
}
diff --git a/tools/z64compress/src/enc/apultra/shrink.h b/tools/z64compress/src/enc/apultra/shrink.h
new file mode 100644
index 000000000..bd905936f
--- /dev/null
+++ b/tools/z64compress/src/enc/apultra/shrink.h
@@ -0,0 +1,174 @@
+/*
+ * shrink.h - compressor definitions
+ *
+ * Copyright (C) 2019 Emmanuel Marty
+ *
+ * This software is provided 'as-is', without any express or implied
+ * warranty. In no event will the authors be held liable for any damages
+ * arising from the use of this software.
+ *
+ * Permission is granted to anyone to use this software for any purpose,
+ * including commercial applications, and to alter it and redistribute it
+ * freely, subject to the following restrictions:
+ *
+ * 1. The origin of this software must not be misrepresented; you must not
+ * claim that you wrote the original software. If you use this software
+ * in a product, an acknowledgment in the product documentation would be
+ * appreciated but is not required.
+ * 2. Altered source versions must be plainly marked as such, and must not be
+ * misrepresented as being the original software.
+ * 3. This notice may not be removed or altered from any source distribution.
+ */
+
+/*
+ * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
+ *
+ * Inspired by cap by Sven-Åke Dahl. https://github.com/svendahl/cap
+ * Also inspired by Charles Bloom's compression blog. http://cbloomrants.blogspot.com/
+ * With ideas from LZ4 by Yann Collet. https://github.com/lz4/lz4
+ * With help and support from spke
+ *
+ */
+
+#ifndef _SHRINK_H
+#define _SHRINK_H
+
+#include "divsufsort.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define LCP_BITS 15
+#define TAG_BITS 4
+#define LCP_MAX ((1U<<(LCP_BITS - TAG_BITS)) - 1)
+#define LCP_AND_TAG_MAX ((1U<<LCP_BITS) - 1)
+ * With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
+ * Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
+ *
+ */
+
+#include <stdlib.h>
+#include <string.h>
+//#include "shrink_context.h"
+//#include "shrink_block.h"
+#include "format.h"
+#include "matchfinder.h"
+//#include "lib.h"
diff --git a/tools/z64compress/src/enc/apultra/sssort.c b/tools/z64compress/src/enc/apultra/sssort.c
new file mode 100644
index 000000000..4a18fd2ab
--- /dev/null
+++ b/tools/z64compress/src/enc/apultra/sssort.c
@@ -0,0 +1,815 @@
+/*
+ * sssort.c for libdivsufsort
+ * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "divsufsort_private.h"
+
+
+/*- Private Functions -*/
+
/* lg_table[i] = floor(log2(i)) for i in [1, 255]; lg_table[0] is -1
 * (log2 of zero is undefined). Used for byte-wise integer log lookups. */
static const saint_t lg_table[256]= {
 -1,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
  5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
  6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
  6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7
};
+
+#if (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE)
+
/* ss_ilg(n): integer base-2 logarithm (floor(log2(n))) of a positive value,
 * computed byte by byte through lg_table. The variant chosen depends on how
 * large n can get, which is bounded by SS_BLOCKSIZE. */
static INLINE
saint_t
ss_ilg(saidx_t n) {
#if SS_BLOCKSIZE == 0
  /* Unbounded n: scan from the most significant byte downwards. */
# if defined(BUILD_DIVSUFSORT64)
  return (n >> 32) ?
          ((n >> 48) ?
            ((n >> 56) ?
              56 + lg_table[(n >> 56) & 0xff] :
              48 + lg_table[(n >> 48) & 0xff]) :
            ((n >> 40) ?
              40 + lg_table[(n >> 40) & 0xff] :
              32 + lg_table[(n >> 32) & 0xff])) :
          ((n & 0xffff0000) ?
            ((n & 0xff000000) ?
              24 + lg_table[(n >> 24) & 0xff] :
              16 + lg_table[(n >> 16) & 0xff]) :
            ((n & 0x0000ff00) ?
              8 + lg_table[(n >> 8) & 0xff] :
              0 + lg_table[(n >> 0) & 0xff]));
# else
  return (n & 0xffff0000) ?
          ((n & 0xff000000) ?
            24 + lg_table[(n >> 24) & 0xff] :
            16 + lg_table[(n >> 16) & 0xff]) :
          ((n & 0x0000ff00) ?
            8 + lg_table[(n >> 8) & 0xff] :
            0 + lg_table[(n >> 0) & 0xff]);
# endif
#elif SS_BLOCKSIZE < 256
  /* n fits in one byte: single lookup. */
  return lg_table[n];
#else
  /* n fits in 16 bits: at most two lookups. */
  return (n & 0xff00) ?
          8 + lg_table[(n >> 8) & 0xff] :
          0 + lg_table[(n >> 0) & 0xff];
#endif
}
+
+#endif /* (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) */
+
+#if SS_BLOCKSIZE != 0
+
/* sqq_table[i] = floor(16 * sqrt(i)), i.e. the square root of i in 4.4
 * fixed point. Seeds the Newton-Raphson refinement in ss_isqrt. */
static const saint_t sqq_table[256] = {
  0,  16,  22,  27,  32,  35,  39,  42,  45,  48,  50,  53,  55,  57,  59,  61,
 64,  65,  67,  69,  71,  73,  75,  76,  78,  80,  81,  83,  84,  86,  87,  89,
 90,  91,  93,  94,  96,  97,  98,  99, 101, 102, 103, 104, 106, 107, 108, 109,
110, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126,
128, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142,
143, 144, 144, 145, 146, 147, 148, 149, 150, 150, 151, 152, 153, 154, 155, 155,
156, 157, 158, 159, 160, 160, 161, 162, 163, 163, 164, 165, 166, 167, 167, 168,
169, 170, 170, 171, 172, 173, 173, 174, 175, 176, 176, 177, 178, 178, 179, 180,
181, 181, 182, 183, 183, 184, 185, 185, 186, 187, 187, 188, 189, 189, 190, 191,
192, 192, 193, 193, 194, 195, 195, 196, 197, 197, 198, 199, 199, 200, 201, 201,
202, 203, 203, 204, 204, 205, 206, 206, 207, 208, 208, 209, 209, 210, 211, 211,
212, 212, 213, 214, 214, 215, 215, 216, 217, 217, 218, 218, 219, 219, 220, 221,
221, 222, 222, 223, 224, 224, 225, 225, 226, 226, 227, 227, 228, 229, 229, 230,
230, 231, 231, 232, 232, 233, 234, 234, 235, 235, 236, 236, 237, 237, 238, 238,
239, 240, 240, 241, 241, 242, 242, 243, 243, 244, 244, 245, 245, 246, 246, 247,
247, 248, 248, 249, 249, 250, 250, 251, 251, 252, 252, 253, 253, 254, 254, 255
};
+
/* ss_isqrt(x): integer square root of x, clamped to SS_BLOCKSIZE.
 * Uses a table seed (sqq_table) scaled by the bit length of x, then one or
 * two Newton-Raphson iterations for larger inputs. */
static INLINE
saidx_t
ss_isqrt(saidx_t x) {
  saidx_t y, e;

  /* sqrt(x) would exceed the block size cap */
  if(x >= (SS_BLOCKSIZE * SS_BLOCKSIZE)) { return SS_BLOCKSIZE; }
  /* e = floor(log2(x)) via byte-wise table lookup */
  e = (x & 0xffff0000) ?
        ((x & 0xff000000) ?
          24 + lg_table[(x >> 24) & 0xff] :
          16 + lg_table[(x >> 16) & 0xff]) :
        ((x & 0x0000ff00) ?
          8 + lg_table[(x >> 8) & 0xff] :
          0 + lg_table[(x >> 0) & 0xff]);

  if(e >= 16) {
    /* Seed from the table using the top bits of x, then refine with
     * Newton-Raphson: y' = (y + x/y) / 2 */
    y = sqq_table[x >> ((e - 6) - (e & 1))] << ((e >> 1) - 7);
    if(e >= 24) { y = (y + 1 + x / y) >> 1; }
    y = (y + 1 + x / y) >> 1;
  } else if(e >= 8) {
    /* Mid-range values: the scaled table estimate is accurate enough */
    y = (sqq_table[x >> ((e - 6) - (e & 1))] >> (7 - (e >> 1))) + 1;
  } else {
    /* x < 256: direct 4.4 fixed-point lookup, shifted to an integer */
    return sqq_table[x] >> 4;
  }

  /* The refined estimate may be one too high; correct it */
  return (x < (y * y)) ? y - 1 : y;
}
+
+#endif /* SS_BLOCKSIZE != 0 */
+
+
+/*---------------------------------------------------------------------------*/
+
/* Compares two suffixes. *p1 and *p2 are suffix start positions in T;
 * comparison starts at the given depth. The scan limits U1n/U2n are taken
 * from the next PA entries plus 2 — NOTE(review): this relies on
 * divsufsort's substring layout in PA; confirm against ss_sort's callers.
 * Returns <0, 0 or >0 like strcmp. */
static INLINE
saint_t
ss_compare(const sauchar_t *T,
           const saidx_t *p1, const saidx_t *p2,
           saidx_t depth) {
  const sauchar_t *U1, *U2, *U1n, *U2n;

  /* Advance both cursors while bytes match and neither limit is reached */
  for(U1 = T + depth + *p1,
      U2 = T + depth + *p2,
      U1n = T + *(p1 + 1) + 2,
      U2n = T + *(p2 + 1) + 2;
      (U1 < U1n) && (U2 < U2n) && (*U1 == *U2);
      ++U1, ++U2) {
  }

  /* A suffix that hit its limit first compares as the smaller one;
   * both hitting their limits means equality. */
  return U1 < U1n ?
        (U2 < U2n ? *U1 - *U2 : 1) :
        (U2 < U2n ? -1 : 0);
}
+
+
+/*---------------------------------------------------------------------------*/
+
+#if (SS_BLOCKSIZE != 1) && (SS_INSERTIONSORT_THRESHOLD != 1)
+
/* Insertionsort for small size groups.
 * Sorts SA[first..last) by ss_compare at the given depth. Entries equal to
 * their successor are flagged by bitwise-NOT (negative values), a marking
 * the caller uses to detect groups of equal substrings; the inner do/while
 * skips over such already-marked entries. */
static
void
ss_insertionsort(const sauchar_t *T, const saidx_t *PA,
                 saidx_t *first, saidx_t *last, saidx_t depth) {
  saidx_t *i, *j;
  saidx_t t;
  saint_t r;

  for(i = last - 2; first <= i; --i) {
    /* Shift larger entries right until the slot for t is found */
    for(t = *i, j = i + 1; 0 < (r = ss_compare(T, PA + t, PA + *j, depth));) {
      do { *(j - 1) = *j; } while((++j < last) && (*j < 0));
      if(last <= j) { break; }
    }
    /* Equal to the following entry: mark it with bitwise-NOT */
    if(r == 0) { *j = ~*j; }
    *(j - 1) = t;
  }
}
+
+#endif /* (SS_BLOCKSIZE != 1) && (SS_INSERTIONSORT_THRESHOLD != 1) */
+
+
+/*---------------------------------------------------------------------------*/
+
+#if (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE)
+
/* Sift-down step for ss_heapsort: restores the max-heap property of
 * SA[i..size) where entries are keyed by Td[PA[entry]]. */
static INLINE
void
ss_fixdown(const sauchar_t *Td, const saidx_t *PA,
           saidx_t *SA, saidx_t i, saidx_t size) {
  saidx_t j, k;
  saidx_t v;
  saint_t c, d, e;

  /* v/c: the element being sifted and its key; k: the larger child */
  for(v = SA[i], c = Td[PA[v]]; (j = 2 * i + 1) < size; SA[i] = SA[k], i = k) {
    d = Td[PA[SA[k = j++]]];
    /* NOTE(review): the right-child read SA[j] assumes j < size here;
     * ss_heapsort's even-size pre-step appears to guarantee this — confirm. */
    if(d < (e = Td[PA[SA[j]]])) { k = j; d = e; }
    if(d <= c) { break; }
  }
  SA[i] = v;
}
+
/* Simple top-down heapsort of SA[0..size), keyed by Td[PA[entry]].
 * An even size is reduced to an odd heap (m = size - 1) by pre-placing the
 * last element, so every internal heap node has two children. */
static
void
ss_heapsort(const sauchar_t *Td, const saidx_t *PA, saidx_t *SA, saidx_t size) {
  saidx_t i, m;
  saidx_t t;

  m = size;
  if((size % 2) == 0) {
    m--;
    /* Keep the larger of the trimmed element and its parent at position m */
    if(Td[PA[SA[m / 2]]] < Td[PA[SA[m]]]) { SWAP(SA[m], SA[m / 2]); }
  }

  /* Build the heap bottom-up */
  for(i = m / 2 - 1; 0 <= i; --i) { ss_fixdown(Td, PA, SA, i, m); }
  /* Re-insert the trimmed element for even sizes */
  if((size % 2) == 0) { SWAP(SA[0], SA[m]); ss_fixdown(Td, PA, SA, 0, m); }
  /* Extract the maximum repeatedly */
  for(i = m - 1; 0 < i; --i) {
    t = SA[0], SA[0] = SA[i];
    ss_fixdown(Td, PA, SA, 0, i);
    SA[i] = t;
  }
}
+
+
+/*---------------------------------------------------------------------------*/
+
+/* Returns the median of three elements. */
+static INLINE
+saidx_t *
+ss_median3(const sauchar_t *Td, const saidx_t *PA,
+ saidx_t *v1, saidx_t *v2, saidx_t *v3) {
+ saidx_t *t;
+ if(Td[PA[*v1]] > Td[PA[*v2]]) { SWAP(v1, v2); }
+ if(Td[PA[*v2]] > Td[PA[*v3]]) {
+ if(Td[PA[*v1]] > Td[PA[*v3]]) { return v1; }
+ else { return v3; }
+ }
+ return v2;
+}
+
+/* Returns the median of five elements. */
+static INLINE
+saidx_t *
+ss_median5(const sauchar_t *Td, const saidx_t *PA,
+ saidx_t *v1, saidx_t *v2, saidx_t *v3, saidx_t *v4, saidx_t *v5) {
+ saidx_t *t;
+ if(Td[PA[*v2]] > Td[PA[*v3]]) { SWAP(v2, v3); }
+ if(Td[PA[*v4]] > Td[PA[*v5]]) { SWAP(v4, v5); }
+ if(Td[PA[*v2]] > Td[PA[*v4]]) { SWAP(v2, v4); SWAP(v3, v5); }
+ if(Td[PA[*v1]] > Td[PA[*v3]]) { SWAP(v1, v3); }
+ if(Td[PA[*v1]] > Td[PA[*v4]]) { SWAP(v1, v4); SWAP(v3, v5); }
+ if(Td[PA[*v3]] > Td[PA[*v4]]) { return v4; }
+ return v3;
+}
+
+/* Returns the pivot element. */
+static INLINE
+saidx_t *
+ss_pivot(const sauchar_t *Td, const saidx_t *PA, saidx_t *first, saidx_t *last) {
+ saidx_t *middle;
+ saidx_t t;
+
+ t = last - first;
+ middle = first + t / 2;
+
+ if(t <= 512) {
+ if(t <= 32) {
+ return ss_median3(Td, PA, first, middle, last - 1);
+ } else {
+ t >>= 2;
+ return ss_median5(Td, PA, first, first + t, middle, last - 1 - t, last - 1);
+ }
+ }
+ t >>= 3;
+ first = ss_median3(Td, PA, first, first + t, first + (t << 1));
+ middle = ss_median3(Td, PA, middle - t, middle, middle + t);
+ last = ss_median3(Td, PA, last - 1 - (t << 1), last - 1 - t, last - 1);
+ return ss_median3(Td, PA, first, middle, last);
+}
+
+
+/*---------------------------------------------------------------------------*/
+
+/* Binary partition for substrings. */
+static INLINE
+saidx_t *
+ss_partition(const saidx_t *PA,
+ saidx_t *first, saidx_t *last, saidx_t depth) {
+ saidx_t *a, *b;
+ saidx_t t;
+ for(a = first - 1, b = last;;) {
+ for(; (++a < b) && ((PA[*a] + depth) >= (PA[*a + 1] + 1));) { *a = ~*a; }
+ for(; (a < --b) && ((PA[*b] + depth) < (PA[*b + 1] + 1));) { }
+ if(b <= a) { break; }
+ t = ~*b;
+ *b = *a;
+ *a = t;
+ }
+ if(first < a) { *first = ~*first; }
+ return a;
+}
+
+/* Multikey introsort for medium size groups. */
+static
+void
+ss_mintrosort(const sauchar_t *T, const saidx_t *PA,
+ saidx_t *first, saidx_t *last,
+ saidx_t depth) {
+#define STACK_SIZE SS_MISORT_STACKSIZE
+ struct { saidx_t *a, *b, c; saint_t d; } stack[STACK_SIZE];
+ const sauchar_t *Td;
+ saidx_t *a, *b, *c, *d, *e, *f;
+ saidx_t s, t;
+ saint_t ssize;
+ saint_t limit;
+ saint_t v, x = 0;
+
+ for(ssize = 0, limit = ss_ilg(last - first);;) {
+
+ if((last - first) <= SS_INSERTIONSORT_THRESHOLD) {
+#if 1 < SS_INSERTIONSORT_THRESHOLD
+ if(1 < (last - first)) { ss_insertionsort(T, PA, first, last, depth); }
+#endif
+ STACK_POP(first, last, depth, limit);
+ continue;
+ }
+
+ Td = T + depth;
+ if(limit-- == 0) { ss_heapsort(Td, PA, first, last - first); }
+ if(limit < 0) {
+ for(a = first + 1, v = Td[PA[*first]]; a < last; ++a) {
+ if((x = Td[PA[*a]]) != v) {
+ if(1 < (a - first)) { break; }
+ v = x;
+ first = a;
+ }
+ }
+ if(Td[PA[*first] - 1] < v) {
+ first = ss_partition(PA, first, a, depth);
+ }
+ if((a - first) <= (last - a)) {
+ if(1 < (a - first)) {
+ STACK_PUSH(a, last, depth, -1);
+ last = a, depth += 1, limit = ss_ilg(a - first);
+ } else {
+ first = a, limit = -1;
+ }
+ } else {
+ if(1 < (last - a)) {
+ STACK_PUSH(first, a, depth + 1, ss_ilg(a - first));
+ first = a, limit = -1;
+ } else {
+ last = a, depth += 1, limit = ss_ilg(a - first);
+ }
+ }
+ continue;
+ }
+
+ /* choose pivot */
+ a = ss_pivot(Td, PA, first, last);
+ v = Td[PA[*a]];
+ SWAP(*first, *a);
+
+ /* partition */
+ for(b = first; (++b < last) && ((x = Td[PA[*b]]) == v);) { }
+ if(((a = b) < last) && (x < v)) {
+ for(; (++b < last) && ((x = Td[PA[*b]]) <= v);) {
+ if(x == v) { SWAP(*b, *a); ++a; }
+ }
+ }
+ for(c = last; (b < --c) && ((x = Td[PA[*c]]) == v);) { }
+ if((b < (d = c)) && (x > v)) {
+ for(; (b < --c) && ((x = Td[PA[*c]]) >= v);) {
+ if(x == v) { SWAP(*c, *d); --d; }
+ }
+ }
+ for(; b < c;) {
+ SWAP(*b, *c);
+ for(; (++b < c) && ((x = Td[PA[*b]]) <= v);) {
+ if(x == v) { SWAP(*b, *a); ++a; }
+ }
+ for(; (b < --c) && ((x = Td[PA[*c]]) >= v);) {
+ if(x == v) { SWAP(*c, *d); --d; }
+ }
+ }
+
+ if(a <= d) {
+ c = b - 1;
+
+ if((s = a - first) > (t = b - a)) { s = t; }
+ for(e = first, f = b - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); }
+ if((s = d - c) > (t = last - d - 1)) { s = t; }
+ for(e = b, f = last - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); }
+
+ a = first + (b - a), c = last - (d - c);
+ b = (v <= Td[PA[*a] - 1]) ? a : ss_partition(PA, a, c, depth);
+
+ if((a - first) <= (last - c)) {
+ if((last - c) <= (c - b)) {
+ STACK_PUSH(b, c, depth + 1, ss_ilg(c - b));
+ STACK_PUSH(c, last, depth, limit);
+ last = a;
+ } else if((a - first) <= (c - b)) {
+ STACK_PUSH(c, last, depth, limit);
+ STACK_PUSH(b, c, depth + 1, ss_ilg(c - b));
+ last = a;
+ } else {
+ STACK_PUSH(c, last, depth, limit);
+ STACK_PUSH(first, a, depth, limit);
+ first = b, last = c, depth += 1, limit = ss_ilg(c - b);
+ }
+ } else {
+ if((a - first) <= (c - b)) {
+ STACK_PUSH(b, c, depth + 1, ss_ilg(c - b));
+ STACK_PUSH(first, a, depth, limit);
+ first = c;
+ } else if((last - c) <= (c - b)) {
+ STACK_PUSH(first, a, depth, limit);
+ STACK_PUSH(b, c, depth + 1, ss_ilg(c - b));
+ first = c;
+ } else {
+ STACK_PUSH(first, a, depth, limit);
+ STACK_PUSH(c, last, depth, limit);
+ first = b, last = c, depth += 1, limit = ss_ilg(c - b);
+ }
+ }
+ } else {
+ limit += 1;
+ if(Td[PA[*first] - 1] < v) {
+ first = ss_partition(PA, first, last, depth);
+ limit = ss_ilg(last - first);
+ }
+ depth += 1;
+ }
+ }
+#undef STACK_SIZE
+}
+
+#endif /* (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) */
+
+
+/*---------------------------------------------------------------------------*/
+
+#if SS_BLOCKSIZE != 0
+
+static INLINE
+void
+ss_blockswap(saidx_t *a, saidx_t *b, saidx_t n) {
+ saidx_t t;
+ for(; 0 < n; --n, ++a, ++b) {
+ t = *a, *a = *b, *b = t;
+ }
+}
+
+static INLINE
+void
+ss_rotate(saidx_t *first, saidx_t *middle, saidx_t *last) {
+ saidx_t *a, *b, t;
+ saidx_t l, r;
+ l = middle - first, r = last - middle;
+ for(; (0 < l) && (0 < r);) {
+ if(l == r) { ss_blockswap(first, middle, l); break; }
+ if(l < r) {
+ a = last - 1, b = middle - 1;
+ t = *a;
+ do {
+ *a-- = *b, *b-- = *a;
+ if(b < first) {
+ *a = t;
+ last = a;
+ if((r -= l + 1) <= l) { break; }
+ a -= 1, b = middle - 1;
+ t = *a;
+ }
+ } while(1);
+ } else {
+ a = first, b = middle;
+ t = *a;
+ do {
+ *a++ = *b, *b++ = *a;
+ if(last <= b) {
+ *a = t;
+ first = a + 1;
+ if((l -= r + 1) <= r) { break; }
+ a += 1, b = middle;
+ t = *a;
+ }
+ } while(1);
+ }
+ }
+}
+
+
+/*---------------------------------------------------------------------------*/
+
+static
+void
+ss_inplacemerge(const sauchar_t *T, const saidx_t *PA,
+ saidx_t *first, saidx_t *middle, saidx_t *last,
+ saidx_t depth) {
+ const saidx_t *p;
+ saidx_t *a, *b;
+ saidx_t len, half;
+ saint_t q, r;
+ saint_t x;
+
+ for(;;) {
+ if(*(last - 1) < 0) { x = 1; p = PA + ~*(last - 1); }
+ else { x = 0; p = PA + *(last - 1); }
+ for(a = first, len = middle - first, half = len >> 1, r = -1;
+ 0 < len;
+ len = half, half >>= 1) {
+ b = a + half;
+ q = ss_compare(T, PA + ((0 <= *b) ? *b : ~*b), p, depth);
+ if(q < 0) {
+ a = b + 1;
+ half -= (len & 1) ^ 1;
+ } else {
+ r = q;
+ }
+ }
+ if(a < middle) {
+ if(r == 0) { *a = ~*a; }
+ ss_rotate(a, middle, last);
+ last -= middle - a;
+ middle = a;
+ if(first == middle) { break; }
+ }
+ --last;
+ if(x != 0) { while(*--last < 0) { } }
+ if(middle == last) { break; }
+ }
+}
+
+
+/*---------------------------------------------------------------------------*/
+
+/* Merge-forward with internal buffer. */
+static
+void
+ss_mergeforward(const sauchar_t *T, const saidx_t *PA,
+ saidx_t *first, saidx_t *middle, saidx_t *last,
+ saidx_t *buf, saidx_t depth) {
+ saidx_t *a, *b, *c, *bufend;
+ saidx_t t;
+ saint_t r;
+
+ bufend = buf + (middle - first) - 1;
+ ss_blockswap(buf, first, middle - first);
+
+ for(t = *(a = first), b = buf, c = middle;;) {
+ r = ss_compare(T, PA + *b, PA + *c, depth);
+ if(r < 0) {
+ do {
+ *a++ = *b;
+ if(bufend <= b) { *bufend = t; return; }
+ *b++ = *a;
+ } while(*b < 0);
+ } else if(r > 0) {
+ do {
+ *a++ = *c, *c++ = *a;
+ if(last <= c) {
+ while(b < bufend) { *a++ = *b, *b++ = *a; }
+ *a = *b, *b = t;
+ return;
+ }
+ } while(*c < 0);
+ } else {
+ *c = ~*c;
+ do {
+ *a++ = *b;
+ if(bufend <= b) { *bufend = t; return; }
+ *b++ = *a;
+ } while(*b < 0);
+
+ do {
+ *a++ = *c, *c++ = *a;
+ if(last <= c) {
+ while(b < bufend) { *a++ = *b, *b++ = *a; }
+ *a = *b, *b = t;
+ return;
+ }
+ } while(*c < 0);
+ }
+ }
+}
+
+/* Merge-backward with internal buffer. */
+static
+void
+ss_mergebackward(const sauchar_t *T, const saidx_t *PA,
+ saidx_t *first, saidx_t *middle, saidx_t *last,
+ saidx_t *buf, saidx_t depth) {
+ const saidx_t *p1, *p2;
+ saidx_t *a, *b, *c, *bufend;
+ saidx_t t;
+ saint_t r;
+ saint_t x;
+
+ bufend = buf + (last - middle) - 1;
+ ss_blockswap(buf, middle, last - middle);
+
+ x = 0;
+ if(*bufend < 0) { p1 = PA + ~*bufend; x |= 1; }
+ else { p1 = PA + *bufend; }
+ if(*(middle - 1) < 0) { p2 = PA + ~*(middle - 1); x |= 2; }
+ else { p2 = PA + *(middle - 1); }
+ for(t = *(a = last - 1), b = bufend, c = middle - 1;;) {
+ r = ss_compare(T, p1, p2, depth);
+ if(0 < r) {
+ if(x & 1) { do { *a-- = *b, *b-- = *a; } while(*b < 0); x ^= 1; }
+ *a-- = *b;
+ if(b <= buf) { *buf = t; break; }
+ *b-- = *a;
+ if(*b < 0) { p1 = PA + ~*b; x |= 1; }
+ else { p1 = PA + *b; }
+ } else if(r < 0) {
+ if(x & 2) { do { *a-- = *c, *c-- = *a; } while(*c < 0); x ^= 2; }
+ *a-- = *c, *c-- = *a;
+ if(c < first) {
+ while(buf < b) { *a-- = *b, *b-- = *a; }
+ *a = *b, *b = t;
+ break;
+ }
+ if(*c < 0) { p2 = PA + ~*c; x |= 2; }
+ else { p2 = PA + *c; }
+ } else {
+ if(x & 1) { do { *a-- = *b, *b-- = *a; } while(*b < 0); x ^= 1; }
+ *a-- = ~*b;
+ if(b <= buf) { *buf = t; break; }
+ *b-- = *a;
+ if(x & 2) { do { *a-- = *c, *c-- = *a; } while(*c < 0); x ^= 2; }
+ *a-- = *c, *c-- = *a;
+ if(c < first) {
+ while(buf < b) { *a-- = *b, *b-- = *a; }
+ *a = *b, *b = t;
+ break;
+ }
+ if(*b < 0) { p1 = PA + ~*b; x |= 1; }
+ else { p1 = PA + *b; }
+ if(*c < 0) { p2 = PA + ~*c; x |= 2; }
+ else { p2 = PA + *c; }
+ }
+ }
+}
+
+/* D&C based merge. */
+static
+void
+ss_swapmerge(const sauchar_t *T, const saidx_t *PA,
+ saidx_t *first, saidx_t *middle, saidx_t *last,
+ saidx_t *buf, saidx_t bufsize, saidx_t depth) {
+#define STACK_SIZE SS_SMERGE_STACKSIZE
+#define GETIDX(a) ((0 <= (a)) ? (a) : (~(a)))
+#define MERGE_CHECK(a, b, c)\
+ do {\
+ if(((c) & 1) ||\
+ (((c) & 2) && (ss_compare(T, PA + GETIDX(*((a) - 1)), PA + *(a), depth) == 0))) {\
+ *(a) = ~*(a);\
+ }\
+ if(((c) & 4) && ((ss_compare(T, PA + GETIDX(*((b) - 1)), PA + *(b), depth) == 0))) {\
+ *(b) = ~*(b);\
+ }\
+ } while(0)
+ struct { saidx_t *a, *b, *c; saint_t d; } stack[STACK_SIZE];
+ saidx_t *l, *r, *lm, *rm;
+ saidx_t m, len, half;
+ saint_t ssize;
+ saint_t check, next;
+
+ for(check = 0, ssize = 0;;) {
+ if((last - middle) <= bufsize) {
+ if((first < middle) && (middle < last)) {
+ ss_mergebackward(T, PA, first, middle, last, buf, depth);
+ }
+ MERGE_CHECK(first, last, check);
+ STACK_POP(first, middle, last, check);
+ continue;
+ }
+
+ if((middle - first) <= bufsize) {
+ if(first < middle) {
+ ss_mergeforward(T, PA, first, middle, last, buf, depth);
+ }
+ MERGE_CHECK(first, last, check);
+ STACK_POP(first, middle, last, check);
+ continue;
+ }
+
+ for(m = 0, len = MIN(middle - first, last - middle), half = len >> 1;
+ 0 < len;
+ len = half, half >>= 1) {
+ if(ss_compare(T, PA + GETIDX(*(middle + m + half)),
+ PA + GETIDX(*(middle - m - half - 1)), depth) < 0) {
+ m += half + 1;
+ half -= (len & 1) ^ 1;
+ }
+ }
+
+ if(0 < m) {
+ lm = middle - m, rm = middle + m;
+ ss_blockswap(lm, middle, m);
+ l = r = middle, next = 0;
+ if(rm < last) {
+ if(*rm < 0) {
+ *rm = ~*rm;
+ if(first < lm) { for(; *--l < 0;) { } next |= 4; }
+ next |= 1;
+ } else if(first < lm) {
+ for(; *r < 0; ++r) { }
+ next |= 2;
+ }
+ }
+
+ if((l - first) <= (last - r)) {
+ STACK_PUSH(r, rm, last, (next & 3) | (check & 4));
+ middle = lm, last = l, check = (check & 3) | (next & 4);
+ } else {
+ if((next & 2) && (r == middle)) { next ^= 6; }
+ STACK_PUSH(first, lm, l, (check & 3) | (next & 4));
+ first = r, middle = rm, check = (next & 3) | (check & 4);
+ }
+ } else {
+ if(ss_compare(T, PA + GETIDX(*(middle - 1)), PA + *middle, depth) == 0) {
+ *middle = ~*middle;
+ }
+ MERGE_CHECK(first, last, check);
+ STACK_POP(first, middle, last, check);
+ }
+ }
+#undef STACK_SIZE
+}
+
+#endif /* SS_BLOCKSIZE != 0 */
+
+
+/*---------------------------------------------------------------------------*/
+
+/*- Function -*/
+
+/* Substring sort */
+void
+sssort(const sauchar_t *T, const saidx_t *PA,
+ saidx_t *first, saidx_t *last,
+ saidx_t *buf, saidx_t bufsize,
+ saidx_t depth, saidx_t n, saint_t lastsuffix) {
+ saidx_t *a;
+#if SS_BLOCKSIZE != 0
+ saidx_t *b, *middle, *curbuf;
+ saidx_t j, k, curbufsize, limit;
+#endif
+ saidx_t i;
+
+ if(lastsuffix != 0) { ++first; }
+
+#if SS_BLOCKSIZE == 0
+ ss_mintrosort(T, PA, first, last, depth);
+#else
+ if((bufsize < SS_BLOCKSIZE) &&
+ (bufsize < (last - first)) &&
+ (bufsize < (limit = ss_isqrt(last - first)))) {
+ if(SS_BLOCKSIZE < limit) { limit = SS_BLOCKSIZE; }
+ buf = middle = last - limit, bufsize = limit;
+ } else {
+ middle = last, limit = 0;
+ }
+ for(a = first, i = 0; SS_BLOCKSIZE < (middle - a); a += SS_BLOCKSIZE, ++i) {
+#if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE
+ ss_mintrosort(T, PA, a, a + SS_BLOCKSIZE, depth);
+#elif 1 < SS_BLOCKSIZE
+ ss_insertionsort(T, PA, a, a + SS_BLOCKSIZE, depth);
+#endif
+ curbufsize = last - (a + SS_BLOCKSIZE);
+ curbuf = a + SS_BLOCKSIZE;
+ if(curbufsize <= bufsize) { curbufsize = bufsize, curbuf = buf; }
+ for(b = a, k = SS_BLOCKSIZE, j = i; j & 1; b -= k, k <<= 1, j >>= 1) {
+ ss_swapmerge(T, PA, b - k, b, b + k, curbuf, curbufsize, depth);
+ }
+ }
+#if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE
+ ss_mintrosort(T, PA, a, middle, depth);
+#elif 1 < SS_BLOCKSIZE
+ ss_insertionsort(T, PA, a, middle, depth);
+#endif
+ for(k = SS_BLOCKSIZE; i != 0; k <<= 1, i >>= 1) {
+ if(i & 1) {
+ ss_swapmerge(T, PA, a - k, a, middle, buf, bufsize, depth);
+ a -= k;
+ }
+ }
+ if(limit != 0) {
+#if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE
+ ss_mintrosort(T, PA, middle, last, depth);
+#elif 1 < SS_BLOCKSIZE
+ ss_insertionsort(T, PA, middle, last, depth);
+#endif
+ ss_inplacemerge(T, PA, first, middle, last, depth);
+ }
+#endif
+
+ if(lastsuffix != 0) {
+ /* Insert last type B* suffix. */
+ saidx_t PAi[2]; PAi[0] = PA[*(first - 1)], PAi[1] = n - 2;
+ for(a = first, i = *(first - 1);
+ (a < last) && ((*a < 0) || (0 < ss_compare(T, &(PAi[0]), PA + *a, depth)));
+ ++a) {
+ *(a - 1) = *a;
+ }
+ *(a - 1) = i;
+ }
+}
diff --git a/tools/z64compress/src/enc/apultra/trsort.c b/tools/z64compress/src/enc/apultra/trsort.c
new file mode 100644
index 000000000..6fe3e67ba
--- /dev/null
+++ b/tools/z64compress/src/enc/apultra/trsort.c
@@ -0,0 +1,586 @@
+/*
+ * trsort.c for libdivsufsort
+ * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "divsufsort_private.h"
+
+
+/*- Private Functions -*/
+
+static const saint_t lg_table[256]= {
+ -1,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
+ 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+ 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
+ 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
+ 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
+ 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
+ 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
+ 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7
+};
+
+static INLINE
+saint_t
+tr_ilg(saidx_t n) {
+#if defined(BUILD_DIVSUFSORT64)
+ return (n >> 32) ?
+ ((n >> 48) ?
+ ((n >> 56) ?
+ 56 + lg_table[(n >> 56) & 0xff] :
+ 48 + lg_table[(n >> 48) & 0xff]) :
+ ((n >> 40) ?
+ 40 + lg_table[(n >> 40) & 0xff] :
+ 32 + lg_table[(n >> 32) & 0xff])) :
+ ((n & 0xffff0000) ?
+ ((n & 0xff000000) ?
+ 24 + lg_table[(n >> 24) & 0xff] :
+ 16 + lg_table[(n >> 16) & 0xff]) :
+ ((n & 0x0000ff00) ?
+ 8 + lg_table[(n >> 8) & 0xff] :
+ 0 + lg_table[(n >> 0) & 0xff]));
+#else
+ return (n & 0xffff0000) ?
+ ((n & 0xff000000) ?
+ 24 + lg_table[(n >> 24) & 0xff] :
+ 16 + lg_table[(n >> 16) & 0xff]) :
+ ((n & 0x0000ff00) ?
+ 8 + lg_table[(n >> 8) & 0xff] :
+ 0 + lg_table[(n >> 0) & 0xff]);
+#endif
+}
+
+
+/*---------------------------------------------------------------------------*/
+
+/* Simple insertionsort for small size groups. */
+static
+void
+tr_insertionsort(const saidx_t *ISAd, saidx_t *first, saidx_t *last) {
+ saidx_t *a, *b;
+ saidx_t t, r;
+
+ for(a = first + 1; a < last; ++a) {
+ for(t = *a, b = a - 1; 0 > (r = ISAd[t] - ISAd[*b]);) {
+ do { *(b + 1) = *b; } while((first <= --b) && (*b < 0));
+ if(b < first) { break; }
+ }
+ if(r == 0) { *b = ~*b; }
+ *(b + 1) = t;
+ }
+}
+
+
+/*---------------------------------------------------------------------------*/
+
+static INLINE
+void
+tr_fixdown(const saidx_t *ISAd, saidx_t *SA, saidx_t i, saidx_t size) {
+ saidx_t j, k;
+ saidx_t v;
+ saidx_t c, d, e;
+
+ for(v = SA[i], c = ISAd[v]; (j = 2 * i + 1) < size; SA[i] = SA[k], i = k) {
+ d = ISAd[SA[k = j++]];
+ if(d < (e = ISAd[SA[j]])) { k = j; d = e; }
+ if(d <= c) { break; }
+ }
+ SA[i] = v;
+}
+
+/* Simple top-down heapsort. */
+static
+void
+tr_heapsort(const saidx_t *ISAd, saidx_t *SA, saidx_t size) {
+ saidx_t i, m;
+ saidx_t t;
+
+ m = size;
+ if((size % 2) == 0) {
+ m--;
+ if(ISAd[SA[m / 2]] < ISAd[SA[m]]) { SWAP(SA[m], SA[m / 2]); }
+ }
+
+ for(i = m / 2 - 1; 0 <= i; --i) { tr_fixdown(ISAd, SA, i, m); }
+ if((size % 2) == 0) { SWAP(SA[0], SA[m]); tr_fixdown(ISAd, SA, 0, m); }
+ for(i = m - 1; 0 < i; --i) {
+ t = SA[0], SA[0] = SA[i];
+ tr_fixdown(ISAd, SA, 0, i);
+ SA[i] = t;
+ }
+}
+
+
+/*---------------------------------------------------------------------------*/
+
+/* Returns the median of three elements. */
+static INLINE
+saidx_t *
+tr_median3(const saidx_t *ISAd, saidx_t *v1, saidx_t *v2, saidx_t *v3) {
+ saidx_t *t;
+ if(ISAd[*v1] > ISAd[*v2]) { SWAP(v1, v2); }
+ if(ISAd[*v2] > ISAd[*v3]) {
+ if(ISAd[*v1] > ISAd[*v3]) { return v1; }
+ else { return v3; }
+ }
+ return v2;
+}
+
+/* Returns the median of five elements. */
+static INLINE
+saidx_t *
+tr_median5(const saidx_t *ISAd,
+ saidx_t *v1, saidx_t *v2, saidx_t *v3, saidx_t *v4, saidx_t *v5) {
+ saidx_t *t;
+ if(ISAd[*v2] > ISAd[*v3]) { SWAP(v2, v3); }
+ if(ISAd[*v4] > ISAd[*v5]) { SWAP(v4, v5); }
+ if(ISAd[*v2] > ISAd[*v4]) { SWAP(v2, v4); SWAP(v3, v5); }
+ if(ISAd[*v1] > ISAd[*v3]) { SWAP(v1, v3); }
+ if(ISAd[*v1] > ISAd[*v4]) { SWAP(v1, v4); SWAP(v3, v5); }
+ if(ISAd[*v3] > ISAd[*v4]) { return v4; }
+ return v3;
+}
+
+/* Returns the pivot element. */
+static INLINE
+saidx_t *
+tr_pivot(const saidx_t *ISAd, saidx_t *first, saidx_t *last) {
+ saidx_t *middle;
+ saidx_t t;
+
+ t = last - first;
+ middle = first + t / 2;
+
+ if(t <= 512) {
+ if(t <= 32) {
+ return tr_median3(ISAd, first, middle, last - 1);
+ } else {
+ t >>= 2;
+ return tr_median5(ISAd, first, first + t, middle, last - 1 - t, last - 1);
+ }
+ }
+ t >>= 3;
+ first = tr_median3(ISAd, first, first + t, first + (t << 1));
+ middle = tr_median3(ISAd, middle - t, middle, middle + t);
+ last = tr_median3(ISAd, last - 1 - (t << 1), last - 1 - t, last - 1);
+ return tr_median3(ISAd, first, middle, last);
+}
+
+
+/*---------------------------------------------------------------------------*/
+
+typedef struct _trbudget_t trbudget_t;
+struct _trbudget_t {
+ saidx_t chance;
+ saidx_t remain;
+ saidx_t incval;
+ saidx_t count;
+};
+
+static INLINE
+void
+trbudget_init(trbudget_t *budget, saidx_t chance, saidx_t incval) {
+ budget->chance = chance;
+ budget->remain = budget->incval = incval;
+}
+
+static INLINE
+saint_t
+trbudget_check(trbudget_t *budget, saidx_t size) {
+ if(size <= budget->remain) { budget->remain -= size; return 1; }
+ if(budget->chance == 0) { budget->count += size; return 0; }
+ budget->remain += budget->incval - size;
+ budget->chance -= 1;
+ return 1;
+}
+
+
+/*---------------------------------------------------------------------------*/
+
+static INLINE
+void
+tr_partition(const saidx_t *ISAd,
+ saidx_t *first, saidx_t *middle, saidx_t *last,
+ saidx_t **pa, saidx_t **pb, saidx_t v) {
+ saidx_t *a, *b, *c, *d, *e, *f;
+ saidx_t t, s;
+ saidx_t x = 0;
+
+ for(b = middle - 1; (++b < last) && ((x = ISAd[*b]) == v);) { }
+ if(((a = b) < last) && (x < v)) {
+ for(; (++b < last) && ((x = ISAd[*b]) <= v);) {
+ if(x == v) { SWAP(*b, *a); ++a; }
+ }
+ }
+ for(c = last; (b < --c) && ((x = ISAd[*c]) == v);) { }
+ if((b < (d = c)) && (x > v)) {
+ for(; (b < --c) && ((x = ISAd[*c]) >= v);) {
+ if(x == v) { SWAP(*c, *d); --d; }
+ }
+ }
+ for(; b < c;) {
+ SWAP(*b, *c);
+ for(; (++b < c) && ((x = ISAd[*b]) <= v);) {
+ if(x == v) { SWAP(*b, *a); ++a; }
+ }
+ for(; (b < --c) && ((x = ISAd[*c]) >= v);) {
+ if(x == v) { SWAP(*c, *d); --d; }
+ }
+ }
+
+ if(a <= d) {
+ c = b - 1;
+ if((s = a - first) > (t = b - a)) { s = t; }
+ for(e = first, f = b - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); }
+ if((s = d - c) > (t = last - d - 1)) { s = t; }
+ for(e = b, f = last - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); }
+ first += (b - a), last -= (d - c);
+ }
+ *pa = first, *pb = last;
+}
+
+static
+void
+tr_copy(saidx_t *ISA, const saidx_t *SA,
+ saidx_t *first, saidx_t *a, saidx_t *b, saidx_t *last,
+ saidx_t depth) {
+ /* sort suffixes of middle partition
+ by using sorted order of suffixes of left and right partition. */
+ saidx_t *c, *d, *e;
+ saidx_t s, v;
+
+ v = b - SA - 1;
+ for(c = first, d = a - 1; c <= d; ++c) {
+ if((0 <= (s = *c - depth)) && (ISA[s] == v)) {
+ *++d = s;
+ ISA[s] = d - SA;
+ }
+ }
+ for(c = last - 1, e = d + 1, d = b; e < d; --c) {
+ if((0 <= (s = *c - depth)) && (ISA[s] == v)) {
+ *--d = s;
+ ISA[s] = d - SA;
+ }
+ }
+}
+
+static
+void
+tr_partialcopy(saidx_t *ISA, const saidx_t *SA,
+ saidx_t *first, saidx_t *a, saidx_t *b, saidx_t *last,
+ saidx_t depth) {
+ saidx_t *c, *d, *e;
+ saidx_t s, v;
+ saidx_t rank, lastrank, newrank = -1;
+
+ v = b - SA - 1;
+ lastrank = -1;
+ for(c = first, d = a - 1; c <= d; ++c) {
+ if((0 <= (s = *c - depth)) && (ISA[s] == v)) {
+ *++d = s;
+ rank = ISA[s + depth];
+ if(lastrank != rank) { lastrank = rank; newrank = d - SA; }
+ ISA[s] = newrank;
+ }
+ }
+
+ lastrank = -1;
+ for(e = d; first <= e; --e) {
+ rank = ISA[*e];
+ if(lastrank != rank) { lastrank = rank; newrank = e - SA; }
+ if(newrank != rank) { ISA[*e] = newrank; }
+ }
+
+ lastrank = -1;
+ for(c = last - 1, e = d + 1, d = b; e < d; --c) {
+ if((0 <= (s = *c - depth)) && (ISA[s] == v)) {
+ *--d = s;
+ rank = ISA[s + depth];
+ if(lastrank != rank) { lastrank = rank; newrank = d - SA; }
+ ISA[s] = newrank;
+ }
+ }
+}
+
+static
+void
+tr_introsort(saidx_t *ISA, const saidx_t *ISAd,
+ saidx_t *SA, saidx_t *first, saidx_t *last,
+ trbudget_t *budget) {
+#define STACK_SIZE TR_STACKSIZE
+ struct { const saidx_t *a; saidx_t *b, *c; saint_t d, e; }stack[STACK_SIZE];
+ saidx_t *a, *b, *c;
+ saidx_t t;
+ saidx_t v, x = 0;
+ saidx_t incr = ISAd - ISA;
+ saint_t limit, next;
+ saint_t ssize, trlink = -1;
+
+ for(ssize = 0, limit = tr_ilg(last - first);;) {
+
+ if(limit < 0) {
+ if(limit == -1) {
+ /* tandem repeat partition */
+ tr_partition(ISAd - incr, first, first, last, &a, &b, last - SA - 1);
+
+ /* update ranks */
+ if(a < last) {
+ for(c = first, v = a - SA - 1; c < a; ++c) { ISA[*c] = v; }
+ }
+ if(b < last) {
+ for(c = a, v = b - SA - 1; c < b; ++c) { ISA[*c] = v; }
+ }
+
+ /* push */
+ if(1 < (b - a)) {
+ STACK_PUSH5(NULL, a, b, 0, 0);
+ STACK_PUSH5(ISAd - incr, first, last, -2, trlink);
+ trlink = ssize - 2;
+ }
+ if((a - first) <= (last - b)) {
+ if(1 < (a - first)) {
+ STACK_PUSH5(ISAd, b, last, tr_ilg(last - b), trlink);
+ last = a, limit = tr_ilg(a - first);
+ } else if(1 < (last - b)) {
+ first = b, limit = tr_ilg(last - b);
+ } else {
+ STACK_POP5(ISAd, first, last, limit, trlink);
+ }
+ } else {
+ if(1 < (last - b)) {
+ STACK_PUSH5(ISAd, first, a, tr_ilg(a - first), trlink);
+ first = b, limit = tr_ilg(last - b);
+ } else if(1 < (a - first)) {
+ last = a, limit = tr_ilg(a - first);
+ } else {
+ STACK_POP5(ISAd, first, last, limit, trlink);
+ }
+ }
+ } else if(limit == -2) {
+ /* tandem repeat copy */
+ a = stack[--ssize].b, b = stack[ssize].c;
+ if(stack[ssize].d == 0) {
+ tr_copy(ISA, SA, first, a, b, last, ISAd - ISA);
+ } else {
+ if(0 <= trlink) { stack[trlink].d = -1; }
+ tr_partialcopy(ISA, SA, first, a, b, last, ISAd - ISA);
+ }
+ STACK_POP5(ISAd, first, last, limit, trlink);
+ } else {
+ /* sorted partition */
+ if(0 <= *first) {
+ a = first;
+ do { ISA[*a] = a - SA; } while((++a < last) && (0 <= *a));
+ first = a;
+ }
+ if(first < last) {
+ a = first; do { *a = ~*a; } while(*++a < 0);
+ next = (ISA[*a] != ISAd[*a]) ? tr_ilg(a - first + 1) : -1;
+ if(++a < last) { for(b = first, v = a - SA - 1; b < a; ++b) { ISA[*b] = v; } }
+
+ /* push */
+ if(trbudget_check(budget, a - first)) {
+ if((a - first) <= (last - a)) {
+ STACK_PUSH5(ISAd, a, last, -3, trlink);
+ ISAd += incr, last = a, limit = next;
+ } else {
+ if(1 < (last - a)) {
+ STACK_PUSH5(ISAd + incr, first, a, next, trlink);
+ first = a, limit = -3;
+ } else {
+ ISAd += incr, last = a, limit = next;
+ }
+ }
+ } else {
+ if(0 <= trlink) { stack[trlink].d = -1; }
+ if(1 < (last - a)) {
+ first = a, limit = -3;
+ } else {
+ STACK_POP5(ISAd, first, last, limit, trlink);
+ }
+ }
+ } else {
+ STACK_POP5(ISAd, first, last, limit, trlink);
+ }
+ }
+ continue;
+ }
+
+ if((last - first) <= TR_INSERTIONSORT_THRESHOLD) {
+ tr_insertionsort(ISAd, first, last);
+ limit = -3;
+ continue;
+ }
+
+ if(limit-- == 0) {
+ tr_heapsort(ISAd, first, last - first);
+ for(a = last - 1; first < a; a = b) {
+ for(x = ISAd[*a], b = a - 1; (first <= b) && (ISAd[*b] == x); --b) { *b = ~*b; }
+ }
+ limit = -3;
+ continue;
+ }
+
+ /* choose pivot */
+ a = tr_pivot(ISAd, first, last);
+ SWAP(*first, *a);
+ v = ISAd[*first];
+
+ /* partition */
+ tr_partition(ISAd, first, first + 1, last, &a, &b, v);
+ if((last - first) != (b - a)) {
+ next = (ISA[*a] != v) ? tr_ilg(b - a) : -1;
+
+ /* update ranks */
+ for(c = first, v = a - SA - 1; c < a; ++c) { ISA[*c] = v; }
+ if(b < last) { for(c = a, v = b - SA - 1; c < b; ++c) { ISA[*c] = v; } }
+
+ /* push */
+ if((1 < (b - a)) && (trbudget_check(budget, b - a))) {
+ if((a - first) <= (last - b)) {
+ if((last - b) <= (b - a)) {
+ if(1 < (a - first)) {
+ STACK_PUSH5(ISAd + incr, a, b, next, trlink);
+ STACK_PUSH5(ISAd, b, last, limit, trlink);
+ last = a;
+ } else if(1 < (last - b)) {
+ STACK_PUSH5(ISAd + incr, a, b, next, trlink);
+ first = b;
+ } else {
+ ISAd += incr, first = a, last = b, limit = next;
+ }
+ } else if((a - first) <= (b - a)) {
+ if(1 < (a - first)) {
+ STACK_PUSH5(ISAd, b, last, limit, trlink);
+ STACK_PUSH5(ISAd + incr, a, b, next, trlink);
+ last = a;
+ } else {
+ STACK_PUSH5(ISAd, b, last, limit, trlink);
+ ISAd += incr, first = a, last = b, limit = next;
+ }
+ } else {
+ STACK_PUSH5(ISAd, b, last, limit, trlink);
+ STACK_PUSH5(ISAd, first, a, limit, trlink);
+ ISAd += incr, first = a, last = b, limit = next;
+ }
+ } else {
+ if((a - first) <= (b - a)) {
+ if(1 < (last - b)) {
+ STACK_PUSH5(ISAd + incr, a, b, next, trlink);
+ STACK_PUSH5(ISAd, first, a, limit, trlink);
+ first = b;
+ } else if(1 < (a - first)) {
+ STACK_PUSH5(ISAd + incr, a, b, next, trlink);
+ last = a;
+ } else {
+ ISAd += incr, first = a, last = b, limit = next;
+ }
+ } else if((last - b) <= (b - a)) {
+ if(1 < (last - b)) {
+ STACK_PUSH5(ISAd, first, a, limit, trlink);
+ STACK_PUSH5(ISAd + incr, a, b, next, trlink);
+ first = b;
+ } else {
+ STACK_PUSH5(ISAd, first, a, limit, trlink);
+ ISAd += incr, first = a, last = b, limit = next;
+ }
+ } else {
+ STACK_PUSH5(ISAd, first, a, limit, trlink);
+ STACK_PUSH5(ISAd, b, last, limit, trlink);
+ ISAd += incr, first = a, last = b, limit = next;
+ }
+ }
+ } else {
+ if((1 < (b - a)) && (0 <= trlink)) { stack[trlink].d = -1; }
+ if((a - first) <= (last - b)) {
+ if(1 < (a - first)) {
+ STACK_PUSH5(ISAd, b, last, limit, trlink);
+ last = a;
+ } else if(1 < (last - b)) {
+ first = b;
+ } else {
+ STACK_POP5(ISAd, first, last, limit, trlink);
+ }
+ } else {
+ if(1 < (last - b)) {
+ STACK_PUSH5(ISAd, first, a, limit, trlink);
+ first = b;
+ } else if(1 < (a - first)) {
+ last = a;
+ } else {
+ STACK_POP5(ISAd, first, last, limit, trlink);
+ }
+ }
+ }
+ } else {
+ if(trbudget_check(budget, last - first)) {
+ limit = tr_ilg(last - first), ISAd += incr;
+ } else {
+ if(0 <= trlink) { stack[trlink].d = -1; }
+ STACK_POP5(ISAd, first, last, limit, trlink);
+ }
+ }
+ }
+#undef STACK_SIZE
+}
+
+
+
+/*---------------------------------------------------------------------------*/
+
+/*- Function -*/
+
+/* Tandem repeat sort */
+void
+trsort(saidx_t *ISA, saidx_t *SA, saidx_t n, saidx_t depth) {
+ saidx_t *ISAd;
+ saidx_t *first, *last;
+ trbudget_t budget;
+ saidx_t t, skip, unsorted;
+
+ trbudget_init(&budget, tr_ilg(n) * 2 / 3, n);
+/* trbudget_init(&budget, tr_ilg(n) * 3 / 4, n); */
+ for(ISAd = ISA + depth; -n < *SA; ISAd += ISAd - ISA) {
+ first = SA;
+ skip = 0;
+ unsorted = 0;
+ do {
+ if((t = *first) < 0) { first -= t; skip += t; }
+ else {
+ if(skip != 0) { *(first + skip) = skip; skip = 0; }
+ last = SA + ISA[t] + 1;
+ if(1 < (last - first)) {
+ budget.count = 0;
+ tr_introsort(ISA, ISAd, SA, first, last, &budget);
+ if(budget.count != 0) { unsorted += budget.count; }
+ else { skip = first - last; }
+ } else if((last - first) == 1) {
+ skip = -1;
+ }
+ first = last;
+ }
+ } while(first < (SA + n));
+ if(skip != 0) { *(first + skip) = skip; }
+ if(unsorted == 0) { break; }
+ }
+}
diff --git a/tools/z64compress/src/enc/enc.h b/tools/z64compress/src/enc/enc.h
new file mode 100644
index 000000000..60bae3b96
--- /dev/null
+++ b/tools/z64compress/src/enc/enc.h
@@ -0,0 +1,59 @@
+#ifndef Z64COMPRESS_ENC_H_INCLUDED
+#define Z64COMPRESS_ENC_H_INCLUDED
+
+int yazenc(
+ void *src
+ , unsigned src_sz
+ , void *dst
+ , unsigned *dst_sz
+ , void *_ctx
+);
+void *yazCtx_new(void);
+void yazCtx_free(void *_ctx);
+int yazdec(void *_src, void *_dst, unsigned dstSz, unsigned *srcSz);
+
+int lzoenc(
+ void *src
+ , unsigned src_sz
+ , void *dst
+ , unsigned *dst_sz
+ , void *_ctx
+);
+void *lzoCtx_new(void);
+void lzoCtx_free(void *_ctx);
+
+int uclenc(
+ void *src
+ , unsigned src_sz
+ , void *dst
+ , unsigned *dst_sz
+ , void *_ctx
+);
+
+int zx7enc(
+ void *src
+ , unsigned src_sz
+ , void *dst
+ , unsigned *dst_sz
+ , void *_ctx
+);
+
+int
+zlibenc(
+ void *_src
+ , unsigned src_sz
+ , void *_dst
+ , unsigned *dst_sz
+ , void *_ctx
+);
+
+int aplenc(
+ void *_src
+ , unsigned src_sz
+ , void *_dst
+ , unsigned *dst_sz
+ , void *_ctx
+);
+
+#endif /* Z64COMPRESS_ENC_H_INCLUDED */
+
diff --git a/tools/z64compress/src/enc/libdeflate/.cirrus.yml b/tools/z64compress/src/enc/libdeflate/.cirrus.yml
new file mode 100644
index 000000000..a4f5cad51
--- /dev/null
+++ b/tools/z64compress/src/enc/libdeflate/.cirrus.yml
@@ -0,0 +1,10 @@
+task:
+ freebsd_instance:
+ matrix:
+ - image_family: freebsd-12-3
+ - image_family: freebsd-13-0
+ install_script: pkg install -y cmake
+ script:
+ - cmake -B build -DLIBDEFLATE_BUILD_TESTS=1
+ - cmake --build build
+ - ctest --test-dir build
diff --git a/tools/z64compress/src/enc/libdeflate/.github/workflows/ci.yml b/tools/z64compress/src/enc/libdeflate/.github/workflows/ci.yml
new file mode 100644
index 000000000..6902e8e16
--- /dev/null
+++ b/tools/z64compress/src/enc/libdeflate/.github/workflows/ci.yml
@@ -0,0 +1,192 @@
+name: CI
+on: [pull_request]
+
+jobs:
+ x86_64-build-and-test:
+ name: Build and test (x86_64, ${{ matrix.os }}, ${{ matrix.compiler }})
+ strategy:
+ matrix:
+ os: [ubuntu-20.04, ubuntu-18.04]
+ compiler: [gcc, clang]
+ runs-on: ${{ matrix.os }}
+ env:
+ CC: ${{ matrix.compiler }}
+ steps:
+ - uses: actions/checkout@v2
+ - name: Install dependencies
+ run: |
+ sudo apt-get update
+ sudo apt-get install -y clang llvm libz-dev valgrind
+ - run: scripts/run_tests.sh
+
+ other-arch-build-and-test:
+ name: Build and test (${{ matrix.arch }}, Debian Bullseye, ${{ matrix.compiler }})
+ strategy:
+ matrix:
+ arch: [armv6, armv7, aarch64, s390x, ppc64le]
+ compiler: [gcc, clang]
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@v2
+ - uses: uraimo/run-on-arch-action@v2.2.0
+ with:
+ arch: ${{ matrix.arch }}
+ distro: bullseye
+ githubToken: ${{ github.token }}
+ install: |
+ apt-get update
+ apt-get install -y build-essential cmake clang llvm libz-dev
+ run: |
+ tests=(regular)
+ if [ ${{matrix.compiler}} = clang ]; then
+ tests+=(ubsan)
+ fi
+ CC=${{matrix.compiler}} scripts/run_tests.sh "${tests[@]}"
+
+ macos-build-and-test:
+ name: Build and test (macOS)
+ runs-on: macos-latest
+ env:
+ CFLAGS: -Werror -DLIBDEFLATE_ENABLE_ASSERTIONS
+ steps:
+ - uses: actions/checkout@v2
+ - run: cmake -B build -DLIBDEFLATE_BUILD_TESTS=1
+ - run: cmake --build build --verbose
+ - run: ctest --test-dir build
+
+ windows-msys2-build-and-test:
+ name: Build and test (Windows, MSYS2, ${{matrix.sys}})
+ runs-on: windows-latest
+ strategy:
+ matrix:
+ include:
+ - { sys: mingw64, env: x86_64 }
+ - { sys: mingw32, env: i686 }
+ defaults:
+ run:
+ shell: msys2 {0}
+ env:
+ CFLAGS: -Werror -DLIBDEFLATE_ENABLE_ASSERTIONS
+ steps:
+ - uses: actions/checkout@v2
+ - uses: msys2/setup-msys2@v2
+ with:
+ msystem: ${{matrix.sys}}
+ update: true
+ install: >
+ make
+ mingw-w64-${{matrix.env}}-cc
+ mingw-w64-${{matrix.env}}-cmake
+ mingw-w64-${{matrix.env}}-ninja
+ mingw-w64-${{matrix.env}}-zlib
+ - run: cmake -B build -G Ninja -DLIBDEFLATE_BUILD_TESTS=1
+ - run: cmake --build build --verbose
+ - run: ctest --test-dir build
+
+ windows-visualstudio-build-and-test:
+ name: Build and test (Windows, Visual Studio ${{matrix.toolset}}, ${{matrix.platform.vs}})
+ strategy:
+ matrix:
+ platform: [ {vs: x64, vcpkg: x64-windows},
+ {vs: Win32, vcpkg: x86-windows} ]
+ toolset: [v143, ClangCL]
+ runs-on: windows-latest
+ steps:
+ - uses: actions/checkout@v2
+ - uses: microsoft/setup-msbuild@v1.1
+ - run: vcpkg install zlib:${{matrix.platform.vcpkg}}
+ - run: >
+ echo C:\vcpkg\packages\zlib_${{matrix.platform.vcpkg}}\bin
+ | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
+ - run: >
+ cmake -B build -G "Visual Studio 17 2022" -T ${{matrix.toolset}}
+ -A ${{matrix.platform.vs}} -DLIBDEFLATE_BUILD_TESTS=1
+ -DCMAKE_C_FLAGS="/W4 /WX /DLIBDEFLATE_ENABLE_ASSERTIONS /IC:\vcpkg\packages\zlib_${{matrix.platform.vcpkg}}\include"
+ -DZLIB_LIBRARY=C:\vcpkg\packages\zlib_${{matrix.platform.vcpkg}}\lib\zlib.lib
+ - run: cmake --build build --verbose --config Debug
+ - run: ctest --test-dir build -C Debug
+
+ windows-visualstudio-build:
+ name: Build (Windows, Visual Studio ${{matrix.toolset}}, ${{matrix.platform}})
+ strategy:
+ matrix:
+ platform: [ARM64, ARM]
+ toolset: [v143, ClangCL]
+ exclude: # Exclude unsupported combinations
+ - platform: ARM
+ toolset: ClangCL
+ runs-on: windows-latest
+ steps:
+ - uses: actions/checkout@v2
+ - uses: microsoft/setup-msbuild@v1.1
+ - run: >
+ cmake -B build -G "Visual Studio 17 2022" -T ${{matrix.toolset}}
+ -A ${{matrix.platform}} -DCMAKE_C_FLAGS="/W4 /WX"
+ - run: cmake --build build --verbose
+
+ run-clang-static-analyzer:
+ name: Run clang static analyzer
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@v2
+ - name: Install dependencies
+ run: |
+ sudo apt-get update
+ sudo apt-get install -y clang-tools
+ - run: scan-build cmake -B build -DLIBDEFLATE_BUILD_TESTS=1
+ - run: scan-build cmake --build build --verbose
+
+ run-shellcheck:
+ name: Run shellcheck
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@v2
+ - name: Install dependencies
+ run: |
+ sudo apt-get update
+ sudo apt-get install -y shellcheck
+ - name: Run shellcheck
+ run: shellcheck scripts/*.sh
+
+ cross-compile-for-windows:
+ name: Cross compile for Windows
+ runs-on: ubuntu-latest
+ env:
+ CFLAGS: -Werror -DLIBDEFLATE_ENABLE_ASSERTIONS
+ steps:
+ - uses: actions/checkout@v2
+ - name: Install dependencies
+ run: |
+ sudo apt-get update
+ sudo apt-get install -y gcc-mingw-w64-i686 gcc-mingw-w64-x86-64 libz-mingw-w64-dev
+ # Unfortunately Ubuntu doesn't have {i686,x86_64}-w64-mingw32-cmake like
+ # some distros have, so we have to provide our own toolchain files here.
+ - name: 32-bit build
+ run: |
+ scripts/cmake-helper.sh -DLIBDEFLATE_BUILD_TESTS=1 \
+ -DCMAKE_TOOLCHAIN_FILE=scripts/toolchain-i686-w64-mingw32.cmake
+ cmake --build build --verbose
+ - name: 64-bit build
+ run: |
+ scripts/cmake-helper.sh -DLIBDEFLATE_BUILD_TESTS=1 \
+ -DCMAKE_TOOLCHAIN_FILE=scripts/toolchain-x86_64-w64-mingw32.cmake
+ cmake --build build --verbose
+
+ cross-compile-for-android:
+ name: Cross compile for ${{matrix.abi}} Android on ${{matrix.os}}
+ strategy:
+ matrix:
+ os: [ubuntu-18.04, ubuntu-20.04, macos-latest]
+ abi: [armeabi-v7a, arm64-v8a, x86, x86_64]
+ runs-on: ${{matrix.os}}
+ env:
+ CFLAGS: -Werror -DLIBDEFLATE_ENABLE_ASSERTIONS
+ steps:
+ - uses: actions/checkout@v2
+ - run: |
+ scripts/cmake-helper.sh \
+ -DCMAKE_TOOLCHAIN_FILE="$ANDROID_NDK_LATEST_HOME"/build/cmake/android.toolchain.cmake \
+ -DANDROID_ABI=${{matrix.abi}} \
+ -DANDROID_PLATFORM=28 \
+ -DLIBDEFLATE_BUILD_TESTS=1
+ cmake --build build --verbose
diff --git a/tools/z64compress/src/enc/libdeflate/.gitignore b/tools/z64compress/src/enc/libdeflate/.gitignore
new file mode 100644
index 000000000..3a696efc5
--- /dev/null
+++ b/tools/z64compress/src/enc/libdeflate/.gitignore
@@ -0,0 +1,3 @@
+/build*
+cscope*
+tags
diff --git a/tools/z64compress/src/enc/libdeflate/COPYING b/tools/z64compress/src/enc/libdeflate/COPYING
new file mode 100644
index 000000000..1f1b81cd5
--- /dev/null
+++ b/tools/z64compress/src/enc/libdeflate/COPYING
@@ -0,0 +1,21 @@
+Copyright 2016 Eric Biggers
+
+Permission is hereby granted, free of charge, to any person
+obtaining a copy of this software and associated documentation files
+(the "Software"), to deal in the Software without restriction,
+including without limitation the rights to use, copy, modify, merge,
+publish, distribute, sublicense, and/or sell copies of the Software,
+and to permit persons to whom the Software is furnished to do so,
+subject to the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/tools/z64compress/src/enc/libdeflate/NEWS.md b/tools/z64compress/src/enc/libdeflate/NEWS.md
new file mode 100644
index 000000000..497ae2199
--- /dev/null
+++ b/tools/z64compress/src/enc/libdeflate/NEWS.md
@@ -0,0 +1,389 @@
+# libdeflate release notes
+
+## Version 1.15
+
+* libdeflate now uses CMake instead of a plain Makefile.
+
+* Improved MSVC support. Enabled most architecture-specific code with MSVC,
+ fixed building with clang in MSVC compatibility mode, and other improvements.
+
+* When libdeflate is built with MinGW, the static library and import library are
+ now named using the MinGW convention (`*.a` and `*.dll.a`) instead of the
+ Visual Studio convention. This affects the official Windows binaries.
+
+## Version 1.14
+
+Significantly improved decompression performance on all platforms. Examples
+include (measuring DEFLATE only):
+
+| Platform | Speedup over v1.13 |
+|------------------------------------|--------------------|
+| x86_64 (Intel Comet Lake), gcc | 1.287x |
+| x86_64 (Intel Comet Lake), clang | 1.437x |
+| x86_64 (Intel Ice Lake), gcc | 1.332x |
+| x86_64 (Intel Ice Lake), clang | 1.296x |
+| x86_64 (Intel Sandy Bridge), gcc | 1.162x |
+| x86_64 (Intel Sandy Bridge), clang | 1.092x |
+| x86_64 (AMD Zen 2), gcc | 1.263x |
+| x86_64 (AMD Zen 2), clang | 1.259x |
+| i386 (Intel Comet Lake), gcc | 1.570x |
+| i386 (Intel Comet Lake), clang | 1.344x |
+| arm64 (Apple M1), clang | 1.306x |
+| arm64 (Cortex-A76), clang | 1.355x |
+| arm64 (Cortex-A55), clang | 1.190x |
+| arm32 (Cortex-A76), clang | 1.665x |
+| arm32 (Cortex-A55), clang | 1.283x |
+
+Thanks to Dougall Johnson (https://dougallj.wordpress.com/) for ideas for many
+of the improvements.
+
+## Version 1.13
+
+* Changed the 32-bit Windows build of the library to use the default calling
+ convention (cdecl) instead of stdcall, reverting a change from libdeflate 1.4.
+
+* Fixed a couple macOS compatibility issues with the gzip program.
+
+## Version 1.12
+
+This release focuses on improving the performance of the CRC-32 and Adler-32
+checksum algorithms on x86 and ARM (both 32-bit and 64-bit).
+
+* Build updates:
+
+ * Fixed building libdeflate on Apple platforms.
+
+ * For Visual Studio builds, Visual Studio 2015 or later is now required.
+
+* CRC-32 algorithm updates:
+
+ * Improved CRC-32 performance on short inputs on x86 and ARM.
+
+ * Improved CRC-32 performance on Apple Silicon Macs by using a 12-way pmull
+ implementation. Performance on large inputs on M1 is now about 67 GB/s,
+ compared to 8 GB/s before, or 31 GB/s with the Apple-provided zlib.
+
+ * Improved CRC-32 performance on some other ARM CPUs by reworking the code so
+ that multiple crc32 instructions can be issued in parallel.
+
+ * Improved CRC-32 performance on some x86 CPUs by increasing the stride length
+ of the pclmul implementation.
+
+* Adler-32 algorithm updates:
+
+ * Improved Adler-32 performance on some x86 CPUs by optimizing the AVX-2
+ implementation. E.g., performance on Zen 1 improved from 19 to 30 GB/s, and
+ on Ice Lake from 35 to 41 GB/s (if the AVX-512 implementation is excluded).
+
+ * Removed the AVX-512 implementation of Adler-32 to avoid CPU frequency
+ downclocking, and because the AVX-2 implementation was made faster.
+
+ * Improved Adler-32 performance on some ARM CPUs by optimizing the NEON
+ implementation. E.g., Apple M1 improved from about 36 to 52 GB/s.
+
+## Version 1.11
+
+* Library updates:
+
+ * Improved compression performance slightly.
+
+ * Detect arm64 CPU features on Apple platforms, which should improve
+ performance in some areas such as CRC-32 computation.
+
+* Program updates:
+
+ * The included `gzip` and `gunzip` programs now support the `-q` option.
+
+ * The included `gunzip` program now passes through non-gzip data when both
+ the `-f` and `-c` options are used.
+
+* Build updates:
+
+ * Avoided a build error on arm32 with certain gcc versions, by disabling
+ building `crc32_arm()` as dynamically-dispatched code when needed.
+
+ * Support building with the LLVM toolchain on Windows.
+
+ * Disabled the use of the "stdcall" ABI in static library builds on Windows.
+
+ * Use the correct `install_name` in macOS builds.
+
+ * Support Haiku builds.
+
+## Version 1.10
+
+* Added an additional check to the decompressor to make it quickly detect
+ certain bad inputs and not try to generate an unbounded amount of output.
+
+ Note: this was only a problem when decompressing with an unknown output size,
+ which isn't the recommended use case of libdeflate. However,
+ `libdeflate-gunzip` has to do this, and it would run out of memory as it would
+ keep trying to allocate a larger output buffer.
+
+* Fixed a build error on Solaris.
+
+* Cleaned up a few things in the compression code.
+
+## Version 1.9
+
+* Made many improvements to the compression algorithms, and rebalanced the
+ compression levels:
+
+ * Heuristics were implemented which significantly improve the compression
+ ratio on data where short matches aren't useful, such as DNA sequencing
+ data. This applies to all compression levels, but primarily to levels 1-9.
+
+ * Level 1 was made much faster, though it often compresses slightly worse than
+ before (but still better than zlib).
+
+ * Levels 8-9 were also made faster, though they often compress slightly worse
+ than before (but still better than zlib). On some data, levels 8-9 are much
+ faster and compress much better than before; this change addressed an issue
+ where levels 8-9 did poorly on certain files. The algorithm used by levels
+ 8-9 is now more similar to that of levels 6-7 than to that of levels 10-12.
+
+ * Levels 2-3, 7, and 10-12 were strengthened slightly.
+
+ * Levels 4-6 were also strengthened slightly, but some of this improvement was
+ traded off to speed them up slightly as well.
+
+ * Levels 1-9 had their per-compressor memory usage greatly reduced.
+
+ As always, compression ratios will vary depending on the input data, and
+ compression speeds will vary depending on the input data and target platform.
+
+* `make install` will now install a pkg-config file for libdeflate.
+
+* The Makefile now supports the `DISABLE_SHARED` parameter to disable building
+ the shared library.
+
+* Improved the Android build support in the Makefile.
+
+## Version 1.8
+
+* Added `-t` (test) option to `libdeflate-gunzip`.
+
+* Unaligned access optimizations are now enabled on WebAssembly builds.
+
+* Fixed a build error when building with the Intel C Compiler (ICC).
+
+* Fixed a build error when building with uClibc.
+
+* libdeflate's CI system has switched from Travis CI to GitHub Actions.
+
+* Made some improvements to test scripts.
+
+## Version 1.7
+
+* Added support for compression level 0, "no compression".
+
+* Added an ARM CRC32 instruction accelerated implementation of CRC32.
+
+* Added support for linking the programs to the shared library version of
+ libdeflate rather than to the static library version.
+
+* Made the compression level affect the minimum input size at which compression
+ is attempted.
+
+* Fixed undefined behavior in x86 Adler32 implementation. (No miscompilations
+ were observed in practice.)
+
+* Fixed undefined behavior in x86 CPU feature code. (No miscompilations were
+ observed in practice.)
+
+* Fixed installing shared lib symlink on macOS.
+
+* Documented third-party bindings.
+
+* Made a lot of improvements to the testing scripts and the CI configuration
+ file.
+
+* Lots of other small improvements and cleanups.
+
+## Version 1.6
+
+* Prevented gcc 10 from miscompiling libdeflate (workaround for
+ https://gcc.gnu.org/bugzilla/show_bug.cgi?id=94994).
+
+* Removed workaround for gcc 5 and earlier producing slow code on ARM32. If
+ this affects you, please upgrade your compiler.
+
+* New API function: `libdeflate_zlib_decompress_ex()`. It provides the actual
+ size of the stream that was decompressed, like the gzip and DEFLATE
+ equivalents.
+
+* `libdeflate_zlib_decompress()` now accepts trailing bytes after the end of the
+ stream, like the gzip and DEFLATE equivalents.
+
+* Added support for custom memory allocators. (New API function:
+ `libdeflate_set_memory_allocator()`)
+
+* Added support for building the library in freestanding mode.
+
+* Building libdeflate no longer requires `CPPFLAGS=-Icommon`.
+
+## Version 1.5
+
+* Fixed up stdcall support on 32-bit Windows: the functions are now exported
+ using both suffixed and non-suffixed names, and fixed `libdeflate.h` to be
+ MSVC-compatible again.
+
+## Version 1.4
+
+* The 32-bit Windows build of libdeflate now uses the "stdcall" calling
+ convention instead of "cdecl". If you're calling `libdeflate.dll` directly
+ from C or C++, you'll need to recompile your code. If you're calling it from
+ another language, or calling it indirectly using `LoadLibrary()`, you'll need
+ to update your code to use the stdcall calling convention.
+
+* The Makefile now supports building libdeflate as a shared
+ library (`.dylib`) on macOS.
+
+* Fixed a bug where support for certain optimizations and optional features
+ (file access hints and more precise timestamps) was incorrectly omitted when
+ libdeflate was compiled with `-Werror`.
+
+* Added `make check` target to the Makefile.
+
+* Added CI configuration files.
+
+## Version 1.3
+
+* `make install` now supports customizing the directories into which binaries,
+ headers, and libraries are installed.
+
+* `make install` now installs into `/usr/local` by default. To change it, use
+ e.g. `make install PREFIX=/usr`.
+
+* `make install` now works on more platforms.
+
+* The Makefile now supports overriding the optimization flags.
+
+* The compression functions now correctly handle an output data buffer >= 4 GiB
+ in size, and `gzip` and `gunzip` now correctly handle multi-gigabyte files (if
+ enough memory is available).
+
+## Version 1.2
+
+* Slight improvements to decompression speed.
+
+* Added an AVX-512BW implementation of Adler-32.
+
+* The Makefile now supports a user-specified installation `PREFIX`.
+
+* Fixed build error with some Visual Studio versions.
+
+## Version 1.1
+
+* Fixed crash in CRC-32 code when the prebuilt libdeflate for 32-bit Windows was
+ called by a program built with Visual Studio.
+
+* Improved the worst-case decompression speed of malicious data.
+
+* Fixed build error when compiling for an ARM processor without hardware
+ floating point support.
+
+* Improved performance on the PowerPC64 architecture.
+
+* Added soname to `libdeflate.so`, to make packaging easier.
+
+* Added `make install` target to the Makefile.
+
+* The Makefile now supports user-specified `CPPFLAGS`.
+
+* The Windows binary releases now include the import library for
+ `libdeflate.dll`. `libdeflate.lib` is now the import library, and
+ `libdeflatestatic.lib` is the static library.
+
+## Version 1.0
+
+* Added support for multi-member gzip files.
+
+* Moved architecture-specific code into subdirectories. If you aren't using the
+ provided Makefile to build libdeflate, you now need to compile `lib/*.c` and
+ `lib/*/*.c` instead of just `lib/*.c`.
+
+* Added an ARM PMULL implementation of CRC-32, which speeds up gzip compression
+ and decompression on 32-bit and 64-bit ARM processors that have the
+ Cryptography Extensions.
+
+* Improved detection of CPU features, resulting in accelerated functions being
+ used in more cases. This includes:
+
+ * Detect CPU features on 32-bit x86, not just 64-bit as was done previously.
+
+ * Detect CPU features on ARM, both 32 and 64-bit. (Limited to Linux only
+ currently.)
+
+## Version 0.8
+
+* Build fixes for certain platforms and compilers.
+
+* libdeflate now produces the same output on all CPU architectures.
+
+* Improved documentation for building libdeflate on Windows.
+
+## Version 0.7
+
+* Fixed a very rare bug that caused data to be compressed incorrectly. The bug
+ affected compression levels 7 and below since libdeflate v0.2. Although there
+ have been no user reports of the bug, and I believe it would have been highly
+ unlikely to encounter on realistic data, it could occur on data specially
+ crafted to reproduce it.
+
+* Fixed a compilation error when building with clang 3.7.
+
+## Version 0.6
+
+* Various improvements to the gzip program's behavior.
+
+* Faster CRC-32 on AVX-capable processors.
+
+* Other minor changes.
+
+## Version 0.5
+
+* The CRC-32 checksum algorithm has been optimized with carryless multiplication
+ instructions for `x86_64` (PCLMUL). This speeds up gzip compression and
+ decompression.
+
+* Build fixes for certain platforms and compilers.
+
+* Added more test programs and scripts.
+
+* libdeflate is now entirely MIT-licensed.
+
+## Version 0.4
+
+* The Adler-32 checksum algorithm has been optimized with vector instructions
+ for `x86_64` (SSE2 and AVX2) and ARM (NEON). This speeds up zlib compression
+ and decompression.
+
+* To avoid naming collisions, functions and definitions in libdeflate's API have
+ been renamed to be prefixed with `libdeflate_` or `LIBDEFLATE_`. Programs
+ using the old API will need to be updated.
+
+* Various bug fixes and other improvements.
+
+## Version 0.3
+
+* Some bug fixes and other minor changes.
+
+## Version 0.2
+
+* Implemented a new block splitting algorithm which typically improves the
+ compression ratio slightly at all compression levels.
+
+* The compressor now outputs each block using the cheapest type (dynamic
+ Huffman, static Huffman, or uncompressed).
+
+* The gzip program has received an overhaul and now behaves more like the
+ standard version.
+
+* Build system updates, including: some build options were changed and some
+ build options were removed, and the default 'make' target now includes the
+ gzip program as well as the library.
+
+## Version 0.1
+
+* Initial official release.
diff --git a/tools/z64compress/src/enc/libdeflate/README.md b/tools/z64compress/src/enc/libdeflate/README.md
new file mode 100644
index 000000000..f5bbd93c2
--- /dev/null
+++ b/tools/z64compress/src/enc/libdeflate/README.md
@@ -0,0 +1,204 @@
+# Overview
+
+libdeflate is a library for fast, whole-buffer DEFLATE-based compression and
+decompression.
+
+The supported formats are:
+
+- DEFLATE (raw)
+- zlib (a.k.a. DEFLATE with a zlib wrapper)
+- gzip (a.k.a. DEFLATE with a gzip wrapper)
+
+libdeflate is heavily optimized. It is significantly faster than the zlib
+library, both for compression and decompression, and especially on x86
+processors. In addition, libdeflate provides optional high compression modes
+that provide a better compression ratio than zlib's "level 9".
+
+libdeflate itself is a library. The following command-line programs which use
+this library are also included:
+
+* `libdeflate-gzip`, a program which can be a drop-in replacement for standard
+ `gzip` under some circumstances. Note that `libdeflate-gzip` has some
+ limitations; it is provided for convenience and is **not** meant to be the
+ main use case of libdeflate. It needs a lot of memory to process large files,
+ and it omits support for some infrequently-used options of GNU gzip.
+
+* `benchmark`, a test program that does round-trip compression and decompression
+ of the provided data, and measures the compression and decompression speed.
+ It can use libdeflate, zlib, or a combination of the two.
+
+* `checksum`, a test program that checksums the provided data with Adler-32 or
+ CRC-32, and optionally measures the speed. It can use libdeflate or zlib.
+
+For the release notes, see the [NEWS file](NEWS.md).
+
+## Table of Contents
+
+- [Building](#building)
+ - [Using CMake](#using-cmake)
+ - [Directly integrating the library sources](#directly-integrating-the-library-sources)
+- [API](#api)
+- [Bindings for other programming languages](#bindings-for-other-programming-languages)
+- [DEFLATE vs. zlib vs. gzip](#deflate-vs-zlib-vs-gzip)
+- [Compression levels](#compression-levels)
+- [Motivation](#motivation)
+- [License](#license)
+
+# Building
+
+## Using CMake
+
+libdeflate uses [CMake](https://cmake.org/). It can be built just like any
+other CMake project, e.g. with:
+
+ cmake -B build && cmake --build build
+
+By default the following targets are built:
+
+- The static library (normally called `libdeflate.a`)
+- The shared library (normally called `libdeflate.so`)
+- The `libdeflate-gzip` program, including its alias `libdeflate-gunzip`
+
+Besides the standard CMake build and installation options, there are some
+libdeflate-specific build options. See `CMakeLists.txt` for the list of these
+options. To set an option, add `-DOPTION=VALUE` to the `cmake` command.
+
+Prebuilt Windows binaries can be downloaded from
+https://github.com/ebiggers/libdeflate/releases.
+
+## Directly integrating the library sources
+
+Although the official build system is CMake, care has been taken to keep the
+library source files compilable directly, without a prerequisite configuration
+step. Therefore, it is also fine to just add the library source files directly
+to your application, without using CMake.
+
+You should compile both `lib/*.c` and `lib/*/*.c`. You don't need to worry
+about excluding irrelevant architecture-specific code, as this is already
+handled in the source files themselves using `#ifdef`s.
+
+It is strongly recommended to use either gcc or clang, and to use `-O2`.
+
+If you are doing a freestanding build with `-ffreestanding`, you must add
+`-DFREESTANDING` as well (matching what the `CMakeLists.txt` does).
+
+# API
+
+libdeflate has a simple API that is not zlib-compatible. You can create
+compressors and decompressors and use them to compress or decompress buffers.
+See libdeflate.h for details.
+
+There is currently no support for streaming. This has been considered, but it
+always significantly increases complexity and slows down fast paths.
+Unfortunately, at this point it remains a future TODO. So: if your application
+compresses data in "chunks", say, less than 1 MB in size, then libdeflate is a
+great choice for you; that's what it's designed to do. This is perfect for
+certain use cases such as transparent filesystem compression. But if your
+application compresses large files as a single compressed stream, similarly to
+the `gzip` program, then libdeflate isn't for you.
+
+Note that with chunk-based compression, you generally should have the
+uncompressed size of each chunk stored outside of the compressed data itself.
+This enables you to allocate an output buffer of the correct size without
+guessing. However, libdeflate's decompression routines do optionally provide
+the actual number of output bytes in case you need it.
+
+Windows developers: note that the calling convention of libdeflate.dll is
+"cdecl". (libdeflate v1.4 through v1.12 used "stdcall" instead.)
+
+# Bindings for other programming languages
+
+The libdeflate project itself only provides a C library. If you need to use
+libdeflate from a programming language other than C or C++, consider using the
+following bindings:
+
+* C#: [LibDeflate.NET](https://github.com/jzebedee/LibDeflate.NET)
+* Go: [go-libdeflate](https://github.com/4kills/go-libdeflate)
+* Java: [libdeflate-java](https://github.com/astei/libdeflate-java)
+* Julia: [LibDeflate.jl](https://github.com/jakobnissen/LibDeflate.jl)
+* Perl: [Gzip::Libdeflate](https://github.com/benkasminbullock/gzip-libdeflate)
+* Python: [deflate](https://github.com/dcwatson/deflate)
+* Ruby: [libdeflate-ruby](https://github.com/kaorimatz/libdeflate-ruby)
+* Rust: [libdeflater](https://github.com/adamkewley/libdeflater)
+
+Note: these are third-party projects which haven't necessarily been vetted by
+the authors of libdeflate. Please direct all questions, bugs, and improvements
+for these bindings to their authors.
+
+# DEFLATE vs. zlib vs. gzip
+
+The DEFLATE format ([rfc1951](https://www.ietf.org/rfc/rfc1951.txt)), the zlib
+format ([rfc1950](https://www.ietf.org/rfc/rfc1950.txt)), and the gzip format
+([rfc1952](https://www.ietf.org/rfc/rfc1952.txt)) are commonly confused with
+each other as well as with the [zlib software library](http://zlib.net), which
+actually supports all three formats. libdeflate (this library) also supports
+all three formats.
+
+Briefly, DEFLATE is a raw compressed stream, whereas zlib and gzip are different
+wrappers for this stream. Both zlib and gzip include checksums, but gzip can
+include extra information such as the original filename. Generally, you should
+choose a format as follows:
+
+- If you are compressing whole files with no subdivisions, similar to the `gzip`
+ program, you probably should use the gzip format.
+- Otherwise, if you don't need the features of the gzip header and footer but do
+ still want a checksum for corruption detection, you probably should use the
+ zlib format.
+- Otherwise, you probably should use raw DEFLATE. This is ideal if you don't
+ need checksums, e.g. because they're simply not needed for your use case or
+ because you already compute your own checksums that are stored separately from
+ the compressed stream.
+
+Note that gzip and zlib streams can be distinguished from each other based on
+their starting bytes, but this is not necessarily true of raw DEFLATE streams.
+
+# Compression levels
+
+An often-underappreciated fact of compression formats such as DEFLATE is that
+there are an enormous number of different ways that a given input could be
+compressed. Different algorithms and different amounts of computation time will
+result in different compression ratios, while remaining equally compatible with
+the decompressor.
+
+For this reason, the commonly used zlib library provides nine compression
+levels. Level 1 is the fastest but provides the worst compression; level 9
+provides the best compression but is the slowest. It defaults to level 6.
+libdeflate uses this same design but is designed to improve on both zlib's
+performance *and* compression ratio at every compression level. In addition,
+libdeflate's levels go [up to 12](https://xkcd.com/670/) to make room for a
+minimum-cost-path based algorithm (sometimes called "optimal parsing") that can
+significantly improve on zlib's compression ratio.
+
+If you are using DEFLATE (or zlib, or gzip) in your application, you should test
+different levels to see which works best for your application.
+
+# Motivation
+
+Despite DEFLATE's widespread use mainly through the zlib library, in the
+compression community this format from the early 1990s is often considered
+obsolete. And in a few significant ways, it is.
+
+So why implement DEFLATE at all, instead of focusing entirely on
+bzip2/LZMA/xz/LZ4/LZX/ZSTD/Brotli/LZHAM/LZFSE/[insert cool new format here]?
+
+To do something better, you need to understand what came before. And it turns
+out that most ideas from DEFLATE are still relevant. Many of the newer formats
+share a structure similar to DEFLATE's, with different tweaks. The effects of
+trivial but very useful tweaks, such as increasing the sliding window size, are
+often confused with the effects of nontrivial but less useful tweaks. And
+actually, many of these formats are similar enough that common algorithms and
+optimizations (e.g. those dealing with LZ77 matchfinding) can be reused.
+
+In addition, comparing compressors fairly is difficult because the performance
+of a compressor depends heavily on optimizations which are not intrinsic to the
+compression format itself. In this respect, the zlib library sometimes compares
+poorly to certain newer code because zlib is not well optimized for modern
+processors. libdeflate addresses this by providing an optimized DEFLATE
+implementation which can be used for benchmarking purposes. And, of course,
+real applications can use it as well.
+
+# License
+
+libdeflate is [MIT-licensed](COPYING).
+
+I am not aware of any patents or patent applications relevant to libdeflate.
diff --git a/tools/z64compress/src/enc/libdeflate/common_defs.h b/tools/z64compress/src/enc/libdeflate/common_defs.h
new file mode 100644
index 000000000..debdc7d41
--- /dev/null
+++ b/tools/z64compress/src/enc/libdeflate/common_defs.h
@@ -0,0 +1,716 @@
+/*
+ * common_defs.h
+ *
+ * Copyright 2016 Eric Biggers
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef COMMON_DEFS_H
+#define COMMON_DEFS_H
+
+#include <stdbool.h>
+#include <stddef.h>	/* for size_t */
+#include <stdint.h>
+#ifdef _MSC_VER
+#  include <intrin.h>	/* for _BitScan*() and other intrinsics */
+#  include <stdlib.h>	/* for _byteswap_*() */
+ /* Disable MSVC warnings that are expected. */
+ /* /W2 */
+# pragma warning(disable : 4146) /* unary minus on unsigned type */
+ /* /W3 */
+# pragma warning(disable : 4018) /* signed/unsigned mismatch */
+# pragma warning(disable : 4244) /* possible loss of data */
+# pragma warning(disable : 4267) /* possible loss of precision */
+# pragma warning(disable : 4310) /* cast truncates constant value */
+ /* /W4 */
+# pragma warning(disable : 4100) /* unreferenced formal parameter */
+# pragma warning(disable : 4127) /* conditional expression is constant */
+# pragma warning(disable : 4189) /* local variable initialized but not referenced */
+# pragma warning(disable : 4232) /* nonstandard extension used */
+# pragma warning(disable : 4245) /* conversion from 'int' to 'unsigned int' */
+# pragma warning(disable : 4295) /* array too small to include terminating null */
+#endif
+#ifndef FREESTANDING
+#  include <string.h>	/* for memcpy() */
+#endif
+
+/* ========================================================================== */
+/* Target architecture */
+/* ========================================================================== */
+
+/* If possible, define a compiler-independent ARCH_* macro. */
+#undef ARCH_X86_64
+#undef ARCH_X86_32
+#undef ARCH_ARM64
+#undef ARCH_ARM32
+#ifdef _MSC_VER
+# if defined(_M_X64)
+# define ARCH_X86_64
+# elif defined(_M_IX86)
+# define ARCH_X86_32
+# elif defined(_M_ARM64)
+# define ARCH_ARM64
+# elif defined(_M_ARM)
+# define ARCH_ARM32
+# endif
+#else
+# if defined(__x86_64__)
+# define ARCH_X86_64
+# elif defined(__i386__)
+# define ARCH_X86_32
+# elif defined(__aarch64__)
+# define ARCH_ARM64
+# elif defined(__arm__)
+# define ARCH_ARM32
+# endif
+#endif
+
+/* ========================================================================== */
+/* Type definitions */
+/* ========================================================================== */
+
+/* Fixed-width integer types */
+typedef uint8_t u8;
+typedef uint16_t u16;
+typedef uint32_t u32;
+typedef uint64_t u64;
+typedef int8_t s8;
+typedef int16_t s16;
+typedef int32_t s32;
+typedef int64_t s64;
+
+/* ssize_t, if not available in <sys/types.h> */
+#ifdef _MSC_VER
+# ifdef _WIN64
+ typedef long long ssize_t;
+# else
+ typedef long ssize_t;
+# endif
+#endif
+
+/*
+ * Word type of the target architecture. Use 'size_t' instead of
+ * 'unsigned long' to account for platforms such as Windows that use 32-bit
+ * 'unsigned long' on 64-bit architectures.
+ */
+typedef size_t machine_word_t;
+
+/* Number of bytes in a word */
+#define WORDBYTES ((int)sizeof(machine_word_t))
+
+/* Number of bits in a word */
+#define WORDBITS (8 * WORDBYTES)
+
+/* ========================================================================== */
+/* Optional compiler features */
+/* ========================================================================== */
+
+/* Compiler version checks. Only use when absolutely necessary. */
+#if defined(__GNUC__) && !defined(__clang__) && !defined(__INTEL_COMPILER)
+# define GCC_PREREQ(major, minor) \
+ (__GNUC__ > (major) || \
+ (__GNUC__ == (major) && __GNUC_MINOR__ >= (minor)))
+#else
+# define GCC_PREREQ(major, minor) 0
+#endif
+#ifdef __clang__
+# ifdef __apple_build_version__
+# define CLANG_PREREQ(major, minor, apple_version) \
+ (__apple_build_version__ >= (apple_version))
+# else
+# define CLANG_PREREQ(major, minor, apple_version) \
+ (__clang_major__ > (major) || \
+ (__clang_major__ == (major) && __clang_minor__ >= (minor)))
+# endif
+#else
+# define CLANG_PREREQ(major, minor, apple_version) 0
+#endif
+
+/*
+ * Macros to check for compiler support for attributes and builtins. clang
+ * implements these macros, but gcc doesn't, so generally any use of one of
+ * these macros must also be combined with a gcc version check.
+ */
+#ifndef __has_attribute
+# define __has_attribute(attribute) 0
+#endif
+#ifndef __has_builtin
+# define __has_builtin(builtin) 0
+#endif
+
+/* inline - suggest that a function be inlined */
+#ifdef _MSC_VER
+# define inline __inline
+#endif /* else assume 'inline' is usable as-is */
+
+/* forceinline - force a function to be inlined, if possible */
+#if defined(__GNUC__) || __has_attribute(always_inline)
+# define forceinline inline __attribute__((always_inline))
+#elif defined(_MSC_VER)
+# define forceinline __forceinline
+#else
+# define forceinline inline
+#endif
+
+/* MAYBE_UNUSED - mark a function or variable as maybe unused */
+#if defined(__GNUC__) || __has_attribute(unused)
+# define MAYBE_UNUSED __attribute__((unused))
+#else
+# define MAYBE_UNUSED
+#endif
+
+/*
+ * restrict - hint that writes only occur through the given pointer.
+ *
+ * Don't use MSVC's __restrict, since it has nonstandard behavior.
+ * Standard restrict is okay, if it is supported.
+ */
+#if !defined(__STDC_VERSION__) || (__STDC_VERSION__ < 201112L)
+# if defined(__GNUC__) || defined(__clang__)
+# define restrict __restrict__
+# else
+# define restrict
+# endif
+#endif /* else assume 'restrict' is usable as-is */
+
+/* likely(expr) - hint that an expression is usually true */
+#if defined(__GNUC__) || __has_builtin(__builtin_expect)
+# define likely(expr) __builtin_expect(!!(expr), 1)
+#else
+# define likely(expr) (expr)
+#endif
+
+/* unlikely(expr) - hint that an expression is usually false */
+#if defined(__GNUC__) || __has_builtin(__builtin_expect)
+# define unlikely(expr) __builtin_expect(!!(expr), 0)
+#else
+# define unlikely(expr) (expr)
+#endif
+
+/* prefetchr(addr) - prefetch into L1 cache for read */
+#undef prefetchr
+#if defined(__GNUC__) || __has_builtin(__builtin_prefetch)
+# define prefetchr(addr) __builtin_prefetch((addr), 0)
+#elif defined(_MSC_VER)
+# if defined(ARCH_X86_32) || defined(ARCH_X86_64)
+# define prefetchr(addr) _mm_prefetch((addr), _MM_HINT_T0)
+# elif defined(ARCH_ARM64)
+# define prefetchr(addr) __prefetch2((addr), 0x00 /* prfop=PLDL1KEEP */)
+# elif defined(ARCH_ARM32)
+# define prefetchr(addr) __prefetch(addr)
+# endif
+#endif
+#ifndef prefetchr
+# define prefetchr(addr)
+#endif
+
+/* prefetchw(addr) - prefetch into L1 cache for write */
+#undef prefetchw
+#if defined(__GNUC__) || __has_builtin(__builtin_prefetch)
+# define prefetchw(addr) __builtin_prefetch((addr), 1)
+#elif defined(_MSC_VER)
+# if defined(ARCH_X86_32) || defined(ARCH_X86_64)
+# define prefetchw(addr) _m_prefetchw(addr)
+# elif defined(ARCH_ARM64)
+# define prefetchw(addr) __prefetch2((addr), 0x10 /* prfop=PSTL1KEEP */)
+# elif defined(ARCH_ARM32)
+# define prefetchw(addr) __prefetchw(addr)
+# endif
+#endif
+#ifndef prefetchw
+# define prefetchw(addr)
+#endif
+
+/*
+ * _aligned_attribute(n) - declare that the annotated variable, or variables of
+ * the annotated type, must be aligned on n-byte boundaries.
+ */
+#undef _aligned_attribute
+#if defined(__GNUC__) || __has_attribute(aligned)
+# define _aligned_attribute(n) __attribute__((aligned(n)))
+#elif defined(_MSC_VER)
+# define _aligned_attribute(n) __declspec(align(n))
+#endif
+
+/*
+ * _target_attribute(attrs) - override the compilation target for a function.
+ *
+ * This accepts one or more comma-separated suffixes to the -m prefix jointly
+ * forming the name of a machine-dependent option. On gcc-like compilers, this
+ * enables codegen for the given targets, including arbitrary compiler-generated
+ * code as well as the corresponding intrinsics. On other compilers this macro
+ * expands to nothing, though MSVC allows intrinsics to be used anywhere anyway.
+ */
+#if GCC_PREREQ(4, 4) || __has_attribute(target)
+# define _target_attribute(attrs) __attribute__((target(attrs)))
+# define COMPILER_SUPPORTS_TARGET_FUNCTION_ATTRIBUTE 1
+#else
+# define _target_attribute(attrs)
+# define COMPILER_SUPPORTS_TARGET_FUNCTION_ATTRIBUTE 0
+#endif
+
+/* ========================================================================== */
+/* Miscellaneous macros */
+/* ========================================================================== */
+
+#define ARRAY_LEN(A) (sizeof(A) / sizeof((A)[0]))
+#define MIN(a, b) ((a) <= (b) ? (a) : (b))
+#define MAX(a, b) ((a) >= (b) ? (a) : (b))
+#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))
+#define STATIC_ASSERT(expr) ((void)sizeof(char[1 - 2 * !(expr)]))
+#define ALIGN(n, a) (((n) + (a) - 1) & ~((a) - 1))
+#define ROUND_UP(n, d) ((d) * DIV_ROUND_UP((n), (d)))
+
+/* ========================================================================== */
+/* Endianness handling */
+/* ========================================================================== */
+
+/*
+ * CPU_IS_LITTLE_ENDIAN() - 1 if the CPU is little endian, or 0 if it is big
+ * endian. When possible this is a compile-time macro that can be used in
+ * preprocessor conditionals. As a fallback, a generic method is used that
+ * can't be used in preprocessor conditionals but should still be optimized out.
+ */
+#if defined(__BYTE_ORDER__) /* gcc v4.6+ and clang */
+# define CPU_IS_LITTLE_ENDIAN() (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
+#elif defined(_MSC_VER)
+# define CPU_IS_LITTLE_ENDIAN() true
+#else
+static forceinline bool CPU_IS_LITTLE_ENDIAN(void)
+{
+ union {
+ u32 w;
+ u8 b;
+ } u;
+
+ u.w = 1;
+ return u.b;
+}
+#endif
+
+/* bswap16(v) - swap the bytes of a 16-bit integer */
+static forceinline u16 bswap16(u16 v)
+{
+#if GCC_PREREQ(4, 8) || __has_builtin(__builtin_bswap16)
+ return __builtin_bswap16(v);
+#elif defined(_MSC_VER)
+ return _byteswap_ushort(v);
+#else
+ return (v << 8) | (v >> 8);
+#endif
+}
+
+/* bswap32(v) - swap the bytes of a 32-bit integer */
+static forceinline u32 bswap32(u32 v)
+{
+#if GCC_PREREQ(4, 3) || __has_builtin(__builtin_bswap32)
+ return __builtin_bswap32(v);
+#elif defined(_MSC_VER)
+ return _byteswap_ulong(v);
+#else
+ return ((v & 0x000000FF) << 24) |
+ ((v & 0x0000FF00) << 8) |
+ ((v & 0x00FF0000) >> 8) |
+ ((v & 0xFF000000) >> 24);
+#endif
+}
+
+/* bswap64(v) - swap the bytes of a 64-bit integer */
+static forceinline u64 bswap64(u64 v)
+{
+#if GCC_PREREQ(4, 3) || __has_builtin(__builtin_bswap64)
+ return __builtin_bswap64(v);
+#elif defined(_MSC_VER)
+ return _byteswap_uint64(v);
+#else
+ return ((v & 0x00000000000000FF) << 56) |
+ ((v & 0x000000000000FF00) << 40) |
+ ((v & 0x0000000000FF0000) << 24) |
+ ((v & 0x00000000FF000000) << 8) |
+ ((v & 0x000000FF00000000) >> 8) |
+ ((v & 0x0000FF0000000000) >> 24) |
+ ((v & 0x00FF000000000000) >> 40) |
+ ((v & 0xFF00000000000000) >> 56);
+#endif
+}
+
+#define le16_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? (v) : bswap16(v))
+#define le32_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? (v) : bswap32(v))
+#define le64_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? (v) : bswap64(v))
+#define be16_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? bswap16(v) : (v))
+#define be32_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? bswap32(v) : (v))
+#define be64_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? bswap64(v) : (v))
+
+/* ========================================================================== */
+/* Unaligned memory accesses */
+/* ========================================================================== */
+
+/*
+ * UNALIGNED_ACCESS_IS_FAST() - 1 if unaligned memory accesses can be performed
+ * efficiently on the target platform, otherwise 0.
+ */
+#if (defined(__GNUC__) || defined(__clang__)) && \
+ (defined(ARCH_X86_64) || defined(ARCH_X86_32) || \
+ defined(__ARM_FEATURE_UNALIGNED) || defined(__powerpc64__) || \
+ /*
+ * For all compilation purposes, WebAssembly behaves like any other CPU
+ * instruction set. Even though WebAssembly engine might be running on
+ * top of different actual CPU architectures, the WebAssembly spec
+ * itself permits unaligned access and it will be fast on most of those
+ * platforms, and simulated at the engine level on others, so it's
+ * worth treating it as a CPU architecture with fast unaligned access.
+ */ defined(__wasm__))
+# define UNALIGNED_ACCESS_IS_FAST 1
+#elif defined(_MSC_VER)
+# define UNALIGNED_ACCESS_IS_FAST 1
+#else
+# define UNALIGNED_ACCESS_IS_FAST 0
+#endif
+
+/*
+ * Implementing unaligned memory accesses using memcpy() is portable, and it
+ * usually gets optimized appropriately by modern compilers. I.e., each
+ * memcpy() of 1, 2, 4, or WORDBYTES bytes gets compiled to a load or store
+ * instruction, not to an actual function call.
+ *
+ * We no longer use the "packed struct" approach to unaligned accesses, as that
+ * is nonstandard, has unclear semantics, and doesn't receive enough testing
+ * (see https://gcc.gnu.org/bugzilla/show_bug.cgi?id=94994).
+ *
+ * arm32 with __ARM_FEATURE_UNALIGNED in gcc 5 and earlier is a known exception
+ * where memcpy() generates inefficient code
+ * (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=67366). However, we no longer
+ * consider that one case important enough to maintain different code for.
+ * If you run into it, please just use a newer version of gcc (or use clang).
+ */
+
+#ifdef FREESTANDING
+# define MEMCOPY __builtin_memcpy
+#else
+# define MEMCOPY memcpy
+#endif
+
+/* Unaligned loads and stores without endianness conversion */
+
+#define DEFINE_UNALIGNED_TYPE(type) \
+static forceinline type \
+load_##type##_unaligned(const void *p) \
+{ \
+ type v; \
+ \
+ MEMCOPY(&v, p, sizeof(v)); \
+ return v; \
+} \
+ \
+static forceinline void \
+store_##type##_unaligned(type v, void *p) \
+{ \
+ MEMCOPY(p, &v, sizeof(v)); \
+}
+
+DEFINE_UNALIGNED_TYPE(u16)
+DEFINE_UNALIGNED_TYPE(u32)
+DEFINE_UNALIGNED_TYPE(u64)
+DEFINE_UNALIGNED_TYPE(machine_word_t)
+
+#undef MEMCOPY
+
+#define load_word_unaligned load_machine_word_t_unaligned
+#define store_word_unaligned store_machine_word_t_unaligned
+
+/* Unaligned loads with endianness conversion */
+
+static forceinline u16
+get_unaligned_le16(const u8 *p)
+{
+ if (UNALIGNED_ACCESS_IS_FAST)
+ return le16_bswap(load_u16_unaligned(p));
+ else
+ return ((u16)p[1] << 8) | p[0];
+}
+
+static forceinline u16
+get_unaligned_be16(const u8 *p)
+{
+ if (UNALIGNED_ACCESS_IS_FAST)
+ return be16_bswap(load_u16_unaligned(p));
+ else
+ return ((u16)p[0] << 8) | p[1];
+}
+
+static forceinline u32
+get_unaligned_le32(const u8 *p)
+{
+ if (UNALIGNED_ACCESS_IS_FAST)
+ return le32_bswap(load_u32_unaligned(p));
+ else
+ return ((u32)p[3] << 24) | ((u32)p[2] << 16) |
+ ((u32)p[1] << 8) | p[0];
+}
+
+static forceinline u32
+get_unaligned_be32(const u8 *p)
+{
+ if (UNALIGNED_ACCESS_IS_FAST)
+ return be32_bswap(load_u32_unaligned(p));
+ else
+ return ((u32)p[0] << 24) | ((u32)p[1] << 16) |
+ ((u32)p[2] << 8) | p[3];
+}
+
+static forceinline u64
+get_unaligned_le64(const u8 *p)
+{
+ if (UNALIGNED_ACCESS_IS_FAST)
+ return le64_bswap(load_u64_unaligned(p));
+ else
+ return ((u64)p[7] << 56) | ((u64)p[6] << 48) |
+ ((u64)p[5] << 40) | ((u64)p[4] << 32) |
+ ((u64)p[3] << 24) | ((u64)p[2] << 16) |
+ ((u64)p[1] << 8) | p[0];
+}
+
+static forceinline machine_word_t
+get_unaligned_leword(const u8 *p)
+{
+ STATIC_ASSERT(WORDBITS == 32 || WORDBITS == 64);
+ if (WORDBITS == 32)
+ return get_unaligned_le32(p);
+ else
+ return get_unaligned_le64(p);
+}
+
+/* Unaligned stores with endianness conversion */
+
+static forceinline void
+put_unaligned_le16(u16 v, u8 *p)
+{
+ if (UNALIGNED_ACCESS_IS_FAST) {
+ store_u16_unaligned(le16_bswap(v), p);
+ } else {
+ p[0] = (u8)(v >> 0);
+ p[1] = (u8)(v >> 8);
+ }
+}
+
+static forceinline void
+put_unaligned_be16(u16 v, u8 *p)
+{
+ if (UNALIGNED_ACCESS_IS_FAST) {
+ store_u16_unaligned(be16_bswap(v), p);
+ } else {
+ p[0] = (u8)(v >> 8);
+ p[1] = (u8)(v >> 0);
+ }
+}
+
+static forceinline void
+put_unaligned_le32(u32 v, u8 *p)
+{
+ if (UNALIGNED_ACCESS_IS_FAST) {
+ store_u32_unaligned(le32_bswap(v), p);
+ } else {
+ p[0] = (u8)(v >> 0);
+ p[1] = (u8)(v >> 8);
+ p[2] = (u8)(v >> 16);
+ p[3] = (u8)(v >> 24);
+ }
+}
+
+static forceinline void
+put_unaligned_be32(u32 v, u8 *p)
+{
+ if (UNALIGNED_ACCESS_IS_FAST) {
+ store_u32_unaligned(be32_bswap(v), p);
+ } else {
+ p[0] = (u8)(v >> 24);
+ p[1] = (u8)(v >> 16);
+ p[2] = (u8)(v >> 8);
+ p[3] = (u8)(v >> 0);
+ }
+}
+
+static forceinline void
+put_unaligned_le64(u64 v, u8 *p)
+{
+ if (UNALIGNED_ACCESS_IS_FAST) {
+ store_u64_unaligned(le64_bswap(v), p);
+ } else {
+ p[0] = (u8)(v >> 0);
+ p[1] = (u8)(v >> 8);
+ p[2] = (u8)(v >> 16);
+ p[3] = (u8)(v >> 24);
+ p[4] = (u8)(v >> 32);
+ p[5] = (u8)(v >> 40);
+ p[6] = (u8)(v >> 48);
+ p[7] = (u8)(v >> 56);
+ }
+}
+
+static forceinline void
+put_unaligned_leword(machine_word_t v, u8 *p)
+{
+ STATIC_ASSERT(WORDBITS == 32 || WORDBITS == 64);
+ if (WORDBITS == 32)
+ put_unaligned_le32(v, p);
+ else
+ put_unaligned_le64(v, p);
+}
+
+/* ========================================================================== */
+/* Bit manipulation functions */
+/* ========================================================================== */
+
+/*
+ * Bit Scan Reverse (BSR) - find the 0-based index (relative to the least
+ * significant end) of the *most* significant 1 bit in the input value. The
+ * input value must be nonzero!
+ */
+
+static forceinline unsigned
+bsr32(u32 v)
+{
+#if defined(__GNUC__) || __has_builtin(__builtin_clz)
+ return 31 - __builtin_clz(v);
+#elif defined(_MSC_VER)
+ unsigned long i;
+
+ _BitScanReverse(&i, v);
+ return i;
+#else
+ unsigned i = 0;
+
+ while ((v >>= 1) != 0)
+ i++;
+ return i;
+#endif
+}
+
+static forceinline unsigned
+bsr64(u64 v)
+{
+#if defined(__GNUC__) || __has_builtin(__builtin_clzll)
+ return 63 - __builtin_clzll(v);
+#elif defined(_MSC_VER) && defined(_WIN64)
+ unsigned long i;
+
+ _BitScanReverse64(&i, v);
+ return i;
+#else
+ unsigned i = 0;
+
+ while ((v >>= 1) != 0)
+ i++;
+ return i;
+#endif
+}
+
+static forceinline unsigned
+bsrw(machine_word_t v)
+{
+ STATIC_ASSERT(WORDBITS == 32 || WORDBITS == 64);
+ if (WORDBITS == 32)
+ return bsr32(v);
+ else
+ return bsr64(v);
+}
+
+/*
+ * Bit Scan Forward (BSF) - find the 0-based index (relative to the least
+ * significant end) of the *least* significant 1 bit in the input value. The
+ * input value must be nonzero!
+ */
+
+static forceinline unsigned
+bsf32(u32 v)
+{
+#if defined(__GNUC__) || __has_builtin(__builtin_ctz)
+ return __builtin_ctz(v);
+#elif defined(_MSC_VER)
+ unsigned long i;
+
+ _BitScanForward(&i, v);
+ return i;
+#else
+ unsigned i = 0;
+
+ for (; (v & 1) == 0; v >>= 1)
+ i++;
+ return i;
+#endif
+}
+
+static forceinline unsigned
+bsf64(u64 v)
+{
+#if defined(__GNUC__) || __has_builtin(__builtin_ctzll)
+ return __builtin_ctzll(v);
+#elif defined(_MSC_VER) && defined(_WIN64)
+ unsigned long i;
+
+ _BitScanForward64(&i, v);
+ return i;
+#else
+ unsigned i = 0;
+
+ for (; (v & 1) == 0; v >>= 1)
+ i++;
+ return i;
+#endif
+}
+
+static forceinline unsigned
+bsfw(machine_word_t v)
+{
+ STATIC_ASSERT(WORDBITS == 32 || WORDBITS == 64);
+ if (WORDBITS == 32)
+ return bsf32(v);
+ else
+ return bsf64(v);
+}
+
+/*
+ * rbit32(v): reverse the bits in a 32-bit integer. This doesn't have a
+ * fallback implementation; use '#ifdef rbit32' to check if this is available.
+ */
+#undef rbit32
+#if (defined(__GNUC__) || defined(__clang__)) && defined(ARCH_ARM32) && \
+ (__ARM_ARCH >= 7 || (__ARM_ARCH == 6 && defined(__ARM_ARCH_6T2__)))
+static forceinline u32
+rbit32(u32 v)
+{
+ __asm__("rbit %0, %1" : "=r" (v) : "r" (v));
+ return v;
+}
+#define rbit32 rbit32
+#elif (defined(__GNUC__) || defined(__clang__)) && defined(ARCH_ARM64)
+static forceinline u32
+rbit32(u32 v)
+{
+ __asm__("rbit %w0, %w1" : "=r" (v) : "r" (v));
+ return v;
+}
+#define rbit32 rbit32
+#endif
+
+#endif /* COMMON_DEFS_H */
diff --git a/tools/z64compress/src/enc/libdeflate/lib/adler32.c b/tools/z64compress/src/enc/libdeflate/lib/adler32.c
new file mode 100644
index 000000000..b743c6943
--- /dev/null
+++ b/tools/z64compress/src/enc/libdeflate/lib/adler32.c
@@ -0,0 +1,131 @@
+/*
+ * adler32.c - Adler-32 checksum algorithm
+ *
+ * Copyright 2016 Eric Biggers
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "lib_common.h"
+#include "libdeflate.h"
+
+/* The Adler-32 divisor, or "base", value */
+#define DIVISOR 65521
+
+/*
+ * MAX_CHUNK_LEN is the most bytes that can be processed without the possibility
+ * of s2 overflowing when it is represented as an unsigned 32-bit integer. This
+ * value was computed using the following Python script:
+ *
+ * divisor = 65521
+ * count = 0
+ * s1 = divisor - 1
+ * s2 = divisor - 1
+ * while True:
+ * s1 += 0xFF
+ * s2 += s1
+ * if s2 > 0xFFFFFFFF:
+ * break
+ * count += 1
+ * print(count)
+ *
+ * Note that to get the correct worst-case value, we must assume that every byte
+ * has value 0xFF and that s1 and s2 started with the highest possible values
+ * modulo the divisor.
+ */
+#define MAX_CHUNK_LEN 5552
+
+static u32 MAYBE_UNUSED
+adler32_generic(u32 adler, const u8 *p, size_t len)
+{
+ u32 s1 = adler & 0xFFFF;
+ u32 s2 = adler >> 16;
+ const u8 * const end = p + len;
+
+ while (p != end) {
+ size_t chunk_len = MIN(end - p, MAX_CHUNK_LEN);
+ const u8 *chunk_end = p + chunk_len;
+ size_t num_unrolled_iterations = chunk_len / 4;
+
+ while (num_unrolled_iterations--) {
+ s1 += *p++;
+ s2 += s1;
+ s1 += *p++;
+ s2 += s1;
+ s1 += *p++;
+ s2 += s1;
+ s1 += *p++;
+ s2 += s1;
+ }
+ while (p != chunk_end) {
+ s1 += *p++;
+ s2 += s1;
+ }
+ s1 %= DIVISOR;
+ s2 %= DIVISOR;
+ }
+
+ return (s2 << 16) | s1;
+}
+
+/* Include architecture-specific implementation(s) if available. */
+#undef DEFAULT_IMPL
+#undef arch_select_adler32_func
+typedef u32 (*adler32_func_t)(u32 adler, const u8 *p, size_t len);
+#if defined(ARCH_ARM32) || defined(ARCH_ARM64)
+# include "arm/adler32_impl.h"
+#elif defined(ARCH_X86_32) || defined(ARCH_X86_64)
+# include "x86/adler32_impl.h"
+#endif
+
+#ifndef DEFAULT_IMPL
+# define DEFAULT_IMPL adler32_generic
+#endif
+
+#ifdef arch_select_adler32_func
+static u32 dispatch_adler32(u32 adler, const u8 *p, size_t len);
+
+static volatile adler32_func_t adler32_impl = dispatch_adler32;
+
+/* Choose the best implementation at runtime. */
+static u32 dispatch_adler32(u32 adler, const u8 *p, size_t len)
+{
+ adler32_func_t f = arch_select_adler32_func();
+
+ if (f == NULL)
+ f = DEFAULT_IMPL;
+
+ adler32_impl = f;
+ return f(adler, p, len);
+}
+#else
+/* The best implementation is statically known, so call it directly. */
+#define adler32_impl DEFAULT_IMPL
+#endif
+
+LIBDEFLATEAPI u32
+libdeflate_adler32(u32 adler, const void *buffer, size_t len)
+{
+ if (buffer == NULL) /* Return initial value. */
+ return 1;
+ return adler32_impl(adler, buffer, len);
+}
diff --git a/tools/z64compress/src/enc/libdeflate/lib/adler32_vec_template.h b/tools/z64compress/src/enc/libdeflate/lib/adler32_vec_template.h
new file mode 100644
index 000000000..98c086bbc
--- /dev/null
+++ b/tools/z64compress/src/enc/libdeflate/lib/adler32_vec_template.h
@@ -0,0 +1,123 @@
+/*
+ * adler32_vec_template.h - template for vectorized Adler-32 implementations
+ *
+ * Copyright 2016 Eric Biggers
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * This file contains a template for vectorized Adler-32 implementations.
+ *
+ * The inner loop between reductions modulo 65521 of an unvectorized Adler-32
+ * implementation looks something like this:
+ *
+ * do {
+ * s1 += *p;
+ * s2 += s1;
+ * } while (++p != chunk_end);
+ *
+ * For vectorized calculation of s1, we only need to sum the input bytes. They
+ * can be accumulated into multiple counters which are eventually summed
+ * together.
+ *
+ * For vectorized calculation of s2, the basic idea is that for each iteration
+ * that processes N bytes, we can perform the following vectorizable
+ * calculation:
+ *
+ * s2 += N*byte_1 + (N-1)*byte_2 + (N-2)*byte_3 + ... + 1*byte_N
+ *
+ * Or, equivalently, we can sum the byte_1...byte_N for each iteration into N
+ * separate counters, then do the multiplications by N...1 just once at the end
+ * rather than once per iteration.
+ *
+ * Also, we must account for how previous bytes will affect s2 by doing the
+ * following at beginning of each iteration:
+ *
+ * s2 += s1 * N
+ *
+ * Furthermore, like s1, "s2" can actually be multiple counters which are
+ * eventually summed together.
+ */
+
+static u32 ATTRIBUTES MAYBE_UNUSED
+FUNCNAME(u32 adler, const u8 *p, size_t len)
+{
+ const size_t max_chunk_len =
+ MIN(MAX_CHUNK_LEN, IMPL_MAX_CHUNK_LEN) -
+ (MIN(MAX_CHUNK_LEN, IMPL_MAX_CHUNK_LEN) % IMPL_SEGMENT_LEN);
+ u32 s1 = adler & 0xFFFF;
+ u32 s2 = adler >> 16;
+ const u8 * const end = p + len;
+ const u8 *vend;
+
+ /* Process a byte at a time until the needed alignment is reached. */
+ if (p != end && (uintptr_t)p % IMPL_ALIGNMENT) {
+ do {
+ s1 += *p++;
+ s2 += s1;
+ } while (p != end && (uintptr_t)p % IMPL_ALIGNMENT);
+ s1 %= DIVISOR;
+ s2 %= DIVISOR;
+ }
+
+ /*
+ * Process "chunks" of bytes using vector instructions. Chunk lengths
+ * are limited to MAX_CHUNK_LEN, which guarantees that s1 and s2 never
+ * overflow before being reduced modulo DIVISOR. For vector processing,
+ * chunk lengths are also made evenly divisible by IMPL_SEGMENT_LEN and
+ * may be further limited to IMPL_MAX_CHUNK_LEN.
+ */
+ STATIC_ASSERT(IMPL_SEGMENT_LEN % IMPL_ALIGNMENT == 0);
+ vend = end - ((size_t)(end - p) % IMPL_SEGMENT_LEN);
+ while (p != vend) {
+ size_t chunk_len = MIN((size_t)(vend - p), max_chunk_len);
+
+ s2 += s1 * chunk_len;
+
+ FUNCNAME_CHUNK((const void *)p, (const void *)(p + chunk_len),
+ &s1, &s2);
+
+ p += chunk_len;
+ s1 %= DIVISOR;
+ s2 %= DIVISOR;
+ }
+
+ /* Process any remaining bytes. */
+ if (p != end) {
+ do {
+ s1 += *p++;
+ s2 += s1;
+ } while (p != end);
+ s1 %= DIVISOR;
+ s2 %= DIVISOR;
+ }
+
+ return (s2 << 16) | s1;
+}
+
+#undef FUNCNAME
+#undef FUNCNAME_CHUNK
+#undef ATTRIBUTES
+#undef IMPL_ALIGNMENT
+#undef IMPL_SEGMENT_LEN
+#undef IMPL_MAX_CHUNK_LEN
diff --git a/tools/z64compress/src/enc/libdeflate/lib/arm/adler32_impl.h b/tools/z64compress/src/enc/libdeflate/lib/arm/adler32_impl.h
new file mode 100644
index 000000000..4083b2ef3
--- /dev/null
+++ b/tools/z64compress/src/enc/libdeflate/lib/arm/adler32_impl.h
@@ -0,0 +1,272 @@
+/*
+ * arm/adler32_impl.h - ARM implementations of Adler-32 checksum algorithm
+ *
+ * Copyright 2016 Eric Biggers
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef LIB_ARM_ADLER32_IMPL_H
+#define LIB_ARM_ADLER32_IMPL_H
+
+#include "cpu_features.h"
+
+/* Regular NEON implementation */
+#if HAVE_NEON_INTRIN && CPU_IS_LITTLE_ENDIAN()
+# define adler32_neon adler32_neon
+# define FUNCNAME adler32_neon
+# define FUNCNAME_CHUNK adler32_neon_chunk
+# define IMPL_ALIGNMENT 16
+# define IMPL_SEGMENT_LEN 64
+/* Prevent unsigned overflow of the 16-bit precision byte counters */
+# define IMPL_MAX_CHUNK_LEN (64 * (0xFFFF / 0xFF))
+# if HAVE_NEON_NATIVE
+# define ATTRIBUTES
+# else
+# ifdef ARCH_ARM32
+# define ATTRIBUTES _target_attribute("fpu=neon")
+# else
+# define ATTRIBUTES _target_attribute("+simd")
+# endif
+# endif
+# include <arm_neon.h>
+static forceinline ATTRIBUTES void
+adler32_neon_chunk(const uint8x16_t *p, const uint8x16_t * const end,
+ u32 *s1, u32 *s2)
+{
+ static const u16 _aligned_attribute(16) mults[64] = {
+ 64, 63, 62, 61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 51, 50, 49,
+ 48, 47, 46, 45, 44, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34, 33,
+ 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17,
+ 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1,
+ };
+ const uint16x8_t mults_a = vld1q_u16(&mults[0]);
+ const uint16x8_t mults_b = vld1q_u16(&mults[8]);
+ const uint16x8_t mults_c = vld1q_u16(&mults[16]);
+ const uint16x8_t mults_d = vld1q_u16(&mults[24]);
+ const uint16x8_t mults_e = vld1q_u16(&mults[32]);
+ const uint16x8_t mults_f = vld1q_u16(&mults[40]);
+ const uint16x8_t mults_g = vld1q_u16(&mults[48]);
+ const uint16x8_t mults_h = vld1q_u16(&mults[56]);
+
+ uint32x4_t v_s1 = vdupq_n_u32(0);
+ uint32x4_t v_s2 = vdupq_n_u32(0);
+ /*
+ * v_byte_sums_* contain the sum of the bytes at index i across all
+ * 64-byte segments, for each index 0..63.
+ */
+ uint16x8_t v_byte_sums_a = vdupq_n_u16(0);
+ uint16x8_t v_byte_sums_b = vdupq_n_u16(0);
+ uint16x8_t v_byte_sums_c = vdupq_n_u16(0);
+ uint16x8_t v_byte_sums_d = vdupq_n_u16(0);
+ uint16x8_t v_byte_sums_e = vdupq_n_u16(0);
+ uint16x8_t v_byte_sums_f = vdupq_n_u16(0);
+ uint16x8_t v_byte_sums_g = vdupq_n_u16(0);
+ uint16x8_t v_byte_sums_h = vdupq_n_u16(0);
+
+ do {
+ /* Load the next 64 bytes. */
+ const uint8x16_t bytes1 = *p++;
+ const uint8x16_t bytes2 = *p++;
+ const uint8x16_t bytes3 = *p++;
+ const uint8x16_t bytes4 = *p++;
+ uint16x8_t tmp;
+
+ /*
+ * Accumulate the previous s1 counters into the s2 counters.
+ * The needed multiplication by 64 is delayed to later.
+ */
+ v_s2 = vaddq_u32(v_s2, v_s1);
+
+ /*
+ * Add the 64 bytes to their corresponding v_byte_sums counters,
+ * while also accumulating the sums of each adjacent set of 4
+ * bytes into v_s1.
+ */
+ tmp = vpaddlq_u8(bytes1);
+ v_byte_sums_a = vaddw_u8(v_byte_sums_a, vget_low_u8(bytes1));
+ v_byte_sums_b = vaddw_u8(v_byte_sums_b, vget_high_u8(bytes1));
+ tmp = vpadalq_u8(tmp, bytes2);
+ v_byte_sums_c = vaddw_u8(v_byte_sums_c, vget_low_u8(bytes2));
+ v_byte_sums_d = vaddw_u8(v_byte_sums_d, vget_high_u8(bytes2));
+ tmp = vpadalq_u8(tmp, bytes3);
+ v_byte_sums_e = vaddw_u8(v_byte_sums_e, vget_low_u8(bytes3));
+ v_byte_sums_f = vaddw_u8(v_byte_sums_f, vget_high_u8(bytes3));
+ tmp = vpadalq_u8(tmp, bytes4);
+ v_byte_sums_g = vaddw_u8(v_byte_sums_g, vget_low_u8(bytes4));
+ v_byte_sums_h = vaddw_u8(v_byte_sums_h, vget_high_u8(bytes4));
+ v_s1 = vpadalq_u16(v_s1, tmp);
+
+ } while (p != end);
+
+ /* s2 = 64*s2 + (64*bytesum0 + 63*bytesum1 + ... + 1*bytesum63) */
+#ifdef ARCH_ARM32
+# define umlal2(a, b, c) vmlal_u16((a), vget_high_u16(b), vget_high_u16(c))
+#else
+# define umlal2 vmlal_high_u16
+#endif
+ v_s2 = vqshlq_n_u32(v_s2, 6);
+ v_s2 = vmlal_u16(v_s2, vget_low_u16(v_byte_sums_a), vget_low_u16(mults_a));
+ v_s2 = umlal2(v_s2, v_byte_sums_a, mults_a);
+ v_s2 = vmlal_u16(v_s2, vget_low_u16(v_byte_sums_b), vget_low_u16(mults_b));
+ v_s2 = umlal2(v_s2, v_byte_sums_b, mults_b);
+ v_s2 = vmlal_u16(v_s2, vget_low_u16(v_byte_sums_c), vget_low_u16(mults_c));
+ v_s2 = umlal2(v_s2, v_byte_sums_c, mults_c);
+ v_s2 = vmlal_u16(v_s2, vget_low_u16(v_byte_sums_d), vget_low_u16(mults_d));
+ v_s2 = umlal2(v_s2, v_byte_sums_d, mults_d);
+ v_s2 = vmlal_u16(v_s2, vget_low_u16(v_byte_sums_e), vget_low_u16(mults_e));
+ v_s2 = umlal2(v_s2, v_byte_sums_e, mults_e);
+ v_s2 = vmlal_u16(v_s2, vget_low_u16(v_byte_sums_f), vget_low_u16(mults_f));
+ v_s2 = umlal2(v_s2, v_byte_sums_f, mults_f);
+ v_s2 = vmlal_u16(v_s2, vget_low_u16(v_byte_sums_g), vget_low_u16(mults_g));
+ v_s2 = umlal2(v_s2, v_byte_sums_g, mults_g);
+ v_s2 = vmlal_u16(v_s2, vget_low_u16(v_byte_sums_h), vget_low_u16(mults_h));
+ v_s2 = umlal2(v_s2, v_byte_sums_h, mults_h);
+#undef umlal2
+
+ /* Horizontal sum to finish up */
+#ifdef ARCH_ARM32
+ *s1 += vgetq_lane_u32(v_s1, 0) + vgetq_lane_u32(v_s1, 1) +
+ vgetq_lane_u32(v_s1, 2) + vgetq_lane_u32(v_s1, 3);
+ *s2 += vgetq_lane_u32(v_s2, 0) + vgetq_lane_u32(v_s2, 1) +
+ vgetq_lane_u32(v_s2, 2) + vgetq_lane_u32(v_s2, 3);
+#else
+ *s1 += vaddvq_u32(v_s1);
+ *s2 += vaddvq_u32(v_s2);
+#endif
+}
+# include "../adler32_vec_template.h"
+#endif /* Regular NEON implementation */
+
+/* NEON+dotprod implementation */
+#if HAVE_DOTPROD_INTRIN && CPU_IS_LITTLE_ENDIAN()
+# define adler32_neon_dotprod adler32_neon_dotprod
+# define FUNCNAME adler32_neon_dotprod
+# define FUNCNAME_CHUNK adler32_neon_dotprod_chunk
+# define IMPL_ALIGNMENT 16
+# define IMPL_SEGMENT_LEN 64
+# define IMPL_MAX_CHUNK_LEN MAX_CHUNK_LEN
+# if HAVE_DOTPROD_NATIVE
+# define ATTRIBUTES
+# else
+# ifdef __clang__
+# define ATTRIBUTES _target_attribute("dotprod")
+ /*
+ * With gcc, arch=armv8.2-a is needed for dotprod intrinsics, unless the
+ * default target is armv8.3-a or later in which case it must be omitted.
+ * armv8.3-a or later can be detected by checking for __ARM_FEATURE_JCVT.
+ */
+# elif defined(__ARM_FEATURE_JCVT)
+# define ATTRIBUTES _target_attribute("+dotprod")
+# else
+# define ATTRIBUTES _target_attribute("arch=armv8.2-a+dotprod")
+# endif
+# endif
+# include <arm_neon.h>
+static forceinline ATTRIBUTES void
+adler32_neon_dotprod_chunk(const uint8x16_t *p, const uint8x16_t * const end,
+ u32 *s1, u32 *s2)
+{
+ static const u8 _aligned_attribute(16) mults[64] = {
+ 64, 63, 62, 61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 51, 50, 49,
+ 48, 47, 46, 45, 44, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34, 33,
+ 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17,
+ 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1,
+ };
+ const uint8x16_t mults_a = vld1q_u8(&mults[0]);
+ const uint8x16_t mults_b = vld1q_u8(&mults[16]);
+ const uint8x16_t mults_c = vld1q_u8(&mults[32]);
+ const uint8x16_t mults_d = vld1q_u8(&mults[48]);
+ const uint8x16_t ones = vdupq_n_u8(1);
+ uint32x4_t v_s1_a = vdupq_n_u32(0);
+ uint32x4_t v_s1_b = vdupq_n_u32(0);
+ uint32x4_t v_s1_c = vdupq_n_u32(0);
+ uint32x4_t v_s1_d = vdupq_n_u32(0);
+ uint32x4_t v_s2_a = vdupq_n_u32(0);
+ uint32x4_t v_s2_b = vdupq_n_u32(0);
+ uint32x4_t v_s2_c = vdupq_n_u32(0);
+ uint32x4_t v_s2_d = vdupq_n_u32(0);
+ uint32x4_t v_s1_sums_a = vdupq_n_u32(0);
+ uint32x4_t v_s1_sums_b = vdupq_n_u32(0);
+ uint32x4_t v_s1_sums_c = vdupq_n_u32(0);
+ uint32x4_t v_s1_sums_d = vdupq_n_u32(0);
+ uint32x4_t v_s1;
+ uint32x4_t v_s2;
+ uint32x4_t v_s1_sums;
+
+ do {
+ uint8x16_t bytes_a = *p++;
+ uint8x16_t bytes_b = *p++;
+ uint8x16_t bytes_c = *p++;
+ uint8x16_t bytes_d = *p++;
+
+ v_s1_sums_a = vaddq_u32(v_s1_sums_a, v_s1_a);
+ v_s1_a = vdotq_u32(v_s1_a, bytes_a, ones);
+ v_s2_a = vdotq_u32(v_s2_a, bytes_a, mults_a);
+
+ v_s1_sums_b = vaddq_u32(v_s1_sums_b, v_s1_b);
+ v_s1_b = vdotq_u32(v_s1_b, bytes_b, ones);
+ v_s2_b = vdotq_u32(v_s2_b, bytes_b, mults_b);
+
+ v_s1_sums_c = vaddq_u32(v_s1_sums_c, v_s1_c);
+ v_s1_c = vdotq_u32(v_s1_c, bytes_c, ones);
+ v_s2_c = vdotq_u32(v_s2_c, bytes_c, mults_c);
+
+ v_s1_sums_d = vaddq_u32(v_s1_sums_d, v_s1_d);
+ v_s1_d = vdotq_u32(v_s1_d, bytes_d, ones);
+ v_s2_d = vdotq_u32(v_s2_d, bytes_d, mults_d);
+ } while (p != end);
+
+ v_s1 = vaddq_u32(vaddq_u32(v_s1_a, v_s1_b), vaddq_u32(v_s1_c, v_s1_d));
+ v_s2 = vaddq_u32(vaddq_u32(v_s2_a, v_s2_b), vaddq_u32(v_s2_c, v_s2_d));
+ v_s1_sums = vaddq_u32(vaddq_u32(v_s1_sums_a, v_s1_sums_b),
+ vaddq_u32(v_s1_sums_c, v_s1_sums_d));
+ v_s2 = vaddq_u32(v_s2, vqshlq_n_u32(v_s1_sums, 6));
+
+ *s1 += vaddvq_u32(v_s1);
+ *s2 += vaddvq_u32(v_s2);
+}
+# include "../adler32_vec_template.h"
+#endif /* NEON+dotprod implementation */
+
+#if defined(adler32_neon_dotprod) && HAVE_DOTPROD_NATIVE
+#define DEFAULT_IMPL adler32_neon_dotprod
+#else
+static inline adler32_func_t
+arch_select_adler32_func(void)
+{
+ const u32 features MAYBE_UNUSED = get_arm_cpu_features();
+
+#ifdef adler32_neon_dotprod
+ if (HAVE_NEON(features) && HAVE_DOTPROD(features))
+ return adler32_neon_dotprod;
+#endif
+#ifdef adler32_neon
+ if (HAVE_NEON(features))
+ return adler32_neon;
+#endif
+ return NULL;
+}
+#define arch_select_adler32_func arch_select_adler32_func
+#endif
+
+#endif /* LIB_ARM_ADLER32_IMPL_H */
diff --git a/tools/z64compress/src/enc/libdeflate/lib/arm/cpu_features.c b/tools/z64compress/src/enc/libdeflate/lib/arm/cpu_features.c
new file mode 100644
index 000000000..ed710bc6f
--- /dev/null
+++ b/tools/z64compress/src/enc/libdeflate/lib/arm/cpu_features.c
@@ -0,0 +1,211 @@
+/*
+ * arm/cpu_features.c - feature detection for ARM CPUs
+ *
+ * Copyright 2018 Eric Biggers
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * ARM CPUs don't have a standard way for unprivileged programs to detect CPU
+ * features. But an OS-specific way can be used when available.
+ */
+
+#ifdef __APPLE__
+#undef _ANSI_SOURCE
+#define _DARWIN_C_SOURCE /* for sysctlbyname() */
+#endif
+
+#include "../cpu_features_common.h" /* must be included first */
+#include "cpu_features.h"
+
+#if HAVE_DYNAMIC_ARM_CPU_FEATURES
+
+#ifdef __linux__
+/*
+ * On Linux, arm32 and arm64 CPU features can be detected by reading the
+ * AT_HWCAP and AT_HWCAP2 values from /proc/self/auxv.
+ *
+ * Ideally we'd use the C library function getauxval(), but it's not guaranteed
+ * to be available: it was only added to glibc in 2.16, and in Android it was
+ * added to API level 18 for arm32 and level 21 for arm64.
+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <string.h>
+#include <unistd.h>
+
+#define AT_HWCAP 16
+#define AT_HWCAP2 26
+
+static void scan_auxv(unsigned long *hwcap, unsigned long *hwcap2)
+{
+ int fd;
+ unsigned long auxbuf[32];
+ int filled = 0;
+ int i;
+
+ fd = open("/proc/self/auxv", O_RDONLY);
+ if (fd < 0)
+ return;
+
+ for (;;) {
+ do {
+ int ret = read(fd, &((char *)auxbuf)[filled],
+ sizeof(auxbuf) - filled);
+ if (ret <= 0) {
+ if (ret < 0 && errno == EINTR)
+ continue;
+ goto out;
+ }
+ filled += ret;
+ } while (filled < 2 * sizeof(long));
+
+ i = 0;
+ do {
+ unsigned long type = auxbuf[i];
+ unsigned long value = auxbuf[i + 1];
+
+ if (type == AT_HWCAP)
+ *hwcap = value;
+ else if (type == AT_HWCAP2)
+ *hwcap2 = value;
+ i += 2;
+ filled -= 2 * sizeof(long);
+ } while (filled >= 2 * sizeof(long));
+
+ memmove(auxbuf, &auxbuf[i], filled);
+ }
+out:
+ close(fd);
+}
+
+static u32 query_arm_cpu_features(void)
+{
+ u32 features = 0;
+ unsigned long hwcap = 0;
+ unsigned long hwcap2 = 0;
+
+ scan_auxv(&hwcap, &hwcap2);
+
+#ifdef ARCH_ARM32
+ STATIC_ASSERT(sizeof(long) == 4);
+ if (hwcap & (1 << 12)) /* HWCAP_NEON */
+ features |= ARM_CPU_FEATURE_NEON;
+ if (hwcap2 & (1 << 1)) /* HWCAP2_PMULL */
+ features |= ARM_CPU_FEATURE_PMULL;
+ if (hwcap2 & (1 << 4)) /* HWCAP2_CRC32 */
+ features |= ARM_CPU_FEATURE_CRC32;
+#else
+ STATIC_ASSERT(sizeof(long) == 8);
+ if (hwcap & (1 << 1)) /* HWCAP_ASIMD */
+ features |= ARM_CPU_FEATURE_NEON;
+ if (hwcap & (1 << 4)) /* HWCAP_PMULL */
+ features |= ARM_CPU_FEATURE_PMULL;
+ if (hwcap & (1 << 7)) /* HWCAP_CRC32 */
+ features |= ARM_CPU_FEATURE_CRC32;
+ if (hwcap & (1 << 17)) /* HWCAP_SHA3 */
+ features |= ARM_CPU_FEATURE_SHA3;
+ if (hwcap & (1 << 20)) /* HWCAP_ASIMDDP */
+ features |= ARM_CPU_FEATURE_DOTPROD;
+#endif
+ return features;
+}
+
+#elif defined(__APPLE__)
+/* On Apple platforms, arm64 CPU features can be detected via sysctlbyname(). */
+
+#include <sys/types.h>
+#include <sys/sysctl.h>
+
+static const struct {
+ const char *name;
+ u32 feature;
+} feature_sysctls[] = {
+ { "hw.optional.neon", ARM_CPU_FEATURE_NEON },
+ { "hw.optional.AdvSIMD", ARM_CPU_FEATURE_NEON },
+ { "hw.optional.arm.FEAT_PMULL", ARM_CPU_FEATURE_PMULL },
+ { "hw.optional.armv8_crc32", ARM_CPU_FEATURE_CRC32 },
+ { "hw.optional.armv8_2_sha3", ARM_CPU_FEATURE_SHA3 },
+ { "hw.optional.arm.FEAT_SHA3", ARM_CPU_FEATURE_SHA3 },
+ { "hw.optional.arm.FEAT_DotProd", ARM_CPU_FEATURE_DOTPROD },
+};
+
+static u32 query_arm_cpu_features(void)
+{
+ u32 features = 0;
+ size_t i;
+
+ for (i = 0; i < ARRAY_LEN(feature_sysctls); i++) {
+ const char *name = feature_sysctls[i].name;
+ u32 val = 0;
+ size_t valsize = sizeof(val);
+
+ if (sysctlbyname(name, &val, &valsize, NULL, 0) == 0 &&
+ valsize == sizeof(val) && val == 1)
+ features |= feature_sysctls[i].feature;
+ }
+ return features;
+}
+#elif defined(_WIN32)
+
+#include <windows.h>
+
+static u32 query_arm_cpu_features(void)
+{
+ u32 features = ARM_CPU_FEATURE_NEON;
+
+ if (IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE))
+ features |= ARM_CPU_FEATURE_PMULL;
+ if (IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE))
+ features |= ARM_CPU_FEATURE_CRC32;
+
+ /* FIXME: detect SHA3 and DOTPROD support too. */
+
+ return features;
+}
+#else
+#error "unhandled case"
+#endif
+
+static const struct cpu_feature arm_cpu_feature_table[] = {
+ {ARM_CPU_FEATURE_NEON, "neon"},
+ {ARM_CPU_FEATURE_PMULL, "pmull"},
+ {ARM_CPU_FEATURE_CRC32, "crc32"},
+ {ARM_CPU_FEATURE_SHA3, "sha3"},
+ {ARM_CPU_FEATURE_DOTPROD, "dotprod"},
+};
+
+volatile u32 libdeflate_arm_cpu_features = 0;
+
+void libdeflate_init_arm_cpu_features(void)
+{
+ u32 features = query_arm_cpu_features();
+
+ disable_cpu_features_for_testing(&features, arm_cpu_feature_table,
+ ARRAY_LEN(arm_cpu_feature_table));
+
+ libdeflate_arm_cpu_features = features | ARM_CPU_FEATURES_KNOWN;
+}
+
+#endif /* HAVE_DYNAMIC_ARM_CPU_FEATURES */
diff --git a/tools/z64compress/src/enc/libdeflate/lib/arm/cpu_features.h b/tools/z64compress/src/enc/libdeflate/lib/arm/cpu_features.h
new file mode 100644
index 000000000..548d31ea8
--- /dev/null
+++ b/tools/z64compress/src/enc/libdeflate/lib/arm/cpu_features.h
@@ -0,0 +1,223 @@
+/*
+ * arm/cpu_features.h - feature detection for ARM CPUs
+ *
+ * Copyright 2018 Eric Biggers
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef LIB_ARM_CPU_FEATURES_H
+#define LIB_ARM_CPU_FEATURES_H
+
+#include "../lib_common.h"
+
+#define HAVE_DYNAMIC_ARM_CPU_FEATURES 0
+
+#if defined(ARCH_ARM32) || defined(ARCH_ARM64)
+
+#if !defined(FREESTANDING) && \
+ (COMPILER_SUPPORTS_TARGET_FUNCTION_ATTRIBUTE || defined(_MSC_VER)) && \
+ (defined(__linux__) || \
+ (defined(__APPLE__) && defined(ARCH_ARM64)) || \
+ (defined(_WIN32) && defined(ARCH_ARM64)))
+# undef HAVE_DYNAMIC_ARM_CPU_FEATURES
+# define HAVE_DYNAMIC_ARM_CPU_FEATURES 1
+#endif
+
+#define ARM_CPU_FEATURE_NEON 0x00000001
+#define ARM_CPU_FEATURE_PMULL 0x00000002
+#define ARM_CPU_FEATURE_CRC32 0x00000004
+#define ARM_CPU_FEATURE_SHA3 0x00000008
+#define ARM_CPU_FEATURE_DOTPROD 0x00000010
+
+#define HAVE_NEON(features) (HAVE_NEON_NATIVE || ((features) & ARM_CPU_FEATURE_NEON))
+#define HAVE_PMULL(features) (HAVE_PMULL_NATIVE || ((features) & ARM_CPU_FEATURE_PMULL))
+#define HAVE_CRC32(features) (HAVE_CRC32_NATIVE || ((features) & ARM_CPU_FEATURE_CRC32))
+#define HAVE_SHA3(features) (HAVE_SHA3_NATIVE || ((features) & ARM_CPU_FEATURE_SHA3))
+#define HAVE_DOTPROD(features) (HAVE_DOTPROD_NATIVE || ((features) & ARM_CPU_FEATURE_DOTPROD))
+
+#if HAVE_DYNAMIC_ARM_CPU_FEATURES
+#define ARM_CPU_FEATURES_KNOWN 0x80000000
+extern volatile u32 libdeflate_arm_cpu_features;
+
+void libdeflate_init_arm_cpu_features(void);
+
+static inline u32 get_arm_cpu_features(void)
+{
+ if (libdeflate_arm_cpu_features == 0)
+ libdeflate_init_arm_cpu_features();
+ return libdeflate_arm_cpu_features;
+}
+#else /* HAVE_DYNAMIC_ARM_CPU_FEATURES */
+static inline u32 get_arm_cpu_features(void) { return 0; }
+#endif /* !HAVE_DYNAMIC_ARM_CPU_FEATURES */
+
+/* NEON */
+#if defined(__ARM_NEON) || defined(ARCH_ARM64)
+# define HAVE_NEON_NATIVE 1
+#else
+# define HAVE_NEON_NATIVE 0
+#endif
+/*
+ * With both gcc and clang, NEON intrinsics require that the main target has
+ * NEON enabled already. Exception: with gcc 6.1 and later (r230411 for arm32,
+ * r226563 for arm64), hardware floating point support is sufficient.
+ */
+#if HAVE_NEON_NATIVE || \
+ (HAVE_DYNAMIC_ARM_CPU_FEATURES && GCC_PREREQ(6, 1) && defined(__ARM_FP))
+# define HAVE_NEON_INTRIN 1
+#else
+# define HAVE_NEON_INTRIN 0
+#endif
+
+/* PMULL */
+#ifdef __ARM_FEATURE_CRYPTO
+# define HAVE_PMULL_NATIVE 1
+#else
+# define HAVE_PMULL_NATIVE 0
+#endif
+#if HAVE_PMULL_NATIVE || \
+ (HAVE_DYNAMIC_ARM_CPU_FEATURES && \
+ (GCC_PREREQ(6, 1) || __has_builtin(__builtin_neon_vmull_p64) || \
+ defined(_MSC_VER)) && \
+ /*
+ * On arm32 with clang, the crypto intrinsics (which include pmull)
+ * are not defined, even when using -mfpu=crypto-neon-fp-armv8,
+ * because clang's <arm_neon.h> puts their definitions behind
+ * __aarch64__.
+ */ \
+ !(defined(ARCH_ARM32) && defined(__clang__)))
+# define HAVE_PMULL_INTRIN CPU_IS_LITTLE_ENDIAN() /* untested on big endian */
+ /* Work around MSVC's vmull_p64() taking poly64x1_t instead of poly64_t */
+# ifdef _MSC_VER
+# define compat_vmull_p64(a, b) vmull_p64(vcreate_p64(a), vcreate_p64(b))
+# else
+# define compat_vmull_p64(a, b) vmull_p64((a), (b))
+# endif
+#else
+# define HAVE_PMULL_INTRIN 0
+#endif
+
+/* CRC32 */
+#ifdef __ARM_FEATURE_CRC32
+# define HAVE_CRC32_NATIVE 1
+#else
+# define HAVE_CRC32_NATIVE 0
+#endif
+/*
+ * Support for ARM CRC32 intrinsics when CRC32 instructions are not enabled in
+ * the main target has been affected by two gcc bugs, which we must avoid by
+ * only allowing gcc versions that have the corresponding fixes. First, gcc
+ * commit 943766d37ae4 ("[arm] Fix use of CRC32 intrinsics with Armv8-a and
+ * hard-float"), i.e. gcc 8.4+, 9.3+, 10.1+, or 11+, is needed. Second, gcc
+ * commit c1cdabe3aab8 ("arm: reorder assembler architecture directives
+ * [PR101723]"), i.e. gcc 9.5+, 10.4+, 11.3+, or 12+, is needed when binutils is
+ * 2.34 or later, due to https://gcc.gnu.org/bugzilla/show_bug.cgi?id=104439.
+ * We use the second set of prerequisites, as they are stricter and we have no
+ * way to detect the binutils version directly from a C source file.
+ */
+#if HAVE_CRC32_NATIVE || \
+ (HAVE_DYNAMIC_ARM_CPU_FEATURES && \
+ (__has_builtin(__builtin_arm_crc32b) || \
+ GCC_PREREQ(11, 3) || \
+ (GCC_PREREQ(10, 4) && !GCC_PREREQ(11, 0)) || \
+ (GCC_PREREQ(9, 5) && !GCC_PREREQ(10, 0)) || \
+ defined(_MSC_VER)))
+# define HAVE_CRC32_INTRIN 1
+#else
+# define HAVE_CRC32_INTRIN 0
+#endif
+
+/* SHA3 (needed for the eor3 instruction) */
+#if defined(ARCH_ARM64) && !defined(_MSC_VER)
+# ifdef __ARM_FEATURE_SHA3
+# define HAVE_SHA3_NATIVE 1
+# else
+# define HAVE_SHA3_NATIVE 0
+# endif
+# define HAVE_SHA3_TARGET (HAVE_DYNAMIC_ARM_CPU_FEATURES && \
+ (GCC_PREREQ(8, 1) /* r256478 */ || \
+ CLANG_PREREQ(7, 0, 10010463) /* r338010 */))
+# define HAVE_SHA3_INTRIN (HAVE_NEON_INTRIN && \
+ (HAVE_SHA3_NATIVE || HAVE_SHA3_TARGET) && \
+ (GCC_PREREQ(9, 1) /* r268049 */ || \
+ __has_builtin(__builtin_neon_veor3q_v)))
+#else
+# define HAVE_SHA3_NATIVE 0
+# define HAVE_SHA3_TARGET 0
+# define HAVE_SHA3_INTRIN 0
+#endif
+
+/* dotprod */
+#ifdef ARCH_ARM64
+# ifdef __ARM_FEATURE_DOTPROD
+# define HAVE_DOTPROD_NATIVE 1
+# else
+# define HAVE_DOTPROD_NATIVE 0
+# endif
+# if HAVE_DOTPROD_NATIVE || \
+ (HAVE_DYNAMIC_ARM_CPU_FEATURES && \
+ (GCC_PREREQ(8, 1) || __has_builtin(__builtin_neon_vdotq_v) || \
+ defined(_MSC_VER)))
+# define HAVE_DOTPROD_INTRIN 1
+# else
+# define HAVE_DOTPROD_INTRIN 0
+# endif
+#else
+# define HAVE_DOTPROD_NATIVE 0
+# define HAVE_DOTPROD_INTRIN 0
+#endif
+
+/*
+ * Work around bugs in arm_acle.h and arm_neon.h where sometimes intrinsics are
+ * only defined when the corresponding __ARM_FEATURE_* macro is defined. The
+ * intrinsics actually work in target attribute functions too if they are
+ * defined, though, so work around this by temporarily defining the
+ * corresponding __ARM_FEATURE_* macros while including the headers.
+ */
+#if HAVE_CRC32_INTRIN && !HAVE_CRC32_NATIVE && \
+ (defined(__clang__) || defined(ARCH_ARM32))
+# define __ARM_FEATURE_CRC32 1
+#endif
+#if HAVE_SHA3_INTRIN && !HAVE_SHA3_NATIVE && defined(__clang__)
+# define __ARM_FEATURE_SHA3 1
+#endif
+#if HAVE_DOTPROD_INTRIN && !HAVE_DOTPROD_NATIVE && defined(__clang__)
+# define __ARM_FEATURE_DOTPROD 1
+#endif
+#if HAVE_CRC32_INTRIN && !HAVE_CRC32_NATIVE && \
+ (defined(__clang__) || defined(ARCH_ARM32))
+# include <arm_acle.h>
+# undef __ARM_FEATURE_CRC32
+#endif
+#if HAVE_SHA3_INTRIN && !HAVE_SHA3_NATIVE && defined(__clang__)
+# include <arm_neon.h>
+# undef __ARM_FEATURE_SHA3
+#endif
+#if HAVE_DOTPROD_INTRIN && !HAVE_DOTPROD_NATIVE && defined(__clang__)
+# include <arm_neon.h>
+# undef __ARM_FEATURE_DOTPROD
+#endif
+
+#endif /* ARCH_ARM32 || ARCH_ARM64 */
+
+#endif /* LIB_ARM_CPU_FEATURES_H */
diff --git a/tools/z64compress/src/enc/libdeflate/lib/arm/crc32_impl.h b/tools/z64compress/src/enc/libdeflate/lib/arm/crc32_impl.h
new file mode 100644
index 000000000..e426a63d6
--- /dev/null
+++ b/tools/z64compress/src/enc/libdeflate/lib/arm/crc32_impl.h
@@ -0,0 +1,665 @@
+/*
+ * arm/crc32_impl.h - ARM implementations of the gzip CRC-32 algorithm
+ *
+ * Copyright 2022 Eric Biggers
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef LIB_ARM_CRC32_IMPL_H
+#define LIB_ARM_CRC32_IMPL_H
+
+#include "cpu_features.h"
+
+/*
+ * crc32_arm_crc() - implementation using crc32 instructions (only)
+ *
+ * In general this implementation is straightforward. However, naive use of the
+ * crc32 instructions is serial: one of the two inputs to each crc32 instruction
+ * is the output of the previous one. To take advantage of CPUs that can
+ * execute multiple crc32 instructions in parallel, when possible we interleave
+ * the checksumming of several adjacent chunks, then combine their CRCs.
+ *
+ * However, without pmull, combining CRCs is fairly slow. So in this pmull-less
+ * version, we only use a large chunk length, and thus we only do chunked
+ * processing if there is a lot of data to checksum. This also means that a
+ * variable chunk length wouldn't help much, so we just support a fixed length.
+ */
+#if HAVE_CRC32_INTRIN
+# if HAVE_CRC32_NATIVE
+# define ATTRIBUTES
+# else
+# ifdef ARCH_ARM32
+# ifdef __clang__
+# define ATTRIBUTES _target_attribute("armv8-a,crc")
+# else
+# define ATTRIBUTES _target_attribute("arch=armv8-a+crc")
+# endif
+# else
+# ifdef __clang__
+# define ATTRIBUTES _target_attribute("crc")
+# else
+# define ATTRIBUTES _target_attribute("+crc")
+# endif
+# endif
+# endif
+
+#ifndef _MSC_VER
+# include <arm_acle.h>
+#endif
+
+/*
+ * Combine the CRCs for 4 adjacent chunks of length L = CRC32_FIXED_CHUNK_LEN
+ * bytes each by computing:
+ *
+ * [ crc0*x^(3*8*L) + crc1*x^(2*8*L) + crc2*x^(1*8*L) + crc3 ] mod G(x)
+ *
+ * This has been optimized in several ways:
+ *
+ * - The needed multipliers (x to some power, reduced mod G(x)) were
+ * precomputed.
+ *
+ * - The 3 multiplications are interleaved.
+ *
+ * - The reduction mod G(x) is delayed to the end and done using __crc32d.
+ * Note that the use of __crc32d introduces an extra factor of x^32. To
+ * cancel that out along with the extra factor of x^1 that gets introduced
+ * because of how the 63-bit products are aligned in their 64-bit integers,
+ * the multipliers are actually x^(j*8*L - 33) instead of x^(j*8*L).
+ */
+static forceinline ATTRIBUTES u32
+combine_crcs_slow(u32 crc0, u32 crc1, u32 crc2, u32 crc3)
+{
+ u64 res0 = 0, res1 = 0, res2 = 0;
+ int i;
+
+ /* Multiply crc{0,1,2} by CRC32_FIXED_CHUNK_MULT_{3,2,1}. */
+ for (i = 0; i < 32; i++) {
+ if (CRC32_FIXED_CHUNK_MULT_3 & (1U << i))
+ res0 ^= (u64)crc0 << i;
+ if (CRC32_FIXED_CHUNK_MULT_2 & (1U << i))
+ res1 ^= (u64)crc1 << i;
+ if (CRC32_FIXED_CHUNK_MULT_1 & (1U << i))
+ res2 ^= (u64)crc2 << i;
+ }
+ /* Add the different parts and reduce mod G(x). */
+ return __crc32d(0, res0 ^ res1 ^ res2) ^ crc3;
+}
+
+#define crc32_arm_crc crc32_arm_crc
+static u32 ATTRIBUTES MAYBE_UNUSED
+crc32_arm_crc(u32 crc, const u8 *p, size_t len)
+{
+ if (len >= 64) {
+ const size_t align = -(uintptr_t)p & 7;
+
+ /* Align p to the next 8-byte boundary. */
+ if (align) {
+ if (align & 1)
+ crc = __crc32b(crc, *p++);
+ if (align & 2) {
+ crc = __crc32h(crc, le16_bswap(*(u16 *)p));
+ p += 2;
+ }
+ if (align & 4) {
+ crc = __crc32w(crc, le32_bswap(*(u32 *)p));
+ p += 4;
+ }
+ len -= align;
+ }
+ /*
+ * Interleave the processing of multiple adjacent data chunks to
+ * take advantage of instruction-level parallelism.
+ *
+ * Some CPUs don't prefetch the data if it's being fetched in
+ * multiple interleaved streams, so do explicit prefetching.
+ */
+ while (len >= CRC32_NUM_CHUNKS * CRC32_FIXED_CHUNK_LEN) {
+ const u64 *wp0 = (const u64 *)p;
+ const u64 * const wp0_end =
+ (const u64 *)(p + CRC32_FIXED_CHUNK_LEN);
+ u32 crc1 = 0, crc2 = 0, crc3 = 0;
+
+ STATIC_ASSERT(CRC32_NUM_CHUNKS == 4);
+ STATIC_ASSERT(CRC32_FIXED_CHUNK_LEN % (4 * 8) == 0);
+ do {
+ prefetchr(&wp0[64 + 0*CRC32_FIXED_CHUNK_LEN/8]);
+ prefetchr(&wp0[64 + 1*CRC32_FIXED_CHUNK_LEN/8]);
+ prefetchr(&wp0[64 + 2*CRC32_FIXED_CHUNK_LEN/8]);
+ prefetchr(&wp0[64 + 3*CRC32_FIXED_CHUNK_LEN/8]);
+ crc = __crc32d(crc, le64_bswap(wp0[0*CRC32_FIXED_CHUNK_LEN/8]));
+ crc1 = __crc32d(crc1, le64_bswap(wp0[1*CRC32_FIXED_CHUNK_LEN/8]));
+ crc2 = __crc32d(crc2, le64_bswap(wp0[2*CRC32_FIXED_CHUNK_LEN/8]));
+ crc3 = __crc32d(crc3, le64_bswap(wp0[3*CRC32_FIXED_CHUNK_LEN/8]));
+ wp0++;
+ crc = __crc32d(crc, le64_bswap(wp0[0*CRC32_FIXED_CHUNK_LEN/8]));
+ crc1 = __crc32d(crc1, le64_bswap(wp0[1*CRC32_FIXED_CHUNK_LEN/8]));
+ crc2 = __crc32d(crc2, le64_bswap(wp0[2*CRC32_FIXED_CHUNK_LEN/8]));
+ crc3 = __crc32d(crc3, le64_bswap(wp0[3*CRC32_FIXED_CHUNK_LEN/8]));
+ wp0++;
+ crc = __crc32d(crc, le64_bswap(wp0[0*CRC32_FIXED_CHUNK_LEN/8]));
+ crc1 = __crc32d(crc1, le64_bswap(wp0[1*CRC32_FIXED_CHUNK_LEN/8]));
+ crc2 = __crc32d(crc2, le64_bswap(wp0[2*CRC32_FIXED_CHUNK_LEN/8]));
+ crc3 = __crc32d(crc3, le64_bswap(wp0[3*CRC32_FIXED_CHUNK_LEN/8]));
+ wp0++;
+ crc = __crc32d(crc, le64_bswap(wp0[0*CRC32_FIXED_CHUNK_LEN/8]));
+ crc1 = __crc32d(crc1, le64_bswap(wp0[1*CRC32_FIXED_CHUNK_LEN/8]));
+ crc2 = __crc32d(crc2, le64_bswap(wp0[2*CRC32_FIXED_CHUNK_LEN/8]));
+ crc3 = __crc32d(crc3, le64_bswap(wp0[3*CRC32_FIXED_CHUNK_LEN/8]));
+ wp0++;
+ } while (wp0 != wp0_end);
+ crc = combine_crcs_slow(crc, crc1, crc2, crc3);
+ p += CRC32_NUM_CHUNKS * CRC32_FIXED_CHUNK_LEN;
+ len -= CRC32_NUM_CHUNKS * CRC32_FIXED_CHUNK_LEN;
+ }
+ /*
+ * Due to the large fixed chunk length used above, there might
+ * still be a lot of data left. So use a 64-byte loop here,
+ * instead of a loop that is less unrolled.
+ */
+ while (len >= 64) {
+ crc = __crc32d(crc, le64_bswap(*(u64 *)(p + 0)));
+ crc = __crc32d(crc, le64_bswap(*(u64 *)(p + 8)));
+ crc = __crc32d(crc, le64_bswap(*(u64 *)(p + 16)));
+ crc = __crc32d(crc, le64_bswap(*(u64 *)(p + 24)));
+ crc = __crc32d(crc, le64_bswap(*(u64 *)(p + 32)));
+ crc = __crc32d(crc, le64_bswap(*(u64 *)(p + 40)));
+ crc = __crc32d(crc, le64_bswap(*(u64 *)(p + 48)));
+ crc = __crc32d(crc, le64_bswap(*(u64 *)(p + 56)));
+ p += 64;
+ len -= 64;
+ }
+ }
+ if (len & 32) {
+ crc = __crc32d(crc, get_unaligned_le64(p + 0));
+ crc = __crc32d(crc, get_unaligned_le64(p + 8));
+ crc = __crc32d(crc, get_unaligned_le64(p + 16));
+ crc = __crc32d(crc, get_unaligned_le64(p + 24));
+ p += 32;
+ }
+ if (len & 16) {
+ crc = __crc32d(crc, get_unaligned_le64(p + 0));
+ crc = __crc32d(crc, get_unaligned_le64(p + 8));
+ p += 16;
+ }
+ if (len & 8) {
+ crc = __crc32d(crc, get_unaligned_le64(p));
+ p += 8;
+ }
+ if (len & 4) {
+ crc = __crc32w(crc, get_unaligned_le32(p));
+ p += 4;
+ }
+ if (len & 2) {
+ crc = __crc32h(crc, get_unaligned_le16(p));
+ p += 2;
+ }
+ if (len & 1)
+ crc = __crc32b(crc, *p);
+ return crc;
+}
+#undef ATTRIBUTES
+#endif /* crc32_arm_crc() */
+
+/*
+ * crc32_arm_crc_pmullcombine() - implementation using crc32 instructions, plus
+ * pmull instructions for CRC combining
+ *
+ * This is similar to crc32_arm_crc(), but it enables the use of pmull
+ * (carryless multiplication) instructions for the steps where the CRCs of
+ * adjacent data chunks are combined. As this greatly speeds up CRC
+ * combination, this implementation also differs from crc32_arm_crc() in that it
+ * uses a variable chunk length which can get fairly small. The precomputed
+ * multipliers needed for the selected chunk length are loaded from a table.
+ *
+ * Note that pmull is used here only for combining the CRCs of separately
+ * checksummed chunks, not for folding the data itself. See crc32_arm_pmull*()
+ * for implementations that use pmull for folding the data itself.
+ */
+#if HAVE_CRC32_INTRIN && HAVE_PMULL_INTRIN
+# if HAVE_CRC32_NATIVE && HAVE_PMULL_NATIVE
+# define ATTRIBUTES
+# else
+# ifdef ARCH_ARM32
+# define ATTRIBUTES _target_attribute("arch=armv8-a+crc,fpu=crypto-neon-fp-armv8")
+# else
+# ifdef __clang__
+# define ATTRIBUTES _target_attribute("crc,crypto")
+# else
+# define ATTRIBUTES _target_attribute("+crc,+crypto")
+# endif
+# endif
+# endif
+
+#ifndef _MSC_VER
+# include <arm_acle.h>
+#endif
+#include <arm_neon.h>
+
+/* Do carryless multiplication of two 32-bit values. */
+static forceinline ATTRIBUTES u64
+clmul_u32(u32 a, u32 b)
+{
+ uint64x2_t res = vreinterpretq_u64_p128(
+ compat_vmull_p64((poly64_t)a, (poly64_t)b));
+
+ return vgetq_lane_u64(res, 0);
+}
+
+/*
+ * Like combine_crcs_slow(), but uses vmull_p64 to do the multiplications more
+ * quickly, and supports a variable chunk length. The chunk length is
+ * 'i * CRC32_MIN_VARIABLE_CHUNK_LEN'
+ * where 1 <= i < ARRAY_LEN(crc32_mults_for_chunklen).
+ */
+static forceinline ATTRIBUTES u32
+combine_crcs_fast(u32 crc0, u32 crc1, u32 crc2, u32 crc3, size_t i)
+{
+ u64 res0 = clmul_u32(crc0, crc32_mults_for_chunklen[i][0]);
+ u64 res1 = clmul_u32(crc1, crc32_mults_for_chunklen[i][1]);
+ u64 res2 = clmul_u32(crc2, crc32_mults_for_chunklen[i][2]);
+
+ return __crc32d(0, res0 ^ res1 ^ res2) ^ crc3;
+}
+
+#define crc32_arm_crc_pmullcombine crc32_arm_crc_pmullcombine
+static u32 ATTRIBUTES MAYBE_UNUSED
+crc32_arm_crc_pmullcombine(u32 crc, const u8 *p, size_t len)
+{
+ const size_t align = -(uintptr_t)p & 7;
+
+ if (len >= align + CRC32_NUM_CHUNKS * CRC32_MIN_VARIABLE_CHUNK_LEN) {
+ /* Align p to the next 8-byte boundary. */
+ if (align) {
+ if (align & 1)
+ crc = __crc32b(crc, *p++);
+ if (align & 2) {
+ crc = __crc32h(crc, le16_bswap(*(u16 *)p));
+ p += 2;
+ }
+ if (align & 4) {
+ crc = __crc32w(crc, le32_bswap(*(u32 *)p));
+ p += 4;
+ }
+ len -= align;
+ }
+ /*
+ * Handle CRC32_MAX_VARIABLE_CHUNK_LEN specially, so that better
+ * code is generated for it.
+ */
+ while (len >= CRC32_NUM_CHUNKS * CRC32_MAX_VARIABLE_CHUNK_LEN) {
+ const u64 *wp0 = (const u64 *)p;
+ const u64 * const wp0_end =
+ (const u64 *)(p + CRC32_MAX_VARIABLE_CHUNK_LEN);
+ u32 crc1 = 0, crc2 = 0, crc3 = 0;
+
+ STATIC_ASSERT(CRC32_NUM_CHUNKS == 4);
+ STATIC_ASSERT(CRC32_MAX_VARIABLE_CHUNK_LEN % (4 * 8) == 0);
+ do {
+ prefetchr(&wp0[64 + 0*CRC32_MAX_VARIABLE_CHUNK_LEN/8]);
+ prefetchr(&wp0[64 + 1*CRC32_MAX_VARIABLE_CHUNK_LEN/8]);
+ prefetchr(&wp0[64 + 2*CRC32_MAX_VARIABLE_CHUNK_LEN/8]);
+ prefetchr(&wp0[64 + 3*CRC32_MAX_VARIABLE_CHUNK_LEN/8]);
+ crc = __crc32d(crc, le64_bswap(wp0[0*CRC32_MAX_VARIABLE_CHUNK_LEN/8]));
+ crc1 = __crc32d(crc1, le64_bswap(wp0[1*CRC32_MAX_VARIABLE_CHUNK_LEN/8]));
+ crc2 = __crc32d(crc2, le64_bswap(wp0[2*CRC32_MAX_VARIABLE_CHUNK_LEN/8]));
+ crc3 = __crc32d(crc3, le64_bswap(wp0[3*CRC32_MAX_VARIABLE_CHUNK_LEN/8]));
+ wp0++;
+ crc = __crc32d(crc, le64_bswap(wp0[0*CRC32_MAX_VARIABLE_CHUNK_LEN/8]));
+ crc1 = __crc32d(crc1, le64_bswap(wp0[1*CRC32_MAX_VARIABLE_CHUNK_LEN/8]));
+ crc2 = __crc32d(crc2, le64_bswap(wp0[2*CRC32_MAX_VARIABLE_CHUNK_LEN/8]));
+ crc3 = __crc32d(crc3, le64_bswap(wp0[3*CRC32_MAX_VARIABLE_CHUNK_LEN/8]));
+ wp0++;
+ crc = __crc32d(crc, le64_bswap(wp0[0*CRC32_MAX_VARIABLE_CHUNK_LEN/8]));
+ crc1 = __crc32d(crc1, le64_bswap(wp0[1*CRC32_MAX_VARIABLE_CHUNK_LEN/8]));
+ crc2 = __crc32d(crc2, le64_bswap(wp0[2*CRC32_MAX_VARIABLE_CHUNK_LEN/8]));
+ crc3 = __crc32d(crc3, le64_bswap(wp0[3*CRC32_MAX_VARIABLE_CHUNK_LEN/8]));
+ wp0++;
+ crc = __crc32d(crc, le64_bswap(wp0[0*CRC32_MAX_VARIABLE_CHUNK_LEN/8]));
+ crc1 = __crc32d(crc1, le64_bswap(wp0[1*CRC32_MAX_VARIABLE_CHUNK_LEN/8]));
+ crc2 = __crc32d(crc2, le64_bswap(wp0[2*CRC32_MAX_VARIABLE_CHUNK_LEN/8]));
+ crc3 = __crc32d(crc3, le64_bswap(wp0[3*CRC32_MAX_VARIABLE_CHUNK_LEN/8]));
+ wp0++;
+ } while (wp0 != wp0_end);
+ crc = combine_crcs_fast(crc, crc1, crc2, crc3,
+ ARRAY_LEN(crc32_mults_for_chunklen) - 1);
+ p += CRC32_NUM_CHUNKS * CRC32_MAX_VARIABLE_CHUNK_LEN;
+ len -= CRC32_NUM_CHUNKS * CRC32_MAX_VARIABLE_CHUNK_LEN;
+ }
+ /* Handle up to one variable-length chunk. */
+ if (len >= CRC32_NUM_CHUNKS * CRC32_MIN_VARIABLE_CHUNK_LEN) {
+ const size_t i = len / (CRC32_NUM_CHUNKS *
+ CRC32_MIN_VARIABLE_CHUNK_LEN);
+ const size_t chunk_len =
+ i * CRC32_MIN_VARIABLE_CHUNK_LEN;
+ const u64 *wp0 = (const u64 *)(p + 0*chunk_len);
+ const u64 *wp1 = (const u64 *)(p + 1*chunk_len);
+ const u64 *wp2 = (const u64 *)(p + 2*chunk_len);
+ const u64 *wp3 = (const u64 *)(p + 3*chunk_len);
+ const u64 * const wp0_end = wp1;
+ u32 crc1 = 0, crc2 = 0, crc3 = 0;
+
+ STATIC_ASSERT(CRC32_NUM_CHUNKS == 4);
+ STATIC_ASSERT(CRC32_MIN_VARIABLE_CHUNK_LEN % (4 * 8) == 0);
+ do {
+ prefetchr(wp0 + 64);
+ prefetchr(wp1 + 64);
+ prefetchr(wp2 + 64);
+ prefetchr(wp3 + 64);
+ crc = __crc32d(crc, le64_bswap(*wp0++));
+ crc1 = __crc32d(crc1, le64_bswap(*wp1++));
+ crc2 = __crc32d(crc2, le64_bswap(*wp2++));
+ crc3 = __crc32d(crc3, le64_bswap(*wp3++));
+ crc = __crc32d(crc, le64_bswap(*wp0++));
+ crc1 = __crc32d(crc1, le64_bswap(*wp1++));
+ crc2 = __crc32d(crc2, le64_bswap(*wp2++));
+ crc3 = __crc32d(crc3, le64_bswap(*wp3++));
+ crc = __crc32d(crc, le64_bswap(*wp0++));
+ crc1 = __crc32d(crc1, le64_bswap(*wp1++));
+ crc2 = __crc32d(crc2, le64_bswap(*wp2++));
+ crc3 = __crc32d(crc3, le64_bswap(*wp3++));
+ crc = __crc32d(crc, le64_bswap(*wp0++));
+ crc1 = __crc32d(crc1, le64_bswap(*wp1++));
+ crc2 = __crc32d(crc2, le64_bswap(*wp2++));
+ crc3 = __crc32d(crc3, le64_bswap(*wp3++));
+ } while (wp0 != wp0_end);
+ crc = combine_crcs_fast(crc, crc1, crc2, crc3, i);
+ p += CRC32_NUM_CHUNKS * chunk_len;
+ len -= CRC32_NUM_CHUNKS * chunk_len;
+ }
+
+ while (len >= 32) {
+ crc = __crc32d(crc, le64_bswap(*(u64 *)(p + 0)));
+ crc = __crc32d(crc, le64_bswap(*(u64 *)(p + 8)));
+ crc = __crc32d(crc, le64_bswap(*(u64 *)(p + 16)));
+ crc = __crc32d(crc, le64_bswap(*(u64 *)(p + 24)));
+ p += 32;
+ len -= 32;
+ }
+ } else {
+ while (len >= 32) {
+ crc = __crc32d(crc, get_unaligned_le64(p + 0));
+ crc = __crc32d(crc, get_unaligned_le64(p + 8));
+ crc = __crc32d(crc, get_unaligned_le64(p + 16));
+ crc = __crc32d(crc, get_unaligned_le64(p + 24));
+ p += 32;
+ len -= 32;
+ }
+ }
+ if (len & 16) {
+ crc = __crc32d(crc, get_unaligned_le64(p + 0));
+ crc = __crc32d(crc, get_unaligned_le64(p + 8));
+ p += 16;
+ }
+ if (len & 8) {
+ crc = __crc32d(crc, get_unaligned_le64(p));
+ p += 8;
+ }
+ if (len & 4) {
+ crc = __crc32w(crc, get_unaligned_le32(p));
+ p += 4;
+ }
+ if (len & 2) {
+ crc = __crc32h(crc, get_unaligned_le16(p));
+ p += 2;
+ }
+ if (len & 1)
+ crc = __crc32b(crc, *p);
+ return crc;
+}
+#undef ATTRIBUTES
+#endif /* crc32_arm_crc_pmullcombine() */
+
+/*
+ * crc32_arm_pmullx4() - implementation using "folding" with pmull instructions
+ *
+ * This implementation is intended for CPUs that support pmull instructions but
+ * not crc32 instructions.
+ */
+#if HAVE_PMULL_INTRIN
+# define crc32_arm_pmullx4 crc32_arm_pmullx4
+# define SUFFIX _pmullx4
+# if HAVE_PMULL_NATIVE
+# define ATTRIBUTES
+# else
+# ifdef ARCH_ARM32
+# define ATTRIBUTES _target_attribute("fpu=crypto-neon-fp-armv8")
+# else
+# ifdef __clang__
+# define ATTRIBUTES _target_attribute("crypto")
+# else
+# define ATTRIBUTES _target_attribute("+crypto")
+# endif
+# endif
+# endif
+# define ENABLE_EOR3 0
+# include "crc32_pmull_helpers.h"
+
+static u32 ATTRIBUTES MAYBE_UNUSED
+crc32_arm_pmullx4(u32 crc, const u8 *p, size_t len)
+{
+ static const u64 _aligned_attribute(16) mults[3][2] = {
+ CRC32_1VECS_MULTS,
+ CRC32_4VECS_MULTS,
+ CRC32_2VECS_MULTS,
+ };
+ static const u64 _aligned_attribute(16) final_mults[3][2] = {
+ { CRC32_FINAL_MULT, 0 },
+ { CRC32_BARRETT_CONSTANT_1, 0 },
+ { CRC32_BARRETT_CONSTANT_2, 0 },
+ };
+ const uint8x16_t zeroes = vdupq_n_u8(0);
+ const uint8x16_t mask32 = vreinterpretq_u8_u64(vdupq_n_u64(0xFFFFFFFF));
+ const poly64x2_t multipliers_1 = load_multipliers(mults[0]);
+ uint8x16_t v0, v1, v2, v3;
+
+ if (len < 64 + 15) {
+ if (len < 16)
+ return crc32_slice1(crc, p, len);
+ v0 = veorq_u8(vld1q_u8(p), u32_to_bytevec(crc));
+ p += 16;
+ len -= 16;
+ while (len >= 16) {
+ v0 = fold_vec(v0, vld1q_u8(p), multipliers_1);
+ p += 16;
+ len -= 16;
+ }
+ } else {
+ const poly64x2_t multipliers_4 = load_multipliers(mults[1]);
+ const poly64x2_t multipliers_2 = load_multipliers(mults[2]);
+ const size_t align = -(uintptr_t)p & 15;
+ const uint8x16_t *vp;
+
+ v0 = veorq_u8(vld1q_u8(p), u32_to_bytevec(crc));
+ p += 16;
+ /* Align p to the next 16-byte boundary. */
+ if (align) {
+ v0 = fold_partial_vec(v0, p, align, multipliers_1);
+ p += align;
+ len -= align;
+ }
+ vp = (const uint8x16_t *)p;
+ v1 = *vp++;
+ v2 = *vp++;
+ v3 = *vp++;
+ while (len >= 64 + 64) {
+ v0 = fold_vec(v0, *vp++, multipliers_4);
+ v1 = fold_vec(v1, *vp++, multipliers_4);
+ v2 = fold_vec(v2, *vp++, multipliers_4);
+ v3 = fold_vec(v3, *vp++, multipliers_4);
+ len -= 64;
+ }
+ v0 = fold_vec(v0, v2, multipliers_2);
+ v1 = fold_vec(v1, v3, multipliers_2);
+ if (len & 32) {
+ v0 = fold_vec(v0, *vp++, multipliers_2);
+ v1 = fold_vec(v1, *vp++, multipliers_2);
+ }
+ v0 = fold_vec(v0, v1, multipliers_1);
+ if (len & 16)
+ v0 = fold_vec(v0, *vp++, multipliers_1);
+ p = (const u8 *)vp;
+ len &= 15;
+ }
+
+ /* Handle any remaining partial block now before reducing to 32 bits. */
+ if (len)
+ v0 = fold_partial_vec(v0, p, len, multipliers_1);
+
+ /*
+ * Fold 128 => 96 bits. This also implicitly appends 32 zero bits,
+ * which is equivalent to multiplying by x^32. This is needed because
+ * the CRC is defined as M(x)*x^32 mod G(x), not just M(x) mod G(x).
+ */
+
+ v0 = veorq_u8(vextq_u8(v0, zeroes, 8),
+ clmul_high(vextq_u8(zeroes, v0, 8), multipliers_1));
+
+ /* Fold 96 => 64 bits. */
+ v0 = veorq_u8(vextq_u8(v0, zeroes, 4),
+ clmul_low(vandq_u8(v0, mask32),
+ load_multipliers(final_mults[0])));
+
+ /* Reduce 64 => 32 bits using Barrett reduction. */
+ v1 = clmul_low(vandq_u8(v0, mask32), load_multipliers(final_mults[1]));
+ v1 = clmul_low(vandq_u8(v1, mask32), load_multipliers(final_mults[2]));
+ return vgetq_lane_u32(vreinterpretq_u32_u8(veorq_u8(v0, v1)), 1);
+}
+#undef SUFFIX
+#undef ATTRIBUTES
+#undef ENABLE_EOR3
+#endif /* crc32_arm_pmullx4() */
+
+/*
+ * crc32_arm_pmullx12_crc() - large-stride implementation using "folding" with
+ * pmull instructions, where crc32 instructions are also available
+ *
+ * See crc32_pmull_wide.h for explanation.
+ */
+#if defined(ARCH_ARM64) && HAVE_PMULL_INTRIN && HAVE_CRC32_INTRIN
+# define crc32_arm_pmullx12_crc crc32_arm_pmullx12_crc
+# define SUFFIX _pmullx12_crc
+# if HAVE_PMULL_NATIVE && HAVE_CRC32_NATIVE
+# define ATTRIBUTES
+# else
+# ifdef __clang__
+# define ATTRIBUTES _target_attribute("crypto,crc")
+# else
+# define ATTRIBUTES _target_attribute("+crypto,+crc")
+# endif
+# endif
+# define ENABLE_EOR3 0
+# include "crc32_pmull_wide.h"
+#endif
+
+/*
+ * crc32_arm_pmullx12_crc_eor3()
+ *
+ * This like crc32_arm_pmullx12_crc(), but it adds the eor3 instruction (from
+ * the sha3 extension) for even better performance.
+ *
+ * Note: we require HAVE_SHA3_TARGET (or HAVE_SHA3_NATIVE) rather than
+ * HAVE_SHA3_INTRIN, as we have an inline asm fallback for eor3.
+ */
+#if defined(ARCH_ARM64) && HAVE_PMULL_INTRIN && HAVE_CRC32_INTRIN && \
+ (HAVE_SHA3_TARGET || HAVE_SHA3_NATIVE)
+# define crc32_arm_pmullx12_crc_eor3 crc32_arm_pmullx12_crc_eor3
+# define SUFFIX _pmullx12_crc_eor3
+# if HAVE_PMULL_NATIVE && HAVE_CRC32_NATIVE && HAVE_SHA3_NATIVE
+# define ATTRIBUTES
+# else
+# ifdef __clang__
+# define ATTRIBUTES _target_attribute("crypto,crc,sha3")
+ /*
+ * With gcc, arch=armv8.2-a is needed for the sha3 intrinsics, unless the
+ * default target is armv8.3-a or later in which case it must be omitted.
+ * armv8.3-a or later can be detected by checking for __ARM_FEATURE_JCVT.
+ */
+# elif defined(__ARM_FEATURE_JCVT)
+# define ATTRIBUTES _target_attribute("+crypto,+crc,+sha3")
+# else
+# define ATTRIBUTES _target_attribute("arch=armv8.2-a+crypto+crc+sha3")
+# endif
+# endif
+# define ENABLE_EOR3 1
+# include "crc32_pmull_wide.h"
+#endif
+
+/*
+ * On the Apple M1 processor, crc32 instructions max out at about 25.5 GB/s in
+ * the best case of using a 3-way or greater interleaved chunked implementation,
+ * whereas a pmull-based implementation achieves 68 GB/s provided that the
+ * stride length is large enough (about 10+ vectors with eor3, or 12+ without).
+ *
+ * For now we assume that crc32 instructions are preferable in other cases.
+ */
+#define PREFER_PMULL_TO_CRC 0
+#ifdef __APPLE__
+# include <TargetConditionals.h>
+# if TARGET_OS_OSX
+# undef PREFER_PMULL_TO_CRC
+# define PREFER_PMULL_TO_CRC 1
+# endif
+#endif
+
+/*
+ * If the best implementation is statically available, use it unconditionally.
+ * Otherwise choose the best implementation at runtime.
+ */
+#if PREFER_PMULL_TO_CRC && defined(crc32_arm_pmullx12_crc_eor3) && \
+ HAVE_PMULL_NATIVE && HAVE_CRC32_NATIVE && HAVE_SHA3_NATIVE
+# define DEFAULT_IMPL crc32_arm_pmullx12_crc_eor3
+#elif !PREFER_PMULL_TO_CRC && defined(crc32_arm_crc_pmullcombine) && \
+ HAVE_CRC32_NATIVE && HAVE_PMULL_NATIVE
+# define DEFAULT_IMPL crc32_arm_crc_pmullcombine
+#else
+static inline crc32_func_t
+arch_select_crc32_func(void)
+{
+ const u32 features MAYBE_UNUSED = get_arm_cpu_features();
+
+#if PREFER_PMULL_TO_CRC && defined(crc32_arm_pmullx12_crc_eor3)
+ if (HAVE_PMULL(features) && HAVE_CRC32(features) && HAVE_SHA3(features))
+ return crc32_arm_pmullx12_crc_eor3;
+#endif
+#if PREFER_PMULL_TO_CRC && defined(crc32_arm_pmullx12_crc)
+ if (HAVE_PMULL(features) && HAVE_CRC32(features))
+ return crc32_arm_pmullx12_crc;
+#endif
+#ifdef crc32_arm_crc_pmullcombine
+ if (HAVE_CRC32(features) && HAVE_PMULL(features))
+ return crc32_arm_crc_pmullcombine;
+#endif
+#ifdef crc32_arm_crc
+ if (HAVE_CRC32(features))
+ return crc32_arm_crc;
+#endif
+#ifdef crc32_arm_pmullx4
+ if (HAVE_PMULL(features))
+ return crc32_arm_pmullx4;
+#endif
+ return NULL;
+}
+#define arch_select_crc32_func arch_select_crc32_func
+#endif
+
+#endif /* LIB_ARM_CRC32_IMPL_H */
diff --git a/tools/z64compress/src/enc/libdeflate/lib/arm/crc32_pmull_helpers.h b/tools/z64compress/src/enc/libdeflate/lib/arm/crc32_pmull_helpers.h
new file mode 100644
index 000000000..1cd1cc188
--- /dev/null
+++ b/tools/z64compress/src/enc/libdeflate/lib/arm/crc32_pmull_helpers.h
@@ -0,0 +1,184 @@
+/*
+ * arm/crc32_pmull_helpers.h - helper functions for CRC-32 folding with PMULL
+ *
+ * Copyright 2022 Eric Biggers
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * This file is a "template" for instantiating helper functions for CRC folding
+ * with pmull instructions. It accepts the following parameters:
+ *
+ * SUFFIX:
+ * Name suffix to append to all instantiated functions.
+ * ATTRIBUTES:
+ * Target function attributes to use.
+ * ENABLE_EOR3:
+ * Use the eor3 instruction (from the sha3 extension).
+ */
+
+#include <arm_neon.h>
+
+/* Create a vector with 'a' in the first 4 bytes, and the rest zeroed out. */
+#undef u32_to_bytevec
+static forceinline ATTRIBUTES uint8x16_t
+ADD_SUFFIX(u32_to_bytevec)(u32 a)
+{
+ return vreinterpretq_u8_u32(vsetq_lane_u32(a, vdupq_n_u32(0), 0));
+}
+#define u32_to_bytevec ADD_SUFFIX(u32_to_bytevec)
+
+/* Load two 64-bit values into a vector. */
+#undef load_multipliers
+static forceinline ATTRIBUTES poly64x2_t
+ADD_SUFFIX(load_multipliers)(const u64 p[2])
+{
+ return vreinterpretq_p64_u64(vld1q_u64(p));
+}
+#define load_multipliers ADD_SUFFIX(load_multipliers)
+
+/* Do carryless multiplication of the low halves of two vectors. */
+#undef clmul_low
+static forceinline ATTRIBUTES uint8x16_t
+ADD_SUFFIX(clmul_low)(uint8x16_t a, poly64x2_t b)
+{
+ return vreinterpretq_u8_p128(
+ compat_vmull_p64(vgetq_lane_p64(vreinterpretq_p64_u8(a), 0),
+ vgetq_lane_p64(b, 0)));
+}
+#define clmul_low ADD_SUFFIX(clmul_low)
+
+/* Do carryless multiplication of the high halves of two vectors. */
+#undef clmul_high
+static forceinline ATTRIBUTES uint8x16_t
+ADD_SUFFIX(clmul_high)(uint8x16_t a, poly64x2_t b)
+{
+#if defined(__clang__) && defined(ARCH_ARM64)
+ /*
+ * Use inline asm to ensure that pmull2 is really used. This works
+ * around clang bug https://github.com/llvm/llvm-project/issues/52868.
+ */
+ uint8x16_t res;
+
+ __asm__("pmull2 %0.1q, %1.2d, %2.2d" : "=w" (res) : "w" (a), "w" (b));
+ return res;
+#else
+ return vreinterpretq_u8_p128(vmull_high_p64(vreinterpretq_p64_u8(a), b));
+#endif
+}
+#define clmul_high ADD_SUFFIX(clmul_high)
+
+#undef eor3
+static forceinline ATTRIBUTES uint8x16_t
+ADD_SUFFIX(eor3)(uint8x16_t a, uint8x16_t b, uint8x16_t c)
+{
+#if ENABLE_EOR3
+#if HAVE_SHA3_INTRIN
+ return veor3q_u8(a, b, c);
+#else
+ uint8x16_t res;
+
+ __asm__("eor3 %0.16b, %1.16b, %2.16b, %3.16b"
+ : "=w" (res) : "w" (a), "w" (b), "w" (c));
+ return res;
+#endif
+#else /* ENABLE_EOR3 */
+ return veorq_u8(veorq_u8(a, b), c);
+#endif /* !ENABLE_EOR3 */
+}
+#define eor3 ADD_SUFFIX(eor3)
+
+#undef fold_vec
+static forceinline ATTRIBUTES uint8x16_t
+ADD_SUFFIX(fold_vec)(uint8x16_t src, uint8x16_t dst, poly64x2_t multipliers)
+{
+ uint8x16_t a = clmul_low(src, multipliers);
+ uint8x16_t b = clmul_high(src, multipliers);
+
+ return eor3(a, b, dst);
+}
+#define fold_vec ADD_SUFFIX(fold_vec)
+
+#undef vtbl
+static forceinline ATTRIBUTES uint8x16_t
+ADD_SUFFIX(vtbl)(uint8x16_t table, uint8x16_t indices)
+{
+#ifdef ARCH_ARM64
+ return vqtbl1q_u8(table, indices);
+#else
+ uint8x8x2_t tab2;
+
+ tab2.val[0] = vget_low_u8(table);
+ tab2.val[1] = vget_high_u8(table);
+
+ return vcombine_u8(vtbl2_u8(tab2, vget_low_u8(indices)),
+ vtbl2_u8(tab2, vget_high_u8(indices)));
+#endif
+}
+#define vtbl ADD_SUFFIX(vtbl)
+
+/*
+ * Given v containing a 16-byte polynomial, and a pointer 'p' that points to the
+ * next '1 <= len <= 15' data bytes, rearrange the concatenation of v and the
+ * data into vectors x0 and x1 that contain 'len' bytes and 16 bytes,
+ * respectively. Then fold x0 into x1 and return the result. Assumes that
+ * 'p + len - 16' is in-bounds.
+ */
+#undef fold_partial_vec
+static forceinline ATTRIBUTES MAYBE_UNUSED uint8x16_t
+ADD_SUFFIX(fold_partial_vec)(uint8x16_t v, const u8 *p, size_t len,
+ poly64x2_t multipliers_1)
+{
+ /*
+ * vtbl(v, shift_tab[len..len+15]) left shifts v by 16-len bytes.
+ * vtbl(v, shift_tab[len+16..len+31]) right shifts v by len bytes.
+ */
+ static const u8 shift_tab[48] = {
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+ 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ };
+ const uint8x16_t lshift = vld1q_u8(&shift_tab[len]);
+ const uint8x16_t rshift = vld1q_u8(&shift_tab[len + 16]);
+ uint8x16_t x0, x1, bsl_mask;
+
+ /* x0 = v left-shifted by '16 - len' bytes */
+ x0 = vtbl(v, lshift);
+
+ /* Create a vector of '16 - len' 0x00 bytes, then 'len' 0xff bytes. */
+ bsl_mask = vreinterpretq_u8_s8(
+ vshrq_n_s8(vreinterpretq_s8_u8(rshift), 7));
+
+ /*
+ * x1 = the last '16 - len' bytes from v (i.e. v right-shifted by 'len'
+ * bytes) followed by the remaining data.
+ */
+ x1 = vbslq_u8(bsl_mask /* 0 bits select from arg3, 1 bits from arg2 */,
+ vld1q_u8(p + len - 16), vtbl(v, rshift));
+
+ return fold_vec(x0, x1, multipliers_1);
+}
+#define fold_partial_vec ADD_SUFFIX(fold_partial_vec)
diff --git a/tools/z64compress/src/enc/libdeflate/lib/arm/crc32_pmull_wide.h b/tools/z64compress/src/enc/libdeflate/lib/arm/crc32_pmull_wide.h
new file mode 100644
index 000000000..a72e1d876
--- /dev/null
+++ b/tools/z64compress/src/enc/libdeflate/lib/arm/crc32_pmull_wide.h
@@ -0,0 +1,227 @@
+/*
+ * arm/crc32_pmull_wide.h - gzip CRC-32 with PMULL (extra-wide version)
+ *
+ * Copyright 2022 Eric Biggers
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * This file is a "template" for instantiating PMULL-based crc32_arm functions.
+ * The "parameters" are:
+ *
+ * SUFFIX:
+ * Name suffix to append to all instantiated functions.
+ * ATTRIBUTES:
+ * Target function attributes to use.
+ * ENABLE_EOR3:
+ * Use the eor3 instruction (from the sha3 extension).
+ *
+ * This is the extra-wide version; it uses an unusually large stride length of
+ * 12, and it assumes that crc32 instructions are available too. It's intended
+ * for powerful CPUs that support both pmull and crc32 instructions, but where
+ * throughput of pmull and xor (given enough instructions issued in parallel) is
+ * significantly higher than that of crc32, thus making the crc32 instructions
+ * (counterintuitively) not actually the fastest way to compute the CRC-32. The
+ * Apple M1 processor is an example of such a CPU.
+ */
+
+#ifndef _MSC_VER
+# include <arm_acle.h>
+#endif
+#include <arm_neon.h>
+
+#include "crc32_pmull_helpers.h"
+
+static u32 ATTRIBUTES MAYBE_UNUSED
+ADD_SUFFIX(crc32_arm)(u32 crc, const u8 *p, size_t len)
+{
+ uint8x16_t v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11;
+
+ if (len < 3 * 192) {
+ static const u64 _aligned_attribute(16) mults[3][2] = {
+ CRC32_4VECS_MULTS, CRC32_2VECS_MULTS, CRC32_1VECS_MULTS,
+ };
+ poly64x2_t multipliers_4, multipliers_2, multipliers_1;
+
+ if (len < 64)
+ goto tail;
+ multipliers_4 = load_multipliers(mults[0]);
+ multipliers_2 = load_multipliers(mults[1]);
+ multipliers_1 = load_multipliers(mults[2]);
+ /*
+ * Short length; don't bother aligning the pointer, and fold
+ * 64 bytes (4 vectors) at a time, at most.
+ */
+ v0 = veorq_u8(vld1q_u8(p + 0), u32_to_bytevec(crc));
+ v1 = vld1q_u8(p + 16);
+ v2 = vld1q_u8(p + 32);
+ v3 = vld1q_u8(p + 48);
+ p += 64;
+ len -= 64;
+ while (len >= 64) {
+ v0 = fold_vec(v0, vld1q_u8(p + 0), multipliers_4);
+ v1 = fold_vec(v1, vld1q_u8(p + 16), multipliers_4);
+ v2 = fold_vec(v2, vld1q_u8(p + 32), multipliers_4);
+ v3 = fold_vec(v3, vld1q_u8(p + 48), multipliers_4);
+ p += 64;
+ len -= 64;
+ }
+ v0 = fold_vec(v0, v2, multipliers_2);
+ v1 = fold_vec(v1, v3, multipliers_2);
+ if (len >= 32) {
+ v0 = fold_vec(v0, vld1q_u8(p + 0), multipliers_2);
+ v1 = fold_vec(v1, vld1q_u8(p + 16), multipliers_2);
+ p += 32;
+ len -= 32;
+ }
+ v0 = fold_vec(v0, v1, multipliers_1);
+ } else {
+ static const u64 _aligned_attribute(16) mults[4][2] = {
+ CRC32_12VECS_MULTS, CRC32_6VECS_MULTS,
+ CRC32_3VECS_MULTS, CRC32_1VECS_MULTS,
+ };
+ const poly64x2_t multipliers_12 = load_multipliers(mults[0]);
+ const poly64x2_t multipliers_6 = load_multipliers(mults[1]);
+ const poly64x2_t multipliers_3 = load_multipliers(mults[2]);
+ const poly64x2_t multipliers_1 = load_multipliers(mults[3]);
+ const size_t align = -(uintptr_t)p & 15;
+ const uint8x16_t *vp;
+
+ /* Align p to the next 16-byte boundary. */
+ if (align) {
+ if (align & 1)
+ crc = __crc32b(crc, *p++);
+ if (align & 2) {
+ crc = __crc32h(crc, le16_bswap(*(u16 *)p));
+ p += 2;
+ }
+ if (align & 4) {
+ crc = __crc32w(crc, le32_bswap(*(u32 *)p));
+ p += 4;
+ }
+ if (align & 8) {
+ crc = __crc32d(crc, le64_bswap(*(u64 *)p));
+ p += 8;
+ }
+ len -= align;
+ }
+ vp = (const uint8x16_t *)p;
+ v0 = veorq_u8(*vp++, u32_to_bytevec(crc));
+ v1 = *vp++;
+ v2 = *vp++;
+ v3 = *vp++;
+ v4 = *vp++;
+ v5 = *vp++;
+ v6 = *vp++;
+ v7 = *vp++;
+ v8 = *vp++;
+ v9 = *vp++;
+ v10 = *vp++;
+ v11 = *vp++;
+ len -= 192;
+ /* Fold 192 bytes (12 vectors) at a time. */
+ do {
+ v0 = fold_vec(v0, *vp++, multipliers_12);
+ v1 = fold_vec(v1, *vp++, multipliers_12);
+ v2 = fold_vec(v2, *vp++, multipliers_12);
+ v3 = fold_vec(v3, *vp++, multipliers_12);
+ v4 = fold_vec(v4, *vp++, multipliers_12);
+ v5 = fold_vec(v5, *vp++, multipliers_12);
+ v6 = fold_vec(v6, *vp++, multipliers_12);
+ v7 = fold_vec(v7, *vp++, multipliers_12);
+ v8 = fold_vec(v8, *vp++, multipliers_12);
+ v9 = fold_vec(v9, *vp++, multipliers_12);
+ v10 = fold_vec(v10, *vp++, multipliers_12);
+ v11 = fold_vec(v11, *vp++, multipliers_12);
+ len -= 192;
+ } while (len >= 192);
+
+ /*
+ * Fewer than 192 bytes left. Fold v0-v11 down to just v0,
+ * while processing up to 144 more bytes.
+ */
+ v0 = fold_vec(v0, v6, multipliers_6);
+ v1 = fold_vec(v1, v7, multipliers_6);
+ v2 = fold_vec(v2, v8, multipliers_6);
+ v3 = fold_vec(v3, v9, multipliers_6);
+ v4 = fold_vec(v4, v10, multipliers_6);
+ v5 = fold_vec(v5, v11, multipliers_6);
+ if (len >= 96) {
+ v0 = fold_vec(v0, *vp++, multipliers_6);
+ v1 = fold_vec(v1, *vp++, multipliers_6);
+ v2 = fold_vec(v2, *vp++, multipliers_6);
+ v3 = fold_vec(v3, *vp++, multipliers_6);
+ v4 = fold_vec(v4, *vp++, multipliers_6);
+ v5 = fold_vec(v5, *vp++, multipliers_6);
+ len -= 96;
+ }
+ v0 = fold_vec(v0, v3, multipliers_3);
+ v1 = fold_vec(v1, v4, multipliers_3);
+ v2 = fold_vec(v2, v5, multipliers_3);
+ if (len >= 48) {
+ v0 = fold_vec(v0, *vp++, multipliers_3);
+ v1 = fold_vec(v1, *vp++, multipliers_3);
+ v2 = fold_vec(v2, *vp++, multipliers_3);
+ len -= 48;
+ }
+ v0 = fold_vec(v0, v1, multipliers_1);
+ v0 = fold_vec(v0, v2, multipliers_1);
+ p = (const u8 *)vp;
+ }
+ /* Reduce 128 to 32 bits using crc32 instructions. */
+ crc = __crc32d(0, vgetq_lane_u64(vreinterpretq_u64_u8(v0), 0));
+ crc = __crc32d(crc, vgetq_lane_u64(vreinterpretq_u64_u8(v0), 1));
+tail:
+ /* Finish up the remainder using crc32 instructions. */
+ if (len & 32) {
+ crc = __crc32d(crc, get_unaligned_le64(p + 0));
+ crc = __crc32d(crc, get_unaligned_le64(p + 8));
+ crc = __crc32d(crc, get_unaligned_le64(p + 16));
+ crc = __crc32d(crc, get_unaligned_le64(p + 24));
+ p += 32;
+ }
+ if (len & 16) {
+ crc = __crc32d(crc, get_unaligned_le64(p + 0));
+ crc = __crc32d(crc, get_unaligned_le64(p + 8));
+ p += 16;
+ }
+ if (len & 8) {
+ crc = __crc32d(crc, get_unaligned_le64(p));
+ p += 8;
+ }
+ if (len & 4) {
+ crc = __crc32w(crc, get_unaligned_le32(p));
+ p += 4;
+ }
+ if (len & 2) {
+ crc = __crc32h(crc, get_unaligned_le16(p));
+ p += 2;
+ }
+ if (len & 1)
+ crc = __crc32b(crc, *p);
+ return crc;
+}
+
+#undef SUFFIX
+#undef ATTRIBUTES
+#undef ENABLE_EOR3
diff --git a/tools/z64compress/src/enc/libdeflate/lib/arm/matchfinder_impl.h b/tools/z64compress/src/enc/libdeflate/lib/arm/matchfinder_impl.h
new file mode 100644
index 000000000..b20f56a3b
--- /dev/null
+++ b/tools/z64compress/src/enc/libdeflate/lib/arm/matchfinder_impl.h
@@ -0,0 +1,79 @@
+/*
+ * arm/matchfinder_impl.h - ARM implementations of matchfinder functions
+ *
+ * Copyright 2016 Eric Biggers
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef LIB_ARM_MATCHFINDER_IMPL_H
+#define LIB_ARM_MATCHFINDER_IMPL_H
+
+#include "cpu_features.h"
+
+#if HAVE_NEON_NATIVE
+# include <arm_neon.h>
+static forceinline void
+matchfinder_init_neon(mf_pos_t *data, size_t size)
+{
+ int16x8_t *p = (int16x8_t *)data;
+ int16x8_t v = vdupq_n_s16(MATCHFINDER_INITVAL);
+
+ STATIC_ASSERT(MATCHFINDER_MEM_ALIGNMENT % sizeof(*p) == 0);
+ STATIC_ASSERT(MATCHFINDER_SIZE_ALIGNMENT % (4 * sizeof(*p)) == 0);
+ STATIC_ASSERT(sizeof(mf_pos_t) == 2);
+
+ do {
+ p[0] = v;
+ p[1] = v;
+ p[2] = v;
+ p[3] = v;
+ p += 4;
+ size -= 4 * sizeof(*p);
+ } while (size != 0);
+}
+#define matchfinder_init matchfinder_init_neon
+
+static forceinline void
+matchfinder_rebase_neon(mf_pos_t *data, size_t size)
+{
+ int16x8_t *p = (int16x8_t *)data;
+ int16x8_t v = vdupq_n_s16((u16)-MATCHFINDER_WINDOW_SIZE);
+
+ STATIC_ASSERT(MATCHFINDER_MEM_ALIGNMENT % sizeof(*p) == 0);
+ STATIC_ASSERT(MATCHFINDER_SIZE_ALIGNMENT % (4 * sizeof(*p)) == 0);
+ STATIC_ASSERT(sizeof(mf_pos_t) == 2);
+
+ do {
+ p[0] = vqaddq_s16(p[0], v);
+ p[1] = vqaddq_s16(p[1], v);
+ p[2] = vqaddq_s16(p[2], v);
+ p[3] = vqaddq_s16(p[3], v);
+ p += 4;
+ size -= 4 * sizeof(*p);
+ } while (size != 0);
+}
+#define matchfinder_rebase matchfinder_rebase_neon
+
+#endif /* HAVE_NEON_NATIVE */
+
+#endif /* LIB_ARM_MATCHFINDER_IMPL_H */
diff --git a/tools/z64compress/src/enc/libdeflate/lib/bt_matchfinder.h b/tools/z64compress/src/enc/libdeflate/lib/bt_matchfinder.h
new file mode 100644
index 000000000..b247d4bcc
--- /dev/null
+++ b/tools/z64compress/src/enc/libdeflate/lib/bt_matchfinder.h
@@ -0,0 +1,342 @@
+/*
+ * bt_matchfinder.h - Lempel-Ziv matchfinding with a hash table of binary trees
+ *
+ * Copyright 2016 Eric Biggers
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * ----------------------------------------------------------------------------
+ *
+ * This is a Binary Trees (bt) based matchfinder.
+ *
+ * The main data structure is a hash table where each hash bucket contains a
+ * binary tree of sequences whose first 4 bytes share the same hash code. Each
+ * sequence is identified by its starting position in the input buffer. Each
+ * binary tree is always sorted such that each left child represents a sequence
+ * lexicographically lesser than its parent and each right child represents a
+ * sequence lexicographically greater than its parent.
+ *
+ * The algorithm processes the input buffer sequentially. At each byte
+ * position, the hash code of the first 4 bytes of the sequence beginning at
+ * that position (the sequence being matched against) is computed. This
+ * identifies the hash bucket to use for that position. Then, a new binary tree
+ * node is created to represent the current sequence. Then, in a single tree
+ * traversal, the hash bucket's binary tree is searched for matches and is
+ * re-rooted at the new node.
+ *
+ * Compared to the simpler algorithm that uses linked lists instead of binary
+ * trees (see hc_matchfinder.h), the binary tree version gains more information
+ * at each node visitation. Ideally, the binary tree version will examine only
+ * 'log(n)' nodes to find the same matches that the linked list version will
+ * find by examining 'n' nodes. In addition, the binary tree version can
+ * examine fewer bytes at each node by taking advantage of the common prefixes
+ * that result from the sort order, whereas the linked list version may have to
+ * examine up to the full length of the match at each node.
+ *
+ * However, it is not always best to use the binary tree version. It requires
+ * nearly twice as much memory as the linked list version, and it takes time to
+ * keep the binary trees sorted, even at positions where the compressor does not
+ * need matches. Generally, when doing fast compression on small buffers,
+ * binary trees are the wrong approach. They are best suited for thorough
+ * compression and/or large buffers.
+ *
+ * ----------------------------------------------------------------------------
+ */
+
+#ifndef LIB_BT_MATCHFINDER_H
+#define LIB_BT_MATCHFINDER_H
+
+#include "matchfinder_common.h"
+
+#define BT_MATCHFINDER_HASH3_ORDER 16
+#define BT_MATCHFINDER_HASH3_WAYS 2
+#define BT_MATCHFINDER_HASH4_ORDER 16
+
+#define BT_MATCHFINDER_TOTAL_HASH_SIZE \
+ (((1UL << BT_MATCHFINDER_HASH3_ORDER) * BT_MATCHFINDER_HASH3_WAYS + \
+ (1UL << BT_MATCHFINDER_HASH4_ORDER)) * sizeof(mf_pos_t))
+
+/* Representation of a match found by the bt_matchfinder */
+struct lz_match {
+
+ /* The number of bytes matched. */
+ u16 length;
+
+ /* The offset back from the current position that was matched. */
+ u16 offset;
+};
+
+struct MATCHFINDER_ALIGNED bt_matchfinder {
+
+ /* The hash table for finding length 3 matches */
+ mf_pos_t hash3_tab[1UL << BT_MATCHFINDER_HASH3_ORDER][BT_MATCHFINDER_HASH3_WAYS];
+
+ /* The hash table which contains the roots of the binary trees for
+ * finding length 4+ matches */
+ mf_pos_t hash4_tab[1UL << BT_MATCHFINDER_HASH4_ORDER];
+
+ /* The child node references for the binary trees. The left and right
+ * children of the node for the sequence with position 'pos' are
+ * 'child_tab[pos * 2]' and 'child_tab[pos * 2 + 1]', respectively. */
+ mf_pos_t child_tab[2UL * MATCHFINDER_WINDOW_SIZE];
+};
+
+/* Prepare the matchfinder for a new input buffer. */
+static forceinline void
+bt_matchfinder_init(struct bt_matchfinder *mf)
+{
+ STATIC_ASSERT(BT_MATCHFINDER_TOTAL_HASH_SIZE %
+ MATCHFINDER_SIZE_ALIGNMENT == 0);
+
+ matchfinder_init((mf_pos_t *)mf, BT_MATCHFINDER_TOTAL_HASH_SIZE);
+}
+
+static forceinline void
+bt_matchfinder_slide_window(struct bt_matchfinder *mf)
+{
+ STATIC_ASSERT(sizeof(*mf) % MATCHFINDER_SIZE_ALIGNMENT == 0);
+
+ matchfinder_rebase((mf_pos_t *)mf, sizeof(*mf));
+}
+
+static forceinline mf_pos_t *
+bt_left_child(struct bt_matchfinder *mf, s32 node)
+{
+ return &mf->child_tab[2 * (node & (MATCHFINDER_WINDOW_SIZE - 1)) + 0];
+}
+
+static forceinline mf_pos_t *
+bt_right_child(struct bt_matchfinder *mf, s32 node)
+{
+ return &mf->child_tab[2 * (node & (MATCHFINDER_WINDOW_SIZE - 1)) + 1];
+}
+
+/* The minimum permissible value of 'max_len' for bt_matchfinder_get_matches()
+ * and bt_matchfinder_skip_byte(). There must be sufficiently many bytes
+ * remaining to load a 32-bit integer from the *next* position. */
+#define BT_MATCHFINDER_REQUIRED_NBYTES 5
+
+/* Advance the binary tree matchfinder by one byte, optionally recording
+ * matches. @record_matches should be a compile-time constant. */
+static forceinline struct lz_match *
+bt_matchfinder_advance_one_byte(struct bt_matchfinder * const mf,
+ const u8 * const in_base,
+ const ptrdiff_t cur_pos,
+ const u32 max_len,
+ const u32 nice_len,
+ const u32 max_search_depth,
+ u32 * const next_hashes,
+ struct lz_match *lz_matchptr,
+ const bool record_matches)
+{
+ const u8 *in_next = in_base + cur_pos;
+ u32 depth_remaining = max_search_depth;
+ const s32 cutoff = cur_pos - MATCHFINDER_WINDOW_SIZE;
+ u32 next_hashseq;
+ u32 hash3;
+ u32 hash4;
+ s32 cur_node;
+#if BT_MATCHFINDER_HASH3_WAYS >= 2
+ s32 cur_node_2;
+#endif
+ const u8 *matchptr;
+ mf_pos_t *pending_lt_ptr, *pending_gt_ptr;
+ u32 best_lt_len, best_gt_len;
+ u32 len;
+ u32 best_len = 3;
+
+ STATIC_ASSERT(BT_MATCHFINDER_HASH3_WAYS >= 1 &&
+ BT_MATCHFINDER_HASH3_WAYS <= 2);
+
+ next_hashseq = get_unaligned_le32(in_next + 1);
+
+ hash3 = next_hashes[0];
+ hash4 = next_hashes[1];
+
+ next_hashes[0] = lz_hash(next_hashseq & 0xFFFFFF, BT_MATCHFINDER_HASH3_ORDER);
+ next_hashes[1] = lz_hash(next_hashseq, BT_MATCHFINDER_HASH4_ORDER);
+ prefetchw(&mf->hash3_tab[next_hashes[0]]);
+ prefetchw(&mf->hash4_tab[next_hashes[1]]);
+
+ cur_node = mf->hash3_tab[hash3][0];
+ mf->hash3_tab[hash3][0] = cur_pos;
+#if BT_MATCHFINDER_HASH3_WAYS >= 2
+ cur_node_2 = mf->hash3_tab[hash3][1];
+ mf->hash3_tab[hash3][1] = cur_node;
+#endif
+ if (record_matches && cur_node > cutoff) {
+ u32 seq3 = load_u24_unaligned(in_next);
+ if (seq3 == load_u24_unaligned(&in_base[cur_node])) {
+ lz_matchptr->length = 3;
+ lz_matchptr->offset = in_next - &in_base[cur_node];
+ lz_matchptr++;
+ }
+ #if BT_MATCHFINDER_HASH3_WAYS >= 2
+ else if (cur_node_2 > cutoff &&
+ seq3 == load_u24_unaligned(&in_base[cur_node_2]))
+ {
+ lz_matchptr->length = 3;
+ lz_matchptr->offset = in_next - &in_base[cur_node_2];
+ lz_matchptr++;
+ }
+ #endif
+ }
+
+ cur_node = mf->hash4_tab[hash4];
+ mf->hash4_tab[hash4] = cur_pos;
+
+ pending_lt_ptr = bt_left_child(mf, cur_pos);
+ pending_gt_ptr = bt_right_child(mf, cur_pos);
+
+ if (cur_node <= cutoff) {
+ *pending_lt_ptr = MATCHFINDER_INITVAL;
+ *pending_gt_ptr = MATCHFINDER_INITVAL;
+ return lz_matchptr;
+ }
+
+ best_lt_len = 0;
+ best_gt_len = 0;
+ len = 0;
+
+ for (;;) {
+ matchptr = &in_base[cur_node];
+
+ if (matchptr[len] == in_next[len]) {
+ len = lz_extend(in_next, matchptr, len + 1, max_len);
+ if (!record_matches || len > best_len) {
+ if (record_matches) {
+ best_len = len;
+ lz_matchptr->length = len;
+ lz_matchptr->offset = in_next - matchptr;
+ lz_matchptr++;
+ }
+ if (len >= nice_len) {
+ *pending_lt_ptr = *bt_left_child(mf, cur_node);
+ *pending_gt_ptr = *bt_right_child(mf, cur_node);
+ return lz_matchptr;
+ }
+ }
+ }
+
+ if (matchptr[len] < in_next[len]) {
+ *pending_lt_ptr = cur_node;
+ pending_lt_ptr = bt_right_child(mf, cur_node);
+ cur_node = *pending_lt_ptr;
+ best_lt_len = len;
+ if (best_gt_len < len)
+ len = best_gt_len;
+ } else {
+ *pending_gt_ptr = cur_node;
+ pending_gt_ptr = bt_left_child(mf, cur_node);
+ cur_node = *pending_gt_ptr;
+ best_gt_len = len;
+ if (best_lt_len < len)
+ len = best_lt_len;
+ }
+
+ if (cur_node <= cutoff || !--depth_remaining) {
+ *pending_lt_ptr = MATCHFINDER_INITVAL;
+ *pending_gt_ptr = MATCHFINDER_INITVAL;
+ return lz_matchptr;
+ }
+ }
+}
+
+/*
+ * Retrieve a list of matches with the current position.
+ *
+ * @mf
+ * The matchfinder structure.
+ * @in_base
+ * Pointer to the next byte in the input buffer to process _at the last
+ * time bt_matchfinder_init() or bt_matchfinder_slide_window() was called_.
+ * @cur_pos
+ * The current position in the input buffer relative to @in_base (the
+ * position of the sequence being matched against).
+ * @max_len
+ * The maximum permissible match length at this position. Must be >=
+ * BT_MATCHFINDER_REQUIRED_NBYTES.
+ * @nice_len
+ * Stop searching if a match of at least this length is found.
+ * Must be <= @max_len.
+ * @max_search_depth
+ * Limit on the number of potential matches to consider. Must be >= 1.
+ * @next_hashes
+ * The precomputed hash codes for the sequence beginning at @in_next.
+ * These will be used and then updated with the precomputed hashcodes for
+ * the sequence beginning at @in_next + 1.
+ * @lz_matchptr
+ * An array in which this function will record the matches. The recorded
+ * matches will be sorted by strictly increasing length and (non-strictly)
+ * increasing offset. The maximum number of matches that may be found is
+ * 'nice_len - 2'.
+ *
+ * The return value is a pointer to the next available slot in the @lz_matchptr
+ * array. (If no matches were found, this will be the same as @lz_matchptr.)
+ */
+static forceinline struct lz_match *
+bt_matchfinder_get_matches(struct bt_matchfinder *mf,
+ const u8 *in_base,
+ ptrdiff_t cur_pos,
+ u32 max_len,
+ u32 nice_len,
+ u32 max_search_depth,
+ u32 next_hashes[2],
+ struct lz_match *lz_matchptr)
+{
+ return bt_matchfinder_advance_one_byte(mf,
+ in_base,
+ cur_pos,
+ max_len,
+ nice_len,
+ max_search_depth,
+ next_hashes,
+ lz_matchptr,
+ true);
+}
+
+/*
+ * Advance the matchfinder, but don't record any matches.
+ *
+ * This is very similar to bt_matchfinder_get_matches() because both functions
+ * must do hashing and tree re-rooting.
+ */
+static forceinline void
+bt_matchfinder_skip_byte(struct bt_matchfinder *mf,
+ const u8 *in_base,
+ ptrdiff_t cur_pos,
+ u32 nice_len,
+ u32 max_search_depth,
+ u32 next_hashes[2])
+{
+ bt_matchfinder_advance_one_byte(mf,
+ in_base,
+ cur_pos,
+ nice_len,
+ nice_len,
+ max_search_depth,
+ next_hashes,
+ NULL,
+ false);
+}
+
+#endif /* LIB_BT_MATCHFINDER_H */
diff --git a/tools/z64compress/src/enc/libdeflate/lib/cpu_features_common.h b/tools/z64compress/src/enc/libdeflate/lib/cpu_features_common.h
new file mode 100644
index 000000000..bfcaa3637
--- /dev/null
+++ b/tools/z64compress/src/enc/libdeflate/lib/cpu_features_common.h
@@ -0,0 +1,91 @@
+/*
+ * cpu_features_common.h - code shared by all lib/$arch/cpu_features.c
+ *
+ * Copyright 2020 Eric Biggers
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef LIB_CPU_FEATURES_COMMON_H
+#define LIB_CPU_FEATURES_COMMON_H
+
+#if defined(TEST_SUPPORT__DO_NOT_USE) && !defined(FREESTANDING)
+# undef _ANSI_SOURCE /* for strdup() and strtok_r() */
+# ifndef __APPLE__
+# define _GNU_SOURCE 1
+# endif
+#  include <stdio.h>
+#  include <stdlib.h>
+#  include <string.h>
+#endif
+
+#include "lib_common.h"
+
+struct cpu_feature {
+ u32 bit;
+ const char *name;
+};
+
+#if defined(TEST_SUPPORT__DO_NOT_USE) && !defined(FREESTANDING)
+/* Disable any features that are listed in $LIBDEFLATE_DISABLE_CPU_FEATURES. */
+static inline void
+disable_cpu_features_for_testing(u32 *features,
+ const struct cpu_feature *feature_table,
+ size_t feature_table_length)
+{
+ char *env_value, *strbuf, *p, *saveptr = NULL;
+ size_t i;
+
+ env_value = getenv("LIBDEFLATE_DISABLE_CPU_FEATURES");
+ if (!env_value)
+ return;
+ strbuf = strdup(env_value);
+ if (!strbuf)
+ abort();
+ p = strtok_r(strbuf, ",", &saveptr);
+ while (p) {
+ for (i = 0; i < feature_table_length; i++) {
+ if (strcmp(p, feature_table[i].name) == 0) {
+ *features &= ~feature_table[i].bit;
+ break;
+ }
+ }
+ if (i == feature_table_length) {
+ fprintf(stderr,
+ "unrecognized feature in LIBDEFLATE_DISABLE_CPU_FEATURES: \"%s\"\n",
+ p);
+ abort();
+ }
+ p = strtok_r(NULL, ",", &saveptr);
+ }
+ free(strbuf);
+}
+#else /* TEST_SUPPORT__DO_NOT_USE */
+static inline void
+disable_cpu_features_for_testing(u32 *features,
+ const struct cpu_feature *feature_table,
+ size_t feature_table_length)
+{
+}
+#endif /* !TEST_SUPPORT__DO_NOT_USE */
+
+#endif /* LIB_CPU_FEATURES_COMMON_H */
diff --git a/tools/z64compress/src/enc/libdeflate/lib/crc32.c b/tools/z64compress/src/enc/libdeflate/lib/crc32.c
new file mode 100644
index 000000000..61c2cc763
--- /dev/null
+++ b/tools/z64compress/src/enc/libdeflate/lib/crc32.c
@@ -0,0 +1,263 @@
+/*
+ * crc32.c - CRC-32 checksum algorithm for the gzip format
+ *
+ * Copyright 2016 Eric Biggers
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * High-level description of CRC
+ * =============================
+ *
+ * Consider a bit sequence 'bits[1...len]'. Interpret 'bits' as the "message"
+ * polynomial M(x) with coefficients in GF(2) (the field of integers modulo 2),
+ * where the coefficient of 'x^i' is 'bits[len - i]'. Then, compute:
+ *
+ * R(x) = M(x)*x^n mod G(x)
+ *
+ * where G(x) is a selected "generator" polynomial of degree 'n'. The remainder
+ * R(x) is a polynomial of max degree 'n - 1'. The CRC of 'bits' is R(x)
+ * interpreted as a bitstring of length 'n'.
+ *
+ * CRC used in gzip
+ * ================
+ *
+ * In the gzip format (RFC 1952):
+ *
+ * - The bitstring to checksum is formed from the bytes of the uncompressed
+ * data by concatenating the bits from the bytes in order, proceeding
+ * from the low-order bit to the high-order bit within each byte.
+ *
+ * - The generator polynomial G(x) is: x^32 + x^26 + x^23 + x^22 + x^16 +
+ * x^12 + x^11 + x^10 + x^8 + x^7 + x^5 + x^4 + x^2 + x + 1.
+ * Consequently, the CRC length is 32 bits ("CRC-32").
+ *
+ * - The highest order 32 coefficients of M(x)*x^n are inverted.
+ *
+ * - All 32 coefficients of R(x) are inverted.
+ *
+ * The two inversions cause added leading and trailing zero bits to affect the
+ * resulting CRC, whereas with a regular CRC such bits would have no effect on
+ * the CRC.
+ *
+ * Computation and optimizations
+ * =============================
+ *
+ * We can compute R(x) through "long division", maintaining only 32 bits of
+ * state at any given time. Multiplication by 'x' can be implemented as
+ * right-shifting by 1 (assuming the polynomial<=>bitstring mapping where the
+ * highest order bit represents the coefficient of x^0), and both addition and
+ * subtraction can be implemented as bitwise exclusive OR (since we are working
+ * in GF(2)). Here is an unoptimized implementation:
+ *
+ * static u32 crc32_gzip(const u8 *p, size_t len)
+ * {
+ * u32 crc = 0;
+ * const u32 divisor = 0xEDB88320;
+ *
+ * for (size_t i = 0; i < len * 8 + 32; i++) {
+ * int bit;
+ * u32 multiple;
+ *
+ * if (i < len * 8)
+ * bit = (p[i / 8] >> (i % 8)) & 1;
+ * else
+ * bit = 0; // one of the 32 appended 0 bits
+ *
+ * if (i < 32) // the first 32 bits are inverted
+ * bit ^= 1;
+ *
+ * if (crc & 1)
+ * multiple = divisor;
+ * else
+ * multiple = 0;
+ *
+ * crc >>= 1;
+ * crc |= (u32)bit << 31;
+ * crc ^= multiple;
+ * }
+ *
+ * return ~crc;
+ * }
+ *
+ * In this implementation, the 32-bit integer 'crc' maintains the remainder of
+ * the currently processed portion of the message (with 32 zero bits appended)
+ * when divided by the generator polynomial. 'crc' is the representation of
+ * R(x), and 'divisor' is the representation of G(x) excluding the x^32
+ * coefficient. For each bit to process, we multiply R(x) by 'x^1', then add
+ * 'x^0' if the new bit is a 1. If this causes R(x) to gain a nonzero x^32
+ * term, then we subtract G(x) from R(x).
+ *
+ * We can speed this up by taking advantage of the fact that XOR is commutative
+ * and associative, so the order in which we combine the inputs into 'crc' is
+ * unimportant. And since each message bit we add doesn't affect the choice of
+ * 'multiple' until 32 bits later, we need not actually add each message bit
+ * until that point:
+ *
+ * static u32 crc32_gzip(const u8 *p, size_t len)
+ * {
+ * u32 crc = ~0;
+ * const u32 divisor = 0xEDB88320;
+ *
+ * for (size_t i = 0; i < len * 8; i++) {
+ * int bit;
+ * u32 multiple;
+ *
+ * bit = (p[i / 8] >> (i % 8)) & 1;
+ * crc ^= bit;
+ * if (crc & 1)
+ * multiple = divisor;
+ * else
+ * multiple = 0;
+ * crc >>= 1;
+ * crc ^= multiple;
+ * }
+ *
+ * return ~crc;
+ * }
+ *
+ * With the above implementation we get the effect of 32 appended 0 bits for
+ * free; they never affect the choice of a divisor, nor would they change the
+ * value of 'crc' if they were to be actually XOR'ed in. And by starting with a
+ * remainder of all 1 bits, we get the effect of complementing the first 32
+ * message bits.
+ *
+ * The next optimization is to process the input in multi-bit units. Suppose
+ * that we insert the next 'n' message bits into the remainder. Then we get an
+ * intermediate remainder of length '32 + n' bits, and the CRC of the extra 'n'
+ * bits is the amount by which the low 32 bits of the remainder will change as a
+ * result of cancelling out those 'n' bits. Taking n=8 (one byte) and
+ * precomputing a table containing the CRC of each possible byte, we get
+ * crc32_slice1() defined below.
+ *
+ * As a further optimization, we could increase the multi-bit unit size to 16.
+ * However, that is inefficient because the table size explodes from 256 entries
+ * (1024 bytes) to 65536 entries (262144 bytes), which wastes memory and won't
+ * fit in L1 cache on typical processors.
+ *
+ * However, we can actually process 4 bytes at a time using 4 different tables
+ * with 256 entries each. Logically, we form a 64-bit intermediate remainder
+ * and cancel out the high 32 bits in 8-bit chunks. Bits 32-39 are cancelled
+ * out by the CRC of those bits, whereas bits 40-47 are be cancelled out by the
+ * CRC of those bits with 8 zero bits appended, and so on.
+ *
+ * In crc32_slice8(), this method is extended to 8 bytes at a time. The
+ * intermediate remainder (which we never actually store explicitly) is 96 bits.
+ *
+ * On CPUs that support fast carryless multiplication, CRCs can be computed even
+ * more quickly via "folding". See e.g. the x86 PCLMUL implementation.
+ */
+
+#include "lib_common.h"
+#include "libdeflate.h"
+#include "crc32_multipliers.h"
+#include "crc32_tables.h"
+
+/* This is the default implementation. It uses the slice-by-8 method. */
+static u32 MAYBE_UNUSED
+crc32_slice8(u32 crc, const u8 *p, size_t len)
+{
+ const u8 * const end = p + len;
+ const u8 *end64;
+
+ for (; ((uintptr_t)p & 7) && p != end; p++)
+ crc = (crc >> 8) ^ crc32_slice8_table[(u8)crc ^ *p];
+
+ end64 = p + ((end - p) & ~7);
+ for (; p != end64; p += 8) {
+ u32 v1 = le32_bswap(*(const u32 *)(p + 0));
+ u32 v2 = le32_bswap(*(const u32 *)(p + 4));
+
+ crc = crc32_slice8_table[0x700 + (u8)((crc ^ v1) >> 0)] ^
+ crc32_slice8_table[0x600 + (u8)((crc ^ v1) >> 8)] ^
+ crc32_slice8_table[0x500 + (u8)((crc ^ v1) >> 16)] ^
+ crc32_slice8_table[0x400 + (u8)((crc ^ v1) >> 24)] ^
+ crc32_slice8_table[0x300 + (u8)(v2 >> 0)] ^
+ crc32_slice8_table[0x200 + (u8)(v2 >> 8)] ^
+ crc32_slice8_table[0x100 + (u8)(v2 >> 16)] ^
+ crc32_slice8_table[0x000 + (u8)(v2 >> 24)];
+ }
+
+ for (; p != end; p++)
+ crc = (crc >> 8) ^ crc32_slice8_table[(u8)crc ^ *p];
+
+ return crc;
+}
+
+/*
+ * This is a more lightweight generic implementation, which can be used as a
+ * subroutine by architecture-specific implementations to process small amounts
+ * of unaligned data at the beginning and/or end of the buffer.
+ */
+static forceinline u32 MAYBE_UNUSED
+crc32_slice1(u32 crc, const u8 *p, size_t len)
+{
+ size_t i;
+
+ for (i = 0; i < len; i++)
+ crc = (crc >> 8) ^ crc32_slice1_table[(u8)crc ^ p[i]];
+ return crc;
+}
+
+/* Include architecture-specific implementation(s) if available. */
+#undef DEFAULT_IMPL
+#undef arch_select_crc32_func
+typedef u32 (*crc32_func_t)(u32 crc, const u8 *p, size_t len);
+#if defined(ARCH_ARM32) || defined(ARCH_ARM64)
+# include "arm/crc32_impl.h"
+#elif defined(ARCH_X86_32) || defined(ARCH_X86_64)
+# include "x86/crc32_impl.h"
+#endif
+
+#ifndef DEFAULT_IMPL
+# define DEFAULT_IMPL crc32_slice8
+#endif
+
+#ifdef arch_select_crc32_func
+static u32 dispatch_crc32(u32 crc, const u8 *p, size_t len);
+
+static volatile crc32_func_t crc32_impl = dispatch_crc32;
+
+/* Choose the best implementation at runtime. */
+static u32 dispatch_crc32(u32 crc, const u8 *p, size_t len)
+{
+ crc32_func_t f = arch_select_crc32_func();
+
+ if (f == NULL)
+ f = DEFAULT_IMPL;
+
+ crc32_impl = f;
+ return f(crc, p, len);
+}
+#else
+/* The best implementation is statically known, so call it directly. */
+#define crc32_impl DEFAULT_IMPL
+#endif
+
+LIBDEFLATEAPI u32
+libdeflate_crc32(u32 crc, const void *p, size_t len)
+{
+ if (p == NULL) /* Return initial value. */
+ return 0;
+ return ~crc32_impl(~crc, p, len);
+}
diff --git a/tools/z64compress/src/enc/libdeflate/lib/crc32_multipliers.h b/tools/z64compress/src/enc/libdeflate/lib/crc32_multipliers.h
new file mode 100644
index 000000000..580b775bd
--- /dev/null
+++ b/tools/z64compress/src/enc/libdeflate/lib/crc32_multipliers.h
@@ -0,0 +1,329 @@
+/*
+ * crc32_multipliers.h - constants for CRC-32 folding
+ *
+ * THIS FILE WAS GENERATED BY gen_crc32_multipliers.c. DO NOT EDIT.
+ */
+
+#define CRC32_1VECS_MULT_1 0xae689191 /* x^159 mod G(x) */
+#define CRC32_1VECS_MULT_2 0xccaa009e /* x^95 mod G(x) */
+#define CRC32_1VECS_MULTS { CRC32_1VECS_MULT_1, CRC32_1VECS_MULT_2 }
+
+#define CRC32_2VECS_MULT_1 0xf1da05aa /* x^287 mod G(x) */
+#define CRC32_2VECS_MULT_2 0x81256527 /* x^223 mod G(x) */
+#define CRC32_2VECS_MULTS { CRC32_2VECS_MULT_1, CRC32_2VECS_MULT_2 }
+
+#define CRC32_3VECS_MULT_1 0x3db1ecdc /* x^415 mod G(x) */
+#define CRC32_3VECS_MULT_2 0xaf449247 /* x^351 mod G(x) */
+#define CRC32_3VECS_MULTS { CRC32_3VECS_MULT_1, CRC32_3VECS_MULT_2 }
+
+#define CRC32_4VECS_MULT_1 0x8f352d95 /* x^543 mod G(x) */
+#define CRC32_4VECS_MULT_2 0x1d9513d7 /* x^479 mod G(x) */
+#define CRC32_4VECS_MULTS { CRC32_4VECS_MULT_1, CRC32_4VECS_MULT_2 }
+
+#define CRC32_5VECS_MULT_1 0x1c279815 /* x^671 mod G(x) */
+#define CRC32_5VECS_MULT_2 0xae0b5394 /* x^607 mod G(x) */
+#define CRC32_5VECS_MULTS { CRC32_5VECS_MULT_1, CRC32_5VECS_MULT_2 }
+
+#define CRC32_6VECS_MULT_1 0xdf068dc2 /* x^799 mod G(x) */
+#define CRC32_6VECS_MULT_2 0x57c54819 /* x^735 mod G(x) */
+#define CRC32_6VECS_MULTS { CRC32_6VECS_MULT_1, CRC32_6VECS_MULT_2 }
+
+#define CRC32_7VECS_MULT_1 0x31f8303f /* x^927 mod G(x) */
+#define CRC32_7VECS_MULT_2 0x0cbec0ed /* x^863 mod G(x) */
+#define CRC32_7VECS_MULTS { CRC32_7VECS_MULT_1, CRC32_7VECS_MULT_2 }
+
+#define CRC32_8VECS_MULT_1 0x33fff533 /* x^1055 mod G(x) */
+#define CRC32_8VECS_MULT_2 0x910eeec1 /* x^991 mod G(x) */
+#define CRC32_8VECS_MULTS { CRC32_8VECS_MULT_1, CRC32_8VECS_MULT_2 }
+
+#define CRC32_9VECS_MULT_1 0x26b70c3d /* x^1183 mod G(x) */
+#define CRC32_9VECS_MULT_2 0x3f41287a /* x^1119 mod G(x) */
+#define CRC32_9VECS_MULTS { CRC32_9VECS_MULT_1, CRC32_9VECS_MULT_2 }
+
+#define CRC32_10VECS_MULT_1 0xe3543be0 /* x^1311 mod G(x) */
+#define CRC32_10VECS_MULT_2 0x9026d5b1 /* x^1247 mod G(x) */
+#define CRC32_10VECS_MULTS { CRC32_10VECS_MULT_1, CRC32_10VECS_MULT_2 }
+
+#define CRC32_11VECS_MULT_1 0x5a1bb05d /* x^1439 mod G(x) */
+#define CRC32_11VECS_MULT_2 0xd1df2327 /* x^1375 mod G(x) */
+#define CRC32_11VECS_MULTS { CRC32_11VECS_MULT_1, CRC32_11VECS_MULT_2 }
+
+#define CRC32_12VECS_MULT_1 0x596c8d81 /* x^1567 mod G(x) */
+#define CRC32_12VECS_MULT_2 0xf5e48c85 /* x^1503 mod G(x) */
+#define CRC32_12VECS_MULTS { CRC32_12VECS_MULT_1, CRC32_12VECS_MULT_2 }
+
+#define CRC32_FINAL_MULT 0xb8bc6765 /* x^63 mod G(x) */
+#define CRC32_BARRETT_CONSTANT_1 0x00000001f7011641ULL /* floor(x^64 / G(x)) */
+#define CRC32_BARRETT_CONSTANT_2 0x00000001db710641ULL /* G(x) */
+#define CRC32_BARRETT_CONSTANTS { CRC32_BARRETT_CONSTANT_1, CRC32_BARRETT_CONSTANT_2 }
+
+#define CRC32_NUM_CHUNKS 4
+#define CRC32_MIN_VARIABLE_CHUNK_LEN 128UL
+#define CRC32_MAX_VARIABLE_CHUNK_LEN 16384UL
+
+/* Multipliers for implementations that use a variable chunk length */
+static const u32 crc32_mults_for_chunklen[][CRC32_NUM_CHUNKS - 1] MAYBE_UNUSED = {
+ { 0 /* unused row */ },
+ /* chunk_len=128 */
+ { 0xd31343ea /* x^3039 mod G(x) */, 0xe95c1271 /* x^2015 mod G(x) */, 0x910eeec1 /* x^991 mod G(x) */, },
+ /* chunk_len=256 */
+ { 0x1d6708a0 /* x^6111 mod G(x) */, 0x0c30f51d /* x^4063 mod G(x) */, 0xe95c1271 /* x^2015 mod G(x) */, },
+ /* chunk_len=384 */
+ { 0xdb3839f3 /* x^9183 mod G(x) */, 0x1d6708a0 /* x^6111 mod G(x) */, 0xd31343ea /* x^3039 mod G(x) */, },
+ /* chunk_len=512 */
+ { 0x1753ab84 /* x^12255 mod G(x) */, 0xbbf2f6d6 /* x^8159 mod G(x) */, 0x0c30f51d /* x^4063 mod G(x) */, },
+ /* chunk_len=640 */
+ { 0x3796455c /* x^15327 mod G(x) */, 0xb8e0e4a8 /* x^10207 mod G(x) */, 0xc352f6de /* x^5087 mod G(x) */, },
+ /* chunk_len=768 */
+ { 0x3954de39 /* x^18399 mod G(x) */, 0x1753ab84 /* x^12255 mod G(x) */, 0x1d6708a0 /* x^6111 mod G(x) */, },
+ /* chunk_len=896 */
+ { 0x632d78c5 /* x^21471 mod G(x) */, 0x3fc33de4 /* x^14303 mod G(x) */, 0x9a1b53c8 /* x^7135 mod G(x) */, },
+ /* chunk_len=1024 */
+ { 0xa0decef3 /* x^24543 mod G(x) */, 0x7b4aa8b7 /* x^16351 mod G(x) */, 0xbbf2f6d6 /* x^8159 mod G(x) */, },
+ /* chunk_len=1152 */
+ { 0xe9c09bb0 /* x^27615 mod G(x) */, 0x3954de39 /* x^18399 mod G(x) */, 0xdb3839f3 /* x^9183 mod G(x) */, },
+ /* chunk_len=1280 */
+ { 0xd51917a4 /* x^30687 mod G(x) */, 0xcae68461 /* x^20447 mod G(x) */, 0xb8e0e4a8 /* x^10207 mod G(x) */, },
+ /* chunk_len=1408 */
+ { 0x154a8a62 /* x^33759 mod G(x) */, 0x41e7589c /* x^22495 mod G(x) */, 0x3e9a43cd /* x^11231 mod G(x) */, },
+ /* chunk_len=1536 */
+ { 0xf196555d /* x^36831 mod G(x) */, 0xa0decef3 /* x^24543 mod G(x) */, 0x1753ab84 /* x^12255 mod G(x) */, },
+ /* chunk_len=1664 */
+ { 0x8eec2999 /* x^39903 mod G(x) */, 0xefb0a128 /* x^26591 mod G(x) */, 0x6044fbb0 /* x^13279 mod G(x) */, },
+ /* chunk_len=1792 */
+ { 0x27892abf /* x^42975 mod G(x) */, 0x48d72bb1 /* x^28639 mod G(x) */, 0x3fc33de4 /* x^14303 mod G(x) */, },
+ /* chunk_len=1920 */
+ { 0x77bc2419 /* x^46047 mod G(x) */, 0xd51917a4 /* x^30687 mod G(x) */, 0x3796455c /* x^15327 mod G(x) */, },
+ /* chunk_len=2048 */
+ { 0xcea114a5 /* x^49119 mod G(x) */, 0x68c0a2c5 /* x^32735 mod G(x) */, 0x7b4aa8b7 /* x^16351 mod G(x) */, },
+ /* chunk_len=2176 */
+ { 0xa1077e85 /* x^52191 mod G(x) */, 0x188cc628 /* x^34783 mod G(x) */, 0x0c21f835 /* x^17375 mod G(x) */, },
+ /* chunk_len=2304 */
+ { 0xc5ed75e1 /* x^55263 mod G(x) */, 0xf196555d /* x^36831 mod G(x) */, 0x3954de39 /* x^18399 mod G(x) */, },
+ /* chunk_len=2432 */
+ { 0xca4fba3f /* x^58335 mod G(x) */, 0x0acfa26f /* x^38879 mod G(x) */, 0x6cb21510 /* x^19423 mod G(x) */, },
+ /* chunk_len=2560 */
+ { 0xcf5bcdc4 /* x^61407 mod G(x) */, 0x4fae7fc0 /* x^40927 mod G(x) */, 0xcae68461 /* x^20447 mod G(x) */, },
+ /* chunk_len=2688 */
+ { 0xf36b9d16 /* x^64479 mod G(x) */, 0x27892abf /* x^42975 mod G(x) */, 0x632d78c5 /* x^21471 mod G(x) */, },
+ /* chunk_len=2816 */
+ { 0xf76fd988 /* x^67551 mod G(x) */, 0xed5c39b1 /* x^45023 mod G(x) */, 0x41e7589c /* x^22495 mod G(x) */, },
+ /* chunk_len=2944 */
+ { 0x6c45d92e /* x^70623 mod G(x) */, 0xff809fcd /* x^47071 mod G(x) */, 0x0c46baec /* x^23519 mod G(x) */, },
+ /* chunk_len=3072 */
+ { 0x6116b82b /* x^73695 mod G(x) */, 0xcea114a5 /* x^49119 mod G(x) */, 0xa0decef3 /* x^24543 mod G(x) */, },
+ /* chunk_len=3200 */
+ { 0x4d9899bb /* x^76767 mod G(x) */, 0x9f9d8d9c /* x^51167 mod G(x) */, 0x53deb236 /* x^25567 mod G(x) */, },
+ /* chunk_len=3328 */
+ { 0x3e7c93b9 /* x^79839 mod G(x) */, 0x6666b805 /* x^53215 mod G(x) */, 0xefb0a128 /* x^26591 mod G(x) */, },
+ /* chunk_len=3456 */
+ { 0x388b20ac /* x^82911 mod G(x) */, 0xc5ed75e1 /* x^55263 mod G(x) */, 0xe9c09bb0 /* x^27615 mod G(x) */, },
+ /* chunk_len=3584 */
+ { 0x0956d953 /* x^85983 mod G(x) */, 0x97fbdb14 /* x^57311 mod G(x) */, 0x48d72bb1 /* x^28639 mod G(x) */, },
+ /* chunk_len=3712 */
+ { 0x55cb4dfe /* x^89055 mod G(x) */, 0x1b37c832 /* x^59359 mod G(x) */, 0xc07331b3 /* x^29663 mod G(x) */, },
+ /* chunk_len=3840 */
+ { 0x52222fea /* x^92127 mod G(x) */, 0xcf5bcdc4 /* x^61407 mod G(x) */, 0xd51917a4 /* x^30687 mod G(x) */, },
+ /* chunk_len=3968 */
+ { 0x0603989b /* x^95199 mod G(x) */, 0xb03c8112 /* x^63455 mod G(x) */, 0x5e04b9a5 /* x^31711 mod G(x) */, },
+ /* chunk_len=4096 */
+ { 0x4470c029 /* x^98271 mod G(x) */, 0x2339d155 /* x^65503 mod G(x) */, 0x68c0a2c5 /* x^32735 mod G(x) */, },
+ /* chunk_len=4224 */
+ { 0xb6f35093 /* x^101343 mod G(x) */, 0xf76fd988 /* x^67551 mod G(x) */, 0x154a8a62 /* x^33759 mod G(x) */, },
+ /* chunk_len=4352 */
+ { 0xc46805ba /* x^104415 mod G(x) */, 0x416f9449 /* x^69599 mod G(x) */, 0x188cc628 /* x^34783 mod G(x) */, },
+ /* chunk_len=4480 */
+ { 0xc3876592 /* x^107487 mod G(x) */, 0x4b809189 /* x^71647 mod G(x) */, 0xc35cf6e7 /* x^35807 mod G(x) */, },
+ /* chunk_len=4608 */
+ { 0x5b0c98b9 /* x^110559 mod G(x) */, 0x6116b82b /* x^73695 mod G(x) */, 0xf196555d /* x^36831 mod G(x) */, },
+ /* chunk_len=4736 */
+ { 0x30d13e5f /* x^113631 mod G(x) */, 0x4c5a315a /* x^75743 mod G(x) */, 0x8c224466 /* x^37855 mod G(x) */, },
+ /* chunk_len=4864 */
+ { 0x54afca53 /* x^116703 mod G(x) */, 0xbccfa2c1 /* x^77791 mod G(x) */, 0x0acfa26f /* x^38879 mod G(x) */, },
+ /* chunk_len=4992 */
+ { 0x93102436 /* x^119775 mod G(x) */, 0x3e7c93b9 /* x^79839 mod G(x) */, 0x8eec2999 /* x^39903 mod G(x) */, },
+ /* chunk_len=5120 */
+ { 0xbd2655a8 /* x^122847 mod G(x) */, 0x3e116c9d /* x^81887 mod G(x) */, 0x4fae7fc0 /* x^40927 mod G(x) */, },
+ /* chunk_len=5248 */
+ { 0x70cd7f26 /* x^125919 mod G(x) */, 0x408e57f2 /* x^83935 mod G(x) */, 0x1691be45 /* x^41951 mod G(x) */, },
+ /* chunk_len=5376 */
+ { 0x2d546c53 /* x^128991 mod G(x) */, 0x0956d953 /* x^85983 mod G(x) */, 0x27892abf /* x^42975 mod G(x) */, },
+ /* chunk_len=5504 */
+ { 0xb53410a8 /* x^132063 mod G(x) */, 0x42ebf0ad /* x^88031 mod G(x) */, 0x161f3c12 /* x^43999 mod G(x) */, },
+ /* chunk_len=5632 */
+ { 0x67a93f75 /* x^135135 mod G(x) */, 0xcf3233e4 /* x^90079 mod G(x) */, 0xed5c39b1 /* x^45023 mod G(x) */, },
+ /* chunk_len=5760 */
+ { 0x9830ac33 /* x^138207 mod G(x) */, 0x52222fea /* x^92127 mod G(x) */, 0x77bc2419 /* x^46047 mod G(x) */, },
+ /* chunk_len=5888 */
+ { 0xb0b6fc3e /* x^141279 mod G(x) */, 0x2fde73f8 /* x^94175 mod G(x) */, 0xff809fcd /* x^47071 mod G(x) */, },
+ /* chunk_len=6016 */
+ { 0x84170f16 /* x^144351 mod G(x) */, 0xced90d99 /* x^96223 mod G(x) */, 0x30de0f98 /* x^48095 mod G(x) */, },
+ /* chunk_len=6144 */
+ { 0xd7017a0c /* x^147423 mod G(x) */, 0x4470c029 /* x^98271 mod G(x) */, 0xcea114a5 /* x^49119 mod G(x) */, },
+ /* chunk_len=6272 */
+ { 0xadb25de6 /* x^150495 mod G(x) */, 0x84f40beb /* x^100319 mod G(x) */, 0x2b7e0e1b /* x^50143 mod G(x) */, },
+ /* chunk_len=6400 */
+ { 0x8282fddc /* x^153567 mod G(x) */, 0xec855937 /* x^102367 mod G(x) */, 0x9f9d8d9c /* x^51167 mod G(x) */, },
+ /* chunk_len=6528 */
+ { 0x46362bee /* x^156639 mod G(x) */, 0xc46805ba /* x^104415 mod G(x) */, 0xa1077e85 /* x^52191 mod G(x) */, },
+ /* chunk_len=6656 */
+ { 0xb9077a01 /* x^159711 mod G(x) */, 0xdf7a24ac /* x^106463 mod G(x) */, 0x6666b805 /* x^53215 mod G(x) */, },
+ /* chunk_len=6784 */
+ { 0xf51d9bc6 /* x^162783 mod G(x) */, 0x2b52dc39 /* x^108511 mod G(x) */, 0x7e774cf6 /* x^54239 mod G(x) */, },
+ /* chunk_len=6912 */
+ { 0x4ca19a29 /* x^165855 mod G(x) */, 0x5b0c98b9 /* x^110559 mod G(x) */, 0xc5ed75e1 /* x^55263 mod G(x) */, },
+ /* chunk_len=7040 */
+ { 0xdc0fc3fc /* x^168927 mod G(x) */, 0xb939fcdf /* x^112607 mod G(x) */, 0x3678fed2 /* x^56287 mod G(x) */, },
+ /* chunk_len=7168 */
+ { 0x63c3d167 /* x^171999 mod G(x) */, 0x70f9947d /* x^114655 mod G(x) */, 0x97fbdb14 /* x^57311 mod G(x) */, },
+ /* chunk_len=7296 */
+ { 0x5851d254 /* x^175071 mod G(x) */, 0x54afca53 /* x^116703 mod G(x) */, 0xca4fba3f /* x^58335 mod G(x) */, },
+ /* chunk_len=7424 */
+ { 0xfeacf2a1 /* x^178143 mod G(x) */, 0x7a3c0a6a /* x^118751 mod G(x) */, 0x1b37c832 /* x^59359 mod G(x) */, },
+ /* chunk_len=7552 */
+ { 0x93b7edc8 /* x^181215 mod G(x) */, 0x1fea4d2a /* x^120799 mod G(x) */, 0x58fa96ee /* x^60383 mod G(x) */, },
+ /* chunk_len=7680 */
+ { 0x5539e44a /* x^184287 mod G(x) */, 0xbd2655a8 /* x^122847 mod G(x) */, 0xcf5bcdc4 /* x^61407 mod G(x) */, },
+ /* chunk_len=7808 */
+ { 0xde32a3d2 /* x^187359 mod G(x) */, 0x4ff61aa1 /* x^124895 mod G(x) */, 0x6a6a3694 /* x^62431 mod G(x) */, },
+ /* chunk_len=7936 */
+ { 0xf0baeeb6 /* x^190431 mod G(x) */, 0x7ae2f6f4 /* x^126943 mod G(x) */, 0xb03c8112 /* x^63455 mod G(x) */, },
+ /* chunk_len=8064 */
+ { 0xbe15887f /* x^193503 mod G(x) */, 0x2d546c53 /* x^128991 mod G(x) */, 0xf36b9d16 /* x^64479 mod G(x) */, },
+ /* chunk_len=8192 */
+ { 0x64f34a05 /* x^196575 mod G(x) */, 0xe0ee5efe /* x^131039 mod G(x) */, 0x2339d155 /* x^65503 mod G(x) */, },
+ /* chunk_len=8320 */
+ { 0x1b6d1aea /* x^199647 mod G(x) */, 0xfeafb67c /* x^133087 mod G(x) */, 0x4fb001a8 /* x^66527 mod G(x) */, },
+ /* chunk_len=8448 */
+ { 0x82adb0b8 /* x^202719 mod G(x) */, 0x67a93f75 /* x^135135 mod G(x) */, 0xf76fd988 /* x^67551 mod G(x) */, },
+ /* chunk_len=8576 */
+ { 0x694587c7 /* x^205791 mod G(x) */, 0x3b34408b /* x^137183 mod G(x) */, 0xeccb2978 /* x^68575 mod G(x) */, },
+ /* chunk_len=8704 */
+ { 0xd2fc57c3 /* x^208863 mod G(x) */, 0x07fcf8c6 /* x^139231 mod G(x) */, 0x416f9449 /* x^69599 mod G(x) */, },
+ /* chunk_len=8832 */
+ { 0x9dd6837c /* x^211935 mod G(x) */, 0xb0b6fc3e /* x^141279 mod G(x) */, 0x6c45d92e /* x^70623 mod G(x) */, },
+ /* chunk_len=8960 */
+ { 0x3a9d1f97 /* x^215007 mod G(x) */, 0xefd033b2 /* x^143327 mod G(x) */, 0x4b809189 /* x^71647 mod G(x) */, },
+ /* chunk_len=9088 */
+ { 0x1eee1d2a /* x^218079 mod G(x) */, 0xf2a6e46e /* x^145375 mod G(x) */, 0x55b4c814 /* x^72671 mod G(x) */, },
+ /* chunk_len=9216 */
+ { 0xb57c7728 /* x^221151 mod G(x) */, 0xd7017a0c /* x^147423 mod G(x) */, 0x6116b82b /* x^73695 mod G(x) */, },
+ /* chunk_len=9344 */
+ { 0xf2fc5d61 /* x^224223 mod G(x) */, 0x242aac86 /* x^149471 mod G(x) */, 0x05245cf0 /* x^74719 mod G(x) */, },
+ /* chunk_len=9472 */
+ { 0x26387824 /* x^227295 mod G(x) */, 0xc15c4ca5 /* x^151519 mod G(x) */, 0x4c5a315a /* x^75743 mod G(x) */, },
+ /* chunk_len=9600 */
+ { 0x8c151e77 /* x^230367 mod G(x) */, 0x8282fddc /* x^153567 mod G(x) */, 0x4d9899bb /* x^76767 mod G(x) */, },
+ /* chunk_len=9728 */
+ { 0x8ea1f680 /* x^233439 mod G(x) */, 0xf5ff6cdd /* x^155615 mod G(x) */, 0xbccfa2c1 /* x^77791 mod G(x) */, },
+ /* chunk_len=9856 */
+ { 0xe8cf3d2a /* x^236511 mod G(x) */, 0x338b1fb1 /* x^157663 mod G(x) */, 0xeda61f70 /* x^78815 mod G(x) */, },
+ /* chunk_len=9984 */
+ { 0x21f15b59 /* x^239583 mod G(x) */, 0xb9077a01 /* x^159711 mod G(x) */, 0x3e7c93b9 /* x^79839 mod G(x) */, },
+ /* chunk_len=10112 */
+ { 0x6f68d64a /* x^242655 mod G(x) */, 0x901b0161 /* x^161759 mod G(x) */, 0xb9fd3537 /* x^80863 mod G(x) */, },
+ /* chunk_len=10240 */
+ { 0x71b74d95 /* x^245727 mod G(x) */, 0xf5ddd5ad /* x^163807 mod G(x) */, 0x3e116c9d /* x^81887 mod G(x) */, },
+ /* chunk_len=10368 */
+ { 0x4c2e7261 /* x^248799 mod G(x) */, 0x4ca19a29 /* x^165855 mod G(x) */, 0x388b20ac /* x^82911 mod G(x) */, },
+ /* chunk_len=10496 */
+ { 0x8a2d38e8 /* x^251871 mod G(x) */, 0xd27ee0a1 /* x^167903 mod G(x) */, 0x408e57f2 /* x^83935 mod G(x) */, },
+ /* chunk_len=10624 */
+ { 0x7e58ca17 /* x^254943 mod G(x) */, 0x69dfedd2 /* x^169951 mod G(x) */, 0x3a76805e /* x^84959 mod G(x) */, },
+ /* chunk_len=10752 */
+ { 0xf997967f /* x^258015 mod G(x) */, 0x63c3d167 /* x^171999 mod G(x) */, 0x0956d953 /* x^85983 mod G(x) */, },
+ /* chunk_len=10880 */
+ { 0x48215963 /* x^261087 mod G(x) */, 0x71e1dfe0 /* x^174047 mod G(x) */, 0x42a6d410 /* x^87007 mod G(x) */, },
+ /* chunk_len=11008 */
+ { 0xa704b94c /* x^264159 mod G(x) */, 0x679f198a /* x^176095 mod G(x) */, 0x42ebf0ad /* x^88031 mod G(x) */, },
+ /* chunk_len=11136 */
+ { 0x1d699056 /* x^267231 mod G(x) */, 0xfeacf2a1 /* x^178143 mod G(x) */, 0x55cb4dfe /* x^89055 mod G(x) */, },
+ /* chunk_len=11264 */
+ { 0x6800bcc5 /* x^270303 mod G(x) */, 0x16024f15 /* x^180191 mod G(x) */, 0xcf3233e4 /* x^90079 mod G(x) */, },
+ /* chunk_len=11392 */
+ { 0x2d48e4ca /* x^273375 mod G(x) */, 0xbe61582f /* x^182239 mod G(x) */, 0x46026283 /* x^91103 mod G(x) */, },
+ /* chunk_len=11520 */
+ { 0x4c4c2b55 /* x^276447 mod G(x) */, 0x5539e44a /* x^184287 mod G(x) */, 0x52222fea /* x^92127 mod G(x) */, },
+ /* chunk_len=11648 */
+ { 0xd8ce94cb /* x^279519 mod G(x) */, 0xbc613c26 /* x^186335 mod G(x) */, 0x33776b4b /* x^93151 mod G(x) */, },
+ /* chunk_len=11776 */
+ { 0xd0b5a02b /* x^282591 mod G(x) */, 0x490d3cc6 /* x^188383 mod G(x) */, 0x2fde73f8 /* x^94175 mod G(x) */, },
+ /* chunk_len=11904 */
+ { 0xa223f7ec /* x^285663 mod G(x) */, 0xf0baeeb6 /* x^190431 mod G(x) */, 0x0603989b /* x^95199 mod G(x) */, },
+ /* chunk_len=12032 */
+ { 0x58de337a /* x^288735 mod G(x) */, 0x3bf3d597 /* x^192479 mod G(x) */, 0xced90d99 /* x^96223 mod G(x) */, },
+ /* chunk_len=12160 */
+ { 0x37f5d8f4 /* x^291807 mod G(x) */, 0x4d5b699b /* x^194527 mod G(x) */, 0xd7262e5f /* x^97247 mod G(x) */, },
+ /* chunk_len=12288 */
+ { 0xfa8a435d /* x^294879 mod G(x) */, 0x64f34a05 /* x^196575 mod G(x) */, 0x4470c029 /* x^98271 mod G(x) */, },
+ /* chunk_len=12416 */
+ { 0x238709fe /* x^297951 mod G(x) */, 0x52e7458f /* x^198623 mod G(x) */, 0x9a174cd3 /* x^99295 mod G(x) */, },
+ /* chunk_len=12544 */
+ { 0x9e1ba6f5 /* x^301023 mod G(x) */, 0xef0272f7 /* x^200671 mod G(x) */, 0x84f40beb /* x^100319 mod G(x) */, },
+ /* chunk_len=12672 */
+ { 0xcd8b57fa /* x^304095 mod G(x) */, 0x82adb0b8 /* x^202719 mod G(x) */, 0xb6f35093 /* x^101343 mod G(x) */, },
+ /* chunk_len=12800 */
+ { 0x0aed142f /* x^307167 mod G(x) */, 0xb1650290 /* x^204767 mod G(x) */, 0xec855937 /* x^102367 mod G(x) */, },
+ /* chunk_len=12928 */
+ { 0xd1f064db /* x^310239 mod G(x) */, 0x6e7340d3 /* x^206815 mod G(x) */, 0x5c28cb52 /* x^103391 mod G(x) */, },
+ /* chunk_len=13056 */
+ { 0x464ac895 /* x^313311 mod G(x) */, 0xd2fc57c3 /* x^208863 mod G(x) */, 0xc46805ba /* x^104415 mod G(x) */, },
+ /* chunk_len=13184 */
+ { 0xa0e6beea /* x^316383 mod G(x) */, 0xcfeec3d0 /* x^210911 mod G(x) */, 0x0225d214 /* x^105439 mod G(x) */, },
+ /* chunk_len=13312 */
+ { 0x78703ce0 /* x^319455 mod G(x) */, 0xc60f6075 /* x^212959 mod G(x) */, 0xdf7a24ac /* x^106463 mod G(x) */, },
+ /* chunk_len=13440 */
+ { 0xfea48165 /* x^322527 mod G(x) */, 0x3a9d1f97 /* x^215007 mod G(x) */, 0xc3876592 /* x^107487 mod G(x) */, },
+ /* chunk_len=13568 */
+ { 0xdb89b8db /* x^325599 mod G(x) */, 0xa6172211 /* x^217055 mod G(x) */, 0x2b52dc39 /* x^108511 mod G(x) */, },
+ /* chunk_len=13696 */
+ { 0x7ca03731 /* x^328671 mod G(x) */, 0x1db42849 /* x^219103 mod G(x) */, 0xc5df246e /* x^109535 mod G(x) */, },
+ /* chunk_len=13824 */
+ { 0x8801d0aa /* x^331743 mod G(x) */, 0xb57c7728 /* x^221151 mod G(x) */, 0x5b0c98b9 /* x^110559 mod G(x) */, },
+ /* chunk_len=13952 */
+ { 0xf89cd7f0 /* x^334815 mod G(x) */, 0xcc396a0b /* x^223199 mod G(x) */, 0xdb799c51 /* x^111583 mod G(x) */, },
+ /* chunk_len=14080 */
+ { 0x1611a808 /* x^337887 mod G(x) */, 0xaeae6105 /* x^225247 mod G(x) */, 0xb939fcdf /* x^112607 mod G(x) */, },
+ /* chunk_len=14208 */
+ { 0xe3cdb888 /* x^340959 mod G(x) */, 0x26387824 /* x^227295 mod G(x) */, 0x30d13e5f /* x^113631 mod G(x) */, },
+ /* chunk_len=14336 */
+ { 0x552a4cf6 /* x^344031 mod G(x) */, 0xee2d04bb /* x^229343 mod G(x) */, 0x70f9947d /* x^114655 mod G(x) */, },
+ /* chunk_len=14464 */
+ { 0x85e248e9 /* x^347103 mod G(x) */, 0x0a79663f /* x^231391 mod G(x) */, 0x53339cf7 /* x^115679 mod G(x) */, },
+ /* chunk_len=14592 */
+ { 0x1c61c3e9 /* x^350175 mod G(x) */, 0x8ea1f680 /* x^233439 mod G(x) */, 0x54afca53 /* x^116703 mod G(x) */, },
+ /* chunk_len=14720 */
+ { 0xb14cfc2b /* x^353247 mod G(x) */, 0x2e073302 /* x^235487 mod G(x) */, 0x10897992 /* x^117727 mod G(x) */, },
+ /* chunk_len=14848 */
+ { 0x6ec444cc /* x^356319 mod G(x) */, 0x9e819f13 /* x^237535 mod G(x) */, 0x7a3c0a6a /* x^118751 mod G(x) */, },
+ /* chunk_len=14976 */
+ { 0xe2fa5f80 /* x^359391 mod G(x) */, 0x21f15b59 /* x^239583 mod G(x) */, 0x93102436 /* x^119775 mod G(x) */, },
+ /* chunk_len=15104 */
+ { 0x6d33f4c6 /* x^362463 mod G(x) */, 0x31a27455 /* x^241631 mod G(x) */, 0x1fea4d2a /* x^120799 mod G(x) */, },
+ /* chunk_len=15232 */
+ { 0xb6dec609 /* x^365535 mod G(x) */, 0x4d437056 /* x^243679 mod G(x) */, 0x42eb1e2a /* x^121823 mod G(x) */, },
+ /* chunk_len=15360 */
+ { 0x1846c518 /* x^368607 mod G(x) */, 0x71b74d95 /* x^245727 mod G(x) */, 0xbd2655a8 /* x^122847 mod G(x) */, },
+ /* chunk_len=15488 */
+ { 0x9f947f8a /* x^371679 mod G(x) */, 0x2b501619 /* x^247775 mod G(x) */, 0xa4924b0e /* x^123871 mod G(x) */, },
+ /* chunk_len=15616 */
+ { 0xb7442f4d /* x^374751 mod G(x) */, 0xba30a5d8 /* x^249823 mod G(x) */, 0x4ff61aa1 /* x^124895 mod G(x) */, },
+ /* chunk_len=15744 */
+ { 0xe2c93242 /* x^377823 mod G(x) */, 0x8a2d38e8 /* x^251871 mod G(x) */, 0x70cd7f26 /* x^125919 mod G(x) */, },
+ /* chunk_len=15872 */
+ { 0xcd6863df /* x^380895 mod G(x) */, 0x78fd88dc /* x^253919 mod G(x) */, 0x7ae2f6f4 /* x^126943 mod G(x) */, },
+ /* chunk_len=16000 */
+ { 0xd512001d /* x^383967 mod G(x) */, 0xe6612dff /* x^255967 mod G(x) */, 0x5c4d0ca9 /* x^127967 mod G(x) */, },
+ /* chunk_len=16128 */
+ { 0x4e8d6b6c /* x^387039 mod G(x) */, 0xf997967f /* x^258015 mod G(x) */, 0x2d546c53 /* x^128991 mod G(x) */, },
+ /* chunk_len=16256 */
+ { 0xfa653ba1 /* x^390111 mod G(x) */, 0xc99014d4 /* x^260063 mod G(x) */, 0xa0c9fd27 /* x^130015 mod G(x) */, },
+ /* chunk_len=16384 */
+ { 0x49893408 /* x^393183 mod G(x) */, 0x29c2448b /* x^262111 mod G(x) */, 0xe0ee5efe /* x^131039 mod G(x) */, },
+};
+
+/* Multipliers for implementations that use a large fixed chunk length */
+#define CRC32_FIXED_CHUNK_LEN 32768UL
+#define CRC32_FIXED_CHUNK_MULT_1 0x29c2448b /* x^262111 mod G(x) */
+#define CRC32_FIXED_CHUNK_MULT_2 0x4b912f53 /* x^524255 mod G(x) */
+#define CRC32_FIXED_CHUNK_MULT_3 0x454c93be /* x^786399 mod G(x) */
diff --git a/tools/z64compress/src/enc/libdeflate/lib/crc32_tables.h b/tools/z64compress/src/enc/libdeflate/lib/crc32_tables.h
new file mode 100644
index 000000000..86228c72a
--- /dev/null
+++ b/tools/z64compress/src/enc/libdeflate/lib/crc32_tables.h
@@ -0,0 +1,587 @@
+/*
+ * crc32_tables.h - data tables for CRC-32 computation
+ *
+ * THIS FILE WAS GENERATED BY gen_crc32_tables.c. DO NOT EDIT.
+ */
+
+static const u32 crc32_slice1_table[] MAYBE_UNUSED = {	/* 256-entry lookup table, indexed by one byte in crc32_slice1() (generated -- do not edit) */
+	0x00000000, 0x77073096, 0xee0e612c, 0x990951ba,
+	0x076dc419, 0x706af48f, 0xe963a535, 0x9e6495a3,
+	0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988,
+	0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91,
+	0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de,
+	0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7,
+	0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec,
+	0x14015c4f, 0x63066cd9, 0xfa0f3d63, 0x8d080df5,
+	0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172,
+	0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b,
+	0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940,
+	0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59,
+	0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116,
+	0x21b4f4b5, 0x56b3c423, 0xcfba9599, 0xb8bda50f,
+	0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924,
+	0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d,
+	0x76dc4190, 0x01db7106, 0x98d220bc, 0xefd5102a,
+	0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433,
+	0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818,
+	0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01,
+	0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e,
+	0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457,
+	0x65b0d9c6, 0x12b7e950, 0x8bbeb8ea, 0xfcb9887c,
+	0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65,
+	0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2,
+	0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb,
+	0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0,
+	0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9,
+	0x5005713c, 0x270241aa, 0xbe0b1010, 0xc90c2086,
+	0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f,
+	0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4,
+	0x59b33d17, 0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad,
+	0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a,
+	0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683,
+	0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8,
+	0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1,
+	0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe,
+	0xf762575d, 0x806567cb, 0x196c3671, 0x6e6b06e7,
+	0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc,
+	0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5,
+	0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252,
+	0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b,
+	0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60,
+	0xdf60efc3, 0xa867df55, 0x316e8eef, 0x4669be79,
+	0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236,
+	0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f,
+	0xc5ba3bbe, 0xb2bd0b28, 0x2bb45a92, 0x5cb36a04,
+	0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d,
+	0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a,
+	0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713,
+	0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38,
+	0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21,
+	0x86d3d2d4, 0xf1d4e242, 0x68ddb3f8, 0x1fda836e,
+	0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777,
+	0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c,
+	0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45,
+	0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2,
+	0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db,
+	0xaed16a4a, 0xd9d65adc, 0x40df0b66, 0x37d83bf0,
+	0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9,
+	0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6,
+	0xbad03605, 0xcdd70693, 0x54de5729, 0x23d967bf,
+	0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94,
+	0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d,
+};
+
+static const u32 crc32_slice8_table[] MAYBE_UNUSED = {
+ 0x00000000, 0x77073096, 0xee0e612c, 0x990951ba,
+ 0x076dc419, 0x706af48f, 0xe963a535, 0x9e6495a3,
+ 0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988,
+ 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91,
+ 0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de,
+ 0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7,
+ 0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec,
+ 0x14015c4f, 0x63066cd9, 0xfa0f3d63, 0x8d080df5,
+ 0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172,
+ 0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b,
+ 0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940,
+ 0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59,
+ 0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116,
+ 0x21b4f4b5, 0x56b3c423, 0xcfba9599, 0xb8bda50f,
+ 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924,
+ 0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d,
+ 0x76dc4190, 0x01db7106, 0x98d220bc, 0xefd5102a,
+ 0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433,
+ 0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818,
+ 0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01,
+ 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e,
+ 0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457,
+ 0x65b0d9c6, 0x12b7e950, 0x8bbeb8ea, 0xfcb9887c,
+ 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65,
+ 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2,
+ 0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb,
+ 0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0,
+ 0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9,
+ 0x5005713c, 0x270241aa, 0xbe0b1010, 0xc90c2086,
+ 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f,
+ 0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4,
+ 0x59b33d17, 0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad,
+ 0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a,
+ 0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683,
+ 0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8,
+ 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1,
+ 0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe,
+ 0xf762575d, 0x806567cb, 0x196c3671, 0x6e6b06e7,
+ 0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc,
+ 0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5,
+ 0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252,
+ 0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b,
+ 0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60,
+ 0xdf60efc3, 0xa867df55, 0x316e8eef, 0x4669be79,
+ 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236,
+ 0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f,
+ 0xc5ba3bbe, 0xb2bd0b28, 0x2bb45a92, 0x5cb36a04,
+ 0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d,
+ 0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a,
+ 0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713,
+ 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38,
+ 0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21,
+ 0x86d3d2d4, 0xf1d4e242, 0x68ddb3f8, 0x1fda836e,
+ 0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777,
+ 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c,
+ 0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45,
+ 0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2,
+ 0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db,
+ 0xaed16a4a, 0xd9d65adc, 0x40df0b66, 0x37d83bf0,
+ 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9,
+ 0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6,
+ 0xbad03605, 0xcdd70693, 0x54de5729, 0x23d967bf,
+ 0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94,
+ 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d,
+ 0x00000000, 0x191b3141, 0x32366282, 0x2b2d53c3,
+ 0x646cc504, 0x7d77f445, 0x565aa786, 0x4f4196c7,
+ 0xc8d98a08, 0xd1c2bb49, 0xfaefe88a, 0xe3f4d9cb,
+ 0xacb54f0c, 0xb5ae7e4d, 0x9e832d8e, 0x87981ccf,
+ 0x4ac21251, 0x53d92310, 0x78f470d3, 0x61ef4192,
+ 0x2eaed755, 0x37b5e614, 0x1c98b5d7, 0x05838496,
+ 0x821b9859, 0x9b00a918, 0xb02dfadb, 0xa936cb9a,
+ 0xe6775d5d, 0xff6c6c1c, 0xd4413fdf, 0xcd5a0e9e,
+ 0x958424a2, 0x8c9f15e3, 0xa7b24620, 0xbea97761,
+ 0xf1e8e1a6, 0xe8f3d0e7, 0xc3de8324, 0xdac5b265,
+ 0x5d5daeaa, 0x44469feb, 0x6f6bcc28, 0x7670fd69,
+ 0x39316bae, 0x202a5aef, 0x0b07092c, 0x121c386d,
+ 0xdf4636f3, 0xc65d07b2, 0xed705471, 0xf46b6530,
+ 0xbb2af3f7, 0xa231c2b6, 0x891c9175, 0x9007a034,
+ 0x179fbcfb, 0x0e848dba, 0x25a9de79, 0x3cb2ef38,
+ 0x73f379ff, 0x6ae848be, 0x41c51b7d, 0x58de2a3c,
+ 0xf0794f05, 0xe9627e44, 0xc24f2d87, 0xdb541cc6,
+ 0x94158a01, 0x8d0ebb40, 0xa623e883, 0xbf38d9c2,
+ 0x38a0c50d, 0x21bbf44c, 0x0a96a78f, 0x138d96ce,
+ 0x5ccc0009, 0x45d73148, 0x6efa628b, 0x77e153ca,
+ 0xbabb5d54, 0xa3a06c15, 0x888d3fd6, 0x91960e97,
+ 0xded79850, 0xc7cca911, 0xece1fad2, 0xf5facb93,
+ 0x7262d75c, 0x6b79e61d, 0x4054b5de, 0x594f849f,
+ 0x160e1258, 0x0f152319, 0x243870da, 0x3d23419b,
+ 0x65fd6ba7, 0x7ce65ae6, 0x57cb0925, 0x4ed03864,
+ 0x0191aea3, 0x188a9fe2, 0x33a7cc21, 0x2abcfd60,
+ 0xad24e1af, 0xb43fd0ee, 0x9f12832d, 0x8609b26c,
+ 0xc94824ab, 0xd05315ea, 0xfb7e4629, 0xe2657768,
+ 0x2f3f79f6, 0x362448b7, 0x1d091b74, 0x04122a35,
+ 0x4b53bcf2, 0x52488db3, 0x7965de70, 0x607eef31,
+ 0xe7e6f3fe, 0xfefdc2bf, 0xd5d0917c, 0xcccba03d,
+ 0x838a36fa, 0x9a9107bb, 0xb1bc5478, 0xa8a76539,
+ 0x3b83984b, 0x2298a90a, 0x09b5fac9, 0x10aecb88,
+ 0x5fef5d4f, 0x46f46c0e, 0x6dd93fcd, 0x74c20e8c,
+ 0xf35a1243, 0xea412302, 0xc16c70c1, 0xd8774180,
+ 0x9736d747, 0x8e2de606, 0xa500b5c5, 0xbc1b8484,
+ 0x71418a1a, 0x685abb5b, 0x4377e898, 0x5a6cd9d9,
+ 0x152d4f1e, 0x0c367e5f, 0x271b2d9c, 0x3e001cdd,
+ 0xb9980012, 0xa0833153, 0x8bae6290, 0x92b553d1,
+ 0xddf4c516, 0xc4eff457, 0xefc2a794, 0xf6d996d5,
+ 0xae07bce9, 0xb71c8da8, 0x9c31de6b, 0x852aef2a,
+ 0xca6b79ed, 0xd37048ac, 0xf85d1b6f, 0xe1462a2e,
+ 0x66de36e1, 0x7fc507a0, 0x54e85463, 0x4df36522,
+ 0x02b2f3e5, 0x1ba9c2a4, 0x30849167, 0x299fa026,
+ 0xe4c5aeb8, 0xfdde9ff9, 0xd6f3cc3a, 0xcfe8fd7b,
+ 0x80a96bbc, 0x99b25afd, 0xb29f093e, 0xab84387f,
+ 0x2c1c24b0, 0x350715f1, 0x1e2a4632, 0x07317773,
+ 0x4870e1b4, 0x516bd0f5, 0x7a468336, 0x635db277,
+ 0xcbfad74e, 0xd2e1e60f, 0xf9ccb5cc, 0xe0d7848d,
+ 0xaf96124a, 0xb68d230b, 0x9da070c8, 0x84bb4189,
+ 0x03235d46, 0x1a386c07, 0x31153fc4, 0x280e0e85,
+ 0x674f9842, 0x7e54a903, 0x5579fac0, 0x4c62cb81,
+ 0x8138c51f, 0x9823f45e, 0xb30ea79d, 0xaa1596dc,
+ 0xe554001b, 0xfc4f315a, 0xd7626299, 0xce7953d8,
+ 0x49e14f17, 0x50fa7e56, 0x7bd72d95, 0x62cc1cd4,
+ 0x2d8d8a13, 0x3496bb52, 0x1fbbe891, 0x06a0d9d0,
+ 0x5e7ef3ec, 0x4765c2ad, 0x6c48916e, 0x7553a02f,
+ 0x3a1236e8, 0x230907a9, 0x0824546a, 0x113f652b,
+ 0x96a779e4, 0x8fbc48a5, 0xa4911b66, 0xbd8a2a27,
+ 0xf2cbbce0, 0xebd08da1, 0xc0fdde62, 0xd9e6ef23,
+ 0x14bce1bd, 0x0da7d0fc, 0x268a833f, 0x3f91b27e,
+ 0x70d024b9, 0x69cb15f8, 0x42e6463b, 0x5bfd777a,
+ 0xdc656bb5, 0xc57e5af4, 0xee530937, 0xf7483876,
+ 0xb809aeb1, 0xa1129ff0, 0x8a3fcc33, 0x9324fd72,
+ 0x00000000, 0x01c26a37, 0x0384d46e, 0x0246be59,
+ 0x0709a8dc, 0x06cbc2eb, 0x048d7cb2, 0x054f1685,
+ 0x0e1351b8, 0x0fd13b8f, 0x0d9785d6, 0x0c55efe1,
+ 0x091af964, 0x08d89353, 0x0a9e2d0a, 0x0b5c473d,
+ 0x1c26a370, 0x1de4c947, 0x1fa2771e, 0x1e601d29,
+ 0x1b2f0bac, 0x1aed619b, 0x18abdfc2, 0x1969b5f5,
+ 0x1235f2c8, 0x13f798ff, 0x11b126a6, 0x10734c91,
+ 0x153c5a14, 0x14fe3023, 0x16b88e7a, 0x177ae44d,
+ 0x384d46e0, 0x398f2cd7, 0x3bc9928e, 0x3a0bf8b9,
+ 0x3f44ee3c, 0x3e86840b, 0x3cc03a52, 0x3d025065,
+ 0x365e1758, 0x379c7d6f, 0x35dac336, 0x3418a901,
+ 0x3157bf84, 0x3095d5b3, 0x32d36bea, 0x331101dd,
+ 0x246be590, 0x25a98fa7, 0x27ef31fe, 0x262d5bc9,
+ 0x23624d4c, 0x22a0277b, 0x20e69922, 0x2124f315,
+ 0x2a78b428, 0x2bbade1f, 0x29fc6046, 0x283e0a71,
+ 0x2d711cf4, 0x2cb376c3, 0x2ef5c89a, 0x2f37a2ad,
+ 0x709a8dc0, 0x7158e7f7, 0x731e59ae, 0x72dc3399,
+ 0x7793251c, 0x76514f2b, 0x7417f172, 0x75d59b45,
+ 0x7e89dc78, 0x7f4bb64f, 0x7d0d0816, 0x7ccf6221,
+ 0x798074a4, 0x78421e93, 0x7a04a0ca, 0x7bc6cafd,
+ 0x6cbc2eb0, 0x6d7e4487, 0x6f38fade, 0x6efa90e9,
+ 0x6bb5866c, 0x6a77ec5b, 0x68315202, 0x69f33835,
+ 0x62af7f08, 0x636d153f, 0x612bab66, 0x60e9c151,
+ 0x65a6d7d4, 0x6464bde3, 0x662203ba, 0x67e0698d,
+ 0x48d7cb20, 0x4915a117, 0x4b531f4e, 0x4a917579,
+ 0x4fde63fc, 0x4e1c09cb, 0x4c5ab792, 0x4d98dda5,
+ 0x46c49a98, 0x4706f0af, 0x45404ef6, 0x448224c1,
+ 0x41cd3244, 0x400f5873, 0x4249e62a, 0x438b8c1d,
+ 0x54f16850, 0x55330267, 0x5775bc3e, 0x56b7d609,
+ 0x53f8c08c, 0x523aaabb, 0x507c14e2, 0x51be7ed5,
+ 0x5ae239e8, 0x5b2053df, 0x5966ed86, 0x58a487b1,
+ 0x5deb9134, 0x5c29fb03, 0x5e6f455a, 0x5fad2f6d,
+ 0xe1351b80, 0xe0f771b7, 0xe2b1cfee, 0xe373a5d9,
+ 0xe63cb35c, 0xe7fed96b, 0xe5b86732, 0xe47a0d05,
+ 0xef264a38, 0xeee4200f, 0xeca29e56, 0xed60f461,
+ 0xe82fe2e4, 0xe9ed88d3, 0xebab368a, 0xea695cbd,
+ 0xfd13b8f0, 0xfcd1d2c7, 0xfe976c9e, 0xff5506a9,
+ 0xfa1a102c, 0xfbd87a1b, 0xf99ec442, 0xf85cae75,
+ 0xf300e948, 0xf2c2837f, 0xf0843d26, 0xf1465711,
+ 0xf4094194, 0xf5cb2ba3, 0xf78d95fa, 0xf64fffcd,
+ 0xd9785d60, 0xd8ba3757, 0xdafc890e, 0xdb3ee339,
+ 0xde71f5bc, 0xdfb39f8b, 0xddf521d2, 0xdc374be5,
+ 0xd76b0cd8, 0xd6a966ef, 0xd4efd8b6, 0xd52db281,
+ 0xd062a404, 0xd1a0ce33, 0xd3e6706a, 0xd2241a5d,
+ 0xc55efe10, 0xc49c9427, 0xc6da2a7e, 0xc7184049,
+ 0xc25756cc, 0xc3953cfb, 0xc1d382a2, 0xc011e895,
+ 0xcb4dafa8, 0xca8fc59f, 0xc8c97bc6, 0xc90b11f1,
+ 0xcc440774, 0xcd866d43, 0xcfc0d31a, 0xce02b92d,
+ 0x91af9640, 0x906dfc77, 0x922b422e, 0x93e92819,
+ 0x96a63e9c, 0x976454ab, 0x9522eaf2, 0x94e080c5,
+ 0x9fbcc7f8, 0x9e7eadcf, 0x9c381396, 0x9dfa79a1,
+ 0x98b56f24, 0x99770513, 0x9b31bb4a, 0x9af3d17d,
+ 0x8d893530, 0x8c4b5f07, 0x8e0de15e, 0x8fcf8b69,
+ 0x8a809dec, 0x8b42f7db, 0x89044982, 0x88c623b5,
+ 0x839a6488, 0x82580ebf, 0x801eb0e6, 0x81dcdad1,
+ 0x8493cc54, 0x8551a663, 0x8717183a, 0x86d5720d,
+ 0xa9e2d0a0, 0xa820ba97, 0xaa6604ce, 0xaba46ef9,
+ 0xaeeb787c, 0xaf29124b, 0xad6fac12, 0xacadc625,
+ 0xa7f18118, 0xa633eb2f, 0xa4755576, 0xa5b73f41,
+ 0xa0f829c4, 0xa13a43f3, 0xa37cfdaa, 0xa2be979d,
+ 0xb5c473d0, 0xb40619e7, 0xb640a7be, 0xb782cd89,
+ 0xb2cddb0c, 0xb30fb13b, 0xb1490f62, 0xb08b6555,
+ 0xbbd72268, 0xba15485f, 0xb853f606, 0xb9919c31,
+ 0xbcde8ab4, 0xbd1ce083, 0xbf5a5eda, 0xbe9834ed,
+ 0x00000000, 0xb8bc6765, 0xaa09c88b, 0x12b5afee,
+ 0x8f629757, 0x37def032, 0x256b5fdc, 0x9dd738b9,
+ 0xc5b428ef, 0x7d084f8a, 0x6fbde064, 0xd7018701,
+ 0x4ad6bfb8, 0xf26ad8dd, 0xe0df7733, 0x58631056,
+ 0x5019579f, 0xe8a530fa, 0xfa109f14, 0x42acf871,
+ 0xdf7bc0c8, 0x67c7a7ad, 0x75720843, 0xcdce6f26,
+ 0x95ad7f70, 0x2d111815, 0x3fa4b7fb, 0x8718d09e,
+ 0x1acfe827, 0xa2738f42, 0xb0c620ac, 0x087a47c9,
+ 0xa032af3e, 0x188ec85b, 0x0a3b67b5, 0xb28700d0,
+ 0x2f503869, 0x97ec5f0c, 0x8559f0e2, 0x3de59787,
+ 0x658687d1, 0xdd3ae0b4, 0xcf8f4f5a, 0x7733283f,
+ 0xeae41086, 0x525877e3, 0x40edd80d, 0xf851bf68,
+ 0xf02bf8a1, 0x48979fc4, 0x5a22302a, 0xe29e574f,
+ 0x7f496ff6, 0xc7f50893, 0xd540a77d, 0x6dfcc018,
+ 0x359fd04e, 0x8d23b72b, 0x9f9618c5, 0x272a7fa0,
+ 0xbafd4719, 0x0241207c, 0x10f48f92, 0xa848e8f7,
+ 0x9b14583d, 0x23a83f58, 0x311d90b6, 0x89a1f7d3,
+ 0x1476cf6a, 0xaccaa80f, 0xbe7f07e1, 0x06c36084,
+ 0x5ea070d2, 0xe61c17b7, 0xf4a9b859, 0x4c15df3c,
+ 0xd1c2e785, 0x697e80e0, 0x7bcb2f0e, 0xc377486b,
+ 0xcb0d0fa2, 0x73b168c7, 0x6104c729, 0xd9b8a04c,
+ 0x446f98f5, 0xfcd3ff90, 0xee66507e, 0x56da371b,
+ 0x0eb9274d, 0xb6054028, 0xa4b0efc6, 0x1c0c88a3,
+ 0x81dbb01a, 0x3967d77f, 0x2bd27891, 0x936e1ff4,
+ 0x3b26f703, 0x839a9066, 0x912f3f88, 0x299358ed,
+ 0xb4446054, 0x0cf80731, 0x1e4da8df, 0xa6f1cfba,
+ 0xfe92dfec, 0x462eb889, 0x549b1767, 0xec277002,
+ 0x71f048bb, 0xc94c2fde, 0xdbf98030, 0x6345e755,
+ 0x6b3fa09c, 0xd383c7f9, 0xc1366817, 0x798a0f72,
+ 0xe45d37cb, 0x5ce150ae, 0x4e54ff40, 0xf6e89825,
+ 0xae8b8873, 0x1637ef16, 0x048240f8, 0xbc3e279d,
+ 0x21e91f24, 0x99557841, 0x8be0d7af, 0x335cb0ca,
+ 0xed59b63b, 0x55e5d15e, 0x47507eb0, 0xffec19d5,
+ 0x623b216c, 0xda874609, 0xc832e9e7, 0x708e8e82,
+ 0x28ed9ed4, 0x9051f9b1, 0x82e4565f, 0x3a58313a,
+ 0xa78f0983, 0x1f336ee6, 0x0d86c108, 0xb53aa66d,
+ 0xbd40e1a4, 0x05fc86c1, 0x1749292f, 0xaff54e4a,
+ 0x322276f3, 0x8a9e1196, 0x982bbe78, 0x2097d91d,
+ 0x78f4c94b, 0xc048ae2e, 0xd2fd01c0, 0x6a4166a5,
+ 0xf7965e1c, 0x4f2a3979, 0x5d9f9697, 0xe523f1f2,
+ 0x4d6b1905, 0xf5d77e60, 0xe762d18e, 0x5fdeb6eb,
+ 0xc2098e52, 0x7ab5e937, 0x680046d9, 0xd0bc21bc,
+ 0x88df31ea, 0x3063568f, 0x22d6f961, 0x9a6a9e04,
+ 0x07bda6bd, 0xbf01c1d8, 0xadb46e36, 0x15080953,
+ 0x1d724e9a, 0xa5ce29ff, 0xb77b8611, 0x0fc7e174,
+ 0x9210d9cd, 0x2aacbea8, 0x38191146, 0x80a57623,
+ 0xd8c66675, 0x607a0110, 0x72cfaefe, 0xca73c99b,
+ 0x57a4f122, 0xef189647, 0xfdad39a9, 0x45115ecc,
+ 0x764dee06, 0xcef18963, 0xdc44268d, 0x64f841e8,
+ 0xf92f7951, 0x41931e34, 0x5326b1da, 0xeb9ad6bf,
+ 0xb3f9c6e9, 0x0b45a18c, 0x19f00e62, 0xa14c6907,
+ 0x3c9b51be, 0x842736db, 0x96929935, 0x2e2efe50,
+ 0x2654b999, 0x9ee8defc, 0x8c5d7112, 0x34e11677,
+ 0xa9362ece, 0x118a49ab, 0x033fe645, 0xbb838120,
+ 0xe3e09176, 0x5b5cf613, 0x49e959fd, 0xf1553e98,
+ 0x6c820621, 0xd43e6144, 0xc68bceaa, 0x7e37a9cf,
+ 0xd67f4138, 0x6ec3265d, 0x7c7689b3, 0xc4caeed6,
+ 0x591dd66f, 0xe1a1b10a, 0xf3141ee4, 0x4ba87981,
+ 0x13cb69d7, 0xab770eb2, 0xb9c2a15c, 0x017ec639,
+ 0x9ca9fe80, 0x241599e5, 0x36a0360b, 0x8e1c516e,
+ 0x866616a7, 0x3eda71c2, 0x2c6fde2c, 0x94d3b949,
+ 0x090481f0, 0xb1b8e695, 0xa30d497b, 0x1bb12e1e,
+ 0x43d23e48, 0xfb6e592d, 0xe9dbf6c3, 0x516791a6,
+ 0xccb0a91f, 0x740cce7a, 0x66b96194, 0xde0506f1,
+ 0x00000000, 0x3d6029b0, 0x7ac05360, 0x47a07ad0,
+ 0xf580a6c0, 0xc8e08f70, 0x8f40f5a0, 0xb220dc10,
+ 0x30704bc1, 0x0d106271, 0x4ab018a1, 0x77d03111,
+ 0xc5f0ed01, 0xf890c4b1, 0xbf30be61, 0x825097d1,
+ 0x60e09782, 0x5d80be32, 0x1a20c4e2, 0x2740ed52,
+ 0x95603142, 0xa80018f2, 0xefa06222, 0xd2c04b92,
+ 0x5090dc43, 0x6df0f5f3, 0x2a508f23, 0x1730a693,
+ 0xa5107a83, 0x98705333, 0xdfd029e3, 0xe2b00053,
+ 0xc1c12f04, 0xfca106b4, 0xbb017c64, 0x866155d4,
+ 0x344189c4, 0x0921a074, 0x4e81daa4, 0x73e1f314,
+ 0xf1b164c5, 0xccd14d75, 0x8b7137a5, 0xb6111e15,
+ 0x0431c205, 0x3951ebb5, 0x7ef19165, 0x4391b8d5,
+ 0xa121b886, 0x9c419136, 0xdbe1ebe6, 0xe681c256,
+ 0x54a11e46, 0x69c137f6, 0x2e614d26, 0x13016496,
+ 0x9151f347, 0xac31daf7, 0xeb91a027, 0xd6f18997,
+ 0x64d15587, 0x59b17c37, 0x1e1106e7, 0x23712f57,
+ 0x58f35849, 0x659371f9, 0x22330b29, 0x1f532299,
+ 0xad73fe89, 0x9013d739, 0xd7b3ade9, 0xead38459,
+ 0x68831388, 0x55e33a38, 0x124340e8, 0x2f236958,
+ 0x9d03b548, 0xa0639cf8, 0xe7c3e628, 0xdaa3cf98,
+ 0x3813cfcb, 0x0573e67b, 0x42d39cab, 0x7fb3b51b,
+ 0xcd93690b, 0xf0f340bb, 0xb7533a6b, 0x8a3313db,
+ 0x0863840a, 0x3503adba, 0x72a3d76a, 0x4fc3feda,
+ 0xfde322ca, 0xc0830b7a, 0x872371aa, 0xba43581a,
+ 0x9932774d, 0xa4525efd, 0xe3f2242d, 0xde920d9d,
+ 0x6cb2d18d, 0x51d2f83d, 0x167282ed, 0x2b12ab5d,
+ 0xa9423c8c, 0x9422153c, 0xd3826fec, 0xeee2465c,
+ 0x5cc29a4c, 0x61a2b3fc, 0x2602c92c, 0x1b62e09c,
+ 0xf9d2e0cf, 0xc4b2c97f, 0x8312b3af, 0xbe729a1f,
+ 0x0c52460f, 0x31326fbf, 0x7692156f, 0x4bf23cdf,
+ 0xc9a2ab0e, 0xf4c282be, 0xb362f86e, 0x8e02d1de,
+ 0x3c220dce, 0x0142247e, 0x46e25eae, 0x7b82771e,
+ 0xb1e6b092, 0x8c869922, 0xcb26e3f2, 0xf646ca42,
+ 0x44661652, 0x79063fe2, 0x3ea64532, 0x03c66c82,
+ 0x8196fb53, 0xbcf6d2e3, 0xfb56a833, 0xc6368183,
+ 0x74165d93, 0x49767423, 0x0ed60ef3, 0x33b62743,
+ 0xd1062710, 0xec660ea0, 0xabc67470, 0x96a65dc0,
+ 0x248681d0, 0x19e6a860, 0x5e46d2b0, 0x6326fb00,
+ 0xe1766cd1, 0xdc164561, 0x9bb63fb1, 0xa6d61601,
+ 0x14f6ca11, 0x2996e3a1, 0x6e369971, 0x5356b0c1,
+ 0x70279f96, 0x4d47b626, 0x0ae7ccf6, 0x3787e546,
+ 0x85a73956, 0xb8c710e6, 0xff676a36, 0xc2074386,
+ 0x4057d457, 0x7d37fde7, 0x3a978737, 0x07f7ae87,
+ 0xb5d77297, 0x88b75b27, 0xcf1721f7, 0xf2770847,
+ 0x10c70814, 0x2da721a4, 0x6a075b74, 0x576772c4,
+ 0xe547aed4, 0xd8278764, 0x9f87fdb4, 0xa2e7d404,
+ 0x20b743d5, 0x1dd76a65, 0x5a7710b5, 0x67173905,
+ 0xd537e515, 0xe857cca5, 0xaff7b675, 0x92979fc5,
+ 0xe915e8db, 0xd475c16b, 0x93d5bbbb, 0xaeb5920b,
+ 0x1c954e1b, 0x21f567ab, 0x66551d7b, 0x5b3534cb,
+ 0xd965a31a, 0xe4058aaa, 0xa3a5f07a, 0x9ec5d9ca,
+ 0x2ce505da, 0x11852c6a, 0x562556ba, 0x6b457f0a,
+ 0x89f57f59, 0xb49556e9, 0xf3352c39, 0xce550589,
+ 0x7c75d999, 0x4115f029, 0x06b58af9, 0x3bd5a349,
+ 0xb9853498, 0x84e51d28, 0xc34567f8, 0xfe254e48,
+ 0x4c059258, 0x7165bbe8, 0x36c5c138, 0x0ba5e888,
+ 0x28d4c7df, 0x15b4ee6f, 0x521494bf, 0x6f74bd0f,
+ 0xdd54611f, 0xe03448af, 0xa794327f, 0x9af41bcf,
+ 0x18a48c1e, 0x25c4a5ae, 0x6264df7e, 0x5f04f6ce,
+ 0xed242ade, 0xd044036e, 0x97e479be, 0xaa84500e,
+ 0x4834505d, 0x755479ed, 0x32f4033d, 0x0f942a8d,
+ 0xbdb4f69d, 0x80d4df2d, 0xc774a5fd, 0xfa148c4d,
+ 0x78441b9c, 0x4524322c, 0x028448fc, 0x3fe4614c,
+ 0x8dc4bd5c, 0xb0a494ec, 0xf704ee3c, 0xca64c78c,
+ 0x00000000, 0xcb5cd3a5, 0x4dc8a10b, 0x869472ae,
+ 0x9b914216, 0x50cd91b3, 0xd659e31d, 0x1d0530b8,
+ 0xec53826d, 0x270f51c8, 0xa19b2366, 0x6ac7f0c3,
+ 0x77c2c07b, 0xbc9e13de, 0x3a0a6170, 0xf156b2d5,
+ 0x03d6029b, 0xc88ad13e, 0x4e1ea390, 0x85427035,
+ 0x9847408d, 0x531b9328, 0xd58fe186, 0x1ed33223,
+ 0xef8580f6, 0x24d95353, 0xa24d21fd, 0x6911f258,
+ 0x7414c2e0, 0xbf481145, 0x39dc63eb, 0xf280b04e,
+ 0x07ac0536, 0xccf0d693, 0x4a64a43d, 0x81387798,
+ 0x9c3d4720, 0x57619485, 0xd1f5e62b, 0x1aa9358e,
+ 0xebff875b, 0x20a354fe, 0xa6372650, 0x6d6bf5f5,
+ 0x706ec54d, 0xbb3216e8, 0x3da66446, 0xf6fab7e3,
+ 0x047a07ad, 0xcf26d408, 0x49b2a6a6, 0x82ee7503,
+ 0x9feb45bb, 0x54b7961e, 0xd223e4b0, 0x197f3715,
+ 0xe82985c0, 0x23755665, 0xa5e124cb, 0x6ebdf76e,
+ 0x73b8c7d6, 0xb8e41473, 0x3e7066dd, 0xf52cb578,
+ 0x0f580a6c, 0xc404d9c9, 0x4290ab67, 0x89cc78c2,
+ 0x94c9487a, 0x5f959bdf, 0xd901e971, 0x125d3ad4,
+ 0xe30b8801, 0x28575ba4, 0xaec3290a, 0x659ffaaf,
+ 0x789aca17, 0xb3c619b2, 0x35526b1c, 0xfe0eb8b9,
+ 0x0c8e08f7, 0xc7d2db52, 0x4146a9fc, 0x8a1a7a59,
+ 0x971f4ae1, 0x5c439944, 0xdad7ebea, 0x118b384f,
+ 0xe0dd8a9a, 0x2b81593f, 0xad152b91, 0x6649f834,
+ 0x7b4cc88c, 0xb0101b29, 0x36846987, 0xfdd8ba22,
+ 0x08f40f5a, 0xc3a8dcff, 0x453cae51, 0x8e607df4,
+ 0x93654d4c, 0x58399ee9, 0xdeadec47, 0x15f13fe2,
+ 0xe4a78d37, 0x2ffb5e92, 0xa96f2c3c, 0x6233ff99,
+ 0x7f36cf21, 0xb46a1c84, 0x32fe6e2a, 0xf9a2bd8f,
+ 0x0b220dc1, 0xc07ede64, 0x46eaacca, 0x8db67f6f,
+ 0x90b34fd7, 0x5bef9c72, 0xdd7beedc, 0x16273d79,
+ 0xe7718fac, 0x2c2d5c09, 0xaab92ea7, 0x61e5fd02,
+ 0x7ce0cdba, 0xb7bc1e1f, 0x31286cb1, 0xfa74bf14,
+ 0x1eb014d8, 0xd5ecc77d, 0x5378b5d3, 0x98246676,
+ 0x852156ce, 0x4e7d856b, 0xc8e9f7c5, 0x03b52460,
+ 0xf2e396b5, 0x39bf4510, 0xbf2b37be, 0x7477e41b,
+ 0x6972d4a3, 0xa22e0706, 0x24ba75a8, 0xefe6a60d,
+ 0x1d661643, 0xd63ac5e6, 0x50aeb748, 0x9bf264ed,
+ 0x86f75455, 0x4dab87f0, 0xcb3ff55e, 0x006326fb,
+ 0xf135942e, 0x3a69478b, 0xbcfd3525, 0x77a1e680,
+ 0x6aa4d638, 0xa1f8059d, 0x276c7733, 0xec30a496,
+ 0x191c11ee, 0xd240c24b, 0x54d4b0e5, 0x9f886340,
+ 0x828d53f8, 0x49d1805d, 0xcf45f2f3, 0x04192156,
+ 0xf54f9383, 0x3e134026, 0xb8873288, 0x73dbe12d,
+ 0x6eded195, 0xa5820230, 0x2316709e, 0xe84aa33b,
+ 0x1aca1375, 0xd196c0d0, 0x5702b27e, 0x9c5e61db,
+ 0x815b5163, 0x4a0782c6, 0xcc93f068, 0x07cf23cd,
+ 0xf6999118, 0x3dc542bd, 0xbb513013, 0x700de3b6,
+ 0x6d08d30e, 0xa65400ab, 0x20c07205, 0xeb9ca1a0,
+ 0x11e81eb4, 0xdab4cd11, 0x5c20bfbf, 0x977c6c1a,
+ 0x8a795ca2, 0x41258f07, 0xc7b1fda9, 0x0ced2e0c,
+ 0xfdbb9cd9, 0x36e74f7c, 0xb0733dd2, 0x7b2fee77,
+ 0x662adecf, 0xad760d6a, 0x2be27fc4, 0xe0beac61,
+ 0x123e1c2f, 0xd962cf8a, 0x5ff6bd24, 0x94aa6e81,
+ 0x89af5e39, 0x42f38d9c, 0xc467ff32, 0x0f3b2c97,
+ 0xfe6d9e42, 0x35314de7, 0xb3a53f49, 0x78f9ecec,
+ 0x65fcdc54, 0xaea00ff1, 0x28347d5f, 0xe368aefa,
+ 0x16441b82, 0xdd18c827, 0x5b8cba89, 0x90d0692c,
+ 0x8dd55994, 0x46898a31, 0xc01df89f, 0x0b412b3a,
+ 0xfa1799ef, 0x314b4a4a, 0xb7df38e4, 0x7c83eb41,
+ 0x6186dbf9, 0xaada085c, 0x2c4e7af2, 0xe712a957,
+ 0x15921919, 0xdececabc, 0x585ab812, 0x93066bb7,
+ 0x8e035b0f, 0x455f88aa, 0xc3cbfa04, 0x089729a1,
+ 0xf9c19b74, 0x329d48d1, 0xb4093a7f, 0x7f55e9da,
+ 0x6250d962, 0xa90c0ac7, 0x2f987869, 0xe4c4abcc,
+ 0x00000000, 0xa6770bb4, 0x979f1129, 0x31e81a9d,
+ 0xf44f2413, 0x52382fa7, 0x63d0353a, 0xc5a73e8e,
+ 0x33ef4e67, 0x959845d3, 0xa4705f4e, 0x020754fa,
+ 0xc7a06a74, 0x61d761c0, 0x503f7b5d, 0xf64870e9,
+ 0x67de9cce, 0xc1a9977a, 0xf0418de7, 0x56368653,
+ 0x9391b8dd, 0x35e6b369, 0x040ea9f4, 0xa279a240,
+ 0x5431d2a9, 0xf246d91d, 0xc3aec380, 0x65d9c834,
+ 0xa07ef6ba, 0x0609fd0e, 0x37e1e793, 0x9196ec27,
+ 0xcfbd399c, 0x69ca3228, 0x582228b5, 0xfe552301,
+ 0x3bf21d8f, 0x9d85163b, 0xac6d0ca6, 0x0a1a0712,
+ 0xfc5277fb, 0x5a257c4f, 0x6bcd66d2, 0xcdba6d66,
+ 0x081d53e8, 0xae6a585c, 0x9f8242c1, 0x39f54975,
+ 0xa863a552, 0x0e14aee6, 0x3ffcb47b, 0x998bbfcf,
+ 0x5c2c8141, 0xfa5b8af5, 0xcbb39068, 0x6dc49bdc,
+ 0x9b8ceb35, 0x3dfbe081, 0x0c13fa1c, 0xaa64f1a8,
+ 0x6fc3cf26, 0xc9b4c492, 0xf85cde0f, 0x5e2bd5bb,
+ 0x440b7579, 0xe27c7ecd, 0xd3946450, 0x75e36fe4,
+ 0xb044516a, 0x16335ade, 0x27db4043, 0x81ac4bf7,
+ 0x77e43b1e, 0xd19330aa, 0xe07b2a37, 0x460c2183,
+ 0x83ab1f0d, 0x25dc14b9, 0x14340e24, 0xb2430590,
+ 0x23d5e9b7, 0x85a2e203, 0xb44af89e, 0x123df32a,
+ 0xd79acda4, 0x71edc610, 0x4005dc8d, 0xe672d739,
+ 0x103aa7d0, 0xb64dac64, 0x87a5b6f9, 0x21d2bd4d,
+ 0xe47583c3, 0x42028877, 0x73ea92ea, 0xd59d995e,
+ 0x8bb64ce5, 0x2dc14751, 0x1c295dcc, 0xba5e5678,
+ 0x7ff968f6, 0xd98e6342, 0xe86679df, 0x4e11726b,
+ 0xb8590282, 0x1e2e0936, 0x2fc613ab, 0x89b1181f,
+ 0x4c162691, 0xea612d25, 0xdb8937b8, 0x7dfe3c0c,
+ 0xec68d02b, 0x4a1fdb9f, 0x7bf7c102, 0xdd80cab6,
+ 0x1827f438, 0xbe50ff8c, 0x8fb8e511, 0x29cfeea5,
+ 0xdf879e4c, 0x79f095f8, 0x48188f65, 0xee6f84d1,
+ 0x2bc8ba5f, 0x8dbfb1eb, 0xbc57ab76, 0x1a20a0c2,
+ 0x8816eaf2, 0x2e61e146, 0x1f89fbdb, 0xb9fef06f,
+ 0x7c59cee1, 0xda2ec555, 0xebc6dfc8, 0x4db1d47c,
+ 0xbbf9a495, 0x1d8eaf21, 0x2c66b5bc, 0x8a11be08,
+ 0x4fb68086, 0xe9c18b32, 0xd82991af, 0x7e5e9a1b,
+ 0xefc8763c, 0x49bf7d88, 0x78576715, 0xde206ca1,
+ 0x1b87522f, 0xbdf0599b, 0x8c184306, 0x2a6f48b2,
+ 0xdc27385b, 0x7a5033ef, 0x4bb82972, 0xedcf22c6,
+ 0x28681c48, 0x8e1f17fc, 0xbff70d61, 0x198006d5,
+ 0x47abd36e, 0xe1dcd8da, 0xd034c247, 0x7643c9f3,
+ 0xb3e4f77d, 0x1593fcc9, 0x247be654, 0x820cede0,
+ 0x74449d09, 0xd23396bd, 0xe3db8c20, 0x45ac8794,
+ 0x800bb91a, 0x267cb2ae, 0x1794a833, 0xb1e3a387,
+ 0x20754fa0, 0x86024414, 0xb7ea5e89, 0x119d553d,
+ 0xd43a6bb3, 0x724d6007, 0x43a57a9a, 0xe5d2712e,
+ 0x139a01c7, 0xb5ed0a73, 0x840510ee, 0x22721b5a,
+ 0xe7d525d4, 0x41a22e60, 0x704a34fd, 0xd63d3f49,
+ 0xcc1d9f8b, 0x6a6a943f, 0x5b828ea2, 0xfdf58516,
+ 0x3852bb98, 0x9e25b02c, 0xafcdaab1, 0x09baa105,
+ 0xfff2d1ec, 0x5985da58, 0x686dc0c5, 0xce1acb71,
+ 0x0bbdf5ff, 0xadcafe4b, 0x9c22e4d6, 0x3a55ef62,
+ 0xabc30345, 0x0db408f1, 0x3c5c126c, 0x9a2b19d8,
+ 0x5f8c2756, 0xf9fb2ce2, 0xc813367f, 0x6e643dcb,
+ 0x982c4d22, 0x3e5b4696, 0x0fb35c0b, 0xa9c457bf,
+ 0x6c636931, 0xca146285, 0xfbfc7818, 0x5d8b73ac,
+ 0x03a0a617, 0xa5d7ada3, 0x943fb73e, 0x3248bc8a,
+ 0xf7ef8204, 0x519889b0, 0x6070932d, 0xc6079899,
+ 0x304fe870, 0x9638e3c4, 0xa7d0f959, 0x01a7f2ed,
+ 0xc400cc63, 0x6277c7d7, 0x539fdd4a, 0xf5e8d6fe,
+ 0x647e3ad9, 0xc209316d, 0xf3e12bf0, 0x55962044,
+ 0x90311eca, 0x3646157e, 0x07ae0fe3, 0xa1d90457,
+ 0x579174be, 0xf1e67f0a, 0xc00e6597, 0x66796e23,
+ 0xa3de50ad, 0x05a95b19, 0x34414184, 0x92364a30,
+ 0x00000000, 0xccaa009e, 0x4225077d, 0x8e8f07e3,
+ 0x844a0efa, 0x48e00e64, 0xc66f0987, 0x0ac50919,
+ 0xd3e51bb5, 0x1f4f1b2b, 0x91c01cc8, 0x5d6a1c56,
+ 0x57af154f, 0x9b0515d1, 0x158a1232, 0xd92012ac,
+ 0x7cbb312b, 0xb01131b5, 0x3e9e3656, 0xf23436c8,
+ 0xf8f13fd1, 0x345b3f4f, 0xbad438ac, 0x767e3832,
+ 0xaf5e2a9e, 0x63f42a00, 0xed7b2de3, 0x21d12d7d,
+ 0x2b142464, 0xe7be24fa, 0x69312319, 0xa59b2387,
+ 0xf9766256, 0x35dc62c8, 0xbb53652b, 0x77f965b5,
+ 0x7d3c6cac, 0xb1966c32, 0x3f196bd1, 0xf3b36b4f,
+ 0x2a9379e3, 0xe639797d, 0x68b67e9e, 0xa41c7e00,
+ 0xaed97719, 0x62737787, 0xecfc7064, 0x205670fa,
+ 0x85cd537d, 0x496753e3, 0xc7e85400, 0x0b42549e,
+ 0x01875d87, 0xcd2d5d19, 0x43a25afa, 0x8f085a64,
+ 0x562848c8, 0x9a824856, 0x140d4fb5, 0xd8a74f2b,
+ 0xd2624632, 0x1ec846ac, 0x9047414f, 0x5ced41d1,
+ 0x299dc2ed, 0xe537c273, 0x6bb8c590, 0xa712c50e,
+ 0xadd7cc17, 0x617dcc89, 0xeff2cb6a, 0x2358cbf4,
+ 0xfa78d958, 0x36d2d9c6, 0xb85dde25, 0x74f7debb,
+ 0x7e32d7a2, 0xb298d73c, 0x3c17d0df, 0xf0bdd041,
+ 0x5526f3c6, 0x998cf358, 0x1703f4bb, 0xdba9f425,
+ 0xd16cfd3c, 0x1dc6fda2, 0x9349fa41, 0x5fe3fadf,
+ 0x86c3e873, 0x4a69e8ed, 0xc4e6ef0e, 0x084cef90,
+ 0x0289e689, 0xce23e617, 0x40ace1f4, 0x8c06e16a,
+ 0xd0eba0bb, 0x1c41a025, 0x92cea7c6, 0x5e64a758,
+ 0x54a1ae41, 0x980baedf, 0x1684a93c, 0xda2ea9a2,
+ 0x030ebb0e, 0xcfa4bb90, 0x412bbc73, 0x8d81bced,
+ 0x8744b5f4, 0x4beeb56a, 0xc561b289, 0x09cbb217,
+ 0xac509190, 0x60fa910e, 0xee7596ed, 0x22df9673,
+ 0x281a9f6a, 0xe4b09ff4, 0x6a3f9817, 0xa6959889,
+ 0x7fb58a25, 0xb31f8abb, 0x3d908d58, 0xf13a8dc6,
+ 0xfbff84df, 0x37558441, 0xb9da83a2, 0x7570833c,
+ 0x533b85da, 0x9f918544, 0x111e82a7, 0xddb48239,
+ 0xd7718b20, 0x1bdb8bbe, 0x95548c5d, 0x59fe8cc3,
+ 0x80de9e6f, 0x4c749ef1, 0xc2fb9912, 0x0e51998c,
+ 0x04949095, 0xc83e900b, 0x46b197e8, 0x8a1b9776,
+ 0x2f80b4f1, 0xe32ab46f, 0x6da5b38c, 0xa10fb312,
+ 0xabcaba0b, 0x6760ba95, 0xe9efbd76, 0x2545bde8,
+ 0xfc65af44, 0x30cfafda, 0xbe40a839, 0x72eaa8a7,
+ 0x782fa1be, 0xb485a120, 0x3a0aa6c3, 0xf6a0a65d,
+ 0xaa4de78c, 0x66e7e712, 0xe868e0f1, 0x24c2e06f,
+ 0x2e07e976, 0xe2ade9e8, 0x6c22ee0b, 0xa088ee95,
+ 0x79a8fc39, 0xb502fca7, 0x3b8dfb44, 0xf727fbda,
+ 0xfde2f2c3, 0x3148f25d, 0xbfc7f5be, 0x736df520,
+ 0xd6f6d6a7, 0x1a5cd639, 0x94d3d1da, 0x5879d144,
+ 0x52bcd85d, 0x9e16d8c3, 0x1099df20, 0xdc33dfbe,
+ 0x0513cd12, 0xc9b9cd8c, 0x4736ca6f, 0x8b9ccaf1,
+ 0x8159c3e8, 0x4df3c376, 0xc37cc495, 0x0fd6c40b,
+ 0x7aa64737, 0xb60c47a9, 0x3883404a, 0xf42940d4,
+ 0xfeec49cd, 0x32464953, 0xbcc94eb0, 0x70634e2e,
+ 0xa9435c82, 0x65e95c1c, 0xeb665bff, 0x27cc5b61,
+ 0x2d095278, 0xe1a352e6, 0x6f2c5505, 0xa386559b,
+ 0x061d761c, 0xcab77682, 0x44387161, 0x889271ff,
+ 0x825778e6, 0x4efd7878, 0xc0727f9b, 0x0cd87f05,
+ 0xd5f86da9, 0x19526d37, 0x97dd6ad4, 0x5b776a4a,
+ 0x51b26353, 0x9d1863cd, 0x1397642e, 0xdf3d64b0,
+ 0x83d02561, 0x4f7a25ff, 0xc1f5221c, 0x0d5f2282,
+ 0x079a2b9b, 0xcb302b05, 0x45bf2ce6, 0x89152c78,
+ 0x50353ed4, 0x9c9f3e4a, 0x121039a9, 0xdeba3937,
+ 0xd47f302e, 0x18d530b0, 0x965a3753, 0x5af037cd,
+ 0xff6b144a, 0x33c114d4, 0xbd4e1337, 0x71e413a9,
+ 0x7b211ab0, 0xb78b1a2e, 0x39041dcd, 0xf5ae1d53,
+ 0x2c8e0fff, 0xe0240f61, 0x6eab0882, 0xa201081c,
+ 0xa8c40105, 0x646e019b, 0xeae10678, 0x264b06e6,
+};
diff --git a/tools/z64compress/src/enc/libdeflate/lib/decompress_template.h b/tools/z64compress/src/enc/libdeflate/lib/decompress_template.h
new file mode 100644
index 000000000..2d9dfa82b
--- /dev/null
+++ b/tools/z64compress/src/enc/libdeflate/lib/decompress_template.h
@@ -0,0 +1,774 @@
+/*
+ * decompress_template.h
+ *
+ * Copyright 2016 Eric Biggers
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * This is the actual DEFLATE decompression routine, lifted out of
+ * deflate_decompress.c so that it can be compiled multiple times with different
+ * target instruction sets.
+ */
+
+#ifndef ATTRIBUTES
+# define ATTRIBUTES
+#endif
+#ifndef EXTRACT_VARBITS
+# define EXTRACT_VARBITS(word, count) ((word) & BITMASK(count))
+#endif
+#ifndef EXTRACT_VARBITS8
+# define EXTRACT_VARBITS8(word, count) ((word) & BITMASK((u8)(count)))
+#endif
+
+static enum libdeflate_result ATTRIBUTES MAYBE_UNUSED
+FUNCNAME(struct libdeflate_decompressor * restrict d,
+ const void * restrict in, size_t in_nbytes,
+ void * restrict out, size_t out_nbytes_avail,
+ size_t *actual_in_nbytes_ret, size_t *actual_out_nbytes_ret)
+{
+ u8 *out_next = out;
+ u8 * const out_end = out_next + out_nbytes_avail;
+ u8 * const out_fastloop_end =
+ out_end - MIN(out_nbytes_avail, FASTLOOP_MAX_BYTES_WRITTEN);
+
+ /* Input bitstream state; see deflate_decompress.c for documentation */
+ const u8 *in_next = in;
+ const u8 * const in_end = in_next + in_nbytes;
+ const u8 * const in_fastloop_end =
+ in_end - MIN(in_nbytes, FASTLOOP_MAX_BYTES_READ);
+ bitbuf_t bitbuf = 0;
+ bitbuf_t saved_bitbuf;
+ u32 bitsleft = 0;
+ size_t overread_count = 0;
+
+ bool is_final_block;
+ unsigned block_type;
+ unsigned num_litlen_syms;
+ unsigned num_offset_syms;
+ bitbuf_t litlen_tablemask;
+ u32 entry;
+
+next_block:
+ /* Starting to read the next block */
+ ;
+
+ STATIC_ASSERT(CAN_CONSUME(1 + 2 + 5 + 5 + 4 + 3));
+ REFILL_BITS();
+
+ /* BFINAL: 1 bit */
+ is_final_block = bitbuf & BITMASK(1);
+
+ /* BTYPE: 2 bits */
+ block_type = (bitbuf >> 1) & BITMASK(2);
+
+ if (block_type == DEFLATE_BLOCKTYPE_DYNAMIC_HUFFMAN) {
+
+ /* Dynamic Huffman block */
+
+ /* The order in which precode lengths are stored */
+ static const u8 deflate_precode_lens_permutation[DEFLATE_NUM_PRECODE_SYMS] = {
+ 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15
+ };
+
+ unsigned num_explicit_precode_lens;
+ unsigned i;
+
+ /* Read the codeword length counts. */
+
+ STATIC_ASSERT(DEFLATE_NUM_LITLEN_SYMS == 257 + BITMASK(5));
+ num_litlen_syms = 257 + ((bitbuf >> 3) & BITMASK(5));
+
+ STATIC_ASSERT(DEFLATE_NUM_OFFSET_SYMS == 1 + BITMASK(5));
+ num_offset_syms = 1 + ((bitbuf >> 8) & BITMASK(5));
+
+ STATIC_ASSERT(DEFLATE_NUM_PRECODE_SYMS == 4 + BITMASK(4));
+ num_explicit_precode_lens = 4 + ((bitbuf >> 13) & BITMASK(4));
+
+ d->static_codes_loaded = false;
+
+ /*
+ * Read the precode codeword lengths.
+ *
+ * A 64-bit bitbuffer is just one bit too small to hold the
+ * maximum number of precode lens, so to minimize branches we
+ * merge one len with the previous fields.
+ */
+ STATIC_ASSERT(DEFLATE_MAX_PRE_CODEWORD_LEN == (1 << 3) - 1);
+ if (CAN_CONSUME(3 * (DEFLATE_NUM_PRECODE_SYMS - 1))) {
+ d->u.precode_lens[deflate_precode_lens_permutation[0]] =
+ (bitbuf >> 17) & BITMASK(3);
+ bitbuf >>= 20;
+ bitsleft -= 20;
+ REFILL_BITS();
+ i = 1;
+ do {
+ d->u.precode_lens[deflate_precode_lens_permutation[i]] =
+ bitbuf & BITMASK(3);
+ bitbuf >>= 3;
+ bitsleft -= 3;
+ } while (++i < num_explicit_precode_lens);
+ } else {
+ bitbuf >>= 17;
+ bitsleft -= 17;
+ i = 0;
+ do {
+ if ((u8)bitsleft < 3)
+ REFILL_BITS();
+ d->u.precode_lens[deflate_precode_lens_permutation[i]] =
+ bitbuf & BITMASK(3);
+ bitbuf >>= 3;
+ bitsleft -= 3;
+ } while (++i < num_explicit_precode_lens);
+ }
+ for (; i < DEFLATE_NUM_PRECODE_SYMS; i++)
+ d->u.precode_lens[deflate_precode_lens_permutation[i]] = 0;
+
+ /* Build the decode table for the precode. */
+ SAFETY_CHECK(build_precode_decode_table(d));
+
+ /* Decode the litlen and offset codeword lengths. */
+ i = 0;
+ do {
+ unsigned presym;
+ u8 rep_val;
+ unsigned rep_count;
+
+ if ((u8)bitsleft < DEFLATE_MAX_PRE_CODEWORD_LEN + 7)
+ REFILL_BITS();
+
+ /*
+ * The code below assumes that the precode decode table
+ * doesn't have any subtables.
+ */
+ STATIC_ASSERT(PRECODE_TABLEBITS == DEFLATE_MAX_PRE_CODEWORD_LEN);
+
+ /* Decode the next precode symbol. */
+ entry = d->u.l.precode_decode_table[
+ bitbuf & BITMASK(DEFLATE_MAX_PRE_CODEWORD_LEN)];
+ bitbuf >>= (u8)entry;
+ bitsleft -= entry; /* optimization: subtract full entry */
+ presym = entry >> 16;
+
+ if (presym < 16) {
+ /* Explicit codeword length */
+ d->u.l.lens[i++] = presym;
+ continue;
+ }
+
+ /* Run-length encoded codeword lengths */
+
+ /*
+ * Note: we don't need verify that the repeat count
+ * doesn't overflow the number of elements, since we've
+ * sized the lens array to have enough extra space to
+ * allow for the worst-case overrun (138 zeroes when
+ * only 1 length was remaining).
+ *
+ * In the case of the small repeat counts (presyms 16
+ * and 17), it is fastest to always write the maximum
+ * number of entries. That gets rid of branches that
+ * would otherwise be required.
+ *
+ * It is not just because of the numerical order that
+ * our checks go in the order 'presym < 16', 'presym ==
+ * 16', and 'presym == 17'. For typical data this is
+ * ordered from most frequent to least frequent case.
+ */
+ STATIC_ASSERT(DEFLATE_MAX_LENS_OVERRUN == 138 - 1);
+
+ if (presym == 16) {
+ /* Repeat the previous length 3 - 6 times. */
+ SAFETY_CHECK(i != 0);
+ rep_val = d->u.l.lens[i - 1];
+ STATIC_ASSERT(3 + BITMASK(2) == 6);
+ rep_count = 3 + (bitbuf & BITMASK(2));
+ bitbuf >>= 2;
+ bitsleft -= 2;
+ d->u.l.lens[i + 0] = rep_val;
+ d->u.l.lens[i + 1] = rep_val;
+ d->u.l.lens[i + 2] = rep_val;
+ d->u.l.lens[i + 3] = rep_val;
+ d->u.l.lens[i + 4] = rep_val;
+ d->u.l.lens[i + 5] = rep_val;
+ i += rep_count;
+ } else if (presym == 17) {
+ /* Repeat zero 3 - 10 times. */
+ STATIC_ASSERT(3 + BITMASK(3) == 10);
+ rep_count = 3 + (bitbuf & BITMASK(3));
+ bitbuf >>= 3;
+ bitsleft -= 3;
+ d->u.l.lens[i + 0] = 0;
+ d->u.l.lens[i + 1] = 0;
+ d->u.l.lens[i + 2] = 0;
+ d->u.l.lens[i + 3] = 0;
+ d->u.l.lens[i + 4] = 0;
+ d->u.l.lens[i + 5] = 0;
+ d->u.l.lens[i + 6] = 0;
+ d->u.l.lens[i + 7] = 0;
+ d->u.l.lens[i + 8] = 0;
+ d->u.l.lens[i + 9] = 0;
+ i += rep_count;
+ } else {
+ /* Repeat zero 11 - 138 times. */
+ STATIC_ASSERT(11 + BITMASK(7) == 138);
+ rep_count = 11 + (bitbuf & BITMASK(7));
+ bitbuf >>= 7;
+ bitsleft -= 7;
+ memset(&d->u.l.lens[i], 0,
+ rep_count * sizeof(d->u.l.lens[i]));
+ i += rep_count;
+ }
+ } while (i < num_litlen_syms + num_offset_syms);
+
+ } else if (block_type == DEFLATE_BLOCKTYPE_UNCOMPRESSED) {
+ u16 len, nlen;
+
+ /*
+ * Uncompressed block: copy 'len' bytes literally from the input
+ * buffer to the output buffer.
+ */
+
+ bitsleft -= 3; /* for BTYPE and BFINAL */
+
+ /*
+ * Align the bitstream to the next byte boundary. This means
+ * the next byte boundary as if we were reading a byte at a
+ * time. Therefore, we have to rewind 'in_next' by any bytes
+ * that have been refilled but not actually consumed yet (not
+ * counting overread bytes, which don't increment 'in_next').
+ */
+ bitsleft = (u8)bitsleft;
+ SAFETY_CHECK(overread_count <= (bitsleft >> 3));
+ in_next -= (bitsleft >> 3) - overread_count;
+ overread_count = 0;
+ bitbuf = 0;
+ bitsleft = 0;
+
+ SAFETY_CHECK(in_end - in_next >= 4);
+ len = get_unaligned_le16(in_next);
+ nlen = get_unaligned_le16(in_next + 2);
+ in_next += 4;
+
+ SAFETY_CHECK(len == (u16)~nlen);
+ if (unlikely(len > out_end - out_next))
+ return LIBDEFLATE_INSUFFICIENT_SPACE;
+ SAFETY_CHECK(len <= in_end - in_next);
+
+ memcpy(out_next, in_next, len);
+ in_next += len;
+ out_next += len;
+
+ goto block_done;
+
+ } else {
+ unsigned i;
+
+ SAFETY_CHECK(block_type == DEFLATE_BLOCKTYPE_STATIC_HUFFMAN);
+
+ /*
+ * Static Huffman block: build the decode tables for the static
+ * codes. Skip doing so if the tables are already set up from
+ * an earlier static block; this speeds up decompression of
+ * degenerate input of many empty or very short static blocks.
+ *
+ * Afterwards, the remainder is the same as decompressing a
+ * dynamic Huffman block.
+ */
+
+ bitbuf >>= 3; /* for BTYPE and BFINAL */
+ bitsleft -= 3;
+
+ if (d->static_codes_loaded)
+ goto have_decode_tables;
+
+ d->static_codes_loaded = true;
+
+ STATIC_ASSERT(DEFLATE_NUM_LITLEN_SYMS == 288);
+ STATIC_ASSERT(DEFLATE_NUM_OFFSET_SYMS == 32);
+
+ for (i = 0; i < 144; i++)
+ d->u.l.lens[i] = 8;
+ for (; i < 256; i++)
+ d->u.l.lens[i] = 9;
+ for (; i < 280; i++)
+ d->u.l.lens[i] = 7;
+ for (; i < 288; i++)
+ d->u.l.lens[i] = 8;
+
+ for (; i < 288 + 32; i++)
+ d->u.l.lens[i] = 5;
+
+ num_litlen_syms = 288;
+ num_offset_syms = 32;
+ }
+
+ /* Decompressing a Huffman block (either dynamic or static) */
+
+ SAFETY_CHECK(build_offset_decode_table(d, num_litlen_syms, num_offset_syms));
+ SAFETY_CHECK(build_litlen_decode_table(d, num_litlen_syms, num_offset_syms));
+have_decode_tables:
+ litlen_tablemask = BITMASK(d->litlen_tablebits);
+
+ /*
+ * This is the "fastloop" for decoding literals and matches. It does
+ * bounds checks on in_next and out_next in the loop conditions so that
+ * additional bounds checks aren't needed inside the loop body.
+ *
+ * To reduce latency, the bitbuffer is refilled and the next litlen
+ * decode table entry is preloaded before each loop iteration.
+ */
+ if (in_next >= in_fastloop_end || out_next >= out_fastloop_end)
+ goto generic_loop;
+ REFILL_BITS_IN_FASTLOOP();
+ entry = d->u.litlen_decode_table[bitbuf & litlen_tablemask];
+ do {
+ u32 length, offset, lit;
+ const u8 *src;
+ u8 *dst;
+
+ /*
+ * Consume the bits for the litlen decode table entry. Save the
+ * original bitbuf for later, in case the extra match length
+ * bits need to be extracted from it.
+ */
+ saved_bitbuf = bitbuf;
+ bitbuf >>= (u8)entry;
+ bitsleft -= entry; /* optimization: subtract full entry */
+
+ /*
+ * Begin by checking for a "fast" literal, i.e. a literal that
+ * doesn't need a subtable.
+ */
+ if (entry & HUFFDEC_LITERAL) {
+ /*
+ * On 64-bit platforms, we decode up to 2 extra fast
+ * literals in addition to the primary item, as this
+ * increases performance and still leaves enough bits
+ * remaining for what follows. We could actually do 3,
+ * assuming LITLEN_TABLEBITS=11, but that actually
+ * decreases performance slightly (perhaps by messing
+ * with the branch prediction of the conditional refill
+ * that happens later while decoding the match offset).
+ *
+ * Note: the definitions of FASTLOOP_MAX_BYTES_WRITTEN
+ * and FASTLOOP_MAX_BYTES_READ need to be updated if the
+ * number of extra literals decoded here is changed.
+ */
+ if (/* enough bits for 2 fast literals + length + offset preload? */
+ CAN_CONSUME_AND_THEN_PRELOAD(2 * LITLEN_TABLEBITS +
+ LENGTH_MAXBITS,
+ OFFSET_TABLEBITS) &&
+ /* enough bits for 2 fast literals + slow literal + litlen preload? */
+ CAN_CONSUME_AND_THEN_PRELOAD(2 * LITLEN_TABLEBITS +
+ DEFLATE_MAX_LITLEN_CODEWORD_LEN,
+ LITLEN_TABLEBITS)) {
+ /* 1st extra fast literal */
+ lit = entry >> 16;
+ entry = d->u.litlen_decode_table[bitbuf & litlen_tablemask];
+ saved_bitbuf = bitbuf;
+ bitbuf >>= (u8)entry;
+ bitsleft -= entry;
+ *out_next++ = lit;
+ if (entry & HUFFDEC_LITERAL) {
+ /* 2nd extra fast literal */
+ lit = entry >> 16;
+ entry = d->u.litlen_decode_table[bitbuf & litlen_tablemask];
+ saved_bitbuf = bitbuf;
+ bitbuf >>= (u8)entry;
+ bitsleft -= entry;
+ *out_next++ = lit;
+ if (entry & HUFFDEC_LITERAL) {
+ /*
+ * Another fast literal, but
+ * this one is in lieu of the
+ * primary item, so it doesn't
+ * count as one of the extras.
+ */
+ lit = entry >> 16;
+ entry = d->u.litlen_decode_table[bitbuf & litlen_tablemask];
+ REFILL_BITS_IN_FASTLOOP();
+ *out_next++ = lit;
+ continue;
+ }
+ }
+ } else {
+ /*
+ * Decode a literal. While doing so, preload
+ * the next litlen decode table entry and refill
+ * the bitbuffer. To reduce latency, we've
+ * arranged for there to be enough "preloadable"
+ * bits remaining to do the table preload
+ * independently of the refill.
+ */
+ STATIC_ASSERT(CAN_CONSUME_AND_THEN_PRELOAD(
+ LITLEN_TABLEBITS, LITLEN_TABLEBITS));
+ lit = entry >> 16;
+ entry = d->u.litlen_decode_table[bitbuf & litlen_tablemask];
+ REFILL_BITS_IN_FASTLOOP();
+ *out_next++ = lit;
+ continue;
+ }
+ }
+
+ /*
+ * It's not a literal entry, so it can be a length entry, a
+ * subtable pointer entry, or an end-of-block entry. Detect the
+ * two unlikely cases by testing the HUFFDEC_EXCEPTIONAL flag.
+ */
+ if (unlikely(entry & HUFFDEC_EXCEPTIONAL)) {
+ /* Subtable pointer or end-of-block entry */
+
+ if (unlikely(entry & HUFFDEC_END_OF_BLOCK))
+ goto block_done;
+
+ /*
+ * A subtable is required. Load and consume the
+ * subtable entry. The subtable entry can be of any
+ * type: literal, length, or end-of-block.
+ */
+ entry = d->u.litlen_decode_table[(entry >> 16) +
+ EXTRACT_VARBITS(bitbuf, (entry >> 8) & 0x3F)];
+ saved_bitbuf = bitbuf;
+ bitbuf >>= (u8)entry;
+ bitsleft -= entry;
+
+ /*
+ * 32-bit platforms that use the byte-at-a-time refill
+ * method have to do a refill here for there to always
+ * be enough bits to decode a literal that requires a
+ * subtable, then preload the next litlen decode table
+ * entry; or to decode a match length that requires a
+ * subtable, then preload the offset decode table entry.
+ */
+ if (!CAN_CONSUME_AND_THEN_PRELOAD(DEFLATE_MAX_LITLEN_CODEWORD_LEN,
+ LITLEN_TABLEBITS) ||
+ !CAN_CONSUME_AND_THEN_PRELOAD(LENGTH_MAXBITS,
+ OFFSET_TABLEBITS))
+ REFILL_BITS_IN_FASTLOOP();
+ if (entry & HUFFDEC_LITERAL) {
+ /* Decode a literal that required a subtable. */
+ lit = entry >> 16;
+ entry = d->u.litlen_decode_table[bitbuf & litlen_tablemask];
+ REFILL_BITS_IN_FASTLOOP();
+ *out_next++ = lit;
+ continue;
+ }
+ if (unlikely(entry & HUFFDEC_END_OF_BLOCK))
+ goto block_done;
+ /* Else, it's a length that required a subtable. */
+ }
+
+ /*
+ * Decode the match length: the length base value associated
+ * with the litlen symbol (which we extract from the decode
+ * table entry), plus the extra length bits. We don't need to
+ * consume the extra length bits here, as they were included in
+ * the bits consumed by the entry earlier. We also don't need
+ * to check for too-long matches here, as this is inside the
+ * fastloop where it's already been verified that the output
+ * buffer has enough space remaining to copy a max-length match.
+ */
+ length = entry >> 16;
+ length += EXTRACT_VARBITS8(saved_bitbuf, entry) >> (u8)(entry >> 8);
+
+ /*
+ * Decode the match offset. There are enough "preloadable" bits
+ * remaining to preload the offset decode table entry, but a
+ * refill might be needed before consuming it.
+ */
+ STATIC_ASSERT(CAN_CONSUME_AND_THEN_PRELOAD(LENGTH_MAXFASTBITS,
+ OFFSET_TABLEBITS));
+ entry = d->offset_decode_table[bitbuf & BITMASK(OFFSET_TABLEBITS)];
+ if (CAN_CONSUME_AND_THEN_PRELOAD(OFFSET_MAXBITS,
+ LITLEN_TABLEBITS)) {
+ /*
+ * Decoding a match offset on a 64-bit platform. We may
+ * need to refill once, but then we can decode the whole
+ * offset and preload the next litlen table entry.
+ */
+ if (unlikely(entry & HUFFDEC_EXCEPTIONAL)) {
+ /* Offset codeword requires a subtable */
+ if (unlikely((u8)bitsleft < OFFSET_MAXBITS +
+ LITLEN_TABLEBITS - PRELOAD_SLACK))
+ REFILL_BITS_IN_FASTLOOP();
+ bitbuf >>= OFFSET_TABLEBITS;
+ bitsleft -= OFFSET_TABLEBITS;
+ entry = d->offset_decode_table[(entry >> 16) +
+ EXTRACT_VARBITS(bitbuf, (entry >> 8) & 0x3F)];
+ } else if (unlikely((u8)bitsleft < OFFSET_MAXFASTBITS +
+ LITLEN_TABLEBITS - PRELOAD_SLACK))
+ REFILL_BITS_IN_FASTLOOP();
+ } else {
+ /* Decoding a match offset on a 32-bit platform */
+ REFILL_BITS_IN_FASTLOOP();
+ if (unlikely(entry & HUFFDEC_EXCEPTIONAL)) {
+ /* Offset codeword requires a subtable */
+ bitbuf >>= OFFSET_TABLEBITS;
+ bitsleft -= OFFSET_TABLEBITS;
+ entry = d->offset_decode_table[(entry >> 16) +
+ EXTRACT_VARBITS(bitbuf, (entry >> 8) & 0x3F)];
+ REFILL_BITS_IN_FASTLOOP();
+ /* No further refill needed before extra bits */
+ STATIC_ASSERT(CAN_CONSUME(
+ OFFSET_MAXBITS - OFFSET_TABLEBITS));
+ } else {
+ /* No refill needed before extra bits */
+ STATIC_ASSERT(CAN_CONSUME(OFFSET_MAXFASTBITS));
+ }
+ }
+ saved_bitbuf = bitbuf;
+ bitbuf >>= (u8)entry;
+ bitsleft -= entry; /* optimization: subtract full entry */
+ offset = entry >> 16;
+ offset += EXTRACT_VARBITS8(saved_bitbuf, entry) >> (u8)(entry >> 8);
+
+ /* Validate the match offset; needed even in the fastloop. */
+ SAFETY_CHECK(offset <= out_next - (const u8 *)out);
+ src = out_next - offset;
+ dst = out_next;
+ out_next += length;
+
+ /*
+ * Before starting to issue the instructions to copy the match,
+ * refill the bitbuffer and preload the litlen decode table
+ * entry for the next loop iteration. This can increase
+ * performance by allowing the latency of the match copy to
+ * overlap with these other operations. To further reduce
+ * latency, we've arranged for there to be enough bits remaining
+ * to do the table preload independently of the refill, except
+ * on 32-bit platforms using the byte-at-a-time refill method.
+ */
+ if (!CAN_CONSUME_AND_THEN_PRELOAD(
+ MAX(OFFSET_MAXBITS - OFFSET_TABLEBITS,
+ OFFSET_MAXFASTBITS),
+ LITLEN_TABLEBITS) &&
+ unlikely((u8)bitsleft < LITLEN_TABLEBITS - PRELOAD_SLACK))
+ REFILL_BITS_IN_FASTLOOP();
+ entry = d->u.litlen_decode_table[bitbuf & litlen_tablemask];
+ REFILL_BITS_IN_FASTLOOP();
+
+ /*
+ * Copy the match. On most CPUs the fastest method is a
+ * word-at-a-time copy, unconditionally copying about 5 words
+ * since this is enough for most matches without being too much.
+ *
+ * The normal word-at-a-time copy works for offset >= WORDBYTES,
+ * which is most cases. The case of offset == 1 is also common
+ * and is worth optimizing for, since it is just RLE encoding of
+ * the previous byte, which is the result of compressing long
+ * runs of the same byte.
+ *
+ * Writing past the match 'length' is allowed here, since it's
+ * been ensured there is enough output space left for a slight
+ * overrun. FASTLOOP_MAX_BYTES_WRITTEN needs to be updated if
+ * the maximum possible overrun here is changed.
+ */
+ if (UNALIGNED_ACCESS_IS_FAST && offset >= WORDBYTES) {
+ store_word_unaligned(load_word_unaligned(src), dst);
+ src += WORDBYTES;
+ dst += WORDBYTES;
+ store_word_unaligned(load_word_unaligned(src), dst);
+ src += WORDBYTES;
+ dst += WORDBYTES;
+ store_word_unaligned(load_word_unaligned(src), dst);
+ src += WORDBYTES;
+ dst += WORDBYTES;
+ store_word_unaligned(load_word_unaligned(src), dst);
+ src += WORDBYTES;
+ dst += WORDBYTES;
+ store_word_unaligned(load_word_unaligned(src), dst);
+ src += WORDBYTES;
+ dst += WORDBYTES;
+ while (dst < out_next) {
+ store_word_unaligned(load_word_unaligned(src), dst);
+ src += WORDBYTES;
+ dst += WORDBYTES;
+ store_word_unaligned(load_word_unaligned(src), dst);
+ src += WORDBYTES;
+ dst += WORDBYTES;
+ store_word_unaligned(load_word_unaligned(src), dst);
+ src += WORDBYTES;
+ dst += WORDBYTES;
+ store_word_unaligned(load_word_unaligned(src), dst);
+ src += WORDBYTES;
+ dst += WORDBYTES;
+ store_word_unaligned(load_word_unaligned(src), dst);
+ src += WORDBYTES;
+ dst += WORDBYTES;
+ }
+ } else if (UNALIGNED_ACCESS_IS_FAST && offset == 1) {
+ machine_word_t v;
+
+ /*
+ * This part tends to get auto-vectorized, so keep it
+ * copying a multiple of 16 bytes at a time.
+ */
+ v = (machine_word_t)0x0101010101010101 * src[0];
+ store_word_unaligned(v, dst);
+ dst += WORDBYTES;
+ store_word_unaligned(v, dst);
+ dst += WORDBYTES;
+ store_word_unaligned(v, dst);
+ dst += WORDBYTES;
+ store_word_unaligned(v, dst);
+ dst += WORDBYTES;
+ while (dst < out_next) {
+ store_word_unaligned(v, dst);
+ dst += WORDBYTES;
+ store_word_unaligned(v, dst);
+ dst += WORDBYTES;
+ store_word_unaligned(v, dst);
+ dst += WORDBYTES;
+ store_word_unaligned(v, dst);
+ dst += WORDBYTES;
+ }
+ } else if (UNALIGNED_ACCESS_IS_FAST) {
+ store_word_unaligned(load_word_unaligned(src), dst);
+ src += offset;
+ dst += offset;
+ store_word_unaligned(load_word_unaligned(src), dst);
+ src += offset;
+ dst += offset;
+ do {
+ store_word_unaligned(load_word_unaligned(src), dst);
+ src += offset;
+ dst += offset;
+ store_word_unaligned(load_word_unaligned(src), dst);
+ src += offset;
+ dst += offset;
+ } while (dst < out_next);
+ } else {
+ *dst++ = *src++;
+ *dst++ = *src++;
+ do {
+ *dst++ = *src++;
+ } while (dst < out_next);
+ }
+ } while (in_next < in_fastloop_end && out_next < out_fastloop_end);
+
+ /*
+ * This is the generic loop for decoding literals and matches. This
+ * handles cases where in_next and out_next are close to the end of
+ * their respective buffers. Usually this loop isn't performance-
+ * critical, as most time is spent in the fastloop above instead. We
+ * therefore omit some optimizations here in favor of smaller code.
+ */
+generic_loop:
+ for (;;) {
+ u32 length, offset;
+ const u8 *src;
+ u8 *dst;
+
+ REFILL_BITS();
+ entry = d->u.litlen_decode_table[bitbuf & litlen_tablemask];
+ saved_bitbuf = bitbuf;
+ bitbuf >>= (u8)entry;
+ bitsleft -= entry;
+ if (unlikely(entry & HUFFDEC_SUBTABLE_POINTER)) {
+ entry = d->u.litlen_decode_table[(entry >> 16) +
+ EXTRACT_VARBITS(bitbuf, (entry >> 8) & 0x3F)];
+ saved_bitbuf = bitbuf;
+ bitbuf >>= (u8)entry;
+ bitsleft -= entry;
+ }
+ length = entry >> 16;
+ if (entry & HUFFDEC_LITERAL) {
+ if (unlikely(out_next == out_end))
+ return LIBDEFLATE_INSUFFICIENT_SPACE;
+ *out_next++ = length;
+ continue;
+ }
+ if (unlikely(entry & HUFFDEC_END_OF_BLOCK))
+ goto block_done;
+ length += EXTRACT_VARBITS8(saved_bitbuf, entry) >> (u8)(entry >> 8);
+ if (unlikely(length > out_end - out_next))
+ return LIBDEFLATE_INSUFFICIENT_SPACE;
+
+ if (!CAN_CONSUME(LENGTH_MAXBITS + OFFSET_MAXBITS))
+ REFILL_BITS();
+ entry = d->offset_decode_table[bitbuf & BITMASK(OFFSET_TABLEBITS)];
+ if (unlikely(entry & HUFFDEC_EXCEPTIONAL)) {
+ bitbuf >>= OFFSET_TABLEBITS;
+ bitsleft -= OFFSET_TABLEBITS;
+ entry = d->offset_decode_table[(entry >> 16) +
+ EXTRACT_VARBITS(bitbuf, (entry >> 8) & 0x3F)];
+ if (!CAN_CONSUME(OFFSET_MAXBITS))
+ REFILL_BITS();
+ }
+ offset = entry >> 16;
+ offset += EXTRACT_VARBITS8(bitbuf, entry) >> (u8)(entry >> 8);
+ bitbuf >>= (u8)entry;
+ bitsleft -= entry;
+
+ SAFETY_CHECK(offset <= out_next - (const u8 *)out);
+ src = out_next - offset;
+ dst = out_next;
+ out_next += length;
+
+ STATIC_ASSERT(DEFLATE_MIN_MATCH_LEN == 3);
+ *dst++ = *src++;
+ *dst++ = *src++;
+ do {
+ *dst++ = *src++;
+ } while (dst < out_next);
+ }
+
+block_done:
+ /* Finished decoding a block */
+
+ if (!is_final_block)
+ goto next_block;
+
+ /* That was the last block. */
+
+ bitsleft = (u8)bitsleft;
+
+ /*
+ * If any of the implicit appended zero bytes were consumed (not just
+ * refilled) before hitting end of stream, then the data is bad.
+ */
+ SAFETY_CHECK(overread_count <= (bitsleft >> 3));
+
+ /* Optionally return the actual number of bytes consumed. */
+ if (actual_in_nbytes_ret) {
+ /* Don't count bytes that were refilled but not consumed. */
+ in_next -= (bitsleft >> 3) - overread_count;
+
+ *actual_in_nbytes_ret = in_next - (u8 *)in;
+ }
+
+ /* Optionally return the actual number of bytes written. */
+ if (actual_out_nbytes_ret) {
+ *actual_out_nbytes_ret = out_next - (u8 *)out;
+ } else {
+ if (out_next != out_end)
+ return LIBDEFLATE_SHORT_OUTPUT;
+ }
+ return LIBDEFLATE_SUCCESS;
+}
+
+#undef FUNCNAME
+#undef ATTRIBUTES
+#undef EXTRACT_VARBITS
+#undef EXTRACT_VARBITS8
diff --git a/tools/z64compress/src/enc/libdeflate/lib/deflate_compress.c b/tools/z64compress/src/enc/libdeflate/lib/deflate_compress.c
new file mode 100644
index 000000000..7c92d9823
--- /dev/null
+++ b/tools/z64compress/src/enc/libdeflate/lib/deflate_compress.c
@@ -0,0 +1,3877 @@
+/*
+ * deflate_compress.c - a compressor for DEFLATE
+ *
+ * Copyright 2016 Eric Biggers
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "deflate_compress.h"
+#include "deflate_constants.h"
+
+#include "libdeflate.h"
+
+/******************************************************************************/
+
+/*
+ * The following parameters can be changed at build time to customize the
+ * compression algorithms slightly:
+ *
+ * (Note, not all customizable parameters are here. Some others can be found in
+ * libdeflate_alloc_compressor() and in *_matchfinder.h.)
+ */
+
+/*
+ * If this parameter is defined to 1, then the near-optimal parsing algorithm
+ * will be included, and compression levels 10-12 will use it. This algorithm
+ * usually produces a compression ratio significantly better than the other
+ * algorithms. However, it is slow. If this parameter is defined to 0, then
+ * levels 10-12 will be the same as level 9 and will use the lazy2 algorithm.
+ */
+#define SUPPORT_NEAR_OPTIMAL_PARSING 1
+
+/*
+ * This is the minimum block length that the compressor will use, in
+ * uncompressed bytes. This should be a value below which using shorter blocks
+ * is unlikely to be worthwhile, due to the per-block overhead. This value does
+ * not apply to the final block, which may be shorter than this (if the input is
+ * shorter, it will have to be), or to the final uncompressed block in a series
+ * of uncompressed blocks that cover more than UINT16_MAX bytes.
+ *
+ * This value is also approximately the amount by which what would otherwise be
+ * the second-to-last block is allowed to grow past the soft maximum length in
+ * order to avoid having to use a very short final block.
+ *
+ * Defining a fixed minimum block length is needed in order to guarantee a
+ * reasonable upper bound on the compressed size. It's also needed because our
+ * block splitting algorithm doesn't work well on very short blocks.
+ */
+#define MIN_BLOCK_LENGTH 5000
+
+/*
+ * For the greedy, lazy, lazy2, and near-optimal compressors: This is the soft
+ * maximum block length, in uncompressed bytes. The compressor will try to end
+ * blocks at this length, but it may go slightly past it if there is a match
+ * that straddles this limit or if the input data ends soon after this limit.
+ * This parameter doesn't apply to uncompressed blocks, which the DEFLATE format
+ * limits to 65535 bytes.
+ *
+ * This should be a value above which it is very likely that splitting the block
+ * would produce a better compression ratio. For the near-optimal compressor,
+ * increasing/decreasing this parameter will increase/decrease per-compressor
+ * memory usage linearly.
+ */
+#define SOFT_MAX_BLOCK_LENGTH 300000
+
+/*
+ * For the greedy, lazy, and lazy2 compressors: this is the length of the
+ * sequence store, which is an array where the compressor temporarily stores
+ * matches that it's going to use in the current block. This value is the
+ * maximum number of matches that can be used in a block. If the sequence store
+ * fills up, then the compressor will be forced to end the block early. This
+ * value should be large enough so that this rarely happens, due to the block
+ * being ended normally before then. Increasing/decreasing this value will
+ * increase/decrease per-compressor memory usage linearly.
+ */
+#define SEQ_STORE_LENGTH 50000
+
+/*
+ * For deflate_compress_fastest(): This is the soft maximum block length.
+ * deflate_compress_fastest() doesn't use the regular block splitting algorithm;
+ * it only ends blocks when they reach FAST_SOFT_MAX_BLOCK_LENGTH bytes or
+ * FAST_SEQ_STORE_LENGTH matches. Therefore, this value should be lower than
+ * the regular SOFT_MAX_BLOCK_LENGTH.
+ */
+#define FAST_SOFT_MAX_BLOCK_LENGTH 65535
+
+/*
+ * For deflate_compress_fastest(): this is the length of the sequence store.
+ * This is like SEQ_STORE_LENGTH, but this should be a lower value.
+ */
+#define FAST_SEQ_STORE_LENGTH 8192
+
+/*
+ * These are the maximum codeword lengths, in bits, the compressor will use for
+ * each Huffman code. The DEFLATE format defines limits for these. However,
+ * further limiting litlen codewords to 14 bits is beneficial, since it has
+ * negligible effect on compression ratio but allows some optimizations when
+ * outputting bits. (It allows 4 literals to be written at once rather than 3.)
+ */
+#define MAX_LITLEN_CODEWORD_LEN 14
+#define MAX_OFFSET_CODEWORD_LEN DEFLATE_MAX_OFFSET_CODEWORD_LEN
+#define MAX_PRE_CODEWORD_LEN DEFLATE_MAX_PRE_CODEWORD_LEN
+
+#if SUPPORT_NEAR_OPTIMAL_PARSING
+
+/* Parameters specific to the near-optimal parsing algorithm */
+
+/*
+ * BIT_COST is a scaling factor that allows the near-optimal compressor to
+ * consider fractional bit costs when deciding which literal/match sequence to
+ * use. This is useful when the true symbol costs are unknown. For example, if
+ * the compressor thinks that a symbol has 6.5 bits of entropy, it can set its
+ * cost to 6.5 bits rather than have to use 6 or 7 bits. Although in the end
+ * each symbol will use a whole number of bits due to the Huffman coding,
+ * considering fractional bits can be helpful due to the limited information.
+ *
+ * BIT_COST should be a power of 2. A value of 8 or 16 works well. A higher
+ * value isn't very useful since the calculations are approximate anyway.
+ *
+ * BIT_COST doesn't apply to deflate_flush_block(), which considers whole bits.
+ */
+#define BIT_COST 16
+
+/*
+ * The NOSTAT_BITS value for a given alphabet is the number of bits assumed to
+ * be needed to output a symbol that was unused in the previous optimization
+ * pass. Assigning a default cost allows the symbol to be used in the next
+ * optimization pass. However, the cost should be relatively high because the
+ * symbol probably won't be used very many times (if at all).
+ */
+#define LITERAL_NOSTAT_BITS 13
+#define LENGTH_NOSTAT_BITS 13
+#define OFFSET_NOSTAT_BITS 10
+
+/*
+ * This is (slightly less than) the maximum number of matches that the
+ * near-optimal compressor will cache per block. This behaves similarly to
+ * SEQ_STORE_LENGTH for the other compressors.
+ */
+#define MATCH_CACHE_LENGTH (SOFT_MAX_BLOCK_LENGTH * 5)
+
+#endif /* SUPPORT_NEAR_OPTIMAL_PARSING */
+
+/******************************************************************************/
+
+/* Include the needed matchfinders. */
+#define MATCHFINDER_WINDOW_ORDER DEFLATE_WINDOW_ORDER
+#include "hc_matchfinder.h"
+#include "ht_matchfinder.h"
+#if SUPPORT_NEAR_OPTIMAL_PARSING
+# include "bt_matchfinder.h"
+/*
+ * This is the maximum number of matches the binary trees matchfinder can find
+ * at a single position. Since the matchfinder never finds more than one match
+ * for the same length, presuming one of each possible length is sufficient for
+ * an upper bound. (This says nothing about whether it is worthwhile to
+ * consider so many matches; this is just defining the worst case.)
+ */
+#define MAX_MATCHES_PER_POS \
+ (DEFLATE_MAX_MATCH_LEN - DEFLATE_MIN_MATCH_LEN + 1)
+#endif
+
+/*
+ * The largest block length we will ever use is when the final block is of
+ * length SOFT_MAX_BLOCK_LENGTH + MIN_BLOCK_LENGTH - 1, or when any block is of
+ * length SOFT_MAX_BLOCK_LENGTH + 1 + DEFLATE_MAX_MATCH_LEN. The latter case
+ * occurs when the lazy2 compressor chooses two literals and a maximum-length
+ * match, starting at SOFT_MAX_BLOCK_LENGTH - 1.
+ */
+#define MAX_BLOCK_LENGTH \
+ MAX(SOFT_MAX_BLOCK_LENGTH + MIN_BLOCK_LENGTH - 1, \
+ SOFT_MAX_BLOCK_LENGTH + 1 + DEFLATE_MAX_MATCH_LEN)
+
+/*
+ * Build-time sanity checks on the tunable parameters defined above.  This
+ * function contains only STATIC_ASSERT()s, so it has no runtime effect;
+ * it exists so that invalid parameter combinations fail at compile time.
+ */
+static forceinline void
+check_buildtime_parameters(void)
+{
+	/*
+	 * Verify that MIN_BLOCK_LENGTH is being honored, as
+	 * libdeflate_deflate_compress_bound() depends on it.
+	 */
+	STATIC_ASSERT(SOFT_MAX_BLOCK_LENGTH >= MIN_BLOCK_LENGTH);
+	STATIC_ASSERT(FAST_SOFT_MAX_BLOCK_LENGTH >= MIN_BLOCK_LENGTH);
+	/* A full sequence store must always be able to cover a minimum block. */
+	STATIC_ASSERT(SEQ_STORE_LENGTH * DEFLATE_MIN_MATCH_LEN >=
+		      MIN_BLOCK_LENGTH);
+	STATIC_ASSERT(FAST_SEQ_STORE_LENGTH * HT_MATCHFINDER_MIN_MATCH_LEN >=
+		      MIN_BLOCK_LENGTH);
+#if SUPPORT_NEAR_OPTIMAL_PARSING
+	/* The match cache must hold the worst case for a minimum-length block. */
+	STATIC_ASSERT(MIN_BLOCK_LENGTH * MAX_MATCHES_PER_POS <=
+		      MATCH_CACHE_LENGTH);
+#endif
+
+	/* The definition of MAX_BLOCK_LENGTH assumes this. */
+	STATIC_ASSERT(FAST_SOFT_MAX_BLOCK_LENGTH <= SOFT_MAX_BLOCK_LENGTH);
+
+	/* Verify that the sequence stores aren't uselessly large. */
+	STATIC_ASSERT(SEQ_STORE_LENGTH * DEFLATE_MIN_MATCH_LEN <=
+		      SOFT_MAX_BLOCK_LENGTH + MIN_BLOCK_LENGTH);
+	STATIC_ASSERT(FAST_SEQ_STORE_LENGTH * HT_MATCHFINDER_MIN_MATCH_LEN <=
+		      FAST_SOFT_MAX_BLOCK_LENGTH + MIN_BLOCK_LENGTH);
+
+	/* Verify that the maximum codeword lengths are valid. */
+	STATIC_ASSERT(
+		MAX_LITLEN_CODEWORD_LEN <= DEFLATE_MAX_LITLEN_CODEWORD_LEN);
+	STATIC_ASSERT(
+		MAX_OFFSET_CODEWORD_LEN <= DEFLATE_MAX_OFFSET_CODEWORD_LEN);
+	STATIC_ASSERT(
+		MAX_PRE_CODEWORD_LEN <= DEFLATE_MAX_PRE_CODEWORD_LEN);
+	/* Each code must be able to give every symbol a codeword. */
+	STATIC_ASSERT(
+		(1U << MAX_LITLEN_CODEWORD_LEN) >= DEFLATE_NUM_LITLEN_SYMS);
+	STATIC_ASSERT(
+		(1U << MAX_OFFSET_CODEWORD_LEN) >= DEFLATE_NUM_OFFSET_SYMS);
+	STATIC_ASSERT(
+		(1U << MAX_PRE_CODEWORD_LEN) >= DEFLATE_NUM_PRECODE_SYMS);
+}
+
+/******************************************************************************/
+
+/*
+ * Table: length slot => length slot base value.  These are the base match
+ * lengths of the DEFLATE length codes (cf. RFC 1951, section 3.2.5); the
+ * actual length is the base plus the extra length bits.
+ */
+static const unsigned deflate_length_slot_base[] = {
+	3, 4, 5, 6, 7, 8, 9, 10,
+	11, 13, 15, 17, 19, 23, 27, 31,
+	35, 43, 51, 59, 67, 83, 99, 115,
+	131, 163, 195, 227, 258,
+};
+
+/*
+ * Table: length slot => number of extra length bits.  Indexed in parallel
+ * with deflate_length_slot_base[] above (cf. RFC 1951, section 3.2.5).
+ */
+static const u8 deflate_extra_length_bits[] = {
+	0, 0, 0, 0, 0, 0, 0, 0,
+	1, 1, 1, 1, 2, 2, 2, 2,
+	3, 3, 3, 3, 4, 4, 4, 4,
+	5, 5, 5, 5, 0,
+};
+
+/*
+ * Table: offset slot => offset slot base value.  These are the base match
+ * offsets of the DEFLATE offset codes (cf. RFC 1951, section 3.2.5); the
+ * actual offset is the base plus the extra offset bits.
+ */
+static const unsigned deflate_offset_slot_base[] = {
+	1, 2, 3, 4, 5, 7, 9, 13,
+	17, 25, 33, 49, 65, 97, 129, 193,
+	257, 385, 513, 769, 1025, 1537, 2049, 3073,
+	4097, 6145, 8193, 12289, 16385, 24577,
+};
+
+/*
+ * Table: offset slot => number of extra offset bits.  Indexed in parallel
+ * with deflate_offset_slot_base[] above (cf. RFC 1951, section 3.2.5).
+ */
+static const u8 deflate_extra_offset_bits[] = {
+	0, 0, 0, 0, 1, 1, 2, 2,
+	3, 3, 4, 4, 5, 5, 6, 6,
+	7, 7, 8, 8, 9, 9, 10, 10,
+	11, 11, 12, 12, 13, 13,
+};
+
+/*
+ * Table: length => length slot.  Maps a match length directly to its length
+ * slot.  Entries for lengths below DEFLATE_MIN_MATCH_LEN (indices 0-2) are
+ * unused placeholders.
+ */
+static const u8 deflate_length_slot[DEFLATE_MAX_MATCH_LEN + 1] = {
+	0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 12,
+	12, 13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 15, 15, 16, 16, 16, 16, 16,
+	16, 16, 16, 17, 17, 17, 17, 17, 17, 17, 17, 18, 18, 18, 18, 18, 18, 18,
+	18, 19, 19, 19, 19, 19, 19, 19, 19, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+	20, 20, 20, 20, 20, 20, 20, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+	21, 21, 21, 21, 21, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
+	22, 22, 22, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23,
+	23, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+	24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 25, 25, 25,
+	25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
+	25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 26, 26, 26, 26, 26, 26, 26,
+	26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
+	26, 26, 26, 26, 26, 26, 26, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
+	27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
+	27, 27, 28,
+};
+
+/*
+ * A condensed table which maps offset => offset slot as follows:
+ *
+ *	offset <= 256: deflate_offset_slot[offset]
+ *	offset > 256: deflate_offset_slot[256 + ((offset - 1) >> 7)]
+ *
+ * The second half (indices 257..511) covers offsets 257..32768 at a
+ * granularity of 128; the 0 at index 257 is an unused filler, since offsets
+ * that would map there are <= 256 and use the first half instead.
+ *
+ * This table was generated by scripts/gen_offset_slot_map.py.
+ */
+static const u8 deflate_offset_slot[512] = {
+	0, 0, 1, 2, 3, 4, 4, 5, 5, 6, 6, 6, 6, 7, 7, 7,
+	7, 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9,
+	9, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+	10, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+	11, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+	12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+	12, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+	13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+	13, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+	14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+	14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+	14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+	14, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+	15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+	15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+	15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+	15, 0, 16, 17, 18, 18, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21,
+	22, 22, 22, 22, 22, 22, 22, 22, 23, 23, 23, 23, 23, 23, 23, 23,
+	24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+	25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
+	26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
+	26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
+	27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
+	27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
+	28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
+	28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
+	28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
+	28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
+	29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29,
+	29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29,
+	29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29,
+	29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29,
+};
+
+/*
+ * The order in which precode codeword lengths are stored in the output
+ * stream (cf. RFC 1951, section 3.2.7): most commonly used symbols first,
+ * so that trailing zero lengths can be omitted.
+ */
+static const u8 deflate_precode_lens_permutation[DEFLATE_NUM_PRECODE_SYMS] = {
+	16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15
+};
+
+/*
+ * Table: precode symbol => number of extra bits.  Only the repeat symbols
+ * 16, 17, and 18 take extra bits (2, 3, and 7 respectively).
+ */
+static const u8 deflate_extra_precode_bits[DEFLATE_NUM_PRECODE_SYMS] = {
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 3, 7
+};
+
+/*
+ * Codewords for the DEFLATE Huffman codes, indexed by symbol.  Each
+ * codeword is stored in the low bits of a u32.
+ */
+struct deflate_codewords {
+	u32 litlen[DEFLATE_NUM_LITLEN_SYMS];
+	u32 offset[DEFLATE_NUM_OFFSET_SYMS];
+};
+
+/*
+ * Codeword lengths (in bits) for the DEFLATE Huffman codes, indexed by
+ * symbol.  A zero length means the corresponding symbol had zero frequency
+ * and therefore has no codeword.
+ */
+struct deflate_lens {
+	u8 litlen[DEFLATE_NUM_LITLEN_SYMS];
+	u8 offset[DEFLATE_NUM_OFFSET_SYMS];
+};
+
+/*
+ * Codewords and lengths for the DEFLATE Huffman codes.  Together these
+ * fully describe a litlen code and an offset code.
+ */
+struct deflate_codes {
+	struct deflate_codewords codewords;
+	struct deflate_lens lens;
+};
+
+/*
+ * Symbol frequency counters for the DEFLATE Huffman codes, indexed by
+ * symbol.  These are the inputs to Huffman code construction.
+ */
+struct deflate_freqs {
+	u32 litlen[DEFLATE_NUM_LITLEN_SYMS];
+	u32 offset[DEFLATE_NUM_OFFSET_SYMS];
+};
+
+/*
+ * Represents a run of literals followed by a match or end-of-block.  This
+ * struct is needed to temporarily store items chosen by the parser, since items
+ * cannot be written until all items for the block have been chosen and the
+ * block's Huffman codes have been computed.
+ */
+struct deflate_sequence {
+
+	/*
+	 * Bits 0..22: the number of literals in this run.  This may be 0 and
+	 * can be at most MAX_BLOCK_LENGTH.  The literals are not stored
+	 * explicitly in this structure; instead, they are read directly from
+	 * the uncompressed data.
+	 *
+	 * Bits 23..31: the length of the match which follows the literals, or 0
+	 * if this literal run was the last in the block, so there is no match
+	 * which follows it.
+	 */
+#define SEQ_LENGTH_SHIFT 23
+#define SEQ_LITRUNLEN_MASK (((u32)1 << SEQ_LENGTH_SHIFT) - 1)
+	u32 litrunlen_and_length;
+
+	/*
+	 * If 'length' doesn't indicate end-of-block, then this is the offset of
+	 * the match which follows the literals.  (Not meaningful otherwise.)
+	 */
+	u16 offset;
+
+	/*
+	 * If 'length' doesn't indicate end-of-block, then this is the offset
+	 * slot of the match which follows the literals.  (Not meaningful
+	 * otherwise.)
+	 */
+	u16 offset_slot;
+};
+
+#if SUPPORT_NEAR_OPTIMAL_PARSING
+
+/*
+ * Costs for the near-optimal parsing algorithm.  Each cost is the estimated
+ * number of bits needed to output the item; presumably these are in units of
+ * 1/BIT_COST bits to allow fractional costs (see BIT_COST above) -- confirm
+ * against the code that fills these in.
+ */
+struct deflate_costs {
+
+	/* The cost to output each possible literal */
+	u32 literal[DEFLATE_NUM_LITERALS];
+
+	/* The cost to output each possible match length */
+	u32 length[DEFLATE_MAX_MATCH_LEN + 1];
+
+	/* The cost to output a match offset of each possible offset slot */
+	u32 offset_slot[DEFLATE_NUM_OFFSET_SYMS];
+};
+
+/*
+ * This structure represents a byte position in the input data and a node in the
+ * graph of possible match/literal choices for the current block.
+ *
+ * Logically, each incoming edge to this node is labeled with a literal or a
+ * match that can be taken to reach this position from an earlier position; and
+ * each outgoing edge from this node is labeled with a literal or a match that
+ * can be taken to advance from this position to a later position.
+ *
+ * But these "edges" are actually stored elsewhere (in 'match_cache').  Here we
+ * associate with each node just two pieces of information:
+ *
+ *	'cost_to_end' is the minimum cost to reach the end of the block from
+ *	this position.
+ *
+ *	'item' represents the literal or match that must be chosen from here to
+ *	reach the end of the block with the minimum cost.  Equivalently, this
+ *	can be interpreted as the label of the outgoing edge on the minimum-cost
+ *	path to the "end of block" node from this node.
+ */
+struct deflate_optimum_node {
+
+	/* Minimum cost to reach the end of the block from this position */
+	u32 cost_to_end;
+
+	/*
+	 * Notes on the match/literal representation used here:
+	 *
+	 *	The low bits of 'item' are the length: 1 if this is a literal,
+	 *	or the match length if this is a match.
+	 *
+	 *	The high bits of 'item' are the actual literal byte if this is a
+	 *	literal, or the match offset if this is a match.
+	 */
+#define OPTIMUM_OFFSET_SHIFT 9
+#define OPTIMUM_LEN_MASK (((u32)1 << OPTIMUM_OFFSET_SHIFT) - 1)
+	u32 item;
+
+};
+
+#endif /* SUPPORT_NEAR_OPTIMAL_PARSING */
+
+/*
+ * Block split statistics.  See "Block splitting algorithm" below (the
+ * algorithm itself is defined later in this file).  Observations are
+ * classified into NUM_OBSERVATION_TYPES buckets: 8 for literals and 2 for
+ * matches.
+ */
+#define NUM_LITERAL_OBSERVATION_TYPES 8
+#define NUM_MATCH_OBSERVATION_TYPES 2
+#define NUM_OBSERVATION_TYPES (NUM_LITERAL_OBSERVATION_TYPES + \
+			       NUM_MATCH_OBSERVATION_TYPES)
+#define NUM_OBSERVATIONS_PER_BLOCK_CHECK 512
+struct block_split_stats {
+	/* NOTE(review): presumably counts accumulated since the last check
+	 * (every NUM_OBSERVATIONS_PER_BLOCK_CHECK observations) -- confirm
+	 * against the block splitting code below. */
+	u32 new_observations[NUM_OBSERVATION_TYPES];
+	/* Totals for the block so far, per observation type */
+	u32 observations[NUM_OBSERVATION_TYPES];
+	u32 num_new_observations;
+	u32 num_observations;
+};
+
+struct deflate_output_bitstream;
+
+/* The main DEFLATE compressor structure */
+struct libdeflate_compressor {
+
+ /* Pointer to the compress() implementation chosen at allocation time */
+ void (*impl)(struct libdeflate_compressor *restrict c, const u8 *in,
+ size_t in_nbytes, struct deflate_output_bitstream *os);
+
+ /* The compression level with which this compressor was created */
+ unsigned compression_level;
+
+ /* Inputs of this size or less are passed through without compression. */
+ size_t max_passthrough_size;
+
+ /*
+ * The maximum search depth: consider at most this many potential
+ * matches at each position
+ */
+ unsigned max_search_depth;
+
+ /*
+ * The "nice" match length: if a match of this length is found, choose
+ * it immediately without further consideration
+ */
+ unsigned nice_match_length;
+
+ /* Frequency counters for the current block */
+ struct deflate_freqs freqs;
+
+ /* Block split statistics for the current block */
+ struct block_split_stats split_stats;
+
+ /* Dynamic Huffman codes for the current block */
+ struct deflate_codes codes;
+
+ /* The static Huffman codes defined by the DEFLATE format */
+ struct deflate_codes static_codes;
+
+ /* Temporary space for block flushing; the members are used at disjoint times */
+ union {
+ /* Information about the precode */
+ struct {
+ u32 freqs[DEFLATE_NUM_PRECODE_SYMS];
+ u32 codewords[DEFLATE_NUM_PRECODE_SYMS];
+ u8 lens[DEFLATE_NUM_PRECODE_SYMS];
+ unsigned items[DEFLATE_NUM_LITLEN_SYMS +
+ DEFLATE_NUM_OFFSET_SYMS];
+ unsigned num_litlen_syms;
+ unsigned num_offset_syms;
+ unsigned num_explicit_lens;
+ unsigned num_items;
+ } precode;
+ /*
+ * The "full" length codewords. Used only after the information
+ * in 'precode' is no longer needed.
+ */
+ struct {
+ u32 codewords[DEFLATE_MAX_MATCH_LEN + 1];
+ u8 lens[DEFLATE_MAX_MATCH_LEN + 1];
+ } length;
+ } o;
+
+ union {
+ /* Data for greedy or lazy parsing */
+ struct {
+ /* Hash chains matchfinder */
+ struct hc_matchfinder hc_mf;
+
+ /* Matches and literals chosen for the current block */
+ struct deflate_sequence sequences[SEQ_STORE_LENGTH + 1];
+
+ } g; /* (g)reedy */
+
+ /* Data for fastest parsing */
+ struct {
+ /* Hash table matchfinder */
+ struct ht_matchfinder ht_mf;
+
+ /* Matches and literals chosen for the current block */
+ struct deflate_sequence sequences[
+ FAST_SEQ_STORE_LENGTH + 1];
+
+ } f; /* (f)astest */
+
+ #if SUPPORT_NEAR_OPTIMAL_PARSING
+ /* Data for near-optimal parsing */
+ struct {
+
+ /* Binary tree matchfinder */
+ struct bt_matchfinder bt_mf;
+
+ /*
+ * Cached matches for the current block. This array
+ * contains the matches that were found at each position
+ * in the block. Specifically, for each position, there
+ * is a list of matches found at that position, if any,
+ * sorted by strictly increasing length. In addition,
+ * following the matches for each position, there is a
+ * special 'struct lz_match' whose 'length' member
+ * contains the number of matches found at that
+ * position, and whose 'offset' member contains the
+ * literal at that position.
+ *
+ * Note: in rare cases, there will be a very high number
+ * of matches in the block and this array will overflow.
+ * If this happens, we force the end of the current
+ * block. MATCH_CACHE_LENGTH is the length at which we
+ * actually check for overflow. The extra slots beyond
+ * this are enough to absorb the worst case overflow,
+ * which occurs if starting at
+ * &match_cache[MATCH_CACHE_LENGTH - 1], we write
+ * MAX_MATCHES_PER_POS matches and a match count header,
+ * then skip searching for matches at
+ * 'DEFLATE_MAX_MATCH_LEN - 1' positions and write the
+ * match count header for each.
+ */
+ struct lz_match match_cache[MATCH_CACHE_LENGTH +
+ MAX_MATCHES_PER_POS +
+ DEFLATE_MAX_MATCH_LEN - 1];
+
+ /*
+ * Array of nodes, one per position, for running the
+ * minimum-cost path algorithm.
+ *
+ * This array must be large enough to accommodate the
+ * worst-case number of nodes, which is MAX_BLOCK_LENGTH
+ * plus 1 for the end-of-block node.
+ */
+ struct deflate_optimum_node optimum_nodes[
+ MAX_BLOCK_LENGTH + 1];
+
+ /* The current cost model being used */
+ struct deflate_costs costs;
+
+ /*
+ * A table that maps match offset to offset slot. This
+ * differs from deflate_offset_slot[] in that this is a
+ * full map, not a condensed one. The full map is more
+ * appropriate for the near-optimal parser, since the
+ * near-optimal parser does more offset => offset_slot
+ * translations, it doesn't intersperse them with
+ * matchfinding (so cache evictions are less of a
+ * concern), and it uses more memory anyway.
+ */
+ u8 offset_slot_full[DEFLATE_MAX_MATCH_OFFSET + 1];
+
+ /* Literal/match statistics saved from previous block */
+ u32 prev_observations[NUM_OBSERVATION_TYPES];
+ u32 prev_num_observations;
+
+ /*
+ * Approximate match length frequencies based on a
+ * greedy parse, gathered during matchfinding. This is
+ * used for setting the initial symbol costs.
+ */
+ u32 new_match_len_freqs[DEFLATE_MAX_MATCH_LEN + 1];
+ u32 match_len_freqs[DEFLATE_MAX_MATCH_LEN + 1];
+
+ unsigned num_optim_passes; /* number of optimization passes to run */
+ } n; /* (n)ear-optimal */
+ #endif /* SUPPORT_NEAR_OPTIMAL_PARSING */
+
+ } p; /* (p)arser */
+};
+
+/*
+ * The type for the bitbuffer variable, which temporarily holds bits that are
+ * being packed into bytes and written to the output buffer. For best
+ * performance, this should have size equal to a machine word.
+ */
+typedef machine_word_t bitbuf_t;
+
+/*
+ * Bitbuffer capacity in bits: 1 less than the true size, so that
+ * 'bitbuf >>= bitcount & ~7' can never shift by the full word width (UB in C).
+ */
+#define BITBUF_NBITS (8 * sizeof(bitbuf_t) - 1)
+
+/*
+ * Can the specified number of bits always be added to 'bitbuf' after any
+ * pending bytes have been flushed? There can be up to 7 bits remaining after a
+ * flush, so the count must not exceed BITBUF_NBITS after adding 'n' more bits.
+ */
+#define CAN_BUFFER(n) (7 + (n) <= BITBUF_NBITS)
+
+/*
+ * Structure to keep track of the current state of sending bits to the
+ * compressed output buffer
+ */
+struct deflate_output_bitstream {
+
+ /* Bits not yet flushed to the output buffer (low-order bits are oldest) */
+ bitbuf_t bitbuf;
+
+ /*
+ * Number of bits currently held in @bitbuf. This can be between 0 and
+ * BITBUF_NBITS in general, or between 0 and 7 after a flush.
+ */
+ unsigned bitcount;
+
+ /*
+ * Pointer to the position in the output buffer at which the next byte
+ * should be written
+ */
+ u8 *next;
+
+ /*
+ * Pointer to near the end of the output buffer. 'next' will never
+ * exceed this. There are OUTPUT_END_PADDING bytes reserved after this
+ * to allow branchlessly writing a whole word at this location.
+ */
+ u8 *end;
+};
+
+/*
+ * OUTPUT_END_PADDING is the size, in bytes, of the extra space that must be
+ * present following os->end, in order to not overrun the buffer when generating
+ * output. When UNALIGNED_ACCESS_IS_FAST, we need at least sizeof(bitbuf_t)
+ * bytes for put_unaligned_leword(). Otherwise we need only 1 byte. However,
+ * to make the compression algorithm produce the same result on all CPU
+ * architectures (which is sometimes desirable), we have to unconditionally use
+ * the maximum for any CPU, which is sizeof(bitbuf_t) == 8.
+ */
+#define OUTPUT_END_PADDING 8 /* max sizeof(bitbuf_t) across supported CPUs */
+
+/*
+ * Add some bits to the bitbuffer variable of the output bitstream. The caller
+ * must ensure that 'bitcount + n <= BITBUF_NBITS', by calling FLUSH_BITS()
+ * frequently enough.
+ */
+#define ADD_BITS(bits, n) \
+do { \
+ bitbuf |= (bitbuf_t)(bits) << bitcount; /* new bits above existing ones */ \
+ bitcount += (n); \
+ ASSERT(bitcount <= BITBUF_NBITS); \
+} while (0)
+
+/* Flush bits from the bitbuffer variable to the output buffer. */
+#define FLUSH_BITS() \
+do { \
+ if (UNALIGNED_ACCESS_IS_FAST) { \
+ /* Flush a whole word (branchlessly). */ \
+ put_unaligned_leword(bitbuf, out_next); \
+ bitbuf >>= bitcount & ~7; /* drop only whole flushed bytes */ \
+ out_next += MIN(out_end - out_next, bitcount >> 3); /* clamp at end */ \
+ bitcount &= 7; \
+ } else { \
+ /* Flush a byte at a time. */ \
+ while (bitcount >= 8) { \
+ *out_next = bitbuf; \
+ if (out_next != out_end) \
+ out_next++; \
+ bitcount -= 8; \
+ bitbuf >>= 8; \
+ } \
+ } \
+} while (0)
+
+/*
+ * Sift A[subtree_idx] down through the 1-indexed maxheap A[1..length]
+ * until neither child is greater, restoring the maxheap property for the
+ * subtree rooted there. Both child subtrees must already be valid
+ * maxheaps on entry.
+ */
+static void
+heapify_subtree(u32 A[], unsigned length, unsigned subtree_idx)
+{
+	const u32 sifted = A[subtree_idx];
+	unsigned hole = subtree_idx;
+	unsigned kid;
+
+	for (kid = hole * 2; kid <= length; kid = hole * 2) {
+		/* Pick the greater of the (up to) two children. */
+		if (kid + 1 <= length && A[kid + 1] > A[kid])
+			kid++;
+		if (A[kid] <= sifted)
+			break;
+		/* Pull the greater child up; the hole moves down. */
+		A[hole] = A[kid];
+		hole = kid;
+	}
+	A[hole] = sifted;
+}
+
+/*
+ * Build a maxheap over the 1-indexed array A[1..length] by sifting down
+ * every internal node from last to first (the leaves need no work).
+ */
+static void
+heapify_array(u32 A[], unsigned length)
+{
+	unsigned node;
+
+	for (node = length / 2; node != 0; node--)
+		heapify_subtree(A, length, node);
+}
+
+/*
+ * Sort 'length' u32 values into ascending order with heapsort: build a
+ * maxheap, then repeatedly move the maximum to the end of the shrinking
+ * heap. Named heap_sort() rather than heapsort() to avoid clashing with
+ * the heapsort() that stdlib.h declares on some BSD-derived systems ---
+ * though compiling with -D_ANSI_SOURCE would also avoid that clash.
+ */
+static void
+heap_sort(u32 A[], unsigned length)
+{
+	A--; /* Shift so the heap code can use 1-based indexing. */
+
+	heapify_array(A, length);
+
+	for (; length >= 2; length--) {
+		/* Move the current maximum into its final position. */
+		const u32 cur_max = A[1];
+
+		A[1] = A[length];
+		A[length] = cur_max;
+		heapify_subtree(A, length - 1, 1);
+	}
+}
+
+#define NUM_SYMBOL_BITS 10 /* bits per symbol in packed (freq << 10 | sym) entries */
+#define NUM_FREQ_BITS (32 - NUM_SYMBOL_BITS)
+#define SYMBOL_MASK ((1 << NUM_SYMBOL_BITS) - 1)
+#define FREQ_MASK (~SYMBOL_MASK)
+
+#define GET_NUM_COUNTERS(num_syms) (num_syms) /* ~1 counter/symbol is fastest; see sort_symbols() */
+
+/*
+ * Sort the symbols primarily by frequency and secondarily by symbol value.
+ * Discard symbols with zero frequency and fill in an array with the remaining
+ * symbols, along with their frequencies. The low NUM_SYMBOL_BITS bits of each
+ * array entry will contain the symbol value, and the remaining bits will
+ * contain the frequency.
+ *
+ * @num_syms
+ * Number of symbols in the alphabet, at most 1 << NUM_SYMBOL_BITS.
+ *
+ * @freqs[num_syms]
+ * Frequency of each symbol, summing to at most (1 << NUM_FREQ_BITS) - 1.
+ *
+ * @lens[num_syms]
+ * An array that eventually will hold the length of each codeword. This
+ * function only fills in the codeword lengths for symbols that have zero
+ * frequency, which are not well defined per se but will be set to 0.
+ *
+ * @symout[num_syms]
+ * The output array, described above.
+ *
+ * Returns the number of entries in 'symout' that were filled. This is the
+ * number of symbols that have nonzero frequency.
+ */
+static unsigned
+sort_symbols(unsigned num_syms, const u32 freqs[], u8 lens[], u32 symout[])
+{
+ unsigned sym;
+ unsigned i;
+ unsigned num_used_syms;
+ unsigned num_counters;
+ unsigned counters[GET_NUM_COUNTERS(DEFLATE_MAX_NUM_SYMS)];
+
+ /*
+ * We use heapsort, but with an added optimization. Since often most
+ * symbol frequencies are low, we first do a count sort using a limited
+ * number of counters. High frequencies are counted in the last
+ * counter, and only they will be sorted with heapsort.
+ *
+ * Note: with more symbols, it is generally beneficial to have more
+ * counters. About 1 counter per symbol seems fastest.
+ */
+
+ num_counters = GET_NUM_COUNTERS(num_syms);
+
+ memset(counters, 0, num_counters * sizeof(counters[0]));
+
+ /* Tally each frequency; all high frequencies share the last counter. */
+ for (sym = 0; sym < num_syms; sym++)
+ counters[MIN(freqs[sym], num_counters - 1)]++;
+
+ /*
+ * Make the counters cumulative, ignoring the zero-th, which counted
+ * symbols with zero frequency. As a side effect, this calculates the
+ * number of symbols with nonzero frequency.
+ */
+ num_used_syms = 0;
+ for (i = 1; i < num_counters; i++) {
+ unsigned count = counters[i];
+
+ counters[i] = num_used_syms;
+ num_used_syms += count;
+ }
+
+ /*
+ * Sort nonzero-frequency symbols using the counters. At the same time,
+ * set the codeword lengths of zero-frequency symbols to 0.
+ */
+ for (sym = 0; sym < num_syms; sym++) {
+ u32 freq = freqs[sym];
+
+ if (freq != 0) {
+ symout[counters[MIN(freq, num_counters - 1)]++] =
+ sym | (freq << NUM_SYMBOL_BITS);
+ } else {
+ lens[sym] = 0;
+ }
+ }
+
+ /* Heapsort only the high-frequency symbols, counted in the last counter. */
+ heap_sort(symout + counters[num_counters - 2],
+ counters[num_counters - 1] - counters[num_counters - 2]);
+
+ return num_used_syms;
+}
+
+/*
+ * Build a Huffman tree.
+ *
+ * This is an optimized implementation that
+ * (a) takes advantage of the frequencies being already sorted;
+ * (b) only generates non-leaf nodes, since the non-leaf nodes of a Huffman
+ * tree are sufficient to generate a canonical code;
+ * (c) Only stores parent pointers, not child pointers;
+ * (d) Produces the nodes in the same memory used for input frequency
+ * information.
+ *
+ * Array 'A', which contains 'sym_count' entries, is used for both input and
+ * output. For this function, 'sym_count' must be at least 2.
+ *
+ * For input, the array must contain the frequencies of the symbols, sorted in
+ * increasing order. Specifically, each entry must contain a frequency left
+ * shifted by NUM_SYMBOL_BITS bits. Any data in the low NUM_SYMBOL_BITS bits of
+ * the entries will be ignored by this function. Although these bits will, in
+ * fact, contain the symbols that correspond to the frequencies, this function
+ * is concerned with frequencies only and keeps the symbols as-is.
+ *
+ * For output, this function will produce the non-leaf nodes of the Huffman
+ * tree. These nodes will be stored in the first (sym_count - 1) entries of the
+ * array. Entry A[sym_count - 2] will represent the root node. Each other node
+ * will contain the zero-based index of its parent node in 'A', left shifted by
+ * NUM_SYMBOL_BITS bits. The low NUM_SYMBOL_BITS bits of each entry in A will
+ * be kept as-is. Again, note that although these low bits will, in fact,
+ * contain a symbol value, this symbol will have *no relationship* with the
+ * Huffman tree node that happens to occupy the same slot. This is because this
+ * implementation only generates the non-leaf nodes of the tree.
+ */
+static void
+build_tree(u32 A[], unsigned sym_count)
+{
+ const unsigned last_idx = sym_count - 1;
+
+ /* Index of the next lowest frequency leaf that still needs a parent */
+ unsigned i = 0;
+
+ /*
+ * Index of the next lowest frequency non-leaf that still needs a
+ * parent, or 'e' if there is currently no such node
+ */
+ unsigned b = 0;
+
+ /* Index of the next spot for a non-leaf (will overwrite a leaf) */
+ unsigned e = 0;
+
+ do {
+ u32 new_freq;
+
+ /*
+ * Select the next two lowest frequency nodes among the leaves
+ * A[i] and non-leaves A[b], and create a new node A[e] to be
+ * their parent. Set the new node's frequency to the sum of the
+ * frequencies of its two children.
+ *
+ * Usually the next two lowest frequency nodes are of the same
+ * type (leaf or non-leaf), so check those cases first.
+ */
+ if (i + 1 <= last_idx &&
+ (b == e || (A[i + 1] & FREQ_MASK) <= (A[b] & FREQ_MASK))) {
+ /* Two leaves */
+ new_freq = (A[i] & FREQ_MASK) + (A[i + 1] & FREQ_MASK);
+ i += 2;
+ } else if (b + 2 <= e &&
+ (i > last_idx ||
+ (A[b + 1] & FREQ_MASK) < (A[i] & FREQ_MASK))) {
+ /* Two non-leaves */
+ new_freq = (A[b] & FREQ_MASK) + (A[b + 1] & FREQ_MASK);
+ A[b] = (e << NUM_SYMBOL_BITS) | (A[b] & SYMBOL_MASK);
+ A[b + 1] = (e << NUM_SYMBOL_BITS) |
+ (A[b + 1] & SYMBOL_MASK);
+ b += 2;
+ } else {
+ /* One leaf and one non-leaf */
+ new_freq = (A[i] & FREQ_MASK) + (A[b] & FREQ_MASK);
+ A[b] = (e << NUM_SYMBOL_BITS) | (A[b] & SYMBOL_MASK);
+ i++;
+ b++;
+ }
+ A[e] = new_freq | (A[e] & SYMBOL_MASK); /* keep symbol bits intact */
+ /*
+ * A binary tree with 'n' leaves has 'n - 1' non-leaves, so the
+ * tree is complete once we've created 'n - 1' non-leaves.
+ */
+ } while (++e < last_idx);
+}
+
+/*
+ * Given the stripped-down Huffman tree constructed by build_tree(), determine
+ * the number of codewords that should be assigned each possible length, taking
+ * into account the length-limited constraint.
+ *
+ * @A
+ * The array produced by build_tree(), containing parent index information
+ * for the non-leaf nodes of the Huffman tree. Each entry in this array is
+ * a node; a node's parent always has a greater index than that node
+ * itself. This function will overwrite the parent index information in
+ * this array, so essentially it will destroy the tree. However, the data
+ * in the low NUM_SYMBOL_BITS of each entry will be preserved.
+ *
+ * @root_idx
+ * The 0-based index of the root node in 'A', and consequently one less
+ * than the number of tree node entries in 'A'. (Or, really 2 less than
+ * the actual length of 'A'.)
+ *
+ * @len_counts
+ * An array of length ('max_codeword_len' + 1) in which the number of
+ * codewords having each length <= max_codeword_len will be returned.
+ *
+ * @max_codeword_len
+ * The maximum permissible codeword length.
+ */
+static void
+compute_length_counts(u32 A[], unsigned root_idx, unsigned len_counts[],
+ unsigned max_codeword_len)
+{
+ unsigned len;
+ int node;
+
+ /*
+ * The key observations are:
+ *
+ * (1) We can traverse the non-leaf nodes of the tree, always visiting a
+ * parent before its children, by simply iterating through the array
+ * in reverse order. Consequently, we can compute the depth of each
+ * node in one pass, overwriting the parent indices with depths.
+ *
+ * (2) We can initially assume that in the real Huffman tree, both
+ * children of the root are leaves. This corresponds to two
+ * codewords of length 1. Then, whenever we visit a (non-leaf) node
+ * during the traversal, we modify this assumption to account for
+ * the current node *not* being a leaf, but rather its two children
+ * being leaves. This causes the loss of one codeword for the
+ * current depth and the addition of two codewords for the current
+ * depth plus one.
+ *
+ * (3) We can handle the length-limited constraint fairly easily by
+ * simply using the largest length available when a depth exceeds
+ * max_codeword_len.
+ */
+
+ for (len = 0; len <= max_codeword_len; len++)
+ len_counts[len] = 0;
+ len_counts[1] = 2; /* root's two children, assumed leaves for now */
+
+ /* Set the root node's depth to 0. */
+ A[root_idx] &= SYMBOL_MASK;
+
+ for (node = root_idx - 1; node >= 0; node--) {
+
+ /* Calculate the depth of this node. */
+
+ unsigned parent = A[node] >> NUM_SYMBOL_BITS;
+ unsigned parent_depth = A[parent] >> NUM_SYMBOL_BITS;
+ unsigned depth = parent_depth + 1;
+
+ /*
+ * Set the depth of this node so that it is available when its
+ * children (if any) are processed.
+ */
+ A[node] = (A[node] & SYMBOL_MASK) | (depth << NUM_SYMBOL_BITS);
+
+ /*
+ * If needed, decrease the length to meet the length-limited
+ * constraint. This is not the optimal method for generating
+ * length-limited Huffman codes! But it should be good enough.
+ */
+ if (depth >= max_codeword_len) {
+ depth = max_codeword_len;
+ do {
+ depth--;
+ } while (len_counts[depth] == 0);
+ }
+
+ /*
+ * Account for the fact that we have a non-leaf node at the
+ * current depth.
+ */
+ len_counts[depth]--;
+ len_counts[depth + 1] += 2;
+ }
+}
+
+/*
+ * DEFLATE uses bit-reversed codewords, so we must bit-reverse the codewords
+ * after generating them. All codewords have length <= 16 bits. If the CPU has
+ * a bit-reversal instruction, then that is the fastest method. Otherwise the
+ * fastest method is to reverse the bits in each of the two bytes using a table.
+ * The table method is slightly faster than using bitwise operations to flip
+ * adjacent 1, 2, 4, and then 8-bit fields, even if 2 to 4 codewords are packed
+ * into a machine word and processed together using that method.
+ */
+
+#ifdef rbit32
+static forceinline u32 reverse_codeword(u32 codeword, u8 len)
+{
+ return rbit32(codeword) >> ((32 - len) & 31); /* '& 31' keeps the shift in range even if len == 0 */
+}
+#else
+/* Generated by scripts/gen_bitreverse_tab.py */
+static const u8 bitreverse_tab[256] = {
+ 0x00, 0x80, 0x40, 0xc0, 0x20, 0xa0, 0x60, 0xe0,
+ 0x10, 0x90, 0x50, 0xd0, 0x30, 0xb0, 0x70, 0xf0,
+ 0x08, 0x88, 0x48, 0xc8, 0x28, 0xa8, 0x68, 0xe8,
+ 0x18, 0x98, 0x58, 0xd8, 0x38, 0xb8, 0x78, 0xf8,
+ 0x04, 0x84, 0x44, 0xc4, 0x24, 0xa4, 0x64, 0xe4,
+ 0x14, 0x94, 0x54, 0xd4, 0x34, 0xb4, 0x74, 0xf4,
+ 0x0c, 0x8c, 0x4c, 0xcc, 0x2c, 0xac, 0x6c, 0xec,
+ 0x1c, 0x9c, 0x5c, 0xdc, 0x3c, 0xbc, 0x7c, 0xfc,
+ 0x02, 0x82, 0x42, 0xc2, 0x22, 0xa2, 0x62, 0xe2,
+ 0x12, 0x92, 0x52, 0xd2, 0x32, 0xb2, 0x72, 0xf2,
+ 0x0a, 0x8a, 0x4a, 0xca, 0x2a, 0xaa, 0x6a, 0xea,
+ 0x1a, 0x9a, 0x5a, 0xda, 0x3a, 0xba, 0x7a, 0xfa,
+ 0x06, 0x86, 0x46, 0xc6, 0x26, 0xa6, 0x66, 0xe6,
+ 0x16, 0x96, 0x56, 0xd6, 0x36, 0xb6, 0x76, 0xf6,
+ 0x0e, 0x8e, 0x4e, 0xce, 0x2e, 0xae, 0x6e, 0xee,
+ 0x1e, 0x9e, 0x5e, 0xde, 0x3e, 0xbe, 0x7e, 0xfe,
+ 0x01, 0x81, 0x41, 0xc1, 0x21, 0xa1, 0x61, 0xe1,
+ 0x11, 0x91, 0x51, 0xd1, 0x31, 0xb1, 0x71, 0xf1,
+ 0x09, 0x89, 0x49, 0xc9, 0x29, 0xa9, 0x69, 0xe9,
+ 0x19, 0x99, 0x59, 0xd9, 0x39, 0xb9, 0x79, 0xf9,
+ 0x05, 0x85, 0x45, 0xc5, 0x25, 0xa5, 0x65, 0xe5,
+ 0x15, 0x95, 0x55, 0xd5, 0x35, 0xb5, 0x75, 0xf5,
+ 0x0d, 0x8d, 0x4d, 0xcd, 0x2d, 0xad, 0x6d, 0xed,
+ 0x1d, 0x9d, 0x5d, 0xdd, 0x3d, 0xbd, 0x7d, 0xfd,
+ 0x03, 0x83, 0x43, 0xc3, 0x23, 0xa3, 0x63, 0xe3,
+ 0x13, 0x93, 0x53, 0xd3, 0x33, 0xb3, 0x73, 0xf3,
+ 0x0b, 0x8b, 0x4b, 0xcb, 0x2b, 0xab, 0x6b, 0xeb,
+ 0x1b, 0x9b, 0x5b, 0xdb, 0x3b, 0xbb, 0x7b, 0xfb,
+ 0x07, 0x87, 0x47, 0xc7, 0x27, 0xa7, 0x67, 0xe7,
+ 0x17, 0x97, 0x57, 0xd7, 0x37, 0xb7, 0x77, 0xf7,
+ 0x0f, 0x8f, 0x4f, 0xcf, 0x2f, 0xaf, 0x6f, 0xef,
+ 0x1f, 0x9f, 0x5f, 0xdf, 0x3f, 0xbf, 0x7f, 0xff,
+};
+
+static forceinline u32 reverse_codeword(u32 codeword, u8 len)
+{
+ STATIC_ASSERT(DEFLATE_MAX_CODEWORD_LEN <= 16);
+ codeword = ((u32)bitreverse_tab[codeword & 0xff] << 8) |
+ bitreverse_tab[codeword >> 8];
+ return codeword >> (16 - len);
+}
+#endif /* !rbit32 */
+
+/*
+ * Generate the codewords for a canonical Huffman code.
+ *
+ * @A
+ * The output array for codewords. In addition, initially this
+ * array must contain the symbols, sorted primarily by frequency and
+ * secondarily by symbol value, in the low NUM_SYMBOL_BITS bits of
+ * each entry.
+ *
+ * @lens
+ * Output array for codeword lengths.
+ *
+ * @len_counts
+ * An array that provides the number of codewords that will have
+ * each possible length <= max_codeword_len.
+ *
+ * @max_codeword_len
+ * Maximum length, in bits, of each codeword.
+ *
+ * @num_syms
+ * Number of symbols in the alphabet, including symbols with zero
+ * frequency. This is the length of the 'A' and 'len' arrays.
+ */
+static void
+gen_codewords(u32 A[], u8 lens[], const unsigned len_counts[],
+ unsigned max_codeword_len, unsigned num_syms)
+{
+ u32 next_codewords[DEFLATE_MAX_CODEWORD_LEN + 1];
+ unsigned i;
+ unsigned len;
+ unsigned sym;
+
+ /*
+ * Given the number of codewords that will have each length, assign
+ * codeword lengths to symbols. We do this by assigning the lengths in
+ * decreasing order to the symbols sorted primarily by increasing
+ * frequency and secondarily by increasing symbol value.
+ */
+ for (i = 0, len = max_codeword_len; len >= 1; len--) {
+ unsigned count = len_counts[len];
+
+ while (count--)
+ lens[A[i++] & SYMBOL_MASK] = len;
+ }
+
+ /*
+ * Generate the codewords themselves. We initialize the
+ * 'next_codewords' array to provide the lexicographically first
+ * codeword of each length, then assign codewords in symbol order. This
+ * produces a canonical code.
+ */
+ next_codewords[0] = 0; /* slot 0 harmlessly absorbs zero-frequency symbols */
+ next_codewords[1] = 0;
+ for (len = 2; len <= max_codeword_len; len++)
+ next_codewords[len] =
+ (next_codewords[len - 1] + len_counts[len - 1]) << 1;
+
+ for (sym = 0; sym < num_syms; sym++) {
+ /* DEFLATE requires bit-reversed codewords. */
+ A[sym] = reverse_codeword(next_codewords[lens[sym]]++,
+ lens[sym]);
+ }
+}
+
+/*
+ * ---------------------------------------------------------------------
+ * deflate_make_huffman_code()
+ * ---------------------------------------------------------------------
+ *
+ * Given an alphabet and the frequency of each symbol in it, construct a
+ * length-limited canonical Huffman code.
+ *
+ * @num_syms
+ * The number of symbols in the alphabet. The symbols are the integers in
+ * the range [0, num_syms - 1]. This parameter must be at least 2 and
+ * must not exceed (1 << NUM_SYMBOL_BITS).
+ *
+ * @max_codeword_len
+ * The maximum permissible codeword length.
+ *
+ * @freqs
+ * An array of length @num_syms that gives the frequency of each symbol.
+ * It is valid for some, none, or all of the frequencies to be 0. The sum
+ * of frequencies must not exceed (1 << NUM_FREQ_BITS) - 1.
+ *
+ * @lens
+ * An array of @num_syms entries in which this function will return the
+ * length, in bits, of the codeword assigned to each symbol. Symbols with
+ * 0 frequency will not have codewords per se, but their entries in this
+ * array will be set to 0. No lengths greater than @max_codeword_len will
+ * be assigned.
+ *
+ * @codewords
+ * An array of @num_syms entries in which this function will return the
+ * codeword for each symbol, right-justified and padded on the left with
+ * zeroes. Codewords for symbols with 0 frequency will be undefined.
+ *
+ * ---------------------------------------------------------------------
+ *
+ * This function builds a length-limited canonical Huffman code.
+ *
+ * A length-limited Huffman code contains no codewords longer than some
+ * specified length, and has exactly (with some algorithms) or approximately
+ * (with the algorithm used here) the minimum weighted path length from the
+ * root, given this constraint.
+ *
+ * A canonical Huffman code satisfies the properties that a longer codeword
+ * never lexicographically precedes a shorter codeword, and the lexicographic
+ * ordering of codewords of the same length is the same as the lexicographic
+ * ordering of the corresponding symbols. A canonical Huffman code, or more
+ * generally a canonical prefix code, can be reconstructed from only a list
+ * containing the codeword length of each symbol.
+ *
+ * The classic algorithm to generate a Huffman code creates a node for each
+ * symbol, then inserts these nodes into a min-heap keyed by symbol frequency.
+ * Then, repeatedly, the two lowest-frequency nodes are removed from the
+ * min-heap and added as the children of a new node having frequency equal to
+ * the sum of its two children, which is then inserted into the min-heap. When
+ * only a single node remains in the min-heap, it is the root of the Huffman
+ * tree. The codeword for each symbol is determined by the path needed to reach
+ * the corresponding node from the root. Descending to the left child appends a
+ * 0 bit, whereas descending to the right child appends a 1 bit.
+ *
+ * The classic algorithm is relatively easy to understand, but it is subject to
+ * a number of inefficiencies. In practice, it is fastest to first sort the
+ * symbols by frequency. (This itself can be subject to an optimization based
+ * on the fact that most frequencies tend to be low.) At the same time, we sort
+ * secondarily by symbol value, which aids the process of generating a canonical
+ * code. Then, during tree construction, no heap is necessary because both the
+ * leaf nodes and the unparented non-leaf nodes can be easily maintained in
+ * sorted order. Consequently, there can never be more than two possibilities
+ * for the next-lowest-frequency node.
+ *
+ * In addition, because we're generating a canonical code, we actually don't
+ * need the leaf nodes of the tree at all, only the non-leaf nodes. This is
+ * because for canonical code generation we don't need to know where the symbols
+ * are in the tree. Rather, we only need to know how many leaf nodes have each
+ * depth (codeword length). And this information can, in fact, be quickly
+ * generated from the tree of non-leaves only.
+ *
+ * Furthermore, we can build this stripped-down Huffman tree directly in the
+ * array in which the codewords are to be generated, provided that these array
+ * slots are large enough to hold a symbol and frequency value.
+ *
+ * Still furthermore, we don't even need to maintain explicit child pointers.
+ * We only need the parent pointers, and even those can be overwritten in-place
+ * with depth information as part of the process of extracting codeword lengths
+ * from the tree. So in summary, we do NOT need a big structure like:
+ *
+ * struct huffman_tree_node {
+ * unsigned int symbol;
+ * unsigned int frequency;
+ * unsigned int depth;
+ * struct huffman_tree_node *left_child;
+ * struct huffman_tree_node *right_child;
+ * };
+ *
+ *
+ * ... which often gets used in "naive" implementations of Huffman code
+ * generation.
+ *
+ * Many of these optimizations are based on the implementation in 7-Zip (source
+ * file: C/HuffEnc.c), which was placed in the public domain by Igor Pavlov.
+ */
+static void
+deflate_make_huffman_code(unsigned num_syms, unsigned max_codeword_len,
+ const u32 freqs[], u8 lens[], u32 codewords[])
+{
+ u32 *A = codewords;
+ unsigned num_used_syms;
+
+ STATIC_ASSERT(DEFLATE_MAX_NUM_SYMS <= 1 << NUM_SYMBOL_BITS);
+ STATIC_ASSERT(MAX_BLOCK_LENGTH <= ((u32)1 << NUM_FREQ_BITS) - 1);
+
+ /*
+ * We begin by sorting the symbols primarily by frequency and
+ * secondarily by symbol value. As an optimization, the array used for
+ * this purpose ('A') shares storage with the space in which we will
+ * eventually return the codewords.
+ */
+ num_used_syms = sort_symbols(num_syms, freqs, lens, A);
+
+ /*
+ * 'num_used_syms' is the number of symbols with nonzero frequency.
+ * This may be less than @num_syms. 'num_used_syms' is also the number
+ * of entries in 'A' that are valid. Each entry consists of a distinct
+ * symbol and a nonzero frequency packed into a 32-bit integer.
+ */
+
+ /*
+ * Handle special cases where only 0 or 1 symbols were used (had nonzero
+ * frequency).
+ */
+
+ if (unlikely(num_used_syms == 0)) {
+ /*
+ * Code is empty. sort_symbols() already set all lengths to 0,
+ * so there is nothing more to do.
+ */
+ return;
+ }
+
+ if (unlikely(num_used_syms == 1)) {
+ /*
+ * Only one symbol was used, so we only need one codeword. But
+ * two codewords are needed to form the smallest complete
+ * Huffman code, which uses codewords 0 and 1. Therefore, we
+ * choose another symbol to which to assign a codeword. We use
+ * 0 (if the used symbol is not 0) or 1 (if the used symbol is
+ * 0). In either case, the lesser-valued symbol must be
+ * assigned codeword 0 so that the resulting code is canonical.
+ */
+
+ unsigned sym = A[0] & SYMBOL_MASK;
+ unsigned nonzero_idx = sym ? sym : 1; /* the companion symbol */
+
+ codewords[0] = 0;
+ lens[0] = 1;
+ codewords[nonzero_idx] = 1;
+ lens[nonzero_idx] = 1;
+ return;
+ }
+
+ /*
+ * Build a stripped-down version of the Huffman tree, sharing the array
+ * 'A' with the symbol values. Then extract length counts from the tree
+ * and use them to generate the final codewords.
+ */
+
+ build_tree(A, num_used_syms);
+
+ {
+ unsigned len_counts[DEFLATE_MAX_CODEWORD_LEN + 1];
+
+ compute_length_counts(A, num_used_syms - 2,
+ len_counts, max_codeword_len);
+
+ gen_codewords(A, lens, len_counts, max_codeword_len, num_syms);
+ }
+}
+
+/*
+ * Clear the litlen and offset Huffman symbol frequency counters. This must
+ * be called when starting a new DEFLATE block.
+ */
+static void
+deflate_reset_symbol_frequencies(struct libdeflate_compressor *c)
+{
+ memset(&c->freqs, 0, sizeof(c->freqs));
+}
+
+/*
+ * Build the literal/length and offset Huffman codes for a DEFLATE block,
+ * length-limited to MAX_LITLEN_CODEWORD_LEN and MAX_OFFSET_CODEWORD_LEN bits.
+ * This takes as input the frequency tables for each alphabet and produces as
+ * output a set of tables that map symbols to codewords and codeword lengths.
+ */
+static void
+deflate_make_huffman_codes(const struct deflate_freqs *freqs,
+ struct deflate_codes *codes)
+{
+ deflate_make_huffman_code(DEFLATE_NUM_LITLEN_SYMS,
+ MAX_LITLEN_CODEWORD_LEN,
+ freqs->litlen,
+ codes->lens.litlen,
+ codes->codewords.litlen);
+
+ deflate_make_huffman_code(DEFLATE_NUM_OFFSET_SYMS,
+ MAX_OFFSET_CODEWORD_LEN,
+ freqs->offset,
+ codes->lens.offset,
+ codes->codewords.offset);
+}
+
+/* Initialize c->static_codes with the static DEFLATE Huffman codes (RFC 1951). */
+static void
+deflate_init_static_codes(struct libdeflate_compressor *c)
+{
+ unsigned i;
+
+ for (i = 0; i < 144; i++)
+ c->freqs.litlen[i] = 1 << (9 - 8); /* weight for 8-bit codewords */
+ for (; i < 256; i++)
+ c->freqs.litlen[i] = 1 << (9 - 9); /* weight for 9-bit codewords */
+ for (; i < 280; i++)
+ c->freqs.litlen[i] = 1 << (9 - 7); /* weight for 7-bit codewords */
+ for (; i < 288; i++)
+ c->freqs.litlen[i] = 1 << (9 - 8); /* weight for 8-bit codewords */
+
+ for (i = 0; i < 32; i++)
+ c->freqs.offset[i] = 1 << (5 - 5); /* weight for 5-bit codewords */
+
+ deflate_make_huffman_codes(&c->freqs, &c->static_codes);
+}
+
+/* Return the offset slot for the given match offset, using the small map. */
+static forceinline unsigned
+deflate_get_offset_slot(unsigned offset)
+{
+#if 1
+ if (offset <= 256)
+ return deflate_offset_slot[offset];
+ else
+ return deflate_offset_slot[256 + ((offset - 1) >> 7)];
+#else /* Branchless version */
+ u32 i1 = offset;
+ u32 i2 = 256 + ((offset - 1) >> 7);
+ u32 is_small = (s32)(offset - 257) >> 31; /* all-ones iff offset <= 256; assumes arithmetic >> */
+
+ return deflate_offset_slot[(i1 & is_small) ^ (i2 & ~is_small)];
+#endif
+}
+
+static unsigned
+deflate_compute_precode_items(const u8 lens[], const unsigned num_lens,
+ u32 precode_freqs[], unsigned precode_items[])
+{
+ unsigned *itemptr;
+ unsigned run_start;
+ unsigned run_end;
+ unsigned extra_bits;
+ u8 len;
+
+ memset(precode_freqs, 0,
+ DEFLATE_NUM_PRECODE_SYMS * sizeof(precode_freqs[0]));
+
+ itemptr = precode_items;
+ run_start = 0;
+ do {
+ /* Find the next run of codeword lengths. */
+
+ /* len = the length being repeated */
+ len = lens[run_start];
+
+ /* Extend the run. */
+ run_end = run_start;
+ do {
+ run_end++;
+ } while (run_end != num_lens && len == lens[run_end]);
+
+ if (len == 0) {
+ /* Run of zeroes. */
+
+ /* Symbol 18: RLE 11 to 138 zeroes at a time. */
+ while ((run_end - run_start) >= 11) {
+ extra_bits = MIN((run_end - run_start) - 11,
+ 0x7F);
+ precode_freqs[18]++;
+ *itemptr++ = 18 | (extra_bits << 5); /* item = sym | extra_bits << 5 */
+ run_start += 11 + extra_bits;
+ }
+
+ /* Symbol 17: RLE 3 to 10 zeroes at a time. */
+ if ((run_end - run_start) >= 3) {
+ extra_bits = MIN((run_end - run_start) - 3,
+ 0x7);
+ precode_freqs[17]++;
+ *itemptr++ = 17 | (extra_bits << 5);
+ run_start += 3 + extra_bits;
+ }
+ } else {
+
+ /* A run of nonzero lengths. */
+
+ /* Symbol 16: RLE 3 to 6 of the previous length. */
+ if ((run_end - run_start) >= 4) {
+ precode_freqs[len]++;
+ *itemptr++ = len;
+ run_start++;
+ do {
+ extra_bits = MIN((run_end - run_start) -
+ 3, 0x3);
+ precode_freqs[16]++;
+ *itemptr++ = 16 | (extra_bits << 5);
+ run_start += 3 + extra_bits;
+ } while ((run_end - run_start) >= 3);
+ }
+ }
+
+ /* Output any remaining lengths without RLE. */
+ while (run_start != run_end) {
+ precode_freqs[len]++;
+ *itemptr++ = len;
+ run_start++;
+ }
+ } while (run_start != num_lens);
+
+ return itemptr - precode_items; /* number of precode items written */
+}
+
+/*
+ * Huffman codeword lengths for dynamic Huffman blocks are compressed using a
+ * separate Huffman code, the "precode", which contains a symbol for each
+ * possible codeword length in the larger code as well as several special
+ * symbols to represent repeated codeword lengths (a form of run-length
+ * encoding). The precode is itself constructed in canonical form, and its
+ * codeword lengths are represented literally in 19 3-bit fields that
+ * immediately precede the compressed codeword lengths of the larger code.
+ */
+
+/*
+ * Precompute the information needed to output dynamic Huffman codes:
+ * the number of litlen and offset codeword lengths to transmit, the RLE
+ * "items" that encode them, and the precode itself.  Results are stored
+ * in c->o.precode.
+ */
+static void
+deflate_precompute_huffman_header(struct libdeflate_compressor *c)
+{
+ /* Compute how many litlen and offset symbols are needed. */
+
+ /*
+ * Trailing zero lengths are trimmed, but never below 257 litlen and
+ * 1 offset symbol — the minimums the block header fields can encode.
+ */
+ for (c->o.precode.num_litlen_syms = DEFLATE_NUM_LITLEN_SYMS;
+ c->o.precode.num_litlen_syms > 257;
+ c->o.precode.num_litlen_syms--)
+ if (c->codes.lens.litlen[c->o.precode.num_litlen_syms - 1] != 0)
+ break;
+
+ for (c->o.precode.num_offset_syms = DEFLATE_NUM_OFFSET_SYMS;
+ c->o.precode.num_offset_syms > 1;
+ c->o.precode.num_offset_syms--)
+ if (c->codes.lens.offset[c->o.precode.num_offset_syms - 1] != 0)
+ break;
+
+ /*
+ * If we're not using the full set of literal/length codeword lengths,
+ * then temporarily move the offset codeword lengths over so that the
+ * literal/length and offset codeword lengths are contiguous.
+ */
+ STATIC_ASSERT(offsetof(struct deflate_lens, offset) ==
+ DEFLATE_NUM_LITLEN_SYMS);
+ if (c->o.precode.num_litlen_syms != DEFLATE_NUM_LITLEN_SYMS) {
+ memmove((u8 *)&c->codes.lens + c->o.precode.num_litlen_syms,
+ (u8 *)&c->codes.lens + DEFLATE_NUM_LITLEN_SYMS,
+ c->o.precode.num_offset_syms);
+ }
+
+ /*
+ * Compute the "items" (RLE / literal tokens and extra bits) with which
+ * the codeword lengths in the larger code will be output.
+ */
+ c->o.precode.num_items =
+ deflate_compute_precode_items((u8 *)&c->codes.lens,
+ c->o.precode.num_litlen_syms +
+ c->o.precode.num_offset_syms,
+ c->o.precode.freqs,
+ c->o.precode.items);
+
+ /* Build the precode. */
+ deflate_make_huffman_code(DEFLATE_NUM_PRECODE_SYMS,
+ MAX_PRE_CODEWORD_LEN,
+ c->o.precode.freqs, c->o.precode.lens,
+ c->o.precode.codewords);
+
+ /* Count how many precode lengths we actually need to output. */
+ /* Lengths appear in permuted order; at least 4 are always output. */
+ for (c->o.precode.num_explicit_lens = DEFLATE_NUM_PRECODE_SYMS;
+ c->o.precode.num_explicit_lens > 4;
+ c->o.precode.num_explicit_lens--)
+ if (c->o.precode.lens[deflate_precode_lens_permutation[
+ c->o.precode.num_explicit_lens - 1]] != 0)
+ break;
+
+ /* Restore the offset codeword lengths if needed. */
+ if (c->o.precode.num_litlen_syms != DEFLATE_NUM_LITLEN_SYMS) {
+ memmove((u8 *)&c->codes.lens + DEFLATE_NUM_LITLEN_SYMS,
+ (u8 *)&c->codes.lens + c->o.precode.num_litlen_syms,
+ c->o.precode.num_offset_syms);
+ }
+}
+
+/*
+ * Precompute, for every possible match length, the "full" bit pattern that
+ * encodes it: the litlen codeword for the length's slot, with the slot's
+ * extra length bits appended above it.  This lets a match length be output
+ * with a single table lookup.  Results go in c->o.length.
+ */
+static void
+deflate_compute_full_len_codewords(struct libdeflate_compressor *c,
+                                   const struct deflate_codes *codes)
+{
+    unsigned len;
+
+    /* The combined codeword plus extra bits must fit in a 32-bit word. */
+    STATIC_ASSERT(MAX_LITLEN_CODEWORD_LEN +
+                  DEFLATE_MAX_EXTRA_LENGTH_BITS <= 32);
+
+    for (len = DEFLATE_MIN_MATCH_LEN; len <= DEFLATE_MAX_MATCH_LEN; len++) {
+        const unsigned length_slot = deflate_length_slot[len];
+        const unsigned sym = DEFLATE_FIRST_LEN_SYM + length_slot;
+        const unsigned codeword_len = codes->lens.litlen[sym];
+        const u32 extra = len - deflate_length_slot_base[length_slot];
+
+        /* Extra bits sit directly above the Huffman codeword. */
+        c->o.length.codewords[len] =
+            codes->codewords.litlen[sym] | (extra << codeword_len);
+        c->o.length.lens[len] =
+            codeword_len + deflate_extra_length_bits[length_slot];
+    }
+}
+
+/*
+ * Write a match to the output buffer.  Outputs, in order: the combined
+ * litlen codeword + extra length bits (precomputed in c->o.length), the
+ * offset slot's codeword, and the extra offset bits.  Intermediate
+ * FLUSH_BITS() calls are compiled in only when CAN_BUFFER() shows the
+ * bitbuffer is too small to hold all the fields at once.
+ */
+#define WRITE_MATCH(c_, codes_, length_, offset_, offset_slot_) \
+do { \
+ const struct libdeflate_compressor *c__ = (c_); \
+ const struct deflate_codes *codes__ = (codes_); \
+ unsigned length__ = (length_); \
+ unsigned offset__ = (offset_); \
+ unsigned offset_slot__ = (offset_slot_); \
+ \
+ /* Litlen symbol and extra length bits */ \
+ STATIC_ASSERT(CAN_BUFFER(MAX_LITLEN_CODEWORD_LEN + \
+ DEFLATE_MAX_EXTRA_LENGTH_BITS)); \
+ ADD_BITS(c__->o.length.codewords[length__], \
+ c__->o.length.lens[length__]); \
+ \
+ if (!CAN_BUFFER(MAX_LITLEN_CODEWORD_LEN + \
+ DEFLATE_MAX_EXTRA_LENGTH_BITS + \
+ MAX_OFFSET_CODEWORD_LEN + \
+ DEFLATE_MAX_EXTRA_OFFSET_BITS)) \
+ FLUSH_BITS(); \
+ \
+ /* Offset symbol */ \
+ ADD_BITS(codes__->codewords.offset[offset_slot__], \
+ codes__->lens.offset[offset_slot__]); \
+ \
+ if (!CAN_BUFFER(MAX_OFFSET_CODEWORD_LEN + \
+ DEFLATE_MAX_EXTRA_OFFSET_BITS)) \
+ FLUSH_BITS(); \
+ \
+ /* Extra offset bits */ \
+ ADD_BITS(offset__ - deflate_offset_slot_base[offset_slot__], \
+ deflate_extra_offset_bits[offset_slot__]); \
+ \
+ FLUSH_BITS(); \
+} while (0)
+
+/*
+ * Choose the best type of block to use (dynamic Huffman, static Huffman, or
+ * uncompressed), then output it.  The exact bit cost of each block type is
+ * computed first and the cheapest wins.  Output state is carried in 'os';
+ * on output overflow, os->next is left equal to os->end.
+ */
+static void
+deflate_flush_block(struct libdeflate_compressor *c,
+ struct deflate_output_bitstream *os,
+ const u8 *block_begin, u32 block_length,
+ const struct deflate_sequence *sequences,
+ bool is_final_block)
+{
+ /*
+ * It is hard to get compilers to understand that writes to 'os->next'
+ * don't alias 'os'. That hurts performance significantly, as
+ * everything in 'os' would keep getting re-loaded. ('restrict'
+ * *should* do the trick, but it's unreliable.) Therefore, we keep all
+ * the output bitstream state in local variables, and output bits using
+ * macros. This is similar to what the decompressor does.
+ */
+ const u8 *in_next = block_begin;
+ const u8 * const in_end = block_begin + block_length;
+ bitbuf_t bitbuf = os->bitbuf;
+ unsigned bitcount = os->bitcount;
+ u8 *out_next = os->next;
+ u8 * const out_end = os->end;
+ /* The cost for each block type, in bits */
+ u32 dynamic_cost = 0;
+ u32 static_cost = 0;
+ u32 uncompressed_cost = 0;
+ u32 best_cost;
+ struct deflate_codes *codes;
+ unsigned sym;
+
+ ASSERT(block_length >= MIN_BLOCK_LENGTH || is_final_block);
+ ASSERT(block_length <= MAX_BLOCK_LENGTH);
+ ASSERT(bitcount <= 7);
+ ASSERT((bitbuf & ~(((bitbuf_t)1 << bitcount) - 1)) == 0);
+ ASSERT(out_next <= out_end);
+
+ if (sequences != NULL /* !near_optimal */ ||
+ !SUPPORT_NEAR_OPTIMAL_PARSING) {
+ /* Tally the end-of-block symbol. */
+ c->freqs.litlen[DEFLATE_END_OF_BLOCK]++;
+
+ /* Build dynamic Huffman codes. */
+ deflate_make_huffman_codes(&c->freqs, &c->codes);
+ } /* Else, this was already done. */
+
+ /* Precompute the precode items and build the precode. */
+ deflate_precompute_huffman_header(c);
+
+ /* Account for the cost of encoding dynamic Huffman codes. */
+ /* 5+5+4: the HLIT, HDIST, and HCLEN fields; 3 bits per precode len. */
+ dynamic_cost += 5 + 5 + 4 + (3 * c->o.precode.num_explicit_lens);
+ for (sym = 0; sym < DEFLATE_NUM_PRECODE_SYMS; sym++) {
+ u32 extra = deflate_extra_precode_bits[sym];
+
+ dynamic_cost += c->o.precode.freqs[sym] *
+ (extra + c->o.precode.lens[sym]);
+ }
+
+ /* Account for the cost of encoding literals. */
+ /* Static code: 8 bits for literals 0-143, 9 bits for 144-255. */
+ for (sym = 0; sym < 144; sym++) {
+ dynamic_cost += c->freqs.litlen[sym] *
+ c->codes.lens.litlen[sym];
+ static_cost += c->freqs.litlen[sym] * 8;
+ }
+ for (; sym < 256; sym++) {
+ dynamic_cost += c->freqs.litlen[sym] *
+ c->codes.lens.litlen[sym];
+ static_cost += c->freqs.litlen[sym] * 9;
+ }
+
+ /* Account for the cost of encoding the end-of-block symbol. */
+ dynamic_cost += c->codes.lens.litlen[DEFLATE_END_OF_BLOCK];
+ static_cost += 7;
+
+ /* Account for the cost of encoding lengths. */
+ for (sym = DEFLATE_FIRST_LEN_SYM;
+ sym < DEFLATE_FIRST_LEN_SYM + ARRAY_LEN(deflate_extra_length_bits);
+ sym++) {
+ u32 extra = deflate_extra_length_bits[
+ sym - DEFLATE_FIRST_LEN_SYM];
+
+ dynamic_cost += c->freqs.litlen[sym] *
+ (extra + c->codes.lens.litlen[sym]);
+ static_cost += c->freqs.litlen[sym] *
+ (extra + c->static_codes.lens.litlen[sym]);
+ }
+
+ /* Account for the cost of encoding offsets. */
+ /* Static offset codewords are all 5 bits. */
+ for (sym = 0; sym < ARRAY_LEN(deflate_extra_offset_bits); sym++) {
+ u32 extra = deflate_extra_offset_bits[sym];
+
+ dynamic_cost += c->freqs.offset[sym] *
+ (extra + c->codes.lens.offset[sym]);
+ static_cost += c->freqs.offset[sym] * (extra + 5);
+ }
+
+ /* Compute the cost of using uncompressed blocks. */
+ /*
+ * (-(bitcount + 3) & 7): padding to a byte boundary after BFINAL and
+ * BTYPE; 32: the LEN and NLEN fields; 40 per extra block: another
+ * byte-aligned 3-bit header plus LEN/NLEN; 8 bits per data byte.
+ */
+ uncompressed_cost += (-(bitcount + 3) & 7) + 32 +
+ (40 * (DIV_ROUND_UP(block_length,
+ UINT16_MAX) - 1)) +
+ (8 * block_length);
+
+ /* Choose and output the cheapest type of block. */
+ best_cost = MIN(static_cost, uncompressed_cost);
+ if (dynamic_cost < best_cost) {
+ const unsigned num_explicit_lens = c->o.precode.num_explicit_lens;
+ const unsigned num_precode_items = c->o.precode.num_items;
+ unsigned precode_sym, precode_item;
+ unsigned i;
+
+ /* Dynamic Huffman block */
+
+ best_cost = dynamic_cost;
+ codes = &c->codes;
+ STATIC_ASSERT(CAN_BUFFER(1 + 2 + 5 + 5 + 4 + 3));
+ ADD_BITS(is_final_block, 1);
+ ADD_BITS(DEFLATE_BLOCKTYPE_DYNAMIC_HUFFMAN, 2);
+ ADD_BITS(c->o.precode.num_litlen_syms - 257, 5);
+ ADD_BITS(c->o.precode.num_offset_syms - 1, 5);
+ ADD_BITS(num_explicit_lens - 4, 4);
+
+ /* Output the lengths of the codewords in the precode. */
+ if (CAN_BUFFER(3 * (DEFLATE_NUM_PRECODE_SYMS - 1))) {
+ /*
+ * A 64-bit bitbuffer is just one bit too small to hold
+ * the maximum number of precode lens, so to minimize
+ * flushes we merge one len with the previous fields.
+ */
+ precode_sym = deflate_precode_lens_permutation[0];
+ ADD_BITS(c->o.precode.lens[precode_sym], 3);
+ FLUSH_BITS();
+ i = 1; /* num_explicit_lens >= 4 */
+ do {
+ precode_sym =
+ deflate_precode_lens_permutation[i];
+ ADD_BITS(c->o.precode.lens[precode_sym], 3);
+ } while (++i < num_explicit_lens);
+ FLUSH_BITS();
+ } else {
+ FLUSH_BITS();
+ i = 0;
+ do {
+ precode_sym =
+ deflate_precode_lens_permutation[i];
+ ADD_BITS(c->o.precode.lens[precode_sym], 3);
+ FLUSH_BITS();
+ } while (++i < num_explicit_lens);
+ }
+
+ /*
+ * Output the lengths of the codewords in the litlen and offset
+ * codes, encoded by the precode.
+ */
+ i = 0;
+ do {
+ precode_item = c->o.precode.items[i];
+ precode_sym = precode_item & 0x1F;
+ STATIC_ASSERT(CAN_BUFFER(MAX_PRE_CODEWORD_LEN + 7));
+ ADD_BITS(c->o.precode.codewords[precode_sym],
+ c->o.precode.lens[precode_sym]);
+ ADD_BITS(precode_item >> 5,
+ deflate_extra_precode_bits[precode_sym]);
+ FLUSH_BITS();
+ } while (++i < num_precode_items);
+ } else if (static_cost < uncompressed_cost) {
+ /* Static Huffman block */
+ codes = &c->static_codes;
+ ADD_BITS(is_final_block, 1);
+ ADD_BITS(DEFLATE_BLOCKTYPE_STATIC_HUFFMAN, 2);
+ FLUSH_BITS();
+ } else {
+ /*
+ * Uncompressed block(s). DEFLATE limits the length of
+ * uncompressed blocks to UINT16_MAX bytes, so if the length of
+ * the "block" we're flushing is over UINT16_MAX, we actually
+ * output multiple blocks.
+ */
+ do {
+ u8 bfinal = 0;
+ size_t len = UINT16_MAX;
+
+ if (in_end - in_next <= UINT16_MAX) {
+ bfinal = is_final_block;
+ len = in_end - in_next;
+ }
+ if (out_end - out_next <
+ (bitcount + 3 + 7) / 8 + 4 + len) {
+ /* Not enough output space remaining. */
+ out_next = out_end;
+ goto out;
+ }
+ /*
+ * Output BFINAL (1 bit) and BTYPE (2 bits), then align
+ * to a byte boundary.
+ */
+ STATIC_ASSERT(DEFLATE_BLOCKTYPE_UNCOMPRESSED == 0);
+ *out_next++ = (bfinal << bitcount) | bitbuf;
+ if (bitcount > 5)
+ *out_next++ = 0;
+ bitbuf = 0;
+ bitcount = 0;
+ /* Output LEN and NLEN, then the data itself. */
+ put_unaligned_le16(len, out_next);
+ out_next += 2;
+ put_unaligned_le16(~len, out_next);
+ out_next += 2;
+ memcpy(out_next, in_next, len);
+ out_next += len;
+ in_next += len;
+ } while (in_next != in_end);
+ /* Done outputting uncompressed block(s) */
+ goto out;
+ }
+
+ /* Output the literals and matches for a dynamic or static block. */
+ ASSERT(bitcount <= 7);
+ deflate_compute_full_len_codewords(c, codes);
+#if SUPPORT_NEAR_OPTIMAL_PARSING
+ if (sequences == NULL) {
+ /* Output the literals and matches from the minimum-cost path */
+ struct deflate_optimum_node *cur_node =
+ &c->p.n.optimum_nodes[0];
+ struct deflate_optimum_node * const end_node =
+ &c->p.n.optimum_nodes[block_length];
+ do {
+ unsigned length = cur_node->item & OPTIMUM_LEN_MASK;
+ unsigned offset = cur_node->item >>
+ OPTIMUM_OFFSET_SHIFT;
+ if (length == 1) {
+ /* Literal */
+ ADD_BITS(codes->codewords.litlen[offset],
+ codes->lens.litlen[offset]);
+ FLUSH_BITS();
+ } else {
+ /* Match */
+ WRITE_MATCH(c, codes, length, offset,
+ c->p.n.offset_slot_full[offset]);
+ }
+ cur_node += length;
+ } while (cur_node != end_node);
+ } else
+#endif /* SUPPORT_NEAR_OPTIMAL_PARSING */
+ {
+ /* Output the literals and matches from the sequences list. */
+ const struct deflate_sequence *seq;
+
+ for (seq = sequences; ; seq++) {
+ u32 litrunlen = seq->litrunlen_and_length &
+ SEQ_LITRUNLEN_MASK;
+ unsigned length = seq->litrunlen_and_length >>
+ SEQ_LENGTH_SHIFT;
+ unsigned lit;
+
+ /* Output a run of literals. */
+ /* Fast path: emit 4 literals per flush when possible. */
+ if (CAN_BUFFER(4 * MAX_LITLEN_CODEWORD_LEN)) {
+ for (; litrunlen >= 4; litrunlen -= 4) {
+ lit = *in_next++;
+ ADD_BITS(codes->codewords.litlen[lit],
+ codes->lens.litlen[lit]);
+ lit = *in_next++;
+ ADD_BITS(codes->codewords.litlen[lit],
+ codes->lens.litlen[lit]);
+ lit = *in_next++;
+ ADD_BITS(codes->codewords.litlen[lit],
+ codes->lens.litlen[lit]);
+ lit = *in_next++;
+ ADD_BITS(codes->codewords.litlen[lit],
+ codes->lens.litlen[lit]);
+ FLUSH_BITS();
+ }
+ if (litrunlen-- != 0) {
+ lit = *in_next++;
+ ADD_BITS(codes->codewords.litlen[lit],
+ codes->lens.litlen[lit]);
+ if (litrunlen-- != 0) {
+ lit = *in_next++;
+ ADD_BITS(codes->codewords.litlen[lit],
+ codes->lens.litlen[lit]);
+ if (litrunlen-- != 0) {
+ lit = *in_next++;
+ ADD_BITS(codes->codewords.litlen[lit],
+ codes->lens.litlen[lit]);
+ }
+ }
+ FLUSH_BITS();
+ }
+ } else {
+ while (litrunlen--) {
+ lit = *in_next++;
+ ADD_BITS(codes->codewords.litlen[lit],
+ codes->lens.litlen[lit]);
+ FLUSH_BITS();
+ }
+ }
+
+ if (length == 0) { /* Last sequence? */
+ ASSERT(in_next == in_end);
+ break;
+ }
+
+ /* Output a match. */
+ WRITE_MATCH(c, codes, length, seq->offset,
+ seq->offset_slot);
+ in_next += length;
+ }
+ }
+
+ /* Output the end-of-block symbol. */
+ ASSERT(bitcount <= 7);
+ ADD_BITS(codes->codewords.litlen[DEFLATE_END_OF_BLOCK],
+ codes->lens.litlen[DEFLATE_END_OF_BLOCK]);
+ FLUSH_BITS();
+out:
+ ASSERT(bitcount <= 7);
+ /*
+ * Assert that the block cost was computed correctly, as
+ * libdeflate_deflate_compress_bound() relies on this via the assumption
+ * that uncompressed blocks will always be used when cheaper.
+ */
+ ASSERT(8 * (out_next - os->next) + bitcount - os->bitcount ==
+ 3 + best_cost || out_next == out_end);
+
+ os->bitbuf = bitbuf;
+ os->bitcount = bitcount;
+ os->next = out_next;
+}
+
+/******************************************************************************/
+
+/*
+ * Block splitting algorithm. The problem is to decide when it is worthwhile to
+ * start a new block with new Huffman codes. There is a theoretically optimal
+ * solution: recursively consider every possible block split, considering the
+ * exact cost of each block, and choose the minimum cost approach. But this is
+ * far too slow. Instead, as an approximation, we can count symbols and after
+ * every N symbols, compare the expected distribution of symbols based on the
+ * previous data with the actual distribution. If they differ "by enough", then
+ * start a new block.
+ *
+ * As an optimization and heuristic, we don't distinguish between every symbol
+ * but rather we combine many symbols into a single "observation type". For
+ * literals we only look at the high bits and low bits, and for matches we only
+ * look at whether the match is long or not. The assumption is that for typical
+ * "real" data, places that are good block boundaries will tend to be noticeable
+ * based only on changes in these aggregate probabilities, without looking for
+ * subtle differences in individual symbols. For example, a change from ASCII
+ * bytes to non-ASCII bytes, or from few matches (generally less compressible)
+ * to many matches (generally more compressible), would be easily noticed based
+ * on the aggregates.
+ *
+ * For determining whether the probability distributions are "different enough"
+ * to start a new block, the simple heuristic of splitting when the sum of
+ * absolute differences exceeds a constant seems to be good enough. We also add
+ * a number proportional to the block length so that the algorithm is more
+ * likely to end long blocks than short blocks. This reflects the general
+ * expectation that it will become increasingly beneficial to start a new block
+ * as the current block grows longer.
+ *
+ * Finally, for an approximation, it is not strictly necessary that the exact
+ * symbols being used are considered. With "near-optimal parsing", for example,
+ * the actual symbols that will be used are unknown until after the block
+ * boundary is chosen and the block has been optimized. Since the final choices
+ * cannot be used, we can use preliminary "greedy" choices instead.
+ */
+
+/* Reset all block split statistics in preparation for a new block. */
+static void
+init_block_split_stats(struct block_split_stats *stats)
+{
+    int t;
+
+    for (t = 0; t < NUM_OBSERVATION_TYPES; t++) {
+        stats->observations[t] = 0;
+        stats->new_observations[t] = 0;
+    }
+    stats->num_observations = 0;
+    stats->num_new_observations = 0;
+}
+
+/*
+ * Tally a literal for the block split statistics.  Heuristic: bucket by
+ * the literal's top 2 bits and low 1 bit, giving 8 observation types.
+ */
+static forceinline void
+observe_literal(struct block_split_stats *stats, u8 lit)
+{
+    const unsigned type = ((lit >> 5) & 0x6) | (lit & 1);
+
+    stats->new_observations[type]++;
+    stats->num_new_observations++;
+}
+
+/*
+ * Tally a match for the block split statistics.  Heuristic: distinguish
+ * only "short" (length < 9) from "long" (length >= 9) matches.
+ */
+static forceinline void
+observe_match(struct block_split_stats *stats, unsigned length)
+{
+    const unsigned type = NUM_LITERAL_OBSERVATION_TYPES + (length >= 9);
+
+    stats->new_observations[type]++;
+    stats->num_new_observations++;
+}
+
+/* Fold the pending "new" observation counts into the running totals. */
+static void
+merge_new_observations(struct block_split_stats *stats)
+{
+    int t;
+
+    for (t = 0; t < NUM_OBSERVATION_TYPES; t++) {
+        stats->observations[t] += stats->new_observations[t];
+        stats->new_observations[t] = 0;
+    }
+    stats->num_observations += stats->num_new_observations;
+    stats->num_new_observations = 0;
+}
+
+/*
+ * Decide whether the current block should end at 'block_length' bytes,
+ * based on how much the distribution of the new observations differs from
+ * that of the previous observations.  Returns true to end the block;
+ * otherwise merges the new observations into the totals and returns false.
+ */
+static bool
+do_end_block_check(struct block_split_stats *stats, u32 block_length)
+{
+ if (stats->num_observations > 0) {
+ /*
+ * Compute the sum of absolute differences of probabilities. To
+ * avoid needing to use floating point arithmetic or do slow
+ * divisions, we do all arithmetic with the probabilities
+ * multiplied by num_observations * num_new_observations. E.g.,
+ * for the "old" observations the probabilities would be
+ * (double)observations[i] / num_observations, but since we
+ * multiply by both num_observations and num_new_observations we
+ * really do observations[i] * num_new_observations.
+ */
+ u32 total_delta = 0;
+ u32 num_items;
+ u32 cutoff;
+ int i;
+
+ for (i = 0; i < NUM_OBSERVATION_TYPES; i++) {
+ u32 expected = stats->observations[i] *
+ stats->num_new_observations;
+ u32 actual = stats->new_observations[i] *
+ stats->num_observations;
+ u32 delta = (actual > expected) ? actual - expected :
+ expected - actual;
+
+ total_delta += delta;
+ }
+
+ num_items = stats->num_observations +
+ stats->num_new_observations;
+ /*
+ * Heuristic: the cutoff is when the sum of absolute differences
+ * of probabilities becomes at least 200/512. As above, the
+ * probability is multiplied by both num_new_observations and
+ * num_observations. Be careful to avoid integer overflow.
+ */
+ cutoff = stats->num_new_observations * 200 / 512 *
+ stats->num_observations;
+ /*
+ * Very short blocks have a lot of overhead for the Huffman
+ * codes, so only use them if it clearly seems worthwhile.
+ * (This is an additional penalty, which adds to the smaller
+ * penalty below which scales more slowly.)
+ */
+ if (block_length < 10000 && num_items < 8192)
+ cutoff += (u64)cutoff * (8192 - num_items) / 8192;
+
+ /* Ready to end the block? */
+ /* The second term penalizes long blocks, growing with length. */
+ if (total_delta +
+ (block_length / 4096) * stats->num_observations >= cutoff)
+ return true;
+ }
+ merge_new_observations(stats);
+ return false;
+}
+
+/*
+ * Return true if enough new observations have accumulated, and both the
+ * current block and the remaining input are long enough, to make an
+ * end-of-block check worthwhile.
+ */
+static forceinline bool
+ready_to_check_block(const struct block_split_stats *stats,
+                     const u8 *in_block_begin, const u8 *in_next,
+                     const u8 *in_end)
+{
+    if (stats->num_new_observations < NUM_OBSERVATIONS_PER_BLOCK_CHECK)
+        return false;
+    if (in_next - in_block_begin < MIN_BLOCK_LENGTH)
+        return false;
+    return in_end - in_next >= MIN_BLOCK_LENGTH;
+}
+
+/*
+ * Decide whether to end the current block here.  The cheap readiness
+ * checks run first; the statistics comparison only runs (and only mutates
+ * 'stats') when the block is actually eligible to end.
+ */
+static forceinline bool
+should_end_block(struct block_split_stats *stats,
+                 const u8 *in_block_begin, const u8 *in_next, const u8 *in_end)
+{
+    return ready_to_check_block(stats, in_block_begin, in_next, in_end) &&
+           do_end_block_check(stats, in_next - in_block_begin);
+}
+
+/******************************************************************************/
+
+/*
+ * Begin a new sequence list: mark the first sequence as an empty literal
+ * run and clear the symbol frequency counters.
+ */
+static void
+deflate_begin_sequences(struct libdeflate_compressor *c,
+                        struct deflate_sequence *first_seq)
+{
+    first_seq->litrunlen_and_length = 0;
+    deflate_reset_symbol_frequencies(c);
+}
+
+/*
+ * Record the choice of a literal: bump its frequency, optionally feed the
+ * block splitter, and extend the current sequence's literal run.
+ */
+static forceinline void
+deflate_choose_literal(struct libdeflate_compressor *c, unsigned literal,
+                       bool gather_split_stats, struct deflate_sequence *seq)
+{
+    /* The literal run length field cannot overflow within one block. */
+    STATIC_ASSERT(MAX_BLOCK_LENGTH <= SEQ_LITRUNLEN_MASK);
+
+    c->freqs.litlen[literal]++;
+    if (gather_split_stats)
+        observe_literal(&c->split_stats, literal);
+    seq->litrunlen_and_length++;
+}
+
+/*
+ * Record the choice of a match: update the symbol frequencies and split
+ * statistics, store the match in the current sequence, and start a fresh
+ * sequence with an empty literal run.
+ */
+static forceinline void
+deflate_choose_match(struct libdeflate_compressor *c,
+                     unsigned length, unsigned offset, bool gather_split_stats,
+                     struct deflate_sequence **seq_p)
+{
+    const unsigned length_slot = deflate_length_slot[length];
+    const unsigned offset_slot = deflate_get_offset_slot(offset);
+    struct deflate_sequence *seq = *seq_p;
+
+    if (gather_split_stats)
+        observe_match(&c->split_stats, length);
+
+    c->freqs.litlen[DEFLATE_FIRST_LEN_SYM + length_slot]++;
+    c->freqs.offset[offset_slot]++;
+
+    /* Complete the current sequence with the match data... */
+    seq->litrunlen_and_length |= (u32)length << SEQ_LENGTH_SHIFT;
+    seq->offset = offset;
+    seq->offset_slot = offset_slot;
+
+    /* ...and begin the next one. */
+    seq++;
+    seq->litrunlen_and_length = 0;
+    *seq_p = seq;
+}
+
+/*
+ * Clamp the maximum and nice match lengths to the number of bytes
+ * remaining in the input buffer, so a match can never extend past the end.
+ */
+static forceinline void
+adjust_max_and_nice_len(unsigned *max_len, unsigned *nice_len, size_t remaining)
+{
+    if (unlikely(remaining < DEFLATE_MAX_MATCH_LEN)) {
+        *max_len = remaining;
+        if (*nice_len > *max_len)
+            *nice_len = *max_len;
+    }
+}
+
+/*
+ * Choose the minimum match length for the greedy and lazy parsers.
+ *
+ * By default the minimum match length is 3, which is the smallest length the
+ * DEFLATE format allows. However, with greedy and lazy parsing, some data
+ * (e.g. DNA sequencing data) benefits greatly from a longer minimum length.
+ * Typically, this is because literals are very cheap. In general, the
+ * near-optimal parser handles this case naturally, but the greedy and lazy
+ * parsers need a heuristic to decide when to use short matches.
+ *
+ * The heuristic we use is to make the minimum match length depend on the number
+ * of different literals that exist in the data. If there are many different
+ * literals, then literals will probably be expensive, so short matches will
+ * probably be worthwhile. Conversely, if not many literals are used, then
+ * probably literals will be cheap and short matches won't be worthwhile.
+ */
+static unsigned
+choose_min_match_len(unsigned num_used_literals, unsigned max_search_depth)
+{
+    /*
+     * Map from the number of distinct literals used to the minimum match
+     * length.  Entries beyond the end of the table are implicitly 3.
+     */
+    static const u8 min_lens[] = {
+        9, 9, 9, 9, 9, 9, 8, 8, 7, 7, 6, 6, 6, 6, 6, 6,
+        5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+        5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4, 4, 4,
+        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+    };
+    unsigned min_len;
+
+    STATIC_ASSERT(DEFLATE_MIN_MATCH_LEN <= 3);
+    STATIC_ASSERT(ARRAY_LEN(min_lens) <= DEFLATE_NUM_LITERALS + 1);
+
+    if (num_used_literals >= ARRAY_LEN(min_lens))
+        return 3;
+    min_len = min_lens[num_used_literals];
+
+    /*
+     * A shallow search makes long matches hard to find, so cap the
+     * minimum length accordingly.
+     */
+    if (max_search_depth < 5)
+        min_len = MIN(min_len, 4);
+    else if (max_search_depth < 10)
+        min_len = MIN(min_len, 5);
+    else if (max_search_depth < 16)
+        min_len = MIN(min_len, 7);
+
+    return min_len;
+}
+
+/*
+ * Estimate a good minimum match length by counting how many distinct byte
+ * values occur near the start of the data.
+ */
+static unsigned
+calculate_min_match_len(const u8 *data, size_t data_len,
+                        unsigned max_search_depth)
+{
+    u8 seen[256] = { 0 };
+    unsigned num_used_literals = 0;
+    size_t i;
+
+    /*
+     * Scan at most the first 4 KiB as an initial approximation; the
+     * caller may use recalculate_min_match_len() to refine it later.
+     */
+    if (data_len > 4096)
+        data_len = 4096;
+    for (i = 0; i < data_len; i++)
+        seen[data[i]] = 1;
+    for (i = 0; i < 256; i++)
+        num_used_literals += seen[i];
+    return choose_min_match_len(num_used_literals, max_search_depth);
+}
+
+/*
+ * Re-derive the minimum match length from the literal frequencies actually
+ * accumulated for the block (freqs->litlen), ignoring literals whose
+ * frequency falls at or below ~1/1024 of the total.
+ */
+static unsigned
+recalculate_min_match_len(const struct deflate_freqs *freqs,
+                          unsigned max_search_depth)
+{
+    u32 total_lit_freq = 0;
+    u32 rare_cutoff;
+    unsigned num_used_literals = 0;
+    int sym;
+
+    for (sym = 0; sym < DEFLATE_NUM_LITERALS; sym++)
+        total_lit_freq += freqs->litlen[sym];
+
+    /* Literals used very rarely don't count as "used". */
+    rare_cutoff = total_lit_freq >> 10;
+
+    for (sym = 0; sym < DEFLATE_NUM_LITERALS; sym++)
+        num_used_literals += (freqs->litlen[sym] > rare_cutoff);
+
+    return choose_min_match_len(num_used_literals, max_search_depth);
+}
+
+/*
+ * Return the position at which the current block must end, at most
+ * 'soft_max_len' bytes from its start.  If ending there would leave fewer
+ * than MIN_BLOCK_LENGTH bytes for the next block, extend to the end of the
+ * input instead.
+ */
+static forceinline const u8 *
+choose_max_block_end(const u8 *in_block_begin, const u8 *in_end,
+                     size_t soft_max_len)
+{
+    const size_t remaining = in_end - in_block_begin;
+
+    if (remaining < soft_max_len + MIN_BLOCK_LENGTH)
+        return in_end;
+    return in_block_begin + soft_max_len;
+}
+
+/*
+ * The level 0 "compressor": store the input as uncompressed DEFLATE
+ * blocks.  Returns the number of output bytes written, or 0 if the output
+ * buffer is too small.
+ */
+static size_t
+deflate_compress_none(const u8 *in, size_t in_nbytes,
+                      u8 *out, size_t out_nbytes_avail)
+{
+    const u8 * const in_end = in + in_nbytes;
+    const u8 *in_next = in;
+    u8 * const out_end = out + out_nbytes_avail;
+    u8 *out_next = out;
+
+    /*
+     * A zero-length input still requires one (empty) block for the output
+     * to be a valid DEFLATE stream.  Handle it specially so the memcpy()
+     * below is never given a NULL source pointer.
+     */
+    if (unlikely(in_nbytes == 0)) {
+        if (out_nbytes_avail < 5)
+            return 0;
+        /* BFINAL=1 and BTYPE=UNCOMPRESSED */
+        *out_next++ = 1 | (DEFLATE_BLOCKTYPE_UNCOMPRESSED << 1);
+        /* LEN=0 and NLEN=0xFFFF */
+        put_unaligned_le32(0xFFFF0000, out_next);
+        return 5;
+    }
+
+    do {
+        const size_t remaining = in_end - in_next;
+        u8 bfinal = 0;
+        size_t len = UINT16_MAX;
+
+        if (remaining <= UINT16_MAX) {
+            bfinal = 1;
+            len = remaining;
+        }
+        /* Each block needs 1 header byte, LEN/NLEN, and the data. */
+        if ((size_t)(out_end - out_next) < 5 + len)
+            return 0;
+        /*
+         * Output BFINAL and BTYPE.  The stream is already byte-aligned
+         * here, so this step always requires outputting exactly 1 byte.
+         */
+        *out_next++ = bfinal | (DEFLATE_BLOCKTYPE_UNCOMPRESSED << 1);
+
+        /* Output LEN and NLEN, then the data itself. */
+        put_unaligned_le16(len, out_next);
+        out_next += 2;
+        put_unaligned_le16(~len, out_next);
+        out_next += 2;
+        memcpy(out_next, in_next, len);
+        out_next += len;
+        in_next += len;
+    } while (in_next != in_end);
+
+    return out_next - out;
+}
+
+/*
+ * This is a faster variant of deflate_compress_greedy(). It uses the
+ * ht_matchfinder rather than the hc_matchfinder. It also skips the block
+ * splitting algorithm and just uses fixed length blocks. c->max_search_depth
+ * has no effect with this algorithm, as it is hardcoded in ht_matchfinder.h.
+ */
+static void
+deflate_compress_fastest(struct libdeflate_compressor * restrict c,
+ const u8 *in, size_t in_nbytes,
+ struct deflate_output_bitstream *os)
+{
+ const u8 *in_next = in;
+ const u8 *in_end = in_next + in_nbytes;
+ const u8 *in_cur_base = in_next;
+ unsigned max_len = DEFLATE_MAX_MATCH_LEN;
+ unsigned nice_len = MIN(c->nice_match_length, max_len);
+ u32 next_hash = 0;
+
+ ht_matchfinder_init(&c->p.f.ht_mf);
+
+ do {
+ /* Starting a new DEFLATE block */
+
+ const u8 * const in_block_begin = in_next;
+ const u8 * const in_max_block_end = choose_max_block_end(
+ in_next, in_end, FAST_SOFT_MAX_BLOCK_LENGTH);
+ struct deflate_sequence *seq = c->p.f.sequences;
+
+ deflate_begin_sequences(c, seq);
+
+ do {
+ u32 length;
+ u32 offset;
+ size_t remaining = in_end - in_next;
+
+ /* Approaching the end of input: clamp the lengths. */
+ if (unlikely(remaining < DEFLATE_MAX_MATCH_LEN)) {
+ max_len = remaining;
+ /*
+ * Too few bytes left for the matchfinder to
+ * operate on; emit the rest as literals.
+ */
+ if (max_len < HT_MATCHFINDER_REQUIRED_NBYTES) {
+ do {
+ deflate_choose_literal(c,
+ *in_next++, false, seq);
+ } while (--max_len);
+ break;
+ }
+ nice_len = MIN(nice_len, max_len);
+ }
+ length = ht_matchfinder_longest_match(&c->p.f.ht_mf,
+ &in_cur_base,
+ in_next,
+ max_len,
+ nice_len,
+ &next_hash,
+ &offset);
+ if (length) {
+ /* Match found */
+ deflate_choose_match(c, length, offset, false,
+ &seq);
+ ht_matchfinder_skip_bytes(&c->p.f.ht_mf,
+ &in_cur_base,
+ in_next + 1,
+ in_end,
+ length - 1,
+ &next_hash);
+ in_next += length;
+ } else {
+ /* No match found */
+ deflate_choose_literal(c, *in_next++, false,
+ seq);
+ }
+
+ /* Check if it's time to output another block. */
+ } while (in_next < in_max_block_end &&
+ seq < &c->p.f.sequences[FAST_SEQ_STORE_LENGTH]);
+
+ deflate_flush_block(c, os, in_block_begin,
+ in_next - in_block_begin,
+ c->p.f.sequences, in_next == in_end);
+ } while (in_next != in_end);
+}
+
+/*
+ * This is the "greedy" DEFLATE compressor. It always chooses the longest
+ * match, using the hc_matchfinder, and splits blocks with the block split
+ * statistics heuristic.
+ */
+static void
+deflate_compress_greedy(struct libdeflate_compressor * restrict c,
+ const u8 *in, size_t in_nbytes,
+ struct deflate_output_bitstream *os)
+{
+ const u8 *in_next = in;
+ const u8 *in_end = in_next + in_nbytes;
+ const u8 *in_cur_base = in_next;
+ unsigned max_len = DEFLATE_MAX_MATCH_LEN;
+ unsigned nice_len = MIN(c->nice_match_length, max_len);
+ u32 next_hashes[2] = {0, 0};
+
+ hc_matchfinder_init(&c->p.g.hc_mf);
+
+ do {
+ /* Starting a new DEFLATE block */
+
+ const u8 * const in_block_begin = in_next;
+ const u8 * const in_max_block_end = choose_max_block_end(
+ in_next, in_end, SOFT_MAX_BLOCK_LENGTH);
+ struct deflate_sequence *seq = c->p.g.sequences;
+ unsigned min_len;
+
+ init_block_split_stats(&c->split_stats);
+ deflate_begin_sequences(c, seq);
+ min_len = calculate_min_match_len(in_next,
+ in_max_block_end - in_next,
+ c->max_search_depth);
+ do {
+ u32 length;
+ u32 offset;
+
+ adjust_max_and_nice_len(&max_len, &nice_len,
+ in_end - in_next);
+ length = hc_matchfinder_longest_match(
+ &c->p.g.hc_mf,
+ &in_cur_base,
+ in_next,
+ min_len - 1,
+ max_len,
+ nice_len,
+ c->max_search_depth,
+ next_hashes,
+ &offset);
+
+ /*
+ * Accept the match unless it has the absolute minimum
+ * length with a large offset (> 4096), which tends to
+ * be a poor trade against literals.
+ */
+ if (length >= min_len &&
+ (length > DEFLATE_MIN_MATCH_LEN ||
+ offset <= 4096)) {
+ /* Match found */
+ deflate_choose_match(c, length, offset, true,
+ &seq);
+ hc_matchfinder_skip_bytes(&c->p.g.hc_mf,
+ &in_cur_base,
+ in_next + 1,
+ in_end,
+ length - 1,
+ next_hashes);
+ in_next += length;
+ } else {
+ /* No match found */
+ deflate_choose_literal(c, *in_next++, true,
+ seq);
+ }
+
+ /* Check if it's time to output another block. */
+ } while (in_next < in_max_block_end &&
+ seq < &c->p.g.sequences[SEQ_STORE_LENGTH] &&
+ !should_end_block(&c->split_stats,
+ in_block_begin, in_next, in_end));
+
+ deflate_flush_block(c, os, in_block_begin,
+ in_next - in_block_begin,
+ c->p.g.sequences, in_next == in_end);
+ } while (in_next != in_end);
+}
+
+static forceinline void
+deflate_compress_lazy_generic(struct libdeflate_compressor * restrict c,
+ const u8 *in, size_t in_nbytes,
+ struct deflate_output_bitstream *os, bool lazy2)
+{
+ const u8 *in_next = in;
+ const u8 *in_end = in_next + in_nbytes;
+ const u8 *in_cur_base = in_next;
+ unsigned max_len = DEFLATE_MAX_MATCH_LEN;
+ unsigned nice_len = MIN(c->nice_match_length, max_len);
+ u32 next_hashes[2] = {0, 0};
+
+ hc_matchfinder_init(&c->p.g.hc_mf);
+
+ do {
+ /* Starting a new DEFLATE block */
+
+ const u8 * const in_block_begin = in_next;
+ const u8 * const in_max_block_end = choose_max_block_end(
+ in_next, in_end, SOFT_MAX_BLOCK_LENGTH);
+ const u8 *next_recalc_min_len =
+ in_next + MIN(in_end - in_next, 10000);
+ struct deflate_sequence *seq = c->p.g.sequences;
+ unsigned min_len;
+
+ init_block_split_stats(&c->split_stats);
+ deflate_begin_sequences(c, seq);
+ min_len = calculate_min_match_len(in_next,
+ in_max_block_end - in_next,
+ c->max_search_depth);
+ do {
+ unsigned cur_len;
+ unsigned cur_offset;
+ unsigned next_len;
+ unsigned next_offset;
+
+ /*
+ * Recalculate the minimum match length if it hasn't
+ * been done recently.
+ */
+ if (in_next >= next_recalc_min_len) {
+ min_len = recalculate_min_match_len(
+ &c->freqs,
+ c->max_search_depth);
+ next_recalc_min_len +=
+ MIN(in_end - next_recalc_min_len,
+ in_next - in_block_begin);
+ }
+
+ /* Find the longest match at the current position. */
+ adjust_max_and_nice_len(&max_len, &nice_len,
+ in_end - in_next);
+ cur_len = hc_matchfinder_longest_match(
+ &c->p.g.hc_mf,
+ &in_cur_base,
+ in_next,
+ min_len - 1,
+ max_len,
+ nice_len,
+ c->max_search_depth,
+ next_hashes,
+ &cur_offset);
+ if (cur_len < min_len ||
+ (cur_len == DEFLATE_MIN_MATCH_LEN &&
+ cur_offset > 8192)) {
+ /* No match found. Choose a literal. */
+ deflate_choose_literal(c, *in_next++, true,
+ seq);
+ continue;
+ }
+ in_next++;
+
+have_cur_match:
+ /*
+ * We have a match at the current position.
+ * If it's very long, choose it immediately.
+ */
+ if (cur_len >= nice_len) {
+ deflate_choose_match(c, cur_len, cur_offset,
+ true, &seq);
+ hc_matchfinder_skip_bytes(&c->p.g.hc_mf,
+ &in_cur_base,
+ in_next,
+ in_end,
+ cur_len - 1,
+ next_hashes);
+ in_next += cur_len - 1;
+ continue;
+ }
+
+ /*
+ * Try to find a better match at the next position.
+ *
+ * Note: since we already have a match at the *current*
+ * position, we use only half the 'max_search_depth'
+ * when checking the *next* position. This is a useful
+ * trade-off because it's more worthwhile to use a
+ * greater search depth on the initial match.
+ *
+ * Note: it's possible to structure the code such that
+ * there's only one call to longest_match(), which
+ * handles both the "find the initial match" and "try to
+ * find a better match" cases. However, it is faster to
+ * have two call sites, with longest_match() inlined at
+ * each.
+ */
+ adjust_max_and_nice_len(&max_len, &nice_len,
+ in_end - in_next);
+ next_len = hc_matchfinder_longest_match(
+ &c->p.g.hc_mf,
+ &in_cur_base,
+ in_next++,
+ cur_len - 1,
+ max_len,
+ nice_len,
+ c->max_search_depth >> 1,
+ next_hashes,
+ &next_offset);
+ /*
+ * Scoring heuristic: prefer the next match only if its
+ * extra length (weighted x4) outweighs any increase in
+ * log2(offset) by more than 2.
+ */
+ if (next_len >= cur_len &&
+ 4 * (int)(next_len - cur_len) +
+ ((int)bsr32(cur_offset) -
+ (int)bsr32(next_offset)) > 2) {
+ /*
+ * Found a better match at the next position.
+ * Output a literal. Then the next match
+ * becomes the current match.
+ */
+ deflate_choose_literal(c, *(in_next - 2), true,
+ seq);
+ cur_len = next_len;
+ cur_offset = next_offset;
+ goto have_cur_match;
+ }
+
+ if (lazy2) {
+ /* In lazy2 mode, look ahead another position */
+ adjust_max_and_nice_len(&max_len, &nice_len,
+ in_end - in_next);
+ next_len = hc_matchfinder_longest_match(
+ &c->p.g.hc_mf,
+ &in_cur_base,
+ in_next++,
+ cur_len - 1,
+ max_len,
+ nice_len,
+ c->max_search_depth >> 2,
+ next_hashes,
+ &next_offset);
+ /* Same scoring idea, with a higher bar (>6). */
+ if (next_len >= cur_len &&
+ 4 * (int)(next_len - cur_len) +
+ ((int)bsr32(cur_offset) -
+ (int)bsr32(next_offset)) > 6) {
+ /*
+ * There's a much better match two
+ * positions ahead, so use two literals.
+ */
+ deflate_choose_literal(
+ c, *(in_next - 3), true, seq);
+ deflate_choose_literal(
+ c, *(in_next - 2), true, seq);
+ cur_len = next_len;
+ cur_offset = next_offset;
+ goto have_cur_match;
+ }
+ /*
+ * No better match at either of the next 2
+ * positions. Output the current match.
+ * (in_next is already 2 positions past the
+ * match start, hence the 'cur_len - 3'.)
+ */
+ deflate_choose_match(c, cur_len, cur_offset,
+ true, &seq);
+ if (cur_len > 3) {
+ hc_matchfinder_skip_bytes(&c->p.g.hc_mf,
+ &in_cur_base,
+ in_next,
+ in_end,
+ cur_len - 3,
+ next_hashes);
+ in_next += cur_len - 3;
+ }
+ } else { /* !lazy2 */
+ /*
+ * No better match at the next position. Output
+ * the current match. (in_next is already 2
+ * positions past the match start, hence the
+ * 'cur_len - 2'.)
+ */
+ deflate_choose_match(c, cur_len, cur_offset,
+ true, &seq);
+ hc_matchfinder_skip_bytes(&c->p.g.hc_mf,
+ &in_cur_base,
+ in_next,
+ in_end,
+ cur_len - 2,
+ next_hashes);
+ in_next += cur_len - 2;
+ }
+ /* Check if it's time to output another block. */
+ } while (in_next < in_max_block_end &&
+ seq < &c->p.g.sequences[SEQ_STORE_LENGTH] &&
+ !should_end_block(&c->split_stats,
+ in_block_begin, in_next, in_end));
+
+ deflate_flush_block(c, os, in_block_begin,
+ in_next - in_block_begin,
+ c->p.g.sequences, in_next == in_end);
+ } while (in_next != in_end);
+}
+
+/*
+ * This is the "lazy" DEFLATE compressor. Before choosing a match, it checks to
+ * see if there's a better match at the next position. If yes, it outputs a
+ * literal and continues to the next position. If no, it outputs the match.
+ */
+static void
+deflate_compress_lazy(struct libdeflate_compressor * restrict c,
+ const u8 *in, size_t in_nbytes,
+ struct deflate_output_bitstream *os)
+{
+ /* lazy2=false: look ahead only one position past each match. */
+ deflate_compress_lazy_generic(c, in, in_nbytes, os, false);
+}
+
+/*
+ * The lazy2 compressor. This is similar to the regular lazy one, but it looks
+ * for a better match at the next 2 positions rather than the next 1. This
+ * makes it take slightly more time, but compress some inputs slightly more.
+ */
+static void
+deflate_compress_lazy2(struct libdeflate_compressor * restrict c,
+ const u8 *in, size_t in_nbytes,
+ struct deflate_output_bitstream *os)
+{
+ /* lazy2=true: look ahead up to two positions past each match. */
+ deflate_compress_lazy_generic(c, in, in_nbytes, os, true);
+}
+
+#if SUPPORT_NEAR_OPTIMAL_PARSING
+
+/*
+ * Follow the minimum-cost path in the graph of possible match/literal choices
+ * for the current block and compute the frequencies of the Huffman symbols that
+ * would be needed to output those matches and literals.
+ */
+static void
+deflate_tally_item_list(struct libdeflate_compressor *c, u32 block_length)
+{
+ struct deflate_optimum_node *cur_node = &c->p.n.optimum_nodes[0];
+ struct deflate_optimum_node *end_node =
+ &c->p.n.optimum_nodes[block_length];
+
+ do {
+ /*
+ * Item encoding: length == 1 means a literal, with the
+ * literal byte stored in the offset field; otherwise it's
+ * a match of the given length and offset.
+ */
+ unsigned length = cur_node->item & OPTIMUM_LEN_MASK;
+ unsigned offset = cur_node->item >> OPTIMUM_OFFSET_SHIFT;
+
+ if (length == 1) {
+ /* Literal */
+ c->freqs.litlen[offset]++;
+ } else {
+ /* Match */
+ c->freqs.litlen[DEFLATE_FIRST_LEN_SYM +
+ deflate_length_slot[length]]++;
+ c->freqs.offset[c->p.n.offset_slot_full[offset]]++;
+ }
+ cur_node += length;
+ } while (cur_node != end_node);
+
+ /* Tally the end-of-block symbol. */
+ c->freqs.litlen[DEFLATE_END_OF_BLOCK]++;
+}
+
+/* Set the current cost model from the codeword lengths specified in @lens. */
+static void
+deflate_set_costs_from_codes(struct libdeflate_compressor *c,
+ const struct deflate_lens *lens)
+{
+ unsigned i;
+
+ /*
+ * Literals. A codeword length of 0 means the symbol was unused in
+ * the previous pass; fall back to LITERAL_NOSTAT_BITS so it still
+ * has a finite cost.
+ */
+ for (i = 0; i < DEFLATE_NUM_LITERALS; i++) {
+ u32 bits = (lens->litlen[i] ?
+ lens->litlen[i] : LITERAL_NOSTAT_BITS);
+
+ c->p.n.costs.literal[i] = bits * BIT_COST;
+ }
+
+ /* Lengths (cost includes the slot's extra bits) */
+ for (i = DEFLATE_MIN_MATCH_LEN; i <= DEFLATE_MAX_MATCH_LEN; i++) {
+ unsigned length_slot = deflate_length_slot[i];
+ unsigned litlen_sym = DEFLATE_FIRST_LEN_SYM + length_slot;
+ u32 bits = (lens->litlen[litlen_sym] ?
+ lens->litlen[litlen_sym] : LENGTH_NOSTAT_BITS);
+
+ bits += deflate_extra_length_bits[length_slot];
+ c->p.n.costs.length[i] = bits * BIT_COST;
+ }
+
+ /* Offset slots (cost includes the slot's extra bits) */
+ for (i = 0; i < ARRAY_LEN(deflate_offset_slot_base); i++) {
+ u32 bits = (lens->offset[i] ?
+ lens->offset[i] : OFFSET_NOSTAT_BITS);
+
+ bits += deflate_extra_offset_bits[i];
+ c->p.n.costs.offset_slot[i] = bits * BIT_COST;
+ }
+}
+
+/*
+ * This lookup table gives the default cost of a literal symbol and of a length
+ * symbol, depending on the characteristics of the input data. It was generated
+ * by scripts/gen_default_litlen_costs.py.
+ *
+ * This table is indexed first by the estimated match probability:
+ *
+ * i=0: data doesn't contain many matches [match_prob=0.25]
+ * i=1: neutral [match_prob=0.50]
+ * i=2: data contains lots of matches [match_prob=0.75]
+ *
+ * This lookup produces a subtable which maps the number of distinct used
+ * literals to the default cost of a literal symbol, i.e.:
+ *
+ * int(-log2((1 - match_prob) / num_used_literals) * BIT_COST)
+ *
+ * ... for num_used_literals in [1, 256] (and 0, which is copied from 1). This
+ * accounts for literals usually getting cheaper as the number of distinct
+ * literals decreases, and as the proportion of literals to matches increases.
+ *
+ * The lookup also produces the cost of a length symbol, which is:
+ *
+ * int(-log2(match_prob/NUM_LEN_SLOTS) * BIT_COST)
+ *
+ * Note: we don't currently assign different costs to different literal symbols,
+ * or to different length symbols, as this is hard to do in a useful way.
+ */
+static const struct {
+ u8 used_lits_to_lit_cost[257]; /* indexed by number of distinct used literals (0..256) */
+ u8 len_sym_cost; /* cost of any length symbol, in BIT_COST units */
+} default_litlen_costs[] = {
+ { /* match_prob = 0.25 */
+ .used_lits_to_lit_cost = {
+ 6, 6, 22, 32, 38, 43, 48, 51,
+ 54, 57, 59, 61, 64, 65, 67, 69,
+ 70, 72, 73, 74, 75, 76, 77, 79,
+ 80, 80, 81, 82, 83, 84, 85, 85,
+ 86, 87, 88, 88, 89, 89, 90, 91,
+ 91, 92, 92, 93, 93, 94, 95, 95,
+ 96, 96, 96, 97, 97, 98, 98, 99,
+ 99, 99, 100, 100, 101, 101, 101, 102,
+ 102, 102, 103, 103, 104, 104, 104, 105,
+ 105, 105, 105, 106, 106, 106, 107, 107,
+ 107, 108, 108, 108, 108, 109, 109, 109,
+ 109, 110, 110, 110, 111, 111, 111, 111,
+ 112, 112, 112, 112, 112, 113, 113, 113,
+ 113, 114, 114, 114, 114, 114, 115, 115,
+ 115, 115, 115, 116, 116, 116, 116, 116,
+ 117, 117, 117, 117, 117, 118, 118, 118,
+ 118, 118, 118, 119, 119, 119, 119, 119,
+ 120, 120, 120, 120, 120, 120, 121, 121,
+ 121, 121, 121, 121, 121, 122, 122, 122,
+ 122, 122, 122, 123, 123, 123, 123, 123,
+ 123, 123, 124, 124, 124, 124, 124, 124,
+ 124, 125, 125, 125, 125, 125, 125, 125,
+ 125, 126, 126, 126, 126, 126, 126, 126,
+ 127, 127, 127, 127, 127, 127, 127, 127,
+ 128, 128, 128, 128, 128, 128, 128, 128,
+ 128, 129, 129, 129, 129, 129, 129, 129,
+ 129, 129, 130, 130, 130, 130, 130, 130,
+ 130, 130, 130, 131, 131, 131, 131, 131,
+ 131, 131, 131, 131, 131, 132, 132, 132,
+ 132, 132, 132, 132, 132, 132, 132, 133,
+ 133, 133, 133, 133, 133, 133, 133, 133,
+ 133, 134, 134, 134, 134, 134, 134, 134,
+ 134,
+ },
+ .len_sym_cost = 109,
+ }, { /* match_prob = 0.5 */
+ .used_lits_to_lit_cost = {
+ 16, 16, 32, 41, 48, 53, 57, 60,
+ 64, 66, 69, 71, 73, 75, 76, 78,
+ 80, 81, 82, 83, 85, 86, 87, 88,
+ 89, 90, 91, 92, 92, 93, 94, 95,
+ 96, 96, 97, 98, 98, 99, 99, 100,
+ 101, 101, 102, 102, 103, 103, 104, 104,
+ 105, 105, 106, 106, 107, 107, 108, 108,
+ 108, 109, 109, 110, 110, 110, 111, 111,
+ 112, 112, 112, 113, 113, 113, 114, 114,
+ 114, 115, 115, 115, 115, 116, 116, 116,
+ 117, 117, 117, 118, 118, 118, 118, 119,
+ 119, 119, 119, 120, 120, 120, 120, 121,
+ 121, 121, 121, 122, 122, 122, 122, 122,
+ 123, 123, 123, 123, 124, 124, 124, 124,
+ 124, 125, 125, 125, 125, 125, 126, 126,
+ 126, 126, 126, 127, 127, 127, 127, 127,
+ 128, 128, 128, 128, 128, 128, 129, 129,
+ 129, 129, 129, 129, 130, 130, 130, 130,
+ 130, 130, 131, 131, 131, 131, 131, 131,
+ 131, 132, 132, 132, 132, 132, 132, 133,
+ 133, 133, 133, 133, 133, 133, 134, 134,
+ 134, 134, 134, 134, 134, 134, 135, 135,
+ 135, 135, 135, 135, 135, 135, 136, 136,
+ 136, 136, 136, 136, 136, 136, 137, 137,
+ 137, 137, 137, 137, 137, 137, 138, 138,
+ 138, 138, 138, 138, 138, 138, 138, 139,
+ 139, 139, 139, 139, 139, 139, 139, 139,
+ 140, 140, 140, 140, 140, 140, 140, 140,
+ 140, 141, 141, 141, 141, 141, 141, 141,
+ 141, 141, 141, 142, 142, 142, 142, 142,
+ 142, 142, 142, 142, 142, 142, 143, 143,
+ 143, 143, 143, 143, 143, 143, 143, 143,
+ 144,
+ },
+ .len_sym_cost = 93,
+ }, { /* match_prob = 0.75 */
+ .used_lits_to_lit_cost = {
+ 32, 32, 48, 57, 64, 69, 73, 76,
+ 80, 82, 85, 87, 89, 91, 92, 94,
+ 96, 97, 98, 99, 101, 102, 103, 104,
+ 105, 106, 107, 108, 108, 109, 110, 111,
+ 112, 112, 113, 114, 114, 115, 115, 116,
+ 117, 117, 118, 118, 119, 119, 120, 120,
+ 121, 121, 122, 122, 123, 123, 124, 124,
+ 124, 125, 125, 126, 126, 126, 127, 127,
+ 128, 128, 128, 129, 129, 129, 130, 130,
+ 130, 131, 131, 131, 131, 132, 132, 132,
+ 133, 133, 133, 134, 134, 134, 134, 135,
+ 135, 135, 135, 136, 136, 136, 136, 137,
+ 137, 137, 137, 138, 138, 138, 138, 138,
+ 139, 139, 139, 139, 140, 140, 140, 140,
+ 140, 141, 141, 141, 141, 141, 142, 142,
+ 142, 142, 142, 143, 143, 143, 143, 143,
+ 144, 144, 144, 144, 144, 144, 145, 145,
+ 145, 145, 145, 145, 146, 146, 146, 146,
+ 146, 146, 147, 147, 147, 147, 147, 147,
+ 147, 148, 148, 148, 148, 148, 148, 149,
+ 149, 149, 149, 149, 149, 149, 150, 150,
+ 150, 150, 150, 150, 150, 150, 151, 151,
+ 151, 151, 151, 151, 151, 151, 152, 152,
+ 152, 152, 152, 152, 152, 152, 153, 153,
+ 153, 153, 153, 153, 153, 153, 154, 154,
+ 154, 154, 154, 154, 154, 154, 154, 155,
+ 155, 155, 155, 155, 155, 155, 155, 155,
+ 156, 156, 156, 156, 156, 156, 156, 156,
+ 156, 157, 157, 157, 157, 157, 157, 157,
+ 157, 157, 157, 158, 158, 158, 158, 158,
+ 158, 158, 158, 158, 158, 158, 159, 159,
+ 159, 159, 159, 159, 159, 159, 159, 159,
+ 160,
+ },
+ .len_sym_cost = 84,
+ },
+};
+
+/*
+ * Choose the default costs for literal and length symbols. These symbols are
+ * both part of the litlen alphabet.
+ */
+static void
+deflate_choose_default_litlen_costs(struct libdeflate_compressor *c,
+ const u8 *block_begin, u32 block_length,
+ u32 *lit_cost, u32 *len_sym_cost)
+{
+ unsigned num_used_literals = 0;
+ u32 literal_freq = block_length;
+ u32 match_freq = 0;
+ u32 cutoff;
+ u32 i;
+
+ /* Calculate the number of distinct literals that exist in the data. */
+ memset(c->freqs.litlen, 0,
+ DEFLATE_NUM_LITERALS * sizeof(c->freqs.litlen[0]));
+ cutoff = literal_freq >> 11; /* Ignore literals used very rarely. */
+ for (i = 0; i < block_length; i++)
+ c->freqs.litlen[block_begin[i]]++;
+ for (i = 0; i < DEFLATE_NUM_LITERALS; i++) {
+ if (c->freqs.litlen[i] > cutoff)
+ num_used_literals++;
+ }
+ if (num_used_literals == 0)
+ num_used_literals = 1;
+
+ /*
+ * Estimate the relative frequency of literals and matches in the
+ * optimal parsing solution. We don't know the optimal solution, so
+ * this can only be a very rough estimate. Therefore, we basically use
+ * the match frequency from a greedy parse. We also apply the min_len
+ * heuristic used by the greedy and lazy parsers, to avoid counting too
+ * many matches when literals are cheaper than short matches.
+ */
+ match_freq = 0;
+ i = choose_min_match_len(num_used_literals, c->max_search_depth);
+ for (; i < ARRAY_LEN(c->p.n.match_len_freqs); i++) {
+ match_freq += c->p.n.match_len_freqs[i];
+ /* Each match of length i replaces i literals. */
+ literal_freq -= i * c->p.n.match_len_freqs[i];
+ }
+ if ((s32)literal_freq < 0) /* shouldn't happen */
+ literal_freq = 0;
+
+ if (match_freq > literal_freq)
+ i = 2; /* many matches */
+ else if (match_freq * 4 > literal_freq)
+ i = 1; /* neutral */
+ else
+ i = 0; /* few matches */
+
+ /* The table values assume costs are in units of BIT_COST == 16. */
+ STATIC_ASSERT(BIT_COST == 16);
+ *lit_cost = default_litlen_costs[i].used_lits_to_lit_cost[
+ num_used_literals];
+ *len_sym_cost = default_litlen_costs[i].len_sym_cost;
+}
+
+/*
+ * Return the default cost of a match length: the cost of its length symbol
+ * plus the cost of the length slot's extra bits, in BIT_COST units.
+ */
+static forceinline u32
+deflate_default_length_cost(unsigned len, u32 len_sym_cost)
+{
+ unsigned slot = deflate_length_slot[len];
+ u32 num_extra_bits = deflate_extra_length_bits[slot];
+
+ return len_sym_cost + (num_extra_bits * BIT_COST);
+}
+
+/* Return the default cost of the given offset slot, in BIT_COST units. */
+static forceinline u32
+deflate_default_offset_slot_cost(unsigned slot)
+{
+ u32 num_extra_bits = deflate_extra_offset_bits[slot];
+ /*
+ * Assume that all offset symbols are equally probable.
+ * The resulting cost is 'int(-log2(1/30) * BIT_COST)',
+ * where 30 is the number of potentially-used offset symbols.
+ */
+ u32 offset_sym_cost = 4*BIT_COST + (907*BIT_COST)/1000;
+
+ return offset_sym_cost + (num_extra_bits * BIT_COST);
+}
+
+/* Set default symbol costs for the first block's first optimization pass. */
+static void
+deflate_set_default_costs(struct libdeflate_compressor *c,
+ u32 lit_cost, u32 len_sym_cost)
+{
+ unsigned i;
+
+ /*
+ * Literals: all get the same default cost, as computed by
+ * deflate_choose_default_litlen_costs().
+ */
+ for (i = 0; i < DEFLATE_NUM_LITERALS; i++)
+ c->p.n.costs.literal[i] = lit_cost;
+
+ /* Lengths */
+ for (i = DEFLATE_MIN_MATCH_LEN; i <= DEFLATE_MAX_MATCH_LEN; i++)
+ c->p.n.costs.length[i] =
+ deflate_default_length_cost(i, len_sym_cost);
+
+ /* Offset slots */
+ for (i = 0; i < ARRAY_LEN(deflate_offset_slot_base); i++)
+ c->p.n.costs.offset_slot[i] =
+ deflate_default_offset_slot_cost(i);
+}
+
+/*
+ * Blend a cost carried over from the previous block toward its default
+ * value. The larger 'change_amount' (0-3) is, the more weight the default
+ * cost receives relative to the previous cost.
+ */
+static forceinline void
+deflate_adjust_cost(u32 *cost_p, u32 default_cost, int change_amount)
+{
+ if (change_amount == 0)
+ /* Block is very similar to previous; prefer previous costs. */
+ *cost_p = (default_cost + 3 * *cost_p) / 4;
+ else if (change_amount == 1)
+ *cost_p = (default_cost + *cost_p) / 2;
+ else if (change_amount == 2)
+ *cost_p = (5 * default_cost + 3 * *cost_p) / 8;
+ else
+ /* Block differs greatly from previous; prefer default costs. */
+ *cost_p = (3 * default_cost + *cost_p) / 4;
+}
+
+/*
+ * Apply deflate_adjust_cost() with the given 'change_amount' to every
+ * literal, length, and offset-slot cost.
+ */
+static forceinline void
+deflate_adjust_costs_impl(struct libdeflate_compressor *c,
+ u32 lit_cost, u32 len_sym_cost, int change_amount)
+{
+ unsigned i;
+
+ /* Literals */
+ for (i = 0; i < DEFLATE_NUM_LITERALS; i++)
+ deflate_adjust_cost(&c->p.n.costs.literal[i], lit_cost,
+ change_amount);
+
+ /* Lengths */
+ for (i = DEFLATE_MIN_MATCH_LEN; i <= DEFLATE_MAX_MATCH_LEN; i++)
+ deflate_adjust_cost(&c->p.n.costs.length[i],
+ deflate_default_length_cost(i,
+ len_sym_cost),
+ change_amount);
+
+ /* Offset slots */
+ for (i = 0; i < ARRAY_LEN(deflate_offset_slot_base); i++)
+ deflate_adjust_cost(&c->p.n.costs.offset_slot[i],
+ deflate_default_offset_slot_cost(i),
+ change_amount);
+}
+
+/*
+ * Adjust the costs when beginning a new block.
+ *
+ * Since the current costs have been optimized for the data, it's undesirable to
+ * throw them away and start over with the default costs. At the same time, we
+ * don't want to bias the parse by assuming that the next block will be similar
+ * to the current block. As a compromise, make the costs closer to the
+ * defaults, but don't simply set them to the defaults.
+ */
+static void
+deflate_adjust_costs(struct libdeflate_compressor *c,
+ u32 lit_cost, u32 len_sym_cost)
+{
+ u64 total_delta = 0;
+ u64 cutoff;
+ int i;
+
+ /*
+ * Decide how different the current block is from the previous block,
+ * using the block splitting statistics from the current and previous
+ * blocks. The more different the current block is, the more we prefer
+ * the default costs rather than the previous block's costs.
+ *
+ * The algorithm here is similar to the end-of-block check one, but here
+ * we compare two entire blocks rather than a partial block with a small
+ * extra part, and therefore we need 64-bit numbers in some places.
+ */
+ for (i = 0; i < NUM_OBSERVATION_TYPES; i++) {
+ /* Cross-multiply to compare the two frequency ratios. */
+ u64 prev = (u64)c->p.n.prev_observations[i] *
+ c->split_stats.num_observations;
+ u64 cur = (u64)c->split_stats.observations[i] *
+ c->p.n.prev_num_observations;
+
+ total_delta += prev > cur ? prev - cur : cur - prev;
+ }
+ cutoff = ((u64)c->p.n.prev_num_observations *
+ c->split_stats.num_observations * 200) / 512;
+
+ /* Map the delta/cutoff ratio to a change_amount of 3, 2, 1, or 0. */
+ if (4 * total_delta > 9 * cutoff)
+ deflate_adjust_costs_impl(c, lit_cost, len_sym_cost, 3);
+ else if (2 * total_delta > 3 * cutoff)
+ deflate_adjust_costs_impl(c, lit_cost, len_sym_cost, 2);
+ else if (2 * total_delta > cutoff)
+ deflate_adjust_costs_impl(c, lit_cost, len_sym_cost, 1);
+ else
+ deflate_adjust_costs_impl(c, lit_cost, len_sym_cost, 0);
+}
+
+/*
+ * Find the minimum-cost path through the graph of possible match/literal
+ * choices for this block.
+ *
+ * We find the minimum cost path from 'c->p.n.optimum_nodes[0]', which
+ * represents the node at the beginning of the block, to
+ * 'c->p.n.optimum_nodes[block_length]', which represents the node at the end of
+ * the block. Edge costs are evaluated using the cost model 'c->p.n.costs'.
+ *
+ * The algorithm works backwards, starting at the end node and proceeding
+ * backwards one node at a time. At each node, the minimum cost to reach the
+ * end node is computed and the match/literal choice that begins that path is
+ * saved.
+ */
+static void
+deflate_find_min_cost_path(struct libdeflate_compressor *c,
+ const u32 block_length,
+ const struct lz_match *cache_ptr)
+{
+ struct deflate_optimum_node *end_node =
+ &c->p.n.optimum_nodes[block_length];
+ struct deflate_optimum_node *cur_node = end_node;
+
+ cur_node->cost_to_end = 0;
+ do {
+ unsigned num_matches;
+ unsigned literal;
+ u32 best_cost_to_end;
+
+ cur_node--;
+ cache_ptr--;
+
+ /*
+ * Match cache layout: each position's matches are followed by
+ * a trailer entry whose 'length' field holds the number of
+ * matches and whose 'offset' field holds the literal byte at
+ * that position (see the matchfinding loop that fills the
+ * cache). We walk the cache backwards, trailer first.
+ */
+ num_matches = cache_ptr->length;
+ literal = cache_ptr->offset;
+
+ /* It's always possible to choose a literal. */
+ best_cost_to_end = c->p.n.costs.literal[literal] +
+ (cur_node + 1)->cost_to_end;
+ cur_node->item = ((u32)literal << OPTIMUM_OFFSET_SHIFT) | 1;
+
+ /* Also consider matches if there are any. */
+ if (num_matches) {
+ const struct lz_match *match;
+ unsigned len;
+ unsigned offset;
+ unsigned offset_slot;
+ u32 offset_cost;
+ u32 cost_to_end;
+
+ /*
+ * Consider each length from the minimum
+ * (DEFLATE_MIN_MATCH_LEN) to the length of the longest
+ * match found at this position. For each length, we
+ * consider only the smallest offset for which that
+ * length is available. Although this is not guaranteed
+ * to be optimal due to the possibility of a larger
+ * offset costing less than a smaller offset to code,
+ * this is a very useful heuristic.
+ */
+ match = cache_ptr - num_matches;
+ len = DEFLATE_MIN_MATCH_LEN;
+ do {
+ offset = match->offset;
+ offset_slot = c->p.n.offset_slot_full[offset];
+ offset_cost =
+ c->p.n.costs.offset_slot[offset_slot];
+ do {
+ cost_to_end = offset_cost +
+ c->p.n.costs.length[len] +
+ (cur_node + len)->cost_to_end;
+ if (cost_to_end < best_cost_to_end) {
+ best_cost_to_end = cost_to_end;
+ cur_node->item = len |
+ ((u32)offset <<
+ OPTIMUM_OFFSET_SHIFT);
+ }
+ } while (++len <= match->length);
+ } while (++match != cache_ptr);
+ cache_ptr -= num_matches;
+ }
+ cur_node->cost_to_end = best_cost_to_end;
+ } while (cur_node != &c->p.n.optimum_nodes[0]);
+}
+
+/*
+ * Choose the literal/match sequence to use for the current block. The basic
+ * algorithm finds a minimum-cost path through the block's graph of
+ * literal/match choices, given a cost model. However, the cost of each symbol
+ * is unknown until the Huffman codes have been built, but at the same time the
+ * Huffman codes depend on the frequencies of chosen symbols. Consequently,
+ * multiple passes must be used to try to approximate an optimal solution. The
+ * first pass uses default costs, mixed with the costs from the previous block
+ * if any. Later passes use the Huffman codeword lengths from the previous pass
+ * as the costs.
+ */
+static void
+deflate_optimize_block(struct libdeflate_compressor *c,
+ const u8 *block_begin, u32 block_length,
+ const struct lz_match *cache_ptr, bool is_first_block,
+ bool is_final_block)
+{
+ unsigned num_passes_remaining = c->p.n.num_optim_passes;
+ u32 lit_cost, len_sym_cost;
+ u32 i;
+
+ /*
+ * Force the block to really end at the desired length, even if some
+ * matches extend beyond it. 0x80000000 acts as an effectively
+ * infinite cost, so the path search never crosses the block end.
+ */
+ for (i = block_length;
+ i <= MIN(block_length - 1 + DEFLATE_MAX_MATCH_LEN,
+ ARRAY_LEN(c->p.n.optimum_nodes) - 1); i++)
+ c->p.n.optimum_nodes[i].cost_to_end = 0x80000000;
+
+ /* Set the initial costs. */
+ deflate_choose_default_litlen_costs(c, block_begin, block_length,
+ &lit_cost, &len_sym_cost);
+ if (is_first_block)
+ deflate_set_default_costs(c, lit_cost, len_sym_cost);
+ else
+ deflate_adjust_costs(c, lit_cost, len_sym_cost);
+
+ do {
+ /* Find the minimum cost path for this pass. */
+ deflate_find_min_cost_path(c, block_length, cache_ptr);
+
+ /* Compute frequencies of the chosen symbols. */
+ deflate_reset_symbol_frequencies(c);
+ deflate_tally_item_list(c, block_length);
+
+ /* Make the Huffman codes. */
+ deflate_make_huffman_codes(&c->freqs, &c->codes);
+
+ /*
+ * Update the costs. After the last optimization pass, the
+ * final costs won't be needed for this block, but they will be
+ * used in determining the initial costs for the next block.
+ */
+ if (--num_passes_remaining || !is_final_block)
+ deflate_set_costs_from_codes(c, &c->codes.lens);
+ } while (num_passes_remaining);
+}
+
+/* Reset all literal/match statistics used by the near-optimal compressor. */
+static void
+deflate_near_optimal_init_stats(struct libdeflate_compressor *c)
+{
+ init_block_split_stats(&c->split_stats);
+ memset(c->p.n.new_match_len_freqs, 0,
+ sizeof(c->p.n.new_match_len_freqs));
+ memset(c->p.n.match_len_freqs, 0, sizeof(c->p.n.match_len_freqs));
+}
+
+/*
+ * Fold the pending "new" observations and match length frequencies into the
+ * accumulated statistics, then clear the "new" counters.
+ */
+static void
+deflate_near_optimal_merge_stats(struct libdeflate_compressor *c)
+{
+ unsigned i;
+
+ merge_new_observations(&c->split_stats);
+ for (i = 0; i < ARRAY_LEN(c->p.n.match_len_freqs); i++) {
+ c->p.n.match_len_freqs[i] += c->p.n.new_match_len_freqs[i];
+ c->p.n.new_match_len_freqs[i] = 0;
+ }
+}
+
+/*
+ * Save some literal/match statistics from the previous block so that
+ * deflate_adjust_costs() will be able to decide how much the current block
+ * differs from the previous one.
+ */
+static void
+deflate_near_optimal_save_stats(struct libdeflate_compressor *c)
+{
+ int i;
+
+ for (i = 0; i < NUM_OBSERVATION_TYPES; i++)
+ c->p.n.prev_observations[i] = c->split_stats.observations[i];
+ c->p.n.prev_num_observations = c->split_stats.num_observations;
+}
+
+/*
+ * Discard the accumulated (merged) statistics, keeping only any pending
+ * "new" observations that have not yet been merged.
+ */
+static void
+deflate_near_optimal_clear_old_stats(struct libdeflate_compressor *c)
+{
+ int i;
+
+ for (i = 0; i < NUM_OBSERVATION_TYPES; i++)
+ c->split_stats.observations[i] = 0;
+ c->split_stats.num_observations = 0;
+ memset(c->p.n.match_len_freqs, 0, sizeof(c->p.n.match_len_freqs));
+}
+
+/*
+ * This is the "near-optimal" DEFLATE compressor. It computes the optimal
+ * representation of each DEFLATE block using a minimum-cost path search over
+ * the graph of possible match/literal choices for that block, assuming a
+ * certain cost for each Huffman symbol.
+ *
+ * For several reasons, the end result is not guaranteed to be optimal:
+ *
+ * - Nonoptimal choice of blocks
+ * - Heuristic limitations on which matches are actually considered
+ * - Symbol costs are unknown until the symbols have already been chosen
+ * (so iterative optimization must be used)
+ */
+static void
+deflate_compress_near_optimal(struct libdeflate_compressor * restrict c,
+ const u8 *in, size_t in_nbytes,
+ struct deflate_output_bitstream *os)
+{
+ const u8 *in_next = in;
+ const u8 *in_block_begin = in_next;
+ const u8 *in_end = in_next + in_nbytes;
+ const u8 *in_cur_base = in_next;
+ const u8 *in_next_slide =
+ in_next + MIN(in_end - in_next, MATCHFINDER_WINDOW_SIZE);
+ unsigned max_len = DEFLATE_MAX_MATCH_LEN;
+ unsigned nice_len = MIN(c->nice_match_length, max_len);
+ struct lz_match *cache_ptr = c->p.n.match_cache;
+ u32 next_hashes[2] = {0, 0};
+
+ bt_matchfinder_init(&c->p.n.bt_mf);
+ deflate_near_optimal_init_stats(c);
+
+ do {
+ /* Starting a new DEFLATE block */
+ const u8 * const in_max_block_end = choose_max_block_end(
+ in_block_begin, in_end, SOFT_MAX_BLOCK_LENGTH);
+ const u8 *prev_end_block_check = NULL;
+ bool change_detected = false;
+ const u8 *next_observation = in_next;
+ unsigned min_len;
+
+ /*
+ * Use the minimum match length heuristic to improve the
+ * literal/match statistics gathered during matchfinding.
+ * However, the actual near-optimal parse won't respect min_len,
+ * as it can accurately assess the costs of different matches.
+ */
+ min_len = calculate_min_match_len(
+ in_block_begin,
+ in_max_block_end - in_block_begin,
+ c->max_search_depth);
+
+ /*
+ * Find matches until we decide to end the block. We end the
+ * block if any of the following is true:
+ *
+ * (1) Maximum block length has been reached
+ * (2) Match cache may overflow.
+ * (3) Block split heuristic says to split now.
+ */
+ for (;;) {
+ struct lz_match *matches;
+ unsigned best_len;
+ size_t remaining = in_end - in_next;
+
+ /* Slide the window forward if needed. */
+ if (in_next == in_next_slide) {
+ bt_matchfinder_slide_window(&c->p.n.bt_mf);
+ in_cur_base = in_next;
+ in_next_slide = in_next +
+ MIN(remaining, MATCHFINDER_WINDOW_SIZE);
+ }
+
+ /*
+ * Find matches with the current position using the
+ * binary tree matchfinder and save them in match_cache.
+ *
+ * Note: the binary tree matchfinder is more suited for
+ * optimal parsing than the hash chain matchfinder. The
+ * reasons for this include:
+ *
+ * - The binary tree matchfinder can find more matches
+ * in the same number of steps.
+ * - One of the major advantages of hash chains is that
+ * skipping positions (not searching for matches at
+ * them) is faster; however, with optimal parsing we
+ * search for matches at almost all positions, so this
+ * advantage of hash chains is negated.
+ */
+ matches = cache_ptr;
+ best_len = 0;
+ adjust_max_and_nice_len(&max_len, &nice_len, remaining);
+ if (likely(max_len >= BT_MATCHFINDER_REQUIRED_NBYTES)) {
+ cache_ptr = bt_matchfinder_get_matches(
+ &c->p.n.bt_mf,
+ in_cur_base,
+ in_next - in_cur_base,
+ max_len,
+ nice_len,
+ c->max_search_depth,
+ next_hashes,
+ matches);
+ if (cache_ptr > matches)
+ best_len = cache_ptr[-1].length;
+ }
+ if (in_next >= next_observation) {
+ if (best_len >= min_len) {
+ observe_match(&c->split_stats,
+ best_len);
+ next_observation = in_next + best_len;
+ c->p.n.new_match_len_freqs[best_len]++;
+ } else {
+ observe_literal(&c->split_stats,
+ *in_next);
+ next_observation = in_next + 1;
+ }
+ }
+
+ cache_ptr->length = cache_ptr - matches;
+ cache_ptr->offset = *in_next;
+ in_next++;
+ cache_ptr++;
+
+ /*
+ * If there was a very long match found, don't cache any
+ * matches for the bytes covered by that match. This
+ * avoids degenerate behavior when compressing highly
+ * redundant data, where the number of matches can be
+ * very large.
+ *
+ * This heuristic doesn't actually hurt the compression
+ * ratio very much. If there's a long match, then the
+ * data must be highly compressible, so it doesn't
+ * matter much what we do.
+ */
+ if (best_len >= DEFLATE_MIN_MATCH_LEN &&
+ best_len >= nice_len) {
+ --best_len;
+ do {
+ remaining = in_end - in_next;
+ if (in_next == in_next_slide) {
+ bt_matchfinder_slide_window(
+ &c->p.n.bt_mf);
+ in_cur_base = in_next;
+ in_next_slide = in_next +
+ MIN(remaining,
+ MATCHFINDER_WINDOW_SIZE);
+ }
+ adjust_max_and_nice_len(&max_len,
+ &nice_len,
+ remaining);
+ if (max_len >=
+ BT_MATCHFINDER_REQUIRED_NBYTES) {
+ bt_matchfinder_skip_byte(
+ &c->p.n.bt_mf,
+ in_cur_base,
+ in_next - in_cur_base,
+ nice_len,
+ c->max_search_depth,
+ next_hashes);
+ }
+ cache_ptr->length = 0;
+ cache_ptr->offset = *in_next;
+ in_next++;
+ cache_ptr++;
+ } while (--best_len);
+ }
+ /* Maximum block length or end of input reached? */
+ if (in_next >= in_max_block_end)
+ break;
+ /* Match cache overflowed? */
+ if (cache_ptr >=
+ &c->p.n.match_cache[MATCH_CACHE_LENGTH])
+ break;
+ /* Not ready to try to end the block (again)? */
+ if (!ready_to_check_block(&c->split_stats,
+ in_block_begin, in_next,
+ in_end))
+ continue;
+ /* Check if it would be worthwhile to end the block. */
+ if (do_end_block_check(&c->split_stats,
+ in_next - in_block_begin)) {
+ change_detected = true;
+ break;
+ }
+ /* Ending the block doesn't seem worthwhile here. */
+ deflate_near_optimal_merge_stats(c);
+ prev_end_block_check = in_next;
+ }
+ /*
+ * All the matches for this block have been cached. Now choose
+ * the precise end of the block and the sequence of items to
+ * output to represent it, then flush the block.
+ */
+ if (change_detected && prev_end_block_check != NULL) {
+ /*
+ * The block is being ended because a recent chunk of
+ * data differs from the rest of the block. We could
+ * end the block at 'in_next' like the greedy and lazy
+ * compressors do, but that's not ideal since it would
+ * include the differing chunk in the block. The
+ * near-optimal compressor has time to do a better job.
+ * Therefore, we rewind to just before the chunk, and
+ * output a block that only goes up to there.
+ *
+ * We then set things up to correctly start the next
+ * block, considering that some work has already been
+ * done on it (some matches found and stats gathered).
+ */
+ struct lz_match *orig_cache_ptr = cache_ptr;
+ const u8 *in_block_end = prev_end_block_check;
+ u32 block_length = in_block_end - in_block_begin;
+ bool is_first = (in_block_begin == in);
+ bool is_final = false;
+ u32 num_bytes_to_rewind = in_next - in_block_end;
+ size_t cache_len_rewound;
+
+ /* Rewind the match cache. */
+ do {
+ cache_ptr--;
+ cache_ptr -= cache_ptr->length;
+ } while (--num_bytes_to_rewind);
+ cache_len_rewound = orig_cache_ptr - cache_ptr;
+
+ deflate_optimize_block(c, in_block_begin, block_length,
+ cache_ptr, is_first, is_final);
+ deflate_flush_block(c, os, in_block_begin, block_length,
+ NULL, is_final);
+ memmove(c->p.n.match_cache, cache_ptr,
+ cache_len_rewound * sizeof(*cache_ptr));
+ cache_ptr = &c->p.n.match_cache[cache_len_rewound];
+ deflate_near_optimal_save_stats(c);
+ /*
+ * Clear the stats for the just-flushed block, leaving
+ * just the stats for the beginning of the next block.
+ */
+ deflate_near_optimal_clear_old_stats(c);
+ in_block_begin = in_block_end;
+ } else {
+ /*
+ * The block is being ended for a reason other than a
+ * differing data chunk being detected. Don't rewind at
+ * all; just end the block at the current position.
+ */
+ u32 block_length = in_next - in_block_begin;
+ bool is_first = (in_block_begin == in);
+ bool is_final = (in_next == in_end);
+
+ deflate_near_optimal_merge_stats(c);
+ deflate_optimize_block(c, in_block_begin, block_length,
+ cache_ptr, is_first, is_final);
+ deflate_flush_block(c, os, in_block_begin, block_length,
+ NULL, is_final);
+ cache_ptr = &c->p.n.match_cache[0];
+ deflate_near_optimal_save_stats(c);
+ deflate_near_optimal_init_stats(c);
+ in_block_begin = in_next;
+ }
+ } while (in_next != in_end);
+}
+
+/*
+ * Initialize c->p.n.offset_slot_full: a direct lookup table that maps each
+ * representable match offset to its DEFLATE offset slot, so the near-optimal
+ * parser can find an offset's slot with a single array index instead of a
+ * search.  Only needed (and only called) for the near-optimal levels.
+ */
+static void
+deflate_init_offset_slot_full(struct libdeflate_compressor *c)
+{
+	unsigned offset_slot;
+	unsigned offset;
+	unsigned offset_end;
+
+	/*
+	 * For each offset slot, fill in every table entry in that slot's
+	 * range: [base, base + 2**extra_bits).  Together the slots' ranges
+	 * cover all valid offsets, so every entry gets written.
+	 */
+	for (offset_slot = 0; offset_slot < ARRAY_LEN(deflate_offset_slot_base);
+	     offset_slot++) {
+		offset = deflate_offset_slot_base[offset_slot];
+		offset_end = offset +
+			     (1 << deflate_extra_offset_bits[offset_slot]);
+		do {
+			c->p.n.offset_slot_full[offset] = offset_slot;
+		} while (++offset != offset_end);
+	}
+}
+
+#endif /* SUPPORT_NEAR_OPTIMAL_PARSING */
+
+/*
+ * Allocate a DEFLATE compressor for the given compression level (0-12).
+ * Returns NULL if the level is out of range or if memory allocation fails.
+ * The allocation is trimmed to just the per-level parser state required:
+ * 'p.f' for level 1, 'p.g' for levels 2-9, and 'p.n' for levels 10-12
+ * (the latter only when SUPPORT_NEAR_OPTIMAL_PARSING is enabled; otherwise
+ * levels 10-12 fall back to the level-9 configuration via the 'default'
+ * label placement below).
+ */
+LIBDEFLATEAPI struct libdeflate_compressor *
+libdeflate_alloc_compressor(int compression_level)
+{
+	struct libdeflate_compressor *c;
+	/* Start with only the level-independent part of the struct. */
+	size_t size = offsetof(struct libdeflate_compressor, p);
+
+	check_buildtime_parameters();
+
+	if (compression_level < 0 || compression_level > 12)
+		return NULL;
+
+#if SUPPORT_NEAR_OPTIMAL_PARSING
+	if (compression_level >= 10)
+		size += sizeof(c->p.n);
+	else
+#endif
+	{
+		if (compression_level >= 2)
+			size += sizeof(c->p.g);
+		else if (compression_level == 1)
+			size += sizeof(c->p.f);
+	}
+
+	/* Aligned allocation: the matchfinders require aligned memory. */
+	c = libdeflate_aligned_malloc(MATCHFINDER_MEM_ALIGNMENT, size);
+	if (!c)
+		return NULL;
+
+	c->compression_level = compression_level;
+
+	/*
+	 * The higher the compression level, the more we should bother trying to
+	 * compress very small inputs.
+	 */
+	c->max_passthrough_size = 55 - (compression_level * 4);
+
+	/*
+	 * Select the implementation function and tune the match-finding
+	 * parameters (search depth, nice match length, optimization passes)
+	 * for the requested level.
+	 */
+	switch (compression_level) {
+	case 0:
+		/* Level 0: always store uncompressed. */
+		c->max_passthrough_size = SIZE_MAX;
+		c->impl = NULL; /* not used */
+		break;
+	case 1:
+		c->impl = deflate_compress_fastest;
+		/* max_search_depth is unused. */
+		c->nice_match_length = 32;
+		break;
+	case 2:
+		c->impl = deflate_compress_greedy;
+		c->max_search_depth = 6;
+		c->nice_match_length = 10;
+		break;
+	case 3:
+		c->impl = deflate_compress_greedy;
+		c->max_search_depth = 12;
+		c->nice_match_length = 14;
+		break;
+	case 4:
+		c->impl = deflate_compress_greedy;
+		c->max_search_depth = 16;
+		c->nice_match_length = 30;
+		break;
+	case 5:
+		c->impl = deflate_compress_lazy;
+		c->max_search_depth = 16;
+		c->nice_match_length = 30;
+		break;
+	case 6:
+		c->impl = deflate_compress_lazy;
+		c->max_search_depth = 35;
+		c->nice_match_length = 65;
+		break;
+	case 7:
+		c->impl = deflate_compress_lazy;
+		c->max_search_depth = 100;
+		c->nice_match_length = 130;
+		break;
+	case 8:
+		c->impl = deflate_compress_lazy2;
+		c->max_search_depth = 300;
+		c->nice_match_length = DEFLATE_MAX_MATCH_LEN;
+		break;
+	case 9:
+#if !SUPPORT_NEAR_OPTIMAL_PARSING
+	default:
+#endif
+		c->impl = deflate_compress_lazy2;
+		c->max_search_depth = 600;
+		c->nice_match_length = DEFLATE_MAX_MATCH_LEN;
+		break;
+#if SUPPORT_NEAR_OPTIMAL_PARSING
+	case 10:
+		c->impl = deflate_compress_near_optimal;
+		c->max_search_depth = 35;
+		c->nice_match_length = 75;
+		c->p.n.num_optim_passes = 2;
+		deflate_init_offset_slot_full(c);
+		break;
+	case 11:
+		c->impl = deflate_compress_near_optimal;
+		c->max_search_depth = 70;
+		c->nice_match_length = 150;
+		c->p.n.num_optim_passes = 3;
+		deflate_init_offset_slot_full(c);
+		break;
+	case 12:
+	default:
+		c->impl = deflate_compress_near_optimal;
+		c->max_search_depth = 150;
+		c->nice_match_length = DEFLATE_MAX_MATCH_LEN;
+		c->p.n.num_optim_passes = 4;
+		deflate_init_offset_slot_full(c);
+		break;
+#endif /* SUPPORT_NEAR_OPTIMAL_PARSING */
+	}
+
+	deflate_init_static_codes(c);
+
+	return c;
+}
+
+/*
+ * Compress 'in_nbytes' bytes from 'in' into 'out', returning the compressed
+ * size in bytes, or 0 if the compressed data did not fit (or would have come
+ * within OUTPUT_END_PADDING bytes of the end of the output buffer -- the two
+ * cases are deliberately not distinguished; see the comment below and
+ * libdeflate_deflate_compress_bound()).
+ */
+LIBDEFLATEAPI size_t
+libdeflate_deflate_compress(struct libdeflate_compressor *c,
+			    const void *in, size_t in_nbytes,
+			    void *out, size_t out_nbytes_avail)
+{
+	struct deflate_output_bitstream os;
+
+	/*
+	 * For extremely short inputs, or for compression level 0, just output
+	 * uncompressed blocks.
+	 */
+	if (unlikely(in_nbytes <= c->max_passthrough_size))
+		return deflate_compress_none(in, in_nbytes,
+					     out, out_nbytes_avail);
+
+	/*
+	 * Initialize the output bitstream structure.
+	 *
+	 * The end is set to OUTPUT_END_PADDING below the true end, so that
+	 * FLUSH_BITS() can be more efficient.
+	 */
+	if (unlikely(out_nbytes_avail <= OUTPUT_END_PADDING))
+		return 0;
+	os.bitbuf = 0;
+	os.bitcount = 0;
+	os.next = out;
+	os.end = os.next + out_nbytes_avail - OUTPUT_END_PADDING;
+	/* Run the level-specific implementation chosen at allocation time. */
+	(*c->impl)(c, in, in_nbytes, &os);
+	/*
+	 * If 'os.next' reached 'os.end', then either there was not enough space
+	 * in the output buffer, or the compressed size would have been within
+	 * OUTPUT_END_PADDING of the true end. For performance reasons we don't
+	 * distinguish between these cases; we just make sure to return some
+	 * extra space from libdeflate_deflate_compress_bound().
+	 */
+	if (os.next >= os.end)
+		return 0;
+	ASSERT(os.bitcount <= 7);
+	/* Flush any remaining partial byte of the bitbuffer. */
+	if (os.bitcount)
+		*os.next++ = os.bitbuf;
+	return os.next - (u8 *)out;
+}
+
+/*
+ * Free a compressor allocated with libdeflate_alloc_compressor().
+ * Uses the aligned-free counterpart of libdeflate_aligned_malloc();
+ * safe to call with NULL (free of NULL is a no-op).
+ */
+LIBDEFLATEAPI void
+libdeflate_free_compressor(struct libdeflate_compressor *c)
+{
+	libdeflate_aligned_free(c);
+}
+
+/*
+ * Return the compression level this compressor was allocated with.
+ * Intentionally not LIBDEFLATEAPI: per deflate_compress.h, this is an
+ * internal query used for zlib and gzip header generation.
+ */
+unsigned int
+libdeflate_get_compression_level(struct libdeflate_compressor *c)
+{
+	return c->compression_level;
+}
+
+/*
+ * Return a worst-case upper bound on the compressed size of any input of
+ * 'in_nbytes' bytes, derived from the cost of storing the data as a series
+ * of uncompressed DEFLATE blocks (the detailed argument is in the comments
+ * below).  'c' is unused here but kept for API symmetry with
+ * libdeflate_deflate_compress().
+ */
+LIBDEFLATEAPI size_t
+libdeflate_deflate_compress_bound(struct libdeflate_compressor *c,
+				  size_t in_nbytes)
+{
+	size_t bound = 0;
+	size_t max_blocks;
+
+	/*
+	 * Since the compressor never uses a compressed block when an
+	 * uncompressed block is cheaper, the worst case can be no worse than
+	 * the case where only uncompressed blocks are used.
+	 *
+	 * This is true even though up to 7 bits are "wasted" to byte-align the
+	 * bitstream when a compressed block is followed by an uncompressed
+	 * block. This is because a compressed block wouldn't have been used if
+	 * it wasn't cheaper than an uncompressed block, and uncompressed blocks
+	 * always end on a byte boundary. So the alignment bits will, at worst,
+	 * go up to the place where the uncompressed block would have ended.
+	 */
+
+	/*
+	 * The minimum length that is passed to deflate_flush_block() is
+	 * MIN_BLOCK_LENGTH bytes, except for the final block if needed.
+	 *
+	 * If deflate_flush_block() decides to use an uncompressed block, it
+	 * actually will (in general) output a series of uncompressed blocks in
+	 * order to stay within the UINT16_MAX limit of DEFLATE. But this can
+	 * be disregarded here as long as '2 * MIN_BLOCK_LENGTH <= UINT16_MAX',
+	 * as in that case this behavior can't result in more blocks than the
+	 * case where deflate_flush_block() is called with min-length inputs.
+	 *
+	 * So the number of uncompressed blocks needed would be bounded by
+	 * DIV_ROUND_UP(in_nbytes, MIN_BLOCK_LENGTH). However, empty inputs
+	 * need 1 (empty) block, which gives the final expression below.
+	 */
+	STATIC_ASSERT(2 * MIN_BLOCK_LENGTH <= UINT16_MAX);
+	max_blocks = MAX(DIV_ROUND_UP(in_nbytes, MIN_BLOCK_LENGTH), 1);
+
+	/*
+	 * Each uncompressed block has 5 bytes of overhead, for the BFINAL,
+	 * BTYPE, LEN, and NLEN fields. (For the reason explained earlier, the
+	 * alignment bits at the very start of the block can be disregarded;
+	 * they would otherwise increase the overhead to 6 bytes per block.)
+	 */
+	bound += 5 * max_blocks;
+
+	/* Account for the data itself, stored uncompressed. */
+	bound += in_nbytes;
+
+	/*
+	 * Add 1 + OUTPUT_END_PADDING because for performance reasons, the
+	 * compressor doesn't distinguish between cases where there wasn't
+	 * enough space and cases where the compressed size would have been
+	 * 'out_nbytes_avail - OUTPUT_END_PADDING' or greater. Adding
+	 * 1 + OUTPUT_END_PADDING to the bound ensures the needed wiggle room.
+	 */
+	bound += 1 + OUTPUT_END_PADDING;
+
+	return bound;
+}
diff --git a/tools/z64compress/src/enc/libdeflate/lib/deflate_compress.h b/tools/z64compress/src/enc/libdeflate/lib/deflate_compress.h
new file mode 100644
index 000000000..9451d548b
--- /dev/null
+++ b/tools/z64compress/src/enc/libdeflate/lib/deflate_compress.h
@@ -0,0 +1,15 @@
+#ifndef LIB_DEFLATE_COMPRESS_H
+#define LIB_DEFLATE_COMPRESS_H
+
+#include "lib_common.h"
+
+/*
+ * DEFLATE compression is private to deflate_compress.c, but we do need to be
+ * able to query the compression level for zlib and gzip header generation.
+ */
+
+struct libdeflate_compressor;
+
+unsigned int libdeflate_get_compression_level(struct libdeflate_compressor *c);
+
+#endif /* LIB_DEFLATE_COMPRESS_H */
diff --git a/tools/z64compress/src/enc/libdeflate/lib/deflate_constants.h b/tools/z64compress/src/enc/libdeflate/lib/deflate_constants.h
new file mode 100644
index 000000000..95c9e0a50
--- /dev/null
+++ b/tools/z64compress/src/enc/libdeflate/lib/deflate_constants.h
@@ -0,0 +1,56 @@
+/*
+ * deflate_constants.h - constants for the DEFLATE compression format
+ */
+
+#ifndef LIB_DEFLATE_CONSTANTS_H
+#define LIB_DEFLATE_CONSTANTS_H
+
+/* Valid block types */
+#define DEFLATE_BLOCKTYPE_UNCOMPRESSED 0
+#define DEFLATE_BLOCKTYPE_STATIC_HUFFMAN 1
+#define DEFLATE_BLOCKTYPE_DYNAMIC_HUFFMAN 2
+
+/* Minimum and maximum supported match lengths (in bytes) */
+#define DEFLATE_MIN_MATCH_LEN 3
+#define DEFLATE_MAX_MATCH_LEN 258
+
+/* Maximum supported match offset (in bytes) */
+#define DEFLATE_MAX_MATCH_OFFSET 32768
+
+/* log2 of DEFLATE_MAX_MATCH_OFFSET */
+#define DEFLATE_WINDOW_ORDER 15
+
+/* Number of symbols in each Huffman code. Note: for the literal/length
+ * and offset codes, these are actually the maximum values; a given block
+ * might use fewer symbols. */
+#define DEFLATE_NUM_PRECODE_SYMS 19
+#define DEFLATE_NUM_LITLEN_SYMS 288
+#define DEFLATE_NUM_OFFSET_SYMS 32
+
+/* The maximum number of symbols across all codes */
+#define DEFLATE_MAX_NUM_SYMS 288
+
+/* Division of symbols in the literal/length code */
+#define DEFLATE_NUM_LITERALS 256
+#define DEFLATE_END_OF_BLOCK 256
+#define DEFLATE_FIRST_LEN_SYM 257
+
+/* Maximum codeword length, in bits, within each Huffman code */
+#define DEFLATE_MAX_PRE_CODEWORD_LEN 7
+#define DEFLATE_MAX_LITLEN_CODEWORD_LEN 15
+#define DEFLATE_MAX_OFFSET_CODEWORD_LEN 15
+
+/* The maximum codeword length across all codes */
+#define DEFLATE_MAX_CODEWORD_LEN 15
+
+/* Maximum possible overrun when decoding codeword lengths */
+#define DEFLATE_MAX_LENS_OVERRUN 137
+
+/*
+ * Maximum number of extra bits that may be required to represent a match
+ * length or offset.
+ */
+#define DEFLATE_MAX_EXTRA_LENGTH_BITS 5
+#define DEFLATE_MAX_EXTRA_OFFSET_BITS 13
+
+#endif /* LIB_DEFLATE_CONSTANTS_H */
diff --git a/tools/z64compress/src/enc/libdeflate/lib/deflate_decompress.c b/tools/z64compress/src/enc/libdeflate/lib/deflate_decompress.c
new file mode 100644
index 000000000..7d22fc443
--- /dev/null
+++ b/tools/z64compress/src/enc/libdeflate/lib/deflate_decompress.c
@@ -0,0 +1,1176 @@
+/*
+ * deflate_decompress.c - a decompressor for DEFLATE
+ *
+ * Copyright 2016 Eric Biggers
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * ---------------------------------------------------------------------------
+ *
+ * This is a highly optimized DEFLATE decompressor. It is much faster than
+ * vanilla zlib, typically well over twice as fast, though results vary by CPU.
+ *
+ * Why this is faster than vanilla zlib:
+ *
+ * - Word accesses rather than byte accesses when reading input
+ * - Word accesses rather than byte accesses when copying matches
+ * - Faster Huffman decoding combined with various DEFLATE-specific tricks
+ * - Larger bitbuffer variable that doesn't need to be refilled as often
+ * - Other optimizations to remove unnecessary branches
+ * - Only full-buffer decompression is supported, so the code doesn't need to
+ * support stopping and resuming decompression.
+ * - On x86_64, a version of the decompression routine is compiled with BMI2
+ * instructions enabled and is used automatically at runtime when supported.
+ */
+
+#include <limits.h>
+
+#include "lib_common.h"
+#include "deflate_constants.h"
+
+#include "libdeflate.h"
+
+/*
+ * If the expression passed to SAFETY_CHECK() evaluates to false, then the
+ * decompression routine immediately returns LIBDEFLATE_BAD_DATA, indicating the
+ * compressed data is invalid.
+ *
+ * Theoretically, these checks could be disabled for specialized applications
+ * where all input to the decompressor will be trusted.
+ */
+#if 0
+# pragma message("UNSAFE DECOMPRESSION IS ENABLED. THIS MUST ONLY BE USED IF THE DECOMPRESSOR INPUT WILL ALWAYS BE TRUSTED!")
+# define SAFETY_CHECK(expr) (void)(expr)
+#else
+# define SAFETY_CHECK(expr) if (unlikely(!(expr))) return LIBDEFLATE_BAD_DATA
+#endif
+
+/*****************************************************************************
+ * Input bitstream *
+ *****************************************************************************/
+
+/*
+ * The state of the "input bitstream" consists of the following variables:
+ *
+ * - in_next: a pointer to the next unread byte in the input buffer
+ *
+ * - in_end: a pointer to just past the end of the input buffer
+ *
+ * - bitbuf: a word-sized variable containing bits that have been read from
+ * the input buffer or from the implicit appended zero bytes
+ *
+ * - bitsleft: the number of bits in 'bitbuf' available to be consumed.
+ * After REFILL_BITS_BRANCHLESS(), 'bitbuf' can actually
+ * contain more bits than this. However, only the bits counted
+ * by 'bitsleft' can actually be consumed; the rest can only be
+ * used for preloading.
+ *
+ * As a micro-optimization, we allow bits 8 and higher of
+ * 'bitsleft' to contain garbage. When consuming the bits
+ * associated with a decode table entry, this allows us to do
+ * 'bitsleft -= entry' instead of 'bitsleft -= (u8)entry'.
+ * On some CPUs, this helps reduce instruction dependencies.
+ * This does have the disadvantage that 'bitsleft' sometimes
+ * needs to be cast to 'u8', such as when it's used as a shift
+ * amount in REFILL_BITS_BRANCHLESS(). But that one happens
+ * for free since most CPUs ignore high bits in shift amounts.
+ *
+ * - overread_count: the total number of implicit appended zero bytes that
+ * have been loaded into the bitbuffer, including any
+ * counted by 'bitsleft' and any already consumed
+ */
+
+/*
+ * The type for the bitbuffer variable ('bitbuf' described above). For best
+ * performance, this should have size equal to a machine word.
+ *
+ * 64-bit platforms have a significant advantage: they get a bigger bitbuffer
+ * which they don't have to refill as often.
+ */
+typedef machine_word_t bitbuf_t;
+#define BITBUF_NBITS (8 * (int)sizeof(bitbuf_t))
+
+/* BITMASK(n) returns a bitmask of length 'n'. */
+#define BITMASK(n) (((bitbuf_t)1 << (n)) - 1)
+
+/*
+ * MAX_BITSLEFT is the maximum number of consumable bits, i.e. the maximum value
+ * of '(u8)bitsleft'. This is the size of the bitbuffer variable, minus 1 if
+ * the branchless refill method is being used (see REFILL_BITS_BRANCHLESS()).
+ */
+#define MAX_BITSLEFT \
+ (UNALIGNED_ACCESS_IS_FAST ? BITBUF_NBITS - 1 : BITBUF_NBITS)
+
+/*
+ * CONSUMABLE_NBITS is the minimum number of bits that are guaranteed to be
+ * consumable (counted in 'bitsleft') immediately after refilling the bitbuffer.
+ * Since only whole bytes can be added to 'bitsleft', the worst case is
+ * 'MAX_BITSLEFT - 7': the smallest amount where another byte doesn't fit.
+ */
+#define CONSUMABLE_NBITS (MAX_BITSLEFT - 7)
+
+/*
+ * FASTLOOP_PRELOADABLE_NBITS is the minimum number of bits that are guaranteed
+ * to be preloadable immediately after REFILL_BITS_IN_FASTLOOP(). (It is *not*
+ * guaranteed after REFILL_BITS(), since REFILL_BITS() falls back to a
+ * byte-at-a-time refill method near the end of input.) This may exceed the
+ * number of consumable bits (counted by 'bitsleft'). Any bits not counted in
+ * 'bitsleft' can only be used for precomputation and cannot be consumed.
+ */
+#define FASTLOOP_PRELOADABLE_NBITS \
+ (UNALIGNED_ACCESS_IS_FAST ? BITBUF_NBITS : CONSUMABLE_NBITS)
+
+/*
+ * PRELOAD_SLACK is the minimum number of bits that are guaranteed to be
+ * preloadable but not consumable, following REFILL_BITS_IN_FASTLOOP() and any
+ * subsequent consumptions. This is 1 bit if the branchless refill method is
+ * being used, and 0 bits otherwise.
+ */
+#define PRELOAD_SLACK MAX(0, FASTLOOP_PRELOADABLE_NBITS - MAX_BITSLEFT)
+
+/*
+ * CAN_CONSUME(n) is true if it's guaranteed that if the bitbuffer has just been
+ * refilled, then it's always possible to consume 'n' bits from it. 'n' should
+ * be a compile-time constant, to enable compile-time evaluation.
+ */
+#define CAN_CONSUME(n) (CONSUMABLE_NBITS >= (n))
+
+/*
+ * CAN_CONSUME_AND_THEN_PRELOAD(consume_nbits, preload_nbits) is true if it's
+ * guaranteed that after REFILL_BITS_IN_FASTLOOP(), it's always possible to
+ * consume 'consume_nbits' bits, then preload 'preload_nbits' bits. The
+ * arguments should be compile-time constants to enable compile-time evaluation.
+ */
+#define CAN_CONSUME_AND_THEN_PRELOAD(consume_nbits, preload_nbits) \
+ (CONSUMABLE_NBITS >= (consume_nbits) && \
+ FASTLOOP_PRELOADABLE_NBITS >= (consume_nbits) + (preload_nbits))
+
+/*
+ * REFILL_BITS_BRANCHLESS() branchlessly refills the bitbuffer variable by
+ * reading the next word from the input buffer and updating 'in_next' and
+ * 'bitsleft' based on how many bits were refilled -- counting whole bytes only.
+ * This is much faster than reading a byte at a time, at least if the CPU is
+ * little endian and supports fast unaligned memory accesses.
+ *
+ * The simplest way of branchlessly updating 'bitsleft' would be:
+ *
+ * bitsleft += (MAX_BITSLEFT - bitsleft) & ~7;
+ *
+ * To make it faster, we define MAX_BITSLEFT to be 'WORDBITS - 1' rather than
+ * WORDBITS, so that in binary it looks like 111111 or 11111. Then, we update
+ * 'bitsleft' by just setting the bits above the low 3 bits:
+ *
+ * bitsleft |= MAX_BITSLEFT & ~7;
+ *
+ * That compiles down to a single instruction like 'or $0x38, %rbp'. Using
+ * 'MAX_BITSLEFT == WORDBITS - 1' also has the advantage that refills can be
+ * done when 'bitsleft == MAX_BITSLEFT' without invoking undefined behavior.
+ *
+ * The simplest way of branchlessly updating 'in_next' would be:
+ *
+ * in_next += (MAX_BITSLEFT - bitsleft) >> 3;
+ *
+ * With 'MAX_BITSLEFT == WORDBITS - 1' we could use an XOR instead, though this
+ * isn't really better:
+ *
+ * in_next += (MAX_BITSLEFT ^ bitsleft) >> 3;
+ *
+ * An alternative which can be marginally better is the following:
+ *
+ * in_next += sizeof(bitbuf_t) - 1;
+ * in_next -= (bitsleft >> 3) & 0x7;
+ *
+ * It seems this would increase the number of CPU instructions from 3 (sub, shr,
+ * add) to 4 (add, shr, and, sub). However, if the CPU has a bitfield
+ * extraction instruction (e.g. arm's ubfx), it stays at 3, and is potentially
+ * more efficient because the length of the longest dependency chain decreases
+ * from 3 to 2. This alternative also has the advantage that it ignores the
+ * high bits in 'bitsleft', so it is compatible with the micro-optimization we
+ * use where we let the high bits of 'bitsleft' contain garbage.
+ */
+#define REFILL_BITS_BRANCHLESS() \
+do { \
+ bitbuf |= get_unaligned_leword(in_next) << (u8)bitsleft; \
+ in_next += sizeof(bitbuf_t) - 1; \
+ in_next -= (bitsleft >> 3) & 0x7; \
+ bitsleft |= MAX_BITSLEFT & ~7; \
+} while (0)
+
+/*
+ * REFILL_BITS() loads bits from the input buffer until the bitbuffer variable
+ * contains at least CONSUMABLE_NBITS consumable bits.
+ *
+ * This checks for the end of input, and it doesn't guarantee
+ * FASTLOOP_PRELOADABLE_NBITS, so it can't be used in the fastloop.
+ *
+ * If we would overread the input buffer, we just don't read anything, leaving
+ * the bits zeroed but marking them filled. This simplifies the decompressor
+ * because it removes the need to always be able to distinguish between real
+ * overreads and overreads caused only by the decompressor's own lookahead.
+ *
+ * We do still keep track of the number of bytes that have been overread, for
+ * two reasons. First, it allows us to determine the exact number of bytes that
+ * were consumed once the stream ends or an uncompressed block is reached.
+ * Second, it allows us to stop early if the overread amount gets so large (more
+ * than sizeof bitbuf) that it can only be caused by a real overread. (The
+ * second part is arguably unneeded, since libdeflate is buffer-based; given
+ * infinite zeroes, it will eventually either completely fill the output buffer
+ * or return an error. However, we do it to be slightly more friendly to the
+ * not-recommended use case of decompressing with an unknown output size.)
+ */
+#define REFILL_BITS() \
+do { \
+ if (UNALIGNED_ACCESS_IS_FAST && \
+ likely(in_end - in_next >= sizeof(bitbuf_t))) { \
+ REFILL_BITS_BRANCHLESS(); \
+ } else { \
+ while ((u8)bitsleft < CONSUMABLE_NBITS) { \
+ if (likely(in_next != in_end)) { \
+ bitbuf |= (bitbuf_t)*in_next++ << \
+ (u8)bitsleft; \
+ } else { \
+ overread_count++; \
+ SAFETY_CHECK(overread_count <= \
+ sizeof(bitbuf_t)); \
+ } \
+ bitsleft += 8; \
+ } \
+ } \
+} while (0)
+
+/*
+ * REFILL_BITS_IN_FASTLOOP() is like REFILL_BITS(), but it doesn't check for the
+ * end of the input. It can only be used in the fastloop.
+ */
+#define REFILL_BITS_IN_FASTLOOP() \
+do { \
+ STATIC_ASSERT(UNALIGNED_ACCESS_IS_FAST || \
+ FASTLOOP_PRELOADABLE_NBITS == CONSUMABLE_NBITS); \
+ if (UNALIGNED_ACCESS_IS_FAST) { \
+ REFILL_BITS_BRANCHLESS(); \
+ } else { \
+ while ((u8)bitsleft < CONSUMABLE_NBITS) { \
+ bitbuf |= (bitbuf_t)*in_next++ << (u8)bitsleft; \
+ bitsleft += 8; \
+ } \
+ } \
+} while (0)
+
+/*
+ * This is the worst-case maximum number of output bytes that are written to
+ * during each iteration of the fastloop. The worst case is 2 literals, then a
+ * match of length DEFLATE_MAX_MATCH_LEN. Additionally, some slack space must
+ * be included for the intentional overrun in the match copy implementation.
+ */
+#define FASTLOOP_MAX_BYTES_WRITTEN \
+ (2 + DEFLATE_MAX_MATCH_LEN + (5 * WORDBYTES) - 1)
+
+/*
+ * This is the worst-case maximum number of input bytes that are read during
+ * each iteration of the fastloop. To get this value, we first compute the
+ * greatest number of bits that can be refilled during a loop iteration. The
+ * refill at the beginning can add at most MAX_BITSLEFT, and the amount that can
+ * be refilled later is no more than the maximum amount that can be consumed by
+ * 2 literals that don't need a subtable, then a match. We convert this value
+ * to bytes, rounding up; this gives the maximum number of bytes that 'in_next'
+ * can be advanced. Finally, we add sizeof(bitbuf_t) to account for
+ * REFILL_BITS_BRANCHLESS() reading a word past 'in_next'.
+ */
+#define FASTLOOP_MAX_BYTES_READ \
+ (DIV_ROUND_UP(MAX_BITSLEFT + (2 * LITLEN_TABLEBITS) + \
+ LENGTH_MAXBITS + OFFSET_MAXBITS, 8) + \
+ sizeof(bitbuf_t))
+
+/*****************************************************************************
+ * Huffman decoding *
+ *****************************************************************************/
+
+/*
+ * The fastest way to decode Huffman-encoded data is basically to use a decode
+ * table that maps the next TABLEBITS bits of data to their symbol. Each entry
+ * decode_table[i] maps to the symbol whose codeword is a prefix of 'i'. A
+ * symbol with codeword length 'n' has '2**(TABLEBITS-n)' entries in the table.
+ *
+ * Ideally, TABLEBITS and the maximum codeword length would be the same; some
+ * compression formats are designed with this goal in mind. Unfortunately, in
+ * DEFLATE, the maximum litlen and offset codeword lengths are 15 bits, which is
+ * too large for a practical TABLEBITS. It's not *that* much larger, though, so
+ * the workaround is to use a single level of subtables. In the main table,
+ * entries for prefixes of codewords longer than TABLEBITS contain a "pointer"
+ * to the appropriate subtable along with the number of bits it is indexed with.
+ *
+ * The most efficient way to allocate subtables is to allocate them dynamically
+ * after the main table. The worst-case number of table entries needed,
+ * including subtables, is precomputable; see the ENOUGH constants below.
+ *
+ * A useful optimization is to store the codeword lengths in the decode table so
+ * that they don't have to be looked up by indexing a separate table that maps
+ * symbols to their codeword lengths. We basically do this; however, for the
+ * litlen and offset codes we also implement some DEFLATE-specific optimizations
+ * that build in the consideration of the "extra bits" and the
+ * literal/length/end-of-block division. For the exact decode table entry
+ * format we use, see the definitions of the *_decode_results[] arrays below.
+ */
+
+
+/*
+ * These are the TABLEBITS values we use for each of the DEFLATE Huffman codes,
+ * along with their corresponding ENOUGH values.
+ *
+ * For the precode, we use PRECODE_TABLEBITS == 7 since this is the maximum
+ * precode codeword length. This avoids ever needing subtables.
+ *
+ * For the litlen and offset codes, we cannot realistically avoid ever needing
+ * subtables, since litlen and offset codewords can be up to 15 bits. A higher
+ * TABLEBITS reduces the number of lookups that need a subtable, which increases
+ * performance; however, it increases memory usage and makes building the table
+ * take longer, which decreases performance. We choose values that work well in
+ * practice, making subtables rarely needed without making the tables too large.
+ *
+ * Our choice of OFFSET_TABLEBITS == 8 is a bit low; without any special
+ * considerations, 9 would fit the trade-off curve better. However, there is a
+ * performance benefit to using exactly 8 bits when it is a compile-time
+ * constant, as many CPUs can take the low byte more easily than the low 9 bits.
+ *
+ * zlib treats its equivalents of TABLEBITS as maximum values; whenever it
+ * builds a table, it caps the actual table_bits to the longest codeword. This
+ * makes sense in theory, as there's no need for the table to be any larger than
+ * needed to support the longest codeword. However, having the table bits be a
+ * compile-time constant is beneficial to the performance of the decode loop, so
+ * there is a trade-off. libdeflate currently uses the dynamic table_bits
+ * strategy for the litlen table only, due to its larger maximum size.
+ * PRECODE_TABLEBITS and OFFSET_TABLEBITS are smaller, so going dynamic there
+ * isn't as useful, and OFFSET_TABLEBITS=8 is useful as mentioned above.
+ *
+ * Each TABLEBITS value has a corresponding ENOUGH value that gives the
+ * worst-case maximum number of decode table entries, including the main table
+ * and all subtables. The ENOUGH value depends on three parameters:
+ *
+ * (1) the maximum number of symbols in the code (DEFLATE_NUM_*_SYMS)
+ * (2) the maximum number of main table bits (*_TABLEBITS)
+ * (3) the maximum allowed codeword length (DEFLATE_MAX_*_CODEWORD_LEN)
+ *
+ * The ENOUGH values were computed using the utility program 'enough' from zlib.
+ */
+#define PRECODE_TABLEBITS 7
+#define PRECODE_ENOUGH 128 /* enough 19 7 7 */
+#define LITLEN_TABLEBITS 11
+#define LITLEN_ENOUGH 2342 /* enough 288 11 15 */
+#define OFFSET_TABLEBITS 8
+#define OFFSET_ENOUGH 402 /* enough 32 8 15 */
+
+/*
+ * make_decode_table_entry() creates a decode table entry for the given symbol
+ * by combining the static part 'decode_results[sym]' with the dynamic part
+ * 'len', which is the remaining codeword length (the codeword length for main
+ * table entries, or the codeword length minus TABLEBITS for subtable entries).
+ *
+ * In all cases, we add 'len' to each of the two low-order bytes to create the
+ * appropriately-formatted decode table entry. See the definitions of the
+ * *_decode_results[] arrays below, where the entry format is described.
+ */
+static forceinline u32
+make_decode_table_entry(const u32 decode_results[], u32 sym, u32 len)
+{
+	/*
+	 * 'len' is added into both of the two low-order bytes: the low byte
+	 * accumulates the total number of bits to consume, and the second
+	 * byte holds the remaining codeword length.  See the entry format
+	 * descriptions accompanying the *_decode_results[] arrays below.
+	 */
+	return decode_results[sym] + (len << 8) + len;
+}
+
+/*
+ * Here is the format of our precode decode table entries. Bits not explicitly
+ * described contain zeroes:
+ *
+ * Bit 20-16: presym
+ * Bit 10-8: codeword length [not used]
+ * Bit 2-0: codeword length
+ *
+ * The precode decode table never has subtables, since we use
+ * PRECODE_TABLEBITS == DEFLATE_MAX_PRE_CODEWORD_LEN.
+ *
+ * precode_decode_results[] contains the static part of the entry for each
+ * symbol. make_decode_table_entry() produces the final entries.
+ */
+static const u32 precode_decode_results[] = {
+#define ENTRY(presym) ((u32)presym << 16)
+	/*
+	 * One entry per precode symbol (19 == DEFLATE_NUM_PRECODE_SYMS,
+	 * verified by a STATIC_ASSERT in build_precode_decode_table()).
+	 */
+	ENTRY(0) , ENTRY(1) , ENTRY(2) , ENTRY(3) ,
+	ENTRY(4) , ENTRY(5) , ENTRY(6) , ENTRY(7) ,
+	ENTRY(8) , ENTRY(9) , ENTRY(10) , ENTRY(11) ,
+	ENTRY(12) , ENTRY(13) , ENTRY(14) , ENTRY(15) ,
+	ENTRY(16) , ENTRY(17) , ENTRY(18) ,
+#undef ENTRY
+};
+
+/* Litlen and offset decode table entry flags */
+
+/* Indicates a literal entry in the litlen decode table */
+/* (deliberately the sign bit, which is cheap to test; see notes below) */
+#define HUFFDEC_LITERAL 0x80000000
+
+/* Indicates that HUFFDEC_SUBTABLE_POINTER or HUFFDEC_END_OF_BLOCK is set */
+#define HUFFDEC_EXCEPTIONAL 0x00008000
+
+/* Indicates a subtable pointer entry in the litlen or offset decode table */
+#define HUFFDEC_SUBTABLE_POINTER 0x00004000
+
+/* Indicates an end-of-block entry in the litlen decode table */
+#define HUFFDEC_END_OF_BLOCK 0x00002000
+
+/* Maximum number of bits that can be consumed by decoding a match length */
+#define LENGTH_MAXBITS (DEFLATE_MAX_LITLEN_CODEWORD_LEN + \
+			DEFLATE_MAX_EXTRA_LENGTH_BITS)
+/* Same, but for the common case where the main-table lookup suffices */
+#define LENGTH_MAXFASTBITS (LITLEN_TABLEBITS /* no subtable needed */ + \
+			    DEFLATE_MAX_EXTRA_LENGTH_BITS)
+
+/*
+ * Here is the format of our litlen decode table entries. Bits not explicitly
+ * described contain zeroes:
+ *
+ * Literals:
+ * Bit 31: 1 (HUFFDEC_LITERAL)
+ * Bit 23-16: literal value
+ * Bit 15: 0 (!HUFFDEC_EXCEPTIONAL)
+ * Bit 14: 0 (!HUFFDEC_SUBTABLE_POINTER)
+ * Bit 13: 0 (!HUFFDEC_END_OF_BLOCK)
+ * Bit 11-8: remaining codeword length [not used]
+ * Bit 3-0: remaining codeword length
+ * Lengths:
+ * Bit 31: 0 (!HUFFDEC_LITERAL)
+ * Bit 24-16: length base value
+ * Bit 15: 0 (!HUFFDEC_EXCEPTIONAL)
+ * Bit 14: 0 (!HUFFDEC_SUBTABLE_POINTER)
+ * Bit 13: 0 (!HUFFDEC_END_OF_BLOCK)
+ * Bit 11-8: remaining codeword length
+ * Bit 4-0: remaining codeword length + number of extra bits
+ * End of block:
+ * Bit 31: 0 (!HUFFDEC_LITERAL)
+ * Bit 15: 1 (HUFFDEC_EXCEPTIONAL)
+ * Bit 14: 0 (!HUFFDEC_SUBTABLE_POINTER)
+ * Bit 13: 1 (HUFFDEC_END_OF_BLOCK)
+ * Bit 11-8: remaining codeword length [not used]
+ * Bit 3-0: remaining codeword length
+ * Subtable pointer:
+ * Bit 31: 0 (!HUFFDEC_LITERAL)
+ * Bit 30-16: index of start of subtable
+ * Bit 15: 1 (HUFFDEC_EXCEPTIONAL)
+ * Bit 14: 1 (HUFFDEC_SUBTABLE_POINTER)
+ * Bit 13: 0 (!HUFFDEC_END_OF_BLOCK)
+ * Bit 11-8: number of subtable bits
+ * Bit 3-0: number of main table bits
+ *
+ * This format has several desirable properties:
+ *
+ * - The codeword length, length slot base, and number of extra length bits
+ * are all built in. This eliminates the need to separately look up this
+ * information by indexing separate arrays by symbol or length slot.
+ *
+ * - The HUFFDEC_* flags enable easily distinguishing between the different
+ * types of entries. The HUFFDEC_LITERAL flag enables a fast path for
+ * literals; the high bit is used for this, as some CPUs can test the
+ * high bit more easily than other bits. The HUFFDEC_EXCEPTIONAL flag
+ * makes it possible to detect the two unlikely cases (subtable pointer
+ * and end of block) in a single bit flag test.
+ *
+ * - The low byte is the number of bits that need to be removed from the
+ * bitstream; this makes this value easily accessible, and it enables the
+ * micro-optimization of doing 'bitsleft -= entry' instead of
+ * 'bitsleft -= (u8)entry'. It also includes the number of extra bits,
+ * so they don't need to be removed separately.
+ *
+ * - The flags in bits 15-13 are arranged to be 0 when the
+ * "remaining codeword length" in bits 11-8 is needed, making this value
+ * fairly easily accessible as well via a shift and downcast.
+ *
+ * - Similarly, bits 13-12 are 0 when the "subtable bits" in bits 11-8 are
+ * needed, making it possible to extract this value with '& 0x3F' rather
+ * than '& 0xF'. This value is only used as a shift amount, so this can
+ * save an 'and' instruction as the masking by 0x3F happens implicitly.
+ *
+ * litlen_decode_results[] contains the static part of the entry for each
+ * symbol. make_decode_table_entry() produces the final entries.
+ */
+static const u32 litlen_decode_results[] = {
+
+	/* Literals */
+#define ENTRY(literal) (HUFFDEC_LITERAL | ((u32)literal << 16))
+	ENTRY(0) , ENTRY(1) , ENTRY(2) , ENTRY(3) ,
+	ENTRY(4) , ENTRY(5) , ENTRY(6) , ENTRY(7) ,
+	ENTRY(8) , ENTRY(9) , ENTRY(10) , ENTRY(11) ,
+	ENTRY(12) , ENTRY(13) , ENTRY(14) , ENTRY(15) ,
+	ENTRY(16) , ENTRY(17) , ENTRY(18) , ENTRY(19) ,
+	ENTRY(20) , ENTRY(21) , ENTRY(22) , ENTRY(23) ,
+	ENTRY(24) , ENTRY(25) , ENTRY(26) , ENTRY(27) ,
+	ENTRY(28) , ENTRY(29) , ENTRY(30) , ENTRY(31) ,
+	ENTRY(32) , ENTRY(33) , ENTRY(34) , ENTRY(35) ,
+	ENTRY(36) , ENTRY(37) , ENTRY(38) , ENTRY(39) ,
+	ENTRY(40) , ENTRY(41) , ENTRY(42) , ENTRY(43) ,
+	ENTRY(44) , ENTRY(45) , ENTRY(46) , ENTRY(47) ,
+	ENTRY(48) , ENTRY(49) , ENTRY(50) , ENTRY(51) ,
+	ENTRY(52) , ENTRY(53) , ENTRY(54) , ENTRY(55) ,
+	ENTRY(56) , ENTRY(57) , ENTRY(58) , ENTRY(59) ,
+	ENTRY(60) , ENTRY(61) , ENTRY(62) , ENTRY(63) ,
+	ENTRY(64) , ENTRY(65) , ENTRY(66) , ENTRY(67) ,
+	ENTRY(68) , ENTRY(69) , ENTRY(70) , ENTRY(71) ,
+	ENTRY(72) , ENTRY(73) , ENTRY(74) , ENTRY(75) ,
+	ENTRY(76) , ENTRY(77) , ENTRY(78) , ENTRY(79) ,
+	ENTRY(80) , ENTRY(81) , ENTRY(82) , ENTRY(83) ,
+	ENTRY(84) , ENTRY(85) , ENTRY(86) , ENTRY(87) ,
+	ENTRY(88) , ENTRY(89) , ENTRY(90) , ENTRY(91) ,
+	ENTRY(92) , ENTRY(93) , ENTRY(94) , ENTRY(95) ,
+	ENTRY(96) , ENTRY(97) , ENTRY(98) , ENTRY(99) ,
+	ENTRY(100) , ENTRY(101) , ENTRY(102) , ENTRY(103) ,
+	ENTRY(104) , ENTRY(105) , ENTRY(106) , ENTRY(107) ,
+	ENTRY(108) , ENTRY(109) , ENTRY(110) , ENTRY(111) ,
+	ENTRY(112) , ENTRY(113) , ENTRY(114) , ENTRY(115) ,
+	ENTRY(116) , ENTRY(117) , ENTRY(118) , ENTRY(119) ,
+	ENTRY(120) , ENTRY(121) , ENTRY(122) , ENTRY(123) ,
+	ENTRY(124) , ENTRY(125) , ENTRY(126) , ENTRY(127) ,
+	ENTRY(128) , ENTRY(129) , ENTRY(130) , ENTRY(131) ,
+	ENTRY(132) , ENTRY(133) , ENTRY(134) , ENTRY(135) ,
+	ENTRY(136) , ENTRY(137) , ENTRY(138) , ENTRY(139) ,
+	ENTRY(140) , ENTRY(141) , ENTRY(142) , ENTRY(143) ,
+	ENTRY(144) , ENTRY(145) , ENTRY(146) , ENTRY(147) ,
+	ENTRY(148) , ENTRY(149) , ENTRY(150) , ENTRY(151) ,
+	ENTRY(152) , ENTRY(153) , ENTRY(154) , ENTRY(155) ,
+	ENTRY(156) , ENTRY(157) , ENTRY(158) , ENTRY(159) ,
+	ENTRY(160) , ENTRY(161) , ENTRY(162) , ENTRY(163) ,
+	ENTRY(164) , ENTRY(165) , ENTRY(166) , ENTRY(167) ,
+	ENTRY(168) , ENTRY(169) , ENTRY(170) , ENTRY(171) ,
+	ENTRY(172) , ENTRY(173) , ENTRY(174) , ENTRY(175) ,
+	ENTRY(176) , ENTRY(177) , ENTRY(178) , ENTRY(179) ,
+	ENTRY(180) , ENTRY(181) , ENTRY(182) , ENTRY(183) ,
+	ENTRY(184) , ENTRY(185) , ENTRY(186) , ENTRY(187) ,
+	ENTRY(188) , ENTRY(189) , ENTRY(190) , ENTRY(191) ,
+	ENTRY(192) , ENTRY(193) , ENTRY(194) , ENTRY(195) ,
+	ENTRY(196) , ENTRY(197) , ENTRY(198) , ENTRY(199) ,
+	ENTRY(200) , ENTRY(201) , ENTRY(202) , ENTRY(203) ,
+	ENTRY(204) , ENTRY(205) , ENTRY(206) , ENTRY(207) ,
+	ENTRY(208) , ENTRY(209) , ENTRY(210) , ENTRY(211) ,
+	ENTRY(212) , ENTRY(213) , ENTRY(214) , ENTRY(215) ,
+	ENTRY(216) , ENTRY(217) , ENTRY(218) , ENTRY(219) ,
+	ENTRY(220) , ENTRY(221) , ENTRY(222) , ENTRY(223) ,
+	ENTRY(224) , ENTRY(225) , ENTRY(226) , ENTRY(227) ,
+	ENTRY(228) , ENTRY(229) , ENTRY(230) , ENTRY(231) ,
+	ENTRY(232) , ENTRY(233) , ENTRY(234) , ENTRY(235) ,
+	ENTRY(236) , ENTRY(237) , ENTRY(238) , ENTRY(239) ,
+	ENTRY(240) , ENTRY(241) , ENTRY(242) , ENTRY(243) ,
+	ENTRY(244) , ENTRY(245) , ENTRY(246) , ENTRY(247) ,
+	ENTRY(248) , ENTRY(249) , ENTRY(250) , ENTRY(251) ,
+	ENTRY(252) , ENTRY(253) , ENTRY(254) , ENTRY(255) ,
+#undef ENTRY
+
+	/* End of block */
+	HUFFDEC_EXCEPTIONAL | HUFFDEC_END_OF_BLOCK,
+
+	/* Lengths */
+#define ENTRY(length_base, num_extra_bits) \
+	(((u32)(length_base) << 16) | (num_extra_bits))
+	ENTRY(3  , 0) , ENTRY(4  , 0) , ENTRY(5  , 0) , ENTRY(6  , 0),
+	ENTRY(7  , 0) , ENTRY(8  , 0) , ENTRY(9  , 0) , ENTRY(10 , 0),
+	ENTRY(11 , 1) , ENTRY(13 , 1) , ENTRY(15 , 1) , ENTRY(17 , 1),
+	ENTRY(19 , 2) , ENTRY(23 , 2) , ENTRY(27 , 2) , ENTRY(31 , 2),
+	ENTRY(35 , 3) , ENTRY(43 , 3) , ENTRY(51 , 3) , ENTRY(59 , 3),
+	ENTRY(67 , 4) , ENTRY(83 , 4) , ENTRY(99 , 4) , ENTRY(115, 4),
+	ENTRY(131, 5) , ENTRY(163, 5) , ENTRY(195, 5) , ENTRY(227, 5),
+	/*
+	 * The last two entries are for the reserved litlen symbols 286 and
+	 * 287, which never occur in valid data (RFC 1951) but still need
+	 * entries so the array covers all 288 litlen symbols (verified by a
+	 * STATIC_ASSERT in build_litlen_decode_table()).
+	 */
+	ENTRY(258, 0) , ENTRY(258, 0) , ENTRY(258, 0) ,
+#undef ENTRY
+};
+
+/* Maximum number of bits that can be consumed by decoding a match offset */
+#define OFFSET_MAXBITS (DEFLATE_MAX_OFFSET_CODEWORD_LEN + \
+			DEFLATE_MAX_EXTRA_OFFSET_BITS)
+/* Same, but for the common case where the main-table lookup suffices
+   (the offset analog of LENGTH_MAXFASTBITS) */
+#define OFFSET_MAXFASTBITS (OFFSET_TABLEBITS /* no subtable needed */ + \
+			    DEFLATE_MAX_EXTRA_OFFSET_BITS)
+
+/*
+ * Here is the format of our offset decode table entries. Bits not explicitly
+ * described contain zeroes:
+ *
+ * Offsets:
+ * Bit 31-16: offset base value
+ * Bit 15: 0 (!HUFFDEC_EXCEPTIONAL)
+ * Bit 14: 0 (!HUFFDEC_SUBTABLE_POINTER)
+ * Bit 11-8: remaining codeword length
+ * Bit 4-0: remaining codeword length + number of extra bits
+ * Subtable pointer:
+ * Bit 31-16: index of start of subtable
+ * Bit 15: 1 (HUFFDEC_EXCEPTIONAL)
+ * Bit 14: 1 (HUFFDEC_SUBTABLE_POINTER)
+ * Bit 11-8: number of subtable bits
+ * Bit 3-0: number of main table bits
+ *
+ * These work the same way as the length entries and subtable pointer entries in
+ * the litlen decode table; see litlen_decode_results[] above.
+ */
+static const u32 offset_decode_results[] = {
+#define ENTRY(offset_base, num_extra_bits) \
+	(((u32)(offset_base) << 16) | (num_extra_bits))
+	ENTRY(1     , 0)  , ENTRY(2     , 0)  , ENTRY(3     , 0)  , ENTRY(4     , 0)  ,
+	ENTRY(5     , 1)  , ENTRY(7     , 1)  , ENTRY(9     , 2)  , ENTRY(13    , 2)  ,
+	ENTRY(17    , 3)  , ENTRY(25    , 3)  , ENTRY(33    , 4)  , ENTRY(49    , 4)  ,
+	ENTRY(65    , 5)  , ENTRY(97    , 5)  , ENTRY(129   , 6)  , ENTRY(193   , 6)  ,
+	ENTRY(257   , 7)  , ENTRY(385   , 7)  , ENTRY(513   , 8)  , ENTRY(769   , 8)  ,
+	ENTRY(1025  , 9)  , ENTRY(1537  , 9)  , ENTRY(2049  , 10) , ENTRY(3073  , 10) ,
+	ENTRY(4097  , 11) , ENTRY(6145  , 11) , ENTRY(8193  , 12) , ENTRY(12289 , 12) ,
+	/*
+	 * The final two entries are for the reserved offset symbols 30 and
+	 * 31 (RFC 1951), giving the array one entry per offset symbol
+	 * (32 == DEFLATE_NUM_OFFSET_SYMS; see build_offset_decode_table()).
+	 */
+	ENTRY(16385 , 13) , ENTRY(24577 , 13) , ENTRY(24577 , 13) , ENTRY(24577 , 13) ,
+#undef ENTRY
+};
+
+/*
+ * The main DEFLATE decompressor structure. Since libdeflate only supports
+ * full-buffer decompression, this structure doesn't store the entire
+ * decompression state, most of which is in stack variables. Instead, this
+ * struct just contains the decode tables and some temporary arrays used for
+ * building them, as these are too large to comfortably allocate on the stack.
+ *
+ * Storing the decode tables in the decompressor struct also allows the decode
+ * tables for the static codes to be reused whenever two static Huffman blocks
+ * are decoded without an intervening dynamic block, even across streams.
+ */
+struct libdeflate_decompressor {
+
+	/*
+	 * The arrays aren't all needed at the same time. 'precode_lens' and
+	 * 'precode_decode_table' are unneeded after 'lens' has been filled.
+	 * Furthermore, 'lens' need not be retained after building the litlen
+	 * and offset decode tables. In fact, 'lens' can be in union with
+	 * 'litlen_decode_table' provided that 'offset_decode_table' is separate
+	 * and is built first.
+	 */
+
+	union {
+		u8 precode_lens[DEFLATE_NUM_PRECODE_SYMS];
+
+		struct {
+			u8 lens[DEFLATE_NUM_LITLEN_SYMS +
+				DEFLATE_NUM_OFFSET_SYMS +
+				DEFLATE_MAX_LENS_OVERRUN];
+
+			u32 precode_decode_table[PRECODE_ENOUGH];
+		} l;
+
+		u32 litlen_decode_table[LITLEN_ENOUGH];
+	} u;
+
+	u32 offset_decode_table[OFFSET_ENOUGH];
+
+	/* used only during build_decode_table() */
+	u16 sorted_syms[DEFLATE_MAX_NUM_SYMS];
+
+	/* true once the static-code decode tables have been built and cached
+	   (must start out false; see libdeflate_alloc_decompressor()) */
+	bool static_codes_loaded;
+
+	/* the actual table_bits used for the litlen decode table; dynamic,
+	   set by build_decode_table() via its table_bits_ret parameter */
+	unsigned litlen_tablebits;
+};
+
+/*
+ * Build a table for fast decoding of symbols from a Huffman code. As input,
+ * this function takes the codeword length of each symbol which may be used in
+ * the code. As output, it produces a decode table for the canonical Huffman
+ * code described by the codeword lengths. The decode table is built with the
+ * assumption that it will be indexed with "bit-reversed" codewords, where the
+ * low-order bit is the first bit of the codeword. This format is used for all
+ * Huffman codes in DEFLATE.
+ *
+ * @decode_table
+ * The array in which the decode table will be generated. This array must
+ * have sufficient length; see the definition of the ENOUGH numbers.
+ * @lens
+ * An array which provides, for each symbol, the length of the
+ * corresponding codeword in bits, or 0 if the symbol is unused. This may
+ * alias @decode_table, since nothing is written to @decode_table until all
+ * @lens have been consumed. All codeword lengths are assumed to be <=
+ * @max_codeword_len but are otherwise considered untrusted. If they do
+ * not form a valid Huffman code, then the decode table is not built and
+ * %false is returned.
+ * @num_syms
+ * The number of symbols in the code, including all unused symbols.
+ * @decode_results
+ * An array which gives the incomplete decode result for each symbol. The
+ * needed values in this array will be combined with codeword lengths to
+ * make the final decode table entries using make_decode_table_entry().
+ * @table_bits
+ * The log base-2 of the number of main table entries to use.
+ * If @table_bits_ret != NULL, then @table_bits is treated as a maximum
+ * value and it will be decreased if a smaller table would be sufficient.
+ * @max_codeword_len
+ * The maximum allowed codeword length for this Huffman code.
+ * Must be <= DEFLATE_MAX_CODEWORD_LEN.
+ * @sorted_syms
+ * A temporary array of length @num_syms.
+ * @table_bits_ret
+ * If non-NULL, then the dynamic table_bits is enabled, and the actual
+ * table_bits value will be returned here.
+ *
+ * Returns %true if successful; %false if the codeword lengths do not form a
+ * valid Huffman code.
+ */
+static bool
+build_decode_table(u32 decode_table[],
+		   const u8 lens[],
+		   const unsigned num_syms,
+		   const u32 decode_results[],
+		   unsigned table_bits,
+		   unsigned max_codeword_len,
+		   u16 *sorted_syms,
+		   unsigned *table_bits_ret)
+{
+	unsigned len_counts[DEFLATE_MAX_CODEWORD_LEN + 1];
+	unsigned offsets[DEFLATE_MAX_CODEWORD_LEN + 1];
+	unsigned sym;		/* current symbol */
+	unsigned codeword;	/* current codeword, bit-reversed */
+	unsigned len;		/* current codeword length in bits */
+	unsigned count;		/* num codewords remaining with this length */
+	u32 codespace_used;	/* codespace used out of '2^max_codeword_len' */
+	unsigned cur_table_end; /* end index of current table */
+	unsigned subtable_prefix; /* codeword prefix of current subtable */
+	unsigned subtable_start;  /* start index of current subtable */
+	unsigned subtable_bits;   /* log2 of current subtable length */
+
+	/* Count how many codewords have each length, including 0. */
+	for (len = 0; len <= max_codeword_len; len++)
+		len_counts[len] = 0;
+	for (sym = 0; sym < num_syms; sym++)
+		len_counts[lens[sym]]++;
+
+	/*
+	 * Determine the actual maximum codeword length that was used, and
+	 * decrease table_bits to it if allowed.
+	 */
+	while (max_codeword_len > 1 && len_counts[max_codeword_len] == 0)
+		max_codeword_len--;
+	if (table_bits_ret != NULL) {
+		table_bits = MIN(table_bits, max_codeword_len);
+		*table_bits_ret = table_bits;
+	}
+
+	/*
+	 * Sort the symbols primarily by increasing codeword length and
+	 * secondarily by increasing symbol value; or equivalently by their
+	 * codewords in lexicographic order, since a canonical code is assumed.
+	 *
+	 * For efficiency, also compute 'codespace_used' in the same pass over
+	 * 'len_counts[]' used to build 'offsets[]' for sorting.
+	 */
+
+	/* Ensure that 'codespace_used' cannot overflow. */
+	STATIC_ASSERT(sizeof(codespace_used) == 4);
+	STATIC_ASSERT(UINT32_MAX / (1U << (DEFLATE_MAX_CODEWORD_LEN - 1)) >=
+		      DEFLATE_MAX_NUM_SYMS);
+
+	/*
+	 * Counting sort setup: offsets[len] becomes the start index in
+	 * sorted_syms[] for the symbols whose codeword length is 'len'.
+	 * Unused symbols (length 0) are placed first and skipped below.
+	 */
+	offsets[0] = 0;
+	offsets[1] = len_counts[0];
+	codespace_used = 0;
+	for (len = 1; len < max_codeword_len; len++) {
+		offsets[len + 1] = offsets[len] + len_counts[len];
+		codespace_used = (codespace_used << 1) + len_counts[len];
+	}
+	codespace_used = (codespace_used << 1) + len_counts[len];
+
+	for (sym = 0; sym < num_syms; sym++)
+		sorted_syms[offsets[lens[sym]]++] = sym;
+
+	sorted_syms += offsets[0];	/* Skip unused symbols */
+
+	/* lens[] is done being used, so we can write to decode_table[] now. */
+
+	/*
+	 * Check whether the lengths form a complete code (exactly fills the
+	 * codespace), an incomplete code (doesn't fill the codespace), or an
+	 * overfull code (overflows the codespace). A codeword of length 'n'
+	 * uses proportion '1/(2^n)' of the codespace. An overfull code is
+	 * nonsensical, so is considered invalid. An incomplete code is
+	 * considered valid only in two specific cases; see below.
+	 */
+
+	/* overfull code? */
+	if (unlikely(codespace_used > (1U << max_codeword_len)))
+		return false;
+
+	/* incomplete code? */
+	if (unlikely(codespace_used < (1U << max_codeword_len))) {
+		u32 entry;
+		unsigned i;
+
+		if (codespace_used == 0) {
+			/*
+			 * An empty code is allowed. This can happen for the
+			 * offset code in DEFLATE, since a dynamic Huffman block
+			 * need not contain any matches.
+			 */
+
+			/* sym=0, len=1 (arbitrary) */
+			entry = make_decode_table_entry(decode_results, 0, 1);
+		} else {
+			/*
+			 * Allow codes with a single used symbol, with codeword
+			 * length 1. The DEFLATE RFC is unclear regarding this
+			 * case. What zlib's decompressor does is permit this
+			 * for the litlen and offset codes and assume the
+			 * codeword is '0' rather than '1'. We do the same
+			 * except we allow this for precodes too, since there's
+			 * no convincing reason to treat the codes differently.
+			 * We also assign both codewords '0' and '1' to the
+			 * symbol to avoid having to handle '1' specially.
+			 */
+			if (codespace_used != (1U << (max_codeword_len - 1)) ||
+			    len_counts[1] != 1)
+				return false;
+			entry = make_decode_table_entry(decode_results,
+							*sorted_syms, 1);
+		}
+		/*
+		 * Note: the decode table still must be fully initialized, in
+		 * case the stream is malformed and contains bits from the part
+		 * of the codespace the incomplete code doesn't use.
+		 */
+		for (i = 0; i < (1U << table_bits); i++)
+			decode_table[i] = entry;
+		return true;
+	}
+
+	/*
+	 * The lengths form a complete code. Now, enumerate the codewords in
+	 * lexicographic order and fill the decode table entries for each one.
+	 *
+	 * First, process all codewords with len <= table_bits. Each one gets
+	 * '2^(table_bits-len)' direct entries in the table.
+	 *
+	 * Since DEFLATE uses bit-reversed codewords, these entries aren't
+	 * consecutive but rather are spaced '2^len' entries apart. This makes
+	 * filling them naively somewhat awkward and inefficient, since strided
+	 * stores are less cache-friendly and preclude the use of word or
+	 * vector-at-a-time stores to fill multiple entries per instruction.
+	 *
+	 * To optimize this, we incrementally double the table size. When
+	 * processing codewords with length 'len', the table is treated as
+	 * having only '2^len' entries, so each codeword uses just one entry.
+	 * Then, each time 'len' is incremented, the table size is doubled and
+	 * the first half is copied to the second half. This significantly
+	 * improves performance over naively doing strided stores.
+	 *
+	 * Note that some entries copied for each table doubling may not have
+	 * been initialized yet, but it doesn't matter since they're guaranteed
+	 * to be initialized later (because the Huffman code is complete).
+	 */
+	codeword = 0;
+	len = 1;
+	while ((count = len_counts[len]) == 0)
+		len++;
+	cur_table_end = 1U << len;
+	while (len <= table_bits) {
+		/* Process all 'count' codewords with length 'len' bits. */
+		do {
+			unsigned bit;
+
+			/* Fill the first entry for the current codeword. */
+			decode_table[codeword] =
+				make_decode_table_entry(decode_results,
+							*sorted_syms++, len);
+
+			if (codeword == cur_table_end - 1) {
+				/* Last codeword (all 1's) */
+				for (; len < table_bits; len++) {
+					memcpy(&decode_table[cur_table_end],
+					       decode_table,
+					       cur_table_end *
+						sizeof(decode_table[0]));
+					cur_table_end <<= 1;
+				}
+				return true;
+			}
+			/*
+			 * To advance to the lexicographically next codeword in
+			 * the canonical code, the codeword must be incremented,
+			 * then 0's must be appended to the codeword as needed
+			 * to match the next codeword's length.
+			 *
+			 * Since the codeword is bit-reversed, appending 0's is
+			 * a no-op. However, incrementing it is nontrivial. To
+			 * do so efficiently, use the 'bsr' instruction to find
+			 * the last (highest order) 0 bit in the codeword, set
+			 * it, and clear any later (higher order) 1 bits. But
+			 * 'bsr' actually finds the highest order 1 bit, so to
+			 * use it first flip all bits in the codeword by XOR'ing
+			 * it with (1U << len) - 1 == cur_table_end - 1.
+			 */
+			bit = 1U << bsr32(codeword ^ (cur_table_end - 1));
+			codeword &= bit - 1;
+			codeword |= bit;
+		} while (--count);
+
+		/* Advance to the next codeword length. */
+		do {
+			if (++len <= table_bits) {
+				memcpy(&decode_table[cur_table_end],
+				       decode_table,
+				       cur_table_end * sizeof(decode_table[0]));
+				cur_table_end <<= 1;
+			}
+		} while ((count = len_counts[len]) == 0);
+	}
+
+	/* Process codewords with len > table_bits. These require subtables. */
+	cur_table_end = 1U << table_bits;
+	subtable_prefix = -1;
+	subtable_start = 0;
+	for (;;) {
+		u32 entry;
+		unsigned i;
+		unsigned stride;
+		unsigned bit;
+
+		/*
+		 * Start a new subtable if the first 'table_bits' bits of the
+		 * codeword don't match the prefix of the current subtable.
+		 */
+		if ((codeword & ((1U << table_bits) - 1)) != subtable_prefix) {
+			subtable_prefix = (codeword & ((1U << table_bits) - 1));
+			subtable_start = cur_table_end;
+			/*
+			 * Calculate the subtable length. If the codeword has
+			 * length 'table_bits + n', then the subtable needs
+			 * '2^n' entries. But it may need more; if fewer than
+			 * '2^n' codewords of length 'table_bits + n' remain,
+			 * then the length will need to be incremented to bring
+			 * in longer codewords until the subtable can be
+			 * completely filled. Note that because the Huffman
+			 * code is complete, it will always be possible to fill
+			 * the subtable eventually.
+			 */
+			subtable_bits = len - table_bits;
+			codespace_used = count;
+			while (codespace_used < (1U << subtable_bits)) {
+				subtable_bits++;
+				codespace_used = (codespace_used << 1) +
+					len_counts[table_bits + subtable_bits];
+			}
+			cur_table_end = subtable_start + (1U << subtable_bits);
+
+			/*
+			 * Create the entry that points from the main table to
+			 * the subtable.
+			 */
+			decode_table[subtable_prefix] =
+				((u32)subtable_start << 16) |
+				HUFFDEC_EXCEPTIONAL |
+				HUFFDEC_SUBTABLE_POINTER |
+				(subtable_bits << 8) | table_bits;
+		}
+
+		/* Fill the subtable entries for the current codeword. */
+		entry = make_decode_table_entry(decode_results, *sorted_syms++,
+						len - table_bits);
+		i = subtable_start + (codeword >> table_bits);
+		stride = 1U << (len - table_bits);
+		do {
+			decode_table[i] = entry;
+			i += stride;
+		} while (i < cur_table_end);
+
+		/* Advance to the next codeword. */
+		if (codeword == (1U << len) - 1) /* last codeword (all 1's)? */
+			return true;
+		bit = 1U << bsr32(codeword ^ ((1U << len) - 1));
+		codeword &= bit - 1;
+		codeword |= bit;
+		count--;
+		/* Move to the next used codeword length if this one is done. */
+		while (count == 0)
+			count = len_counts[++len];
+	}
+}
+
+/* Build the decode table for the precode. */
+static bool
+build_precode_decode_table(struct libdeflate_decompressor *d)
+{
+	/* When you change TABLEBITS, you must change ENOUGH, and vice versa! */
+	STATIC_ASSERT(PRECODE_TABLEBITS == 7 && PRECODE_ENOUGH == 128);
+
+	STATIC_ASSERT(ARRAY_LEN(precode_decode_results) ==
+		      DEFLATE_NUM_PRECODE_SYMS);
+
+	/* table_bits_ret == NULL: the precode table size is not dynamic. */
+	return build_decode_table(d->u.l.precode_decode_table,
+				  d->u.precode_lens,
+				  DEFLATE_NUM_PRECODE_SYMS,
+				  precode_decode_results,
+				  PRECODE_TABLEBITS,
+				  DEFLATE_MAX_PRE_CODEWORD_LEN,
+				  d->sorted_syms,
+				  NULL);
+}
+
+/* Build the decode table for the literal/length code. */
+static bool
+build_litlen_decode_table(struct libdeflate_decompressor *d,
+			  unsigned num_litlen_syms, unsigned num_offset_syms)
+{
+	/* When you change TABLEBITS, you must change ENOUGH, and vice versa! */
+	STATIC_ASSERT(LITLEN_TABLEBITS == 11 && LITLEN_ENOUGH == 2342);
+
+	STATIC_ASSERT(ARRAY_LEN(litlen_decode_results) ==
+		      DEFLATE_NUM_LITLEN_SYMS);
+
+	/*
+	 * NOTE(review): num_offset_syms is unused here; the signature appears
+	 * to mirror build_offset_decode_table() — confirm with the callers.
+	 *
+	 * Passing &d->litlen_tablebits enables the dynamic table_bits
+	 * strategy for this (largest) table; see build_decode_table().
+	 */
+	return build_decode_table(d->u.litlen_decode_table,
+				  d->u.l.lens,
+				  num_litlen_syms,
+				  litlen_decode_results,
+				  LITLEN_TABLEBITS,
+				  DEFLATE_MAX_LITLEN_CODEWORD_LEN,
+				  d->sorted_syms,
+				  &d->litlen_tablebits);
+}
+
+/* Build the decode table for the offset code. */
+static bool
+build_offset_decode_table(struct libdeflate_decompressor *d,
+			  unsigned num_litlen_syms, unsigned num_offset_syms)
+{
+	/* When you change TABLEBITS, you must change ENOUGH, and vice versa! */
+	STATIC_ASSERT(OFFSET_TABLEBITS == 8 && OFFSET_ENOUGH == 402);
+
+	STATIC_ASSERT(ARRAY_LEN(offset_decode_results) ==
+		      DEFLATE_NUM_OFFSET_SYMS);
+
+	/* The offset codeword lengths start right after the litlen lengths
+	   in the shared d->u.l.lens[] array. */
+	return build_decode_table(d->offset_decode_table,
+				  d->u.l.lens + num_litlen_syms,
+				  num_offset_syms,
+				  offset_decode_results,
+				  OFFSET_TABLEBITS,
+				  DEFLATE_MAX_OFFSET_CODEWORD_LEN,
+				  d->sorted_syms,
+				  NULL);
+}
+
+/*****************************************************************************
+ * Main decompression routine
+ *****************************************************************************/
+
+/* Signature shared by the default and any arch-specific decompressors. */
+typedef enum libdeflate_result (*decompress_func_t)
+	(struct libdeflate_decompressor * restrict d,
+	 const void * restrict in, size_t in_nbytes,
+	 void * restrict out, size_t out_nbytes_avail,
+	 size_t *actual_in_nbytes_ret, size_t *actual_out_nbytes_ret);
+
+/*
+ * Instantiate the portable decoder from the template.  The arch-specific
+ * header(s) below may instantiate additional variants and define
+ * DEFAULT_IMPL and/or arch_select_decompress_func.
+ */
+#define FUNCNAME deflate_decompress_default
+#undef ATTRIBUTES
+#undef EXTRACT_VARBITS
+#undef EXTRACT_VARBITS8
+#include "decompress_template.h"
+
+/* Include architecture-specific implementation(s) if available. */
+#undef DEFAULT_IMPL
+#undef arch_select_decompress_func
+#if defined(ARCH_X86_32) || defined(ARCH_X86_64)
+#  include "x86/decompress_impl.h"
+#endif
+
+#ifndef DEFAULT_IMPL
+#  define DEFAULT_IMPL deflate_decompress_default
+#endif
+
+#ifdef arch_select_decompress_func
+static enum libdeflate_result
+dispatch_decomp(struct libdeflate_decompressor *d,
+		const void *in, size_t in_nbytes,
+		void *out, size_t out_nbytes_avail,
+		size_t *actual_in_nbytes_ret, size_t *actual_out_nbytes_ret);
+
+/*
+ * Starts out pointing at the dispatcher; the first call replaces it with
+ * the selected implementation so later calls skip the selection.
+ * 'volatile' forces each caller to reload the possibly-updated pointer.
+ */
+static volatile decompress_func_t decompress_impl = dispatch_decomp;
+
+/* Choose the best implementation at runtime. */
+static enum libdeflate_result
+dispatch_decomp(struct libdeflate_decompressor *d,
+		const void *in, size_t in_nbytes,
+		void *out, size_t out_nbytes_avail,
+		size_t *actual_in_nbytes_ret, size_t *actual_out_nbytes_ret)
+{
+	decompress_func_t f = arch_select_decompress_func();
+
+	/* Fall back to the portable implementation if no arch-specific one
+	   is usable on this CPU. */
+	if (f == NULL)
+		f = DEFAULT_IMPL;
+
+	/* Cache the choice, then forward this first call to it. */
+	decompress_impl = f;
+	return f(d, in, in_nbytes, out, out_nbytes_avail,
+		 actual_in_nbytes_ret, actual_out_nbytes_ret);
+}
+#else
+/* The best implementation is statically known, so call it directly. */
+# define decompress_impl DEFAULT_IMPL
+#endif
+
+/*
+ * This is the main DEFLATE decompression routine. See libdeflate.h for the
+ * documentation.
+ *
+ * Note that the real code is in decompress_template.h. The part here just
+ * handles calling the appropriate implementation depending on the CPU features
+ * at runtime.
+ */
+LIBDEFLATEAPI enum libdeflate_result
+libdeflate_deflate_decompress_ex(struct libdeflate_decompressor *d,
+				 const void *in, size_t in_nbytes,
+				 void *out, size_t out_nbytes_avail,
+				 size_t *actual_in_nbytes_ret,
+				 size_t *actual_out_nbytes_ret)
+{
+	/* decompress_impl is either DEFAULT_IMPL or the runtime dispatcher,
+	   depending on whether arch_select_decompress_func was defined. */
+	return decompress_impl(d, in, in_nbytes, out, out_nbytes_avail,
+			       actual_in_nbytes_ret, actual_out_nbytes_ret);
+}
+
+/*
+ * Convenience wrapper around libdeflate_deflate_decompress_ex() that does
+ * not report the number of input bytes consumed.
+ */
+LIBDEFLATEAPI enum libdeflate_result
+libdeflate_deflate_decompress(struct libdeflate_decompressor *d,
+			      const void *in, size_t in_nbytes,
+			      void *out, size_t out_nbytes_avail,
+			      size_t *actual_out_nbytes_ret)
+{
+	return libdeflate_deflate_decompress_ex(d, in, in_nbytes,
+						out, out_nbytes_avail,
+						NULL, actual_out_nbytes_ret);
+}
+
+/*
+ * Allocate and zero-initialize a decompressor.  Returns NULL on allocation
+ * failure.
+ */
+LIBDEFLATEAPI struct libdeflate_decompressor *
+libdeflate_alloc_decompressor(void)
+{
+	/*
+	 * Note that only certain parts of the decompressor actually must be
+	 * initialized here:
+	 *
+	 * - 'static_codes_loaded' must be initialized to false.
+	 *
+	 * - The first half of the main portion of each decode table must be
+	 *   initialized to any value, to avoid reading from uninitialized
+	 *   memory during table expansion in build_decode_table(). (Although,
+	 *   this is really just to avoid warnings with dynamic tools like
+	 *   valgrind, since build_decode_table() is guaranteed to initialize
+	 *   all entries eventually anyway.)
+	 *
+	 * But for simplicity, we currently just zero the whole decompressor.
+	 */
+	struct libdeflate_decompressor *d = libdeflate_malloc(sizeof(*d));
+
+	if (d == NULL)
+		return NULL;
+	memset(d, 0, sizeof(*d));
+	return d;
+}
+
+/* Release a decompressor allocated by libdeflate_alloc_decompressor(). */
+LIBDEFLATEAPI void
+libdeflate_free_decompressor(struct libdeflate_decompressor *d)
+{
+	libdeflate_free(d);
+}
diff --git a/tools/z64compress/src/enc/libdeflate/lib/gzip_compress.c b/tools/z64compress/src/enc/libdeflate/lib/gzip_compress.c
new file mode 100644
index 000000000..e343e5068
--- /dev/null
+++ b/tools/z64compress/src/enc/libdeflate/lib/gzip_compress.c
@@ -0,0 +1,92 @@
+/*
+ * gzip_compress.c - compress with a gzip wrapper
+ *
+ * Copyright 2016 Eric Biggers
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "deflate_compress.h"
+#include "gzip_constants.h"
+
+#include "libdeflate.h"
+
+/*
+ * Compress 'in' into a complete gzip stream in 'out': a minimal 10-byte
+ * header (FLG == 0, so no optional fields), the raw DEFLATE data, then the
+ * 8-byte footer (CRC32 of the input, and ISIZE == input size modulo 2^32).
+ * Returns the total number of bytes written, or 0 if 'out' is too small.
+ */
+LIBDEFLATEAPI size_t
+libdeflate_gzip_compress(struct libdeflate_compressor *c,
+			 const void *in, size_t in_nbytes,
+			 void *out, size_t out_nbytes_avail)
+{
+	u8 *out_next = out;
+	unsigned compression_level;
+	u8 xfl;
+	size_t deflate_size;
+
+	/* Need room for header + footer plus at least one DEFLATE byte. */
+	if (out_nbytes_avail <= GZIP_MIN_OVERHEAD)
+		return 0;
+
+	/* ID1 */
+	*out_next++ = GZIP_ID1;
+	/* ID2 */
+	*out_next++ = GZIP_ID2;
+	/* CM */
+	*out_next++ = GZIP_CM_DEFLATE;
+	/* FLG: no optional fields (FNAME, FCOMMENT, FEXTRA, FHCRC) */
+	*out_next++ = 0;
+	/* MTIME */
+	put_unaligned_le32(GZIP_MTIME_UNAVAILABLE, out_next);
+	out_next += 4;
+	/* XFL: advertise fastest/slowest compression per the level in use */
+	xfl = 0;
+	compression_level = libdeflate_get_compression_level(c);
+	if (compression_level < 2)
+		xfl |= GZIP_XFL_FASTEST_COMPRESSION;
+	else if (compression_level >= 8)
+		xfl |= GZIP_XFL_SLOWEST_COMPRESSION;
+	*out_next++ = xfl;
+	/* OS */
+	*out_next++ = GZIP_OS_UNKNOWN;
+
+	/* Compressed data */
+	deflate_size = libdeflate_deflate_compress(c, in, in_nbytes, out_next,
+					out_nbytes_avail - GZIP_MIN_OVERHEAD);
+	if (deflate_size == 0)
+		return 0;
+	out_next += deflate_size;
+
+	/* CRC32 */
+	put_unaligned_le32(libdeflate_crc32(0, in, in_nbytes), out_next);
+	out_next += 4;
+
+	/* ISIZE (input size modulo 2^32, per the u32 truncation) */
+	put_unaligned_le32((u32)in_nbytes, out_next);
+	out_next += 4;
+
+	return out_next - (u8 *)out;
+}
+
+/*
+ * Worst-case gzip output size for 'in_nbytes' of input: the fixed gzip
+ * header+footer overhead plus the DEFLATE worst-case bound.
+ */
+LIBDEFLATEAPI size_t
+libdeflate_gzip_compress_bound(struct libdeflate_compressor *c,
+			       size_t in_nbytes)
+{
+	return GZIP_MIN_OVERHEAD +
+	       libdeflate_deflate_compress_bound(c, in_nbytes);
+}
diff --git a/tools/z64compress/src/enc/libdeflate/lib/gzip_constants.h b/tools/z64compress/src/enc/libdeflate/lib/gzip_constants.h
new file mode 100644
index 000000000..35e4728d8
--- /dev/null
+++ b/tools/z64compress/src/enc/libdeflate/lib/gzip_constants.h
@@ -0,0 +1,45 @@
+/*
+ * gzip_constants.h - constants for the gzip wrapper format
+ */
+
+#ifndef LIB_GZIP_CONSTANTS_H
+#define LIB_GZIP_CONSTANTS_H
+
+#define GZIP_MIN_HEADER_SIZE 10
+#define GZIP_FOOTER_SIZE 8
+#define GZIP_MIN_OVERHEAD (GZIP_MIN_HEADER_SIZE + GZIP_FOOTER_SIZE)
+
+#define GZIP_ID1 0x1F
+#define GZIP_ID2 0x8B
+
+#define GZIP_CM_DEFLATE 8
+
+#define GZIP_FTEXT 0x01
+#define GZIP_FHCRC 0x02
+#define GZIP_FEXTRA 0x04
+#define GZIP_FNAME 0x08
+#define GZIP_FCOMMENT 0x10
+#define GZIP_FRESERVED 0xE0
+
+#define GZIP_MTIME_UNAVAILABLE 0
+
+#define GZIP_XFL_SLOWEST_COMPRESSION 0x02
+#define GZIP_XFL_FASTEST_COMPRESSION 0x04
+
+#define GZIP_OS_FAT 0
+#define GZIP_OS_AMIGA 1
+#define GZIP_OS_VMS 2
+#define GZIP_OS_UNIX 3
+#define GZIP_OS_VM_CMS 4
+#define GZIP_OS_ATARI_TOS 5
+#define GZIP_OS_HPFS 6
+#define GZIP_OS_MACINTOSH 7
+#define GZIP_OS_Z_SYSTEM 8
+#define GZIP_OS_CP_M 9
+#define GZIP_OS_TOPS_20 10
+#define GZIP_OS_NTFS 11
+#define GZIP_OS_QDOS 12
+#define GZIP_OS_RISCOS 13
+#define GZIP_OS_UNKNOWN 255
+
+#endif /* LIB_GZIP_CONSTANTS_H */
diff --git a/tools/z64compress/src/enc/libdeflate/lib/gzip_decompress.c b/tools/z64compress/src/enc/libdeflate/lib/gzip_decompress.c
new file mode 100644
index 000000000..9518e7047
--- /dev/null
+++ b/tools/z64compress/src/enc/libdeflate/lib/gzip_decompress.c
@@ -0,0 +1,146 @@
+/*
+ * gzip_decompress.c - decompress with a gzip wrapper
+ *
+ * Copyright 2016 Eric Biggers
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "lib_common.h"
+#include "gzip_constants.h"
+
+#include "libdeflate.h"
+
+LIBDEFLATEAPI enum libdeflate_result
+libdeflate_gzip_decompress_ex(struct libdeflate_decompressor *d,
+ const void *in, size_t in_nbytes,
+ void *out, size_t out_nbytes_avail,
+ size_t *actual_in_nbytes_ret,
+ size_t *actual_out_nbytes_ret)
+{
+ const u8 *in_next = in;
+ const u8 * const in_end = in_next + in_nbytes;
+ u8 flg;
+ size_t actual_in_nbytes;
+ size_t actual_out_nbytes;
+ enum libdeflate_result result;
+
+ if (in_nbytes < GZIP_MIN_OVERHEAD)
+ return LIBDEFLATE_BAD_DATA;
+
+ /* ID1 */
+ if (*in_next++ != GZIP_ID1)
+ return LIBDEFLATE_BAD_DATA;
+ /* ID2 */
+ if (*in_next++ != GZIP_ID2)
+ return LIBDEFLATE_BAD_DATA;
+ /* CM */
+ if (*in_next++ != GZIP_CM_DEFLATE)
+ return LIBDEFLATE_BAD_DATA;
+ flg = *in_next++;
+ /* MTIME */
+ in_next += 4;
+ /* XFL */
+ in_next += 1;
+ /* OS */
+ in_next += 1;
+
+ if (flg & GZIP_FRESERVED)
+ return LIBDEFLATE_BAD_DATA;
+
+ /* Extra field */
+ if (flg & GZIP_FEXTRA) {
+ u16 xlen = get_unaligned_le16(in_next);
+ in_next += 2;
+
+ if (in_end - in_next < (u32)xlen + GZIP_FOOTER_SIZE)
+ return LIBDEFLATE_BAD_DATA;
+
+ in_next += xlen;
+ }
+
+ /* Original file name (zero terminated) */
+ if (flg & GZIP_FNAME) {
+ while (*in_next++ != 0 && in_next != in_end)
+ ;
+ if (in_end - in_next < GZIP_FOOTER_SIZE)
+ return LIBDEFLATE_BAD_DATA;
+ }
+
+ /* File comment (zero terminated) */
+ if (flg & GZIP_FCOMMENT) {
+ while (*in_next++ != 0 && in_next != in_end)
+ ;
+ if (in_end - in_next < GZIP_FOOTER_SIZE)
+ return LIBDEFLATE_BAD_DATA;
+ }
+
+ /* CRC16 for gzip header */
+ if (flg & GZIP_FHCRC) {
+ in_next += 2;
+ if (in_end - in_next < GZIP_FOOTER_SIZE)
+ return LIBDEFLATE_BAD_DATA;
+ }
+
+ /* Compressed data */
+ result = libdeflate_deflate_decompress_ex(d, in_next,
+ in_end - GZIP_FOOTER_SIZE - in_next,
+ out, out_nbytes_avail,
+ &actual_in_nbytes,
+ actual_out_nbytes_ret);
+ if (result != LIBDEFLATE_SUCCESS)
+ return result;
+
+ if (actual_out_nbytes_ret)
+ actual_out_nbytes = *actual_out_nbytes_ret;
+ else
+ actual_out_nbytes = out_nbytes_avail;
+
+ in_next += actual_in_nbytes;
+
+ /* CRC32 */
+ if (libdeflate_crc32(0, out, actual_out_nbytes) !=
+ get_unaligned_le32(in_next))
+ return LIBDEFLATE_BAD_DATA;
+ in_next += 4;
+
+ /* ISIZE */
+ if ((u32)actual_out_nbytes != get_unaligned_le32(in_next))
+ return LIBDEFLATE_BAD_DATA;
+ in_next += 4;
+
+ if (actual_in_nbytes_ret)
+ *actual_in_nbytes_ret = in_next - (u8 *)in;
+
+ return LIBDEFLATE_SUCCESS;
+}
+
+LIBDEFLATEAPI enum libdeflate_result
+libdeflate_gzip_decompress(struct libdeflate_decompressor *d,
+ const void *in, size_t in_nbytes,
+ void *out, size_t out_nbytes_avail,
+ size_t *actual_out_nbytes_ret)
+{
+ return libdeflate_gzip_decompress_ex(d, in, in_nbytes,
+ out, out_nbytes_avail,
+ NULL, actual_out_nbytes_ret);
+}
diff --git a/tools/z64compress/src/enc/libdeflate/lib/hc_matchfinder.h b/tools/z64compress/src/enc/libdeflate/lib/hc_matchfinder.h
new file mode 100644
index 000000000..a0cddfca1
--- /dev/null
+++ b/tools/z64compress/src/enc/libdeflate/lib/hc_matchfinder.h
@@ -0,0 +1,401 @@
+/*
+ * hc_matchfinder.h - Lempel-Ziv matchfinding with a hash table of linked lists
+ *
+ * Copyright 2016 Eric Biggers
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * ---------------------------------------------------------------------------
+ *
+ * Algorithm
+ *
+ * This is a Hash Chains (hc) based matchfinder.
+ *
+ * The main data structure is a hash table where each hash bucket contains a
+ * linked list (or "chain") of sequences whose first 4 bytes share the same hash
+ * code. Each sequence is identified by its starting position in the input
+ * buffer.
+ *
+ * The algorithm processes the input buffer sequentially. At each byte
+ * position, the hash code of the first 4 bytes of the sequence beginning at
+ * that position (the sequence being matched against) is computed. This
+ * identifies the hash bucket to use for that position. Then, this hash
+ * bucket's linked list is searched for matches. Then, a new linked list node
+ * is created to represent the current sequence and is prepended to the list.
+ *
+ * This algorithm has several useful properties:
+ *
+ * - It only finds true Lempel-Ziv matches; i.e., those where the matching
+ * sequence occurs prior to the sequence being matched against.
+ *
+ * - The sequences in each linked list are always sorted by decreasing starting
+ * position. Therefore, the closest (smallest offset) matches are found
+ * first, which in many compression formats tend to be the cheapest to encode.
+ *
+ * - Although fast running time is not guaranteed due to the possibility of the
+ * lists getting very long, the worst degenerate behavior can be easily
+ * prevented by capping the number of nodes searched at each position.
+ *
+ * - If the compressor decides not to search for matches at a certain position,
+ * then that position can be quickly inserted without searching the list.
+ *
+ * - The algorithm is adaptable to sliding windows: just store the positions
+ * relative to a "base" value that is updated from time to time, and stop
+ * searching each list when the sequences get too far away.
+ *
+ * ----------------------------------------------------------------------------
+ *
+ * Optimizations
+ *
+ * The main hash table and chains handle length 4+ matches. Length 3 matches
+ * are handled by a separate hash table with no chains. This works well for
+ * typical "greedy" or "lazy"-style compressors, where length 3 matches are
+ * often only helpful if they have small offsets. Instead of searching a full
+ * chain for length 3+ matches, the algorithm just checks for one close length 3
+ * match, then focuses on finding length 4+ matches.
+ *
+ * The longest_match() and skip_bytes() functions are inlined into the
+ * compressors that use them. This isn't just about saving the overhead of a
+ * function call. These functions are intended to be called from the inner
+ * loops of compressors, where giving the compiler more control over register
+ * allocation is very helpful. There is also significant benefit to be gained
+ * from allowing the CPU to predict branches independently at each call site.
+ * For example, "lazy"-style compressors can be written with two calls to
+ * longest_match(), each of which starts with a different 'best_len' and
+ * therefore has significantly different performance characteristics.
+ *
+ * Although any hash function can be used, a multiplicative hash is fast and
+ * works well.
+ *
+ * On some processors, it is significantly faster to extend matches by whole
+ * words (32 or 64 bits) instead of by individual bytes. For this to be the
+ * case, the processor must implement unaligned memory accesses efficiently and
+ * must have either a fast "find first set bit" instruction or a fast "find last
+ * set bit" instruction, depending on the processor's endianness.
+ *
+ * The code uses one loop for finding the first match and one loop for finding a
+ * longer match. Each of these loops is tuned for its respective task and in
+ * combination are faster than a single generalized loop that handles both
+ * tasks.
+ *
+ * The code also uses a tight inner loop that only compares the last and first
+ * bytes of a potential match. It is only when these bytes match that a full
+ * match extension is attempted.
+ *
+ * ----------------------------------------------------------------------------
+ */
+
+#ifndef LIB_HC_MATCHFINDER_H
+#define LIB_HC_MATCHFINDER_H
+
+#include "matchfinder_common.h"
+
+#define HC_MATCHFINDER_HASH3_ORDER 15
+#define HC_MATCHFINDER_HASH4_ORDER 16
+
+#define HC_MATCHFINDER_TOTAL_HASH_SIZE \
+ (((1UL << HC_MATCHFINDER_HASH3_ORDER) + \
+ (1UL << HC_MATCHFINDER_HASH4_ORDER)) * sizeof(mf_pos_t))
+
+struct MATCHFINDER_ALIGNED hc_matchfinder {
+
+ /* The hash table for finding length 3 matches */
+ mf_pos_t hash3_tab[1UL << HC_MATCHFINDER_HASH3_ORDER];
+
+ /* The hash table which contains the first nodes of the linked lists for
+ * finding length 4+ matches */
+ mf_pos_t hash4_tab[1UL << HC_MATCHFINDER_HASH4_ORDER];
+
+ /* The "next node" references for the linked lists. The "next node" of
+ * the node for the sequence with position 'pos' is 'next_tab[pos]'. */
+ mf_pos_t next_tab[MATCHFINDER_WINDOW_SIZE];
+};
+
+/* Prepare the matchfinder for a new input buffer. */
+static forceinline void
+hc_matchfinder_init(struct hc_matchfinder *mf)
+{
+ STATIC_ASSERT(HC_MATCHFINDER_TOTAL_HASH_SIZE %
+ MATCHFINDER_SIZE_ALIGNMENT == 0);
+
+ matchfinder_init((mf_pos_t *)mf, HC_MATCHFINDER_TOTAL_HASH_SIZE);
+}
+
+static forceinline void
+hc_matchfinder_slide_window(struct hc_matchfinder *mf)
+{
+ STATIC_ASSERT(sizeof(*mf) % MATCHFINDER_SIZE_ALIGNMENT == 0);
+
+ matchfinder_rebase((mf_pos_t *)mf, sizeof(*mf));
+}
+
+/*
+ * Find the longest match longer than 'best_len' bytes.
+ *
+ * @mf
+ * The matchfinder structure.
+ * @in_base_p
+ * Location of a pointer which points to the place in the input data the
+ * matchfinder currently stores positions relative to. This may be updated
+ * by this function.
+ * @in_next
+ * Pointer to the next position in the input buffer, i.e. the sequence
+ * being matched against.
+ * @best_len
+ * Require a match longer than this length.
+ * @max_len
+ * The maximum permissible match length at this position.
+ * @nice_len
+ * Stop searching if a match of at least this length is found.
+ * Must be <= @max_len.
+ * @max_search_depth
+ * Limit on the number of potential matches to consider. Must be >= 1.
+ * @next_hashes
+ * The precomputed hash codes for the sequence beginning at @in_next.
+ * These will be used and then updated with the precomputed hashcodes for
+ * the sequence beginning at @in_next + 1.
+ * @offset_ret
+ * If a match is found, its offset is returned in this location.
+ *
+ * Return the length of the match found, or 'best_len' if no match longer than
+ * 'best_len' was found.
+ */
+static forceinline u32
+hc_matchfinder_longest_match(struct hc_matchfinder * const mf,
+ const u8 ** const in_base_p,
+ const u8 * const in_next,
+ u32 best_len,
+ const u32 max_len,
+ const u32 nice_len,
+ const u32 max_search_depth,
+ u32 * const next_hashes,
+ u32 * const offset_ret)
+{
+ u32 depth_remaining = max_search_depth;
+ const u8 *best_matchptr = in_next;
+ mf_pos_t cur_node3, cur_node4;
+ u32 hash3, hash4;
+ u32 next_hashseq;
+ u32 seq4;
+ const u8 *matchptr;
+ u32 len;
+ u32 cur_pos = in_next - *in_base_p;
+ const u8 *in_base;
+ mf_pos_t cutoff;
+
+ if (cur_pos == MATCHFINDER_WINDOW_SIZE) {
+ hc_matchfinder_slide_window(mf);
+ *in_base_p += MATCHFINDER_WINDOW_SIZE;
+ cur_pos = 0;
+ }
+
+ in_base = *in_base_p;
+ cutoff = cur_pos - MATCHFINDER_WINDOW_SIZE;
+
+ if (unlikely(max_len < 5)) /* can we read 4 bytes from 'in_next + 1'? */
+ goto out;
+
+ /* Get the precomputed hash codes. */
+ hash3 = next_hashes[0];
+ hash4 = next_hashes[1];
+
+ /* From the hash buckets, get the first node of each linked list. */
+ cur_node3 = mf->hash3_tab[hash3];
+ cur_node4 = mf->hash4_tab[hash4];
+
+ /* Update for length 3 matches. This replaces the singleton node in the
+ * 'hash3' bucket with the node for the current sequence. */
+ mf->hash3_tab[hash3] = cur_pos;
+
+ /* Update for length 4 matches. This prepends the node for the current
+ * sequence to the linked list in the 'hash4' bucket. */
+ mf->hash4_tab[hash4] = cur_pos;
+ mf->next_tab[cur_pos] = cur_node4;
+
+ /* Compute the next hash codes. */
+ next_hashseq = get_unaligned_le32(in_next + 1);
+ next_hashes[0] = lz_hash(next_hashseq & 0xFFFFFF, HC_MATCHFINDER_HASH3_ORDER);
+ next_hashes[1] = lz_hash(next_hashseq, HC_MATCHFINDER_HASH4_ORDER);
+ prefetchw(&mf->hash3_tab[next_hashes[0]]);
+ prefetchw(&mf->hash4_tab[next_hashes[1]]);
+
+ if (best_len < 4) { /* No match of length >= 4 found yet? */
+
+ /* Check for a length 3 match if needed. */
+
+ if (cur_node3 <= cutoff)
+ goto out;
+
+ seq4 = load_u32_unaligned(in_next);
+
+ if (best_len < 3) {
+ matchptr = &in_base[cur_node3];
+ if (load_u24_unaligned(matchptr) == loaded_u32_to_u24(seq4)) {
+ best_len = 3;
+ best_matchptr = matchptr;
+ }
+ }
+
+ /* Check for a length 4 match. */
+
+ if (cur_node4 <= cutoff)
+ goto out;
+
+ for (;;) {
+ /* No length 4 match found yet. Check the first 4 bytes. */
+ matchptr = &in_base[cur_node4];
+
+ if (load_u32_unaligned(matchptr) == seq4)
+ break;
+
+ /* The first 4 bytes did not match. Keep trying. */
+ cur_node4 = mf->next_tab[cur_node4 & (MATCHFINDER_WINDOW_SIZE - 1)];
+ if (cur_node4 <= cutoff || !--depth_remaining)
+ goto out;
+ }
+
+ /* Found a match of length >= 4. Extend it to its full length. */
+ best_matchptr = matchptr;
+ best_len = lz_extend(in_next, best_matchptr, 4, max_len);
+ if (best_len >= nice_len)
+ goto out;
+ cur_node4 = mf->next_tab[cur_node4 & (MATCHFINDER_WINDOW_SIZE - 1)];
+ if (cur_node4 <= cutoff || !--depth_remaining)
+ goto out;
+ } else {
+ if (cur_node4 <= cutoff || best_len >= nice_len)
+ goto out;
+ }
+
+ /* Check for matches of length >= 5. */
+
+ for (;;) {
+ for (;;) {
+ matchptr = &in_base[cur_node4];
+
+ /* Already found a length 4 match. Try for a longer
+ * match; start by checking either the last 4 bytes and
+ * the first 4 bytes, or the last byte. (The last byte,
+ * the one which would extend the match length by 1, is
+ * the most important.) */
+ #if UNALIGNED_ACCESS_IS_FAST
+ if ((load_u32_unaligned(matchptr + best_len - 3) ==
+ load_u32_unaligned(in_next + best_len - 3)) &&
+ (load_u32_unaligned(matchptr) ==
+ load_u32_unaligned(in_next)))
+ #else
+ if (matchptr[best_len] == in_next[best_len])
+ #endif
+ break;
+
+ /* Continue to the next node in the list. */
+ cur_node4 = mf->next_tab[cur_node4 & (MATCHFINDER_WINDOW_SIZE - 1)];
+ if (cur_node4 <= cutoff || !--depth_remaining)
+ goto out;
+ }
+
+ #if UNALIGNED_ACCESS_IS_FAST
+ len = 4;
+ #else
+ len = 0;
+ #endif
+ len = lz_extend(in_next, matchptr, len, max_len);
+ if (len > best_len) {
+ /* This is the new longest match. */
+ best_len = len;
+ best_matchptr = matchptr;
+ if (best_len >= nice_len)
+ goto out;
+ }
+
+ /* Continue to the next node in the list. */
+ cur_node4 = mf->next_tab[cur_node4 & (MATCHFINDER_WINDOW_SIZE - 1)];
+ if (cur_node4 <= cutoff || !--depth_remaining)
+ goto out;
+ }
+out:
+ *offset_ret = in_next - best_matchptr;
+ return best_len;
+}
+
+/*
+ * Advance the matchfinder, but don't search for matches.
+ *
+ * @mf
+ * The matchfinder structure.
+ * @in_base_p
+ * Location of a pointer which points to the place in the input data the
+ * matchfinder currently stores positions relative to. This may be updated
+ * by this function.
+ * @in_next
+ * Pointer to the next position in the input buffer.
+ * @in_end
+ * Pointer to the end of the input buffer.
+ * @count
+ * The number of bytes to advance. Must be > 0.
+ * @next_hashes
+ * The precomputed hash codes for the sequence beginning at @in_next.
+ * These will be used and then updated with the precomputed hashcodes for
+ * the sequence beginning at @in_next + @count.
+ */
+static forceinline void
+hc_matchfinder_skip_bytes(struct hc_matchfinder * const mf,
+ const u8 ** const in_base_p,
+ const u8 *in_next,
+ const u8 * const in_end,
+ const u32 count,
+ u32 * const next_hashes)
+{
+ u32 cur_pos;
+ u32 hash3, hash4;
+ u32 next_hashseq;
+ u32 remaining = count;
+
+ if (unlikely(count + 5 > in_end - in_next))
+ return;
+
+ cur_pos = in_next - *in_base_p;
+ hash3 = next_hashes[0];
+ hash4 = next_hashes[1];
+ do {
+ if (cur_pos == MATCHFINDER_WINDOW_SIZE) {
+ hc_matchfinder_slide_window(mf);
+ *in_base_p += MATCHFINDER_WINDOW_SIZE;
+ cur_pos = 0;
+ }
+ mf->hash3_tab[hash3] = cur_pos;
+ mf->next_tab[cur_pos] = mf->hash4_tab[hash4];
+ mf->hash4_tab[hash4] = cur_pos;
+
+ next_hashseq = get_unaligned_le32(++in_next);
+ hash3 = lz_hash(next_hashseq & 0xFFFFFF, HC_MATCHFINDER_HASH3_ORDER);
+ hash4 = lz_hash(next_hashseq, HC_MATCHFINDER_HASH4_ORDER);
+ cur_pos++;
+ } while (--remaining);
+
+ prefetchw(&mf->hash3_tab[hash3]);
+ prefetchw(&mf->hash4_tab[hash4]);
+ next_hashes[0] = hash3;
+ next_hashes[1] = hash4;
+}
+
+#endif /* LIB_HC_MATCHFINDER_H */
diff --git a/tools/z64compress/src/enc/libdeflate/lib/ht_matchfinder.h b/tools/z64compress/src/enc/libdeflate/lib/ht_matchfinder.h
new file mode 100644
index 000000000..6e5a187c1
--- /dev/null
+++ b/tools/z64compress/src/enc/libdeflate/lib/ht_matchfinder.h
@@ -0,0 +1,234 @@
+/*
+ * ht_matchfinder.h - Lempel-Ziv matchfinding with a hash table
+ *
+ * Copyright 2022 Eric Biggers
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * ---------------------------------------------------------------------------
+ *
+ * This is a Hash Table (ht) matchfinder.
+ *
+ * This is a variant of the Hash Chains (hc) matchfinder that is optimized for
+ * very fast compression. The ht_matchfinder stores the hash chains inline in
+ * the hash table, whereas the hc_matchfinder stores them in a separate array.
+ * Storing the hash chains inline is the faster method when max_search_depth
+ * (the maximum chain length) is very small. It is not appropriate when
+ * max_search_depth is larger, as then it uses too much memory.
+ *
+ * Due to its focus on speed, the ht_matchfinder doesn't support length 3
+ * matches. It also doesn't allow max_search_depth to vary at runtime; it is
+ * fixed at build time as HT_MATCHFINDER_BUCKET_SIZE.
+ *
+ * See hc_matchfinder.h for more information.
+ */
+
+#ifndef LIB_HT_MATCHFINDER_H
+#define LIB_HT_MATCHFINDER_H
+
+#include "matchfinder_common.h"
+
+#define HT_MATCHFINDER_HASH_ORDER 15
+#define HT_MATCHFINDER_BUCKET_SIZE 2
+
+#define HT_MATCHFINDER_MIN_MATCH_LEN 4
+/* Minimum value of max_len for ht_matchfinder_longest_match() */
+#define HT_MATCHFINDER_REQUIRED_NBYTES 5
+
+struct MATCHFINDER_ALIGNED ht_matchfinder {
+ mf_pos_t hash_tab[1UL << HT_MATCHFINDER_HASH_ORDER]
+ [HT_MATCHFINDER_BUCKET_SIZE];
+};
+
+static forceinline void
+ht_matchfinder_init(struct ht_matchfinder *mf)
+{
+ STATIC_ASSERT(sizeof(*mf) % MATCHFINDER_SIZE_ALIGNMENT == 0);
+
+ matchfinder_init((mf_pos_t *)mf, sizeof(*mf));
+}
+
+static forceinline void
+ht_matchfinder_slide_window(struct ht_matchfinder *mf)
+{
+ matchfinder_rebase((mf_pos_t *)mf, sizeof(*mf));
+}
+
+/* Note: max_len must be >= HT_MATCHFINDER_REQUIRED_NBYTES */
+static forceinline u32
+ht_matchfinder_longest_match(struct ht_matchfinder * const mf,
+ const u8 ** const in_base_p,
+ const u8 * const in_next,
+ const u32 max_len,
+ const u32 nice_len,
+ u32 * const next_hash,
+ u32 * const offset_ret)
+{
+ u32 best_len = 0;
+ const u8 *best_matchptr = in_next;
+ u32 cur_pos = in_next - *in_base_p;
+ const u8 *in_base;
+ mf_pos_t cutoff;
+ u32 hash;
+ u32 seq;
+ mf_pos_t cur_node;
+ const u8 *matchptr;
+#if HT_MATCHFINDER_BUCKET_SIZE > 1
+ mf_pos_t to_insert;
+ u32 len;
+#endif
+#if HT_MATCHFINDER_BUCKET_SIZE > 2
+ int i;
+#endif
+
+ /* This is assumed throughout this function. */
+ STATIC_ASSERT(HT_MATCHFINDER_MIN_MATCH_LEN == 4);
+
+ if (cur_pos == MATCHFINDER_WINDOW_SIZE) {
+ ht_matchfinder_slide_window(mf);
+ *in_base_p += MATCHFINDER_WINDOW_SIZE;
+ cur_pos = 0;
+ }
+ in_base = *in_base_p;
+ cutoff = cur_pos - MATCHFINDER_WINDOW_SIZE;
+
+ hash = *next_hash;
+ STATIC_ASSERT(HT_MATCHFINDER_REQUIRED_NBYTES == 5);
+ *next_hash = lz_hash(get_unaligned_le32(in_next + 1),
+ HT_MATCHFINDER_HASH_ORDER);
+ seq = load_u32_unaligned(in_next);
+ prefetchw(&mf->hash_tab[*next_hash]);
+#if HT_MATCHFINDER_BUCKET_SIZE == 1
+ /* Hand-unrolled version for BUCKET_SIZE == 1 */
+ cur_node = mf->hash_tab[hash][0];
+ mf->hash_tab[hash][0] = cur_pos;
+ if (cur_node <= cutoff)
+ goto out;
+ matchptr = &in_base[cur_node];
+ if (load_u32_unaligned(matchptr) == seq) {
+ best_len = lz_extend(in_next, matchptr, 4, max_len);
+ best_matchptr = matchptr;
+ }
+#elif HT_MATCHFINDER_BUCKET_SIZE == 2
+ /*
+ * Hand-unrolled version for BUCKET_SIZE == 2. The logic here also
+ * differs slightly in that it copies the first entry to the second even
+ * if nice_len is reached on the first, as this can be slightly faster.
+ */
+ cur_node = mf->hash_tab[hash][0];
+ mf->hash_tab[hash][0] = cur_pos;
+ if (cur_node <= cutoff)
+ goto out;
+ matchptr = &in_base[cur_node];
+
+ to_insert = cur_node;
+ cur_node = mf->hash_tab[hash][1];
+ mf->hash_tab[hash][1] = to_insert;
+
+ if (load_u32_unaligned(matchptr) == seq) {
+ best_len = lz_extend(in_next, matchptr, 4, max_len);
+ best_matchptr = matchptr;
+ if (cur_node <= cutoff || best_len >= nice_len)
+ goto out;
+ matchptr = &in_base[cur_node];
+ if (load_u32_unaligned(matchptr) == seq &&
+ load_u32_unaligned(matchptr + best_len - 3) ==
+ load_u32_unaligned(in_next + best_len - 3)) {
+ len = lz_extend(in_next, matchptr, 4, max_len);
+ if (len > best_len) {
+ best_len = len;
+ best_matchptr = matchptr;
+ }
+ }
+ } else {
+ if (cur_node <= cutoff)
+ goto out;
+ matchptr = &in_base[cur_node];
+ if (load_u32_unaligned(matchptr) == seq) {
+ best_len = lz_extend(in_next, matchptr, 4, max_len);
+ best_matchptr = matchptr;
+ }
+ }
+#else
+ /* Generic version for HT_MATCHFINDER_BUCKET_SIZE > 2 */
+ to_insert = cur_pos;
+ for (i = 0; i < HT_MATCHFINDER_BUCKET_SIZE; i++) {
+ cur_node = mf->hash_tab[hash][i];
+ mf->hash_tab[hash][i] = to_insert;
+ if (cur_node <= cutoff)
+ goto out;
+ matchptr = &in_base[cur_node];
+ if (load_u32_unaligned(matchptr) == seq) {
+ len = lz_extend(in_next, matchptr, 4, max_len);
+ if (len > best_len) {
+ best_len = len;
+ best_matchptr = matchptr;
+ if (best_len >= nice_len)
+ goto out;
+ }
+ }
+ to_insert = cur_node;
+ }
+#endif
+out:
+ *offset_ret = in_next - best_matchptr;
+ return best_len;
+}
+
+static forceinline void
+ht_matchfinder_skip_bytes(struct ht_matchfinder * const mf,
+ const u8 ** const in_base_p,
+ const u8 *in_next,
+ const u8 * const in_end,
+ const u32 count,
+ u32 * const next_hash)
+{
+ s32 cur_pos = in_next - *in_base_p;
+ u32 hash;
+ u32 remaining = count;
+ int i;
+
+ if (unlikely(count + HT_MATCHFINDER_REQUIRED_NBYTES > in_end - in_next))
+ return;
+
+ if (cur_pos + count - 1 >= MATCHFINDER_WINDOW_SIZE) {
+ ht_matchfinder_slide_window(mf);
+ *in_base_p += MATCHFINDER_WINDOW_SIZE;
+ cur_pos -= MATCHFINDER_WINDOW_SIZE;
+ }
+
+ hash = *next_hash;
+ do {
+ for (i = HT_MATCHFINDER_BUCKET_SIZE - 1; i > 0; i--)
+ mf->hash_tab[hash][i] = mf->hash_tab[hash][i - 1];
+ mf->hash_tab[hash][0] = cur_pos;
+
+ hash = lz_hash(get_unaligned_le32(++in_next),
+ HT_MATCHFINDER_HASH_ORDER);
+ cur_pos++;
+ } while (--remaining);
+
+ prefetchw(&mf->hash_tab[hash]);
+ *next_hash = hash;
+}
+
+#endif /* LIB_HT_MATCHFINDER_H */
diff --git a/tools/z64compress/src/enc/libdeflate/lib/lib_common.h b/tools/z64compress/src/enc/libdeflate/lib/lib_common.h
new file mode 100644
index 000000000..6aad0feec
--- /dev/null
+++ b/tools/z64compress/src/enc/libdeflate/lib/lib_common.h
@@ -0,0 +1,94 @@
+/*
+ * lib_common.h - internal header included by all library code
+ */
+
+#ifndef LIB_LIB_COMMON_H
+#define LIB_LIB_COMMON_H
+
+#include "../common_defs.h"
+
+#ifdef LIBDEFLATE_H
+ /*
+ * When building the library, LIBDEFLATEAPI needs to be defined properly before
+ * including libdeflate.h.
+ */
+# error "lib_common.h must always be included before libdeflate.h"
+#endif
+
+#if defined(LIBDEFLATE_DLL) && (defined(_WIN32) || defined(__CYGWIN__))
+# define LIBDEFLATE_EXPORT_SYM __declspec(dllexport)
+#elif defined(__GNUC__)
+# define LIBDEFLATE_EXPORT_SYM __attribute__((visibility("default")))
+#else
+# define LIBDEFLATE_EXPORT_SYM
+#endif
+
+/*
+ * On i386, gcc assumes that the stack is 16-byte aligned at function entry.
+ * However, some compilers (e.g. MSVC) and programming languages (e.g. Delphi)
+ * only guarantee 4-byte alignment when calling functions. This is mainly an
+ * issue on Windows, but it has been seen on Linux too. Work around this ABI
+ * incompatibility by realigning the stack pointer when entering libdeflate.
+ * This prevents crashes in SSE/AVX code.
+ */
+#if defined(__GNUC__) && defined(__i386__)
+# define LIBDEFLATE_ALIGN_STACK __attribute__((force_align_arg_pointer))
+#else
+# define LIBDEFLATE_ALIGN_STACK
+#endif
+
+#define LIBDEFLATEAPI LIBDEFLATE_EXPORT_SYM LIBDEFLATE_ALIGN_STACK
+
+void *libdeflate_malloc(size_t size);
+void libdeflate_free(void *ptr);
+
+void *libdeflate_aligned_malloc(size_t alignment, size_t size);
+void libdeflate_aligned_free(void *ptr);
+
+#ifdef FREESTANDING
+/*
+ * With -ffreestanding, <string.h> may be missing, and we must provide
+ * implementations of memset(), memcpy(), memmove(), and memcmp().
+ * See https://gcc.gnu.org/onlinedocs/gcc/Standards.html
+ *
+ * Also, -ffreestanding disables interpreting calls to these functions as
+ * built-ins. E.g., calling memcpy(&v, p, WORDBYTES) will make a function call,
+ * not be optimized to a single load instruction. For performance reasons we
+ * don't want that. So, declare these functions as macros that expand to the
+ * corresponding built-ins. This approach is recommended in the gcc man page.
+ * We still need the actual function definitions in case gcc calls them.
+ */
+void *memset(void *s, int c, size_t n);
+#define memset(s, c, n) __builtin_memset((s), (c), (n))
+
+void *memcpy(void *dest, const void *src, size_t n);
+#define memcpy(dest, src, n) __builtin_memcpy((dest), (src), (n))
+
+void *memmove(void *dest, const void *src, size_t n);
+#define memmove(dest, src, n) __builtin_memmove((dest), (src), (n))
+
+int memcmp(const void *s1, const void *s2, size_t n);
+#define memcmp(s1, s2, n) __builtin_memcmp((s1), (s2), (n))
+
+#undef LIBDEFLATE_ENABLE_ASSERTIONS
+#else
+#include <string.h>
+#endif
+
+/*
+ * Runtime assertion support. Don't enable this in production builds; it may
+ * hurt performance significantly.
+ */
+#ifdef LIBDEFLATE_ENABLE_ASSERTIONS
+void libdeflate_assertion_failed(const char *expr, const char *file, int line);
+#define ASSERT(expr) { if (unlikely(!(expr))) \
+ libdeflate_assertion_failed(#expr, __FILE__, __LINE__); }
+#else
+#define ASSERT(expr) (void)(expr)
+#endif
+
+#define CONCAT_IMPL(a, b) a##b
+#define CONCAT(a, b) CONCAT_IMPL(a, b)
+#define ADD_SUFFIX(name) CONCAT(name, SUFFIX)
+
+#endif /* LIB_LIB_COMMON_H */
diff --git a/tools/z64compress/src/enc/libdeflate/lib/matchfinder_common.h b/tools/z64compress/src/enc/libdeflate/lib/matchfinder_common.h
new file mode 100644
index 000000000..48a243e1d
--- /dev/null
+++ b/tools/z64compress/src/enc/libdeflate/lib/matchfinder_common.h
@@ -0,0 +1,199 @@
+/*
+ * matchfinder_common.h - common code for Lempel-Ziv matchfinding
+ */
+
+#ifndef LIB_MATCHFINDER_COMMON_H
+#define LIB_MATCHFINDER_COMMON_H
+
+#include "lib_common.h"
+
+#ifndef MATCHFINDER_WINDOW_ORDER
+# error "MATCHFINDER_WINDOW_ORDER must be defined!"
+#endif
+
+/*
+ * Given a 32-bit value that was loaded with the platform's native endianness,
+ * return a 32-bit value whose high-order 8 bits are 0 and whose low-order 24
+ * bits contain the first 3 bytes, arranged in octets in a platform-dependent
+ * order, at the memory location from which the input 32-bit value was loaded.
+ */
+static forceinline u32
+loaded_u32_to_u24(u32 v)
+{
+ if (CPU_IS_LITTLE_ENDIAN())
+ return v & 0xFFFFFF;
+ else
+ return v >> 8;
+}
+
+/*
+ * Load the next 3 bytes from @p into the 24 low-order bits of a 32-bit value.
+ * The order in which the 3 bytes will be arranged as octets in the 24 bits is
+ * platform-dependent. At least 4 bytes (not 3) must be available at @p.
+ */
+static forceinline u32
+load_u24_unaligned(const u8 *p)
+{
+#if UNALIGNED_ACCESS_IS_FAST
+ return loaded_u32_to_u24(load_u32_unaligned(p));
+#else
+ if (CPU_IS_LITTLE_ENDIAN())
+ return ((u32)p[0] << 0) | ((u32)p[1] << 8) | ((u32)p[2] << 16);
+ else
+ return ((u32)p[2] << 0) | ((u32)p[1] << 8) | ((u32)p[0] << 16);
+#endif
+}
+
+#define MATCHFINDER_WINDOW_SIZE (1UL << MATCHFINDER_WINDOW_ORDER)
+
+typedef s16 mf_pos_t;
+
+#define MATCHFINDER_INITVAL ((mf_pos_t)-MATCHFINDER_WINDOW_SIZE)
+
+/*
+ * Required alignment of the matchfinder buffer pointer and size. The values
+ * here come from the AVX-2 implementation, which is the worst case.
+ */
+#define MATCHFINDER_MEM_ALIGNMENT 32
+#define MATCHFINDER_SIZE_ALIGNMENT 128
+
+#undef matchfinder_init
+#undef matchfinder_rebase
+#ifdef _aligned_attribute
+# define MATCHFINDER_ALIGNED _aligned_attribute(MATCHFINDER_MEM_ALIGNMENT)
+# if defined(ARCH_ARM32) || defined(ARCH_ARM64)
+# include "arm/matchfinder_impl.h"
+# elif defined(ARCH_X86_32) || defined(ARCH_X86_64)
+# include "x86/matchfinder_impl.h"
+# endif
+#else
+# define MATCHFINDER_ALIGNED
+#endif
+
+/*
+ * Initialize the hash table portion of the matchfinder.
+ *
+ * Essentially, this is an optimized memset().
+ *
+ * 'data' must be aligned to a MATCHFINDER_MEM_ALIGNMENT boundary, and
+ * 'size' must be a multiple of MATCHFINDER_SIZE_ALIGNMENT.
+ */
+#ifndef matchfinder_init
+static forceinline void
+matchfinder_init(mf_pos_t *data, size_t size)
+{
+ size_t num_entries = size / sizeof(*data);
+ size_t i;
+
+ for (i = 0; i < num_entries; i++)
+ data[i] = MATCHFINDER_INITVAL;
+}
+#endif
+
+/*
+ * Slide the matchfinder by MATCHFINDER_WINDOW_SIZE bytes.
+ *
+ * This must be called just after each MATCHFINDER_WINDOW_SIZE bytes have been
+ * run through the matchfinder.
+ *
+ * This subtracts MATCHFINDER_WINDOW_SIZE bytes from each entry in the given
+ * array, making the entries be relative to the current position rather than the
+ * position MATCHFINDER_WINDOW_SIZE bytes prior. To avoid integer underflows,
+ * entries that would become less than -MATCHFINDER_WINDOW_SIZE stay at
+ * -MATCHFINDER_WINDOW_SIZE, keeping them permanently out of bounds.
+ *
+ * The given array must contain all matchfinder data that is position-relative:
+ * the hash table(s) as well as any hash chain or binary tree links. Its
+ * address must be aligned to a MATCHFINDER_MEM_ALIGNMENT boundary, and its size
+ * must be a multiple of MATCHFINDER_SIZE_ALIGNMENT.
+ */
+#ifndef matchfinder_rebase
+static forceinline void
+matchfinder_rebase(mf_pos_t *data, size_t size)
+{
+ size_t num_entries = size / sizeof(*data);
+ size_t i;
+
+ if (MATCHFINDER_WINDOW_SIZE == 32768) {
+ /*
+ * Branchless version for 32768-byte windows. Clear all bits if
+ * the value was already negative, then set the sign bit. This
+ * is equivalent to subtracting 32768 with signed saturation.
+ */
+ for (i = 0; i < num_entries; i++)
+ data[i] = 0x8000 | (data[i] & ~(data[i] >> 15));
+ } else {
+ for (i = 0; i < num_entries; i++) {
+ if (data[i] >= 0)
+ data[i] -= (mf_pos_t)-MATCHFINDER_WINDOW_SIZE;
+ else
+ data[i] = (mf_pos_t)-MATCHFINDER_WINDOW_SIZE;
+ }
+ }
+}
+#endif
+
+/*
+ * The hash function: given a sequence prefix held in the low-order bits of a
+ * 32-bit value, multiply by a carefully-chosen large constant. Discard any
+ * bits of the product that don't fit in a 32-bit value, but take the
+ * next-highest @num_bits bits of the product as the hash value, as those have
+ * the most randomness.
+ */
+static forceinline u32
+lz_hash(u32 seq, unsigned num_bits)
+{
+ return (u32)(seq * 0x1E35A7BD) >> (32 - num_bits);
+}
+
+/*
+ * Return the number of bytes at @matchptr that match the bytes at @strptr, up
+ * to a maximum of @max_len. Initially, @start_len bytes are matched.
+ */
+static forceinline unsigned
+lz_extend(const u8 * const strptr, const u8 * const matchptr,
+ const unsigned start_len, const unsigned max_len)
+{
+ unsigned len = start_len;
+ machine_word_t v_word;
+
+ if (UNALIGNED_ACCESS_IS_FAST) {
+
+ if (likely(max_len - len >= 4 * WORDBYTES)) {
+
+ #define COMPARE_WORD_STEP \
+ v_word = load_word_unaligned(&matchptr[len]) ^ \
+ load_word_unaligned(&strptr[len]); \
+ if (v_word != 0) \
+ goto word_differs; \
+ len += WORDBYTES; \
+
+ COMPARE_WORD_STEP
+ COMPARE_WORD_STEP
+ COMPARE_WORD_STEP
+ COMPARE_WORD_STEP
+ #undef COMPARE_WORD_STEP
+ }
+
+ while (len + WORDBYTES <= max_len) {
+ v_word = load_word_unaligned(&matchptr[len]) ^
+ load_word_unaligned(&strptr[len]);
+ if (v_word != 0)
+ goto word_differs;
+ len += WORDBYTES;
+ }
+ }
+
+ while (len < max_len && matchptr[len] == strptr[len])
+ len++;
+ return len;
+
+word_differs:
+ if (CPU_IS_LITTLE_ENDIAN())
+ len += (bsfw(v_word) >> 3);
+ else
+ len += (WORDBITS - 1 - bsrw(v_word)) >> 3;
+ return len;
+}
+
+#endif /* LIB_MATCHFINDER_COMMON_H */
diff --git a/tools/z64compress/src/enc/libdeflate/lib/utils.c b/tools/z64compress/src/enc/libdeflate/lib/utils.c
new file mode 100644
index 000000000..c8e5121e5
--- /dev/null
+++ b/tools/z64compress/src/enc/libdeflate/lib/utils.c
@@ -0,0 +1,153 @@
+/*
+ * utils.c - utility functions for libdeflate
+ *
+ * Copyright 2016 Eric Biggers
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "lib_common.h"
+
+#include "libdeflate.h"
+
+#ifdef FREESTANDING
+# define malloc NULL
+# define free NULL
+#else
+# include <stdlib.h>
+#endif
+
+static void *(*libdeflate_malloc_func)(size_t) = malloc;
+static void (*libdeflate_free_func)(void *) = free;
+
+void *
+libdeflate_malloc(size_t size)
+{
+ return (*libdeflate_malloc_func)(size);
+}
+
+void
+libdeflate_free(void *ptr)
+{
+ (*libdeflate_free_func)(ptr);
+}
+
+void *
+libdeflate_aligned_malloc(size_t alignment, size_t size)
+{
+ void *ptr = libdeflate_malloc(sizeof(void *) + alignment - 1 + size);
+ if (ptr) {
+ void *orig_ptr = ptr;
+ ptr = (void *)ALIGN((uintptr_t)ptr + sizeof(void *), alignment);
+ ((void **)ptr)[-1] = orig_ptr;
+ }
+ return ptr;
+}
+
+void
+libdeflate_aligned_free(void *ptr)
+{
+ if (ptr)
+ libdeflate_free(((void **)ptr)[-1]);
+}
+
+LIBDEFLATEAPI void
+libdeflate_set_memory_allocator(void *(*malloc_func)(size_t),
+ void (*free_func)(void *))
+{
+ libdeflate_malloc_func = malloc_func;
+ libdeflate_free_func = free_func;
+}
+
+/*
+ * Implementations of libc functions for freestanding library builds.
+ * Normal library builds don't use these. Not optimized yet; usually the
+ * compiler expands these functions and doesn't actually call them anyway.
+ */
+#ifdef FREESTANDING
+#undef memset
+void * __attribute__((weak))
+memset(void *s, int c, size_t n)
+{
+ u8 *p = s;
+ size_t i;
+
+ for (i = 0; i < n; i++)
+ p[i] = c;
+ return s;
+}
+
+#undef memcpy
+void * __attribute__((weak))
+memcpy(void *dest, const void *src, size_t n)
+{
+ u8 *d = dest;
+ const u8 *s = src;
+ size_t i;
+
+ for (i = 0; i < n; i++)
+ d[i] = s[i];
+ return dest;
+}
+
+#undef memmove
+void * __attribute__((weak))
+memmove(void *dest, const void *src, size_t n)
+{
+ u8 *d = dest;
+ const u8 *s = src;
+ size_t i;
+
+ if (d <= s)
+ return memcpy(d, s, n);
+
+ for (i = n; i > 0; i--)
+ d[i - 1] = s[i - 1];
+ return dest;
+}
+
+#undef memcmp
+int __attribute__((weak))
+memcmp(const void *s1, const void *s2, size_t n)
+{
+ const u8 *p1 = s1;
+ const u8 *p2 = s2;
+ size_t i;
+
+ for (i = 0; i < n; i++) {
+ if (p1[i] != p2[i])
+ return (int)p1[i] - (int)p2[i];
+ }
+ return 0;
+}
+#endif /* FREESTANDING */
+
+#ifdef LIBDEFLATE_ENABLE_ASSERTIONS
+#include <stdio.h>
+#include <stdlib.h>
+void
+libdeflate_assertion_failed(const char *expr, const char *file, int line)
+{
+ fprintf(stderr, "Assertion failed: %s at %s:%d\n", expr, file, line);
+ abort();
+}
+#endif /* LIBDEFLATE_ENABLE_ASSERTIONS */
diff --git a/tools/z64compress/src/enc/libdeflate/lib/x86/adler32_impl.h b/tools/z64compress/src/enc/libdeflate/lib/x86/adler32_impl.h
new file mode 100644
index 000000000..6285dc80a
--- /dev/null
+++ b/tools/z64compress/src/enc/libdeflate/lib/x86/adler32_impl.h
@@ -0,0 +1,287 @@
+/*
+ * x86/adler32_impl.h - x86 implementations of Adler-32 checksum algorithm
+ *
+ * Copyright 2016 Eric Biggers
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef LIB_X86_ADLER32_IMPL_H
+#define LIB_X86_ADLER32_IMPL_H
+
+#include "cpu_features.h"
+
+/*
+ * The following macros horizontally sum the s1 counters and add them to the
+ * real s1, and likewise for s2. They do this via a series of reductions, each
+ * of which halves the vector length, until just one counter remains.
+ *
+ * The s1 reductions don't depend on the s2 reductions and vice versa, so for
+ * efficiency they are interleaved. Also, every other s1 counter is 0 due to
+ * the 'psadbw' instruction (_mm_sad_epu8) summing groups of 8 bytes rather than
+ * 4; hence, one of the s1 reductions is skipped when going from 128 => 32 bits.
+ */
+
+#define ADLER32_FINISH_VEC_CHUNK_128(s1, s2, v_s1, v_s2) \
+{ \
+ __m128i /* __v4su */ s1_last = (v_s1), s2_last = (v_s2); \
+ \
+ /* 128 => 32 bits */ \
+ s2_last = _mm_add_epi32(s2_last, _mm_shuffle_epi32(s2_last, 0x31)); \
+ s1_last = _mm_add_epi32(s1_last, _mm_shuffle_epi32(s1_last, 0x02)); \
+ s2_last = _mm_add_epi32(s2_last, _mm_shuffle_epi32(s2_last, 0x02)); \
+ \
+ *(s1) += (u32)_mm_cvtsi128_si32(s1_last); \
+ *(s2) += (u32)_mm_cvtsi128_si32(s2_last); \
+}
+
+#define ADLER32_FINISH_VEC_CHUNK_256(s1, s2, v_s1, v_s2) \
+{ \
+ __m128i /* __v4su */ s1_128bit, s2_128bit; \
+ \
+ /* 256 => 128 bits */ \
+ s1_128bit = _mm_add_epi32(_mm256_extracti128_si256((v_s1), 0), \
+ _mm256_extracti128_si256((v_s1), 1)); \
+ s2_128bit = _mm_add_epi32(_mm256_extracti128_si256((v_s2), 0), \
+ _mm256_extracti128_si256((v_s2), 1)); \
+ \
+ ADLER32_FINISH_VEC_CHUNK_128((s1), (s2), s1_128bit, s2_128bit); \
+}
+
+/*
+ * This is a very silly partial workaround for gcc bug
+ * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=107892. The bug causes gcc to
+ * generate extra move instructions in some loops containing vector intrinsics.
+ *
+ * An alternate workaround would be to use gcc native vector operations instead
+ * of vector intrinsics. But that would result in MSVC needing its own code.
+ */
+#if GCC_PREREQ(1, 0)
+# define GCC_UPDATE_VARS(a, b, c, d, e, f) \
+ __asm__("" : "+x" (a), "+x" (b), "+x" (c), "+x" (d), "+x" (e), "+x" (f))
+#else
+# define GCC_UPDATE_VARS(a, b, c, d, e, f) \
+ (void)a, (void)b, (void)c, (void)d, (void)e, (void)f
+#endif
+
+/* SSE2 implementation */
+#if HAVE_SSE2_INTRIN
+# define adler32_sse2 adler32_sse2
+# define FUNCNAME adler32_sse2
+# define FUNCNAME_CHUNK adler32_sse2_chunk
+# define IMPL_ALIGNMENT 16
+# define IMPL_SEGMENT_LEN 32
+/*
+ * The 16-bit precision byte counters must not be allowed to undergo *signed*
+ * overflow, otherwise the signed multiplications at the end (_mm_madd_epi16)
+ * would behave incorrectly.
+ */
+# define IMPL_MAX_CHUNK_LEN (32 * (0x7FFF / 0xFF))
+# if HAVE_SSE2_NATIVE
+# define ATTRIBUTES
+# else
+# define ATTRIBUTES _target_attribute("sse2")
+# endif
+# include <emmintrin.h>
+static forceinline ATTRIBUTES void
+adler32_sse2_chunk(const __m128i *p, const __m128i *const end, u32 *s1, u32 *s2)
+{
+ const __m128i zeroes = _mm_setzero_si128();
+ const __m128i /* __v8hu */ mults_a =
+ _mm_setr_epi16(32, 31, 30, 29, 28, 27, 26, 25);
+ const __m128i /* __v8hu */ mults_b =
+ _mm_setr_epi16(24, 23, 22, 21, 20, 19, 18, 17);
+ const __m128i /* __v8hu */ mults_c =
+ _mm_setr_epi16(16, 15, 14, 13, 12, 11, 10, 9);
+ const __m128i /* __v8hu */ mults_d =
+ _mm_setr_epi16(8, 7, 6, 5, 4, 3, 2, 1);
+
+ /* s1 counters: 32-bit, sum of bytes */
+ __m128i /* __v4su */ v_s1 = zeroes;
+
+ /* s2 counters: 32-bit, sum of s1 values */
+ __m128i /* __v4su */ v_s2 = zeroes;
+
+ /*
+ * Thirty-two 16-bit counters for byte sums. Each accumulates the bytes
+ * that eventually need to be multiplied by a number 32...1 for addition
+ * into s2.
+ */
+ __m128i /* __v8hu */ v_byte_sums_a = zeroes;
+ __m128i /* __v8hu */ v_byte_sums_b = zeroes;
+ __m128i /* __v8hu */ v_byte_sums_c = zeroes;
+ __m128i /* __v8hu */ v_byte_sums_d = zeroes;
+
+ do {
+ /* Load the next 32 bytes. */
+ const __m128i bytes1 = *p++;
+ const __m128i bytes2 = *p++;
+
+ /*
+ * Accumulate the previous s1 counters into the s2 counters.
+ * Logically, this really should be v_s2 += v_s1 * 32, but we
+ * can do the multiplication (or left shift) later.
+ */
+ v_s2 = _mm_add_epi32(v_s2, v_s1);
+
+ /*
+ * s1 update: use "Packed Sum of Absolute Differences" to add
+ * the bytes horizontally with 8 bytes per sum. Then add the
+ * sums to the s1 counters.
+ */
+ v_s1 = _mm_add_epi32(v_s1, _mm_sad_epu8(bytes1, zeroes));
+ v_s1 = _mm_add_epi32(v_s1, _mm_sad_epu8(bytes2, zeroes));
+
+ /*
+ * Also accumulate the bytes into 32 separate counters that have
+ * 16-bit precision.
+ */
+ v_byte_sums_a = _mm_add_epi16(
+ v_byte_sums_a, _mm_unpacklo_epi8(bytes1, zeroes));
+ v_byte_sums_b = _mm_add_epi16(
+ v_byte_sums_b, _mm_unpackhi_epi8(bytes1, zeroes));
+ v_byte_sums_c = _mm_add_epi16(
+ v_byte_sums_c, _mm_unpacklo_epi8(bytes2, zeroes));
+ v_byte_sums_d = _mm_add_epi16(
+ v_byte_sums_d, _mm_unpackhi_epi8(bytes2, zeroes));
+
+ GCC_UPDATE_VARS(v_s1, v_s2, v_byte_sums_a, v_byte_sums_b,
+ v_byte_sums_c, v_byte_sums_d);
+ } while (p != end);
+
+ /* Finish calculating the s2 counters. */
+ v_s2 = _mm_slli_epi32(v_s2, 5);
+ v_s2 = _mm_add_epi32(v_s2, _mm_madd_epi16(v_byte_sums_a, mults_a));
+ v_s2 = _mm_add_epi32(v_s2, _mm_madd_epi16(v_byte_sums_b, mults_b));
+ v_s2 = _mm_add_epi32(v_s2, _mm_madd_epi16(v_byte_sums_c, mults_c));
+ v_s2 = _mm_add_epi32(v_s2, _mm_madd_epi16(v_byte_sums_d, mults_d));
+
+ /* Add the counters to the real s1 and s2. */
+ ADLER32_FINISH_VEC_CHUNK_128(s1, s2, v_s1, v_s2);
+}
+# include "../adler32_vec_template.h"
+#endif /* HAVE_SSE2_INTRIN */
+
+/*
+ * AVX2 implementation. Basically the same as the SSE2 one, but with the vector
+ * width doubled.
+ */
+#if HAVE_AVX2_INTRIN
+# define adler32_avx2 adler32_avx2
+# define FUNCNAME adler32_avx2
+# define FUNCNAME_CHUNK adler32_avx2_chunk
+# define IMPL_ALIGNMENT 32
+# define IMPL_SEGMENT_LEN 64
+# define IMPL_MAX_CHUNK_LEN (64 * (0x7FFF / 0xFF))
+# if HAVE_AVX2_NATIVE
+# define ATTRIBUTES
+# else
+# define ATTRIBUTES _target_attribute("avx2")
+# endif
+# include <immintrin.h>
+ /*
+ * With clang in MSVC compatibility mode, immintrin.h incorrectly skips
+ * including some sub-headers.
+ */
+# if defined(__clang__) && defined(_MSC_VER)
+# include <avxintrin.h>
+# include <avx2intrin.h>
+# endif
+static forceinline ATTRIBUTES void
+adler32_avx2_chunk(const __m256i *p, const __m256i *const end, u32 *s1, u32 *s2)
+{
+ const __m256i zeroes = _mm256_setzero_si256();
+ /*
+ * Note, the multipliers have to be in this order because
+ * _mm256_unpack{lo,hi}_epi8 work on each 128-bit lane separately.
+ */
+ const __m256i /* __v16hu */ mults_a =
+ _mm256_setr_epi16(64, 63, 62, 61, 60, 59, 58, 57,
+ 48, 47, 46, 45, 44, 43, 42, 41);
+ const __m256i /* __v16hu */ mults_b =
+ _mm256_setr_epi16(56, 55, 54, 53, 52, 51, 50, 49,
+ 40, 39, 38, 37, 36, 35, 34, 33);
+ const __m256i /* __v16hu */ mults_c =
+ _mm256_setr_epi16(32, 31, 30, 29, 28, 27, 26, 25,
+ 16, 15, 14, 13, 12, 11, 10, 9);
+ const __m256i /* __v16hu */ mults_d =
+ _mm256_setr_epi16(24, 23, 22, 21, 20, 19, 18, 17,
+ 8, 7, 6, 5, 4, 3, 2, 1);
+ __m256i /* __v8su */ v_s1 = zeroes;
+ __m256i /* __v8su */ v_s2 = zeroes;
+ __m256i /* __v16hu */ v_byte_sums_a = zeroes;
+ __m256i /* __v16hu */ v_byte_sums_b = zeroes;
+ __m256i /* __v16hu */ v_byte_sums_c = zeroes;
+ __m256i /* __v16hu */ v_byte_sums_d = zeroes;
+
+ do {
+ const __m256i bytes1 = *p++;
+ const __m256i bytes2 = *p++;
+
+ v_s2 = _mm256_add_epi32(v_s2, v_s1);
+ v_s1 = _mm256_add_epi32(v_s1, _mm256_sad_epu8(bytes1, zeroes));
+ v_s1 = _mm256_add_epi32(v_s1, _mm256_sad_epu8(bytes2, zeroes));
+ v_byte_sums_a = _mm256_add_epi16(
+ v_byte_sums_a, _mm256_unpacklo_epi8(bytes1, zeroes));
+ v_byte_sums_b = _mm256_add_epi16(
+ v_byte_sums_b, _mm256_unpackhi_epi8(bytes1, zeroes));
+ v_byte_sums_c = _mm256_add_epi16(
+ v_byte_sums_c, _mm256_unpacklo_epi8(bytes2, zeroes));
+ v_byte_sums_d = _mm256_add_epi16(
+ v_byte_sums_d, _mm256_unpackhi_epi8(bytes2, zeroes));
+
+ GCC_UPDATE_VARS(v_s1, v_s2, v_byte_sums_a, v_byte_sums_b,
+ v_byte_sums_c, v_byte_sums_d);
+ } while (p != end);
+
+ v_s2 = _mm256_slli_epi32(v_s2, 6);
+ v_s2 = _mm256_add_epi32(v_s2, _mm256_madd_epi16(v_byte_sums_a, mults_a));
+ v_s2 = _mm256_add_epi32(v_s2, _mm256_madd_epi16(v_byte_sums_b, mults_b));
+ v_s2 = _mm256_add_epi32(v_s2, _mm256_madd_epi16(v_byte_sums_c, mults_c));
+ v_s2 = _mm256_add_epi32(v_s2, _mm256_madd_epi16(v_byte_sums_d, mults_d));
+ ADLER32_FINISH_VEC_CHUNK_256(s1, s2, v_s1, v_s2);
+}
+# include "../adler32_vec_template.h"
+#endif /* HAVE_AVX2_INTRIN */
+
+#if defined(adler32_avx2) && HAVE_AVX2_NATIVE
+#define DEFAULT_IMPL adler32_avx2
+#else
+static inline adler32_func_t
+arch_select_adler32_func(void)
+{
+ const u32 features MAYBE_UNUSED = get_x86_cpu_features();
+
+#ifdef adler32_avx2
+ if (HAVE_AVX2(features))
+ return adler32_avx2;
+#endif
+#ifdef adler32_sse2
+ if (HAVE_SSE2(features))
+ return adler32_sse2;
+#endif
+ return NULL;
+}
+#define arch_select_adler32_func arch_select_adler32_func
+#endif
+
+#endif /* LIB_X86_ADLER32_IMPL_H */
diff --git a/tools/z64compress/src/enc/libdeflate/lib/x86/cpu_features.c b/tools/z64compress/src/enc/libdeflate/lib/x86/cpu_features.c
new file mode 100644
index 000000000..958777ebd
--- /dev/null
+++ b/tools/z64compress/src/enc/libdeflate/lib/x86/cpu_features.c
@@ -0,0 +1,151 @@
+/*
+ * x86/cpu_features.c - feature detection for x86 CPUs
+ *
+ * Copyright 2016 Eric Biggers
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "../cpu_features_common.h" /* must be included first */
+#include "cpu_features.h"
+
+#if HAVE_DYNAMIC_X86_CPU_FEATURES
+
+/* With old GCC versions we have to manually save and restore the x86_32 PIC
+ * register (ebx). See: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=47602 */
+#if defined(ARCH_X86_32) && defined(__PIC__)
+# define EBX_CONSTRAINT "=&r"
+#else
+# define EBX_CONSTRAINT "=b"
+#endif
+
+/* Execute the CPUID instruction. */
+static inline void
+cpuid(u32 leaf, u32 subleaf, u32 *a, u32 *b, u32 *c, u32 *d)
+{
+#ifdef _MSC_VER
+ int result[4];
+
+ __cpuidex(result, leaf, subleaf);
+ *a = result[0];
+ *b = result[1];
+ *c = result[2];
+ *d = result[3];
+#else
+ __asm__(".ifnc %%ebx, %1; mov %%ebx, %1; .endif\n"
+ "cpuid \n"
+ ".ifnc %%ebx, %1; xchg %%ebx, %1; .endif\n"
+ : "=a" (*a), EBX_CONSTRAINT (*b), "=c" (*c), "=d" (*d)
+ : "a" (leaf), "c" (subleaf));
+#endif
+}
+
+/* Read an extended control register. */
+static inline u64
+read_xcr(u32 index)
+{
+#ifdef _MSC_VER
+ return _xgetbv(index);
+#else
+ u32 edx, eax;
+
+ /* Execute the "xgetbv" instruction. Old versions of binutils do not
+ * recognize this instruction, so list the raw bytes instead. */
+ __asm__ (".byte 0x0f, 0x01, 0xd0" : "=d" (edx), "=a" (eax) : "c" (index));
+
+ return ((u64)edx << 32) | eax;
+#endif
+}
+
+#undef BIT
+#define BIT(nr) (1UL << (nr))
+
+#define XCR0_BIT_SSE BIT(1)
+#define XCR0_BIT_AVX BIT(2)
+
+#define IS_SET(reg, nr) ((reg) & BIT(nr))
+#define IS_ALL_SET(reg, mask) (((reg) & (mask)) == (mask))
+
+static const struct cpu_feature x86_cpu_feature_table[] = {
+ {X86_CPU_FEATURE_SSE2, "sse2"},
+ {X86_CPU_FEATURE_PCLMUL, "pclmul"},
+ {X86_CPU_FEATURE_AVX, "avx"},
+ {X86_CPU_FEATURE_AVX2, "avx2"},
+ {X86_CPU_FEATURE_BMI2, "bmi2"},
+};
+
+volatile u32 libdeflate_x86_cpu_features = 0;
+
+/* Initialize libdeflate_x86_cpu_features. */
+void libdeflate_init_x86_cpu_features(void)
+{
+ u32 features = 0;
+ u32 dummy1, dummy2, dummy3, dummy4;
+ u32 max_function;
+ u32 features_1, features_2, features_3, features_4;
+ bool os_avx_support = false;
+
+ /* Get maximum supported function */
+ cpuid(0, 0, &max_function, &dummy2, &dummy3, &dummy4);
+ if (max_function < 1)
+ goto out;
+
+ /* Standard feature flags */
+ cpuid(1, 0, &dummy1, &dummy2, &features_2, &features_1);
+
+ if (IS_SET(features_1, 26))
+ features |= X86_CPU_FEATURE_SSE2;
+
+ if (IS_SET(features_2, 1))
+ features |= X86_CPU_FEATURE_PCLMUL;
+
+ if (IS_SET(features_2, 27)) { /* OSXSAVE set? */
+ u64 xcr0 = read_xcr(0);
+
+ os_avx_support = IS_ALL_SET(xcr0,
+ XCR0_BIT_SSE |
+ XCR0_BIT_AVX);
+ }
+
+ if (os_avx_support && IS_SET(features_2, 28))
+ features |= X86_CPU_FEATURE_AVX;
+
+ if (max_function < 7)
+ goto out;
+
+ /* Extended feature flags */
+ cpuid(7, 0, &dummy1, &features_3, &features_4, &dummy4);
+
+ if (os_avx_support && IS_SET(features_3, 5))
+ features |= X86_CPU_FEATURE_AVX2;
+
+ if (IS_SET(features_3, 8))
+ features |= X86_CPU_FEATURE_BMI2;
+
+out:
+ disable_cpu_features_for_testing(&features, x86_cpu_feature_table,
+ ARRAY_LEN(x86_cpu_feature_table));
+
+ libdeflate_x86_cpu_features = features | X86_CPU_FEATURES_KNOWN;
+}
+
+#endif /* HAVE_DYNAMIC_X86_CPU_FEATURES */
diff --git a/tools/z64compress/src/enc/libdeflate/lib/x86/cpu_features.h b/tools/z64compress/src/enc/libdeflate/lib/x86/cpu_features.h
new file mode 100644
index 000000000..561bd567f
--- /dev/null
+++ b/tools/z64compress/src/enc/libdeflate/lib/x86/cpu_features.h
@@ -0,0 +1,155 @@
+/*
+ * x86/cpu_features.h - feature detection for x86 CPUs
+ *
+ * Copyright 2016 Eric Biggers
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef LIB_X86_CPU_FEATURES_H
+#define LIB_X86_CPU_FEATURES_H
+
+#include "../lib_common.h"
+
+#define HAVE_DYNAMIC_X86_CPU_FEATURES 0
+
+#if defined(ARCH_X86_32) || defined(ARCH_X86_64)
+
+#if COMPILER_SUPPORTS_TARGET_FUNCTION_ATTRIBUTE || defined(_MSC_VER)
+# undef HAVE_DYNAMIC_X86_CPU_FEATURES
+# define HAVE_DYNAMIC_X86_CPU_FEATURES 1
+#endif
+
+#define X86_CPU_FEATURE_SSE2 0x00000001
+#define X86_CPU_FEATURE_PCLMUL 0x00000002
+#define X86_CPU_FEATURE_AVX 0x00000004
+#define X86_CPU_FEATURE_AVX2 0x00000008
+#define X86_CPU_FEATURE_BMI2 0x00000010
+
+#define HAVE_SSE2(features) (HAVE_SSE2_NATIVE || ((features) & X86_CPU_FEATURE_SSE2))
+#define HAVE_PCLMUL(features) (HAVE_PCLMUL_NATIVE || ((features) & X86_CPU_FEATURE_PCLMUL))
+#define HAVE_AVX(features) (HAVE_AVX_NATIVE || ((features) & X86_CPU_FEATURE_AVX))
+#define HAVE_AVX2(features) (HAVE_AVX2_NATIVE || ((features) & X86_CPU_FEATURE_AVX2))
+#define HAVE_BMI2(features) (HAVE_BMI2_NATIVE || ((features) & X86_CPU_FEATURE_BMI2))
+
+#if HAVE_DYNAMIC_X86_CPU_FEATURES
+#define X86_CPU_FEATURES_KNOWN 0x80000000
+extern volatile u32 libdeflate_x86_cpu_features;
+
+void libdeflate_init_x86_cpu_features(void);
+
+static inline u32 get_x86_cpu_features(void)
+{
+ if (libdeflate_x86_cpu_features == 0)
+ libdeflate_init_x86_cpu_features();
+ return libdeflate_x86_cpu_features;
+}
+#else /* HAVE_DYNAMIC_X86_CPU_FEATURES */
+static inline u32 get_x86_cpu_features(void) { return 0; }
+#endif /* !HAVE_DYNAMIC_X86_CPU_FEATURES */
+
+/*
+ * Prior to gcc 4.9 (r200349) and clang 3.8 (r239883), x86 intrinsics not
+ * available in the main target couldn't be used in 'target' attribute
+ * functions. Unfortunately clang has no feature test macro for this, so we
+ * have to check its version.
+ */
+#if HAVE_DYNAMIC_X86_CPU_FEATURES && \
+ (GCC_PREREQ(4, 9) || CLANG_PREREQ(3, 8, 7030000) || defined(_MSC_VER))
+# define HAVE_TARGET_INTRINSICS 1
+#else
+# define HAVE_TARGET_INTRINSICS 0
+#endif
+
+/* SSE2 */
+#if defined(__SSE2__) || \
+ (defined(_MSC_VER) && \
+ (defined(ARCH_X86_64) || (defined(_M_IX86_FP) && _M_IX86_FP >= 2)))
+# define HAVE_SSE2_NATIVE 1
+#else
+# define HAVE_SSE2_NATIVE 0
+#endif
+#define HAVE_SSE2_INTRIN (HAVE_SSE2_NATIVE || HAVE_TARGET_INTRINSICS)
+
+/* PCLMUL */
+#if defined(__PCLMUL__) || (defined(_MSC_VER) && defined(__AVX2__))
+# define HAVE_PCLMUL_NATIVE 1
+#else
+# define HAVE_PCLMUL_NATIVE 0
+#endif
+#if HAVE_PCLMUL_NATIVE || (HAVE_TARGET_INTRINSICS && \
+ (GCC_PREREQ(4, 4) || \
+ __has_builtin(__builtin_ia32_pclmulqdq128) || \
+ defined(_MSC_VER)))
+# define HAVE_PCLMUL_INTRIN 1
+#else
+# define HAVE_PCLMUL_INTRIN 0
+#endif
+
+/* AVX */
+#ifdef __AVX__
+# define HAVE_AVX_NATIVE 1
+#else
+# define HAVE_AVX_NATIVE 0
+#endif
+#if HAVE_AVX_NATIVE || (HAVE_TARGET_INTRINSICS && \
+ (GCC_PREREQ(4, 6) || \
+ __has_builtin(__builtin_ia32_maxps256) || \
+ defined(_MSC_VER)))
+# define HAVE_AVX_INTRIN 1
+#else
+# define HAVE_AVX_INTRIN 0
+#endif
+
+/* AVX2 */
+#ifdef __AVX2__
+# define HAVE_AVX2_NATIVE 1
+#else
+# define HAVE_AVX2_NATIVE 0
+#endif
+#if HAVE_AVX2_NATIVE || (HAVE_TARGET_INTRINSICS && \
+ (GCC_PREREQ(4, 7) || \
+ __has_builtin(__builtin_ia32_psadbw256) || \
+ defined(_MSC_VER)))
+# define HAVE_AVX2_INTRIN 1
+#else
+# define HAVE_AVX2_INTRIN 0
+#endif
+
+/* BMI2 */
+#if defined(__BMI2__) || (defined(_MSC_VER) && defined(__AVX2__))
+# define HAVE_BMI2_NATIVE 1
+#else
+# define HAVE_BMI2_NATIVE 0
+#endif
+#if HAVE_BMI2_NATIVE || (HAVE_TARGET_INTRINSICS && \
+ (GCC_PREREQ(4, 7) || \
+ __has_builtin(__builtin_ia32_pdep_di) || \
+ defined(_MSC_VER)))
+# define HAVE_BMI2_INTRIN 1
+#else
+# define HAVE_BMI2_INTRIN 0
+#endif
+
+#endif /* ARCH_X86_32 || ARCH_X86_64 */
+
+#endif /* LIB_X86_CPU_FEATURES_H */
diff --git a/tools/z64compress/src/enc/libdeflate/lib/x86/crc32_impl.h b/tools/z64compress/src/enc/libdeflate/lib/x86/crc32_impl.h
new file mode 100644
index 000000000..79cc7944e
--- /dev/null
+++ b/tools/z64compress/src/enc/libdeflate/lib/x86/crc32_impl.h
@@ -0,0 +1,96 @@
+/*
+ * x86/crc32_impl.h - x86 implementations of the gzip CRC-32 algorithm
+ *
+ * Copyright 2016 Eric Biggers
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef LIB_X86_CRC32_IMPL_H
+#define LIB_X86_CRC32_IMPL_H
+
+#include "cpu_features.h"
+
+/* PCLMUL implementation */
+#if HAVE_PCLMUL_INTRIN
+# define crc32_x86_pclmul crc32_x86_pclmul
+# define SUFFIX _pclmul
+# if HAVE_PCLMUL_NATIVE
+# define ATTRIBUTES
+# else
+# define ATTRIBUTES _target_attribute("pclmul")
+# endif
+# define FOLD_PARTIAL_VECS 0
+# include "crc32_pclmul_template.h"
+#endif
+
+/*
+ * PCLMUL/AVX implementation. This implementation has two benefits over the
+ * regular PCLMUL one. First, simply compiling against the AVX target can
+ * improve performance significantly (e.g. 10100 MB/s to 16700 MB/s on Skylake)
+ * without actually using any AVX intrinsics, probably due to the availability
+ * of non-destructive VEX-encoded instructions. Second, AVX support implies
+ * SSSE3 and SSE4.1 support, and we can use SSSE3 and SSE4.1 intrinsics for
+ * efficient handling of partial blocks. (We *could* compile a variant with
+ * PCLMUL+SSSE3+SSE4.1 w/o AVX, but for simplicity we don't currently bother.)
+ *
+ * FIXME: with MSVC, this isn't actually compiled with AVX code generation
+ * enabled yet. That would require that this be moved to its own .c file.
+ */
+#if HAVE_PCLMUL_INTRIN && HAVE_AVX_INTRIN
+# define crc32_x86_pclmul_avx crc32_x86_pclmul_avx
+# define SUFFIX _pclmul_avx
+# if HAVE_PCLMUL_NATIVE && HAVE_AVX_NATIVE
+# define ATTRIBUTES
+# else
+# define ATTRIBUTES _target_attribute("pclmul,avx")
+# endif
+# define FOLD_PARTIAL_VECS 1
+# include "crc32_pclmul_template.h"
+#endif
+
+/*
+ * If the best implementation is statically available, use it unconditionally.
+ * Otherwise choose the best implementation at runtime.
+ */
+#if defined(crc32_x86_pclmul_avx) && HAVE_PCLMUL_NATIVE && HAVE_AVX_NATIVE
+#define DEFAULT_IMPL crc32_x86_pclmul_avx
+#else
+static inline crc32_func_t
+arch_select_crc32_func(void)
+{
+ const u32 features MAYBE_UNUSED = get_x86_cpu_features();
+
+#ifdef crc32_x86_pclmul_avx
+ if (HAVE_PCLMUL(features) && HAVE_AVX(features))
+ return crc32_x86_pclmul_avx;
+#endif
+#ifdef crc32_x86_pclmul
+ if (HAVE_PCLMUL(features))
+ return crc32_x86_pclmul;
+#endif
+ return NULL;
+}
+#define arch_select_crc32_func arch_select_crc32_func
+#endif
+
+#endif /* LIB_X86_CRC32_IMPL_H */
diff --git a/tools/z64compress/src/enc/libdeflate/lib/x86/crc32_pclmul_template.h b/tools/z64compress/src/enc/libdeflate/lib/x86/crc32_pclmul_template.h
new file mode 100644
index 000000000..1d5782375
--- /dev/null
+++ b/tools/z64compress/src/enc/libdeflate/lib/x86/crc32_pclmul_template.h
@@ -0,0 +1,354 @@
+/*
+ * x86/crc32_pclmul_template.h - gzip CRC-32 with PCLMULQDQ instructions
+ *
+ * Copyright 2016 Eric Biggers
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * This file is a "template" for instantiating PCLMULQDQ-based crc32_x86
+ * functions. The "parameters" are:
+ *
+ * SUFFIX:
+ * Name suffix to append to all instantiated functions.
+ * ATTRIBUTES:
+ * Target function attributes to use.
+ * FOLD_PARTIAL_VECS:
+ * Use vector instructions to handle any partial blocks at the beginning
+ * and end, instead of falling back to scalar instructions for those parts.
+ * Requires SSSE3 and SSE4.1 intrinsics.
+ *
+ * The overall algorithm used is CRC folding with carryless multiplication
+ * instructions. Note that the x86 crc32 instruction cannot be used, as it is
+ * for a different polynomial, not the gzip one. For an explanation of CRC
+ * folding with carryless multiplication instructions, see
+ * scripts/gen_crc32_multipliers.c and the following paper:
+ *
+ * "Fast CRC Computation for Generic Polynomials Using PCLMULQDQ Instruction"
+ * https://www.intel.com/content/dam/www/public/us/en/documents/white-papers/fast-crc-computation-generic-polynomials-pclmulqdq-paper.pdf
+ */
+
+#include <immintrin.h>
+/*
+ * With clang in MSVC compatibility mode, immintrin.h incorrectly skips
+ * including some sub-headers.
+ */
+#if defined(__clang__) && defined(_MSC_VER)
+# include <tmmintrin.h>
+# include <smmintrin.h>
+# include <wmmintrin.h>
+#endif
+
+#undef fold_vec
+static forceinline ATTRIBUTES __m128i
+ADD_SUFFIX(fold_vec)(__m128i src, __m128i dst, __m128i /* __v2di */ multipliers)
+{
+ /*
+ * The immediate constant for PCLMULQDQ specifies which 64-bit halves of
+ * the 128-bit vectors to multiply:
+ *
+ * 0x00 means low halves (higher degree polynomial terms for us)
+ * 0x11 means high halves (lower degree polynomial terms for us)
+ */
+ dst = _mm_xor_si128(dst, _mm_clmulepi64_si128(src, multipliers, 0x00));
+ dst = _mm_xor_si128(dst, _mm_clmulepi64_si128(src, multipliers, 0x11));
+ return dst;
+}
+#define fold_vec ADD_SUFFIX(fold_vec)
+
+#if FOLD_PARTIAL_VECS
+/*
+ * Given v containing a 16-byte polynomial, and a pointer 'p' that points to the
+ * next '1 <= len <= 15' data bytes, rearrange the concatenation of v and the
+ * data into vectors x0 and x1 that contain 'len' bytes and 16 bytes,
+ * respectively. Then fold x0 into x1 and return the result. Assumes that
+ * 'p + len - 16' is in-bounds.
+ */
+#undef fold_partial_vec
+static forceinline ATTRIBUTES __m128i
+ADD_SUFFIX(fold_partial_vec)(__m128i v, const u8 *p, size_t len,
+ __m128i /* __v2du */ multipliers_1)
+{
+ /*
+ * pshufb(v, shift_tab[len..len+15]) left shifts v by 16-len bytes.
+ * pshufb(v, shift_tab[len+16..len+31]) right shifts v by len bytes.
+ */
+ static const u8 shift_tab[48] = {
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+ 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ };
+ __m128i lshift = _mm_loadu_si128((const void *)&shift_tab[len]);
+ __m128i rshift = _mm_loadu_si128((const void *)&shift_tab[len + 16]);
+ __m128i x0, x1;
+
+ /* x0 = v left-shifted by '16 - len' bytes */
+ x0 = _mm_shuffle_epi8(v, lshift);
+
+ /*
+ * x1 = the last '16 - len' bytes from v (i.e. v right-shifted by 'len'
+ * bytes) followed by the remaining data.
+ */
+ x1 = _mm_blendv_epi8(_mm_shuffle_epi8(v, rshift),
+ _mm_loadu_si128((const void *)(p + len - 16)),
+ /* msb 0/1 of each byte selects byte from arg1/2 */
+ rshift);
+
+ return fold_vec(x0, x1, multipliers_1);
+}
+#define fold_partial_vec ADD_SUFFIX(fold_partial_vec)
+#endif /* FOLD_PARTIAL_VECS */
+
+static u32 ATTRIBUTES MAYBE_UNUSED
+ADD_SUFFIX(crc32_x86)(u32 crc, const u8 *p, size_t len)
+{
+ const __m128i /* __v2du */ multipliers_8 =
+ _mm_set_epi64x(CRC32_8VECS_MULT_2, CRC32_8VECS_MULT_1);
+ const __m128i /* __v2du */ multipliers_4 =
+ _mm_set_epi64x(CRC32_4VECS_MULT_2, CRC32_4VECS_MULT_1);
+ const __m128i /* __v2du */ multipliers_2 =
+ _mm_set_epi64x(CRC32_2VECS_MULT_2, CRC32_2VECS_MULT_1);
+ const __m128i /* __v2du */ multipliers_1 =
+ _mm_set_epi64x(CRC32_1VECS_MULT_2, CRC32_1VECS_MULT_1);
+ const __m128i /* __v2du */ final_multiplier =
+ _mm_set_epi64x(0, CRC32_FINAL_MULT);
+ const __m128i mask32 = _mm_set_epi32(0, 0, 0, 0xFFFFFFFF);
+ const __m128i /* __v2du */ barrett_reduction_constants =
+ _mm_set_epi64x(CRC32_BARRETT_CONSTANT_2,
+ CRC32_BARRETT_CONSTANT_1);
+ __m128i v0, v1, v2, v3, v4, v5, v6, v7;
+
+ /*
+ * There are two overall code paths. The first path supports all
+ * lengths, but is intended for short lengths; it uses unaligned loads
+ * and does at most 4-way folds. The second path only supports longer
+ * lengths, aligns the pointer in order to do aligned loads, and does up
+ * to 8-way folds. The length check below decides which path to take.
+ */
+ if (len < 1024) {
+ if (len < 16)
+ return crc32_slice1(crc, p, len);
+
+ v0 = _mm_xor_si128(_mm_loadu_si128((const void *)p),
+ _mm_cvtsi32_si128(crc));
+ p += 16;
+
+ if (len >= 64) {
+ v1 = _mm_loadu_si128((const void *)(p + 0));
+ v2 = _mm_loadu_si128((const void *)(p + 16));
+ v3 = _mm_loadu_si128((const void *)(p + 32));
+ p += 48;
+ while (len >= 64 + 64) {
+ v0 = fold_vec(v0, _mm_loadu_si128((const void *)(p + 0)),
+ multipliers_4);
+ v1 = fold_vec(v1, _mm_loadu_si128((const void *)(p + 16)),
+ multipliers_4);
+ v2 = fold_vec(v2, _mm_loadu_si128((const void *)(p + 32)),
+ multipliers_4);
+ v3 = fold_vec(v3, _mm_loadu_si128((const void *)(p + 48)),
+ multipliers_4);
+ p += 64;
+ len -= 64;
+ }
+ v0 = fold_vec(v0, v2, multipliers_2);
+ v1 = fold_vec(v1, v3, multipliers_2);
+ if (len & 32) {
+ v0 = fold_vec(v0, _mm_loadu_si128((const void *)(p + 0)),
+ multipliers_2);
+ v1 = fold_vec(v1, _mm_loadu_si128((const void *)(p + 16)),
+ multipliers_2);
+ p += 32;
+ }
+ v0 = fold_vec(v0, v1, multipliers_1);
+ if (len & 16) {
+ v0 = fold_vec(v0, _mm_loadu_si128((const void *)p),
+ multipliers_1);
+ p += 16;
+ }
+ } else {
+ if (len >= 32) {
+ v0 = fold_vec(v0, _mm_loadu_si128((const void *)p),
+ multipliers_1);
+ p += 16;
+ if (len >= 48) {
+ v0 = fold_vec(v0, _mm_loadu_si128((const void *)p),
+ multipliers_1);
+ p += 16;
+ }
+ }
+ }
+ } else {
+ const size_t align = -(uintptr_t)p & 15;
+ const __m128i *vp;
+
+ #if FOLD_PARTIAL_VECS
+ v0 = _mm_xor_si128(_mm_loadu_si128((const void *)p),
+ _mm_cvtsi32_si128(crc));
+ p += 16;
+ /* Align p to the next 16-byte boundary. */
+ if (align) {
+ v0 = fold_partial_vec(v0, p, align, multipliers_1);
+ p += align;
+ len -= align;
+ }
+ vp = (const __m128i *)p;
+ #else
+ /* Align p to the next 16-byte boundary. */
+ if (align) {
+ crc = crc32_slice1(crc, p, align);
+ p += align;
+ len -= align;
+ }
+ vp = (const __m128i *)p;
+ v0 = _mm_xor_si128(*vp++, _mm_cvtsi32_si128(crc));
+ #endif
+ v1 = *vp++;
+ v2 = *vp++;
+ v3 = *vp++;
+ v4 = *vp++;
+ v5 = *vp++;
+ v6 = *vp++;
+ v7 = *vp++;
+ do {
+ v0 = fold_vec(v0, *vp++, multipliers_8);
+ v1 = fold_vec(v1, *vp++, multipliers_8);
+ v2 = fold_vec(v2, *vp++, multipliers_8);
+ v3 = fold_vec(v3, *vp++, multipliers_8);
+ v4 = fold_vec(v4, *vp++, multipliers_8);
+ v5 = fold_vec(v5, *vp++, multipliers_8);
+ v6 = fold_vec(v6, *vp++, multipliers_8);
+ v7 = fold_vec(v7, *vp++, multipliers_8);
+ len -= 128;
+ } while (len >= 128 + 128);
+
+ v0 = fold_vec(v0, v4, multipliers_4);
+ v1 = fold_vec(v1, v5, multipliers_4);
+ v2 = fold_vec(v2, v6, multipliers_4);
+ v3 = fold_vec(v3, v7, multipliers_4);
+ if (len & 64) {
+ v0 = fold_vec(v0, *vp++, multipliers_4);
+ v1 = fold_vec(v1, *vp++, multipliers_4);
+ v2 = fold_vec(v2, *vp++, multipliers_4);
+ v3 = fold_vec(v3, *vp++, multipliers_4);
+ }
+
+ v0 = fold_vec(v0, v2, multipliers_2);
+ v1 = fold_vec(v1, v3, multipliers_2);
+ if (len & 32) {
+ v0 = fold_vec(v0, *vp++, multipliers_2);
+ v1 = fold_vec(v1, *vp++, multipliers_2);
+ }
+
+ v0 = fold_vec(v0, v1, multipliers_1);
+ if (len & 16)
+ v0 = fold_vec(v0, *vp++, multipliers_1);
+
+ p = (const u8 *)vp;
+ }
+ len &= 15;
+
+ /*
+ * If fold_partial_vec() is available, handle any remaining partial
+ * block now before reducing to 32 bits.
+ */
+#if FOLD_PARTIAL_VECS
+ if (len)
+ v0 = fold_partial_vec(v0, p, len, multipliers_1);
+#endif
+
+ /*
+ * Fold 128 => 96 bits. This also implicitly appends 32 zero bits,
+ * which is equivalent to multiplying by x^32. This is needed because
+ * the CRC is defined as M(x)*x^32 mod G(x), not just M(x) mod G(x).
+ */
+ v0 = _mm_xor_si128(_mm_srli_si128(v0, 8),
+ _mm_clmulepi64_si128(v0, multipliers_1, 0x10));
+
+ /* Fold 96 => 64 bits. */
+ v0 = _mm_xor_si128(_mm_srli_si128(v0, 4),
+ _mm_clmulepi64_si128(_mm_and_si128(v0, mask32),
+ final_multiplier, 0x00));
+
+ /*
+ * Reduce 64 => 32 bits using Barrett reduction.
+ *
+ * Let M(x) = A(x)*x^32 + B(x) be the remaining message. The goal is to
+ * compute R(x) = M(x) mod G(x). Since degree(B(x)) < degree(G(x)):
+ *
+ * R(x) = (A(x)*x^32 + B(x)) mod G(x)
+ * = (A(x)*x^32) mod G(x) + B(x)
+ *
+ * Then, by the Division Algorithm there exists a unique q(x) such that:
+ *
+ * A(x)*x^32 mod G(x) = A(x)*x^32 - q(x)*G(x)
+ *
+ * Since the left-hand side is of maximum degree 31, the right-hand side
+ * must be too. This implies that we can apply 'mod x^32' to the
+ * right-hand side without changing its value:
+ *
+ * (A(x)*x^32 - q(x)*G(x)) mod x^32 = q(x)*G(x) mod x^32
+ *
+ * Note that '+' is equivalent to '-' in polynomials over GF(2).
+ *
+ * We also know that:
+ *
+ * / A(x)*x^32 \
+ * q(x) = floor ( --------- )
+ * \ G(x) /
+ *
+ * To compute this efficiently, we can multiply the top and bottom by
+ * x^32 and move the division by G(x) to the top:
+ *
+ * / A(x) * floor(x^64 / G(x)) \
+ * q(x) = floor ( ------------------------- )
+ * \ x^32 /
+ *
+ * Note that floor(x^64 / G(x)) is a constant.
+ *
+ * So finally we have:
+ *
+ * / A(x) * floor(x^64 / G(x)) \
+ * R(x) = B(x) + G(x)*floor ( ------------------------- )
+ * \ x^32 /
+ */
+ v1 = _mm_clmulepi64_si128(_mm_and_si128(v0, mask32),
+ barrett_reduction_constants, 0x00);
+ v1 = _mm_clmulepi64_si128(_mm_and_si128(v1, mask32),
+ barrett_reduction_constants, 0x10);
+ v0 = _mm_xor_si128(v0, v1);
+#if FOLD_PARTIAL_VECS
+ crc = _mm_extract_epi32(v0, 1);
+#else
+ crc = _mm_cvtsi128_si32(_mm_shuffle_epi32(v0, 0x01));
+ /* Process up to 15 bytes left over at the end. */
+ crc = crc32_slice1(crc, p, len);
+#endif
+ return crc;
+}
+
+#undef SUFFIX
+#undef ATTRIBUTES
+#undef FOLD_PARTIAL_VECS
diff --git a/tools/z64compress/src/enc/libdeflate/lib/x86/decompress_impl.h b/tools/z64compress/src/enc/libdeflate/lib/x86/decompress_impl.h
new file mode 100644
index 000000000..3e2ec37e7
--- /dev/null
+++ b/tools/z64compress/src/enc/libdeflate/lib/x86/decompress_impl.h
@@ -0,0 +1,54 @@
+#ifndef LIB_X86_DECOMPRESS_IMPL_H
+#define LIB_X86_DECOMPRESS_IMPL_H
+
+#include "cpu_features.h"
+
+/*
+ * BMI2 optimized version
+ *
+ * FIXME: with MSVC, this isn't actually compiled with BMI2 code generation
+ * enabled yet. That would require that this be moved to its own .c file.
+ */
+#if HAVE_BMI2_INTRIN
+# define deflate_decompress_bmi2 deflate_decompress_bmi2
+# define FUNCNAME deflate_decompress_bmi2
+# if !HAVE_BMI2_NATIVE
+# define ATTRIBUTES _target_attribute("bmi2")
+# endif
+ /*
+ * Even with __attribute__((target("bmi2"))), gcc doesn't reliably use the
+ * bzhi instruction for 'word & BITMASK(count)'. So use the bzhi intrinsic
+ * explicitly. EXTRACT_VARBITS() is equivalent to 'word & BITMASK(count)';
+ * EXTRACT_VARBITS8() is equivalent to 'word & BITMASK((u8)count)'.
+ * Nevertheless, their implementation using the bzhi intrinsic is identical,
+ * as the bzhi instruction truncates the count to 8 bits implicitly.
+ */
+# ifndef __clang__
+# include <immintrin.h>
+# ifdef ARCH_X86_64
+# define EXTRACT_VARBITS(word, count) _bzhi_u64((word), (count))
+# define EXTRACT_VARBITS8(word, count) _bzhi_u64((word), (count))
+# else
+# define EXTRACT_VARBITS(word, count) _bzhi_u32((word), (count))
+# define EXTRACT_VARBITS8(word, count) _bzhi_u32((word), (count))
+# endif
+# endif
+# include "../decompress_template.h"
+#endif /* HAVE_BMI2_INTRIN */
+
+#if defined(deflate_decompress_bmi2) && HAVE_BMI2_NATIVE
+#define DEFAULT_IMPL deflate_decompress_bmi2
+#else
+static inline decompress_func_t
+arch_select_decompress_func(void)
+{
+#ifdef deflate_decompress_bmi2
+ if (HAVE_BMI2(get_x86_cpu_features()))
+ return deflate_decompress_bmi2;
+#endif
+ return NULL;
+}
+#define arch_select_decompress_func arch_select_decompress_func
+#endif
+
+#endif /* LIB_X86_DECOMPRESS_IMPL_H */
diff --git a/tools/z64compress/src/enc/libdeflate/lib/x86/matchfinder_impl.h b/tools/z64compress/src/enc/libdeflate/lib/x86/matchfinder_impl.h
new file mode 100644
index 000000000..8433b9b10
--- /dev/null
+++ b/tools/z64compress/src/enc/libdeflate/lib/x86/matchfinder_impl.h
@@ -0,0 +1,124 @@
+/*
+ * x86/matchfinder_impl.h - x86 implementations of matchfinder functions
+ *
+ * Copyright 2016 Eric Biggers
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef LIB_X86_MATCHFINDER_IMPL_H
+#define LIB_X86_MATCHFINDER_IMPL_H
+
+#include "cpu_features.h"
+
+#if HAVE_AVX2_NATIVE
+# include <immintrin.h>
+static forceinline void
+matchfinder_init_avx2(mf_pos_t *data, size_t size)
+{
+ __m256i *p = (__m256i *)data;
+ __m256i v = _mm256_set1_epi16(MATCHFINDER_INITVAL);
+
+ STATIC_ASSERT(MATCHFINDER_MEM_ALIGNMENT % sizeof(*p) == 0);
+ STATIC_ASSERT(MATCHFINDER_SIZE_ALIGNMENT % (4 * sizeof(*p)) == 0);
+ STATIC_ASSERT(sizeof(mf_pos_t) == 2);
+
+ do {
+ p[0] = v;
+ p[1] = v;
+ p[2] = v;
+ p[3] = v;
+ p += 4;
+ size -= 4 * sizeof(*p);
+ } while (size != 0);
+}
+#define matchfinder_init matchfinder_init_avx2
+
+static forceinline void
+matchfinder_rebase_avx2(mf_pos_t *data, size_t size)
+{
+ __m256i *p = (__m256i *)data;
+ __m256i v = _mm256_set1_epi16((u16)-MATCHFINDER_WINDOW_SIZE);
+
+ STATIC_ASSERT(MATCHFINDER_MEM_ALIGNMENT % sizeof(*p) == 0);
+ STATIC_ASSERT(MATCHFINDER_SIZE_ALIGNMENT % (4 * sizeof(*p)) == 0);
+ STATIC_ASSERT(sizeof(mf_pos_t) == 2);
+
+ do {
+ /* PADDSW: Add Packed Signed Integers With Signed Saturation */
+ p[0] = _mm256_adds_epi16(p[0], v);
+ p[1] = _mm256_adds_epi16(p[1], v);
+ p[2] = _mm256_adds_epi16(p[2], v);
+ p[3] = _mm256_adds_epi16(p[3], v);
+ p += 4;
+ size -= 4 * sizeof(*p);
+ } while (size != 0);
+}
+#define matchfinder_rebase matchfinder_rebase_avx2
+
+#elif HAVE_SSE2_NATIVE
+# include <emmintrin.h>
+static forceinline void
+matchfinder_init_sse2(mf_pos_t *data, size_t size)
+{
+ __m128i *p = (__m128i *)data;
+ __m128i v = _mm_set1_epi16(MATCHFINDER_INITVAL);
+
+ STATIC_ASSERT(MATCHFINDER_MEM_ALIGNMENT % sizeof(*p) == 0);
+ STATIC_ASSERT(MATCHFINDER_SIZE_ALIGNMENT % (4 * sizeof(*p)) == 0);
+ STATIC_ASSERT(sizeof(mf_pos_t) == 2);
+
+ do {
+ p[0] = v;
+ p[1] = v;
+ p[2] = v;
+ p[3] = v;
+ p += 4;
+ size -= 4 * sizeof(*p);
+ } while (size != 0);
+}
+#define matchfinder_init matchfinder_init_sse2
+
+static forceinline void
+matchfinder_rebase_sse2(mf_pos_t *data, size_t size)
+{
+ __m128i *p = (__m128i *)data;
+ __m128i v = _mm_set1_epi16((u16)-MATCHFINDER_WINDOW_SIZE);
+
+ STATIC_ASSERT(MATCHFINDER_MEM_ALIGNMENT % sizeof(*p) == 0);
+ STATIC_ASSERT(MATCHFINDER_SIZE_ALIGNMENT % (4 * sizeof(*p)) == 0);
+ STATIC_ASSERT(sizeof(mf_pos_t) == 2);
+
+ do {
+ /* PADDSW: Add Packed Signed Integers With Signed Saturation */
+ p[0] = _mm_adds_epi16(p[0], v);
+ p[1] = _mm_adds_epi16(p[1], v);
+ p[2] = _mm_adds_epi16(p[2], v);
+ p[3] = _mm_adds_epi16(p[3], v);
+ p += 4;
+ size -= 4 * sizeof(*p);
+ } while (size != 0);
+}
+#define matchfinder_rebase matchfinder_rebase_sse2
+#endif /* HAVE_SSE2_NATIVE */
+
+#endif /* LIB_X86_MATCHFINDER_IMPL_H */
diff --git a/tools/z64compress/src/enc/libdeflate/lib/zlib_compress.c b/tools/z64compress/src/enc/libdeflate/lib/zlib_compress.c
new file mode 100644
index 000000000..4f9cc6f08
--- /dev/null
+++ b/tools/z64compress/src/enc/libdeflate/lib/zlib_compress.c
@@ -0,0 +1,84 @@
+/*
+ * zlib_compress.c - compress with a zlib wrapper
+ *
+ * Copyright 2016 Eric Biggers
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "deflate_compress.h"
+#include "zlib_constants.h"
+
+#include "libdeflate.h"
+
+LIBDEFLATEAPI size_t
+libdeflate_zlib_compress(struct libdeflate_compressor *c,
+ const void *in, size_t in_nbytes,
+ void *out, size_t out_nbytes_avail)
+{
+ u8 *out_next = out;
+ u16 hdr;
+ unsigned compression_level;
+ unsigned level_hint;
+ size_t deflate_size;
+
+ if (out_nbytes_avail <= ZLIB_MIN_OVERHEAD)
+ return 0;
+
+ /* 2 byte header: CMF and FLG */
+ hdr = (ZLIB_CM_DEFLATE << 8) | (ZLIB_CINFO_32K_WINDOW << 12);
+ compression_level = libdeflate_get_compression_level(c);
+ if (compression_level < 2)
+ level_hint = ZLIB_FASTEST_COMPRESSION;
+ else if (compression_level < 6)
+ level_hint = ZLIB_FAST_COMPRESSION;
+ else if (compression_level < 8)
+ level_hint = ZLIB_DEFAULT_COMPRESSION;
+ else
+ level_hint = ZLIB_SLOWEST_COMPRESSION;
+ hdr |= level_hint << 6;
+ hdr |= 31 - (hdr % 31);
+
+ put_unaligned_be16(hdr, out_next);
+ out_next += 2;
+
+ /* Compressed data */
+ deflate_size = libdeflate_deflate_compress(c, in, in_nbytes, out_next,
+ out_nbytes_avail - ZLIB_MIN_OVERHEAD);
+ if (deflate_size == 0)
+ return 0;
+ out_next += deflate_size;
+
+ /* ADLER32 */
+ put_unaligned_be32(libdeflate_adler32(1, in, in_nbytes), out_next);
+ out_next += 4;
+
+ return out_next - (u8 *)out;
+}
+
+LIBDEFLATEAPI size_t
+libdeflate_zlib_compress_bound(struct libdeflate_compressor *c,
+ size_t in_nbytes)
+{
+ return ZLIB_MIN_OVERHEAD +
+ libdeflate_deflate_compress_bound(c, in_nbytes);
+}
diff --git a/tools/z64compress/src/enc/libdeflate/lib/zlib_constants.h b/tools/z64compress/src/enc/libdeflate/lib/zlib_constants.h
new file mode 100644
index 000000000..f304310c7
--- /dev/null
+++ b/tools/z64compress/src/enc/libdeflate/lib/zlib_constants.h
@@ -0,0 +1,21 @@
+/*
+ * zlib_constants.h - constants for the zlib wrapper format
+ */
+
+#ifndef LIB_ZLIB_CONSTANTS_H
+#define LIB_ZLIB_CONSTANTS_H
+
+#define ZLIB_MIN_HEADER_SIZE 2
+#define ZLIB_FOOTER_SIZE 4
+#define ZLIB_MIN_OVERHEAD (ZLIB_MIN_HEADER_SIZE + ZLIB_FOOTER_SIZE)
+
+#define ZLIB_CM_DEFLATE 8
+
+#define ZLIB_CINFO_32K_WINDOW 7
+
+#define ZLIB_FASTEST_COMPRESSION 0
+#define ZLIB_FAST_COMPRESSION 1
+#define ZLIB_DEFAULT_COMPRESSION 2
+#define ZLIB_SLOWEST_COMPRESSION 3
+
+#endif /* LIB_ZLIB_CONSTANTS_H */
diff --git a/tools/z64compress/src/enc/libdeflate/lib/zlib_decompress.c b/tools/z64compress/src/enc/libdeflate/lib/zlib_decompress.c
new file mode 100644
index 000000000..b7b3b1f95
--- /dev/null
+++ b/tools/z64compress/src/enc/libdeflate/lib/zlib_decompress.c
@@ -0,0 +1,106 @@
+/*
+ * zlib_decompress.c - decompress with a zlib wrapper
+ *
+ * Copyright 2016 Eric Biggers
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "lib_common.h"
+#include "zlib_constants.h"
+
+#include "libdeflate.h"
+
+LIBDEFLATEAPI enum libdeflate_result
+libdeflate_zlib_decompress_ex(struct libdeflate_decompressor *d,
+ const void *in, size_t in_nbytes,
+ void *out, size_t out_nbytes_avail,
+ size_t *actual_in_nbytes_ret,
+ size_t *actual_out_nbytes_ret)
+{
+ const u8 *in_next = in;
+ const u8 * const in_end = in_next + in_nbytes;
+ u16 hdr;
+ size_t actual_in_nbytes;
+ size_t actual_out_nbytes;
+ enum libdeflate_result result;
+
+ if (in_nbytes < ZLIB_MIN_OVERHEAD)
+ return LIBDEFLATE_BAD_DATA;
+
+ /* 2 byte header: CMF and FLG */
+ hdr = get_unaligned_be16(in_next);
+ in_next += 2;
+
+ /* FCHECK */
+ if ((hdr % 31) != 0)
+ return LIBDEFLATE_BAD_DATA;
+
+ /* CM */
+ if (((hdr >> 8) & 0xF) != ZLIB_CM_DEFLATE)
+ return LIBDEFLATE_BAD_DATA;
+
+ /* CINFO */
+ if ((hdr >> 12) > ZLIB_CINFO_32K_WINDOW)
+ return LIBDEFLATE_BAD_DATA;
+
+ /* FDICT */
+ if ((hdr >> 5) & 1)
+ return LIBDEFLATE_BAD_DATA;
+
+ /* Compressed data */
+ result = libdeflate_deflate_decompress_ex(d, in_next,
+ in_end - ZLIB_FOOTER_SIZE - in_next,
+ out, out_nbytes_avail,
+ &actual_in_nbytes, actual_out_nbytes_ret);
+ if (result != LIBDEFLATE_SUCCESS)
+ return result;
+
+ if (actual_out_nbytes_ret)
+ actual_out_nbytes = *actual_out_nbytes_ret;
+ else
+ actual_out_nbytes = out_nbytes_avail;
+
+ in_next += actual_in_nbytes;
+
+ /* ADLER32 */
+ if (libdeflate_adler32(1, out, actual_out_nbytes) !=
+ get_unaligned_be32(in_next))
+ return LIBDEFLATE_BAD_DATA;
+ in_next += 4;
+
+ if (actual_in_nbytes_ret)
+ *actual_in_nbytes_ret = in_next - (u8 *)in;
+
+ return LIBDEFLATE_SUCCESS;
+}
+
+LIBDEFLATEAPI enum libdeflate_result
+libdeflate_zlib_decompress(struct libdeflate_decompressor *d,
+ const void *in, size_t in_nbytes,
+ void *out, size_t out_nbytes_avail,
+ size_t *actual_out_nbytes_ret)
+{
+ return libdeflate_zlib_decompress_ex(d, in, in_nbytes,
+ out, out_nbytes_avail,
+ NULL, actual_out_nbytes_ret);
+}
diff --git a/tools/z64compress/src/enc/libdeflate/libdeflate-config.cmake.in b/tools/z64compress/src/enc/libdeflate/libdeflate-config.cmake.in
new file mode 100644
index 000000000..747799df9
--- /dev/null
+++ b/tools/z64compress/src/enc/libdeflate/libdeflate-config.cmake.in
@@ -0,0 +1,3 @@
+@PACKAGE_INIT@
+
+include("${CMAKE_CURRENT_LIST_DIR}/libdeflate-targets.cmake")
diff --git a/tools/z64compress/src/enc/libdeflate/libdeflate.h b/tools/z64compress/src/enc/libdeflate/libdeflate.h
new file mode 100644
index 000000000..f26087597
--- /dev/null
+++ b/tools/z64compress/src/enc/libdeflate/libdeflate.h
@@ -0,0 +1,368 @@
+/*
+ * libdeflate.h - public header for libdeflate
+ */
+
+#ifndef LIBDEFLATE_H
+#define LIBDEFLATE_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define LIBDEFLATE_VERSION_MAJOR 1
+#define LIBDEFLATE_VERSION_MINOR 15
+#define LIBDEFLATE_VERSION_STRING "1.15"
+
+/*
+ * Users of libdeflate.dll on Windows can define LIBDEFLATE_DLL to cause
+ * __declspec(dllimport) to be used. This should be done when it's easy to do.
+ * Otherwise it's fine to skip it, since it is a very minor performance
+ * optimization that is irrelevant for most use cases of libdeflate.
+ */
+#ifndef LIBDEFLATEAPI
+# if defined(LIBDEFLATE_DLL) && (defined(_WIN32) || defined(__CYGWIN__))
+# define LIBDEFLATEAPI __declspec(dllimport)
+# else
+# define LIBDEFLATEAPI
+# endif
+#endif
+
+/* ========================================================================== */
+/* Compression */
+/* ========================================================================== */
+
+struct libdeflate_compressor;
+
+/*
+ * libdeflate_alloc_compressor() allocates a new compressor that supports
+ * DEFLATE, zlib, and gzip compression. 'compression_level' is the compression
+ * level on a zlib-like scale but with a higher maximum value (1 = fastest, 6 =
+ * medium/default, 9 = slow, 12 = slowest). Level 0 is also supported and means
+ * "no compression", specifically "create a valid stream, but only emit
+ * uncompressed blocks" (this will expand the data slightly).
+ *
+ * The return value is a pointer to the new compressor, or NULL if out of memory
+ * or if the compression level is invalid (i.e. outside the range [0, 12]).
+ *
+ * Note: for compression, the sliding window size is defined at compilation time
+ * to 32768, the largest size permissible in the DEFLATE format. It cannot be
+ * changed at runtime.
+ *
+ * A single compressor is not safe to use by multiple threads concurrently.
+ * However, different threads may use different compressors concurrently.
+ */
+LIBDEFLATEAPI struct libdeflate_compressor *
+libdeflate_alloc_compressor(int compression_level);
+
+/*
+ * libdeflate_deflate_compress() performs raw DEFLATE compression on a buffer of
+ * data. It attempts to compress 'in_nbytes' bytes of data located at 'in' and
+ * write the result to 'out', which has space for 'out_nbytes_avail' bytes. The
+ * return value is the compressed size in bytes, or 0 if the data could not be
+ * compressed to 'out_nbytes_avail' bytes or fewer (but see note below).
+ *
+ * If compression is successful, then the output data is guaranteed to be a
+ * valid DEFLATE stream that decompresses to the input data. No other
+ * guarantees are made about the output data. Notably, different versions of
+ * libdeflate can produce different compressed data for the same uncompressed
+ * data, even at the same compression level. Do ***NOT*** do things like
+ * writing tests that compare compressed data to a golden output, as this can
+ * break when libdeflate is updated. (This property isn't specific to
+ * libdeflate; the same is true for zlib and other compression libraries too.)
+ *
+ * Note: due to a performance optimization, libdeflate_deflate_compress()
+ * currently needs a small amount of slack space at the end of the output
+ * buffer. As a result, it can't actually report compressed sizes very close to
+ * 'out_nbytes_avail'. This doesn't matter in real-world use cases, and
+ * libdeflate_deflate_compress_bound() already includes the slack space.
+ * However, it does mean that testing code that redundantly compresses data
+ * using an exact-sized output buffer won't work as might be expected:
+ *
+ * out_nbytes = libdeflate_deflate_compress(c, in, in_nbytes, out,
+ * libdeflate_deflate_compress_bound(in_nbytes));
+ * // The following assertion will fail.
+ * assert(libdeflate_deflate_compress(c, in, in_nbytes, out, out_nbytes) != 0);
+ *
+ * To avoid this, either don't write tests like the above, or make sure to
+ * include at least 9 bytes of slack space in 'out_nbytes_avail'.
+ */
+LIBDEFLATEAPI size_t
+libdeflate_deflate_compress(struct libdeflate_compressor *compressor,
+ const void *in, size_t in_nbytes,
+ void *out, size_t out_nbytes_avail);
+
+/*
+ * libdeflate_deflate_compress_bound() returns a worst-case upper bound on the
+ * number of bytes of compressed data that may be produced by compressing any
+ * buffer of length less than or equal to 'in_nbytes' using
+ * libdeflate_deflate_compress() with the specified compressor. This bound will
+ * necessarily be a number greater than or equal to 'in_nbytes'. It may be an
+ * overestimate of the true upper bound. The return value is guaranteed to be
+ * the same for all invocations with the same compressor and same 'in_nbytes'.
+ *
+ * As a special case, 'compressor' may be NULL. This causes the bound to be
+ * taken across *any* libdeflate_compressor that could ever be allocated with
+ * this build of the library, with any options.
+ *
+ * Note that this function is not necessary in many applications. With
+ * block-based compression, it is usually preferable to separately store the
+ * uncompressed size of each block and to store any blocks that did not compress
+ * to less than their original size uncompressed. In that scenario, there is no
+ * need to know the worst-case compressed size, since the maximum number of
+ * bytes of compressed data that may be used would always be one less than the
+ * input length. You can just pass a buffer of that size to
+ * libdeflate_deflate_compress() and store the data uncompressed if
+ * libdeflate_deflate_compress() returns 0, indicating that the compressed data
+ * did not fit into the provided output buffer.
+ */
+LIBDEFLATEAPI size_t
+libdeflate_deflate_compress_bound(struct libdeflate_compressor *compressor,
+ size_t in_nbytes);
+
+/*
+ * Like libdeflate_deflate_compress(), but uses the zlib wrapper format instead
+ * of raw DEFLATE.
+ */
+LIBDEFLATEAPI size_t
+libdeflate_zlib_compress(struct libdeflate_compressor *compressor,
+ const void *in, size_t in_nbytes,
+ void *out, size_t out_nbytes_avail);
+
+/*
+ * Like libdeflate_deflate_compress_bound(), but assumes the data will be
+ * compressed with libdeflate_zlib_compress() rather than with
+ * libdeflate_deflate_compress().
+ */
+LIBDEFLATEAPI size_t
+libdeflate_zlib_compress_bound(struct libdeflate_compressor *compressor,
+ size_t in_nbytes);
+
+/*
+ * Like libdeflate_deflate_compress(), but uses the gzip wrapper format instead
+ * of raw DEFLATE.
+ */
+LIBDEFLATEAPI size_t
+libdeflate_gzip_compress(struct libdeflate_compressor *compressor,
+ const void *in, size_t in_nbytes,
+ void *out, size_t out_nbytes_avail);
+
+/*
+ * Like libdeflate_deflate_compress_bound(), but assumes the data will be
+ * compressed with libdeflate_gzip_compress() rather than with
+ * libdeflate_deflate_compress().
+ */
+LIBDEFLATEAPI size_t
+libdeflate_gzip_compress_bound(struct libdeflate_compressor *compressor,
+ size_t in_nbytes);
+
+/*
+ * libdeflate_free_compressor() frees a compressor that was allocated with
+ * libdeflate_alloc_compressor(). If a NULL pointer is passed in, no action is
+ * taken.
+ */
+LIBDEFLATEAPI void
+libdeflate_free_compressor(struct libdeflate_compressor *compressor);
+
+/* ========================================================================== */
+/* Decompression */
+/* ========================================================================== */
+
+struct libdeflate_decompressor;
+
+/*
+ * libdeflate_alloc_decompressor() allocates a new decompressor that can be used
+ * for DEFLATE, zlib, and gzip decompression. The return value is a pointer to
+ * the new decompressor, or NULL if out of memory.
+ *
+ * This function takes no parameters, and the returned decompressor is valid for
+ * decompressing data that was compressed at any compression level and with any
+ * sliding window size.
+ *
+ * A single decompressor is not safe to use by multiple threads concurrently.
+ * However, different threads may use different decompressors concurrently.
+ */
+LIBDEFLATEAPI struct libdeflate_decompressor *
+libdeflate_alloc_decompressor(void);
+
+/*
+ * Result of a call to libdeflate_deflate_decompress(),
+ * libdeflate_zlib_decompress(), or libdeflate_gzip_decompress().
+ */
+enum libdeflate_result {
+ /* Decompression was successful. */
+ LIBDEFLATE_SUCCESS = 0,
+
+ /* Decompression failed because the compressed data was invalid,
+ * corrupt, or otherwise unsupported. */
+ LIBDEFLATE_BAD_DATA = 1,
+
+ /* A NULL 'actual_out_nbytes_ret' was provided, but the data would have
+ * decompressed to fewer than 'out_nbytes_avail' bytes. */
+ LIBDEFLATE_SHORT_OUTPUT = 2,
+
+ /* The data would have decompressed to more than 'out_nbytes_avail'
+ * bytes. */
+ LIBDEFLATE_INSUFFICIENT_SPACE = 3,
+};
+
+/*
+ * libdeflate_deflate_decompress() decompresses a DEFLATE stream from the buffer
+ * 'in' with compressed size up to 'in_nbytes' bytes. The uncompressed data is
+ * written to 'out', a buffer with size 'out_nbytes_avail' bytes. If
+ * decompression succeeds, then 0 (LIBDEFLATE_SUCCESS) is returned. Otherwise,
+ * a nonzero result code such as LIBDEFLATE_BAD_DATA is returned, and the
+ * contents of the output buffer are undefined.
+ *
+ * Decompression stops at the end of the DEFLATE stream (as indicated by the
+ * BFINAL flag), even if it is actually shorter than 'in_nbytes' bytes.
+ *
+ * libdeflate_deflate_decompress() can be used in cases where the actual
+ * uncompressed size is known (recommended) or unknown (not recommended):
+ *
+ * - If the actual uncompressed size is known, then pass the actual
+ * uncompressed size as 'out_nbytes_avail' and pass NULL for
+ * 'actual_out_nbytes_ret'. This makes libdeflate_deflate_decompress() fail
+ * with LIBDEFLATE_SHORT_OUTPUT if the data decompressed to fewer than the
+ * specified number of bytes.
+ *
+ * - If the actual uncompressed size is unknown, then provide a non-NULL
+ * 'actual_out_nbytes_ret' and provide a buffer with some size
+ * 'out_nbytes_avail' that you think is large enough to hold all the
+ * uncompressed data. In this case, if the data decompresses to less than
+ * or equal to 'out_nbytes_avail' bytes, then
+ * libdeflate_deflate_decompress() will write the actual uncompressed size
+ * to *actual_out_nbytes_ret and return 0 (LIBDEFLATE_SUCCESS). Otherwise,
+ * it will return LIBDEFLATE_INSUFFICIENT_SPACE if the provided buffer was
+ * not large enough but no other problems were encountered, or another
+ * nonzero result code if decompression failed for another reason.
+ */
+LIBDEFLATEAPI enum libdeflate_result
+libdeflate_deflate_decompress(struct libdeflate_decompressor *decompressor,
+ const void *in, size_t in_nbytes,
+ void *out, size_t out_nbytes_avail,
+ size_t *actual_out_nbytes_ret);
+
+/*
+ * Like libdeflate_deflate_decompress(), but adds the 'actual_in_nbytes_ret'
+ * argument. If decompression succeeds and 'actual_in_nbytes_ret' is not NULL,
+ * then the actual compressed size of the DEFLATE stream (aligned to the next
+ * byte boundary) is written to *actual_in_nbytes_ret.
+ */
+LIBDEFLATEAPI enum libdeflate_result
+libdeflate_deflate_decompress_ex(struct libdeflate_decompressor *decompressor,
+ const void *in, size_t in_nbytes,
+ void *out, size_t out_nbytes_avail,
+ size_t *actual_in_nbytes_ret,
+ size_t *actual_out_nbytes_ret);
+
+/*
+ * Like libdeflate_deflate_decompress(), but assumes the zlib wrapper format
+ * instead of raw DEFLATE.
+ *
+ * Decompression will stop at the end of the zlib stream, even if it is shorter
+ * than 'in_nbytes'. If you need to know exactly where the zlib stream ended,
+ * use libdeflate_zlib_decompress_ex().
+ */
+LIBDEFLATEAPI enum libdeflate_result
+libdeflate_zlib_decompress(struct libdeflate_decompressor *decompressor,
+ const void *in, size_t in_nbytes,
+ void *out, size_t out_nbytes_avail,
+ size_t *actual_out_nbytes_ret);
+
+/*
+ * Like libdeflate_zlib_decompress(), but adds the 'actual_in_nbytes_ret'
+ * argument. If 'actual_in_nbytes_ret' is not NULL and the decompression
+ * succeeds (indicating that the first zlib-compressed stream in the input
+ * buffer was decompressed), then the actual number of input bytes consumed is
+ * written to *actual_in_nbytes_ret.
+ */
+LIBDEFLATEAPI enum libdeflate_result
+libdeflate_zlib_decompress_ex(struct libdeflate_decompressor *decompressor,
+ const void *in, size_t in_nbytes,
+ void *out, size_t out_nbytes_avail,
+ size_t *actual_in_nbytes_ret,
+ size_t *actual_out_nbytes_ret);
+
+/*
+ * Like libdeflate_deflate_decompress(), but assumes the gzip wrapper format
+ * instead of raw DEFLATE.
+ *
+ * If multiple gzip-compressed members are concatenated, then only the first
+ * will be decompressed. Use libdeflate_gzip_decompress_ex() if you need
+ * multi-member support.
+ */
+LIBDEFLATEAPI enum libdeflate_result
+libdeflate_gzip_decompress(struct libdeflate_decompressor *decompressor,
+ const void *in, size_t in_nbytes,
+ void *out, size_t out_nbytes_avail,
+ size_t *actual_out_nbytes_ret);
+
+/*
+ * Like libdeflate_gzip_decompress(), but adds the 'actual_in_nbytes_ret'
+ * argument. If 'actual_in_nbytes_ret' is not NULL and the decompression
+ * succeeds (indicating that the first gzip-compressed member in the input
+ * buffer was decompressed), then the actual number of input bytes consumed is
+ * written to *actual_in_nbytes_ret.
+ */
+LIBDEFLATEAPI enum libdeflate_result
+libdeflate_gzip_decompress_ex(struct libdeflate_decompressor *decompressor,
+ const void *in, size_t in_nbytes,
+ void *out, size_t out_nbytes_avail,
+ size_t *actual_in_nbytes_ret,
+ size_t *actual_out_nbytes_ret);
+
+/*
+ * libdeflate_free_decompressor() frees a decompressor that was allocated with
+ * libdeflate_alloc_decompressor(). If a NULL pointer is passed in, no action
+ * is taken.
+ */
+LIBDEFLATEAPI void
+libdeflate_free_decompressor(struct libdeflate_decompressor *decompressor);
+
+/* ========================================================================== */
+/* Checksums */
+/* ========================================================================== */
+
+/*
+ * libdeflate_adler32() updates a running Adler-32 checksum with 'len' bytes of
+ * data and returns the updated checksum. When starting a new checksum, the
+ * required initial value for 'adler' is 1. This value is also returned when
+ * 'buffer' is specified as NULL.
+ */
+LIBDEFLATEAPI uint32_t
+libdeflate_adler32(uint32_t adler, const void *buffer, size_t len);
+
+
+/*
+ * libdeflate_crc32() updates a running CRC-32 checksum with 'len' bytes of data
+ * and returns the updated checksum. When starting a new checksum, the required
+ * initial value for 'crc' is 0. This value is also returned when 'buffer' is
+ * specified as NULL.
+ */
+LIBDEFLATEAPI uint32_t
+libdeflate_crc32(uint32_t crc, const void *buffer, size_t len);
+
+/* ========================================================================== */
+/* Custom memory allocator */
+/* ========================================================================== */
+
+/*
+ * Install a custom memory allocator which libdeflate will use for all memory
+ * allocations. 'malloc_func' is a function that must behave like malloc(), and
+ * 'free_func' is a function that must behave like free().
+ *
+ * There must not be any libdeflate_compressor or libdeflate_decompressor
+ * structures in existence when calling this function.
+ */
+LIBDEFLATEAPI void
+libdeflate_set_memory_allocator(void *(*malloc_func)(size_t),
+ void (*free_func)(void *));
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* LIBDEFLATE_H */
diff --git a/tools/z64compress/src/enc/libdeflate/libdeflate.pc.in b/tools/z64compress/src/enc/libdeflate/libdeflate.pc.in
new file mode 100644
index 000000000..b8ced3c69
--- /dev/null
+++ b/tools/z64compress/src/enc/libdeflate/libdeflate.pc.in
@@ -0,0 +1,18 @@
+prefix=@CMAKE_INSTALL_PREFIX@
+exec_prefix=${prefix}
+includedir=@CMAKE_PKGCONFIG_INCLUDEDIR@
+libdir=@CMAKE_PKGCONFIG_LIBDIR@
+
+Name: libdeflate
+Description: Fast implementation of DEFLATE, zlib, and gzip
+Version: @PROJECT_VERSION@
+Libs: -L${libdir} -ldeflate
+Cflags: -I${includedir}
+
+# Note: this library's public header allows LIBDEFLATE_DLL to be defined when
+# linking to the DLL on Windows, to make __declspec(dllimport) be used.
+# However, the only way to define a shared-library-only flag in a pkgconfig file
+# is to use the weird workaround of unconditionally defining it in Cflags, then
+# undefining it in Cflags.private. Just don't bother with this, since
+# __declspec(dllimport) is optional anyway. It is a very minor performance
+# optimization that is irrelevant for most use cases of libdeflate.
diff --git a/tools/z64compress/src/enc/libdeflate/programs/benchmark.c b/tools/z64compress/src/enc/libdeflate/programs/benchmark.c
new file mode 100644
index 000000000..52af8dafc
--- /dev/null
+++ b/tools/z64compress/src/enc/libdeflate/programs/benchmark.c
@@ -0,0 +1,696 @@
+/*
+ * benchmark.c - a compression testing and benchmark program
+ *
+ * Copyright 2016 Eric Biggers
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "test_util.h"
+
+/* All recognized command-line options; the digits take an optional argument
+ * so that multi-digit levels such as "-12" parse as '-1' with argument "2". */
+static const tchar *const optstring = T("0::1::2::3::4::5::6::7::8::9::C:D:eghs:VYZz");
+
+/* Which DEFLATE-based stream format to benchmark. */
+enum format {
+	DEFLATE_FORMAT,
+	ZLIB_FORMAT,
+	GZIP_FORMAT,
+};
+
+/* Per-run compression state; 'private' is owned by the selected engine. */
+struct compressor {
+	int level;
+	enum format format;
+	const struct engine *engine;
+	void *private;
+};
+
+/* Per-run decompression state; 'private' is owned by the selected engine. */
+struct decompressor {
+	enum format format;
+	const struct engine *engine;
+	void *private;
+};
+
+/*
+ * A DEFLATE implementation under benchmark, expressed as a vtable of
+ * init / (de)compress / destroy operations.
+ */
+struct engine {
+	const tchar *name;
+
+	bool (*init_compressor)(struct compressor *);
+	size_t (*compress_bound)(struct compressor *, size_t);
+	size_t (*compress)(struct compressor *, const void *, size_t,
+			   void *, size_t);
+	void (*destroy_compressor)(struct compressor *);
+
+	bool (*init_decompressor)(struct decompressor *);
+	bool (*decompress)(struct decompressor *, const void *, size_t,
+			   void *, size_t);
+	void (*destroy_decompressor)(struct decompressor *);
+};
+
+/******************************************************************************/
+
+/* Allocate the libdeflate compressor for the requested level. */
+static bool
+libdeflate_engine_init_compressor(struct compressor *c)
+{
+	void *comp = alloc_compressor(c->level);
+
+	c->private = comp;
+	return comp != NULL;
+}
+
+/* Worst-case output size for this chunk, per the selected stream format. */
+static size_t
+libdeflate_engine_compress_bound(struct compressor *c, size_t in_nbytes)
+{
+	if (c->format == ZLIB_FORMAT)
+		return libdeflate_zlib_compress_bound(c->private, in_nbytes);
+	if (c->format == GZIP_FORMAT)
+		return libdeflate_gzip_compress_bound(c->private, in_nbytes);
+	/* raw DEFLATE */
+	return libdeflate_deflate_compress_bound(c->private, in_nbytes);
+}
+
+/* Compress one chunk with libdeflate; returns 0 if it didn't fit. */
+static size_t
+libdeflate_engine_compress(struct compressor *c, const void *in,
+			   size_t in_nbytes, void *out, size_t out_nbytes_avail)
+{
+	if (c->format == ZLIB_FORMAT)
+		return libdeflate_zlib_compress(c->private, in, in_nbytes,
+						out, out_nbytes_avail);
+	if (c->format == GZIP_FORMAT)
+		return libdeflate_gzip_compress(c->private, in, in_nbytes,
+						out, out_nbytes_avail);
+	/* raw DEFLATE */
+	return libdeflate_deflate_compress(c->private, in, in_nbytes,
+					   out, out_nbytes_avail);
+}
+
+/* Release the libdeflate compressor (NULL-safe per the libdeflate API). */
+static void
+libdeflate_engine_destroy_compressor(struct compressor *c)
+{
+	libdeflate_free_compressor(c->private);
+}
+
+/* Allocate a libdeflate decompressor; false on allocation failure. */
+static bool
+libdeflate_engine_init_decompressor(struct decompressor *d)
+{
+	d->private = alloc_decompressor();
+	return d->private != NULL;
+}
+
+/* Decompress one chunk with libdeflate; true iff the result code was 0. */
+static bool
+libdeflate_engine_decompress(struct decompressor *d, const void *in,
+			     size_t in_nbytes, void *out, size_t out_nbytes)
+{
+	enum libdeflate_result res;
+
+	if (d->format == ZLIB_FORMAT)
+		res = libdeflate_zlib_decompress(d->private, in, in_nbytes,
+						 out, out_nbytes, NULL);
+	else if (d->format == GZIP_FORMAT)
+		res = libdeflate_gzip_decompress(d->private, in, in_nbytes,
+						 out, out_nbytes, NULL);
+	else
+		res = libdeflate_deflate_decompress(d->private, in, in_nbytes,
+						    out, out_nbytes, NULL);
+	return !res;
+}
+
+/* Release the libdeflate decompressor (NULL-safe per the libdeflate API). */
+static void
+libdeflate_engine_destroy_decompressor(struct decompressor *d)
+{
+	libdeflate_free_decompressor(d->private);
+}
+
+/* Engine vtable for benchmarking libdeflate itself. */
+static const struct engine libdeflate_engine = {
+	.name			= T("libdeflate"),
+
+	.init_compressor	= libdeflate_engine_init_compressor,
+	.compress_bound		= libdeflate_engine_compress_bound,
+	.compress		= libdeflate_engine_compress,
+	.destroy_compressor	= libdeflate_engine_destroy_compressor,
+
+	.init_decompressor	= libdeflate_engine_init_decompressor,
+	.decompress		= libdeflate_engine_decompress,
+	.destroy_decompressor	= libdeflate_engine_destroy_decompressor,
+};
+
+/******************************************************************************/
+
+/*
+ * Map a stream format to the windowBits value zlib's *Init2() functions
+ * expect: plain 15 selects the zlib wrapper, 15 + 16 selects gzip, and a
+ * negative value selects raw DEFLATE.
+ */
+static int
+get_libz_window_bits(enum format format)
+{
+	enum { WINDOW_BITS = 15 };
+
+	if (format == GZIP_FORMAT)
+		return WINDOW_BITS + 16;
+	if (format == ZLIB_FORMAT)
+		return WINDOW_BITS;
+	return -WINDOW_BITS;
+}
+
+/* Allocate and initialize a zlib deflate stream for this run's settings. */
+static bool
+libz_engine_init_compressor(struct compressor *c)
+{
+	z_stream *z;
+
+	/* zlib's levels stop at 9; libdeflate's extend to 12. */
+	if (c->level > 9) {
+		msg("libz only supports up to compression level 9");
+		return false;
+	}
+
+	z = xmalloc(sizeof(*z));
+	if (z == NULL)
+		return false;
+
+	/* deflateInit2() reads these fields, so set them before the call. */
+	z->next_in = NULL;
+	z->avail_in = 0;
+	z->zalloc = NULL;
+	z->zfree = NULL;
+	z->opaque = NULL;
+	if (deflateInit2(z, c->level, Z_DEFLATED,
+			 get_libz_window_bits(c->format),
+			 8, Z_DEFAULT_STRATEGY) != Z_OK)
+	{
+		msg("unable to initialize deflater");
+		free(z);
+		return false;
+	}
+
+	c->private = z;
+	return true;
+}
+
+/* Worst-case compressed size, as reported by zlib itself. */
+static size_t
+libz_engine_compress_bound(struct compressor *c, size_t in_nbytes)
+{
+	return deflateBound(c->private, in_nbytes);
+}
+
+/* Compress one chunk with zlib; returns 0 if it didn't fit in the buffer. */
+static size_t
+libz_engine_compress(struct compressor *c, const void *in, size_t in_nbytes,
+		     void *out, size_t out_nbytes_avail)
+{
+	z_stream *z = c->private;
+
+	/* Reuse the stream across chunks instead of re-initializing it. */
+	deflateReset(z);
+
+	z->next_in = (void *)in;
+	z->avail_in = in_nbytes;
+	z->next_out = out;
+	z->avail_out = out_nbytes_avail;
+
+	/* Single-shot compression: anything other than Z_STREAM_END means
+	 * the whole input couldn't be compressed into the output buffer. */
+	if (deflate(z, Z_FINISH) != Z_STREAM_END)
+		return 0;
+
+	return out_nbytes_avail - z->avail_out;
+}
+
+/* Tear down the zlib deflate stream and free its wrapper allocation. */
+static void
+libz_engine_destroy_compressor(struct compressor *c)
+{
+	z_stream *z = c->private;
+
+	deflateEnd(z);
+	free(z);
+}
+
+/* Allocate and initialize a zlib inflate stream for this run's format. */
+static bool
+libz_engine_init_decompressor(struct decompressor *d)
+{
+	z_stream *z;
+
+	z = xmalloc(sizeof(*z));
+	if (z == NULL)
+		return false;
+
+	/* inflateInit2() reads these fields, so set them before the call. */
+	z->next_in = NULL;
+	z->avail_in = 0;
+	z->zalloc = NULL;
+	z->zfree = NULL;
+	z->opaque = NULL;
+	if (inflateInit2(z, get_libz_window_bits(d->format)) != Z_OK) {
+		msg("unable to initialize inflater");
+		free(z);
+		return false;
+	}
+
+	d->private = z;
+	return true;
+}
+
+/* Decompress one chunk with zlib. */
+static bool
+libz_engine_decompress(struct decompressor *d, const void *in, size_t in_nbytes,
+		       void *out, size_t out_nbytes)
+{
+	z_stream *z = d->private;
+
+	inflateReset(z);
+
+	z->next_in = (void *)in;
+	z->avail_in = in_nbytes;
+	z->next_out = out;
+	z->avail_out = out_nbytes;
+
+	/* Success requires reaching end-of-stream AND exactly filling the
+	 * output buffer, since the caller knows the uncompressed size. */
+	return inflate(z, Z_FINISH) == Z_STREAM_END && z->avail_out == 0;
+}
+
+/* Tear down the zlib inflate stream and free its wrapper allocation. */
+static void
+libz_engine_destroy_decompressor(struct decompressor *d)
+{
+	z_stream *z = d->private;
+
+	inflateEnd(z);
+	free(z);
+}
+
+/* Engine vtable for benchmarking against zlib. */
+static const struct engine libz_engine = {
+	.name			= T("libz"),
+
+	.init_compressor	= libz_engine_init_compressor,
+	.compress_bound		= libz_engine_compress_bound,
+	.compress		= libz_engine_compress,
+	.destroy_compressor	= libz_engine_destroy_compressor,
+
+	.init_decompressor	= libz_engine_init_decompressor,
+	.decompress		= libz_engine_decompress,
+	.destroy_decompressor	= libz_engine_destroy_decompressor,
+};
+
+/******************************************************************************/
+
+/* Every engine selectable with the -C and -D options. */
+static const struct engine * const all_engines[] = {
+	&libdeflate_engine,
+	&libz_engine,
+};
+
+#define DEFAULT_ENGINE libdeflate_engine
+
+/* Look up an engine by its -C/-D option name; NULL if no engine matches. */
+static const struct engine *
+name_to_engine(const tchar *name)
+{
+	const struct engine * const *e = all_engines;
+	const struct engine * const *end = all_engines + ARRAY_LEN(all_engines);
+
+	for (; e != end; e++) {
+		if (tstrcmp((*e)->name, name) == 0)
+			return *e;
+	}
+	return NULL;
+}
+
+/******************************************************************************/
+
+/* Record the run's settings, then let the engine set up its private state. */
+static bool
+compressor_init(struct compressor *c, int level, enum format format,
+		const struct engine *engine)
+{
+	c->level = level;
+	c->format = format;
+	c->engine = engine;
+	return engine->init_compressor(c);
+}
+
+/* Dispatch to the engine's worst-case-size calculation. */
+static size_t
+compress_bound(struct compressor *c, size_t in_nbytes)
+{
+	return c->engine->compress_bound(c, in_nbytes);
+}
+
+/* Dispatch to the engine's compress operation. */
+static size_t
+do_compress(struct compressor *c, const void *in, size_t in_nbytes,
+	    void *out, size_t out_nbytes_avail)
+{
+	return c->engine->compress(c, in, in_nbytes, out, out_nbytes_avail);
+}
+
+/* Destroy the engine state; safe on a zero-initialized (never-inited) struct. */
+static void
+compressor_destroy(struct compressor *c)
+{
+	if (c->engine != NULL)
+		c->engine->destroy_compressor(c);
+}
+
+/* Record the run's settings, then let the engine set up its private state. */
+static bool
+decompressor_init(struct decompressor *d, enum format format,
+		  const struct engine *engine)
+{
+	d->format = format;
+	d->engine = engine;
+	return engine->init_decompressor(d);
+}
+
+/* Dispatch to the engine's decompress operation. */
+static bool
+do_decompress(struct decompressor *d, const void *in, size_t in_nbytes,
+	      void *out, size_t out_nbytes)
+{
+	return d->engine->decompress(d, in, in_nbytes, out, out_nbytes);
+}
+
+/* Destroy the engine state; safe on a zero-initialized (never-inited) struct. */
+static void
+decompressor_destroy(struct decompressor *d)
+{
+	if (d->engine != NULL)
+		d->engine->destroy_decompressor(d);
+}
+
+/******************************************************************************/
+
+/* Print the comma-separated engine list and the default, e.g. for -h output. */
+static void
+show_available_engines(FILE *fp)
+{
+	const size_t count = ARRAY_LEN(all_engines);
+	size_t i;
+
+	fprintf(fp, "Available ENGINEs are: ");
+	for (i = 0; i < count; i++) {
+		fprintf(fp, "%"TS, all_engines[i]->name);
+		if (i != count - 1)
+			fprintf(fp, ", ");
+	}
+	fprintf(fp, ". Default is %"TS"\n", DEFAULT_ENGINE.name);
+}
+
+/* Print the usage summary to 'fp' (stdout for -h, stderr on option errors). */
+static void
+show_usage(FILE *fp)
+{
+	fprintf(fp,
+"Usage: %"TS" [-LVL] [-C ENGINE] [-D ENGINE] [-ghVz] [-s SIZE] [FILE]...\n"
+"Benchmark DEFLATE compression and decompression on the specified FILEs.\n"
+"\n"
+"Options:\n"
+"  -0        no compression\n"
+"  -1        fastest (worst) compression\n"
+"  -6        medium compression (default)\n"
+"  -12       slowest (best) compression\n"
+"  -C ENGINE compression engine\n"
+"  -D ENGINE decompression engine\n"
+"  -e        allow chunks to be expanded (implied by -0)\n"
+"  -g        use gzip format instead of raw DEFLATE\n"
+"  -h        print this help\n"
+"  -s SIZE   chunk size\n"
+"  -V        show version and legal information\n"
+"  -z        use zlib format instead of raw DEFLATE\n"
+"\n", prog_invocation_name);
+
+	show_available_engines(fp);
+}
+
+/* Print version and license information for the -V option. */
+static void
+show_version(void)
+{
+	printf(
+"libdeflate compression benchmark program v" LIBDEFLATE_VERSION_STRING "\n"
+"Copyright 2016 Eric Biggers\n"
+"\n"
+"This program is free software which may be modified and/or redistributed\n"
+"under the terms of the MIT license.  There is NO WARRANTY, to the extent\n"
+"permitted by law.  See the COPYING file for details.\n"
+	);
+}
+
+
+/******************************************************************************/
+
+/*
+ * Read 'in' chunk by chunk, compressing each chunk, decompressing the result,
+ * and verifying it round-trips to the original bytes, while accumulating
+ * timing totals.  Prints a size/speed summary at the end.  Returns 0 on
+ * success, negative on read error or verification failure.
+ */
+static int
+do_benchmark(struct file_stream *in, void *original_buf, void *compressed_buf,
+	     void *decompressed_buf, u32 chunk_size,
+	     bool allow_expansion, size_t compressed_buf_size,
+	     struct compressor *compressor,
+	     struct decompressor *decompressor)
+{
+	u64 total_uncompressed_size = 0;
+	u64 total_compressed_size = 0;
+	u64 total_compress_time = 0;
+	u64 total_decompress_time = 0;
+	ssize_t ret;
+
+	while ((ret = xread(in, original_buf, chunk_size)) > 0) {
+		u32 original_size = ret;
+		size_t out_nbytes_avail;
+		u32 compressed_size;
+		u64 start_time;
+		bool ok;
+
+		total_uncompressed_size += original_size;
+
+		if (allow_expansion) {
+			/* The buffer was sized in tmain() from
+			 * compress_bound(chunk_size); a larger bound for a
+			 * smaller chunk would be an engine bug. */
+			out_nbytes_avail = compress_bound(compressor,
+							  original_size);
+			if (out_nbytes_avail > compressed_buf_size) {
+				msg("%"TS": bug in compress_bound()", in->name);
+				return -1;
+			}
+		} else {
+			/* Require at least one byte of savings, as a
+			 * block-based application would. */
+			out_nbytes_avail = original_size - 1;
+		}
+
+		/* Compress the chunk of data. */
+		start_time = timer_ticks();
+		compressed_size = do_compress(compressor,
+					      original_buf,
+					      original_size,
+					      compressed_buf,
+					      out_nbytes_avail);
+		total_compress_time += timer_ticks() - start_time;
+
+		if (compressed_size) {
+			/* Successfully compressed the chunk of data. */
+
+			/* Decompress the data we just compressed and compare
+			 * the result with the original. */
+			start_time = timer_ticks();
+			ok = do_decompress(decompressor,
+					   compressed_buf, compressed_size,
+					   decompressed_buf, original_size);
+			total_decompress_time += timer_ticks() - start_time;
+
+			if (!ok) {
+				msg("%"TS": failed to decompress data",
+				    in->name);
+				return -1;
+			}
+
+			if (memcmp(original_buf, decompressed_buf,
+				   original_size) != 0)
+			{
+				msg("%"TS": data did not decompress to "
+				    "original", in->name);
+				return -1;
+			}
+
+			total_compressed_size += compressed_size;
+		} else {
+			/*
+			 * The chunk would have compressed to more than
+			 * out_nbytes_avail bytes.
+			 */
+			if (allow_expansion) {
+				msg("%"TS": bug in compress_bound()", in->name);
+				return -1;
+			}
+			/* Count it as "stored uncompressed". */
+			total_compressed_size += original_size;
+		}
+	}
+
+	if (ret < 0)
+		return ret;
+
+	if (total_uncompressed_size == 0) {
+		printf("\tFile was empty.\n");
+		return 0;
+	}
+
+	/* Avoid division by zero in the speed calculations below. */
+	if (total_compress_time == 0)
+		total_compress_time = 1;
+	if (total_decompress_time == 0)
+		total_decompress_time = 1;
+
+	/* Ratio is printed with three decimal places via integer math. */
+	printf("\tCompressed %"PRIu64 " => %"PRIu64" bytes (%u.%03u%%)\n",
+	       total_uncompressed_size, total_compressed_size,
+	       (unsigned int)(total_compressed_size * 100 /
+			      total_uncompressed_size),
+	       (unsigned int)(total_compressed_size * 100000 /
+			      total_uncompressed_size % 1000));
+	printf("\tCompression time: %"PRIu64" ms (%"PRIu64" MB/s)\n",
+	       timer_ticks_to_ms(total_compress_time),
+	       timer_MB_per_s(total_uncompressed_size, total_compress_time));
+	printf("\tDecompression time: %"PRIu64" ms (%"PRIu64" MB/s)\n",
+	       timer_ticks_to_ms(total_decompress_time),
+	       timer_MB_per_s(total_uncompressed_size, total_decompress_time));
+
+	return 0;
+}
+
+/*
+ * Entry point: parse options, set up the chosen compression/decompression
+ * engines and the working buffers, then benchmark each FILE argument
+ * (standard input if none are given).  Returns 0 on success, nonzero on
+ * any error.
+ */
+int
+tmain(int argc, tchar *argv[])
+{
+	u32 chunk_size = 1048576;
+	int level = 6;
+	enum format format = DEFLATE_FORMAT;
+	const struct engine *compress_engine = &DEFAULT_ENGINE;
+	const struct engine *decompress_engine = &DEFAULT_ENGINE;
+	bool allow_expansion = false;
+	struct compressor compressor = { 0 };
+	struct decompressor decompressor = { 0 };
+	size_t compressed_buf_size;
+	void *original_buf = NULL;
+	void *compressed_buf = NULL;
+	void *decompressed_buf = NULL;
+	tchar *default_file_list[] = { NULL };
+	int opt_char;
+	int i;
+	int ret;
+
+	begin_program(argv);
+
+	while ((opt_char = tgetopt(argc, argv, optstring)) != -1) {
+		switch (opt_char) {
+		case '0':
+		case '1':
+		case '2':
+		case '3':
+		case '4':
+		case '5':
+		case '6':
+		case '7':
+		case '8':
+		case '9':
+			/* Digit plus optional suffix, so "-12" works too. */
+			level = parse_compression_level(opt_char, toptarg);
+			if (level < 0)
+				return 1;
+			break;
+		case 'C':
+			compress_engine = name_to_engine(toptarg);
+			if (compress_engine == NULL) {
+				msg("invalid compression engine: \"%"TS"\"", toptarg);
+				show_available_engines(stderr);
+				return 1;
+			}
+			break;
+		case 'D':
+			decompress_engine = name_to_engine(toptarg);
+			if (decompress_engine == NULL) {
+				msg("invalid decompression engine: \"%"TS"\"", toptarg);
+				show_available_engines(stderr);
+				return 1;
+			}
+			break;
+		case 'e':
+			allow_expansion = true;
+			break;
+		case 'g':
+			format = GZIP_FORMAT;
+			break;
+		case 'h':
+			show_usage(stdout);
+			return 0;
+		case 's':
+			chunk_size = tstrtoul(toptarg, NULL, 10);
+			if (chunk_size == 0) {
+				msg("invalid chunk size: \"%"TS"\"", toptarg);
+				return 1;
+			}
+			break;
+		case 'V':
+			show_version();
+			return 0;
+		case 'Y': /* deprecated, use '-C libz' instead */
+			compress_engine = &libz_engine;
+			break;
+		case 'Z': /* deprecated, use '-D libz' instead */
+			decompress_engine = &libz_engine;
+			break;
+		case 'z':
+			format = ZLIB_FORMAT;
+			break;
+		default:
+			show_usage(stderr);
+			return 1;
+		}
+	}
+
+	/* Shift past the parsed options to the FILE arguments. */
+	argc -= toptind;
+	argv += toptind;
+
+	/* Level 0 never shrinks the data, so expansion must be allowed. */
+	if (level == 0)
+		allow_expansion = true;
+
+	ret = -1;
+	if (!compressor_init(&compressor, level, format, compress_engine))
+		goto out;
+	if (!decompressor_init(&decompressor, format, decompress_engine))
+		goto out;
+
+	/* Without -e, cap output at chunk_size - 1 to force real savings. */
+	if (allow_expansion)
+		compressed_buf_size = compress_bound(&compressor, chunk_size);
+	else
+		compressed_buf_size = chunk_size - 1;
+
+	original_buf = xmalloc(chunk_size);
+	compressed_buf = xmalloc(compressed_buf_size);
+	decompressed_buf = xmalloc(chunk_size);
+
+	ret = -1;
+	if (original_buf == NULL || compressed_buf == NULL ||
+	    decompressed_buf == NULL)
+		goto out;
+
+	if (argc == 0) {
+		/* No FILEs given: benchmark standard input (NULL entry). */
+		argv = default_file_list;
+		argc = ARRAY_LEN(default_file_list);
+	} else {
+		/* A lone "-" argument also means standard input. */
+		for (i = 0; i < argc; i++)
+			if (argv[i][0] == '-' && argv[i][1] == '\0')
+				argv[i] = NULL;
+	}
+
+	printf("Benchmarking %s compression:\n",
+	       format == DEFLATE_FORMAT ? "DEFLATE" :
+	       format == ZLIB_FORMAT ? "zlib" : "gzip");
+	printf("\tCompression level: %d\n", level);
+	printf("\tChunk size: %"PRIu32"\n", chunk_size);
+	printf("\tCompression engine: %"TS"\n", compress_engine->name);
+	printf("\tDecompression engine: %"TS"\n", decompress_engine->name);
+
+	for (i = 0; i < argc; i++) {
+		struct file_stream in;
+
+		ret = xopen_for_read(argv[i], true, &in);
+		if (ret != 0)
+			goto out;
+
+		printf("Processing %"TS"...\n", in.name);
+
+		ret = do_benchmark(&in, original_buf, compressed_buf,
+				   decompressed_buf, chunk_size,
+				   allow_expansion, compressed_buf_size,
+				   &compressor, &decompressor);
+		xclose(&in);
+		if (ret != 0)
+			goto out;
+	}
+	ret = 0;
+out:
+	/* goto-based cleanup: everything below is safe on partial setup. */
+	free(decompressed_buf);
+	free(compressed_buf);
+	free(original_buf);
+	decompressor_destroy(&decompressor);
+	compressor_destroy(&compressor);
+	/* ret is 0 or negative; the process exit status must be >= 0. */
+	return -ret;
+}
diff --git a/tools/z64compress/src/enc/libdeflate/programs/checksum.c b/tools/z64compress/src/enc/libdeflate/programs/checksum.c
new file mode 100644
index 000000000..68cd43c91
--- /dev/null
+++ b/tools/z64compress/src/enc/libdeflate/programs/checksum.c
@@ -0,0 +1,218 @@
+/*
+ * checksum.c - Adler-32 and CRC-32 checksumming program
+ *
+ * Copyright 2016 Eric Biggers
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "test_util.h"
+
+static const tchar *const optstring = T("Ahm:s:tZ");
+
+static void
+show_usage(FILE *fp)
+{
+ fprintf(fp,
+"Usage: %"TS" [-A] [-h] [-m ALIGN] [-s SIZE] [-t] [-Z] [FILE]...\n"
+"Calculate Adler-32 or CRC-32 checksums of the specified FILEs.\n"
+"\n"
+"Options:\n"
+" -A use Adler-32 (default is CRC-32)\n"
+" -h print this help\n"
+" -m ALIGN misalign the buffer by ALIGN bytes\n"
+" -s SIZE chunk size in bytes\n"
+" -t show checksum speed, excluding I/O\n"
+" -Z use zlib implementation instead of libdeflate\n",
+ prog_invocation_name);
+}
+
+typedef u32 (*cksum_fn_t)(u32, const void *, size_t);
+
+static u32
+adler32_libdeflate(u32 adler, const void *buf, size_t len)
+{
+ return libdeflate_adler32(adler, buf, len);
+}
+
+static u32
+crc32_libdeflate(u32 crc, const void *buf, size_t len)
+{
+ return libdeflate_crc32(crc, buf, len);
+}
+
+static u32
+adler32_zlib(u32 adler, const void *buf, size_t len)
+{
+ return adler32(adler, buf, len);
+}
+
+static u32
+crc32_zlib(u32 crc, const void *buf, size_t len)
+{
+ return crc32(crc, buf, len);
+}
+
+static int
+checksum_stream(struct file_stream *in, cksum_fn_t cksum, u32 *sum,
+ void *buf, size_t bufsize, u64 *size_ret, u64 *elapsed_ret)
+{
+ u64 size = 0;
+ u64 elapsed = 0;
+
+ for (;;) {
+ ssize_t ret;
+ u64 start_time;
+
+ ret = xread(in, buf, bufsize);
+ if (ret < 0)
+ return ret;
+ if (ret == 0)
+ break;
+
+ size += ret;
+ start_time = timer_ticks();
+ *sum = cksum(*sum, buf, ret);
+ elapsed += timer_ticks() - start_time;
+ }
+
+ if (elapsed == 0)
+ elapsed = 1;
+ *size_ret = size;
+ *elapsed_ret = elapsed;
+ return 0;
+}
+
+int
+tmain(int argc, tchar *argv[])
+{
+ bool use_adler32 = false;
+ bool use_zlib_impl = false;
+ bool do_timing = false;
+ void *orig_buf = NULL;
+ void *buf;
+ size_t misalignment = 0;
+ size_t bufsize = 131072;
+ tchar *default_file_list[] = { NULL };
+ cksum_fn_t cksum;
+ int opt_char;
+ int i;
+ int ret;
+
+ begin_program(argv);
+
+ while ((opt_char = tgetopt(argc, argv, optstring)) != -1) {
+ switch (opt_char) {
+ case 'A':
+ use_adler32 = true;
+ break;
+ case 'h':
+ show_usage(stdout);
+ return 0;
+ case 'm':
+ misalignment = tstrtoul(toptarg, NULL, 10);
+ if (misalignment >= 4096) {
+ msg("invalid misalignment: \"%"TS"\"", toptarg);
+ return 1;
+ }
+ break;
+ case 's':
+ bufsize = tstrtoul(toptarg, NULL, 10);
+ if (bufsize == 0 || bufsize > SIZE_MAX / 2) {
+ msg("invalid chunk size: \"%"TS"\"", toptarg);
+ return 1;
+ }
+ break;
+ case 't':
+ do_timing = true;
+ break;
+ case 'Z':
+ use_zlib_impl = true;
+ break;
+ default:
+ show_usage(stderr);
+ return 1;
+ }
+ }
+
+ argc -= toptind;
+ argv += toptind;
+
+ if (use_adler32) {
+ if (use_zlib_impl)
+ cksum = adler32_zlib;
+ else
+ cksum = adler32_libdeflate;
+ } else {
+ if (use_zlib_impl)
+ cksum = crc32_zlib;
+ else
+ cksum = crc32_libdeflate;
+ }
+
+ orig_buf = xmalloc(bufsize + 4096 + misalignment);
+ if (orig_buf == NULL)
+ return 1;
+ buf = (u8 *)orig_buf + (-(uintptr_t)orig_buf % 4096) + misalignment;
+
+ if (argc == 0) {
+ argv = default_file_list;
+ argc = ARRAY_LEN(default_file_list);
+ } else {
+ for (i = 0; i < argc; i++)
+ if (argv[i][0] == '-' && argv[i][1] == '\0')
+ argv[i] = NULL;
+ }
+
+ for (i = 0; i < argc; i++) {
+ struct file_stream in;
+ u32 sum = cksum(0, NULL, 0);
+ u64 size = 0;
+ u64 elapsed = 0;
+
+ ret = xopen_for_read(argv[i], true, &in);
+ if (ret != 0)
+ goto out;
+
+ ret = checksum_stream(&in, cksum, &sum, buf, bufsize,
+ &size, &elapsed);
+ if (ret == 0) {
+ if (do_timing) {
+ printf("%08"PRIx32"\t%"TS"\t"
+ "%"PRIu64" ms\t%"PRIu64" MB/s\n",
+ sum, in.name, timer_ticks_to_ms(elapsed),
+ timer_MB_per_s(size, elapsed));
+ } else {
+ printf("%08"PRIx32"\t%"TS"\t\n", sum, in.name);
+ }
+ }
+
+ xclose(&in);
+
+ if (ret != 0)
+ goto out;
+ }
+ ret = 0;
+out:
+ free(orig_buf);
+ return -ret;
+}
diff --git a/tools/z64compress/src/enc/libdeflate/programs/config.h.in b/tools/z64compress/src/enc/libdeflate/programs/config.h.in
new file mode 100644
index 000000000..588aa8dca
--- /dev/null
+++ b/tools/z64compress/src/enc/libdeflate/programs/config.h.in
@@ -0,0 +1,22 @@
+#ifndef CONFIG_H
+#define CONFIG_H
+
+/* Is the clock_gettime() function available? */
+#cmakedefine HAVE_CLOCK_GETTIME
+
+/* Is the futimens() function available? */
+#cmakedefine HAVE_FUTIMENS
+
+/* Is the futimes() function available? */
+#cmakedefine HAVE_FUTIMES
+
+/* Is the posix_fadvise() function available? */
+#cmakedefine HAVE_POSIX_FADVISE
+
+/* Is the posix_madvise() function available? */
+#cmakedefine HAVE_POSIX_MADVISE
+
+/* Does stat() provide nanosecond-precision timestamps? */
+#cmakedefine HAVE_STAT_NANOSECOND_PRECISION
+
+#endif /* CONFIG_H */
diff --git a/tools/z64compress/src/enc/libdeflate/programs/gzip.c b/tools/z64compress/src/enc/libdeflate/programs/gzip.c
new file mode 100644
index 000000000..c13474af5
--- /dev/null
+++ b/tools/z64compress/src/enc/libdeflate/programs/gzip.c
@@ -0,0 +1,701 @@
+/*
+ * gzip.c - a file compression and decompression program
+ *
+ * Copyright 2016 Eric Biggers
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifdef __sun
+# define __EXTENSIONS__ /* for futimens() */
+#endif
+
+#include "prog_util.h"
+
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#ifdef _WIN32
+# include <sys/utime.h>
+#else
+# include <sys/time.h>
+# include <unistd.h>
+# include <utime.h>
+#endif
+
+#define GZIP_MIN_HEADER_SIZE 10
+#define GZIP_FOOTER_SIZE 8
+#define GZIP_MIN_OVERHEAD (GZIP_MIN_HEADER_SIZE + GZIP_FOOTER_SIZE)
+#define GZIP_ID1 0x1F
+#define GZIP_ID2 0x8B
+
+struct options {
+ bool to_stdout;
+ bool decompress;
+ bool force;
+ bool keep;
+ bool test;
+ int compression_level;
+ const tchar *suffix;
+};
+
+static const tchar *const optstring = T("1::2::3::4::5::6::7::8::9::cdfhknqS:tV");
+
+static void
+show_usage(FILE *fp)
+{
+ fprintf(fp,
+"Usage: %"TS" [-LEVEL] [-cdfhkqtV] [-S SUF] FILE...\n"
+"Compress or decompress the specified FILEs.\n"
+"\n"
+"Options:\n"
+" -1 fastest (worst) compression\n"
+" -6 medium compression (default)\n"
+" -12 slowest (best) compression\n"
+" -c write to standard output\n"
+" -d decompress\n"
+" -f overwrite existing output files; (de)compress hard-linked files;\n"
+" allow reading/writing compressed data from/to terminal;\n"
+" with gunzip -c, pass through non-gzipped data\n"
+" -h print this help\n"
+" -k don't delete input files\n"
+" -q suppress warnings\n"
+" -S SUF use suffix SUF instead of .gz\n"
+" -t test file integrity\n"
+" -V show version and legal information\n",
+ prog_invocation_name);
+}
+
+static void
+show_version(void)
+{
+ printf(
+"gzip compression program v" LIBDEFLATE_VERSION_STRING "\n"
+"Copyright 2016 Eric Biggers\n"
+"\n"
+"This program is free software which may be modified and/or redistributed\n"
+"under the terms of the MIT license. There is NO WARRANTY, to the extent\n"
+"permitted by law. See the COPYING file for details.\n"
+ );
+}
+
+/* Was the program invoked in decompression mode? */
+static bool
+is_gunzip(void)
+{
+ if (tstrxcmp(prog_invocation_name, T("gunzip")) == 0)
+ return true;
+ if (tstrxcmp(prog_invocation_name, T("libdeflate-gunzip")) == 0)
+ return true;
+#ifdef _WIN32
+ if (tstrxcmp(prog_invocation_name, T("gunzip.exe")) == 0)
+ return true;
+ if (tstrxcmp(prog_invocation_name, T("libdeflate-gunzip.exe")) == 0)
+ return true;
+#endif
+ return false;
+}
+
+static const tchar *
+get_suffix(const tchar *path, const tchar *suffix)
+{
+ size_t path_len = tstrlen(path);
+ size_t suffix_len = tstrlen(suffix);
+ const tchar *p;
+
+ if (path_len <= suffix_len)
+ return NULL;
+ p = &path[path_len - suffix_len];
+ if (tstrxcmp(p, suffix) == 0)
+ return p;
+ return NULL;
+}
+
+static bool
+has_suffix(const tchar *path, const tchar *suffix)
+{
+ return get_suffix(path, suffix) != NULL;
+}
+
+static tchar *
+append_suffix(const tchar *path, const tchar *suffix)
+{
+ size_t path_len = tstrlen(path);
+ size_t suffix_len = tstrlen(suffix);
+ tchar *suffixed_path;
+
+ suffixed_path = xmalloc((path_len + suffix_len + 1) * sizeof(tchar));
+ if (suffixed_path == NULL)
+ return NULL;
+ tmemcpy(suffixed_path, path, path_len);
+ tmemcpy(&suffixed_path[path_len], suffix, suffix_len + 1);
+ return suffixed_path;
+}
+
+static int
+do_compress(struct libdeflate_compressor *compressor,
+ struct file_stream *in, struct file_stream *out)
+{
+ const void *uncompressed_data = in->mmap_mem;
+ size_t uncompressed_size = in->mmap_size;
+ void *compressed_data;
+ size_t actual_compressed_size;
+ size_t max_compressed_size;
+ int ret;
+
+ max_compressed_size = libdeflate_gzip_compress_bound(compressor,
+ uncompressed_size);
+ compressed_data = xmalloc(max_compressed_size);
+ if (compressed_data == NULL) {
+ msg("%"TS": file is probably too large to be processed by this "
+ "program", in->name);
+ ret = -1;
+ goto out;
+ }
+
+ actual_compressed_size = libdeflate_gzip_compress(compressor,
+ uncompressed_data,
+ uncompressed_size,
+ compressed_data,
+ max_compressed_size);
+ if (actual_compressed_size == 0) {
+ msg("Bug in libdeflate_gzip_compress_bound()!");
+ ret = -1;
+ goto out;
+ }
+
+ ret = full_write(out, compressed_data, actual_compressed_size);
+out:
+ free(compressed_data);
+ return ret;
+}
+
+static int
+do_decompress(struct libdeflate_decompressor *decompressor,
+ struct file_stream *in, struct file_stream *out,
+ const struct options *options)
+{
+ const u8 *compressed_data = in->mmap_mem;
+ size_t compressed_size = in->mmap_size;
+ void *uncompressed_data = NULL;
+ size_t uncompressed_size;
+ size_t max_uncompressed_size;
+ size_t actual_in_nbytes;
+ size_t actual_out_nbytes;
+ enum libdeflate_result result;
+ int ret = 0;
+
+ if (compressed_size < GZIP_MIN_OVERHEAD ||
+ compressed_data[0] != GZIP_ID1 ||
+ compressed_data[1] != GZIP_ID2) {
+ if (options->force && options->to_stdout)
+ return full_write(out, compressed_data, compressed_size);
+ msg("%"TS": not in gzip format", in->name);
+ return -1;
+ }
+
+ /*
+ * Use the ISIZE field as a hint for the decompressed data size. It may
+ * need to be increased later, however, because the file may contain
+ * multiple gzip members and the particular ISIZE we happen to use may
+ * not be the largest; or the real size may be >= 4 GiB, causing ISIZE
+ * to overflow. In any case, make sure to allocate at least one byte.
+ */
+ uncompressed_size =
+ get_unaligned_le32(&compressed_data[compressed_size - 4]);
+ if (uncompressed_size == 0)
+ uncompressed_size = 1;
+
+ /*
+ * DEFLATE cannot expand data more than 1032x, so there's no need to
+ * ever allocate a buffer more than 1032 times larger than the
+ * compressed data. This is a fail-safe, albeit not a very good one, if
+ * ISIZE becomes corrupted on a small file. (The 1032x number comes
+ * from each 2 bits generating a 258-byte match. This is a hard upper
+ * bound; the real upper bound is slightly smaller due to overhead.)
+ */
+ if (compressed_size <= SIZE_MAX / 1032)
+ max_uncompressed_size = compressed_size * 1032;
+ else
+ max_uncompressed_size = SIZE_MAX;
+
+ do {
+ if (uncompressed_data == NULL) {
+ uncompressed_size = MIN(uncompressed_size,
+ max_uncompressed_size);
+ uncompressed_data = xmalloc(uncompressed_size);
+ if (uncompressed_data == NULL) {
+ msg("%"TS": file is probably too large to be "
+ "processed by this program", in->name);
+ ret = -1;
+ goto out;
+ }
+ }
+
+ result = libdeflate_gzip_decompress_ex(decompressor,
+ compressed_data,
+ compressed_size,
+ uncompressed_data,
+ uncompressed_size,
+ &actual_in_nbytes,
+ &actual_out_nbytes);
+
+ if (result == LIBDEFLATE_INSUFFICIENT_SPACE) {
+ if (uncompressed_size >= max_uncompressed_size) {
+ msg("Bug in libdeflate_gzip_decompress_ex(): data expanded too much!");
+ ret = -1;
+ goto out;
+ }
+ if (uncompressed_size * 2 <= uncompressed_size) {
+ msg("%"TS": file corrupt or too large to be "
+ "processed by this program", in->name);
+ ret = -1;
+ goto out;
+ }
+ uncompressed_size *= 2;
+ free(uncompressed_data);
+ uncompressed_data = NULL;
+ continue;
+ }
+
+ if (result != LIBDEFLATE_SUCCESS) {
+ msg("%"TS": file corrupt or not in gzip format",
+ in->name);
+ ret = -1;
+ goto out;
+ }
+
+ if (actual_in_nbytes == 0 ||
+ actual_in_nbytes > compressed_size ||
+ actual_out_nbytes > uncompressed_size) {
+ msg("Bug in libdeflate_gzip_decompress_ex(): impossible actual_nbytes value!");
+ ret = -1;
+ goto out;
+ }
+
+ if (!options->test) {
+ ret = full_write(out, uncompressed_data, actual_out_nbytes);
+ if (ret != 0)
+ goto out;
+ }
+
+ compressed_data += actual_in_nbytes;
+ compressed_size -= actual_in_nbytes;
+
+ } while (compressed_size != 0);
+out:
+ free(uncompressed_data);
+ return ret;
+}
+
+static int
+stat_file(struct file_stream *in, stat_t *stbuf, bool allow_hard_links)
+{
+ if (tfstat(in->fd, stbuf) != 0) {
+ msg("%"TS": unable to stat file", in->name);
+ return -1;
+ }
+
+ if (!S_ISREG(stbuf->st_mode) && !in->is_standard_stream) {
+ warn("%"TS" is %s -- skipping",
+ in->name, S_ISDIR(stbuf->st_mode) ? "a directory" :
+ "not a regular file");
+ return -2;
+ }
+
+ if (stbuf->st_nlink > 1 && !allow_hard_links) {
+ warn("%"TS" has multiple hard links -- skipping (use -f to process anyway)",
+ in->name);
+ return -2;
+ }
+
+ return 0;
+}
+
+static void
+restore_mode(struct file_stream *out, const stat_t *stbuf)
+{
+#ifndef _WIN32
+ if (fchmod(out->fd, stbuf->st_mode) != 0)
+ msg_errno("%"TS": unable to preserve mode", out->name);
+#endif
+}
+
+static void
+restore_owner_and_group(struct file_stream *out, const stat_t *stbuf)
+{
+#ifndef _WIN32
+ if (fchown(out->fd, stbuf->st_uid, stbuf->st_gid) != 0) {
+ msg_errno("%"TS": unable to preserve owner and group",
+ out->name);
+ }
+#endif
+}
+
+static void
+restore_timestamps(struct file_stream *out, const tchar *newpath,
+ const stat_t *stbuf)
+{
+ int ret;
+#ifdef __APPLE__
+ struct timespec times[2] = {
+ { stbuf->st_atime, stbuf->st_atimensec },
+ { stbuf->st_mtime, stbuf->st_mtimensec },
+ };
+ ret = futimens(out->fd, times);
+#elif defined(HAVE_FUTIMENS) && defined(HAVE_STAT_NANOSECOND_PRECISION)
+ struct timespec times[2] = {
+ stbuf->st_atim, stbuf->st_mtim,
+ };
+ ret = futimens(out->fd, times);
+#elif defined(HAVE_FUTIMES) && defined(HAVE_STAT_NANOSECOND_PRECISION)
+ struct timeval times[2] = {
+ { stbuf->st_atim.tv_sec, stbuf->st_atim.tv_nsec / 1000, },
+ { stbuf->st_mtim.tv_sec, stbuf->st_mtim.tv_nsec / 1000, },
+ };
+ ret = futimes(out->fd, times);
+#else
+ struct tutimbuf times = {
+ stbuf->st_atime, stbuf->st_mtime,
+ };
+	ret = tutime(newpath, &times);
+#endif
+ if (ret != 0)
+ msg_errno("%"TS": unable to preserve timestamps", out->name);
+}
+
+static void
+restore_metadata(struct file_stream *out, const tchar *newpath,
+ const stat_t *stbuf)
+{
+ restore_mode(out, stbuf);
+ restore_owner_and_group(out, stbuf);
+ restore_timestamps(out, newpath, stbuf);
+}
+
+static int
+decompress_file(struct libdeflate_decompressor *decompressor, const tchar *path,
+ const struct options *options)
+{
+ tchar *oldpath = (tchar *)path;
+ tchar *newpath = NULL;
+ struct file_stream in;
+ struct file_stream out;
+ stat_t stbuf;
+ int ret;
+ int ret2;
+
+ if (path != NULL) {
+ const tchar *suffix = get_suffix(path, options->suffix);
+ if (suffix == NULL) {
+ /*
+ * Input file is unsuffixed. If the file doesn't exist,
+ * then try it suffixed. Otherwise, if we're not
+ * writing to stdout, skip the file with warning status.
+ * Otherwise, go ahead and try to open the file anyway
+ * (which will very likely fail).
+ */
+ if (tstat(path, &stbuf) != 0 && errno == ENOENT) {
+ oldpath = append_suffix(path, options->suffix);
+ if (oldpath == NULL)
+ return -1;
+ if (!options->to_stdout)
+ newpath = (tchar *)path;
+ } else if (!options->to_stdout) {
+ warn("\"%"TS"\" does not end with the %"TS" suffix -- skipping",
+ path, options->suffix);
+ return -2;
+ }
+ } else if (!options->to_stdout) {
+ /*
+ * Input file is suffixed, and we're not writing to
+ * stdout. Strip the suffix to get the path to the
+ * output file.
+ */
+ newpath = xmalloc((suffix - oldpath + 1) *
+ sizeof(tchar));
+ if (newpath == NULL)
+ return -1;
+ tmemcpy(newpath, oldpath, suffix - oldpath);
+ newpath[suffix - oldpath] = '\0';
+ }
+ }
+
+ ret = xopen_for_read(oldpath, options->force || options->to_stdout,
+ &in);
+ if (ret != 0)
+ goto out_free_paths;
+
+ if (!options->force && isatty(in.fd)) {
+ msg("Refusing to read compressed data from terminal. "
+ "Use -f to override.\nFor help, use -h.");
+ ret = -1;
+ goto out_close_in;
+ }
+
+ ret = stat_file(&in, &stbuf, options->force || options->keep ||
+ oldpath == NULL || newpath == NULL);
+ if (ret != 0)
+ goto out_close_in;
+
+ ret = xopen_for_write(newpath, options->force, &out);
+ if (ret != 0)
+ goto out_close_in;
+
+ /* TODO: need a streaming-friendly solution */
+ ret = map_file_contents(&in, stbuf.st_size);
+ if (ret != 0)
+ goto out_close_out;
+
+ ret = do_decompress(decompressor, &in, &out, options);
+ if (ret != 0)
+ goto out_close_out;
+
+ if (oldpath != NULL && newpath != NULL)
+ restore_metadata(&out, newpath, &stbuf);
+ ret = 0;
+out_close_out:
+ ret2 = xclose(&out);
+ if (ret == 0)
+ ret = ret2;
+ if (ret != 0 && newpath != NULL)
+ tunlink(newpath);
+out_close_in:
+ xclose(&in);
+ if (ret == 0 && oldpath != NULL && newpath != NULL && !options->keep)
+ tunlink(oldpath);
+out_free_paths:
+ if (newpath != path)
+ free(newpath);
+ if (oldpath != path)
+ free(oldpath);
+ return ret;
+}
+
+static int
+compress_file(struct libdeflate_compressor *compressor, const tchar *path,
+ const struct options *options)
+{
+ tchar *newpath = NULL;
+ struct file_stream in;
+ struct file_stream out;
+ stat_t stbuf;
+ int ret;
+ int ret2;
+
+ if (path != NULL && !options->to_stdout) {
+ if (!options->force && has_suffix(path, options->suffix)) {
+ msg("%"TS": already has %"TS" suffix -- skipping",
+ path, options->suffix);
+ return 0;
+ }
+ newpath = append_suffix(path, options->suffix);
+ if (newpath == NULL)
+ return -1;
+ }
+
+ ret = xopen_for_read(path, options->force || options->to_stdout, &in);
+ if (ret != 0)
+ goto out_free_newpath;
+
+ ret = stat_file(&in, &stbuf, options->force || options->keep ||
+ path == NULL || newpath == NULL);
+ if (ret != 0)
+ goto out_close_in;
+
+ ret = xopen_for_write(newpath, options->force, &out);
+ if (ret != 0)
+ goto out_close_in;
+
+ if (!options->force && isatty(out.fd)) {
+ msg("Refusing to write compressed data to terminal. "
+ "Use -f to override.\nFor help, use -h.");
+ ret = -1;
+ goto out_close_out;
+ }
+
+ /* TODO: need a streaming-friendly solution */
+ ret = map_file_contents(&in, stbuf.st_size);
+ if (ret != 0)
+ goto out_close_out;
+
+ ret = do_compress(compressor, &in, &out);
+ if (ret != 0)
+ goto out_close_out;
+
+ if (path != NULL && newpath != NULL)
+ restore_metadata(&out, newpath, &stbuf);
+ ret = 0;
+out_close_out:
+ ret2 = xclose(&out);
+ if (ret == 0)
+ ret = ret2;
+ if (ret != 0 && newpath != NULL)
+ tunlink(newpath);
+out_close_in:
+ xclose(&in);
+ if (ret == 0 && path != NULL && newpath != NULL && !options->keep)
+ tunlink(path);
+out_free_newpath:
+ free(newpath);
+ return ret;
+}
+
+int
+tmain(int argc, tchar *argv[])
+{
+ tchar *default_file_list[] = { NULL };
+ struct options options;
+ int opt_char;
+ int i;
+ int ret;
+
+ begin_program(argv);
+
+ options.to_stdout = false;
+ options.decompress = is_gunzip();
+ options.force = false;
+ options.keep = false;
+ options.test = false;
+ options.compression_level = 6;
+ options.suffix = T(".gz");
+
+ while ((opt_char = tgetopt(argc, argv, optstring)) != -1) {
+ switch (opt_char) {
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ case '8':
+ case '9':
+ options.compression_level =
+ parse_compression_level(opt_char, toptarg);
+ if (options.compression_level < 0)
+ return 1;
+ break;
+ case 'c':
+ options.to_stdout = true;
+ break;
+ case 'd':
+ options.decompress = true;
+ break;
+ case 'f':
+ options.force = true;
+ break;
+ case 'h':
+ show_usage(stdout);
+ return 0;
+ case 'k':
+ options.keep = true;
+ break;
+ case 'n':
+ /*
+ * -n means don't save or restore the original filename
+ * in the gzip header. Currently this implementation
+ * already behaves this way by default, so accept the
+ * option as a no-op.
+ */
+ break;
+ case 'q':
+ suppress_warnings = true;
+ break;
+ case 'S':
+ options.suffix = toptarg;
+ if (options.suffix[0] == T('\0')) {
+ msg("invalid suffix");
+ return 1;
+ }
+ break;
+ case 't':
+ options.test = true;
+ options.decompress = true;
+ options.to_stdout = true;
+ /*
+ * -t behaves just like the more commonly used -c
+ * option, except that -t doesn't actually write
+ * anything. For ease of implementation, just pretend
+ * that -c was specified too.
+ */
+ break;
+ case 'V':
+ show_version();
+ return 0;
+ default:
+ show_usage(stderr);
+ return 1;
+ }
+ }
+
+ argv += toptind;
+ argc -= toptind;
+
+ if (argc == 0) {
+ argv = default_file_list;
+ argc = ARRAY_LEN(default_file_list);
+ } else {
+ for (i = 0; i < argc; i++)
+ if (argv[i][0] == '-' && argv[i][1] == '\0')
+ argv[i] = NULL;
+ }
+
+ ret = 0;
+ if (options.decompress) {
+ struct libdeflate_decompressor *d;
+
+ d = alloc_decompressor();
+ if (d == NULL)
+ return 1;
+
+ for (i = 0; i < argc; i++)
+ ret |= -decompress_file(d, argv[i], &options);
+
+ libdeflate_free_decompressor(d);
+ } else {
+ struct libdeflate_compressor *c;
+
+ c = alloc_compressor(options.compression_level);
+ if (c == NULL)
+ return 1;
+
+ for (i = 0; i < argc; i++)
+ ret |= -compress_file(c, argv[i], &options);
+
+ libdeflate_free_compressor(c);
+ }
+
+ switch (ret) {
+ case 0:
+ /* No warnings or errors */
+ return 0;
+ case 2:
+ /* At least one warning, but no errors */
+ if (suppress_warnings)
+ return 0;
+ return 2;
+ default:
+ /* At least one error */
+ return 1;
+ }
+}
diff --git a/tools/z64compress/src/enc/libdeflate/programs/prog_util.c b/tools/z64compress/src/enc/libdeflate/programs/prog_util.c
new file mode 100644
index 000000000..a4bf1c47d
--- /dev/null
+++ b/tools/z64compress/src/enc/libdeflate/programs/prog_util.c
@@ -0,0 +1,522 @@
+/*
+ * prog_util.c - utility functions for programs
+ *
+ * Copyright 2016 Eric Biggers
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifdef __APPLE__
+/* for O_NOFOLLOW */
+# undef _POSIX_C_SOURCE
+# define _DARWIN_C_SOURCE
+#endif
+
+#include "prog_util.h"
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdarg.h>
+#ifdef _WIN32
+# include <windows.h>
+#else
+# include <sys/mman.h>
+# include <sys/stat.h>
+#endif
+
+#ifndef O_BINARY
+# define O_BINARY 0
+#endif
+#ifndef O_SEQUENTIAL
+# define O_SEQUENTIAL 0
+#endif
+#ifndef O_NOFOLLOW
+# define O_NOFOLLOW 0
+#endif
+#ifndef O_NONBLOCK
+# define O_NONBLOCK 0
+#endif
+#ifndef O_NOCTTY
+# define O_NOCTTY 0
+#endif
+
+/* The invocation name of the program (filename component only) */
+const tchar *prog_invocation_name;
+
+/* Whether to suppress warning messages or not */
+bool suppress_warnings;
+
+static void
+do_msg(const char *format, bool with_errno, va_list va)
+{
+ int saved_errno = errno;
+
+ fprintf(stderr, "%"TS": ", prog_invocation_name);
+ vfprintf(stderr, format, va);
+ if (with_errno)
+ fprintf(stderr, ": %s\n", strerror(saved_errno));
+ else
+ fprintf(stderr, "\n");
+
+ errno = saved_errno;
+}
+
+/* Print a message to standard error */
+void
+msg(const char *format, ...)
+{
+ va_list va;
+
+ va_start(va, format);
+ do_msg(format, false, va);
+ va_end(va);
+}
+
+/* Print a message to standard error, including a description of errno */
+void
+msg_errno(const char *format, ...)
+{
+ va_list va;
+
+ va_start(va, format);
+ do_msg(format, true, va);
+ va_end(va);
+}
+
+
+/* Same as msg(), but do nothing if 'suppress_warnings' has been set. */
+void
+warn(const char *format, ...)
+{
+ if (!suppress_warnings) {
+ va_list va;
+
+ va_start(va, format);
+ do_msg(format, false, va);
+ va_end(va);
+ }
+}
+
+/* malloc() wrapper */
+void *
+xmalloc(size_t size)
+{
+ void *p = malloc(size);
+ if (p == NULL && size == 0)
+ p = malloc(1);
+ if (p == NULL)
+ msg("Out of memory");
+ return p;
+}
+
+/*
+ * Retrieve a pointer to the filename component of the specified path.
+ *
+ * Note: this does not modify the path. Therefore, it is not guaranteed to work
+ * properly for directories, since a path to a directory might have trailing
+ * slashes.
+ */
+static const tchar *
+get_filename(const tchar *path)
+{
+ const tchar *slash = tstrrchr(path, '/');
+#ifdef _WIN32
+ const tchar *backslash = tstrrchr(path, '\\');
+ if (backslash != NULL && (slash == NULL || backslash > slash))
+ slash = backslash;
+#endif
+ if (slash != NULL)
+ return slash + 1;
+ return path;
+}
+
+void
+begin_program(tchar *argv[])
+{
+ prog_invocation_name = get_filename(argv[0]);
+
+#ifdef FREESTANDING
+ /* This allows testing freestanding library builds. */
+ libdeflate_set_memory_allocator(malloc, free);
+#endif
+}
+
+/* Create a copy of 'path' surrounded by double quotes */
+static tchar *
+quote_path(const tchar *path)
+{
+ size_t len = tstrlen(path);
+ tchar *result;
+
+ result = xmalloc((1 + len + 1 + 1) * sizeof(tchar));
+ if (result == NULL)
+ return NULL;
+ result[0] = '"';
+ tmemcpy(&result[1], path, len);
+ result[1 + len] = '"';
+ result[1 + len + 1] = '\0';
+ return result;
+}
+
+/* Open a file for reading, or set up standard input for reading */
+int
+xopen_for_read(const tchar *path, bool symlink_ok, struct file_stream *strm)
+{
+ strm->mmap_token = NULL;
+ strm->mmap_mem = NULL;
+
+ if (path == NULL) {
+ strm->is_standard_stream = true;
+ strm->name = T("standard input");
+ strm->fd = STDIN_FILENO;
+ #ifdef _WIN32
+ _setmode(strm->fd, O_BINARY);
+ #endif
+ return 0;
+ }
+
+ strm->is_standard_stream = false;
+
+ strm->name = quote_path(path);
+ if (strm->name == NULL)
+ return -1;
+
+ strm->fd = topen(path, O_RDONLY | O_BINARY | O_NONBLOCK | O_NOCTTY |
+ (symlink_ok ? 0 : O_NOFOLLOW) | O_SEQUENTIAL);
+ if (strm->fd < 0) {
+ msg_errno("Can't open %"TS" for reading", strm->name);
+ free(strm->name);
+ return -1;
+ }
+
+#if defined(HAVE_POSIX_FADVISE) && (O_SEQUENTIAL == 0)
+ (void)posix_fadvise(strm->fd, 0, 0, POSIX_FADV_SEQUENTIAL);
+#endif
+
+ return 0;
+}
+
+/* Open a file for writing, or set up standard output for writing */
+int
+xopen_for_write(const tchar *path, bool overwrite, struct file_stream *strm)
+{
+ int ret = -1;
+
+ strm->mmap_token = NULL;
+ strm->mmap_mem = NULL;
+
+ if (path == NULL) {
+ strm->is_standard_stream = true;
+ strm->name = T("standard output");
+ strm->fd = STDOUT_FILENO;
+ #ifdef _WIN32
+ _setmode(strm->fd, O_BINARY);
+ #endif
+ return 0;
+ }
+
+ strm->is_standard_stream = false;
+
+ strm->name = quote_path(path);
+ if (strm->name == NULL)
+ goto err;
+retry:
+ strm->fd = topen(path, O_WRONLY | O_BINARY | O_NOFOLLOW |
+ O_CREAT | O_EXCL, 0644);
+ if (strm->fd < 0) {
+ if (errno != EEXIST) {
+ msg_errno("Can't open %"TS" for writing", strm->name);
+ goto err;
+ }
+ if (!overwrite) {
+ if (!isatty(STDERR_FILENO) || !isatty(STDIN_FILENO)) {
+ warn("%"TS" already exists; use -f to overwrite",
+ strm->name);
+ ret = -2; /* warning only */
+ goto err;
+ }
+ fprintf(stderr, "%"TS": %"TS" already exists; "
+ "overwrite? (y/n) ",
+ prog_invocation_name, strm->name);
+ if (getchar() != 'y') {
+ msg("Not overwriting.");
+ goto err;
+ }
+ }
+ if (tunlink(path) != 0) {
+ msg_errno("Unable to delete %"TS, strm->name);
+ goto err;
+ }
+ goto retry;
+ }
+
+ return 0;
+
+err:
+ free(strm->name);
+ return ret;
+}
+
+/* Read the full contents of a file into memory */
+static int
+read_full_contents(struct file_stream *strm)
+{
+ size_t filled = 0;
+ size_t capacity = 4096;
+ char *buf;
+ int ret;
+
+ buf = xmalloc(capacity);
+ if (buf == NULL)
+ return -1;
+ do {
+ if (filled == capacity) {
+ char *newbuf;
+
+ if (capacity == SIZE_MAX)
+ goto oom;
+ capacity += MIN(SIZE_MAX - capacity, capacity);
+ newbuf = realloc(buf, capacity);
+ if (newbuf == NULL)
+ goto oom;
+ buf = newbuf;
+ }
+ ret = xread(strm, &buf[filled], capacity - filled);
+ if (ret < 0)
+ goto err;
+ filled += ret;
+ } while (ret != 0);
+
+ strm->mmap_mem = buf;
+ strm->mmap_size = filled;
+ return 0;
+
+err:
+ free(buf);
+ return ret;
+oom:
+ msg("Out of memory! %"TS" is too large to be processed by "
+ "this program as currently implemented.", strm->name);
+ ret = -1;
+ goto err;
+}
+
+/* Map the contents of a file into memory */
+int
+map_file_contents(struct file_stream *strm, u64 size)
+{
+ if (size == 0) /* mmap isn't supported on empty files */
+ return read_full_contents(strm);
+
+ if (size > SIZE_MAX) {
+ msg("%"TS" is too large to be processed by this program",
+ strm->name);
+ return -1;
+ }
+#ifdef _WIN32
+ strm->mmap_token = CreateFileMapping(
+ (HANDLE)(intptr_t)_get_osfhandle(strm->fd),
+ NULL, PAGE_READONLY, 0, 0, NULL);
+ if (strm->mmap_token == NULL) {
+ DWORD err = GetLastError();
+ if (err == ERROR_BAD_EXE_FORMAT) /* mmap unsupported */
+ return read_full_contents(strm);
+ msg("Unable create file mapping for %"TS": Windows error %u",
+ strm->name, (unsigned int)err);
+ return -1;
+ }
+
+ strm->mmap_mem = MapViewOfFile((HANDLE)strm->mmap_token,
+ FILE_MAP_READ, 0, 0, size);
+ if (strm->mmap_mem == NULL) {
+ msg("Unable to map %"TS" into memory: Windows error %u",
+ strm->name, (unsigned int)GetLastError());
+ CloseHandle((HANDLE)strm->mmap_token);
+ return -1;
+ }
+#else /* _WIN32 */
+ strm->mmap_mem = mmap(NULL, size, PROT_READ, MAP_SHARED, strm->fd, 0);
+ if (strm->mmap_mem == MAP_FAILED) {
+ strm->mmap_mem = NULL;
+ if (errno == ENODEV /* standard */ ||
+ errno == EINVAL /* macOS */) {
+ /* mmap isn't supported on this file */
+ return read_full_contents(strm);
+ }
+ if (errno == ENOMEM) {
+ msg("%"TS" is too large to be processed by this "
+ "program", strm->name);
+ } else {
+ msg_errno("Unable to map %"TS" into memory",
+ strm->name);
+ }
+ return -1;
+ }
+
+#ifdef HAVE_POSIX_MADVISE
+ (void)posix_madvise(strm->mmap_mem, size, POSIX_MADV_SEQUENTIAL);
+#endif
+ strm->mmap_token = strm; /* anything that's not NULL */
+
+#endif /* !_WIN32 */
+ strm->mmap_size = size;
+ return 0;
+}
+
+/*
+ * Read from a file, returning the full count to indicate all bytes were read, a
+ * short count (possibly 0) to indicate EOF, or -1 to indicate error.
+ */
+ssize_t
+xread(struct file_stream *strm, void *buf, size_t count)
+{
+ char *p = buf;
+ size_t orig_count = count;
+
+ while (count != 0) {
+ ssize_t res = read(strm->fd, p, MIN(count, INT_MAX));
+ if (res == 0)
+ break;
+ if (res < 0) {
+ if (errno == EAGAIN || errno == EINTR)
+ continue;
+ msg_errno("Error reading from %"TS, strm->name);
+ return -1;
+ }
+ p += res;
+ count -= res;
+ }
+ return orig_count - count;
+}
+
+/* Write to a file, returning 0 if all bytes were written or -1 on error */
+int
+full_write(struct file_stream *strm, const void *buf, size_t count)
+{
+ const char *p = buf;
+
+ while (count != 0) {
+ ssize_t res = write(strm->fd, p, MIN(count, INT_MAX));
+ if (res <= 0) {
+ msg_errno("Error writing to %"TS, strm->name);
+ return -1;
+ }
+ p += res;
+ count -= res;
+ }
+ return 0;
+}
+
+/* Close a file, returning 0 on success or -1 on error */
+int
+xclose(struct file_stream *strm)
+{
+ int ret = 0;
+
+ if (!strm->is_standard_stream) {
+ if (close(strm->fd) != 0) {
+ msg_errno("Error closing %"TS, strm->name);
+ ret = -1;
+ }
+ free(strm->name);
+ }
+
+ if (strm->mmap_token != NULL) {
+#ifdef _WIN32
+ UnmapViewOfFile(strm->mmap_mem);
+ CloseHandle((HANDLE)strm->mmap_token);
+#else
+ munmap(strm->mmap_mem, strm->mmap_size);
+#endif
+ strm->mmap_token = NULL;
+ } else {
+ free(strm->mmap_mem);
+ }
+ strm->mmap_mem = NULL;
+ strm->fd = -1;
+ strm->name = NULL;
+ return ret;
+}
+
+/*
+ * Parse the compression level given on the command line, returning the
+ * compression level on success or -1 on error
+ */
+int
+parse_compression_level(tchar opt_char, const tchar *arg)
+{
+ int level;
+
+ if (arg == NULL)
+ arg = T("");
+
+ if (opt_char < '0' || opt_char > '9')
+ goto invalid;
+ level = opt_char - '0';
+
+ if (arg[0] != '\0') {
+ if (arg[0] < '0' || arg[0] > '9')
+ goto invalid;
+ if (arg[1] != '\0') /* Levels are at most 2 digits */
+ goto invalid;
+ if (level == 0) /* Don't allow arguments like "-01" */
+ goto invalid;
+ level = (level * 10) + (arg[0] - '0');
+ }
+
+ if (level < 0 || level > 12)
+ goto invalid;
+
+ return level;
+
+invalid:
+ msg("Invalid compression level: \"%"TC"%"TS"\". "
+ "Must be an integer in the range [0, 12].", opt_char, arg);
+ return -1;
+}
+
+/* Allocate a new DEFLATE compressor */
+struct libdeflate_compressor *
+alloc_compressor(int level)
+{
+ struct libdeflate_compressor *c;
+
+ c = libdeflate_alloc_compressor(level);
+ if (c == NULL) {
+ msg_errno("Unable to allocate compressor with "
+ "compression level %d", level);
+ }
+ return c;
+}
+
+/* Allocate a new DEFLATE decompressor */
+struct libdeflate_decompressor *
+alloc_decompressor(void)
+{
+ struct libdeflate_decompressor *d;
+
+ d = libdeflate_alloc_decompressor();
+ if (d == NULL)
+ msg_errno("Unable to allocate decompressor");
+
+ return d;
+}
diff --git a/tools/z64compress/src/enc/libdeflate/programs/prog_util.h b/tools/z64compress/src/enc/libdeflate/programs/prog_util.h
new file mode 100644
index 000000000..08f538399
--- /dev/null
+++ b/tools/z64compress/src/enc/libdeflate/programs/prog_util.h
@@ -0,0 +1,177 @@
+/*
+ * prog_util.h - utility functions for programs
+ *
+ * Copyright 2016 Eric Biggers
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef PROGRAMS_PROG_UTIL_H
+#define PROGRAMS_PROG_UTIL_H
+
+/*
+ * To keep the code similar on all platforms, sometimes we intentionally use the
+ * "deprecated" non-underscore-prefixed variants of functions in msvcrt.
+ */
+#if defined(_WIN32) && !defined(_CRT_NONSTDC_NO_DEPRECATE)
+# define _CRT_NONSTDC_NO_DEPRECATE 1
+#endif
+/*
+ * Similarly, to match other platforms we intentionally use the "non-secure"
+ * variants, which aren't actually any less secure when used properly.
+ */
+#if defined(_WIN32) && !defined(_CRT_SECURE_NO_WARNINGS)
+# define _CRT_SECURE_NO_WARNINGS 1
+#endif
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include "libdeflate.h"
+
+#include <limits.h>
+#include <stdarg.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#ifndef _WIN32
+#  include <sys/types.h>
+#endif
+
+#include "../common_defs.h"
+
+#if defined(__GNUC__) || __has_attribute(format)
+# define _printf(str_idx, args_idx) \
+ __attribute__((format(printf, str_idx, args_idx)))
+#else
+# define _printf(str_idx, args_idx)
+#endif
+
+#ifdef _WIN32
+
+/*
+ * Definitions for Windows builds. Mainly, 'tchar' is defined to be the 2-byte
+ * 'wchar_t' type instead of 'char'. This is the only "easy" way I know of to
+ * get full Unicode support on Windows...
+ */
+
+#include <io.h>
+#include <wchar.h>
+int wmain(int argc, wchar_t **argv);
+# define tmain wmain
+# define tchar wchar_t
+# define _T(text) L##text
+# define T(text) _T(text)
+# define TS "ls"
+# define TC "lc"
+# define tmemcpy wmemcpy
+# define topen _wopen
+# define tstrchr wcschr
+# define tstrcmp wcscmp
+# define tstrlen wcslen
+# define tstrrchr wcsrchr
+# define tstrtoul wcstoul
+# define tstrxcmp wcsicmp
+# define tunlink _wunlink
+# define tutimbuf __utimbuf64
+# define tutime _wutime64
+# define tstat _wstat64
+# define tfstat _fstat64
+# define stat_t struct _stat64
+# ifdef _MSC_VER
+# define STDIN_FILENO 0
+# define STDOUT_FILENO 1
+# define STDERR_FILENO 2
+# define S_ISREG(m) (((m) & S_IFMT) == S_IFREG)
+# define S_ISDIR(m) (((m) & S_IFMT) == S_IFDIR)
+# endif
+
+#else /* _WIN32 */
+
+/* Standard definitions for everyone else */
+
+# define tmain main
+# define tchar char
+# define T(text) text
+# define TS "s"
+# define TC "c"
+# define tmemcpy memcpy
+# define topen open
+# define tstrchr strchr
+# define tstrcmp strcmp
+# define tstrlen strlen
+# define tstrrchr strrchr
+# define tstrtoul strtoul
+# define tstrxcmp strcmp
+# define tunlink unlink
+# define tutimbuf utimbuf
+# define tutime utime
+# define tstat stat
+# define tfstat fstat
+# define stat_t struct stat
+
+#endif /* !_WIN32 */
+
+extern const tchar *prog_invocation_name;
+extern bool suppress_warnings;
+
+void _printf(1, 2) msg(const char *fmt, ...);
+void _printf(1, 2) msg_errno(const char *fmt, ...);
+void _printf(1, 2) warn(const char *fmt, ...);
+
+void *xmalloc(size_t size);
+
+void begin_program(tchar *argv[]);
+
+struct file_stream {
+ int fd;
+ tchar *name;
+ bool is_standard_stream;
+ void *mmap_token;
+ void *mmap_mem;
+ size_t mmap_size;
+};
+
+int xopen_for_read(const tchar *path, bool symlink_ok,
+ struct file_stream *strm);
+int xopen_for_write(const tchar *path, bool force, struct file_stream *strm);
+int map_file_contents(struct file_stream *strm, u64 size);
+
+ssize_t xread(struct file_stream *strm, void *buf, size_t count);
+int full_write(struct file_stream *strm, const void *buf, size_t count);
+
+int xclose(struct file_stream *strm);
+
+int parse_compression_level(tchar opt_char, const tchar *arg);
+
+struct libdeflate_compressor *alloc_compressor(int level);
+struct libdeflate_decompressor *alloc_decompressor(void);
+
+/* tgetopt.c */
+
+extern tchar *toptarg;
+extern int toptind, topterr, toptopt;
+
+int tgetopt(int argc, tchar *argv[], const tchar *optstring);
+
+#endif /* PROGRAMS_PROG_UTIL_H */
diff --git a/tools/z64compress/src/enc/libdeflate/programs/test_checksums.c b/tools/z64compress/src/enc/libdeflate/programs/test_checksums.c
new file mode 100644
index 000000000..e66e62443
--- /dev/null
+++ b/tools/z64compress/src/enc/libdeflate/programs/test_checksums.c
@@ -0,0 +1,200 @@
+/*
+ * test_checksums.c
+ *
+ * Verify that libdeflate's Adler-32 and CRC-32 functions produce the same
+ * results as their zlib equivalents.
+ */
+
+#include <time.h>
+#include <zlib.h>
+
+#include "test_util.h"
+
+static unsigned int rng_seed;
+
+typedef u32 (*cksum_fn_t)(u32, const void *, size_t);
+
+static u32
+adler32_libdeflate(u32 adler, const void *buf, size_t len)
+{
+ return libdeflate_adler32(adler, buf, len);
+}
+
+static u32
+crc32_libdeflate(u32 crc, const void *buf, size_t len)
+{
+ return libdeflate_crc32(crc, buf, len);
+}
+
+static u32
+adler32_zlib(u32 adler, const void *buf, size_t len)
+{
+ return adler32(adler, buf, len);
+}
+
+static u32
+crc32_zlib(u32 crc, const void *buf, size_t len)
+{
+ return crc32(crc, buf, len);
+}
+
+static u32
+select_initial_crc(void)
+{
+ if (rand() & 1)
+ return 0;
+ return ((u32)rand() << 16) | rand();
+}
+
+static u32
+select_initial_adler(void)
+{
+ u32 lo, hi;
+
+ if (rand() & 1)
+ return 1;
+
+ lo = (rand() % 4 == 0 ? 65520 : rand() % 65521);
+ hi = (rand() % 4 == 0 ? 65520 : rand() % 65521);
+ return (hi << 16) | lo;
+}
+
+static void
+test_initial_values(cksum_fn_t cksum, u32 expected)
+{
+ ASSERT(cksum(0, NULL, 0) == expected);
+ if (cksum != adler32_zlib) /* broken */
+ ASSERT(cksum(0, NULL, 1) == expected);
+ ASSERT(cksum(0, NULL, 1234) == expected);
+ ASSERT(cksum(1234, NULL, 0) == expected);
+ ASSERT(cksum(1234, NULL, 1234) == expected);
+}
+
+static void
+test_multipart(const u8 *buffer, size_t size, const char *name,
+ cksum_fn_t cksum, u32 v, u32 expected)
+{
+ size_t division = rand() % (size + 1);
+ v = cksum(v, buffer, division);
+ v = cksum(v, buffer + division, size - division);
+ if (v != expected) {
+ fprintf(stderr, "%s checksum failed multipart test\n", name);
+ ASSERT(0);
+ }
+}
+
+static void
+test_checksums(const void *buffer, size_t size, const char *name,
+ cksum_fn_t cksum1, cksum_fn_t cksum2, u32 initial_value)
+{
+ u32 v1 = cksum1(initial_value, buffer, size);
+ u32 v2 = cksum2(initial_value, buffer, size);
+
+ if (v1 != v2) {
+ fprintf(stderr, "%s checksum mismatch\n", name);
+ fprintf(stderr, "initial_value=0x%08"PRIx32", buffer=%p, "
+ "size=%zu, buffer=", initial_value, buffer, size);
+ for (size_t i = 0; i < MIN(size, 256); i++)
+ fprintf(stderr, "%02x", ((const u8 *)buffer)[i]);
+ if (size > 256)
+ fprintf(stderr, "...");
+ fprintf(stderr, "\n");
+ ASSERT(0);
+ }
+
+ if ((rand() & 15) == 0) {
+ test_multipart(buffer, size, name, cksum1, initial_value, v1);
+ test_multipart(buffer, size, name, cksum2, initial_value, v1);
+ }
+}
+
+static void
+test_crc32(const void *buffer, size_t size, u32 initial_value)
+{
+ test_checksums(buffer, size, "CRC-32",
+ crc32_libdeflate, crc32_zlib, initial_value);
+}
+
+static void
+test_adler32(const void *buffer, size_t size, u32 initial_value)
+{
+ test_checksums(buffer, size, "Adler-32",
+ adler32_libdeflate, adler32_zlib, initial_value);
+}
+
+static void test_random_buffers(u8 *buf_start, u8 *buf_end, size_t limit,
+ u32 num_iter)
+{
+ for (u32 i = 0; i < num_iter; i++) {
+ size_t start = rand() % limit;
+ size_t len = rand() % (limit - start);
+ u32 a0 = select_initial_adler();
+ u32 c0 = select_initial_crc();
+
+ for (size_t j = start; j < start + len; j++)
+ buf_start[j] = rand();
+
+ /* Test with chosen size and alignment */
+ test_adler32(&buf_start[start], len, a0);
+ test_crc32(&buf_start[start], len, c0);
+
+ /* Test with chosen size, with guard page before input buffer */
+ memmove(buf_start, &buf_start[start], len);
+ test_adler32(buf_start, len, a0);
+ test_crc32(buf_start, len, c0);
+
+ /* Test with chosen size, with guard page after input buffer */
+ memmove(buf_end - len, buf_start, len);
+ test_adler32(buf_end - len, len, a0);
+ test_crc32(buf_end - len, len, c0);
+ }
+}
+
+int
+tmain(int argc, tchar *argv[])
+{
+ u8 *buf_start, *buf_end;
+
+ begin_program(argv);
+
+ alloc_guarded_buffer(262144, &buf_start, &buf_end);
+
+ rng_seed = time(NULL);
+ srand(rng_seed);
+
+ test_initial_values(adler32_libdeflate, 1);
+ test_initial_values(adler32_zlib, 1);
+ test_initial_values(crc32_libdeflate, 0);
+ test_initial_values(crc32_zlib, 0);
+
+ /* Test different buffer sizes and alignments */
+ test_random_buffers(buf_start, buf_end, 256, 5000);
+ test_random_buffers(buf_start, buf_end, 1024, 500);
+ test_random_buffers(buf_start, buf_end, 32768, 50);
+ test_random_buffers(buf_start, buf_end, 262144, 25);
+
+ /*
+ * Test Adler-32 overflow cases. For example, given all 0xFF bytes and
+ * the highest possible initial (s1, s2) of (65520, 65520), then s2 if
+ * stored as a 32-bit unsigned integer will overflow if > 5552 bytes are
+ * processed. Implementations must make sure to reduce s2 modulo 65521
+ * before that point. Also, some implementations make use of 16-bit
+ * counters which can overflow earlier.
+ */
+ memset(buf_start, 0xFF, 32768);
+ for (u32 i = 0; i < 20; i++) {
+ u32 initial_value;
+
+ if (i == 0)
+ initial_value = ((u32)65520 << 16) | 65520;
+ else
+ initial_value = select_initial_adler();
+
+ test_adler32(buf_start, 5553, initial_value);
+ test_adler32(buf_start, rand() % 32769, initial_value);
+ buf_start[rand() % 32768] = 0xFE;
+ }
+
+ free_guarded_buffer(buf_start, buf_end);
+ return 0;
+}
diff --git a/tools/z64compress/src/enc/libdeflate/programs/test_custom_malloc.c b/tools/z64compress/src/enc/libdeflate/programs/test_custom_malloc.c
new file mode 100644
index 000000000..2bbb7f098
--- /dev/null
+++ b/tools/z64compress/src/enc/libdeflate/programs/test_custom_malloc.c
@@ -0,0 +1,85 @@
+/*
+ * test_custom_malloc.c
+ *
+ * Test libdeflate_set_memory_allocator().
+ * Also test injecting allocation failures.
+ */
+
+#include "test_util.h"
+
+static int malloc_count = 0;
+static int free_count = 0;
+
+static void *do_malloc(size_t size)
+{
+ malloc_count++;
+ return malloc(size);
+}
+
+static void *do_fail_malloc(size_t size)
+{
+ malloc_count++;
+ return NULL;
+}
+
+static void do_free(void *ptr)
+{
+ free_count++;
+ free(ptr);
+}
+
+int
+tmain(int argc, tchar *argv[])
+{
+ int level;
+ struct libdeflate_compressor *c;
+ struct libdeflate_decompressor *d;
+
+ begin_program(argv);
+
+ /* Test that the custom allocator is actually used when requested. */
+
+ libdeflate_set_memory_allocator(do_malloc, do_free);
+ ASSERT(malloc_count == 0);
+ ASSERT(free_count == 0);
+
+ for (level = 0; level <= 12; level++) {
+ malloc_count = free_count = 0;
+ c = libdeflate_alloc_compressor(level);
+ ASSERT(c != NULL);
+ ASSERT(malloc_count == 1);
+ ASSERT(free_count == 0);
+ libdeflate_free_compressor(c);
+ ASSERT(malloc_count == 1);
+ ASSERT(free_count == 1);
+ }
+
+ malloc_count = free_count = 0;
+ d = libdeflate_alloc_decompressor();
+ ASSERT(d != NULL);
+ ASSERT(malloc_count == 1);
+ ASSERT(free_count == 0);
+ libdeflate_free_decompressor(d);
+ ASSERT(malloc_count == 1);
+ ASSERT(free_count == 1);
+
+ /* As long as we're here, also test injecting allocation failures. */
+
+ libdeflate_set_memory_allocator(do_fail_malloc, do_free);
+
+ for (level = 0; level <= 12; level++) {
+ malloc_count = free_count = 0;
+ c = libdeflate_alloc_compressor(level);
+ ASSERT(c == NULL);
+ ASSERT(malloc_count == 1);
+ ASSERT(free_count == 0);
+ }
+
+ malloc_count = free_count = 0;
+ d = libdeflate_alloc_decompressor();
+ ASSERT(d == NULL);
+ ASSERT(malloc_count == 1);
+ ASSERT(free_count == 0);
+
+ return 0;
+}
diff --git a/tools/z64compress/src/enc/libdeflate/programs/test_incomplete_codes.c b/tools/z64compress/src/enc/libdeflate/programs/test_incomplete_codes.c
new file mode 100644
index 000000000..4e441bccb
--- /dev/null
+++ b/tools/z64compress/src/enc/libdeflate/programs/test_incomplete_codes.c
@@ -0,0 +1,385 @@
+/*
+ * test_incomplete_codes.c
+ *
+ * Test that the decompressor accepts incomplete Huffman codes in certain
+ * specific cases.
+ */
+
+#include "test_util.h"
+
+static void
+verify_decompression_libdeflate(const u8 *in, size_t in_nbytes,
+ u8 *out, size_t out_nbytes_avail,
+ const u8 *expected_out,
+ size_t expected_out_nbytes)
+{
+ struct libdeflate_decompressor *d;
+ enum libdeflate_result res;
+ size_t actual_out_nbytes;
+
+ d = libdeflate_alloc_decompressor();
+ ASSERT(d != NULL);
+
+ res = libdeflate_deflate_decompress(d, in, in_nbytes,
+ out, out_nbytes_avail,
+ &actual_out_nbytes);
+ ASSERT(res == LIBDEFLATE_SUCCESS);
+ ASSERT(actual_out_nbytes == expected_out_nbytes);
+ ASSERT(memcmp(out, expected_out, actual_out_nbytes) == 0);
+
+ libdeflate_free_decompressor(d);
+}
+
+static void
+verify_decompression_zlib(const u8 *in, size_t in_nbytes,
+ u8 *out, size_t out_nbytes_avail,
+ const u8 *expected_out, size_t expected_out_nbytes)
+{
+ z_stream z;
+ int res;
+ size_t actual_out_nbytes;
+
+ memset(&z, 0, sizeof(z));
+ res = inflateInit2(&z, -15);
+ ASSERT(res == Z_OK);
+
+ z.next_in = (void *)in;
+ z.avail_in = in_nbytes;
+ z.next_out = (void *)out;
+ z.avail_out = out_nbytes_avail;
+ res = inflate(&z, Z_FINISH);
+ ASSERT(res == Z_STREAM_END);
+ actual_out_nbytes = out_nbytes_avail - z.avail_out;
+ ASSERT(actual_out_nbytes == expected_out_nbytes);
+ ASSERT(memcmp(out, expected_out, actual_out_nbytes) == 0);
+
+ inflateEnd(&z);
+}
+
+static void
+verify_decompression(const u8 *in, size_t in_nbytes,
+ u8 *out, size_t out_nbytes_avail,
+ const u8 *expected_out, size_t expected_out_nbytes)
+{
+ verify_decompression_libdeflate(in, in_nbytes, out, out_nbytes_avail,
+ expected_out, expected_out_nbytes);
+ verify_decompression_zlib(in, in_nbytes, out, out_nbytes_avail,
+ expected_out, expected_out_nbytes);
+
+}
+
+/* Test that an empty offset code is accepted. */
+static void
+test_empty_offset_code(void)
+{
+ static const u8 expected_out[] = { 'A', 'B', 'A', 'A' };
+ u8 in[128];
+ u8 out[128];
+ struct output_bitstream os = { .next = in, .end = in + sizeof(in) };
+ int i;
+
+ /*
+ * Generate a DEFLATE stream containing a "dynamic Huffman" block
+ * containing literals, but no offsets; and having an empty offset code
+ * (all codeword lengths set to 0).
+ *
+ * Litlen code:
+ * litlensym_A freq=3 len=1 codeword= 0
+ * litlensym_B freq=1 len=2 codeword=01
+ * litlensym_256 (end-of-block) freq=1 len=2 codeword=11
+ * Offset code:
+ * (empty)
+ *
+ * Litlen and offset codeword lengths:
+ * [0..'A'-1] = 0 presym_18
+ * ['A'] = 1 presym_1
+ * ['B'] = 2 presym_2
+ * ['B'+1..255] = 0 presym_18 presym_18
+ * [256] = 2 presym_2
+ * [257] = 0 presym_0
+ *
+ * Precode:
+ * presym_0 freq=1 len=3 codeword=011
+ * presym_1 freq=1 len=3 codeword=111
+ * presym_2 freq=2 len=2 codeword= 01
+ * presym_18 freq=3 len=1 codeword= 0
+ */
+
+ ASSERT(put_bits(&os, 1, 1)); /* BFINAL: 1 */
+ ASSERT(put_bits(&os, 2, 2)); /* BTYPE: DYNAMIC_HUFFMAN */
+ ASSERT(put_bits(&os, 0, 5)); /* num_litlen_syms: 0 + 257 */
+ ASSERT(put_bits(&os, 0, 5)); /* num_offset_syms: 0 + 1 */
+ ASSERT(put_bits(&os, 14, 4)); /* num_explicit_precode_lens: 14 + 4 */
+
+ /*
+ * Precode codeword lengths: order is
+ * [16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15]
+ */
+ for (i = 0; i < 2; i++) /* presym_{16,17}: len=0 */
+ ASSERT(put_bits(&os, 0, 3));
+ ASSERT(put_bits(&os, 1, 3)); /* presym_18: len=1 */
+ ASSERT(put_bits(&os, 3, 3)); /* presym_0: len=3 */
+ for (i = 0; i < 11; i++) /* presym_{8,...,13}: len=0 */
+ ASSERT(put_bits(&os, 0, 3));
+ ASSERT(put_bits(&os, 2, 3)); /* presym_2: len=2 */
+ ASSERT(put_bits(&os, 0, 3)); /* presym_14: len=0 */
+ ASSERT(put_bits(&os, 3, 3)); /* presym_1: len=3 */
+
+ /* Litlen and offset codeword lengths */
+ ASSERT(put_bits(&os, 0x0, 1) &&
+ put_bits(&os, 54, 7)); /* presym_18, 65 zeroes */
+ ASSERT(put_bits(&os, 0x7, 3)); /* presym_1 */
+ ASSERT(put_bits(&os, 0x1, 2)); /* presym_2 */
+ ASSERT(put_bits(&os, 0x0, 1) &&
+ put_bits(&os, 89, 7)); /* presym_18, 100 zeroes */
+ ASSERT(put_bits(&os, 0x0, 1) &&
+ put_bits(&os, 78, 7)); /* presym_18, 89 zeroes */
+ ASSERT(put_bits(&os, 0x1, 2)); /* presym_2 */
+ ASSERT(put_bits(&os, 0x3, 3)); /* presym_0 */
+
+ /* Litlen symbols */
+ ASSERT(put_bits(&os, 0x0, 1)); /* litlensym_A */
+ ASSERT(put_bits(&os, 0x1, 2)); /* litlensym_B */
+ ASSERT(put_bits(&os, 0x0, 1)); /* litlensym_A */
+ ASSERT(put_bits(&os, 0x0, 1)); /* litlensym_A */
+ ASSERT(put_bits(&os, 0x3, 2)); /* litlensym_256 (end-of-block) */
+
+ ASSERT(flush_bits(&os));
+
+ verify_decompression(in, os.next - in, out, sizeof(out),
+ expected_out, sizeof(expected_out));
+}
+
+/* Test that a litrunlen code containing only one symbol is accepted. */
+static void
+test_singleton_litrunlen_code(void)
+{
+ u8 in[128];
+ u8 out[128];
+ struct output_bitstream os = { .next = in, .end = in + sizeof(in) };
+ int i;
+
+ /*
+ * Litlen code:
+ * litlensym_256 (end-of-block) freq=1 len=1 codeword=0
+ * Offset code:
+ * (empty)
+ *
+ * Litlen and offset codeword lengths:
+ * [0..256] = 0 presym_18 presym_18
+ * [256] = 1 presym_1
+ * [257] = 0 presym_0
+ *
+ * Precode:
+ * presym_0 freq=1 len=2 codeword=01
+ * presym_1 freq=1 len=2 codeword=11
+ * presym_18 freq=2 len=1 codeword= 0
+ */
+
+ ASSERT(put_bits(&os, 1, 1)); /* BFINAL: 1 */
+ ASSERT(put_bits(&os, 2, 2)); /* BTYPE: DYNAMIC_HUFFMAN */
+ ASSERT(put_bits(&os, 0, 5)); /* num_litlen_syms: 0 + 257 */
+ ASSERT(put_bits(&os, 0, 5)); /* num_offset_syms: 0 + 1 */
+ ASSERT(put_bits(&os, 14, 4)); /* num_explicit_precode_lens: 14 + 4 */
+
+ /*
+ * Precode codeword lengths: order is
+ * [16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15]
+ */
+ for (i = 0; i < 2; i++) /* presym_{16,17}: len=0 */
+ ASSERT(put_bits(&os, 0, 3));
+ ASSERT(put_bits(&os, 1, 3)); /* presym_18: len=1 */
+ ASSERT(put_bits(&os, 2, 3)); /* presym_0: len=2 */
+ for (i = 0; i < 13; i++) /* presym_{8,...,14}: len=0 */
+ ASSERT(put_bits(&os, 0, 3));
+ ASSERT(put_bits(&os, 2, 3)); /* presym_1: len=2 */
+
+ /* Litlen and offset codeword lengths */
+ for (i = 0; i < 2; i++) {
+ ASSERT(put_bits(&os, 0, 1) && /* presym_18, 128 zeroes */
+ put_bits(&os, 117, 7));
+ }
+ ASSERT(put_bits(&os, 0x3, 2)); /* presym_1 */
+ ASSERT(put_bits(&os, 0x1, 2)); /* presym_0 */
+
+ /* Litlen symbols */
+ ASSERT(put_bits(&os, 0x0, 1)); /* litlensym_256 (end-of-block) */
+
+ ASSERT(flush_bits(&os));
+
+ verify_decompression(in, os.next - in, out, sizeof(out), in, 0);
+}
+
+/* Test that an offset code containing only one symbol is accepted. */
+static void
+test_singleton_offset_code(void)
+{
+ static const u8 expected_out[] = { 255, 255, 255, 255 };
+ u8 in[128];
+ u8 out[128];
+ struct output_bitstream os = { .next = in, .end = in + sizeof(in) };
+ int i;
+
+ ASSERT(put_bits(&os, 1, 1)); /* BFINAL: 1 */
+ ASSERT(put_bits(&os, 2, 2)); /* BTYPE: DYNAMIC_HUFFMAN */
+
+ /*
+ * Litlen code:
+ * litlensym_255 freq=1 len=1 codeword= 0
+ * litlensym_256 (end-of-block) freq=1 len=2 codeword=01
+ * litlensym_257 (len 3) freq=1 len=2 codeword=11
+ * Offset code:
+ * offsetsym_0 (offset 0) freq=1 len=1 codeword=0
+ *
+ * Litlen and offset codeword lengths:
+ * [0..254] = 0 presym_{18,18}
+ * [255] = 1 presym_1
+ * [256] = 1 presym_2
+ * [257] = 1 presym_2
+ * [258] = 1 presym_1
+ *
+ * Precode:
+ * presym_1 freq=2 len=2 codeword=01
+ * presym_2 freq=2 len=2 codeword=11
+ * presym_18 freq=2 len=1 codeword= 0
+ */
+
+ ASSERT(put_bits(&os, 1, 5)); /* num_litlen_syms: 1 + 257 */
+ ASSERT(put_bits(&os, 0, 5)); /* num_offset_syms: 0 + 1 */
+ ASSERT(put_bits(&os, 14, 4)); /* num_explicit_precode_lens: 14 + 4 */
+ /*
+ * Precode codeword lengths: order is
+ * [16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15]
+ */
+ for (i = 0; i < 2; i++) /* presym_{16,17}: len=0 */
+ ASSERT(put_bits(&os, 0, 3));
+ ASSERT(put_bits(&os, 1, 3)); /* presym_18: len=1 */
+ for (i = 0; i < 12; i++) /* presym_{0,...,13}: len=0 */
+ ASSERT(put_bits(&os, 0, 3));
+ ASSERT(put_bits(&os, 2, 3)); /* presym_2: len=2 */
+ ASSERT(put_bits(&os, 0, 3)); /* presym_14: len=0 */
+ ASSERT(put_bits(&os, 2, 3)); /* presym_1: len=2 */
+
+ /* Litlen and offset codeword lengths */
+ ASSERT(put_bits(&os, 0x0, 1) && /* presym_18, 128 zeroes */
+ put_bits(&os, 117, 7));
+ ASSERT(put_bits(&os, 0x0, 1) && /* presym_18, 127 zeroes */
+ put_bits(&os, 116, 7));
+ ASSERT(put_bits(&os, 0x1, 2)); /* presym_1 */
+ ASSERT(put_bits(&os, 0x3, 2)); /* presym_2 */
+ ASSERT(put_bits(&os, 0x3, 2)); /* presym_2 */
+ ASSERT(put_bits(&os, 0x1, 2)); /* presym_1 */
+
+ /* Literal */
+ ASSERT(put_bits(&os, 0x0, 1)); /* litlensym_255 */
+
+ /* Match */
+ ASSERT(put_bits(&os, 0x3, 2)); /* litlensym_257 */
+ ASSERT(put_bits(&os, 0x0, 1)); /* offsetsym_0 */
+
+ /* End of block */
+ ASSERT(put_bits(&os, 0x1, 2)); /* litlensym_256 */
+
+ ASSERT(flush_bits(&os));
+
+ verify_decompression(in, os.next - in, out, sizeof(out),
+ expected_out, sizeof(expected_out));
+}
+
+/* Test that an offset code containing only one symbol is accepted, even if that
+ * symbol is not symbol 0. The codeword should be '0' in either case. */
+static void
+test_singleton_offset_code_notsymzero(void)
+{
+ static const u8 expected_out[] = { 254, 255, 254, 255, 254 };
+ u8 in[128];
+ u8 out[128];
+ struct output_bitstream os = { .next = in, .end = in + sizeof(in) };
+ int i;
+
+ ASSERT(put_bits(&os, 1, 1)); /* BFINAL: 1 */
+ ASSERT(put_bits(&os, 2, 2)); /* BTYPE: DYNAMIC_HUFFMAN */
+
+ /*
+ * Litlen code:
+ * litlensym_254 len=2 codeword=00
+ * litlensym_255 len=2 codeword=10
+ * litlensym_256 (end-of-block) len=2 codeword=01
+ * litlensym_257 (len 3) len=2 codeword=11
+ * Offset code:
+ * offsetsym_1 (offset 2) len=1 codeword=0
+ *
+ * Litlen and offset codeword lengths:
+ * [0..253] = 0 presym_{18,18}
+ * [254] = 2 presym_2
+ * [255] = 2 presym_2
+ * [256] = 2 presym_2
+ * [257] = 2 presym_2
+ * [258] = 0 presym_0
+ * [259] = 1 presym_1
+ *
+ * Precode:
+ * presym_0 len=2 codeword=00
+ * presym_1 len=2 codeword=10
+ * presym_2 len=2 codeword=01
+ * presym_18 len=2 codeword=11
+ */
+
+ ASSERT(put_bits(&os, 1, 5)); /* num_litlen_syms: 1 + 257 */
+ ASSERT(put_bits(&os, 1, 5)); /* num_offset_syms: 1 + 1 */
+ ASSERT(put_bits(&os, 14, 4)); /* num_explicit_precode_lens: 14 + 4 */
+ /*
+ * Precode codeword lengths: order is
+ * [16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15]
+ */
+ for (i = 0; i < 2; i++) /* presym_{16,17}: len=0 */
+ ASSERT(put_bits(&os, 0, 3));
+ ASSERT(put_bits(&os, 2, 3)); /* presym_18: len=2 */
+ ASSERT(put_bits(&os, 2, 3)); /* presym_0: len=2 */
+ for (i = 0; i < 11; i++) /* presym_{8,...,13}: len=0 */
+ ASSERT(put_bits(&os, 0, 3));
+ ASSERT(put_bits(&os, 2, 3)); /* presym_2: len=2 */
+ ASSERT(put_bits(&os, 0, 3)); /* presym_14: len=0 */
+ ASSERT(put_bits(&os, 2, 3)); /* presym_1: len=2 */
+
+ /* Litlen and offset codeword lengths */
+ ASSERT(put_bits(&os, 0x3, 2) && /* presym_18, 128 zeroes */
+ put_bits(&os, 117, 7));
+ ASSERT(put_bits(&os, 0x3, 2) && /* presym_18, 126 zeroes */
+ put_bits(&os, 115, 7));
+ ASSERT(put_bits(&os, 0x1, 2)); /* presym_2 */
+ ASSERT(put_bits(&os, 0x1, 2)); /* presym_2 */
+ ASSERT(put_bits(&os, 0x1, 2)); /* presym_2 */
+ ASSERT(put_bits(&os, 0x1, 2)); /* presym_2 */
+ ASSERT(put_bits(&os, 0x0, 2)); /* presym_0 */
+ ASSERT(put_bits(&os, 0x2, 2)); /* presym_1 */
+
+ /* Literals */
+ ASSERT(put_bits(&os, 0x0, 2)); /* litlensym_254 */
+ ASSERT(put_bits(&os, 0x2, 2)); /* litlensym_255 */
+
+ /* Match */
+ ASSERT(put_bits(&os, 0x3, 2)); /* litlensym_257 */
+ ASSERT(put_bits(&os, 0x0, 1)); /* offsetsym_1 */
+
+ /* End of block */
+ ASSERT(put_bits(&os, 0x1, 2)); /* litlensym_256 */
+
+ ASSERT(flush_bits(&os));
+
+ verify_decompression(in, os.next - in, out, sizeof(out),
+ expected_out, sizeof(expected_out));
+}
+
+int
+tmain(int argc, tchar *argv[])
+{
+ begin_program(argv);
+
+ test_empty_offset_code();
+ test_singleton_litrunlen_code();
+ test_singleton_offset_code();
+ test_singleton_offset_code_notsymzero();
+
+ return 0;
+}
diff --git a/tools/z64compress/src/enc/libdeflate/programs/test_litrunlen_overflow.c b/tools/z64compress/src/enc/libdeflate/programs/test_litrunlen_overflow.c
new file mode 100644
index 000000000..cdec8c802
--- /dev/null
+++ b/tools/z64compress/src/enc/libdeflate/programs/test_litrunlen_overflow.c
@@ -0,0 +1,72 @@
+/*
+ * test_litrunlen_overflow.c
+ *
+ * Regression test for commit f2f0df727444 ("deflate_compress: fix corruption
+ * with long literal run"). Try to compress a file longer than 65535 bytes
+ * where no 2-byte sequence (3 would be sufficient) is repeated <= 32768 bytes
+ * apart, and the distribution of bytes remains constant throughout, and yet not
+ * all bytes are used so the data is still slightly compressible. There will be
+ * no matches in this data, but the compressor should still output a compressed
+ * block, and this block should contain more than 65535 consecutive literals,
+ * which triggered the bug.
+ *
+ * Note: on random data, this situation is extremely unlikely if the compressor
+ * uses all matches it finds, since random data will on average have a 3-byte
+ * match every (256**3)/32768 = 512 bytes.
+ */
+
+#include "test_util.h"
+
+int
+tmain(int argc, tchar *argv[])
+{
+ const int data_size = 2 * 250 * 251;
+ u8 *orig_data, *compressed_data, *decompressed_data;
+ int i, stride, multiple, j = 0;
+ struct libdeflate_decompressor *d;
+ static const int levels[] = { 3, 6, 12 };
+
+ begin_program(argv);
+
+ orig_data = xmalloc(data_size);
+ compressed_data = xmalloc(data_size);
+ decompressed_data = xmalloc(data_size);
+
+ for (i = 0; i < 2; i++) {
+ for (stride = 1; stride < 251; stride++) {
+ for (multiple = 0; multiple < 251; multiple++)
+ orig_data[j++] = (stride * multiple) % 251;
+ }
+ }
+ ASSERT(j == data_size);
+
+ d = libdeflate_alloc_decompressor();
+ ASSERT(d != NULL);
+
+ for (i = 0; i < ARRAY_LEN(levels); i++) {
+ struct libdeflate_compressor *c;
+ size_t csize;
+ enum libdeflate_result res;
+
+ c = libdeflate_alloc_compressor(levels[i]);
+ ASSERT(c != NULL);
+
+ csize = libdeflate_deflate_compress(c, orig_data, data_size,
+ compressed_data, data_size);
+ ASSERT(csize > 0 && csize < data_size);
+
+ res = libdeflate_deflate_decompress(d, compressed_data, csize,
+ decompressed_data,
+ data_size, NULL);
+ ASSERT(res == LIBDEFLATE_SUCCESS);
+ ASSERT(memcmp(orig_data, decompressed_data, data_size) == 0);
+
+ libdeflate_free_compressor(c);
+ }
+
+ libdeflate_free_decompressor(d);
+ free(orig_data);
+ free(compressed_data);
+ free(decompressed_data);
+ return 0;
+}
diff --git a/tools/z64compress/src/enc/libdeflate/programs/test_overread.c b/tools/z64compress/src/enc/libdeflate/programs/test_overread.c
new file mode 100644
index 000000000..2a6003218
--- /dev/null
+++ b/tools/z64compress/src/enc/libdeflate/programs/test_overread.c
@@ -0,0 +1,95 @@
+/*
+ * test_overread.c
+ *
+ * Test that the decompressor doesn't produce an unbounded amount of output if
+ * it runs out of input, even when implicit zeroes appended to the input would
+ * continue producing output (as is the case when the input ends during a
+ * DYNAMIC_HUFFMAN block where a literal has an all-zeroes codeword).
+ *
+ * This is a regression test for commit 3f21ec9d6121 ("deflate_decompress: error
+ * out if overread count gets too large").
+ */
+
+#include "test_util.h"
+
+static void
+generate_test_input(struct output_bitstream *os)
+{
+ int i;
+
+ put_bits(os, 0, 1); /* BFINAL: 0 */
+ put_bits(os, 2, 2); /* BTYPE: DYNAMIC_HUFFMAN */
+
+ /*
+ * Write the Huffman codes.
+ *
+ * Litlen code:
+ * litlensym_0 (0) len=1 codeword=0
+ * litlensym_256 (end-of-block) len=1 codeword=1
+ * Offset code:
+ * offsetsym_0 (unused) len=1 codeword=0
+ *
+ * Litlen and offset codeword lengths:
+ * [0] = 1 presym_1
+ * [1..255] = 0 presym_{18,18}
+ * [256] = 1 presym_1
+ * [257] = 1 presym_1
+ *
+ * Precode:
+ * presym_1 len=1 codeword=0
+ * presym_18 len=1 codeword=1
+ */
+ put_bits(os, 0, 5); /* num_litlen_syms: 0 + 257 */
+ put_bits(os, 0, 5); /* num_offset_syms: 0 + 1 */
+ put_bits(os, 14, 4); /* num_explicit_precode_lens: 14 + 4 */
+ /*
+ * Precode codeword lengths: order is
+ * [16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15]
+ */
+ put_bits(os, 0, 3); /* presym_16: len=0 */
+ put_bits(os, 0, 3); /* presym_17: len=0 */
+ put_bits(os, 1, 3); /* presym_18: len=1 */
+ for (i = 0; i < 14; i++) /* presym_{0,...,14}: len=0 */
+ put_bits(os, 0, 3);
+ put_bits(os, 1, 3); /* presym_1: len=1 */
+
+ /* Litlen and offset codeword lengths */
+ put_bits(os, 0, 1); /* presym_1 */
+ put_bits(os, 1, 1); /* presym_18 ... */
+ put_bits(os, 117, 7); /* ... 11 + 117 zeroes */
+ put_bits(os, 1, 1); /* presym_18 ... */
+ put_bits(os, 116, 7); /* ... 11 + 116 zeroes */
+ put_bits(os, 0, 1); /* presym_1 */
+ put_bits(os, 0, 1); /* presym_1 */
+
+ /* Implicit zeroes would generate endless literals from here. */
+
+ ASSERT(flush_bits(os));
+}
+
+int
+tmain(int argc, tchar *argv[])
+{
+ u8 cdata[16];
+ u8 udata[256];
+ struct output_bitstream os =
+ { .next = cdata, .end = cdata + sizeof(cdata) };
+ struct libdeflate_decompressor *d;
+ enum libdeflate_result res;
+ size_t actual_out_nbytes;
+
+ begin_program(argv);
+
+ generate_test_input(&os);
+ d = libdeflate_alloc_decompressor();
+ ASSERT(d != NULL);
+
+ res = libdeflate_deflate_decompress(d, cdata, os.next - cdata,
+ udata, sizeof(udata),
+ &actual_out_nbytes);
+ /* Before the fix, the result was LIBDEFLATE_INSUFFICIENT_SPACE here. */
+ ASSERT(res == LIBDEFLATE_BAD_DATA);
+
+ libdeflate_free_decompressor(d);
+ return 0;
+}
diff --git a/tools/z64compress/src/enc/libdeflate/programs/test_slow_decompression.c b/tools/z64compress/src/enc/libdeflate/programs/test_slow_decompression.c
new file mode 100644
index 000000000..d5ac26245
--- /dev/null
+++ b/tools/z64compress/src/enc/libdeflate/programs/test_slow_decompression.c
@@ -0,0 +1,472 @@
+/*
+ * test_slow_decompression.c
+ *
+ * Test how quickly libdeflate decompresses degenerate/malicious compressed data
+ * streams that start new Huffman blocks extremely frequently.
+ */
+
+#include "test_util.h"
+
+/*
+ * Generate a DEFLATE stream containing all empty "static Huffman" blocks.
+ *
+ * libdeflate used to decompress this very slowly (~1000x slower than typical
+ * data), but now it's much faster (only ~2x slower than typical data) because
+ * now it skips rebuilding the decode tables for the static Huffman codes when
+ * they're already loaded into the decompressor.
+ */
+static void
+generate_empty_static_huffman_blocks(u8 *p, size_t len)
+{
+ struct output_bitstream os = { .next = p, .end = p + len };
+
+ while (put_bits(&os, 0, 1) && /* BFINAL: 0 */
+ put_bits(&os, 1, 2) && /* BTYPE: STATIC_HUFFMAN */
+ put_bits(&os, 0, 7)) /* litlensym_256 (end-of-block) */
+ ;
+}
+
+static bool
+generate_empty_dynamic_huffman_block(struct output_bitstream *os)
+{
+ int i;
+
+ if (!put_bits(os, 0, 1)) /* BFINAL: 0 */
+ return false;
+ if (!put_bits(os, 2, 2)) /* BTYPE: DYNAMIC_HUFFMAN */
+ return false;
+
+ /*
+ * Write a minimal Huffman code, then the end-of-block symbol.
+ *
+ * Litlen code:
+ * litlensym_256 (end-of-block) freq=1 len=1 codeword=0
+ * Offset code:
+ * offsetsym_0 (unused) freq=0 len=1 codeword=0
+ *
+ * Litlen and offset codeword lengths:
+ * [0..255] = 0 presym_{18,18}
+ * [256] = 1 presym_1
+ * [257] = 1 presym_1
+ *
+ * Precode:
+ * presym_1 freq=2 len=1 codeword=0
+ * presym_18 freq=2 len=1 codeword=1
+ */
+
+ if (!put_bits(os, 0, 5)) /* num_litlen_syms: 0 + 257 */
+ return false;
+ if (!put_bits(os, 0, 5)) /* num_offset_syms: 0 + 1 */
+ return false;
+ if (!put_bits(os, 14, 4)) /* num_explicit_precode_lens: 14 + 4 */
+ return false;
+ /*
+ * Precode codeword lengths: order is
+ * [16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15]
+ */
+ for (i = 0; i < 2; i++) { /* presym_{16,17}: len=0 */
+ if (!put_bits(os, 0, 3))
+ return false;
+ }
+ if (!put_bits(os, 1, 3)) /* presym_18: len=1 */
+ return false;
+ for (i = 0; i < 14; i++) { /* presym_{0,...,14}: len=0 */
+ if (!put_bits(os, 0, 3))
+ return false;
+ }
+ if (!put_bits(os, 1, 3)) /* presym_1: len=1 */
+ return false;
+
+ /* Litlen and offset codeword lengths */
+ for (i = 0; i < 2; i++) {
+ if (!put_bits(os, 1, 1) || /* presym_18, 128 zeroes */
+ !put_bits(os, 117, 7))
+ return false;
+ }
+ if (!put_bits(os, 0, 1)) /* presym_1 */
+ return false;
+ if (!put_bits(os, 0, 1)) /* presym_1 */
+ return false;
+ /* Done writing the Huffman codes */
+
+ return put_bits(os, 0, 1); /* litlensym_256 (end-of-block) */
+}
+
+/*
+ * Generate a DEFLATE stream containing all empty "dynamic Huffman" blocks.
+ *
+ * This is the worst known case currently, being ~100x slower to decompress than
+ * typical data.
+ */
+static void
+generate_empty_dynamic_huffman_blocks(u8 *p, size_t len)
+{
+ struct output_bitstream os = { .next = p, .end = p + len };
+
+ while (generate_empty_dynamic_huffman_block(&os))
+ ;
+}
+
+#define NUM_ITERATIONS 100
+
+static u64
+do_test_libdeflate(const char *input_type, const u8 *in, size_t in_nbytes,
+ u8 *out, size_t out_nbytes_avail)
+{
+ struct libdeflate_decompressor *d;
+ enum libdeflate_result res;
+ u64 t;
+ int i;
+
+ d = libdeflate_alloc_decompressor();
+ ASSERT(d != NULL);
+
+ t = timer_ticks();
+ for (i = 0; i < NUM_ITERATIONS; i++) {
+ res = libdeflate_deflate_decompress(d, in, in_nbytes, out,
+ out_nbytes_avail, NULL);
+ ASSERT(res == LIBDEFLATE_BAD_DATA ||
+ res == LIBDEFLATE_INSUFFICIENT_SPACE);
+ }
+ t = timer_ticks() - t;
+
+ printf("[%s, libdeflate]: %"PRIu64" KB/s\n", input_type,
+ timer_KB_per_s((u64)in_nbytes * NUM_ITERATIONS, t));
+
+ libdeflate_free_decompressor(d);
+ return t;
+}
+
+static u64
+do_test_zlib(const char *input_type, const u8 *in, size_t in_nbytes,
+ u8 *out, size_t out_nbytes_avail)
+{
+ z_stream z;
+ int res;
+ u64 t;
+ int i;
+
+ memset(&z, 0, sizeof(z));
+ res = inflateInit2(&z, -15);
+ ASSERT(res == Z_OK);
+
+ t = timer_ticks();
+ for (i = 0; i < NUM_ITERATIONS; i++) {
+ inflateReset(&z);
+ z.next_in = (void *)in;
+ z.avail_in = in_nbytes;
+ z.next_out = out;
+ z.avail_out = out_nbytes_avail;
+ res = inflate(&z, Z_FINISH);
+ ASSERT(res == Z_BUF_ERROR || res == Z_DATA_ERROR);
+ }
+ t = timer_ticks() - t;
+
+ printf("[%s, zlib ]: %"PRIu64" KB/s\n", input_type,
+ timer_KB_per_s((u64)in_nbytes * NUM_ITERATIONS, t));
+
+ inflateEnd(&z);
+ return t;
+}
+
+/*
+ * Test case from https://github.com/ebiggers/libdeflate/issues/33
+ * with the gzip header and footer removed to leave just the DEFLATE stream
+ */
+static const u8 orig_repro[3962] =
+ "\xea\x04\x48\x00\x20\x80\x28\x00\x00\x11\x00\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x6a\x6a\x6a\x6a\x6a\x6a\x6a\x6a\x6a\x6a\x6a\x6a"
+ "\x6a\x6a\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x92\x63\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x92\x63\x00\xea\x04\x48\x00\x20"
+ "\x80\x28\x00\x00\x11\x00\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x00\xea\x04\x48\x00\x20\x80\x28"
+ "\x00\x00\x11\x00\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x92\x63\x00\xea\x04\x48\x00\x20\x80\x28\x00\x00\x11"
+ "\x00\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x00\xea\x04\x48\x00\x20\x80\x28\x00\x00\x11\x00\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x00\xea\x04\x48"
+ "\x00\x20\x80\x28\x00\x00\x11\x00\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x92\x63\x00\xea\x04\x48\x00\x20\x80"
+ "\x28\x00\x00\x11\x00\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x00\xea\x04\x48\x00\x20\x80\x28\x00"
+ "\x00\x11\x00\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x92\x63\x00\xea"
+ "\x04\x48\x00\x20\x80\x28\x00\x00\x11\x00\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x92\x63\x00\xea"
+ "\x04\x48\x00\x20\x80\x28\x00\x00\x11\x00\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x92\x63\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x92\x63\x00\xea\x04\x48"
+ "\x00\x20\x80\x28\x00\x00\x11\x00\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x00\xea\x04\x48\x00\x20"
+ "\x80\x28\x00\x00\x11\x00\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x92\x63\x00\xea\x04\x48\x00\x20\x80\x28\x00"
+ "\x00\x11\x00\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x00\xea\x04\x48\x00\x20\x80\x28\x00\x00\x11"
+ "\x00\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x63"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x92"
+ "\x63\x00\xea\x04\x48\x00\x20\x80\x28\x00\x00\x11\x00\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x00"
+ "\xea\x04\x48\x00\x20\x80\x28\x00\x00\x11\x00\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x92\x63\x00\xea\x04\x48"
+ "\x00\x20\x80\x28\x00\x00\x11\x00\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x00\xea\x04\x48\x00\x20"
+ "\x80\x28\x00\x00\x11\x00\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x92\x63\x00\xea\x04\x48\x00\x20\x80\x28\x00"
+ "\x00\x11\x00\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x92\x63\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x92\x63\x00\xea"
+ "\x04\x48\x00\x20\x80\x28\x00\x00\x11\x00\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x00\xea\x04\x48"
+ "\x00\x20\x80\x28\x00\x00\x11\x00\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x00\xea\x04\x48\x00\x20\x80\x28\x00\x00\x11"
+ "\x00\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x92\x63\x00\xea\x04\x48\x00\x20\x80\x28\x00\x00\x11\x00\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x00\xea\x04\x48\x00\x20\x80\x28\x00\x00\x11\x00\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x92\x63\x00\xea\x04\x48\x00\x20\x80\x28\x00"
+ "\x00\x11\x00\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x92\x63\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x92\x63\x00\xea\x04\x48\x00\x20\x80\x28\x00\x00\x11"
+ "\x00\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x00\xea\x04\x48\x00\x20\x80\x28\x00\x00\x11\x00\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x92\x63"
+ "\x00\xea\x04\x48\x00\x20\x80\x28\x00\x00\x11\x00\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x00\xea"
+ "\x04\x48\x00\x20\x80\x28\x00\x00\x11\x00\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x92\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x6a\x6a\x6a\x6a\x6a\x6a\x6a\x6a\x6a\x6a\x6a\x6a\x6a"
+ "\x6a\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x92\x63\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x92\x63\x00\xea\x04\x48\x00\x20\x80"
+ "\x28\x00\x00\x11\x00\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x00\xea\x04\x48\x00\x20\x80\x28\x00"
+ "\x00\x11\x00\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x92\x63\x00\xea\x04\x48\x00\x20\x80\x28\x00"
+ "\x00\x11\x00\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x92\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x6a\x6a\x6a\x6a\x6a\x6a\x6a\x6a\x6a\x6a\x6a"
+ "\x6a\x6a\x6a\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x92\x63\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x92\x63\x00\xea\x04\x48\x00"
+ "\x20\x80\x28\x00\x00\x11\x00\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x00\xea\x04\x48\x00\x20\x80"
+ "\x28\x00\x00\x11\x00\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x92\x63\x00\xea\x04\x48\x00\x20\x80\x28\x00\x00"
+ "\x11\x00\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x00\xea\x04\x48\x00\x20\x80\x28\x00\x00\x11\x00"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x00\xea\x04"
+ "\x48\x00\x20\x80\x28\x00\x00\x11\x00\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x92\x63\x00\xea\x04\x48\x00\x20"
+ "\x80\x28\x00\x00\x11\x00\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x00\xea\x04\x48\x00\x20\x80\x28"
+ "\x00\x00\x11\x00\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x92\x63\x00"
+ "\xea\x04\x48\x00\x20\x80\x28\x00\x00\x11\x00\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x92\x63\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x92\x63\x00\xea\x04"
+ "\x48\x00\x20\x80\x28\x00\x00\x11\x00\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x00\xea\x04\x48\x00"
+ "\x20\x80\x28\x00\x00\x11\x00\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x92\x63\x00\xea\x04\x48\x00\x20\x80\x28"
+ "\x00\x00\x11\x00\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x00\xea\x04\x48\x00\x20\x80\x28\x00\x00"
+ "\x11\x00\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x63\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x92\x63\x00\xea\x04\x48\x00\x20\x80\x28\x00\x00\x11\x00\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x00\xea\x04\x48\x00\x20\x80\x28\x00\x00\x11\x00\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x92\x63\x00\xea\x04"
+ "\x48\x00\x20\x80\x28\x00\x00\x11\x00\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x00\xea\x04\x48\x00"
+ "\x20\x80\x28\x00\x00\x11\x00\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x92\x63\x00\xea\x04\x48\x00\x20\x80\x28"
+ "\x00\x00\x11\x00\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x92\x63\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x92\x63\x00"
+ "\xea\x04\x48\x00\x20\x80\x28\x00\x00\x11\x00\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x00\xea\x04"
+ "\x48\x00\x20\x80\x28\x00\x00\x11\x00\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x00\xea\x04\x48\x00\x20\x80\x28\x00\x00"
+ "\x11\x00\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x92\x63\x00\xea\x04\x48\x00\x20\x80\x28\x00\x00\x11\x00\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x00\xea\x04\x48\x00\x20\x80\x28\x00\x00\x11\x00\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x92\x63\x00\xea\x04\x48\x00\x20\x80\x28"
+ "\x00\x00\x11\x00\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x92\x63\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x92\x63\x00\xea\x04\x48\x00\x20\x80\x28\x00\x00"
+ "\x11\x00\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x00\xea\x04\x48\x00\x20\x80\x28\x00\x00\x11\x00"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x92"
+ "\x63\x00\xea\x04\x48\x00\x20\x80\x28\x00\x00\x11\x00\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x00"
+ "\xea\x04\x48\x00\x20\x80\x28\x00\x00\x11\x00\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x63\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x92\x63\x00\xea\x04\x48\x00"
+ "\x20\x80\x28\x00\x00\x11\x00\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x00\xea\x04\x48\x00\x20\x80"
+ "\x28\x00\x00\x11\x00\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x92\x63\x00\xea\x04\x48\x00\x20\x80\x28\x00\x00"
+ "\x11\x00\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x00\xea\x04\x48\x00\x20\x80\x28\x00\x00\x11\x00"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x92"
+ "\x63\x00\xea\x04\x48\x00\x20\x80\x28\x00\x00\x11\x00\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x92\x63\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x92\x63\x00"
+ "\xea\x04\x48\x00\x20\x80\x28\x00\x00\x11\x00\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x00\xea\x04"
+ "\x48\x00\x20\x80\x28\x00\x00\x11\x00\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x92\x63\x00\xea\x04\x48\x00\x20"
+ "\x80\x28\x00\x00\x11\x00\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x00\xea\x04\x48\x00\x20\x80\x28"
+ "\x00\x00\x11\x00\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1a\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x92\x63\x00"
+ "\xea\x04\x48\x00\x20\x80\x28\x00\x00\x11\x00\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b"
+ "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x92\x63\x00\x04\xea\x48\x00\x20"
+ "\x80\x28\x00\x00\x11\x1b\x1b\x1b\x1b\x92\x63\x00\xea\x04\x48\x00"
+ "\x20\x80\x28\x00\x00\x11\x00\x00\x01\x04\x00\x3f\x00\x00\x00\x00"
+ "\x28\xf7\xff\x00\xff\xff\xff\xff\x00\x00";
+
+int
+tmain(int argc, tchar *argv[])
+{
+ u8 in[4096];
+ u8 out[10000];
+ u64 t, tz;
+
+ begin_program(argv);
+
+ begin_performance_test();
+
+ /* static huffman case */
+ generate_empty_static_huffman_blocks(in, sizeof(in));
+ t = do_test_libdeflate("static huffman", in, sizeof(in),
+ out, sizeof(out));
+ tz = do_test_zlib("static huffman", in, sizeof(in), out, sizeof(out));
+ /*
+ * libdeflate is faster than zlib in this case, e.g.
+ * [static huffman, libdeflate]: 215861 KB/s
+ * [static huffman, zlib ]: 73651 KB/s
+ */
+ putchar('\n');
+ ASSERT(t < tz);
+
+ /* dynamic huffman case */
+ generate_empty_dynamic_huffman_blocks(in, sizeof(in));
+ t = do_test_libdeflate("dynamic huffman", in, sizeof(in),
+ out, sizeof(out));
+ tz = do_test_zlib("dynamic huffman", in, sizeof(in), out, sizeof(out));
+ /*
+ * libdeflate is slower than zlib in this case, though not super bad.
+ * [dynamic huffman, libdeflate]: 6277 KB/s
+ * [dynamic huffman, zlib ]: 10419 KB/s
+ * FIXME: make it faster.
+ */
+ putchar('\n');
+ ASSERT(t < 4 * tz);
+
+ /* original reproducer */
+ t = do_test_libdeflate("original repro", orig_repro, sizeof(orig_repro),
+ out, sizeof(out));
+ tz = do_test_zlib("original repro", orig_repro, sizeof(orig_repro),
+ out, sizeof(out));
+ ASSERT(t < tz);
+
+ return 0;
+}
diff --git a/tools/z64compress/src/enc/libdeflate/programs/test_trailing_bytes.c b/tools/z64compress/src/enc/libdeflate/programs/test_trailing_bytes.c
new file mode 100644
index 000000000..e37e97b9c
--- /dev/null
+++ b/tools/z64compress/src/enc/libdeflate/programs/test_trailing_bytes.c
@@ -0,0 +1,151 @@
+/*
+ * test_trailing_bytes.c
+ *
+ * Test that decompression correctly stops at the end of the first DEFLATE,
+ * zlib, or gzip stream, and doesn't process any additional trailing bytes.
+ */
+
+#include "test_util.h"
+
+static const struct {
+ size_t (*compress)(struct libdeflate_compressor *compressor,
+ const void *in, size_t in_nbytes,
+ void *out, size_t out_nbytes_avail);
+ enum libdeflate_result (*decompress)(
+ struct libdeflate_decompressor *decompressor,
+ const void *in, size_t in_nbytes,
+ void *out, size_t out_nbytes_avail,
+ size_t *actual_out_nbytes_ret);
+ enum libdeflate_result (*decompress_ex)(
+ struct libdeflate_decompressor *decompressor,
+ const void *in, size_t in_nbytes,
+ void *out, size_t out_nbytes_avail,
+ size_t *actual_in_nbytes_ret,
+ size_t *actual_out_nbytes_ret);
+} codecs[] = {
+ {
+ .compress = libdeflate_deflate_compress,
+ .decompress = libdeflate_deflate_decompress,
+ .decompress_ex = libdeflate_deflate_decompress_ex,
+ }, {
+ .compress = libdeflate_zlib_compress,
+ .decompress = libdeflate_zlib_decompress,
+ .decompress_ex = libdeflate_zlib_decompress_ex,
+ }, {
+ .compress = libdeflate_gzip_compress,
+ .decompress = libdeflate_gzip_decompress,
+ .decompress_ex = libdeflate_gzip_decompress_ex,
+ }
+};
+
+int
+tmain(int argc, tchar *argv[])
+{
+ const size_t original_nbytes = 32768;
+ const size_t compressed_nbytes_total = 32768;
+ /*
+ * Don't use the full buffer for compressed data, because we want to
+ * test whether decompression can deal with additional trailing bytes.
+ *
+ * Note: we can't use a guarded buffer (i.e. a buffer where the byte
+ * after compressed_nbytes is unmapped) because the decompressor may
+ * read a few bytes beyond the end of the stream (but ultimately not
+ * actually use those bytes) as long as they are within the buffer.
+ */
+ const size_t compressed_nbytes_avail = 30000;
+ size_t i;
+ u8 *original;
+ u8 *compressed;
+ u8 *decompressed;
+ struct libdeflate_compressor *c;
+ struct libdeflate_decompressor *d;
+ size_t compressed_nbytes;
+ enum libdeflate_result res;
+ size_t actual_compressed_nbytes;
+ size_t actual_decompressed_nbytes;
+
+ begin_program(argv);
+
+ ASSERT(compressed_nbytes_avail < compressed_nbytes_total);
+
+ /* Prepare some dummy data to compress */
+ original = xmalloc(original_nbytes);
+ ASSERT(original != NULL);
+ for (i = 0; i < original_nbytes; i++)
+ original[i] = (i % 123) + (i % 1023);
+
+ compressed = xmalloc(compressed_nbytes_total);
+ ASSERT(compressed != NULL);
+ memset(compressed, 0, compressed_nbytes_total);
+
+ decompressed = xmalloc(original_nbytes);
+ ASSERT(decompressed != NULL);
+
+ c = libdeflate_alloc_compressor(6);
+ ASSERT(c != NULL);
+
+ d = libdeflate_alloc_decompressor();
+ ASSERT(d != NULL);
+
+ for (i = 0; i < ARRAY_LEN(codecs); i++) {
+ compressed_nbytes = codecs[i].compress(c, original,
+ original_nbytes,
+ compressed,
+ compressed_nbytes_avail);
+ ASSERT(compressed_nbytes > 0);
+ ASSERT(compressed_nbytes <= compressed_nbytes_avail);
+
+ /* Test decompress() of stream that fills the whole buffer */
+ actual_decompressed_nbytes = 0;
+ memset(decompressed, 0, original_nbytes);
+ res = codecs[i].decompress(d, compressed, compressed_nbytes,
+ decompressed, original_nbytes,
+ &actual_decompressed_nbytes);
+ ASSERT(res == LIBDEFLATE_SUCCESS);
+ ASSERT(actual_decompressed_nbytes == original_nbytes);
+ ASSERT(memcmp(decompressed, original, original_nbytes) == 0);
+
+ /* Test decompress_ex() of stream that fills the whole buffer */
+ actual_compressed_nbytes = actual_decompressed_nbytes = 0;
+ memset(decompressed, 0, original_nbytes);
+ res = codecs[i].decompress_ex(d, compressed, compressed_nbytes,
+ decompressed, original_nbytes,
+ &actual_compressed_nbytes,
+ &actual_decompressed_nbytes);
+ ASSERT(res == LIBDEFLATE_SUCCESS);
+ ASSERT(actual_compressed_nbytes == compressed_nbytes);
+ ASSERT(actual_decompressed_nbytes == original_nbytes);
+ ASSERT(memcmp(decompressed, original, original_nbytes) == 0);
+
+ /* Test decompress() of stream with trailing bytes */
+ actual_decompressed_nbytes = 0;
+ memset(decompressed, 0, original_nbytes);
+ res = codecs[i].decompress(d, compressed,
+ compressed_nbytes_total,
+ decompressed, original_nbytes,
+ &actual_decompressed_nbytes);
+ ASSERT(res == LIBDEFLATE_SUCCESS);
+ ASSERT(actual_decompressed_nbytes == original_nbytes);
+ ASSERT(memcmp(decompressed, original, original_nbytes) == 0);
+
+ /* Test decompress_ex() of stream with trailing bytes */
+ actual_compressed_nbytes = actual_decompressed_nbytes = 0;
+ memset(decompressed, 0, original_nbytes);
+ res = codecs[i].decompress_ex(d, compressed,
+ compressed_nbytes_total,
+ decompressed, original_nbytes,
+ &actual_compressed_nbytes,
+ &actual_decompressed_nbytes);
+ ASSERT(res == LIBDEFLATE_SUCCESS);
+ ASSERT(actual_compressed_nbytes == compressed_nbytes);
+ ASSERT(actual_decompressed_nbytes == original_nbytes);
+ ASSERT(memcmp(decompressed, original, original_nbytes) == 0);
+ }
+
+ free(original);
+ free(compressed);
+ free(decompressed);
+ libdeflate_free_compressor(c);
+ libdeflate_free_decompressor(d);
+ return 0;
+}
diff --git a/tools/z64compress/src/enc/libdeflate/programs/test_util.c b/tools/z64compress/src/enc/libdeflate/programs/test_util.c
new file mode 100644
index 000000000..20e7c217f
--- /dev/null
+++ b/tools/z64compress/src/enc/libdeflate/programs/test_util.c
@@ -0,0 +1,243 @@
+/*
+ * test_util.c - utility functions for test programs
+ *
+ * Copyright 2016 Eric Biggers
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef _WIN32
+/* for MAP_ANONYMOUS or MAP_ANON, which unfortunately aren't part of POSIX... */
+# undef _POSIX_C_SOURCE
+# ifdef __APPLE__
+# define _DARWIN_C_SOURCE
+# elif defined(__linux__)
+# define _GNU_SOURCE
+# endif
+#endif
+
+#include "test_util.h"
+
+#include <fcntl.h>
+#include <stdarg.h>
+#ifdef _WIN32
+# include <windows.h>
+#else
+# include <sys/mman.h>
+# include <sys/time.h>
+# include <unistd.h>
+#endif
+
+#ifndef MAP_ANONYMOUS
+# define MAP_ANONYMOUS MAP_ANON
+#endif
+
+/*
+ * Abort with an error message.  Prints "Assertion failed: <expr> at
+ * <file>:<line>" via msg(), then calls abort().  This is the target of the
+ * ASSERT() macro in test_util.h and never returns.
+ */
+_noreturn void
+assertion_failed(const char *expr, const char *file, int line)
+{
+ msg("Assertion failed: %s at %s:%d", expr, file, line);
+ abort();
+}
+
+/*
+ * Called at the start of each performance test.  Exits the program
+ * successfully (i.e. the test is skipped, not failed) unless the
+ * INCLUDE_PERF_TESTS environment variable is set.
+ */
+void
+begin_performance_test(void)
+{
+ /* Skip performance tests by default, since they can be flaky. */
+ if (getenv("INCLUDE_PERF_TESTS") == NULL)
+ exit(0);
+}
+
+/* Return the size in bytes of a virtual-memory page on this system. */
+static size_t
+get_page_size(void)
+{
+#ifdef _WIN32
+ SYSTEM_INFO info;
+
+ GetSystemInfo(&info);
+ return info.dwPageSize;
+#else
+ /* NOTE(review): sysconf() can return -1 on error; assumed not to happen
+  * for _SC_PAGESIZE on supported platforms -- confirm. */
+ return sysconf(_SC_PAGESIZE);
+#endif
+}
+
+/*
+ * Allocate a buffer with guard pages: a region of at least 'size' bytes
+ * (rounded up to whole pages) with an inaccessible page immediately before
+ * and after it, so stray accesses just outside the buffer fault.
+ *
+ * On success, *start_ret receives the first usable byte and *end_ret points
+ * one past the last usable byte.  Note that when 'size' is not a multiple of
+ * the page size, small overruns within the final partial page are NOT
+ * detected.  Failure aborts the test via ASSERT().
+ */
+void
+alloc_guarded_buffer(size_t size, u8 **start_ret, u8 **end_ret)
+{
+ const size_t pagesize = get_page_size();
+ /* Round the requested size up to a whole number of pages. */
+ const size_t nr_pages = (size + pagesize - 1) / pagesize;
+ u8 *base_addr;
+ u8 *start, *end;
+#ifdef _WIN32
+ DWORD oldProtect;
+#endif
+
+ *start_ret = NULL;
+ *end_ret = NULL;
+
+#ifdef _WIN32
+ /* Allocate buffer and guard pages with no access. */
+ base_addr = VirtualAlloc(NULL, (nr_pages + 2) * pagesize,
+ MEM_COMMIT | MEM_RESERVE, PAGE_NOACCESS);
+ if (!base_addr) {
+ msg("Unable to allocate memory (VirtualAlloc): Windows error %u",
+ (unsigned int)GetLastError());
+ ASSERT(0);
+ }
+ start = base_addr + pagesize;
+ end = start + (nr_pages * pagesize);
+
+ /* Grant read+write access to just the buffer. */
+ if (!VirtualProtect(start, end - start, PAGE_READWRITE, &oldProtect)) {
+ msg("Unable to protect memory (VirtualProtect): Windows error %u",
+ (unsigned int)GetLastError());
+ VirtualFree(base_addr, 0, MEM_RELEASE);
+ ASSERT(0);
+ }
+#else
+ /* Allocate buffer and guard pages. */
+ base_addr = mmap(NULL, (nr_pages + 2) * pagesize, PROT_READ|PROT_WRITE,
+ MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
+ if (base_addr == (u8 *)MAP_FAILED) {
+ msg_errno("Unable to allocate memory (anonymous mmap)");
+ ASSERT(0);
+ }
+ start = base_addr + pagesize;
+ end = start + (nr_pages * pagesize);
+
+ /* Unmap the guard pages.  This leaves unmapped holes around the buffer;
+  * accesses there fault unless something else later gets mapped into the
+  * holes (presumably unlikely in a short-lived test -- NOTE(review)). */
+ munmap(base_addr, pagesize);
+ munmap(end, pagesize);
+#endif
+ *start_ret = start;
+ *end_ret = end;
+}
+
+/* Free a buffer that was allocated by alloc_guarded_buffer().
+ * Accepts start == NULL as a no-op, mirroring free(NULL). */
+void
+free_guarded_buffer(u8 *start, u8 *end)
+{
+ if (!start)
+ return;
+#ifdef _WIN32
+ /* On Windows the guard pages belong to the same VirtualAlloc
+  * reservation, so releasing from the base address (one page before
+  * 'start') frees the whole region including both guards. */
+ VirtualFree(start - get_page_size(), 0, MEM_RELEASE);
+#else
+ /* On POSIX the guard pages were already munmap()ed at allocation time,
+  * so only the usable region [start, end) remains mapped. */
+ munmap(start, end - start);
+#endif
+}
+
+/*
+ * Return the number of timer ticks that have elapsed since some unspecified
+ * point fixed at the start of program execution.
+ *
+ * The tick unit is platform-dependent: QueryPerformanceCounter units on
+ * Windows, nanoseconds with clock_gettime(), or microseconds with the
+ * gettimeofday() fallback.  Use the timer_*() conversion helpers below
+ * rather than interpreting raw ticks.
+ */
+u64
+timer_ticks(void)
+{
+#ifdef _WIN32
+ LARGE_INTEGER count;
+
+ QueryPerformanceCounter(&count);
+ return count.QuadPart;
+#elif defined(HAVE_CLOCK_GETTIME)
+ struct timespec ts;
+
+ clock_gettime(CLOCK_MONOTONIC, &ts);
+ return (1000000000 * (u64)ts.tv_sec) + ts.tv_nsec;
+#else
+ /* NOTE(review): gettimeofday() is wall-clock time, not monotonic, so
+  * this fallback can jump if the system clock is adjusted. */
+ struct timeval tv;
+
+ gettimeofday(&tv, NULL);
+ return (1000000 * (u64)tv.tv_sec) + tv.tv_usec;
+#endif
+}
+
+/*
+ * Return the number of timer ticks per second.  Must match the backend
+ * selected in timer_ticks() above (same #if order and conditions).
+ */
+static u64
+timer_frequency(void)
+{
+#ifdef _WIN32
+ LARGE_INTEGER freq;
+
+ QueryPerformanceFrequency(&freq);
+ return freq.QuadPart;
+#elif defined(HAVE_CLOCK_GETTIME)
+ return 1000000000;
+#else
+ return 1000000;
+#endif
+}
+
+/*
+ * Convert a number of elapsed timer ticks to milliseconds.
+ * Multiplies before dividing to preserve precision; assumes
+ * ticks * 1000 does not overflow u64 (true for any realistic run time).
+ */
+u64 timer_ticks_to_ms(u64 ticks)
+{
+ return ticks * 1000 / timer_frequency();
+}
+
+/*
+ * Convert a byte count and a number of elapsed timer ticks to MB/s.
+ * The caller must pass ticks > 0 (division by zero otherwise); the
+ * intermediate bytes * frequency product is assumed to fit in u64.
+ */
+u64 timer_MB_per_s(u64 bytes, u64 ticks)
+{
+ return bytes * timer_frequency() / ticks / 1000000;
+}
+
+/*
+ * Convert a byte count and a number of elapsed timer ticks to KB/s.
+ * Same preconditions as timer_MB_per_s(): ticks > 0 and no u64 overflow
+ * in bytes * frequency.
+ */
+u64 timer_KB_per_s(u64 bytes, u64 ticks)
+{
+ return bytes * timer_frequency() / ticks / 1000;
+}
+
+/*
+ * Append the 'num_bits' low-order bits of 'bits' to the output bitstream,
+ * LSB-first, writing out each byte as it becomes complete.  The caller must
+ * ensure os->bitcount + num_bits does not exceed the bit width of
+ * machine_word_t.  Returns false if the output buffer filled up.
+ */
+bool
+put_bits(struct output_bitstream *os, machine_word_t bits, int num_bits)
+{
+ os->bitbuf |= bits << os->bitcount;
+ os->bitcount += num_bits;
+ while (os->bitcount >= 8) {
+ if (os->next == os->end)
+ return false;
+ *os->next++ = os->bitbuf;
+ os->bitcount -= 8;
+ os->bitbuf >>= 8;
+ }
+ return true;
+}
+
+/*
+ * Write out any bits still buffered in the bitstream, zero-padding the final
+ * partial byte, and reset the bit count.  Returns false if the output buffer
+ * filled up.
+ */
+bool
+flush_bits(struct output_bitstream *os)
+{
+ while (os->bitcount > 0) {
+ if (os->next == os->end)
+ return false;
+ *os->next++ = os->bitbuf;
+ /* May go negative on the last partial byte; clamped to 0 below. */
+ os->bitcount -= 8;
+ os->bitbuf >>= 8;
+ }
+ os->bitcount = 0;
+ return true;
+}
diff --git a/tools/z64compress/src/enc/libdeflate/programs/test_util.h b/tools/z64compress/src/enc/libdeflate/programs/test_util.h
new file mode 100644
index 000000000..4fb9688f6
--- /dev/null
+++ b/tools/z64compress/src/enc/libdeflate/programs/test_util.h
@@ -0,0 +1,67 @@
+/*
+ * test_util.h - utility functions for test programs
+ *
+ * Copyright 2016 Eric Biggers
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef PROGRAMS_TEST_UTIL_H
+#define PROGRAMS_TEST_UTIL_H
+
+#include "prog_util.h"
+
+#include <zlib.h> /* for comparison purposes */
+
+/* NOTE(review): if neither __GNUC__ nor __has_attribute is defined, the bare
+ * __has_attribute token in this #if may fail to preprocess on some compilers;
+ * presumably prog_util.h supplies a fallback definition -- confirm. */
+#if defined(__GNUC__) || __has_attribute(noreturn)
+# define _noreturn __attribute__((noreturn))
+#else
+# define _noreturn
+#endif
+
+/* Report a failed assertion and abort; the target of ASSERT() below. */
+void _noreturn
+assertion_failed(const char *expr, const char *file, int line);
+
+/* Abort the test program with a diagnostic if 'expr' is false.
+ * NOTE(review): brace-wrapped rather than do { } while (0), so usage in an
+ * unbraced if/else could mis-parse -- callers in this codebase appear to use
+ * it as a standalone statement. */
+#define ASSERT(expr) { if (unlikely(!(expr))) \
+ assertion_failed(#expr, __FILE__, __LINE__); }
+
+/* Exit early (skipping the test) unless perf tests are enabled via env. */
+void begin_performance_test(void);
+
+/* Guard-page-protected buffers for catching out-of-bounds accesses. */
+void alloc_guarded_buffer(size_t size, u8 **start_ret, u8 **end_ret);
+void free_guarded_buffer(u8 *start, u8 *end);
+
+/* Monotonic timing helpers; raw tick units are platform-dependent. */
+u64 timer_ticks(void);
+u64 timer_ticks_to_ms(u64 ticks);
+u64 timer_MB_per_s(u64 bytes, u64 ticks);
+u64 timer_KB_per_s(u64 bytes, u64 ticks);
+
+/* A simple LSB-first bitstream writer used to build test DEFLATE streams. */
+struct output_bitstream {
+ machine_word_t bitbuf; /* buffered bits; lowest bit is oldest */
+ int bitcount; /* number of valid bits in bitbuf */
+ u8 *next; /* next output byte to write */
+ u8 *end; /* one past the end of the output buffer */
+};
+
+bool put_bits(struct output_bitstream *os, machine_word_t bits, int num_bits);
+bool flush_bits(struct output_bitstream *os);
+
+#endif /* PROGRAMS_TEST_UTIL_H */
diff --git a/tools/z64compress/src/enc/libdeflate/programs/tgetopt.c b/tools/z64compress/src/enc/libdeflate/programs/tgetopt.c
new file mode 100644
index 000000000..868600d97
--- /dev/null
+++ b/tools/z64compress/src/enc/libdeflate/programs/tgetopt.c
@@ -0,0 +1,118 @@
+/*
+ * tgetopt.c - portable replacement for GNU getopt()
+ *
+ * Copyright 2016 Eric Biggers
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "prog_util.h"
+
+/* Equivalents of getopt()'s optarg, optind, opterr, and optopt. */
+tchar *toptarg;
+int toptind = 1, topterr = 1, toptopt;
+
+/*
+ * This is a simple implementation of getopt(). It can be compiled with either
+ * 'char' or 'wchar_t' as the character type.
+ *
+ * Do *not* use this implementation if you need any of the following features,
+ * as they are not supported:
+ * - Long options
+ * - Option-related arguments retained in argv, not nulled out
+ * - '+' and '-' characters in optstring
+ *
+ * Returns: the option character; '?' for an unrecognized option (or a missing
+ * argument when optstring does not begin with ':'); ':' for a missing
+ * argument when optstring begins with ':'; or -1 when no options remain, at
+ * which point nonoptions have been permuted to the end of argv and toptind
+ * indexes the first of them.  Consumed argv entries are set to NULL.
+ */
+int
+tgetopt(int argc, tchar *argv[], const tchar *optstring)
+{
+ static tchar empty[1];
+ /* Static scan state; reset whenever the caller rewinds toptind to 1. */
+ static tchar *nextchar;
+ static bool done;
+
+ if (toptind == 1) {
+ /* Starting to scan a new argument vector */
+ nextchar = NULL;
+ done = false;
+ }
+
+ while (!done && (nextchar != NULL || toptind < argc)) {
+ if (nextchar == NULL) {
+ /* Scanning a new argument */
+ tchar *arg = argv[toptind++];
+ if (arg[0] == '-' && arg[1] != '\0') {
+ if (arg[1] == '-' && arg[2] == '\0') {
+ /* All args after "--" are nonoptions */
+ argv[toptind - 1] = NULL;
+ done = true;
+ } else {
+ /* Start of short option characters */
+ nextchar = &arg[1];
+ }
+ }
+ } else {
+ /* More short options in previous arg */
+ tchar opt = *nextchar;
+ tchar *p = tstrchr(optstring, opt);
+ if (p == NULL) {
+ if (topterr)
+ msg("invalid option -- '%"TC"'", opt);
+ toptopt = opt;
+ return '?';
+ }
+ /* 'opt' is a valid short option character */
+ nextchar++;
+ toptarg = NULL;
+ if (*(p + 1) == ':') {
+ /* 'opt' can take an argument */
+ if (*nextchar != '\0') {
+ /* Optarg is in same argv argument */
+ toptarg = nextchar;
+ /* Point at "" so the end-of-arg check below fires. */
+ nextchar = empty;
+ } else if (toptind < argc && *(p + 2) != ':') {
+ /* Optarg is next argv argument */
+ argv[toptind - 1] = NULL;
+ toptarg = argv[toptind++];
+ } else if (*(p + 2) != ':') {
+ /* Required argument is missing ("x:" but no
+  * more input); "x::" (optional arg) falls
+  * through with toptarg == NULL instead. */
+ if (topterr && *optstring != ':') {
+ msg("option requires an "
+ "argument -- '%"TC"'", opt);
+ }
+ toptopt = opt;
+ opt = (*optstring == ':') ? ':' : '?';
+ }
+ }
+ if (*nextchar == '\0') {
+ /* Finished this argv entry; null it out so the
+  * final permutation pass skips it. */
+ argv[toptind - 1] = NULL;
+ nextchar = NULL;
+ }
+ return opt;
+ }
+ }
+
+ /* Done scanning. Move all nonoptions to the end, set optind to the
+ * index of the first nonoption, and return -1. */
+ toptind = argc;
+ while (--argc > 0)
+ if (argv[argc] != NULL)
+ argv[--toptind] = argv[argc];
+ done = true;
+ return -1;
+}
diff --git a/tools/z64compress/src/enc/libdeflate/scripts/afl-fuzz/deflate_compress/fuzz.c b/tools/z64compress/src/enc/libdeflate/scripts/afl-fuzz/deflate_compress/fuzz.c
new file mode 100644
index 000000000..420a7db67
--- /dev/null
+++ b/tools/z64compress/src/enc/libdeflate/scripts/afl-fuzz/deflate_compress/fuzz.c
@@ -0,0 +1,56 @@
+#include
+#include