Imported Upstream version 6.10.0.49

Former-commit-id: 1d6753294b2993e1fbf92de9366bb9544db4189b
2020-01-16 16:38:04 +00:00
parent d94e79959b
commit 468663ddbb
48518 changed files with 2789335 additions and 61176 deletions
--- a/external/llvm-project/polly/www/.htaccess
+++ b/external/llvm-project/polly/www/.htaccess
@@ -0,0 +1 @@
+AddHandler server-parsed .html 
--- a/external/llvm-project/polly/www/bugs.html
+++ b/external/llvm-project/polly/www/bugs.html
@@ -0,0 +1,36 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
+  "http://www.w3.org/TR/html4/strict.dtd">
+<html>
+<head>
+  <META http-equiv="Content-Type" content="text/html; charset=ISO-8859-1" />
+  <title>Polly - Bugs</title>
+  <link type="text/css" rel="stylesheet" href="menu.css" />
+  <link type="text/css" rel="stylesheet" href="content.css" />
+</head>
+<body>
+
+<div id="box">
+<!--#include virtual="menu.html.incl"-->
+
+<div id="content">
+
+<h1>Bug Reports</h1>
+
+Polly uses the LLVM bug tracking system:
+
+<ul>
+<li>
+<a href="https://bugs.llvm.org/enter_bug.cgi?product=Polly">File new bug</a>
+</li>
+
+<li>
+<a
+href="https://bugs.llvm.org/buglist.cgi?query_format=advanced&bug_status=NEW&bug_status=ASSIGNED&bug_status=REOPENED&product=Polly&list_id=91437">Show open bugs</a>
+</li>
+</ul>
+
+
+</div>
+</div>
+</body>
+</html>
--- a/external/llvm-project/polly/www/changelog.html
+++ b/external/llvm-project/polly/www/changelog.html
@@ -0,0 +1,59 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
+          "http://www.w3.org/TR/html4/strict.dtd">
+<!-- Material used from: HTML 4.01 specs: http://www.w3.org/TR/html401/ -->
+<html>
+<head> <META http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
+  <title>Polly - ChangeLog</title>
+  <link type="text/css" rel="stylesheet" href="menu.css">
+  <link type="text/css" rel="stylesheet" href="content.css">
+</head>
+<body>
+<div id="box">
+<!--#include virtual="menu.html.incl"-->
+<div id="content">
+<h1> ChangeLog </h1>
+<h2> trunk</h2>
+
+<ul>
+<li>Optimized isl for small integers, such that mostly cheap 32bit operations
+are used instead of costly arbitrary precision integers that often also involve
+malloc/free calls. As a result, the compile-time increase due to Polly has
+been largely reduced.</li>
+
+<li>Support for modulo operations: Accesses such as <pre>A[t%2][i]</pre> can
+    now be analyzed.
+</ul>
+
+<h2> 3.7 </h2>
+
+<ul>
+<li>libPluto support has been removed. It has not been tested regularly and
+due to it being copyleft license it had never a chance to become a a core
+piece of Polly. Experiments with different schedulers should use the jscop
+interface.</li>
+</ul>
+
+<h2> 3.6</h2>
+
+<ul>
+<li>Switch to the new isl AST generator (replacing CLooG)</li>
+<li>Run-time alias checks</li>
+<li>Computation of no-alias information for later LLVM optimizations
+(vectorizer, LICM, ...)</li>
+<li>Support for multi-dimensional arrays of parameteric size (still tested)</li>
+<li>New assumption tracking framework</li>
+<ul>
+<li>Accesses to multi-dimensional arrays of fixed size are within bounds</li>
+</ul>
+<li>Compile-time reduction</li>
+</ul>
+
+<h2> Older releases</h2>
+
+No changelog available. Please look at the <a
+href="http://repo.or.cz/w/polly-mirror.git">commit history</a>.
+
+</html>
+</div>
+</body>
+</html>
--- a/external/llvm-project/polly/www/content.css
+++ b/external/llvm-project/polly/www/content.css
@@ -0,0 +1,139 @@
+html { margin: 0px; } body { margin: 8px; }
+
+html, body {
+  padding:0px;
+  font-family:"Lucida Grande", "Lucida Sans Unicode", Arial, Verdana, Helvetica, sans-serif; background-color: #fff; color: #222;
+}
+
+#box {
+  margin-left: auto;
+  margin-right: auto;
+  max-width: 67em;
+}
+[id=content] {
+	/* *****  EDIT THIS VALUE IF CONTENT OVERLAPS MENU ***** */
+        margin-left: 21em;
+	padding-left: 3em;
+}
+
+a:visited {
+  color: #931e24;
+}
+
+h1, h2, h3, tt { color: #000 }
+
+h1 { padding-top:0px; margin-top:0px;}
+h2 { color:#333333; padding-top:0.5em; }
+h3 { padding-top: 0.5em; color:#2d58b7}
+li { padding-bottom: 0.5em; }
+ul { padding-left:1.5em; }
+
+TD.done {background-color: #88ff99; text-align: center}
+TD.inprogress{background-color: #ffce00; text-align: center}
+TD.open{background-color: #e6705f; text-align: center}
+TD.nice{background-color: #5555df; text-align: center}
+TD.niceinprogress{background-color: #8888ff; text-align: center}
+
+PRE.code {padding-left: 0.5em; background-color: #eeeeee}
+PRE {padding-left: 0.5em}
+
+/* Slides */
+IMG.img_slide {
+    display: block;
+    margin-left: auto;
+    margin-right: auto
+}
+
+.itemTitle { color:#2d58b7 }
+
+span.error { color:red }
+span.caret { color:green; font-weight:bold }
+
+/* Tables */
+tr { vertical-align:top }
+#news P {padding: 0px; margin: 0px; border: 0px}
+
+#head {
+    min-height: 15em;
+    background-image:url(images/header-background.png);
+    background-repeat:no-repeat;
+    background-position: right;
+    max-width: 70em;
+    margin-bottom: 1em
+
+
+}
+
+#head h1 {
+  padding-bottom: 0em;
+  margin-bottom: 0em;
+  padding-top: 1em;
+  padding-left: 2em;
+  font-size: 3em;
+}
+
+#head h1 span {
+  background: white;
+  -webkit-border-radius: 5px;
+  -moz-border-radius: 5px;
+  border-radius: 5px;
+  background: white;
+  box-shadow: 1px 2px 5px 1px rgba(0, 0, 0, 0.7),
+              -1px 2px 20px rgba(255, 255, 255, 0.6) inset; 
+}
+
+#head h1 span a {
+  text-decoration: none;
+  color: #3b4567;
+}
+#head h1 span:before {
+  content: "\00a0 ";
+}
+#head h1 span:after {
+  content: "\00a0 ";
+}
+
+#head h2 {
+  color: #3b4567;
+  text-align: center;
+  padding-top: 0em;
+  margin-top: 0em;
+  padding-left: .5em;
+  padding-bottom: .5em;
+}
+
+#head h2 span {
+  background: white;
+  -webkit-border-radius: 5px;
+  -moz-border-radius: 5px;
+  border-radius: 5px;
+  background: white;
+  box-shadow: 1px 2px 5px 1px rgba(0, 0, 0, 0.7),
+              -1px 2px 20px rgba(255, 255, 255, 0.6) inset; 
+  padding-top: 0.2em;
+  padding-bottom: 0.2em;
+}
+
+#head h2 span:before {
+  content: "\00a0\00a0\00a0";
+}
+#head h2 span:after {
+  content: "\00a0\00a0\00a0";
+}
+
+#head p:before {
+  content: "\00a0\00a0\00a0";
+}
+#head p:after {
+  content: "\00a0\00a0\00a0";
+}
+
+#head p {
+  padding:0.1em;
+  background: rgba(0,0,0,0.3);
+  color: white;
+  position: absolute;
+  top: -1em; right: 0.5em;
+  -webkit-border-radius: 5px;
+  -moz-border-radius: 5px;
+}
--- a/external/llvm-project/polly/www/contributors.html
+++ b/external/llvm-project/polly/www/contributors.html
@@ -0,0 +1,65 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
+          "http://www.w3.org/TR/html4/strict.dtd">
+<html>
+<head>
+  <META http-equiv="Content-Type" content="text/html; charset=ISO-8859-1" />
+  <title>Polly - Contributors</title>
+  <link type="text/css" rel="stylesheet" href="menu.css" />
+  <link type="text/css" rel="stylesheet" href="content.css" />
+</head>
+<body>
+
+<div id="box">
+<!--#include virtual="menu.html.incl"-->
+
+<div id="content">
+
+<h1>Contributors</h1>
+
+Polly is developed by a team of students supported by different universities.
+
+<h2>People</h2>
+<h3>Raghesh Aloor</h3>
+<p>Raghesh works on OpenMP code generation. He is funded as Google Summer of Code
+Student 2011.</p>
+
+<h3>Johannes Doerfert</h3>
+<p>Johannes works on Polly as part of his PhD research at Saarland University
+in Germany.</p>
+
+<h3>Tobias Grosser</h3>
+<p>Tobias is one of the two Co-founders of Polly. He designed the overall
+architecture and contributed to almost every part of Polly. Polly was started
+during his diploma studies at University of Passau. Furthermore, he spent 6
+months at Ohio State University (funded by the U.S. National Science Foundation
+through awards 0811781 and 0926688). From August 2011 he works on Polly,
+during his PhD with INRIA/UMPC/ENS (funded through a
+<a href="http://research.google.com/university/relations/fellowship_recipients.html">
+Google Europe Fellowship in Efficient Computing</a>).</p>
+
+<p>Website: <a href="http://www.grosser.es">www.grosser.es</a></p>
+
+
+<h3>Andreas Simb&uuml;rger</h3>
+<p>
+Andreas works on the profiling infrastructure during his PhD at University of
+Passau.
+</p>
+<p>Website: <a href="http://www.infosun.fim.uni-passau.de/cl/staff/simbuerger/">
+http://www.infosun.fim.uni-passau.de/cl/staff/simbuerger/</a></p>
+<h3>Hongbin Zheng</h3>
+<p>Hongbin Zheng is one of the two Co-founders of Polly. He was funded as a
+Google Summer of Code Student 2010 and implemented parts of the Polly frontends
+as well as the automake/cmake infrastructure.</p>
+
+<h2> Universities</h2>
+
+<p>Polly is supported by the following Universities.</p>
+<img src="images/iit-madras.png" style="padding:1em" />
+<img src="images/uni-passau.png" style="padding: 1em; padding-bottom:2em;"/>
+<img src="images/osu.png" style="padding:1em"/>
+<img src="images/sys-uni.png" style="padding:1em"/>
+</div>
+</div>
+</body>
+</html>
--- a/external/llvm-project/polly/www/documentation.html
+++ b/external/llvm-project/polly/www/documentation.html
@@ -0,0 +1,37 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" 
+          "http://www.w3.org/TR/html4/strict.dtd">
+<!-- Material used from: HTML 4.01 specs: http://www.w3.org/TR/html401/ -->
+<html>
+<head>
+  <META http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
+  <title>Polly - Documentation</title>
+  <link type="text/css" rel="stylesheet" href="menu.css">
+  <link type="text/css" rel="stylesheet" href="content.css">
+</head>
+<body>
+<div id="box">
+<!--#include virtual="menu.html.incl"-->
+<div id="content">
+  <!--*********************************************************************-->
+  <h1>Documentation</h1>
+  <!--*********************************************************************-->
+
+<ul>
+<li><a href="documentation/architecture.html">The Architecture of Polly</a></li>
+<li><a href="example_load_Polly_into_clang.html">Use Polly in clang/clang++</a>
+
+</li>
+
+<li>
+<a href="example_manual_matmul.html">Inside Polly - How to manually use the
+individual pieces of Polly</a>
+
+</li>
+<li><a href="documentation/passes.html">A list of the LLVM passes available
+in Polly</a></li>
+<li><a href="docs/">New SPINX based documentation (early stage)</a></li>
+</ul>
+</div>
+</div>
+</body>
+</html>
--- a/external/llvm-project/polly/www/documentation/architecture.html
+++ b/external/llvm-project/polly/www/documentation/architecture.html
@@ -0,0 +1,23 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
+        "http://www.w3.org/TR/html4/strict.dtd">
+<!-- Material used from: HTML 4.01 specs: http://www.w3.org/TR/html401/ -->
+<html>
+<head>
+  <META http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
+  <title>Polly - The architecture</title>
+  <link type="text/css" rel="stylesheet" href="../menu.css">
+  <link type="text/css" rel="stylesheet" href="../content.css">
+</head>
+<body>
+<div id="box">
+<!--#include virtual="../menu.html.incl"-->
+<div id="content">
+  <!--*********************************************************************-->
+  <h1>The Architecture Diagram of Polly</h1>
+  <!--*********************************************************************-->
+  <img src='../images/architecture.png' />
+</div>
+</div>
+</body>
+</html>
+
--- a/external/llvm-project/polly/www/documentation/gpgpucodegen.html
+++ b/external/llvm-project/polly/www/documentation/gpgpucodegen.html
@@ -0,0 +1,229 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
+        "http://www.w3.org/TR/html4/strict.dtd">
+<!-- Material used from: HTML 4.01 specs: http://www.w3.org/TR/html401/ -->
+<html>
+<head>
+  <META http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
+  <title>Polly - GPGPU Code Generation</title>
+  <link type="text/css" rel="stylesheet" href="../menu.css">
+  <link type="text/css" rel="stylesheet" href="../content.css">
+</head>
+<body>
+<div id="box">
+<!--#include virtual="../menu.html.incl"-->
+<div id="content">
+  <!--*********************************************************************-->
+  <h1>Polly - GPGPU Code Generation</h1>
+  <!--*********************************************************************-->
+<p><em>WARNING: This project was part of the Google Summer of Code 2012.
+It is currently not finished, but it is in the design and implementation stage.
+The ideas/plans described here may not yet be implemented in Polly and may
+change later on.</em></p>
+
+This project adds GPGPU code generation feature to Polly.
+
+<h2>Objective</h2>
+<p>The overall objective of this GSoC project is to create a preliminary
+   implementation of GPGPU code generation for Polly. With this addition, users
+   can parallelize some perfectly nested loops with Polly to execute on a
+   heterogeneous platform, composed of CPU and GPU.</p>
+<p>There are several successful projects about automatic source-to-source gpu
+   code transformation. C-to-CUDA[1] uses the standard Pluto algorithms for
+   computing an affine schedule and then applies a wavefront transformation to
+   obtain one sequential and n-1 parallel loops. The parallel loops are then
+   mapped onto the blocks and threads of GPU. PPCG[2] introduces some advanced
+   algorithms which can expose much more parallelism than other methods . And It
+   also introduces affine partition heuristics and code generation algorithms
+   for locality enhancement in the registers and shared memory.</p>
+<p>Since automatic GPGPU code generation is quite a complex problem and what we
+   target is a low-level intermediate representation, LLVM IR, rather than a
+   high-level language source, it is important for us to set a proper objective
+   as a start step to give a complete solution to GPGPU code generation for LLVM
+   IR.</p>
+<p>Firstly, we plan to target two kinds of relatively simple test cases. One is
+   comprised of pure parallel and perfectly nested loops, like the following
+   code.</p>
+<pre>
+parfor(int i=0 to M)
+  parfor(int j=0 to N)
+    LoopBody(i, j);
+</pre>
+<p>Another one is that all the loops in it are parallel except the inner-most
+   one, just like this:</p>
+<pre>
+parfor(int i=0 to M)
+  parfor(int j=0 to N)
+    non-parfor(int k=0 to K)
+      LoopBody(i, j, k);
+</pre>
+<p>The LoopBody part should be limited to instructions or functions calls
+   (intrinsics) which can be handled by LLVM's NVPTX backend.</p>
+<p>On the other hand, we focus on building a preliminary and scalable framework
+   of GPGPU code generation for polly. Thus we plan to employ relatively simple
+   tiling and mapping algorithms and optimize them later.</p>
+<h2>Work Flow</h2>
+<h3>GPGPU Code Generation In General</h3>
+<p>C-to-CUDA[1] and PPCG[2] propose similar steps to solve the automatic GPGPU
+   code generation problem.</p>
+<li>Look for parallel loops.</li>
+<li>Create a polyhedral model from the loops.</li>
+<li>Tile and map the loops to GPU blocks and threads.</li>
+<li>Determine where to place the data.</li>
+<h3>What has been done in Polly</h3>
+<p>Polly has implemented the 1st, 2nd and part of the 3rd of the above steps and
+   many other analysis and transformation passes.</p>
+<h3>What to do in Polly</h3>
+<p>Unlike many source-to-source optimizers such as C-to-CUDA and PPCG, Polly is
+   a low-level optimizer, which means we can't use a source-level compiler
+   (e.g. NVCC) to generate the final assembly for the device. We need manually
+   insert device driver API calls to execute the generated kernel assembly
+   text.</p>
+<p>In this project, we assume that the device driver library has provided an
+   interface to launch kernels in the form of assembly text. Fortunately, most
+   of the mainstream GPU vendors provide such a feature in thier products (see
+   ptxjit of NVIDIA GPUs and CAL of AMD GPUs). Generally speaking, what we
+   are going to do in Polly is:</p>
+<li>Find a way to tile the parallel loops.</li>
+<li>Find a way to extract the loop body and transform it into thread-centric
+    parallel code.</li>
+<li>Find a way to store/load the thread-centric code into/from a device module.
+<li>Find a way to pass the target machine information and generate code of the
+    device module for the target.
+<li>Find a way to map the tiled loop to GPU blocks and threads.</li>
+<li>Find a way to insert CUDA synchronization operations on-demand.
+<li>Find a way to generate the memory copy operations between a host and a
+    device.</li>
+<li>Implement/Wrap a runtime library to serve as the execution engine for the
+    generated device code.</li>
+
+<h3>The Work Flow</h3>
+<p>In this section, we assume that the host cpu is X86 and the device is NVIDIA
+   CUDA-compatible. we will use the following test case to describe our work
+   flow.</p>
+<pre>
+for(i = 0; i &lt; 128; i++)
+      for(j = 0; j &lt; 128; j++)
+              A[i][j] = i*128 + j;
+</pre>
+<p>The work flow of our code generator is as follows.</p>
+<p>1.We first use Polly's jscop file importer to get a wanted 4-level parallel
+   tiled code.</p>
+The "schedule" part of the pre-optimization jscop file is as the following:
+<pre>
+"schedule" : "{ Stmt_for_body3[i0, i1] -&gt; schedule[0, i0, 0, i1, 0] }"
+</pre>
+The jscop file describing the tiling transformation is:
+<pre>
+"schedule" : "{ Stmt_for_body3[i0, i1] -&gt; schedule[0, o0, o1, o2, o3]:
+              o0 &gt;= 0 and o0 &lt;= 7 and o1 &gt;= 0 and o1 &lt;= 15 and
+              o2 &gt;= 0 and o2 &lt;= 7 and o3 &gt;= 0 and o3 &lt;= 15 and
+              i0 = 16o0 + o1 and i1 = 16o2 + o3 }"
+</pre>
+We can test the schedule with the following command line.
+<pre>
+opt -load /path/to/polly/build/LLVMPolly.so -basicaa -polly-import-jscop
+    -polly-ast -analyze -q ./test.ll
+    -polly-import-jscop-postfix=transformed+gpu
+</pre>
+The output of this schedule is:
+<pre>
+for (c2=0;c2&lt;=7;c2++) {
+  for (c3=0;c3&lt;=15;c3++) {
+    for (c4=0;c4&lt;=7;c4++) {
+      for (c5=0;c5&lt;=15;c5++) {
+        Stmt_for_body3(16*c2+c3,16*c4+c5);
+      }
+    }
+  }
+}
+</pre>
+Now we get a 4-dimensional parallel loops with a single SCoP statement in it.
+<p>2.We then extract the loop body (or the inner-most non-parallel loop) into a
+   LLVM function, tagging it with PTX_Kernel call convention.</p>
+<p>3.We extract the PTX_kernel function into a temporary module, set the target
+   triple (e.g. nvptx64-unknown-linux) for the module, transform the temporary
+   module into a string, store it in the original module and erase the
+   PTX_kernel function.</p>
+<p>4.We replace the loops with their GPGPU counterpart. The GPGPU part of code
+   is composed of a call to the llvm.codegen intrinsic and function calls to our
+   GPU runtime library.</p>
+<p>5.Finally, we generate the executable program with <em>llc</em> or run the
+   optimized LLVM IRs with a JIT compiler like  <em>lli</em>.</p>
+<h2>Usage</h2>
+<p>1. Apply the llvm.codegen intrinsic patch to LLVM code base.</p>
+<pre>cd /path/to/llvm/source
+git am /path/to/polly/source/utils/0001-Add-llvm.codegen-intrinsic.patch</pre>
+<p>2. Build the test case.</p>
+<pre>/path/to/polly/source/test/create_ll.sh test.c</pre>
+<p>3. Get and edit the jscop file (take function "gpu_codegen" as an example).
+</p>
+<pre>opt -load /path/to/polly/build/lib/LLVMPolly.so -basicaa
+    -polly-export-jscop ./test.ll
+cp gpu_codegen___%for.cond---%for.end8.jscop
+   gpu_codegen___%for.cond---%for.end8.jscop.transformed+gpu
+vi gpu_codegen___%for.cond---%for.end8.jscop.transformed+gpu</pre>
+<p><em>(Please refer to section "The Work Flow" on how to edit the "schedule"
+       part of a statement)</em></p>
+<p>4. Optimize the code with GPGPU code generation.</p>
+<pre>opt -load /path/to/polly/build/lib/LLVMPolly.so -basicaa
+    -polly-import-jscop-postfix=transformed+gpu -enable-polly-gpgpu
+    -polly-gpgpu-triple=nvptx64-unknown-unknown -polly-codegen ./test.ll -S
+    -o test.gpued.ll</pre>
+<p>5. Build the final assembly and executable.</p>
+<pre>llc test.gpued.ll -o test.s
+gcc test.s -lGPURuntime -o test</pre>
+<p><em>(Please make sure that LD_LIBRARY_PATH is set properly so that
+        /path/to/polly/build/lib/libGPURuntime.so is visible to gcc.)</em></p>
+<h2>TODO List</h2>
+
+<table class="wikitable" cellpadding="2">
+<tbody>
+<tr style="background: rgb(239, 239, 239)">
+  <th width="400px"> Tasks</th>
+  <th width="150px"> Status </th>
+  <th> Owner </th>
+</tr>
+<tr>
+<th align="left">Tiling the Parallel Loops with An External Jscop File</th>
+<td align="center" class='open'>Open, In Design</td>
+<td>Yabin Hu</td>
+</tr>
+<tr>
+<th align="left">GPU Runtime Library Implementation</th>
+<td align="center" class='inprogress'>Coding Finished, In Reviewing</td>
+<td></td>
+</tr>
+<tr>
+<th align="left">llvm.codegen Intrinsic Implementation</th>
+<td align="center" class='inprogress'>Codeing Finished, To Be Reviewed</td>
+<td></td>
+</tr>
+<tr>
+<th align="left">Code Generation For Host</th>
+<td align="center" class='inprogress'>50% Done</td>
+<td></td>
+</tr>
+
+</tbody></table>
+
+<h2>References</h2>
+<li type="1" value="1">
+<em>Automatic C-to-CUDA Code Generation for Affine Programs. </em><br />
+    Muthu Manikandan Baskaran, J. Ramanujam and P. Sadayappan.<br />
+    International Conference on Compiler Construction (CC) 2010.<br />
+</li>
+<li type="1"><em>PPCG Project</em><br />
+<a href="http://freecode.com/projects/ppcg">http://freecode.com/projects/ppcg
+</a></li>
+<li type="1">
+<em>Where is the Data? Why You Cannot Debate GPU vs. CPU Performance Without the
+    Answer. </em><br />
+  Chris Gregg and Kim Hazelwood<br />
+  International Symposium on Performance Analysis of Systems and Software
+  (ISPASS) 2011.
+</li>
+<p></p>
+</div>
+</div>
+</body>
+</html>
--- a/external/llvm-project/polly/www/documentation/passes.html
+++ b/external/llvm-project/polly/www/documentation/passes.html
@@ -0,0 +1,56 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
+          "http://www.w3.org/TR/html4/strict.dtd">
+<!-- Material used from: HTML 4.01 specs: http://www.w3.org/TR/html401/ -->
+<html>
+<head>
+  <META http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
+  <title>Polly - The available LLVM passes</title>
+  <link type="text/css" rel="stylesheet" href="../menu.css">
+  <link type="text/css" rel="stylesheet" href="../content.css">
+</head>
+<body>
+<div id="box">
+<!--#include virtual="../menu.html.incl"-->
+<div id="content">
+  <!--*********************************************************************-->
+  <h1>The available LLVM passes</h1>
+  <!--*********************************************************************-->
+
+  <p>Polly consists of a set of LLVM passes.  </p>
+
+<h2>Front End</h2>
+<ul>
+<li><em>polly-canonicalize</em> Prepare code for Polly</li>
+<li><em>polly-detect</em> Detect SCoPs in functions</li>
+<li><em>polly-scops</em> Create polyhedral description of SCoPs</li>
+</ul>
+<h2>Middle End</h2>
+<ul>
+<li><em>polly-dependences</em> Calculate the dependences in a SCoPs</li>
+<li><em>polly-opt-isl</em> Optimize the SCoP using isl</li>
+<li>Import/Export
+<ul>
+<li><em>polly-export-jscop</em> Export SCoPs as JSON
+(Writes a .jscop file for each SCoP)</li>
+<li><em>polly-import-jscop</em> Import SCoPs from JSON
+(Reads a .jscop file for each SCoP)</li>
+</ul>
+</li>
+<li>Graphviz
+<ul>
+<li><em>dot-scops</em> Print SCoPs of function</li>
+<li><em>dot-scops-only</em> Print SCoPs of function (without function bodies)</li>
+<li><em>view-scops</em> View SCoPs of function</li>
+<li><em>view-scops-only</em> View SCoPs of function (without function bodies)</li>
+</ul></li>
+</ul>
+<h2>Back End</h2>
+<ul>
+<li><em>polly-ast</em> Execute isl code generation</li>
+<li><em>polly-codegen</em> Create LLVM-IR from the polyhedral information</li>
+</ul>
+
+</div>
+</div>
+</body>
+</html>
--- a/external/llvm-project/polly/www/experiments/matmul/matmul.c
+++ b/external/llvm-project/polly/www/experiments/matmul/matmul.c
@@ -0,0 +1,52 @@
+#include <stdio.h>
+
+#define N 1536
+float A[N][N];
+float B[N][N];
+float C[N][N];
+
+void init_array()
+{
+    int i, j;
+
+    for (i = 0; i < N; i++) {
+        for (j = 0; j < N; j++) {
+            A[i][j] = (1+(i*j)%1024)/2.0;
+            B[i][j] = (1+(i*j)%1024)/2.0;
+        }
+    }
+}
+
+void print_array()
+{
+    int i, j;
+
+    for (i = 0; i < N; i++) {
+        for (j = 0; j < N; j++) {
+            fprintf(stdout, "%lf ", C[i][j]);
+            if (j%80 == 79) fprintf(stdout, "\n");
+        }
+        fprintf(stdout, "\n");
+    }
+}
+
+int main()
+{
+    int i, j, k;
+    double t_start, t_end;
+
+    init_array();
+
+    for (i = 0; i < N; i++) {
+        for (j = 0; j < N; j++) {
+            C[i][j] = 0;
+            for (k = 0; k < N; k++)
+                C[i][j] = C[i][j] + A[i][k] * B[k][j];
+        }
+    }
+
+#ifdef TEST
+    print_array();
+#endif
+    return 0;
+}
--- a/external/llvm-project/polly/www/experiments/matmul/matmul.normalopt.ll
+++ b/external/llvm-project/polly/www/experiments/matmul/matmul.normalopt.ll
--- a/external/llvm-project/polly/www/experiments/matmul/matmul.normalopt.s
+++ b/external/llvm-project/polly/www/experiments/matmul/matmul.normalopt.s
@@ -0,0 +1,274 @@
+	.file	"matmul.normalopt.ll"
+	.section	.rodata.cst8,"aM",@progbits,8
+	.align	8
+.LCPI0_0:
+	.quad	4602678819172646912     # double 0.5
+	.text
+	.globl	init_array
+	.align	16, 0x90
+	.type	init_array,@function
+init_array:                             # @init_array
+	.cfi_startproc
+# BB#0:                                 # %entry
+	pushq	%rbp
+.Ltmp2:
+	.cfi_def_cfa_offset 16
+.Ltmp3:
+	.cfi_offset %rbp, -16
+	movq	%rsp, %rbp
+.Ltmp4:
+	.cfi_def_cfa_register %rbp
+	xorl	%r8d, %r8d
+	vmovsd	.LCPI0_0(%rip), %xmm0
+	.align	16, 0x90
+.LBB0_1:                                # %for.cond1.preheader
+                                        # =>This Loop Header: Depth=1
+                                        #     Child Loop BB0_2 Depth 2
+	xorl	%ecx, %ecx
+	.align	16, 0x90
+.LBB0_2:                                # %for.body3
+                                        #   Parent Loop BB0_1 Depth=1
+                                        # =>  This Inner Loop Header: Depth=2
+	movl	%ecx, %edx
+	imull	%r8d, %edx
+	movl	%edx, %esi
+	sarl	$31, %esi
+	shrl	$22, %esi
+	addl	%edx, %esi
+	andl	$-1024, %esi            # imm = 0xFFFFFFFFFFFFFC00
+	negl	%esi
+	movq	%r8, %rax
+	shlq	$11, %rax
+	leal	1(%rdx,%rsi), %edi
+	leaq	(%rax,%rax,2), %rsi
+	leaq	1(%rcx), %rdx
+	cmpq	$1536, %rdx             # imm = 0x600
+	vcvtsi2sdl	%edi, %xmm0, %xmm1
+	vmulsd	%xmm0, %xmm1, %xmm1
+	vcvtsd2ss	%xmm1, %xmm1, %xmm1
+	vmovss	%xmm1, A(%rsi,%rcx,4)
+	vmovss	%xmm1, B(%rsi,%rcx,4)
+	movq	%rdx, %rcx
+	jne	.LBB0_2
+# BB#3:                                 # %for.inc17
+                                        #   in Loop: Header=BB0_1 Depth=1
+	incq	%r8
+	cmpq	$1536, %r8              # imm = 0x600
+	jne	.LBB0_1
+# BB#4:                                 # %for.end19
+	popq	%rbp
+	ret
+.Ltmp5:
+	.size	init_array, .Ltmp5-init_array
+	.cfi_endproc
+
+	.globl	print_array
+	.align	16, 0x90
+	.type	print_array,@function
+print_array:                            # @print_array
+	.cfi_startproc
+# BB#0:                                 # %entry
+	pushq	%rbp
+.Ltmp9:
+	.cfi_def_cfa_offset 16
+.Ltmp10:
+	.cfi_offset %rbp, -16
+	movq	%rsp, %rbp
+.Ltmp11:
+	.cfi_def_cfa_register %rbp
+	pushq	%r15
+	pushq	%r14
+	pushq	%r12
+	pushq	%rbx
+.Ltmp12:
+	.cfi_offset %rbx, -48
+.Ltmp13:
+	.cfi_offset %r12, -40
+.Ltmp14:
+	.cfi_offset %r14, -32
+.Ltmp15:
+	.cfi_offset %r15, -24
+	xorl	%r14d, %r14d
+	movl	$C, %r15d
+	.align	16, 0x90
+.LBB1_1:                                # %for.cond1.preheader
+                                        # =>This Loop Header: Depth=1
+                                        #     Child Loop BB1_2 Depth 2
+	movq	stdout(%rip), %rax
+	movq	%r15, %r12
+	xorl	%ebx, %ebx
+	.align	16, 0x90
+.LBB1_2:                                # %for.body3
+                                        #   Parent Loop BB1_1 Depth=1
+                                        # =>  This Inner Loop Header: Depth=2
+	vmovss	(%r12), %xmm0
+	vcvtss2sd	%xmm0, %xmm0, %xmm0
+	movq	%rax, %rdi
+	movl	$.L.str, %esi
+	movb	$1, %al
+	callq	fprintf
+	movslq	%ebx, %rax
+	imulq	$1717986919, %rax, %rcx # imm = 0x66666667
+	movq	%rcx, %rdx
+	shrq	$63, %rdx
+	sarq	$37, %rcx
+	addl	%edx, %ecx
+	imull	$80, %ecx, %ecx
+	subl	%ecx, %eax
+	cmpl	$79, %eax
+	jne	.LBB1_4
+# BB#3:                                 # %if.then
+                                        #   in Loop: Header=BB1_2 Depth=2
+	movq	stdout(%rip), %rsi
+	movl	$10, %edi
+	callq	fputc
+.LBB1_4:                                # %for.inc
+                                        #   in Loop: Header=BB1_2 Depth=2
+	addq	$4, %r12
+	incq	%rbx
+	movq	stdout(%rip), %rax
+	cmpq	$1536, %rbx             # imm = 0x600
+	jne	.LBB1_2
+# BB#5:                                 # %for.end
+                                        #   in Loop: Header=BB1_1 Depth=1
+	movl	$10, %edi
+	movq	%rax, %rsi
+	callq	fputc
+	addq	$6144, %r15             # imm = 0x1800
+	incq	%r14
+	cmpq	$1536, %r14             # imm = 0x600
+	jne	.LBB1_1
+# BB#6:                                 # %for.end12
+	popq	%rbx
+	popq	%r12
+	popq	%r14
+	popq	%r15
+	popq	%rbp
+	ret
+.Ltmp16:
+	.size	print_array, .Ltmp16-print_array
+	.cfi_endproc
+
+	.section	.rodata.cst8,"aM",@progbits,8
+	.align	8
+.LCPI2_0:
+	.quad	4602678819172646912     # double 0.5
+	.text
+	.globl	main
+	.align	16, 0x90
+	.type	main,@function
+main:                                   # @main
+	.cfi_startproc
+# BB#0:                                 # %entry
+	pushq	%rbp
+.Ltmp19:
+	.cfi_def_cfa_offset 16
+.Ltmp20:
+	.cfi_offset %rbp, -16
+	movq	%rsp, %rbp
+.Ltmp21:
+	.cfi_def_cfa_register %rbp
+	xorl	%r8d, %r8d
+	vmovsd	.LCPI2_0(%rip), %xmm0
+	.align	16, 0x90
+.LBB2_1:                                # %for.cond1.preheader.i
+                                        # =>This Loop Header: Depth=1
+                                        #     Child Loop BB2_2 Depth 2
+	xorl	%ecx, %ecx
+	.align	16, 0x90
+.LBB2_2:                                # %for.body3.i
+                                        #   Parent Loop BB2_1 Depth=1
+                                        # =>  This Inner Loop Header: Depth=2
+	movl	%ecx, %edx
+	imull	%r8d, %edx
+	movl	%edx, %esi
+	sarl	$31, %esi
+	shrl	$22, %esi
+	addl	%edx, %esi
+	andl	$-1024, %esi            # imm = 0xFFFFFFFFFFFFFC00
+	negl	%esi
+	movq	%r8, %rax
+	shlq	$11, %rax
+	leal	1(%rdx,%rsi), %edi
+	leaq	(%rax,%rax,2), %rsi
+	leaq	1(%rcx), %rdx
+	cmpq	$1536, %rdx             # imm = 0x600
+	vcvtsi2sdl	%edi, %xmm0, %xmm1
+	vmulsd	%xmm0, %xmm1, %xmm1
+	vcvtsd2ss	%xmm1, %xmm1, %xmm1
+	vmovss	%xmm1, A(%rsi,%rcx,4)
+	vmovss	%xmm1, B(%rsi,%rcx,4)
+	movq	%rdx, %rcx
+	jne	.LBB2_2
+# BB#3:                                 # %for.inc17.i
+                                        #   in Loop: Header=BB2_1 Depth=1
+	incq	%r8
+	cmpq	$1536, %r8              # imm = 0x600
+	jne	.LBB2_1
+# BB#4:
+	xorl	%r8d, %r8d
+	movl	$A, %r9d
+	.align	16, 0x90
+.LBB2_5:                                # %for.cond1.preheader
+                                        # =>This Loop Header: Depth=1
+                                        #     Child Loop BB2_6 Depth 2
+                                        #       Child Loop BB2_7 Depth 3
+	leaq	(%r8,%r8,2), %rdx
+	shlq	$11, %rdx
+	leaq	C(%rdx), %rsi
+	xorl	%edi, %edi
+	.align	16, 0x90
+.LBB2_6:                                # %for.body3
+                                        #   Parent Loop BB2_5 Depth=1
+                                        # =>  This Loop Header: Depth=2
+                                        #       Child Loop BB2_7 Depth 3
+	movl	$0, (%rsi)
+	vxorps	%xmm0, %xmm0, %xmm0
+	movq	$-9437184, %rax         # imm = 0xFFFFFFFFFF700000
+	movq	%r9, %rcx
+	.align	16, 0x90
+.LBB2_7:                                # %for.body8
+                                        #   Parent Loop BB2_5 Depth=1
+                                        #     Parent Loop BB2_6 Depth=2
+                                        # =>    This Inner Loop Header: Depth=3
+	vmovss	(%rcx), %xmm1
+	vmulss	B+9437184(%rax,%rdi,4), %xmm1, %xmm1
+	vaddss	%xmm1, %xmm0, %xmm0
+	addq	$4, %rcx
+	addq	$6144, %rax             # imm = 0x1800
+	jne	.LBB2_7
+# BB#8:                                 # %for.inc25
+                                        #   in Loop: Header=BB2_6 Depth=2
+	vmovss	%xmm0, (%rsi)
+	leaq	C+4(%rdx,%rdi,4), %rsi
+	incq	%rdi
+	cmpq	$1536, %rdi             # imm = 0x600
+	jne	.LBB2_6
+# BB#9:                                 # %for.inc28
+                                        #   in Loop: Header=BB2_5 Depth=1
+	addq	$6144, %r9              # imm = 0x1800
+	incq	%r8
+	cmpq	$1536, %r8              # imm = 0x600
+	jne	.LBB2_5
+# BB#10:                                # %for.end30
+	xorl	%eax, %eax
+	popq	%rbp
+	ret
+.Ltmp22:
+	.size	main, .Ltmp22-main
+	.cfi_endproc
+
+	.type	A,@object               # @A
+	.comm	A,9437184,16
+	.type	B,@object               # @B
+	.comm	B,9437184,16
+	.type	.L.str,@object          # @.str
+	.section	.rodata.str1.1,"aMS",@progbits,1
+.L.str:
+	.asciz	 "%lf "
+	.size	.L.str, 5
+
+	.type	C,@object               # @C
+	.comm	C,9437184,16
+
+	.section	".note.GNU-stack","",@progbits
--- a/external/llvm-project/polly/www/experiments/matmul/matmul.polly.interchanged+tiled+vector+openmp.ll
+++ b/external/llvm-project/polly/www/experiments/matmul/matmul.polly.interchanged+tiled+vector+openmp.ll
--- a/external/llvm-project/polly/www/experiments/matmul/matmul.polly.interchanged+tiled+vector+openmp.s
+++ b/external/llvm-project/polly/www/experiments/matmul/matmul.polly.interchanged+tiled+vector+openmp.s
--- a/external/llvm-project/polly/www/experiments/matmul/matmul.polly.interchanged+tiled+vector.ll
+++ b/external/llvm-project/polly/www/experiments/matmul/matmul.polly.interchanged+tiled+vector.ll
--- a/external/llvm-project/polly/www/experiments/matmul/matmul.polly.interchanged+tiled+vector.s
+++ b/external/llvm-project/polly/www/experiments/matmul/matmul.polly.interchanged+tiled+vector.s
@@ -0,0 +1,396 @@
+	.file	"matmul.polly.interchanged+tiled+vector.ll"
+	.section	.rodata.cst8,"aM",@progbits,8
+	.align	8
+.LCPI0_0:
+	.quad	4602678819172646912     # double 0.5
+	.text
+	.globl	init_array
+	.align	16, 0x90
+	.type	init_array,@function
+init_array:                             # @init_array
+	.cfi_startproc
+# BB#0:                                 # %entry
+	pushq	%rbp
+.Ltmp2:
+	.cfi_def_cfa_offset 16
+.Ltmp3:
+	.cfi_offset %rbp, -16
+	movq	%rsp, %rbp
+.Ltmp4:
+	.cfi_def_cfa_register %rbp
+	xorl	%r8d, %r8d
+	vmovsd	.LCPI0_0(%rip), %xmm0
+	.align	16, 0x90
+.LBB0_1:                                # %polly.loop_preheader3
+                                        # =>This Loop Header: Depth=1
+                                        #     Child Loop BB0_2 Depth 2
+	xorl	%ecx, %ecx
+	.align	16, 0x90
+.LBB0_2:                                # %polly.loop_header2
+                                        #   Parent Loop BB0_1 Depth=1
+                                        # =>  This Inner Loop Header: Depth=2
+	movl	%ecx, %edx
+	imull	%r8d, %edx
+	movl	%edx, %esi
+	sarl	$31, %esi
+	shrl	$22, %esi
+	addl	%edx, %esi
+	andl	$-1024, %esi            # imm = 0xFFFFFFFFFFFFFC00
+	negl	%esi
+	movq	%r8, %rax
+	shlq	$11, %rax
+	leal	1(%rdx,%rsi), %edi
+	leaq	(%rax,%rax,2), %rsi
+	leaq	1(%rcx), %rdx
+	cmpq	$1536, %rdx             # imm = 0x600
+	vcvtsi2sdl	%edi, %xmm0, %xmm1
+	vmulsd	%xmm0, %xmm1, %xmm1
+	vcvtsd2ss	%xmm1, %xmm1, %xmm1
+	vmovss	%xmm1, A(%rsi,%rcx,4)
+	vmovss	%xmm1, B(%rsi,%rcx,4)
+	movq	%rdx, %rcx
+	jne	.LBB0_2
+# BB#3:                                 # %polly.loop_exit4
+                                        #   in Loop: Header=BB0_1 Depth=1
+	incq	%r8
+	cmpq	$1536, %r8              # imm = 0x600
+	jne	.LBB0_1
+# BB#4:                                 # %polly.loop_exit
+	popq	%rbp
+	ret
+.Ltmp5:
+	.size	init_array, .Ltmp5-init_array
+	.cfi_endproc
+
+	.globl	print_array
+	.align	16, 0x90
+	.type	print_array,@function
+print_array:                            # @print_array
+	.cfi_startproc
+# BB#0:                                 # %entry
+	pushq	%rbp
+.Ltmp9:
+	.cfi_def_cfa_offset 16
+.Ltmp10:
+	.cfi_offset %rbp, -16
+	movq	%rsp, %rbp
+.Ltmp11:
+	.cfi_def_cfa_register %rbp
+	pushq	%r15
+	pushq	%r14
+	pushq	%r12
+	pushq	%rbx
+.Ltmp12:
+	.cfi_offset %rbx, -48
+.Ltmp13:
+	.cfi_offset %r12, -40
+.Ltmp14:
+	.cfi_offset %r14, -32
+.Ltmp15:
+	.cfi_offset %r15, -24
+	xorl	%r14d, %r14d
+	movl	$C, %r15d
+	.align	16, 0x90
+.LBB1_1:                                # %for.cond1.preheader
+                                        # =>This Loop Header: Depth=1
+                                        #     Child Loop BB1_2 Depth 2
+	movq	stdout(%rip), %rax
+	movq	%r15, %r12
+	xorl	%ebx, %ebx
+	.align	16, 0x90
+.LBB1_2:                                # %for.body3
+                                        #   Parent Loop BB1_1 Depth=1
+                                        # =>  This Inner Loop Header: Depth=2
+	vmovss	(%r12), %xmm0
+	vcvtss2sd	%xmm0, %xmm0, %xmm0
+	movq	%rax, %rdi
+	movl	$.L.str, %esi
+	movb	$1, %al
+	callq	fprintf
+	movslq	%ebx, %rax
+	imulq	$1717986919, %rax, %rcx # imm = 0x66666667
+	movq	%rcx, %rdx
+	shrq	$63, %rdx
+	sarq	$37, %rcx
+	addl	%edx, %ecx
+	imull	$80, %ecx, %ecx
+	subl	%ecx, %eax
+	cmpl	$79, %eax
+	jne	.LBB1_4
+# BB#3:                                 # %if.then
+                                        #   in Loop: Header=BB1_2 Depth=2
+	movq	stdout(%rip), %rsi
+	movl	$10, %edi
+	callq	fputc
+.LBB1_4:                                # %for.inc
+                                        #   in Loop: Header=BB1_2 Depth=2
+	addq	$4, %r12
+	incq	%rbx
+	movq	stdout(%rip), %rax
+	cmpq	$1536, %rbx             # imm = 0x600
+	jne	.LBB1_2
+# BB#5:                                 # %for.end
+                                        #   in Loop: Header=BB1_1 Depth=1
+	movl	$10, %edi
+	movq	%rax, %rsi
+	callq	fputc
+	addq	$6144, %r15             # imm = 0x1800
+	incq	%r14
+	cmpq	$1536, %r14             # imm = 0x600
+	jne	.LBB1_1
+# BB#6:                                 # %for.end12
+	popq	%rbx
+	popq	%r12
+	popq	%r14
+	popq	%r15
+	popq	%rbp
+	ret
+.Ltmp16:
+	.size	print_array, .Ltmp16-print_array
+	.cfi_endproc
+
+	.section	.rodata.cst8,"aM",@progbits,8
+	.align	8
+.LCPI2_0:
+	.quad	4602678819172646912     # double 0.5
+	.text
+	.globl	main
+	.align	16, 0x90
+	.type	main,@function
+main:                                   # @main
+	.cfi_startproc
+# BB#0:                                 # %entry
+	pushq	%rbp
+.Ltmp20:
+	.cfi_def_cfa_offset 16
+.Ltmp21:
+	.cfi_offset %rbp, -16
+	movq	%rsp, %rbp
+.Ltmp22:
+	.cfi_def_cfa_register %rbp
+	pushq	%r15
+	pushq	%r14
+	pushq	%r13
+	pushq	%r12
+	pushq	%rbx
+	subq	$56, %rsp
+.Ltmp23:
+	.cfi_offset %rbx, -56
+.Ltmp24:
+	.cfi_offset %r12, -48
+.Ltmp25:
+	.cfi_offset %r13, -40
+.Ltmp26:
+	.cfi_offset %r14, -32
+.Ltmp27:
+	.cfi_offset %r15, -24
+	xorl	%ebx, %ebx
+	vmovsd	.LCPI2_0(%rip), %xmm0
+	.align	16, 0x90
+.LBB2_1:                                # %polly.loop_preheader3.i
+                                        # =>This Loop Header: Depth=1
+                                        #     Child Loop BB2_2 Depth 2
+	xorl	%ecx, %ecx
+	.align	16, 0x90
+.LBB2_2:                                # %polly.loop_header2.i
+                                        #   Parent Loop BB2_1 Depth=1
+                                        # =>  This Inner Loop Header: Depth=2
+	movl	%ecx, %edx
+	imull	%ebx, %edx
+	movl	%edx, %esi
+	sarl	$31, %esi
+	shrl	$22, %esi
+	addl	%edx, %esi
+	andl	$-1024, %esi            # imm = 0xFFFFFFFFFFFFFC00
+	negl	%esi
+	movq	%rbx, %rax
+	shlq	$11, %rax
+	leal	1(%rdx,%rsi), %edi
+	leaq	(%rax,%rax,2), %rsi
+	leaq	1(%rcx), %rdx
+	cmpq	$1536, %rdx             # imm = 0x600
+	vcvtsi2sdl	%edi, %xmm0, %xmm1
+	vmulsd	%xmm0, %xmm1, %xmm1
+	vcvtsd2ss	%xmm1, %xmm1, %xmm1
+	vmovss	%xmm1, A(%rsi,%rcx,4)
+	vmovss	%xmm1, B(%rsi,%rcx,4)
+	movq	%rdx, %rcx
+	jne	.LBB2_2
+# BB#3:                                 # %polly.loop_exit4.i
+                                        #   in Loop: Header=BB2_1 Depth=1
+	incq	%rbx
+	cmpq	$1536, %rbx             # imm = 0x600
+	jne	.LBB2_1
+# BB#4:                                 # %polly.loop_preheader3.preheader
+	movl	$C, %edi
+	xorl	%esi, %esi
+	movl	$9437184, %edx          # imm = 0x900000
+	callq	memset
+	xorl	%esi, %esi
+	movl	$C+16, %eax
+	movq	%rax, -88(%rbp)         # 8-byte Spill
+	.align	16, 0x90
+.LBB2_5:                                # %polly.loop_preheader17
+                                        # =>This Loop Header: Depth=1
+                                        #     Child Loop BB2_15 Depth 2
+                                        #       Child Loop BB2_8 Depth 3
+                                        #         Child Loop BB2_11 Depth 4
+                                        #           Child Loop BB2_17 Depth 5
+                                        #             Child Loop BB2_18 Depth 6
+	movq	%rsi, -56(%rbp)         # 8-byte Spill
+	movq	%rsi, %rax
+	orq	$63, %rax
+	movq	%rax, -72(%rbp)         # 8-byte Spill
+	leaq	-1(%rax), %rax
+	movq	%rax, -48(%rbp)         # 8-byte Spill
+	xorl	%edx, %edx
+	.align	16, 0x90
+.LBB2_15:                               # %polly.loop_preheader24
+                                        #   Parent Loop BB2_5 Depth=1
+                                        # =>  This Loop Header: Depth=2
+                                        #       Child Loop BB2_8 Depth 3
+                                        #         Child Loop BB2_11 Depth 4
+                                        #           Child Loop BB2_17 Depth 5
+                                        #             Child Loop BB2_18 Depth 6
+	movq	%rdx, -80(%rbp)         # 8-byte Spill
+	leaq	-4(%rdx), %rcx
+	movq	%rdx, %rax
+	decq	%rax
+	cmovsq	%rcx, %rax
+	movq	%rax, %r15
+	sarq	$63, %r15
+	shrq	$62, %r15
+	addq	%rax, %r15
+	andq	$-4, %r15
+	movq	%rdx, %r13
+	orq	$63, %r13
+	leaq	-4(%r13), %rdx
+	xorl	%r10d, %r10d
+	movq	-88(%rbp), %rax         # 8-byte Reload
+	leaq	(%rax,%r15,4), %rax
+	movq	%rax, -64(%rbp)         # 8-byte Spill
+	leaq	B+16(,%r15,4), %rbx
+	leaq	4(%r15), %r12
+	.align	16, 0x90
+.LBB2_8:                                # %polly.loop_header23
+                                        #   Parent Loop BB2_5 Depth=1
+                                        #     Parent Loop BB2_15 Depth=2
+                                        # =>    This Loop Header: Depth=3
+                                        #         Child Loop BB2_11 Depth 4
+                                        #           Child Loop BB2_17 Depth 5
+                                        #             Child Loop BB2_18 Depth 6
+	cmpq	-72(%rbp), %rsi         # 8-byte Folded Reload
+	jg	.LBB2_13
+# BB#9:                                 # %polly.loop_header30.preheader
+                                        #   in Loop: Header=BB2_8 Depth=3
+	movq	%r10, %rax
+	orq	$63, %rax
+	cmpq	%rax, %r10
+	jg	.LBB2_13
+# BB#10:                                #   in Loop: Header=BB2_8 Depth=3
+	decq	%rax
+	movq	-64(%rbp), %r14         # 8-byte Reload
+	movq	-56(%rbp), %r11         # 8-byte Reload
+	.align	16, 0x90
+.LBB2_11:                               # %polly.loop_header37.preheader
+                                        #   Parent Loop BB2_5 Depth=1
+                                        #     Parent Loop BB2_15 Depth=2
+                                        #       Parent Loop BB2_8 Depth=3
+                                        # =>      This Loop Header: Depth=4
+                                        #           Child Loop BB2_17 Depth 5
+                                        #             Child Loop BB2_18 Depth 6
+	cmpq	%r13, %r12
+	movq	%rbx, %r8
+	movq	%r10, %rsi
+	jg	.LBB2_12
+	.align	16, 0x90
+.LBB2_17:                               # %polly.loop_header46.preheader
+                                        #   Parent Loop BB2_5 Depth=1
+                                        #     Parent Loop BB2_15 Depth=2
+                                        #       Parent Loop BB2_8 Depth=3
+                                        #         Parent Loop BB2_11 Depth=4
+                                        # =>        This Loop Header: Depth=5
+                                        #             Child Loop BB2_18 Depth 6
+	leaq	(%r11,%r11,2), %rcx
+	shlq	$11, %rcx
+	vbroadcastss	A(%rcx,%rsi,4), %xmm0
+	movq	%r14, %rdi
+	movq	%r8, %r9
+	movq	%r15, %rcx
+.LBB2_18:                               # %polly.loop_header46
+                                        #   Parent Loop BB2_5 Depth=1
+                                        #     Parent Loop BB2_15 Depth=2
+                                        #       Parent Loop BB2_8 Depth=3
+                                        #         Parent Loop BB2_11 Depth=4
+                                        #           Parent Loop BB2_17 Depth=5
+                                        # =>          This Inner Loop Header: Depth=6
+	vmulps	(%r9), %xmm0, %xmm1
+	vaddps	(%rdi), %xmm1, %xmm1
+	vmovaps	%xmm1, (%rdi)
+	addq	$16, %rdi
+	addq	$16, %r9
+	addq	$4, %rcx
+	cmpq	%rdx, %rcx
+	jle	.LBB2_18
+# BB#16:                                # %polly.loop_exit48
+                                        #   in Loop: Header=BB2_17 Depth=5
+	addq	$6144, %r8              # imm = 0x1800
+	cmpq	%rax, %rsi
+	leaq	1(%rsi), %rsi
+	jle	.LBB2_17
+	.align	16, 0x90
+.LBB2_12:                               # %polly.loop_exit39
+                                        #   in Loop: Header=BB2_11 Depth=4
+	addq	$6144, %r14             # imm = 0x1800
+	cmpq	-48(%rbp), %r11         # 8-byte Folded Reload
+	leaq	1(%r11), %r11
+	jle	.LBB2_11
+	.align	16, 0x90
+.LBB2_13:                               # %polly.loop_exit32
+                                        #   in Loop: Header=BB2_8 Depth=3
+	addq	$393216, %rbx           # imm = 0x60000
+	cmpq	$1472, %r10             # imm = 0x5C0
+	leaq	64(%r10), %r10
+	movq	-56(%rbp), %rsi         # 8-byte Reload
+	jl	.LBB2_8
+# BB#14:                                # %polly.loop_exit25
+                                        #   in Loop: Header=BB2_15 Depth=2
+	movq	-80(%rbp), %rdx         # 8-byte Reload
+	cmpq	$1472, %rdx             # imm = 0x5C0
+	leaq	64(%rdx), %rdx
+	jl	.LBB2_15
+# BB#6:                                 # %polly.loop_exit18
+                                        #   in Loop: Header=BB2_5 Depth=1
+	addq	$393216, -88(%rbp)      # 8-byte Folded Spill
+                                        # imm = 0x60000
+	cmpq	$1472, %rsi             # imm = 0x5C0
+	leaq	64(%rsi), %rsi
+	jl	.LBB2_5
+# BB#7:                                 # %polly.loop_exit11
+	xorl	%eax, %eax
+	addq	$56, %rsp
+	popq	%rbx
+	popq	%r12
+	popq	%r13
+	popq	%r14
+	popq	%r15
+	popq	%rbp
+	ret
+.Ltmp28:
+	.size	main, .Ltmp28-main
+	.cfi_endproc
+
+	.type	A,@object               # @A
+	.comm	A,9437184,16
+	.type	B,@object               # @B
+	.comm	B,9437184,16
+	.type	.L.str,@object          # @.str
+	.section	.rodata.str1.1,"aMS",@progbits,1
+.L.str:
+	.asciz	 "%lf "
+	.size	.L.str, 5
+
+	.type	C,@object               # @C
+	.comm	C,9437184,16
+
+	.section	".note.GNU-stack","",@progbits
--- a/external/llvm-project/polly/www/experiments/matmul/matmul.polly.interchanged+tiled.ll
+++ b/external/llvm-project/polly/www/experiments/matmul/matmul.polly.interchanged+tiled.ll
--- a/external/llvm-project/polly/www/experiments/matmul/matmul.polly.interchanged+tiled.s
+++ b/external/llvm-project/polly/www/experiments/matmul/matmul.polly.interchanged+tiled.s
@@ -0,0 +1,390 @@
+	.file	"matmul.polly.interchanged+tiled.ll"
+	.section	.rodata.cst8,"aM",@progbits,8
+	.align	8
+.LCPI0_0:
+	.quad	4602678819172646912     # double 0.5
+	.text
+	.globl	init_array
+	.align	16, 0x90
+	.type	init_array,@function
+init_array:                             # @init_array
+	.cfi_startproc
+# BB#0:                                 # %entry
+	pushq	%rbp
+.Ltmp2:
+	.cfi_def_cfa_offset 16
+.Ltmp3:
+	.cfi_offset %rbp, -16
+	movq	%rsp, %rbp
+.Ltmp4:
+	.cfi_def_cfa_register %rbp
+	xorl	%r8d, %r8d
+	vmovsd	.LCPI0_0(%rip), %xmm0
+	.align	16, 0x90
+.LBB0_1:                                # %polly.loop_preheader3
+                                        # =>This Loop Header: Depth=1
+                                        #     Child Loop BB0_2 Depth 2
+	xorl	%ecx, %ecx
+	.align	16, 0x90
+.LBB0_2:                                # %polly.loop_header2
+                                        #   Parent Loop BB0_1 Depth=1
+                                        # =>  This Inner Loop Header: Depth=2
+	movl	%ecx, %edx
+	imull	%r8d, %edx
+	movl	%edx, %esi
+	sarl	$31, %esi
+	shrl	$22, %esi
+	addl	%edx, %esi
+	andl	$-1024, %esi            # imm = 0xFFFFFFFFFFFFFC00
+	negl	%esi
+	movq	%r8, %rax
+	shlq	$11, %rax
+	leal	1(%rdx,%rsi), %edi
+	leaq	(%rax,%rax,2), %rsi
+	leaq	1(%rcx), %rdx
+	cmpq	$1536, %rdx             # imm = 0x600
+	vcvtsi2sdl	%edi, %xmm0, %xmm1
+	vmulsd	%xmm0, %xmm1, %xmm1
+	vcvtsd2ss	%xmm1, %xmm1, %xmm1
+	vmovss	%xmm1, A(%rsi,%rcx,4)
+	vmovss	%xmm1, B(%rsi,%rcx,4)
+	movq	%rdx, %rcx
+	jne	.LBB0_2
+# BB#3:                                 # %polly.loop_exit4
+                                        #   in Loop: Header=BB0_1 Depth=1
+	incq	%r8
+	cmpq	$1536, %r8              # imm = 0x600
+	jne	.LBB0_1
+# BB#4:                                 # %polly.loop_exit
+	popq	%rbp
+	ret
+.Ltmp5:
+	.size	init_array, .Ltmp5-init_array
+	.cfi_endproc
+
+	.globl	print_array
+	.align	16, 0x90
+	.type	print_array,@function
+print_array:                            # @print_array
+	.cfi_startproc
+# BB#0:                                 # %entry
+	pushq	%rbp
+.Ltmp9:
+	.cfi_def_cfa_offset 16
+.Ltmp10:
+	.cfi_offset %rbp, -16
+	movq	%rsp, %rbp
+.Ltmp11:
+	.cfi_def_cfa_register %rbp
+	pushq	%r15
+	pushq	%r14
+	pushq	%r12
+	pushq	%rbx
+.Ltmp12:
+	.cfi_offset %rbx, -48
+.Ltmp13:
+	.cfi_offset %r12, -40
+.Ltmp14:
+	.cfi_offset %r14, -32
+.Ltmp15:
+	.cfi_offset %r15, -24
+	xorl	%r14d, %r14d
+	movl	$C, %r15d
+	.align	16, 0x90
+.LBB1_1:                                # %for.cond1.preheader
+                                        # =>This Loop Header: Depth=1
+                                        #     Child Loop BB1_2 Depth 2
+	movq	stdout(%rip), %rax
+	movq	%r15, %r12
+	xorl	%ebx, %ebx
+	.align	16, 0x90
+.LBB1_2:                                # %for.body3
+                                        #   Parent Loop BB1_1 Depth=1
+                                        # =>  This Inner Loop Header: Depth=2
+	vmovss	(%r12), %xmm0
+	vcvtss2sd	%xmm0, %xmm0, %xmm0
+	movq	%rax, %rdi
+	movl	$.L.str, %esi
+	movb	$1, %al
+	callq	fprintf
+	movslq	%ebx, %rax
+	imulq	$1717986919, %rax, %rcx # imm = 0x66666667
+	movq	%rcx, %rdx
+	shrq	$63, %rdx
+	sarq	$37, %rcx
+	addl	%edx, %ecx
+	imull	$80, %ecx, %ecx
+	subl	%ecx, %eax
+	cmpl	$79, %eax
+	jne	.LBB1_4
+# BB#3:                                 # %if.then
+                                        #   in Loop: Header=BB1_2 Depth=2
+	movq	stdout(%rip), %rsi
+	movl	$10, %edi
+	callq	fputc
+.LBB1_4:                                # %for.inc
+                                        #   in Loop: Header=BB1_2 Depth=2
+	addq	$4, %r12
+	incq	%rbx
+	movq	stdout(%rip), %rax
+	cmpq	$1536, %rbx             # imm = 0x600
+	jne	.LBB1_2
+# BB#5:                                 # %for.end
+                                        #   in Loop: Header=BB1_1 Depth=1
+	movl	$10, %edi
+	movq	%rax, %rsi
+	callq	fputc
+	addq	$6144, %r15             # imm = 0x1800
+	incq	%r14
+	cmpq	$1536, %r14             # imm = 0x600
+	jne	.LBB1_1
+# BB#6:                                 # %for.end12
+	popq	%rbx
+	popq	%r12
+	popq	%r14
+	popq	%r15
+	popq	%rbp
+	ret
+.Ltmp16:
+	.size	print_array, .Ltmp16-print_array
+	.cfi_endproc
+
+	.section	.rodata.cst8,"aM",@progbits,8
+	.align	8
+.LCPI2_0:
+	.quad	4602678819172646912     # double 0.5
+	.text
+	.globl	main
+	.align	16, 0x90
+	.type	main,@function
+main:                                   # @main
+	.cfi_startproc
+# BB#0:                                 # %entry
+	pushq	%rbp
+.Ltmp20:
+	.cfi_def_cfa_offset 16
+.Ltmp21:
+	.cfi_offset %rbp, -16
+	movq	%rsp, %rbp
+.Ltmp22:
+	.cfi_def_cfa_register %rbp
+	pushq	%r15
+	pushq	%r14
+	pushq	%r13
+	pushq	%r12
+	pushq	%rbx
+	subq	$56, %rsp
+.Ltmp23:
+	.cfi_offset %rbx, -56
+.Ltmp24:
+	.cfi_offset %r12, -48
+.Ltmp25:
+	.cfi_offset %r13, -40
+.Ltmp26:
+	.cfi_offset %r14, -32
+.Ltmp27:
+	.cfi_offset %r15, -24
+	xorl	%ebx, %ebx
+	vmovsd	.LCPI2_0(%rip), %xmm0
+	.align	16, 0x90
+.LBB2_1:                                # %polly.loop_preheader3.i
+                                        # =>This Loop Header: Depth=1
+                                        #     Child Loop BB2_2 Depth 2
+	xorl	%ecx, %ecx
+	.align	16, 0x90
+.LBB2_2:                                # %polly.loop_header2.i
+                                        #   Parent Loop BB2_1 Depth=1
+                                        # =>  This Inner Loop Header: Depth=2
+	movl	%ecx, %edx
+	imull	%ebx, %edx
+	movl	%edx, %esi
+	sarl	$31, %esi
+	shrl	$22, %esi
+	addl	%edx, %esi
+	andl	$-1024, %esi            # imm = 0xFFFFFFFFFFFFFC00
+	negl	%esi
+	movq	%rbx, %rax
+	shlq	$11, %rax
+	leal	1(%rdx,%rsi), %edi
+	leaq	(%rax,%rax,2), %rsi
+	leaq	1(%rcx), %rdx
+	cmpq	$1536, %rdx             # imm = 0x600
+	vcvtsi2sdl	%edi, %xmm0, %xmm1
+	vmulsd	%xmm0, %xmm1, %xmm1
+	vcvtsd2ss	%xmm1, %xmm1, %xmm1
+	vmovss	%xmm1, A(%rsi,%rcx,4)
+	vmovss	%xmm1, B(%rsi,%rcx,4)
+	movq	%rdx, %rcx
+	jne	.LBB2_2
+# BB#3:                                 # %polly.loop_exit4.i
+                                        #   in Loop: Header=BB2_1 Depth=1
+	incq	%rbx
+	cmpq	$1536, %rbx             # imm = 0x600
+	jne	.LBB2_1
+# BB#4:                                 # %polly.loop_preheader3.preheader
+	movl	$C, %ebx
+	movl	$C, %edi
+	xorl	%esi, %esi
+	movl	$9437184, %edx          # imm = 0x900000
+	callq	memset
+	xorl	%eax, %eax
+	.align	16, 0x90
+.LBB2_5:                                # %polly.loop_preheader17
+                                        # =>This Loop Header: Depth=1
+                                        #     Child Loop BB2_15 Depth 2
+                                        #       Child Loop BB2_8 Depth 3
+                                        #         Child Loop BB2_11 Depth 4
+                                        #           Child Loop BB2_17 Depth 5
+                                        #             Child Loop BB2_18 Depth 6
+	movq	%rax, -56(%rbp)         # 8-byte Spill
+	movq	%rbx, -88(%rbp)         # 8-byte Spill
+	movq	%rax, %rcx
+	orq	$63, %rcx
+	movq	%rcx, -72(%rbp)         # 8-byte Spill
+	leaq	-1(%rcx), %rcx
+	movq	%rcx, -48(%rbp)         # 8-byte Spill
+	movq	$-1, %r15
+	movl	$B, %ecx
+	movq	%rbx, -64(%rbp)         # 8-byte Spill
+	xorl	%r12d, %r12d
+	.align	16, 0x90
+.LBB2_15:                               # %polly.loop_preheader24
+                                        #   Parent Loop BB2_5 Depth=1
+                                        # =>  This Loop Header: Depth=2
+                                        #       Child Loop BB2_8 Depth 3
+                                        #         Child Loop BB2_11 Depth 4
+                                        #           Child Loop BB2_17 Depth 5
+                                        #             Child Loop BB2_18 Depth 6
+	movq	%rcx, -80(%rbp)         # 8-byte Spill
+	movq	%r12, %r13
+	orq	$63, %r13
+	leaq	-1(%r13), %rbx
+	xorl	%r9d, %r9d
+	movq	%rcx, %rdx
+	.align	16, 0x90
+.LBB2_8:                                # %polly.loop_header23
+                                        #   Parent Loop BB2_5 Depth=1
+                                        #     Parent Loop BB2_15 Depth=2
+                                        # =>    This Loop Header: Depth=3
+                                        #         Child Loop BB2_11 Depth 4
+                                        #           Child Loop BB2_17 Depth 5
+                                        #             Child Loop BB2_18 Depth 6
+	cmpq	-72(%rbp), %rax         # 8-byte Folded Reload
+	jg	.LBB2_13
+# BB#9:                                 # %polly.loop_header30.preheader
+                                        #   in Loop: Header=BB2_8 Depth=3
+	movq	%r9, %rax
+	orq	$63, %rax
+	cmpq	%rax, %r9
+	jg	.LBB2_13
+# BB#10:                                #   in Loop: Header=BB2_8 Depth=3
+	decq	%rax
+	movq	-64(%rbp), %r10         # 8-byte Reload
+	movq	-56(%rbp), %r11         # 8-byte Reload
+	.align	16, 0x90
+.LBB2_11:                               # %polly.loop_header37.preheader
+                                        #   Parent Loop BB2_5 Depth=1
+                                        #     Parent Loop BB2_15 Depth=2
+                                        #       Parent Loop BB2_8 Depth=3
+                                        # =>      This Loop Header: Depth=4
+                                        #           Child Loop BB2_17 Depth 5
+                                        #             Child Loop BB2_18 Depth 6
+	cmpq	%r13, %r12
+	movq	%rdx, %r14
+	movq	%r9, %rcx
+	jg	.LBB2_12
+	.align	16, 0x90
+.LBB2_17:                               # %polly.loop_header46.preheader
+                                        #   Parent Loop BB2_5 Depth=1
+                                        #     Parent Loop BB2_15 Depth=2
+                                        #       Parent Loop BB2_8 Depth=3
+                                        #         Parent Loop BB2_11 Depth=4
+                                        # =>        This Loop Header: Depth=5
+                                        #             Child Loop BB2_18 Depth 6
+	leaq	(%r11,%r11,2), %rsi
+	shlq	$11, %rsi
+	vmovss	A(%rsi,%rcx,4), %xmm0
+	movq	%r10, %rdi
+	movq	%r14, %r8
+	movq	%r15, %rsi
+.LBB2_18:                               # %polly.loop_header46
+                                        #   Parent Loop BB2_5 Depth=1
+                                        #     Parent Loop BB2_15 Depth=2
+                                        #       Parent Loop BB2_8 Depth=3
+                                        #         Parent Loop BB2_11 Depth=4
+                                        #           Parent Loop BB2_17 Depth=5
+                                        # =>          This Inner Loop Header: Depth=6
+	vmulss	(%r8), %xmm0, %xmm1
+	vaddss	(%rdi), %xmm1, %xmm1
+	vmovss	%xmm1, (%rdi)
+	addq	$4, %rdi
+	addq	$4, %r8
+	incq	%rsi
+	cmpq	%rbx, %rsi
+	jle	.LBB2_18
+# BB#16:                                # %polly.loop_exit48
+                                        #   in Loop: Header=BB2_17 Depth=5
+	addq	$6144, %r14             # imm = 0x1800
+	cmpq	%rax, %rcx
+	leaq	1(%rcx), %rcx
+	jle	.LBB2_17
+	.align	16, 0x90
+.LBB2_12:                               # %polly.loop_exit39
+                                        #   in Loop: Header=BB2_11 Depth=4
+	addq	$6144, %r10             # imm = 0x1800
+	cmpq	-48(%rbp), %r11         # 8-byte Folded Reload
+	leaq	1(%r11), %r11
+	jle	.LBB2_11
+	.align	16, 0x90
+.LBB2_13:                               # %polly.loop_exit32
+                                        #   in Loop: Header=BB2_8 Depth=3
+	addq	$393216, %rdx           # imm = 0x60000
+	cmpq	$1472, %r9              # imm = 0x5C0
+	leaq	64(%r9), %r9
+	movq	-56(%rbp), %rax         # 8-byte Reload
+	jl	.LBB2_8
+# BB#14:                                # %polly.loop_exit25
+                                        #   in Loop: Header=BB2_15 Depth=2
+	addq	$256, -64(%rbp)         # 8-byte Folded Spill
+                                        # imm = 0x100
+	movq	-80(%rbp), %rcx         # 8-byte Reload
+	addq	$256, %rcx              # imm = 0x100
+	addq	$64, %r15
+	cmpq	$1472, %r12             # imm = 0x5C0
+	leaq	64(%r12), %r12
+	jl	.LBB2_15
+# BB#6:                                 # %polly.loop_exit18
+                                        #   in Loop: Header=BB2_5 Depth=1
+	movq	-88(%rbp), %rbx         # 8-byte Reload
+	addq	$393216, %rbx           # imm = 0x60000
+	cmpq	$1472, %rax             # imm = 0x5C0
+	leaq	64(%rax), %rax
+	jl	.LBB2_5
+# BB#7:                                 # %polly.loop_exit11
+	xorl	%eax, %eax
+	addq	$56, %rsp
+	popq	%rbx
+	popq	%r12
+	popq	%r13
+	popq	%r14
+	popq	%r15
+	popq	%rbp
+	ret
+.Ltmp28:
+	.size	main, .Ltmp28-main
+	.cfi_endproc
+
+	.type	A,@object               # @A
+	.comm	A,9437184,16
+	.type	B,@object               # @B
+	.comm	B,9437184,16
+	.type	.L.str,@object          # @.str
+	.section	.rodata.str1.1,"aMS",@progbits,1
+.L.str:
+	.asciz	 "%lf "
+	.size	.L.str, 5
+
+	.type	C,@object               # @C
+	.comm	C,9437184,16
+
+	.section	".note.GNU-stack","",@progbits
--- a/external/llvm-project/polly/www/experiments/matmul/matmul.polly.interchanged.ll
+++ b/external/llvm-project/polly/www/experiments/matmul/matmul.polly.interchanged.ll
--- a/external/llvm-project/polly/www/experiments/matmul/matmul.polly.interchanged.s
+++ b/external/llvm-project/polly/www/experiments/matmul/matmul.polly.interchanged.s
@@ -0,0 +1,286 @@
+	.file	"matmul.polly.interchanged.ll"
+	.section	.rodata.cst8,"aM",@progbits,8
+	.align	8
+.LCPI0_0:
+	.quad	4602678819172646912     # double 0.5
+	.text
+	.globl	init_array
+	.align	16, 0x90
+	.type	init_array,@function
+init_array:                             # @init_array
+	.cfi_startproc
+# BB#0:                                 # %entry
+	pushq	%rbp
+.Ltmp2:
+	.cfi_def_cfa_offset 16
+.Ltmp3:
+	.cfi_offset %rbp, -16
+	movq	%rsp, %rbp
+.Ltmp4:
+	.cfi_def_cfa_register %rbp
+	xorl	%r8d, %r8d
+	vmovsd	.LCPI0_0(%rip), %xmm0
+	.align	16, 0x90
+.LBB0_1:                                # %polly.loop_preheader3
+                                        # =>This Loop Header: Depth=1
+                                        #     Child Loop BB0_2 Depth 2
+	xorl	%ecx, %ecx
+	.align	16, 0x90
+.LBB0_2:                                # %polly.loop_header2
+                                        #   Parent Loop BB0_1 Depth=1
+                                        # =>  This Inner Loop Header: Depth=2
+	movl	%ecx, %edx
+	imull	%r8d, %edx
+	movl	%edx, %esi
+	sarl	$31, %esi
+	shrl	$22, %esi
+	addl	%edx, %esi
+	andl	$-1024, %esi            # imm = 0xFFFFFFFFFFFFFC00
+	negl	%esi
+	movq	%r8, %rax
+	shlq	$11, %rax
+	leal	1(%rdx,%rsi), %edi
+	leaq	(%rax,%rax,2), %rsi
+	leaq	1(%rcx), %rdx
+	cmpq	$1536, %rdx             # imm = 0x600
+	vcvtsi2sdl	%edi, %xmm0, %xmm1
+	vmulsd	%xmm0, %xmm1, %xmm1
+	vcvtsd2ss	%xmm1, %xmm1, %xmm1
+	vmovss	%xmm1, A(%rsi,%rcx,4)
+	vmovss	%xmm1, B(%rsi,%rcx,4)
+	movq	%rdx, %rcx
+	jne	.LBB0_2
+# BB#3:                                 # %polly.loop_exit4
+                                        #   in Loop: Header=BB0_1 Depth=1
+	incq	%r8
+	cmpq	$1536, %r8              # imm = 0x600
+	jne	.LBB0_1
+# BB#4:                                 # %polly.loop_exit
+	popq	%rbp
+	ret
+.Ltmp5:
+	.size	init_array, .Ltmp5-init_array
+	.cfi_endproc
+
+	.globl	print_array
+	.align	16, 0x90
+	.type	print_array,@function
+print_array:                            # @print_array
+	.cfi_startproc
+# BB#0:                                 # %entry
+	pushq	%rbp
+.Ltmp9:
+	.cfi_def_cfa_offset 16
+.Ltmp10:
+	.cfi_offset %rbp, -16
+	movq	%rsp, %rbp
+.Ltmp11:
+	.cfi_def_cfa_register %rbp
+	pushq	%r15
+	pushq	%r14
+	pushq	%r12
+	pushq	%rbx
+.Ltmp12:
+	.cfi_offset %rbx, -48
+.Ltmp13:
+	.cfi_offset %r12, -40
+.Ltmp14:
+	.cfi_offset %r14, -32
+.Ltmp15:
+	.cfi_offset %r15, -24
+	xorl	%r14d, %r14d
+	movl	$C, %r15d
+	.align	16, 0x90
+.LBB1_1:                                # %for.cond1.preheader
+                                        # =>This Loop Header: Depth=1
+                                        #     Child Loop BB1_2 Depth 2
+	movq	stdout(%rip), %rax
+	movq	%r15, %r12
+	xorl	%ebx, %ebx
+	.align	16, 0x90
+.LBB1_2:                                # %for.body3
+                                        #   Parent Loop BB1_1 Depth=1
+                                        # =>  This Inner Loop Header: Depth=2
+	vmovss	(%r12), %xmm0
+	vcvtss2sd	%xmm0, %xmm0, %xmm0
+	movq	%rax, %rdi
+	movl	$.L.str, %esi
+	movb	$1, %al
+	callq	fprintf
+	movslq	%ebx, %rax
+	imulq	$1717986919, %rax, %rcx # imm = 0x66666667
+	movq	%rcx, %rdx
+	shrq	$63, %rdx
+	sarq	$37, %rcx
+	addl	%edx, %ecx
+	imull	$80, %ecx, %ecx
+	subl	%ecx, %eax
+	cmpl	$79, %eax
+	jne	.LBB1_4
+# BB#3:                                 # %if.then
+                                        #   in Loop: Header=BB1_2 Depth=2
+	movq	stdout(%rip), %rsi
+	movl	$10, %edi
+	callq	fputc
+.LBB1_4:                                # %for.inc
+                                        #   in Loop: Header=BB1_2 Depth=2
+	addq	$4, %r12
+	incq	%rbx
+	movq	stdout(%rip), %rax
+	cmpq	$1536, %rbx             # imm = 0x600
+	jne	.LBB1_2
+# BB#5:                                 # %for.end
+                                        #   in Loop: Header=BB1_1 Depth=1
+	movl	$10, %edi
+	movq	%rax, %rsi
+	callq	fputc
+	addq	$6144, %r15             # imm = 0x1800
+	incq	%r14
+	cmpq	$1536, %r14             # imm = 0x600
+	jne	.LBB1_1
+# BB#6:                                 # %for.end12
+	popq	%rbx
+	popq	%r12
+	popq	%r14
+	popq	%r15
+	popq	%rbp
+	ret
+.Ltmp16:
+	.size	print_array, .Ltmp16-print_array
+	.cfi_endproc
+
+	.section	.rodata.cst8,"aM",@progbits,8
+	.align	8
+.LCPI2_0:
+	.quad	4602678819172646912     # double 0.5
+	.text
+	.globl	main
+	.align	16, 0x90
+	.type	main,@function
+main:                                   # @main
+	.cfi_startproc
+# BB#0:                                 # %entry
+	pushq	%rbp
+.Ltmp20:
+	.cfi_def_cfa_offset 16
+.Ltmp21:
+	.cfi_offset %rbp, -16
+	movq	%rsp, %rbp
+.Ltmp22:
+	.cfi_def_cfa_register %rbp
+	pushq	%r14
+	pushq	%rbx
+.Ltmp23:
+	.cfi_offset %rbx, -32
+.Ltmp24:
+	.cfi_offset %r14, -24
+	xorl	%ebx, %ebx
+	vmovsd	.LCPI2_0(%rip), %xmm0
+	.align	16, 0x90
+.LBB2_1:                                # %polly.loop_preheader3.i
+                                        # =>This Loop Header: Depth=1
+                                        #     Child Loop BB2_2 Depth 2
+	xorl	%ecx, %ecx
+	.align	16, 0x90
+.LBB2_2:                                # %polly.loop_header2.i
+                                        #   Parent Loop BB2_1 Depth=1
+                                        # =>  This Inner Loop Header: Depth=2
+	movl	%ecx, %edx
+	imull	%ebx, %edx
+	movl	%edx, %esi
+	sarl	$31, %esi
+	shrl	$22, %esi
+	addl	%edx, %esi
+	andl	$-1024, %esi            # imm = 0xFFFFFFFFFFFFFC00
+	negl	%esi
+	movq	%rbx, %rax
+	shlq	$11, %rax
+	leal	1(%rdx,%rsi), %edi
+	leaq	(%rax,%rax,2), %rsi
+	leaq	1(%rcx), %rdx
+	cmpq	$1536, %rdx             # imm = 0x600
+	vcvtsi2sdl	%edi, %xmm0, %xmm1
+	vmulsd	%xmm0, %xmm1, %xmm1
+	vcvtsd2ss	%xmm1, %xmm1, %xmm1
+	vmovss	%xmm1, A(%rsi,%rcx,4)
+	vmovss	%xmm1, B(%rsi,%rcx,4)
+	movq	%rdx, %rcx
+	jne	.LBB2_2
+# BB#3:                                 # %polly.loop_exit4.i
+                                        #   in Loop: Header=BB2_1 Depth=1
+	incq	%rbx
+	cmpq	$1536, %rbx             # imm = 0x600
+	jne	.LBB2_1
+# BB#4:                                 # %polly.loop_preheader3.preheader
+	movl	$C, %r14d
+	movl	$C, %edi
+	xorl	%esi, %esi
+	movl	$9437184, %edx          # imm = 0x900000
+	callq	memset
+	xorl	%eax, %eax
+	.align	16, 0x90
+.LBB2_5:                                # %polly.loop_preheader17
+                                        # =>This Loop Header: Depth=1
+                                        #     Child Loop BB2_10 Depth 2
+                                        #       Child Loop BB2_8 Depth 3
+	movl	$B, %ebx
+	xorl	%edx, %edx
+	.align	16, 0x90
+.LBB2_10:                               # %polly.loop_preheader24
+                                        #   Parent Loop BB2_5 Depth=1
+                                        # =>  This Loop Header: Depth=2
+                                        #       Child Loop BB2_8 Depth 3
+	leaq	(%rax,%rax,2), %rcx
+	shlq	$11, %rcx
+	vmovss	A(%rcx,%rdx,4), %xmm0
+	movl	$1536, %esi             # imm = 0x600
+	movq	%r14, %rdi
+	movq	%rbx, %rcx
+	.align	16, 0x90
+.LBB2_8:                                # %polly.loop_header23
+                                        #   Parent Loop BB2_5 Depth=1
+                                        #     Parent Loop BB2_10 Depth=2
+                                        # =>    This Inner Loop Header: Depth=3
+	vmulss	(%rcx), %xmm0, %xmm1
+	vaddss	(%rdi), %xmm1, %xmm1
+	vmovss	%xmm1, (%rdi)
+	addq	$4, %rdi
+	addq	$4, %rcx
+	decq	%rsi
+	jne	.LBB2_8
+# BB#9:                                 # %polly.loop_exit25
+                                        #   in Loop: Header=BB2_10 Depth=2
+	addq	$6144, %rbx             # imm = 0x1800
+	incq	%rdx
+	cmpq	$1536, %rdx             # imm = 0x600
+	jne	.LBB2_10
+# BB#6:                                 # %polly.loop_exit18
+                                        #   in Loop: Header=BB2_5 Depth=1
+	addq	$6144, %r14             # imm = 0x1800
+	incq	%rax
+	cmpq	$1536, %rax             # imm = 0x600
+	jne	.LBB2_5
+# BB#7:                                 # %polly.loop_exit11
+	xorl	%eax, %eax
+	popq	%rbx
+	popq	%r14
+	popq	%rbp
+	ret
+.Ltmp25:
+	.size	main, .Ltmp25-main
+	.cfi_endproc
+
+	.type	A,@object               # @A
+	.comm	A,9437184,16
+	.type	B,@object               # @B
+	.comm	B,9437184,16
+	.type	.L.str,@object          # @.str
+	.section	.rodata.str1.1,"aMS",@progbits,1
+.L.str:
+	.asciz	 "%lf "
+	.size	.L.str, 5
+
+	.type	C,@object               # @C
+	.comm	C,9437184,16
+
+	.section	".note.GNU-stack","",@progbits
--- a/Show More
+++ b/Show More