From 7eb0c31f085aa1a20779fea1bb8c84a8e6bc9d50 Mon Sep 17 00:00:00 2001 From: Jason Orendorff Date: Fri, 21 Aug 2009 13:18:36 -0500 Subject: [PATCH 1/2] Fix `trace-test.py -f`. r=dmandelin via IRC, no bug. --HG-- extra : rebase_source : 7ae99002a44e6e5323c9c2909f523d278ac0f38d --- js/src/trace-test/trace-test.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/js/src/trace-test/trace-test.py b/js/src/trace-test/trace-test.py index feac37af895..32689ae238d 100644 --- a/js/src/trace-test/trace-test.py +++ b/js/src/trace-test/trace-test.py @@ -1,6 +1,7 @@ # trace-test.py -- Python harness for JavaScript trace tests. import datetime, os, re, sys +import subprocess from subprocess import * JS = None @@ -118,7 +119,7 @@ def run_tests(tests, lib_dir): print('FAILURES:') for test in failures: if OPTIONS.show_failed: - print(' ' + get_test_cmd(test, lib_dir)) + print(' ' + subprocess.list2cmdline(get_test_cmd(test, lib_dir))) else: print(' ' + test) else: From 5a401319baab223f51b4c45df4bc6bbdd3dca8b7 Mon Sep 17 00:00:00 2001 From: Graydon Hoare Date: Fri, 21 Aug 2009 11:27:08 -0700 Subject: [PATCH 2/2] Bug 511919 - Copy the x64 backend from Tamarin, r=dvander. --- js/src/nanojit/NativeX64.cpp | 1302 ++++++++++++++++++++++++++++++++++ js/src/nanojit/NativeX64.h | 360 ++++++++++ 2 files changed, 1662 insertions(+) create mode 100644 js/src/nanojit/NativeX64.cpp create mode 100644 js/src/nanojit/NativeX64.h diff --git a/js/src/nanojit/NativeX64.cpp b/js/src/nanojit/NativeX64.cpp new file mode 100644 index 00000000000..7645c561aff --- /dev/null +++ b/js/src/nanojit/NativeX64.cpp @@ -0,0 +1,1302 @@ +/* -*- Mode: C++; c-basic-offset: 4; indent-tabs-mode: nil; tab-width: 4 -*- */ +/* vi: set ts=4 sw=4 expandtab: (add to ~/.vimrc: set modeline modelines=5) */ +/* ***** BEGIN LICENSE BLOCK ***** + * Version: MPL 1.1/GPL 2.0/LGPL 2.1 + * + * The contents of this file are subject to the Mozilla Public License Version + * 1.1 (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. + * + * The Original Code is [Open Source Virtual Machine]. + * + * The Initial Developer of the Original Code is + * Adobe System Incorporated. + * Portions created by the Initial Developer are Copyright (C) 2009 + * the Initial Developer. All Rights Reserved. + * + * Contributor(s): + * Adobe AS3 Team + * + * Alternatively, the contents of this file may be used under the terms of + * either the GNU General Public License Version 2 or later (the "GPL"), or + * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), + * in which case the provisions of the GPL or the LGPL are applicable instead + * of those above. If you wish to allow use of your version of this file only + * under the terms of either the GPL or the LGPL, and not to allow others to + * use your version of this file under the terms of the MPL, indicate your + * decision by deleting the provisions above and replace them with the notice + * and other provisions required by the GPL or the LGPL. If you do not delete + * the provisions above, a recipient may use your version of this file under + * the terms of any one of the MPL, the GPL or the LGPL. 
+ * + * ***** END LICENSE BLOCK ***** */ + +#include "nanojit.h" + +// uncomment this to enable _vprof/_nvprof macros +//#define DOPROF +#include "../vprof/vprof.h" + +#if defined FEATURE_NANOJIT && defined NANOJIT_X64 + +/* +completion +- 64bit branch offsets +- finish cmov/qcmov with other conditions +- validate asm_cond with other conditions + +better code +- put R12 back in play as a base register +- no-disp addr modes (except RBP/R13) +- disp64 branch/call +- spill gp values to xmm registers? +- prefer xmm registers for copies since gprs are in higher demand? +- stack arg doubles +- stack based LIR_param + +tracing +- asm_loop +- asm_qjoin +- asm_qhi +- nFragExit + +*/ + +namespace nanojit +{ + const Register Assembler::retRegs[] = { RAX }; +#ifdef _MSC_VER + const Register Assembler::argRegs[] = { RCX, RDX, R8, R9 }; + const Register Assembler::savedRegs[] = { RBX, RSI, RDI, R12, R13, R14, R15 }; +#else + const Register Assembler::argRegs[] = { RDI, RSI, RDX, RCX, R8, R9 }; + const Register Assembler::savedRegs[] = { RBX, R12, R13, R14, R15 }; +#endif + + const char *regNames[] = { + "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", + "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", + "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", + "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15" + }; + +#ifdef _DEBUG + #define TODO(x) todo(#x) + static void todo(const char *s) { + verbose_only( avmplus::AvmLog("%s",s); ) + NanoAssertMsgf(false, "%s", s); + } +#else + #define TODO(x) +#endif + + // MODRM and restrictions: + // memory access modes != 11 require SIB if base&7 == 4 (RSP or R12) + // mode 00 with base&7 == 5 means RIP+disp32 (RBP or R13), use mode 01 disp8=0 instead + // rex prefix required to use RSP-R15 as 8bit registers in mod/rm8 modes. + + // take R12 out of play as a base register because it requires the SIB byte like ESP + const RegisterMask BaseRegs = GpRegs & ~rmask(R12); + + static inline int oplen(uint64_t op) { + return op & 255; + } + + // encode 2-register rex prefix. dropped if none of its bits are set. + static inline uint64_t rexrb(uint64_t op, Register r, Register b) { + int shift = 64 - 8*oplen(op); + uint64_t rex = ((op >> shift) & 255) | ((r&8)>>1) | ((b&8)>>3); + return rex != 0x40 ? op | rex << shift : op - 1; + } + + // encode 2-register rex prefix. dropped if none of its bits are set, but + // keep REX if b >= rsp, to allow uniform use of all 16 8bit registers + static inline uint64_t rexrb8(uint64_t op, Register r, Register b) { + int shift = 64 - 8*oplen(op); + uint64_t rex = ((op >> shift) & 255) | ((r&8)>>1) | ((b&8)>>3); + return ((rex | (b & ~3)) != 0x40) ? (op | (rex << shift)) : op - 1; + } + + // encode 2-register rex prefix that follows a manditory prefix (66,F2,F3) + // [prefix][rex][opcode] + static inline uint64_t rexprb(uint64_t op, Register r, Register b) { + int shift = 64 - 8*oplen(op) + 8; + uint64_t rex = ((op >> shift) & 255) | ((r&8)>>1) | ((b&8)>>3); + // to drop rex, we replace rex with manditory prefix, and decrement length + return rex != 0x40 ? 
op | rex << shift : + ((op & ~(255LL<>(shift-8)&255) << shift) - 1; + } + + // [rex][opcode][mod-rr] + static inline uint64_t mod_rr(uint64_t op, Register r, Register b) { + return op | uint64_t((r&7)<<3 | (b&7))<<56; + } + + static inline uint64_t mod_disp32(uint64_t op, Register r, Register b, int32_t d) { + NanoAssert(IsGpReg(r) && IsGpReg(b)); + NanoAssert((b & 7) != 4); // using RSP or R12 as base requires SIB + if (isS8(d)) { + // op is: 0x[disp32=0][mod=2:r:b][op][rex][len] + NanoAssert((((op>>24)&255)>>6) == 2); // disp32 mode + int len = oplen(op); + op = (op & ~0xff000000LL) | (0x40 | (r&7)<<3 | (b&7))<<24; // replace mod + return op<<24 | int64_t(d)<<56 | (len-3); // shrink disp, add disp8 + } else { + // op is: 0x[disp32][mod][op][rex][len] + return op | int64_t(d)<<32 | uint64_t((r&7)<<3 | (b&7))<<24; + } + } + + #ifdef NJ_VERBOSE + void Assembler::dis(NIns *p, int bytes) { + char b[32], *s = b; // room for 8 hex bytes plus null + *s++ = ' '; + for (NIns *end = p + bytes; p < end; p++) { + VMPI_sprintf(s, "%02x ", *p); + s += 3; + } + *s = 0; + asm_output("%s", b); + } + #endif + + void Assembler::emit(uint64_t op) { + int len = oplen(op); + // we will only move nIns by -len bytes, but we write 8 + // bytes. so need to protect 8 so we dont stomp the page + // header or the end of the preceding page (might segf) + underrunProtect(8); + ((int64_t*)_nIns)[-1] = op; + _nIns -= len; // move pointer by length encoded in opcode + _nvprof("x64-bytes", len); + verbose_only( if (_logc->lcbits & LC_Assembly) dis(_nIns, len); ) + } + + void Assembler::emit8(uint64_t op, int64_t v) { + NanoAssert(isS8(v)); + emit(op | uint64_t(v)<<56); + } + + void Assembler::emit32(uint64_t op, int64_t v) { + NanoAssert(isS32(v)); + emit(op | uint64_t(uint32_t(v))<<32); + } + + // 2-register modrm32 form + void Assembler::emitrr(uint64_t op, Register r, Register b) { + emit(rexrb(mod_rr(op, r, b), r, b)); + } + + // 2-register modrm8 form (8 bit operand size) + void Assembler::emitrr8(uint64_t op, Register r, Register b) { + emit(rexrb8(mod_rr(op, r, b), r, b)); + } + + // same as emitrr, but with a prefix byte + void Assembler::emitprr(uint64_t op, Register r, Register b) { + emit(rexprb(mod_rr(op, r, b), r, b)); + } + + // disp32 modrm form, when the disp fits in the instruction (opcode is 1-3 bytes) + void Assembler::emitrm(uint64_t op, Register r, int32_t d, Register b) { + emit(rexrb(mod_disp32(op, r, b, d), r, b)); + } + + // disp32 modrm form when the disp must be written separately (opcode is 4+ bytes) + void Assembler::emitprm(uint64_t op, Register r, int32_t d, Register b) { + if (isS8(d)) { + NanoAssert(((op>>56)&0xC0) == 0x80); // make sure mod bits == 2 == disp32 mode + underrunProtect(1+8); + *(--_nIns) = (NIns) d; + _nvprof("x64-bytes", 1); + op ^= 0xC000000000000000LL; // change mod bits to 1 == disp8 mode + } else { + underrunProtect(4+8); // room for displ plus fullsize op + *((int32_t*)(_nIns -= 4)) = d; + _nvprof("x64-bytes", 4); + } + emitprr(op, r, b); + } + + void Assembler::emitrr_imm(uint64_t op, Register r, Register b, int32_t imm) { + NanoAssert(IsGpReg(r) && IsGpReg(b)); + underrunProtect(4+8); // room for imm plus fullsize op + *((int32_t*)(_nIns -= 4)) = imm; + _nvprof("x86-bytes", 4); + emitrr(op, r, b); + } + + // op = [rex][opcode][modrm][imm8] + void Assembler::emitr_imm8(uint64_t op, Register b, int32_t imm8) { + NanoAssert(IsGpReg(b) && isS8(imm8)); + op |= uint64_t(imm8)<<56 | uint64_t(b&7)<<48; // modrm is 2nd to last byte + emit(rexrb(op, (Register)0, b)); + } + + 
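    // ------------------------------------------------------------------
    // Editorial worked example (not part of the original patch): how the
    // emitters above turn an opcode template into machine code. Each X64_*
    // constant keeps the fixed instruction bytes in its high bytes and the
    // encoded length in its low byte; emit() always stores all 8 bytes and
    // then moves _nIns back by only that length.
    //
    //   emitrr(X64_addqrr, RCX, RDX)          // goal: add rcx, rdx
    //     template  0xC003480000000003        // bytes 48 03 C0, len = 3
    //     mod_rr(op, RCX, RDX) ORs (1<<3)|2 into the top (ModRM) byte,
    //     giving    0xCA03480000000003        // ModRM C0 -> CA (reg=rcx, rm=rdx)
    //     rexrb(op, RCX, RDX) leaves REX.W = 0x48 untouched (no high regs),
    //     so the three bytes committed are 48 03 CA = add rcx, rdx.
    //   With R8..R15 the register's high bit would be ORed into REX.R/REX.B;
    //   for 32bit templates whose REX byte would be a plain 0x40, rexrb
    //   instead returns op - 1, shortening the template by one byte so the
    //   redundant prefix is never committed.
    // ------------------------------------------------------------------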
void Assembler::MR(Register d, Register s) { + NanoAssert(IsGpReg(d) && IsGpReg(s)); + emitrr(X64_movqr, d, s); + } + + void Assembler::JMP(NIns *target) { + if (!target || isS32(target - _nIns)) { + underrunProtect(8); // must do this before calculating offset + if (target && isS8(target - _nIns)) { + emit8(X64_jmp8, target - _nIns); + } else { + emit32(X64_jmp, target ? target - _nIns : 0); + } + } else { + TODO(jmp64); + } + } + + // register allocation for 2-address style ops of the form R = R (op) B + void Assembler::regalloc_binary(LIns *ins, RegisterMask allow, Register &rr, Register &ra, Register &rb) { + rb = UnknownReg; + LIns *a = ins->oprnd1(); + LIns *b = ins->oprnd2(); + if (a != b) { + rb = findRegFor(b, allow); + allow &= ~rmask(rb); + } + rr = prepResultReg(ins, allow); + Reservation* rA = getresv(a); + // if this is last use of a in reg, we can re-use result reg + if (rA == 0 || (ra = rA->reg) == UnknownReg) { + ra = findSpecificRegFor(a, rr); + } else { + // rA already has a register assigned + } + if (a == b) { + rb = ra; + } + } + + void Assembler::asm_qbinop(LIns *ins) { + asm_arith(ins); + } + + void Assembler::asm_shift(LIns *ins) { + // shift require rcx for shift count + LIns *b = ins->oprnd2(); + if (b->isconst()) { + asm_shift_imm(ins); + return; + } + Register rr, ra; + if (b != ins->oprnd1()) { + findSpecificRegFor(b, RCX); + regalloc_unary(ins, GpRegs & ~rmask(RCX), rr, ra); + } else { + // a == b means both must be in RCX + regalloc_unary(ins, rmask(RCX), rr, ra); + } + X64Opcode xop; + switch (ins->opcode()) { + default: + TODO(asm_shift); + case LIR_qursh: xop = X64_shrq; break; + case LIR_qirsh: xop = X64_sarq; break; + case LIR_qilsh: xop = X64_shlq; break; + case LIR_ush: xop = X64_shr; break; + case LIR_rsh: xop = X64_sar; break; + case LIR_lsh: xop = X64_shl; break; + } + emitr(xop, rr); + if (rr != ra) + MR(rr, ra); + } + + void Assembler::asm_shift_imm(LIns *ins) { + Register rr, ra; + regalloc_unary(ins, GpRegs, rr, ra); + X64Opcode xop; + switch (ins->opcode()) { + default: TODO(shiftimm); + case LIR_qursh: xop = X64_shrqi; break; + case LIR_qirsh: xop = X64_sarqi; break; + case LIR_qilsh: xop = X64_shlqi; break; + case LIR_ush: xop = X64_shri; break; + case LIR_rsh: xop = X64_sari; break; + case LIR_lsh: xop = X64_shli; break; + } + int shift = ins->oprnd2()->imm32() & 255; + emit8(rexrb(xop | uint64_t(rr&7)<<48, (Register)0, rr), shift); + if (rr != ra) + MR(rr, ra); + } + + static bool isImm32(LIns *ins) { + return ins->isconst() || (ins->isconstq() && isS32(ins->imm64())); + } + static int32_t getImm32(LIns *ins) { + return ins->isconst() ? 
ins->imm32() : int32_t(ins->imm64()); + } + + // binary op, integer regs, rhs is int32 const + void Assembler::asm_arith_imm(LIns *ins) { + LIns *b = ins->oprnd2(); + int32_t imm = getImm32(b); + LOpcode op = ins->opcode(); + Register rr, ra; + if (op == LIR_mul) { + // imul has true 3-addr form, it doesn't clobber ra + rr = prepResultReg(ins, GpRegs); + LIns *a = ins->oprnd1(); + ra = findRegFor(a, GpRegs); + emitrr_imm(X64_imuli, rr, ra, imm); + return; + } + regalloc_unary(ins, GpRegs, rr, ra); + X64Opcode xop; + if (isS8(imm)) { + switch (ins->opcode()) { + default: TODO(arith_imm8); + case LIR_iaddp: + case LIR_add: xop = X64_addlr8; break; + case LIR_and: xop = X64_andlr8; break; + case LIR_or: xop = X64_orlr8; break; + case LIR_sub: xop = X64_sublr8; break; + case LIR_xor: xop = X64_xorlr8; break; + case LIR_qiadd: + case LIR_qaddp: xop = X64_addqr8; break; + case LIR_qiand: xop = X64_andqr8; break; + case LIR_qior: xop = X64_orqr8; break; + case LIR_qxor: xop = X64_xorqr8; break; + } + emitr_imm8(xop, rr, imm); + } else { + switch (ins->opcode()) { + default: TODO(arith_imm); + case LIR_iaddp: + case LIR_add: xop = X64_addlri; break; + case LIR_and: xop = X64_andlri; break; + case LIR_or: xop = X64_orlri; break; + case LIR_sub: xop = X64_sublri; break; + case LIR_xor: xop = X64_xorlri; break; + case LIR_qiadd: + case LIR_qaddp: xop = X64_addqri; break; + case LIR_qiand: xop = X64_andqri; break; + case LIR_qior: xop = X64_orqri; break; + case LIR_qxor: xop = X64_xorqri; break; + } + emitr_imm(xop, rr, imm); + } + if (rr != ra) + MR(rr, ra); + } + + // binary op with integer registers + void Assembler::asm_arith(LIns *ins) { + Register rr, ra, rb; + LOpcode op = ins->opcode(); + if ((op & ~LIR64) >= LIR_lsh && (op & ~LIR64) <= LIR_ush) { + asm_shift(ins); + return; + } + LIns *b = ins->oprnd2(); + if (isImm32(b)) { + asm_arith_imm(ins); + return; + } + regalloc_binary(ins, GpRegs, rr, ra, rb); + X64Opcode xop; + switch (ins->opcode()) { + default: + TODO(asm_arith); + case LIR_or: + xop = X64_orlrr; + break; + case LIR_sub: + xop = X64_subrr; + break; + case LIR_iaddp: + case LIR_add: + xop = X64_addrr; + break; + case LIR_and: + xop = X64_andrr; + break; + case LIR_xor: + xop = X64_xorrr; + break; + case LIR_mul: + xop = X64_imul; + break; + case LIR_qxor: + xop = X64_xorqrr; + break; + case LIR_qior: + xop = X64_orqrr; + break; + case LIR_qiand: + xop = X64_andqrr; + break; + case LIR_qiadd: + case LIR_qaddp: + xop = X64_addqrr; + break; + } + emitrr(xop, rr, rb); + if (rr != ra) + MR(rr,ra); + } + + // binary op with fp registers + void Assembler::asm_fop(LIns *ins) { + Register rr, ra, rb; + regalloc_binary(ins, FpRegs, rr, ra, rb); + X64Opcode xop; + switch (ins->opcode()) { + default: + TODO(asm_fop); + case LIR_fdiv: + xop = X64_divsd; + break; + case LIR_fmul: + xop = X64_mulsd; + break; + case LIR_fadd: + xop = X64_addsd; + break; + case LIR_fsub: + xop = X64_subsd; + break; + } + emitprr(xop, rr, rb); + if (rr != ra) { + asm_nongp_copy(rr, ra); + } + } + + void Assembler::asm_neg_not(LIns *ins) { + Register rr, ra; + regalloc_unary(ins, GpRegs, rr, ra); + NanoAssert(IsGpReg(ra)); + X64Opcode xop; + if (ins->isop(LIR_not)) { + xop = X64_not; + } else { + xop = X64_neg; + } + emitr(xop, rr); + if (rr != ra) + MR(rr, ra); + } + + void Assembler::asm_call(LIns *ins) { + const CallInfo *call = ins->callInfo(); + ArgSize sizes[MAXARGS]; + int argc = call->get_sizes(sizes); + + bool indirect = call->isIndirect(); + if (!indirect) { + verbose_only(if (_logc->lcbits & LC_Assembly) + 
outputf(" %p:", _nIns); + ) + NIns *target = (NIns*)call->_address; + // must do underrunProtect before calculating offset + underrunProtect(8); + if (isS32(target - _nIns)) { + emit32(X64_call, target - _nIns); + } else { + // can't reach target from here, load imm64 and do an indirect jump + emit(X64_callrax); + emit_quad(RAX, (uint64_t)target); + } + } else { + // Indirect call: we assign the address arg to RAX since it's not + // used for regular arguments, and is otherwise scratch since it's + // clobberred by the call. + asm_regarg(ARGSIZE_P, ins->arg(--argc), RAX); + emit(X64_callrax); + } + + #ifdef _MSC_VER + int stk_used = 32; // always reserve 32byte shadow area + #else + int stk_used = 0; + Register fr = XMM0; + #endif + int arg_index = 0; + for (int i = 0; i < argc; i++) { + int j = argc - i - 1; + ArgSize sz = sizes[j]; + LIns* arg = ins->arg(j); + if ((sz & ARGSIZE_MASK_INT) && arg_index < NumArgRegs) { + // gp arg + asm_regarg(sz, arg, argRegs[arg_index]); + arg_index++; + } + #ifdef _MSC_VER + else if (sz == ARGSIZE_F && arg_index < NumArgRegs) { + // double goes in XMM reg # based on overall arg_index + asm_regarg(sz, arg, Register(XMM0+arg_index)); + arg_index++; + } + #else + else if (sz == ARGSIZE_F && fr < XMM8) { + // double goes in next available XMM register + asm_regarg(sz, arg, fr); + fr = nextreg(fr); + } + #endif + else { + asm_stkarg(sz, arg, stk_used); + stk_used += sizeof(void*); + } + } + + if (stk_used > max_stk_used) + max_stk_used = stk_used; + } + + void Assembler::asm_regarg(ArgSize sz, LIns *p, Register r) { + if (sz == ARGSIZE_I) { + NanoAssert(!p->isQuad()); + if (p->isconst()) { + emit_quad(r, int64_t(p->imm32())); + return; + } + // sign extend int32 to int64 + emitrr(X64_movsxdr, r, r); + } else if (sz == ARGSIZE_U) { + NanoAssert(!p->isQuad()); + if (p->isconst()) { + emit_quad(r, uint64_t(uint32_t(p->imm32()))); + return; + } + // zero extend with 32bit mov, auto-zeros upper 32bits + emitrr(X64_movlr, r, r); + } + /* there is no point in folding an immediate here, because + * the argument register must be a scratch register and we're + * just before a call. Just reserving the register will cause + * the constant to be rematerialized nearby in asm_restore(), + * which is the same instruction we would otherwise emit right + * here, and moving it earlier in the stream provides more scheduling + * freedom to the cpu. 
*/ + findSpecificRegFor(p, r); + } + + void Assembler::asm_stkarg(ArgSize sz, LIns *p, int stk_off) { + NanoAssert(isS8(stk_off)); + if (sz & ARGSIZE_MASK_INT) { + Register r = findRegFor(p, GpRegs); + uint64_t xop = X64_movqspr | uint64_t(stk_off) << 56; // movq [rsp+d8], r + xop |= uint64_t((r&7)<<3) << 40 | uint64_t((r&8)>>1) << 24; // insert r into mod/rm and rex bytes + emit(xop); + if (sz == ARGSIZE_I) { + // extend int32 to int64 + NanoAssert(!p->isQuad()); + emitrr(X64_movsxdr, r, r); + } else if (sz == ARGSIZE_U) { + // extend uint32 to uint64 + NanoAssert(!p->isQuad()); + emitrr(X64_movlr, r, r); + } + } else { + TODO(asm_stkarg_non_int); + } + } + + void Assembler::asm_promote(LIns *ins) { + Register rr, ra; + regalloc_unary(ins, GpRegs, rr, ra); + NanoAssert(IsGpReg(ra)); + if (ins->isop(LIR_u2q)) { + emitrr(X64_movlr, rr, ra); // 32bit mov zeros the upper 32bits of the target + } else { + NanoAssert(ins->isop(LIR_i2q)); + emitrr(X64_movsxdr, rr, ra); // sign extend 32->64 + } + } + + // the CVTSI2SD instruction only writes to the low 64bits of the target + // XMM register, which hinders register renaming and makes dependence + // chains longer. So we precede with XORPS to clear the target register. + + void Assembler::asm_i2f(LIns *ins) { + Register r = prepResultReg(ins, FpRegs); + Register b = findRegFor(ins->oprnd1(), GpRegs); + emitprr(X64_cvtsi2sd, r, b); // cvtsi2sd xmmr, b only writes xmm:0:64 + emitprr(X64_xorps, r, r); // xorpd xmmr,xmmr to break dependency chains + } + + void Assembler::asm_u2f(LIns *ins) { + Register r = prepResultReg(ins, FpRegs); + Register b = findRegFor(ins->oprnd1(), GpRegs); + NanoAssert(!ins->oprnd1()->isQuad()); + // since oprnd1 value is 32bit, its okay to zero-extend the value without worrying about clobbering. + emitprr(X64_cvtsq2sd, r, b); // convert int64 to double + emitprr(X64_xorps, r, r); // xorpd xmmr,xmmr to break dependency chains + emitrr(X64_movlr, b, b); // zero extend u32 to int64 + } + + void Assembler::asm_cmov(LIns *ins) { + LIns* cond = ins->oprnd1(); + LIns* iftrue = ins->oprnd2(); + LIns* iffalse = ins->oprnd3(); + NanoAssert(cond->isCmp()); + NanoAssert((ins->isop(LIR_qcmov) && iftrue->isQuad() && iffalse->isQuad()) || + (ins->isop(LIR_cmov) && !iftrue->isQuad() && !iffalse->isQuad())); + + // this code assumes that neither LD nor MR nor MRcc set any of the condition flags. + // (This is true on Intel, is it true on all architectures?) + const Register rr = prepResultReg(ins, GpRegs); + const Register rf = findRegFor(iffalse, GpRegs & ~rmask(rr)); + X64Opcode xop; + switch (cond->opcode()) { + default: TODO(asm_cmov); + case LIR_qeq: + xop = X64_cmovqne; + break; + } + emitrr(xop, rr, rf); + /*const Register rt =*/ findSpecificRegFor(iftrue, rr); + asm_cmp(cond); + } + + NIns* Assembler::asm_branch(bool onFalse, LIns *cond, NIns *target) { + LOpcode condop = cond->opcode(); + if (condop >= LIR_feq && condop <= LIR_fge) + return asm_fbranch(onFalse, cond, target); + + // we must ensure there's room for the instr before calculating + // the offset. and the offset, determines the opcode (8bit or 32bit) + underrunProtect(8); + if (target && isS8(target - _nIns)) { + static const X64Opcode j8[] = { + X64_je8, // eq + X64_jl8, X64_jg8, X64_jle8, X64_jge8, // lt, gt, le, ge + X64_jb8, X64_ja8, X64_jbe8, X64_jae8 // ult, ugt, ule, uge + }; + uint64_t xop = j8[(condop & ~LIR64) - LIR_eq]; + xop ^= onFalse ? 
(uint64_t)X64_jneg8 : 0; + emit8(xop, target - _nIns); + } else { + static const X64Opcode j32[] = { + X64_je, // eq + X64_jl, X64_jg, X64_jle, X64_jge, // lt, gt, le, ge + X64_jb, X64_ja, X64_jbe, X64_jae // ult, ugt, ule, uge + }; + uint64_t xop = j32[(condop & ~LIR64) - LIR_eq]; + xop ^= onFalse ? (uint64_t)X64_jneg : 0; + emit32(xop, target ? target - _nIns : 0); + } + NIns *patch = _nIns; // addr of instr to patch + asm_cmp(cond); + return patch; + } + + void Assembler::asm_cmp(LIns *cond) { + LIns *b = cond->oprnd2(); + if (isImm32(b)) { + asm_cmp_imm(cond); + return; + } + LIns *a = cond->oprnd1(); + Register ra, rb; + if (a != b) { + Reservation *resva, *resvb; + findRegFor2(GpRegs, a, resva, b, resvb); + ra = resva->reg; + rb = resvb->reg; + } else { + // optimize-me: this will produce a const result! + ra = rb = findRegFor(a, GpRegs); + } + + LOpcode condop = cond->opcode(); + emitrr(condop & LIR64 ? X64_cmpqr : X64_cmplr, ra, rb); + } + + void Assembler::asm_cmp_imm(LIns *cond) { + LIns *a = cond->oprnd1(); + LIns *b = cond->oprnd2(); + Register ra = findRegFor(a, GpRegs); + int32_t imm = getImm32(b); + if (isS8(imm)) { + X64Opcode xop = (cond->opcode() & LIR64) ? X64_cmpqr8 : X64_cmplr8; + emitr_imm8(xop, ra, imm); + } else { + X64Opcode xop = (cond->opcode() & LIR64) ? X64_cmpqri : X64_cmplri; + emitr_imm(xop, ra, imm); + } + } + + // compiling floating point branches + // discussion in https://bugzilla.mozilla.org/show_bug.cgi?id=443886 + // + // fucom/p/pp: c3 c2 c0 jae ja jbe jb je jne + // ucomisd: Z P C !C !C&!Z C|Z C Z !Z + // -- -- -- -- ----- --- -- -- -- + // unordered 1 1 1 T T T + // greater > 0 0 0 T T T + // less < 0 0 1 T T T + // equal = 1 0 0 T T T + // + // here's the cases, using conditionals: + // + // branch >= > <= < = + // ------ --- --- --- --- --- + // LIR_jt jae ja swap+jae swap+ja jp over je + // LIR_jf jb jbe swap+jb swap+jbe jne+jp + + NIns* Assembler::asm_fbranch(bool onFalse, LIns *cond, NIns *target) { + LOpcode condop = cond->opcode(); + NIns *patch; + LIns *a = cond->oprnd1(); + LIns *b = cond->oprnd2(); + if (condop == LIR_feq) { + if (onFalse) { + // branch if unordered or != + underrunProtect(16); // 12 needed, round up for overhang + emit32(X64_jp, target ? target - _nIns : 0); + emit32(X64_jne, target ? target - _nIns : 0); + patch = _nIns; + } else { + // jp skip (2byte) + // jeq target + // skip: ... + underrunProtect(16); // 7 needed but we write 2 instr + NIns *skip = _nIns; + emit32(X64_je, target ? target - _nIns : 0); + patch = _nIns; + emit8(X64_jp8, skip - _nIns); + } + } + else { + if (condop == LIR_flt) { + condop = LIR_fgt; + LIns *t = a; a = b; b = t; + } else if (condop == LIR_fle) { + condop = LIR_fge; + LIns *t = a; a = b; b = t; + } + X64Opcode xop; + if (condop == LIR_fgt) + xop = onFalse ? X64_jbe : X64_ja; + else // LIR_fge + xop = onFalse ? X64_jb : X64_jae; + underrunProtect(8); + emit32(xop, target ? 
target - _nIns : 0); + patch = _nIns; + } + fcmp(a, b); + return patch; + } + + void Assembler::asm_fcond(LIns *ins) { + LOpcode op = ins->opcode(); + LIns *a = ins->oprnd1(); + LIns *b = ins->oprnd2(); + if (op == LIR_feq) { + // result = ZF & !PF, must do logic on flags + // r = al|bl|cl|dl, can only use rh without rex prefix + Register r = prepResultReg(ins, 1<reg, resvb->reg); + } + + void Assembler::asm_restore(LIns *ins, Reservation *resv, Register r) { + (void) r; + if (ins->isop(LIR_alloc)) { + int d = disp(resv); + emitrm(X64_leaqrm, r, d, FP); + } + else if (ins->isconst()) { + if (!resv->arIndex) { + ins->resv()->clear(); + } + // unsafe to use xor r,r for zero because it changes cc's + emit_int(r, ins->imm32()); + } + else if (ins->isconstq() && IsGpReg(r)) { + if (!resv->arIndex) { + ins->resv()->clear(); + } + // unsafe to use xor r,r for zero because it changes cc's + emit_quad(r, ins->imm64()); + } + else { + int d = findMemFor(ins); + if (IsFpReg(r)) { + NanoAssert(ins->isQuad()); + // load 64bits into XMM. don't know if double or int64, assume double. + emitprm(X64_movsdrm, r, d, FP); + } else if (ins->isQuad()) { + emitrm(X64_movqrm, r, d, FP); + } else { + emitrm(X64_movlrm, r, d, FP); + } + } + verbose_only( if (_logc->lcbits & LC_RegAlloc) { + outputForEOL(" <= restore %s", + _thisfrag->lirbuf->names->formatRef(ins)); } ) + } + + void Assembler::asm_cond(LIns *ins) { + LOpcode op = ins->opcode(); + // unlike x86-32, with a rex prefix we can use any GP register as an 8bit target + Register r = prepResultReg(ins, GpRegs); + // SETcc only sets low 8 bits, so extend + emitrr8(X64_movzx8, r, r); + X64Opcode xop; + switch (op) { + default: + TODO(cond); + case LIR_qeq: + case LIR_eq: xop = X64_sete; break; + case LIR_qlt: + case LIR_lt: xop = X64_setl; break; + case LIR_qle: + case LIR_le: xop = X64_setle; break; + case LIR_qgt: + case LIR_gt: xop = X64_setg; break; + case LIR_qge: + case LIR_ge: xop = X64_setge; break; + case LIR_qult: + case LIR_ult: xop = X64_setb; break; + case LIR_qule: + case LIR_ule: xop = X64_setbe; break; + case LIR_qugt: + case LIR_ugt: xop = X64_seta; break; + case LIR_quge: + case LIR_uge: xop = X64_setae; break; + case LIR_ov: xop = X64_seto; break; + } + emitr8(xop, r); + asm_cmp(ins); + } + + void Assembler::asm_ret(LIns *ins) { + JMP(_epilogue); + assignSavedRegs(); + LIns *value = ins->oprnd1(); + Register r = ins->isop(LIR_ret) ? RAX : XMM0; + findSpecificRegFor(value, r); + } + + void Assembler::asm_nongp_copy(Register d, Register s) { + if (!IsFpReg(d) && IsFpReg(s)) { + // gpr <- xmm: use movq r/m64, xmm (66 REX.W 0F 7E /r) + emitprr(X64_movqrx, s, d); + } else if (IsFpReg(d) && IsFpReg(s)) { + // xmm <- xmm: use movaps. 
movsd r,r causes partial register stall + emitrr(X64_movapsr, d, s); + } else { + // xmm <- gpr: use movq xmm, r/m64 (66 REX.W 0F 6E /r) + emitprr(X64_movqxr, d, s); + } + } + + void Assembler::regalloc_load(LIns *ins, Register &rr, int32_t &dr, Register &rb) { + dr = ins->disp(); + LIns *base = ins->oprnd1(); + rb = getBaseReg(base, dr, BaseRegs); + Reservation *resv = getresv(ins); + if (resv && (rr = resv->reg) != UnknownReg) { + // keep already assigned register + freeRsrcOf(ins, false); + } else { + // use a gpr in case we're copying a non-double + rr = prepResultReg(ins, GpRegs & ~rmask(rb)); + } + } + + void Assembler::asm_load64(LIns *ins) { + Register rr, rb; + int32_t dr; + regalloc_load(ins, rr, dr, rb); + if (IsGpReg(rr)) { + // general 64bit load, 32bit const displacement + emitrm(X64_movqrm, rr, dr, rb); + } else { + // load 64bits into XMM. don't know if double or int64, assume double. + emitprm(X64_movsdrm, rr, dr, rb); + } + } + + void Assembler::asm_ld(LIns *ins) { + NanoAssert(!ins->isQuad()); + Register r, b; + int32_t d; + regalloc_load(ins, r, d, b); + emitrm(X64_movlrm, r, d, b); + } + + void Assembler::asm_store64(LIns *value, int d, LIns *base) { + NanoAssert(value->isQuad()); + Register b = getBaseReg(base, d, BaseRegs); + + // if we have to choose a register, use a GPR, but not the base reg + Reservation *resv = getresv(value); + Register r; + if (!resv || (r = resv->reg) == UnknownReg) { + r = findRegFor(value, GpRegs & ~rmask(b)); + } + + if (IsGpReg(r)) { + // gpr store + emitrm(X64_movqmr, r, d, b); + } + else { + // xmm store + emitprm(X64_movsdmr, r, d, b); + } + } + + void Assembler::asm_store32(LIns *value, int d, LIns *base) { + NanoAssert(!value->isQuad()); + Register b = getBaseReg(base, d, BaseRegs); + Register r = findRegFor(value, GpRegs & ~rmask(b)); + + // store 32bits to 64bit addr. use rex so we can use all 16 regs + emitrm(X64_movlmr, r, d, b); + } + + // generate a 32bit constant, must not affect condition codes! + void Assembler::emit_int(Register r, int32_t v) { + NanoAssert(IsGpReg(r)); + emitr_imm(X64_movi, r, v); + } + + // generate a 64bit constant, must not affect condition codes! + void Assembler::emit_quad(Register r, uint64_t v) { + NanoAssert(IsGpReg(r)); + if (isU32(v)) { + emit_int(r, int32_t(v)); + return; + } + if (isS32(v)) { + // safe for sign-extension 32->64 + emitr_imm(X64_movqi32, r, int32_t(v)); + return; + } + underrunProtect(8+8); // imm64 + worst case instr len + ((uint64_t*)_nIns)[-1] = v; + _nIns -= 8; + _nvprof("x64-bytes", 8); + emitr(X64_movqi, r); + } + + void Assembler::asm_int(LIns *ins) { + Register r = prepResultReg(ins, GpRegs); + int32_t v = ins->imm32(); + if (v == 0) { + // special case for zero + emitrr(X64_xorrr, r, r); + return; + } + emit_int(r, v); + } + + void Assembler::asm_quad(LIns *ins) { + uint64_t v = ins->imm64(); + RegisterMask allow = v == 0 ? 
GpRegs|FpRegs : GpRegs; + Register r = prepResultReg(ins, allow); + if (v == 0) { + if (IsGpReg(r)) { + // special case for zero + emitrr(X64_xorrr, r, r); + } else { + // xorps for xmm + emitprr(X64_xorps, r, r); + } + } else { + emit_quad(r, v); + } + } + + void Assembler::asm_qjoin(LIns*) { + TODO(asm_qjoin); + } + + Register Assembler::asm_prep_fcall(Reservation*, LIns *ins) { + return prepResultReg(ins, rmask(XMM0)); + } + + void Assembler::asm_param(LIns *ins) { + uint32_t a = ins->paramArg(); + uint32_t kind = ins->paramKind(); + if (kind == 0) { + // ordinary param + // first six args always in registers for mac x64 + if (a < 6) { + // incoming arg in register + prepResultReg(ins, rmask(argRegs[a])); + } else { + // todo: support stack based args, arg 0 is at [FP+off] where off + // is the # of regs to be pushed in genProlog() + TODO(asm_param_stk); + } + } + else { + // saved param + prepResultReg(ins, rmask(savedRegs[a])); + } + } + + // register allocation for 2-address style unary ops of the form R = (op) R + void Assembler::regalloc_unary(LIns *ins, RegisterMask allow, Register &rr, Register &ra) { + LIns *a = ins->oprnd1(); + rr = prepResultReg(ins, allow); + Reservation* rA = getresv(a); + // if this is last use of a in reg, we can re-use result reg + if (rA == 0 || (ra = rA->reg) == UnknownReg) { + ra = findSpecificRegFor(a, rr); + } else { + // rA already has a register assigned. caller must emit a copy + // to rr once instr code is generated. (ie mov rr,ra ; op rr) + } + } + + static const AVMPLUS_ALIGN16(int64_t) negateMask[] = {0x8000000000000000LL,0}; + + void Assembler::asm_fneg(LIns *ins) { + Register rr, ra; + if (isS32((uintptr_t)negateMask) || isS32((NIns*)negateMask - _nIns)) { + regalloc_unary(ins, FpRegs, rr, ra); + if (isS32((uintptr_t)negateMask)) { + // builtin code is in bottom or top 2GB addr space, use absolute addressing + underrunProtect(4+8); + *((int32_t*)(_nIns -= 4)) = (int32_t)(uintptr_t)negateMask; + _nvprof("x64-bytes", 4); + uint64_t xop = X64_xorpsa | uint64_t((rr&7)<<3)<<48; // put rr[0:2] into mod/rm byte + xop = rexrb(xop, rr, (Register)0); // put rr[3] into rex byte + emit(xop); + } else { + // jit code is within +/-2GB of builtin code, use rip-relative + underrunProtect(4+8); + int32_t d = (int32_t) ((NIns*)negateMask - _nIns); + *((int32_t*)(_nIns -= 4)) = d; + _nvprof("x64-bytes", 4); + emitrr(X64_xorpsm, rr, (Register)0); + } + if (ra != rr) + asm_nongp_copy(rr,ra); + } else { + // this is just hideous - can't use RIP-relative load, can't use + // absolute-address load, and cant move imm64 const to XMM. + // so do it all in a GPR. hrmph. + rr = prepResultReg(ins, GpRegs); + ra = findRegFor(ins->oprnd1(), GpRegs & ~rmask(rr)); + emitrr(X64_xorqrr, rr, ra); // xor rr, ra + emit_quad(rr, negateMask[0]); // mov rr, 0x8000000000000000 + } + } + + void Assembler::asm_qhi(LIns*) { + TODO(asm_qhi); + } + + void Assembler::asm_qlo(LIns *ins) { + Register rr, ra; + regalloc_unary(ins, GpRegs, rr, ra); + NanoAssert(IsGpReg(ra)); + emitrr(X64_movlr, rr, ra); // 32bit mov zeros the upper 32bits of the target + } + + void Assembler::asm_spill(Register rr, int d, bool /*pop*/, bool quad) { + if (d) { + if (!IsFpReg(rr)) { + X64Opcode xop = quad ? 
X64_movqmr : X64_movlmr; + emitrm(xop, rr, d, FP); + } else { + // store 64bits from XMM to memory + NanoAssert(quad); + emitprm(X64_movsdmr, rr, d, FP); + } + } + } + + void Assembler::asm_loop(LIns*, NInsList&) { + TODO(asm_loop); + } + + NIns* Assembler::genPrologue() { + // activation frame is 4 bytes per entry even on 64bit machines + uint32_t stackNeeded = max_stk_used + _activation.highwatermark * 4; + + uint32_t stackPushed = + sizeof(void*) + // returnaddr + sizeof(void*); // ebp + uint32_t aligned = alignUp(stackNeeded + stackPushed, NJ_ALIGN_STACK); + uint32_t amt = aligned - stackPushed; + + // Reserve stackNeeded bytes, padded + // to preserve NJ_ALIGN_STACK-byte alignment. + if (amt) { + if (isS8(amt)) + emitr_imm8(X64_subqr8, RSP, amt); + else + emitr_imm(X64_subqri, RSP, amt); + } + + verbose_only( outputAddr=true; asm_output("[patch entry]"); ) + NIns *patchEntry = _nIns; + MR(FP, RSP); // Establish our own FP. + emitr(X64_pushr, FP); // Save caller's FP. + + return patchEntry; + } + + NIns* Assembler::genEpilogue() { + // mov rsp, rbp + // pop rbp + // ret + max_stk_used = 0; + emit(X64_ret); + emitr(X64_popr, RBP); + MR(RSP, RBP); + return _nIns; + } + + void Assembler::nRegisterResetAll(RegAlloc &a) { + // add scratch registers to our free list for the allocator + a.clear(); + a.used = 0; +#ifdef _MSC_VER + a.free = 0x001fffcf; // rax-rbx, rsi, rdi, r8-r15, xmm0-xmm5 +#else + a.free = 0xffffffff & ~(1<,jp, for LIR_jf(feq) + // we just patched the jne, now patch the jp. + next += 6; + NanoAssert(((int32_t*)next)[-1] == 0); + NanoAssert(isS32(target - next)); + ((int32_t*)next)[-1] = int32_t(target - next); + } + } + + Register Assembler::nRegisterAllocFromSet(RegisterMask set) { + #if defined _WIN64 + DWORD tr; + _BitScanForward(&tr, set); + _allocator.free &= ~rmask((Register)tr); + return (Register) tr; + #else + // gcc asm syntax + Register r; + asm("bsf %1, %%eax\n\t" + "btr %%eax, %2\n\t" + "movl %%eax, %0\n\t" + : "=m"(r) : "m"(set), "m"(_allocator.free) : "%eax", "memory"); + (void)set; + return r; + #endif + } + + void Assembler::nFragExit(LIns*) { + TODO(nFragExit); + } + + void Assembler::nInit(AvmCore*) + {} + + void Assembler::underrunProtect(ptrdiff_t bytes) { + NanoAssertMsg(bytes<=LARGEST_UNDERRUN_PROT, "constant LARGEST_UNDERRUN_PROT is too small"); + NIns *pc = _nIns; + NIns *top = _inExit ? this->exitStart : this->codeStart; + + #if PEDANTIC + // pedanticTop is based on the last call to underrunProtect; any time we call + // underrunProtect and would use more than what's already protected, then insert + // a page break jump. Sometimes, this will be to a new page, usually it's just + // the next instruction + + NanoAssert(pedanticTop >= top); + if (pc - bytes < pedanticTop) { + // no page break required, but insert a far branch anyway just to be difficult + const int br_size = 8; // opcode + 32bit addr + if (pc - bytes - br_size < top) { + // really do need a page break + verbose_only(if (_logc->lcbits & LC_Assembly) outputf("newpage %p:", pc);) + codeAlloc(); + } + // now emit the jump, but make sure we won't need another page break. + // we're pedantic, but not *that* pedantic. + pedanticTop = _nIns - br_size; + JMP(pc); + pedanticTop = _nIns - bytes; + } + #else + if (pc - bytes < top) { + verbose_only(if (_logc->lcbits & LC_Assembly) outputf("newpage %p:", pc);) + codeAlloc(); + // this jump will call underrunProtect again, but since we're on a new + // page, nothing will happen. 
+ JMP(pc); + } + #endif + } + + RegisterMask Assembler::hint(LIns *, RegisterMask allow) { + return allow; + } + + void Assembler::nativePageSetup() { + if (!_nIns) { + codeAlloc(); + IF_PEDANTIC( pedanticTop = _nIns; ) + } + if (!_nExitIns) { + codeAlloc(true); + } + } + + void Assembler::nativePageReset() + {} + +} // namespace nanojit + +#endif // FEATURE_NANOJIT && NANOJIT_X64 diff --git a/js/src/nanojit/NativeX64.h b/js/src/nanojit/NativeX64.h new file mode 100644 index 00000000000..1e7ce3b086e --- /dev/null +++ b/js/src/nanojit/NativeX64.h @@ -0,0 +1,360 @@ +/* -*- Mode: C++; c-basic-offset: 4; indent-tabs-mode: nil; tab-width: 4 -*- */ +/* vi: set ts=4 sw=4 expandtab: (add to ~/.vimrc: set modeline modelines=5) */ +/* ***** BEGIN LICENSE BLOCK ***** + * Version: MPL 1.1/GPL 2.0/LGPL 2.1 + * + * The contents of this file are subject to the Mozilla Public License Version + * 1.1 (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. + * + * The Original Code is [Open Source Virtual Machine]. + * + * The Initial Developer of the Original Code is + * Adobe System Incorporated. + * Portions created by the Initial Developer are Copyright (C) 2008 + * the Initial Developer. All Rights Reserved. + * + * Contributor(s): + * Adobe AS3 Team + * + * Alternatively, the contents of this file may be used under the terms of + * either the GNU General Public License Version 2 or later (the "GPL"), or + * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), + * in which case the provisions of the GPL or the LGPL are applicable instead + * of those above. If you wish to allow use of your version of this file only + * under the terms of either the GPL or the LGPL, and not to allow others to + * use your version of this file under the terms of the MPL, indicate your + * decision by deleting the provisions above and replace them with the notice + * and other provisions required by the GPL or the LGPL. If you do not delete + * the provisions above, a recipient may use your version of this file under + * the terms of any one of the MPL, the GPL or the LGPL. 
+ * + * ***** END LICENSE BLOCK ***** */ + +#ifndef __nanojit_NativeX64__ +#define __nanojit_NativeX64__ + +#ifndef NANOJIT_64BIT +#error "NANOJIT_64BIT must be defined for X64 backend" +#endif + +#ifdef PERFM +#define DOPROF +#include "../vprof/vprof.h" +#define count_instr() _nvprof("x64",1) +#define count_prolog() _nvprof("x64-prolog",1); count_instr(); +#define count_imt() _nvprof("x64-imt",1) count_instr() +#else +#define count_instr() +#define count_prolog() +#define count_imt() +#endif + +namespace nanojit +{ +#define NJ_MAX_STACK_ENTRY 256 +#define NJ_ALIGN_STACK 16 + + enum Register { + RAX = 0, // 1st int return, # of sse varargs + RCX = 1, // 4th int arg + RDX = 2, // 3rd int arg 2nd return + RBX = 3, // saved + RSP = 4, // stack ptr + RBP = 5, // frame ptr, saved, sib reqd + RSI = 6, // 2nd int arg + RDI = 7, // 1st int arg + R8 = 8, // 5th int arg + R9 = 9, // 6th int arg + R10 = 10, // scratch + R11 = 11, // scratch + R12 = 12, // saved + R13 = 13, // saved, sib reqd like rbp + R14 = 14, // saved + R15 = 15, // saved + + XMM0 = 16, // 1st double arg, return + XMM1 = 17, // 2nd double arg, return + XMM2 = 18, // 3rd double arg + XMM3 = 19, // 4th double arg + XMM4 = 20, // 5th double arg + XMM5 = 21, // 6th double arg + XMM6 = 22, // 7th double arg + XMM7 = 23, // 8th double arg + XMM8 = 24, // scratch + XMM9 = 25, // scratch + XMM10 = 26, // scratch + XMM11 = 27, // scratch + XMM12 = 28, // scratch + XMM13 = 29, // scratch + XMM14 = 30, // scratch + XMM15 = 31, // scratch + + FP = RBP, + UnknownReg = 32, + FirstReg = RAX, + LastReg = XMM15 + }; + +/* + * Micro-templating variable-length opcodes, idea first + * describe by Mike Pall of Luajit. + * + * X86-64 opcode encodings: LSB encodes the length of the + * opcode in bytes, remaining bytes are encoded as 1-7 bytes + * in a single uint64_t value. The value is written as a single + * store into the code stream, and the code pointer is decremented + * by the length. each successive instruction partially overlaps + * the previous one. + * + * emit methods below are able to encode mod/rm, sib, rex, and + * register and small immediate values into these opcode values + * without much branchy code. + * + * these opcodes encapsulate all the const parts of the instruction. + * for example, the alu-immediate opcodes (add, sub, etc) encode + * part of their opcode in the R field of the mod/rm byte; this + * hardcoded value is in the constant below, and the R argument + * to emitrr() is 0. In a few cases, a whole instruction is encoded + * this way (eg callrax). + * + * when a disp32, imm32, or imm64 suffix can't fit in an 8-byte + * opcode, then it is written into the code separately and not counted + * in the opcode length. 
+ */ + + enum X64Opcode +#if defined(_MSC_VER) && _MSC_VER >= 1400 +#pragma warning(disable:4480) // nonstandard extension used: specifying underlying type for enum + : uint64_t +#endif + { + // 64bit opcode constants + // msb lsb len + X64_addqrr = 0xC003480000000003LL, // 64bit add r += b + X64_addqri = 0xC081480000000003LL, // 64bit add r += int64(imm32) + X64_addqr8 = 0x00C0834800000004LL, // 64bit add r += int64(imm8) + X64_andqri = 0xE081480000000003LL, // 64bit and r &= int64(imm32) + X64_andqr8 = 0x00E0834800000004LL, // 64bit and r &= int64(imm8) + X64_orqri = 0xC881480000000003LL, // 64bit or r |= int64(imm32) + X64_orqr8 = 0x00C8834800000004LL, // 64bit or r |= int64(imm8) + X64_xorqri = 0xF081480000000003LL, // 64bit xor r ^= int64(imm32) + X64_xorqr8 = 0x00F0834800000004LL, // 64bit xor r ^= int64(imm8) + X64_addlri = 0xC081400000000003LL, // 32bit add r += imm32 + X64_addlr8 = 0x00C0834000000004LL, // 32bit add r += imm8 + X64_andlri = 0xE081400000000003LL, // 32bit and r &= imm32 + X64_andlr8 = 0x00E0834000000004LL, // 32bit and r &= imm8 + X64_orlri = 0xC881400000000003LL, // 32bit or r |= imm32 + X64_orlr8 = 0x00C8834000000004LL, // 32bit or r |= imm8 + X64_sublri = 0xE881400000000003LL, // 32bit sub r -= imm32 + X64_sublr8 = 0x00E8834000000004LL, // 32bit sub r -= imm8 + X64_xorlri = 0xF081400000000003LL, // 32bit xor r ^= imm32 + X64_xorlr8 = 0x00F0834000000004LL, // 32bit xor r ^= imm8 + X64_addrr = 0xC003400000000003LL, // 32bit add r += b + X64_andqrr = 0xC023480000000003LL, // 64bit and r &= b + X64_andrr = 0xC023400000000003LL, // 32bit and r &= b + X64_call = 0x00000000E8000005LL, // near call + X64_callrax = 0xD0FF000000000002LL, // indirect call to addr in rax (no REX) + X64_cmovqne = 0xC0450F4800000004LL, // 64bit conditional mov if (c) r = b + X64_cmplr = 0xC03B400000000003LL, // 32bit compare r,b + X64_cmpqr = 0xC03B480000000003LL, // 64bit compare r,b + X64_cmplri = 0xF881400000000003LL, // 32bit compare r,imm32 + X64_cmpqri = 0xF881480000000003LL, // 64bit compare r,int64(imm32) + X64_cmplr8 = 0x00F8834000000004LL, // 32bit compare r,imm8 + X64_cmpqr8 = 0x00F8834800000004LL, // 64bit compare r,int64(imm8) + X64_cvtsi2sd= 0xC02A0F40F2000005LL, // convert int32 to double r = (double) b + X64_cvtsq2sd= 0xC02A0F48F2000005LL, // convert int64 to double r = (double) b + X64_divsd = 0xC05E0F40F2000005LL, // divide scalar double r /= b + X64_mulsd = 0xC0590F40F2000005LL, // multiply scalar double r *= b + X64_addsd = 0xC0580F40F2000005LL, // add scalar double r += b + X64_imul = 0xC0AF0F4000000004LL, // 32bit signed mul r *= b + X64_imuli = 0xC069400000000003LL, // 32bit signed mul r = b * imm32 + X64_imul8 = 0x00C06B4000000004LL, // 32bit signed mul r = b * imm8 + X64_jmp = 0x00000000E9000005LL, // jump near rel32 + X64_jmp8 = 0x00EB000000000002LL, // jump near rel8 + X64_jb = 0x00000000820F0006LL, // jump near if below (uint <) + X64_jae = 0x00000000830F0006LL, // jump near if above or equal (uint >=) + X64_ja = 0x00000000870F0006LL, // jump near if above (uint >) + X64_jbe = 0x00000000860F0006LL, // jump near if below or equal (uint <=) + X64_je = 0x00000000840F0006LL, // near jump if equal + X64_jne = 0x00000000850F0006LL, // jump near if not equal + X64_jl = 0x000000008C0F0006LL, // jump near if less (int <) + X64_jge = 0x000000008D0F0006LL, // jump near if greater or equal (int >=) + X64_jg = 0x000000008F0F0006LL, // jump near if greater (int >) + X64_jle = 0x000000008E0F0006LL, // jump near if less or equal (int <=) + X64_jp = 0x000000008A0F0006LL, // jump 
near if parity (PF == 1) + X64_jnp = 0x000000008B0F0006LL, // jump near if not parity (PF == 0) + X64_jneg = 0x0000000001000000LL, // xor with this mask to negate the condition + X64_jb8 = 0x0072000000000002LL, // jump near if below (uint <) + X64_jae8 = 0x0073000000000002LL, // jump near if above or equal (uint >=) + X64_ja8 = 0x0077000000000002LL, // jump near if above (uint >) + X64_jbe8 = 0x0076000000000002LL, // jump near if below or equal (uint <=) + X64_je8 = 0x0074000000000002LL, // near jump if equal + X64_jne8 = 0x0075000000000002LL, // jump near if not equal + X64_jl8 = 0x007C000000000002LL, // jump near if less (int <) + X64_jge8 = 0x007D000000000002LL, // jump near if greater or equal (int >=) + X64_jg8 = 0x007F000000000002LL, // jump near if greater (int >) + X64_jle8 = 0x007E000000000002LL, // jump near if less or equal (int <=) + X64_jp8 = 0x007A000000000002LL, // jump near if parity (PF == 1) + X64_jnp8 = 0x007B000000000002LL, // jump near if not parity (PF == 0) + X64_jneg8 = 0x0001000000000000LL, // xor with this mask to negate the condition + X64_leaqrm = 0x00000000808D4807LL, // 64bit load effective addr reg <- disp32+base + X64_learm = 0x00000000808D4007LL, // 32bit load effective addr reg <- disp32+base + X64_movlr = 0xC08B400000000003LL, // 32bit mov r <- b + X64_movlmr = 0x0000000080894007LL, // 32bit store r -> [b+d32] + X64_movlrm = 0x00000000808B4007LL, // 32bit load r <- [b+d32] + X64_movqmr = 0x0000000080894807LL, // 64bit store gpr -> [b+d32] + X64_movqspr = 0x0024448948000005LL, // 64bit store gpr -> [rsp+d32] (sib required) + X64_movqr = 0xC08B480000000003LL, // 64bit mov r <- b + X64_movqi = 0xB848000000000002LL, // 64bit mov r <- imm64 + X64_movi = 0xB840000000000002LL, // 32bit mov r <- imm32 + X64_movqi32 = 0xC0C7480000000003LL, // 64bit mov r <- int64(imm32) + X64_movapsr = 0xC0280F4000000004LL, // 128bit mov xmm <- xmm + X64_movqrx = 0xC07E0F4866000005LL, // 64bit mov b <- xmm-r + X64_movqxr = 0xC06E0F4866000005LL, // 64bit mov b -> xmm-r + X64_movqrm = 0x00000000808B4807LL, // 64bit load r <- [b+d32] + X64_movsdrr = 0xC0100F40F2000005LL, // 64bit mov xmm-r <- xmm-b (upper 64bits unchanged) + X64_movsdrm = 0x80100F40F2000005LL, // 64bit load xmm-r <- [b+d32] (upper 64 cleared) + X64_movsdmr = 0x80110F40F2000005LL, // 64bit store xmm-r -> [b+d32] + X64_movsxdr = 0xC063480000000003LL, // sign extend i32 to i64 r = (int64)(int32) b + X64_movzx8 = 0xC0B60F4000000004LL, // zero extend i8 to i64 r = (uint64)(uint8) b + X64_neg = 0xD8F7400000000003LL, // 32bit two's compliment b = -b + X64_nop1 = 0x9000000000000001LL, // one byte NOP + X64_nop2 = 0x9066000000000002LL, // two byte NOP + X64_nop3 = 0x001F0F0000000003LL, // three byte NOP + X64_nop4 = 0x00401F0F00000004LL, // four byte NOP + X64_nop5 = 0x0000441F0F000005LL, // five byte NOP + X64_nop6 = 0x0000441F0F660006LL, // six byte NOP + X64_nop7 = 0x00000000801F0F07LL, // seven byte NOP + X64_not = 0xD0F7400000000003LL, // 32bit ones compliment b = ~b + X64_orlrr = 0xC00B400000000003LL, // 32bit or r |= b + X64_orqrr = 0xC00B480000000003LL, // 64bit or r |= b + X64_popr = 0x5840000000000002LL, // 64bit pop r <- [rsp++] + X64_pushr = 0x5040000000000002LL, // 64bit push r -> [--rsp] + X64_pxor = 0xC0EF0F4066000005LL, // 128bit xor xmm-r ^= xmm-b + X64_ret = 0xC300000000000001LL, // near return from called procedure + X64_sete = 0xC0940F4000000004LL, // set byte if equal (ZF == 1) + X64_seto = 0xC0900F4000000004LL, // set byte if overflow (OF == 1) + X64_setc = 0xC0920F4000000004LL, // set byte if carry (CF 
== 1) + X64_setl = 0xC09C0F4000000004LL, // set byte if less (int <) (SF != OF) + X64_setle = 0xC09E0F4000000004LL, // set byte if less or equal (int <=) (ZF == 1 || SF != OF) + X64_setg = 0xC09F0F4000000004LL, // set byte if greater (int >) (ZF == 0 && SF == OF) + X64_setge = 0xC09D0F4000000004LL, // set byte if greater or equal (int >=) (SF == OF) + X64_seta = 0xC0970F4000000004LL, // set byte if above (uint >) (CF == 0 && ZF == 0) + X64_setae = 0xC0930F4000000004LL, // set byte if above or equal (uint >=) (CF == 0) + X64_setb = 0xC0920F4000000004LL, // set byte if below (uint <) (CF == 1) + X64_setbe = 0xC0960F4000000004LL, // set byte if below or equal (uint <=) (ZF == 1 || CF == 1) + X64_subsd = 0xC05C0F40F2000005LL, // subtract scalar double r -= b + X64_shl = 0xE0D3400000000003LL, // 32bit left shift r <<= rcx + X64_shlq = 0xE0D3480000000003LL, // 64bit left shift r <<= rcx + X64_shr = 0xE8D3400000000003LL, // 32bit uint right shift r >>= rcx + X64_shrq = 0xE8D3480000000003LL, // 64bit uint right shift r >>= rcx + X64_sar = 0xF8D3400000000003LL, // 32bit int right shift r >>= rcx + X64_sarq = 0xF8D3480000000003LL, // 64bit int right shift r >>= rcx + X64_shli = 0x00E0C14000000004LL, // 32bit left shift r <<= imm8 + X64_shlqi = 0x00E0C14800000004LL, // 64bit left shift r <<= imm8 + X64_sari = 0x00F8C14000000004LL, // 32bit int right shift r >>= imm8 + X64_sarqi = 0x00F8C14800000004LL, // 64bit int right shift r >>= imm8 + X64_shri = 0x00E8C14000000004LL, // 32bit uint right shift r >>= imm8 + X64_shrqi = 0x00E8C14800000004LL, // 64bit uint right shift r >>= imm8 + X64_subqrr = 0xC02B480000000003LL, // 64bit sub r -= b + X64_subrr = 0xC02B400000000003LL, // 32bit sub r -= b + X64_subqri = 0xE881480000000003LL, // 64bit sub r -= int64(imm32) + X64_subqr8 = 0x00E8834800000004LL, // 64bit sub r -= int64(imm8) + X64_ucomisd = 0xC02E0F4066000005LL, // unordered compare scalar double + X64_xorqrr = 0xC033480000000003LL, // 64bit xor r &= b + X64_xorrr = 0xC033400000000003LL, // 32bit xor r &= b + X64_xorpd = 0xC0570F4066000005LL, // 128bit xor xmm (two packed doubles) + X64_xorps = 0xC0570F4000000004LL, // 128bit xor xmm (four packed singles), one byte shorter + X64_xorpsm = 0x05570F4000000004LL, // 128bit xor xmm, [rip+disp32] + X64_xorpsa = 0x2504570F40000005LL, // 128bit xor xmm, [disp32] + + X86_and8r = 0xC022000000000002LL, // and rl,rh + X86_sete = 0xC0940F0000000003LL, // no-rex version of X64_sete + X86_setnp = 0xC09B0F0000000003LL // no-rex set byte if odd parity (ordered fcmp result) (PF == 0) + }; + + typedef uint32_t RegisterMask; + + static const RegisterMask GpRegs = 0xffff; + static const RegisterMask FpRegs = 0xffff0000; + static const bool CalleeRegsNeedExplicitSaving = true; +#ifdef _MSC_VER + static const RegisterMask SavedRegs = 1<
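
The header's encoding comment notes that "each successive instruction partially overlaps the previous one"; concretely, Assembler::emit() in NativeX64.cpp stores the whole 8-byte template and then backs the downward-growing code pointer up by only the length held in the low byte, letting the next template overwrite the unused bytes. A simplified standalone stand-in (a fixed local buffer and plain memcpy instead of nanojit's code allocator and underrunProtect; the little-endian store matches x86-64):

    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    static uint8_t buf[64];
    static uint8_t *nIns = buf + sizeof(buf);   // code is generated backwards

    static void emit(uint64_t op) {
        int len = op & 255;                     // encoded length lives in the low byte
        memcpy(nIns - 8, &op, 8);               // always store all 8 bytes (little-endian)
        nIns -= len;                            // but only keep 'len' of them
    }

    int main() {
        emit(0xC300000000000001ULL);            // X64_ret:   c3
        emit(0xC08B480000000003ULL);            // X64_movqr: 48 8b c0 (mov rax, rax)
        for (uint8_t *p = nIns; p < buf + sizeof(buf); ++p)
            printf("%02x ", *p);                // prints: 48 8b c0 c3
        printf("\n");
        return 0;
    }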
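
Similarly, asm_branch() in NativeX64.cpp flips a jump's sense with "xop ^= onFalse ? X64_jneg : 0". That works because x86 condition codes come in complementary pairs differing only in the low bit of the cc byte, and X64_jneg / X64_jneg8 put a single set bit exactly on the byte of the template that holds the condition. A minimal standalone check of that property, using the constants from the table above (the surrounding scaffolding is illustrative, not part of the patch):

    #include <cassert>
    #include <cstdint>

    int main() {
        // Long (rel32) forms: 0F 8x rel32 -- the cc byte sits at bits 24..31.
        const uint64_t X64_je   = 0x00000000840F0006ULL;
        const uint64_t X64_jne  = 0x00000000850F0006ULL;
        const uint64_t X64_jl   = 0x000000008C0F0006ULL;
        const uint64_t X64_jge  = 0x000000008D0F0006ULL;
        const uint64_t X64_jneg = 0x0000000001000000ULL;
        assert((X64_je ^ X64_jneg) == X64_jne);    // je <-> jne
        assert((X64_jl ^ X64_jneg) == X64_jge);    // jl <-> jge

        // Short (rel8) forms: 7x rel8 -- the cc byte sits at bits 48..55.
        const uint64_t X64_je8   = 0x0074000000000002ULL;
        const uint64_t X64_jne8  = 0x0075000000000002ULL;
        const uint64_t X64_jneg8 = 0x0001000000000000ULL;
        assert((X64_je8 ^ X64_jneg8) == X64_jne8); // je8 <-> jne8
        return 0;
    }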