diff --git a/Changelog.md b/Changelog.md new file mode 100644 index 00000000000..67d9e028284 --- /dev/null +++ b/Changelog.md @@ -0,0 +1,4 @@ +Main changes in the swjdk17-sw1.0.0 (20241108) release + +1. The jvm-features currently supported by swjdk17 are compiler2 dtrace g1gc jni-check jvmti management nmt parallelgc serialgc services vm-structs; the jvm-features not yet supported are cds compiler1 epsilongc jfr jvmci shenandoahgc zgc. + diff --git a/README_SW.md b/README_SW.md new file mode 100644 index 00000000000..ccfa22d58ef --- /dev/null +++ b/README_SW.md @@ -0,0 +1,34 @@ +# Build your JDK17 for ShenWei +## Step 1: + +You can set your boot JDK as follows: + +```bash +export JAVA_HOME= +export PATH=$JAVA_HOME/jdk/bin:$PATH +``` + +Alternatively, you can set the boot JDK only at configure time with + +``` +--with-boot-jdk= +``` + +## Step 2: + +Now you can configure as follows: + +```bash +bash native_configure_sw release +``` + +Next, follow the suggestions in the configure output to install any missing dependencies. + +## Step 3: + +```bash +make all +``` + +After make completes successfully, you can find your newly built JDK at `./build/linux-sw64-server-release/images` + diff --git a/jmh/jars/commons-math3-3.2.jar b/jmh/jars/commons-math3-3.2.jar new file mode 100755 index 00000000000..f8b7db295b1 Binary files /dev/null and b/jmh/jars/commons-math3-3.2.jar differ diff --git a/jmh/jars/jmh-core-1.28.jar b/jmh/jars/jmh-core-1.28.jar new file mode 100755 index 00000000000..4cecd22b13f Binary files /dev/null and b/jmh/jars/jmh-core-1.28.jar differ diff --git a/jmh/jars/jmh-generator-annprocess-1.28.jar b/jmh/jars/jmh-generator-annprocess-1.28.jar new file mode 100755 index 00000000000..bf234bb0740 Binary files /dev/null and b/jmh/jars/jmh-generator-annprocess-1.28.jar differ diff --git a/jmh/jars/jopt-simple-4.6.jar b/jmh/jars/jopt-simple-4.6.jar new file mode 100755 index 00000000000..a963d1fa4dc Binary files /dev/null and b/jmh/jars/jopt-simple-4.6.jar differ diff --git a/jmh/jmh-1.28.tar.gz b/jmh/jmh-1.28.tar.gz new file mode 100755 index 00000000000..e3fb1a75f76 Binary files /dev/null and b/jmh/jmh-1.28.tar.gz differ diff --git a/make/RunTestsPrebuilt.gmk b/make/RunTestsPrebuilt.gmk index 7c1c55b2070..ec4406358fe 100644 --- a/make/RunTestsPrebuilt.gmk +++ b/make/RunTestsPrebuilt.gmk @@ -207,6 +207,10 @@ endif # Check number of cores and memory in MB ifeq ($(OPENJDK_TARGET_OS), linux) NUM_CORES := $(shell $(CAT) /proc/cpuinfo | $(GREP) -c processor) + # ZHJ20170103 for SW64 + ifeq ($(NUM_CORES), 0) + NUM_CORES := $(shell $(CAT) /proc/cpuinfo | $(GREP) "cpus active" | $(AWK) '{print $$4}') + endif MEMORY_SIZE := $(shell \ $(EXPR) `$(CAT) /proc/meminfo | $(GREP) MemTotal | $(AWK) '{print $$2}'` / 1024 \ ) diff --git a/make/autoconf/build-aux/autoconf-config.guess b/make/autoconf/build-aux/autoconf-config.guess index 15ee4389269..d25659f187c 100644 --- a/make/autoconf/build-aux/autoconf-config.guess +++ b/make/autoconf/build-aux/autoconf-config.guess @@ -907,6 +907,9 @@ EOF if test "$?" 
= 0 ; then LIBC="libc1" ; else LIBC="" ; fi echo ${UNAME_MACHINE}-unknown-linux-gnu${LIBC} exit ;; + sw_64:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-gnu + exit ;; arm*:Linux:*:*) eval $set_cc_for_build if echo __ARM_EABI__ | $CC_FOR_BUILD -E - 2>/dev/null \ diff --git a/make/autoconf/build-performance.m4 b/make/autoconf/build-performance.m4 index 52d143e5ab9..325a86b70b9 100644 --- a/make/autoconf/build-performance.m4 +++ b/make/autoconf/build-performance.m4 @@ -32,6 +32,10 @@ AC_DEFUN([BPERF_CHECK_CORES], if test -f /proc/cpuinfo; then # Looks like a Linux (or cygwin) system NUM_CORES=`cat /proc/cpuinfo | grep -c processor` + if test "$NUM_CORES" -eq "0"; then + # ZHJ20170103 for SW64 + NUM_CORES=`cat /proc/cpuinfo | grep "cpus active" | awk '{ print [$]4 }'` + fi if test "$NUM_CORES" -eq "0"; then NUM_CORES=`cat /proc/cpuinfo | grep -c ^CPU` fi diff --git a/make/autoconf/flags-cflags.m4 b/make/autoconf/flags-cflags.m4 index f7f2ad53000..f1605c7556b 100644 --- a/make/autoconf/flags-cflags.m4 +++ b/make/autoconf/flags-cflags.m4 @@ -262,9 +262,9 @@ AC_DEFUN([FLAGS_SETUP_QUALITY_CHECKS], AC_DEFUN([FLAGS_SETUP_OPTIMIZATION], [ if test "x$TOOLCHAIN_TYPE" = xgcc; then - C_O_FLAG_HIGHEST_JVM="-O3" - C_O_FLAG_HIGHEST="-O3" - C_O_FLAG_HI="-O3" + C_O_FLAG_HIGHEST_JVM="-O2" #sw64 + C_O_FLAG_HIGHEST="-O2" + C_O_FLAG_HI="-O2" C_O_FLAG_NORM="-O2" C_O_FLAG_SIZE="-Os" C_O_FLAG_DEBUG="-O0" diff --git a/make/autoconf/platform.m4 b/make/autoconf/platform.m4 index eb66266262b..f6c03eda53d 100644 --- a/make/autoconf/platform.m4 +++ b/make/autoconf/platform.m4 @@ -180,6 +180,12 @@ AC_DEFUN([PLATFORM_EXTRACT_VARS_FROM_CPU], VAR_CPU_BITS=64 VAR_CPU_ENDIAN=big ;; + sw_64) + VAR_CPU=sw64 + VAR_CPU_ARCH=sw64 + VAR_CPU_BITS=64 + VAR_CPU_ENDIAN=little + ;; *) AC_MSG_ERROR([unsupported cpu $1]) ;; @@ -539,6 +545,8 @@ AC_DEFUN([PLATFORM_SETUP_LEGACY_VARS_HELPER], HOTSPOT_$1_CPU=ppc_64 elif test "x$OPENJDK_$1_CPU" = xppc64le; then HOTSPOT_$1_CPU=ppc_64 + elif test "x$OPENJDK_$1_CPU" = xsw64; then + HOTSPOT_$1_CPU=sw_64 fi AC_SUBST(HOTSPOT_$1_CPU) @@ -563,6 +571,8 @@ AC_DEFUN([PLATFORM_SETUP_LEGACY_VARS_HELPER], HOTSPOT_$1_CPU_DEFINE=PPC64 elif test "x$OPENJDK_$1_CPU" = xriscv64; then HOTSPOT_$1_CPU_DEFINE=RISCV64 + elif test "x$OPENJDK_$1_CPU" = xsw64; then + HOTSPOT_$1_CPU_DEFINE=SW64 # The cpu defines below are for zero, we don't support them directly. elif test "x$OPENJDK_$1_CPU" = xsparc; then diff --git a/make/conf/jib-profiles.js b/make/conf/jib-profiles.js index 64dcb7723a9..6398719cf88 100644 --- a/make/conf/jib-profiles.js +++ b/make/conf/jib-profiles.js @@ -242,7 +242,7 @@ var getJibProfilesCommon = function (input, data) { common.main_profile_names = [ "linux-x64", "linux-x86", "macosx-x64", "macosx-aarch64", "windows-x64", "windows-x86", "windows-aarch64", - "linux-aarch64", "linux-arm32", "linux-ppc64le", "linux-s390x" + "linux-aarch64", "linux-arm32", "linux-ppc64le", "linux-s390x", "linux-sw64" ]; // These are the base setttings for all the main build profiles. 
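For orientation, a minimal sketch of how the mappings above are exercised; it assumes either a ShenWei host or an sw_64 cross toolchain already on PATH, the cross invocation shows only the target flag rather than a full devkit setup, and only flags and paths that appear elsewhere in this patch are used:

```bash
# Native build on a ShenWei host, using the helper script added by this patch:
bash native_configure_sw release
make all
ls build/linux-sw64-server-release/images   # the built JDK image lands here

# Cross-build sketch (toolchain setup omitted): config.guess reports sw_64,
# platform.m4 maps it to OPENJDK_TARGET_CPU=sw64 (64-bit, little-endian),
# and HotSpot is built with HOTSPOT_TARGET_CPU=sw_64 and the SW64 define.
bash configure --openjdk-target=sw_64-linux-gnu --disable-warnings-as-errors
```

The linux-sw64 jib profile added below uses the same --openjdk-target=sw_64-linux-gnu and --with-cpu-port=sw64 arguments.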
@@ -530,6 +530,17 @@ var getJibProfilesProfiles = function (input, common, data) { "--disable-warnings-as-errors" ], }, + + "linux-sw64": { + target_os: "linux", + target_cpu: "sw64", + build_cpu: "sw64", + dependencies: ["devkit", "build_devkit", "cups"], + configure_args: [ + "--openjdk-target=sw_64-linux-gnu", "--with-freetype=bundled", + "--disable-warnings-as-errors", "--with-cpu-port=sw64", + ], + }, }; // Add the base settings to all the main profiles diff --git a/make/devkit/Tools.gmk b/make/devkit/Tools.gmk index e94a74d0063..b88e4ede408 100644 --- a/make/devkit/Tools.gmk +++ b/make/devkit/Tools.gmk @@ -346,6 +346,10 @@ PATHEXT = $(PREFIX)/bin: PATHPRE = PATH=$(PATHEXT)$(PATH) NUM_CORES := $(shell cat /proc/cpuinfo | grep -c processor) +# ZHJ20170103 for SW64 +ifeq ($(NUM_CORES), 0) + NUM_CORES := $(shell cat /proc/cpuinfo | grep "cpus active" | awk '{print $$4}') +endif BUILDPAR = -j$(NUM_CORES) # Default commands to when making diff --git a/native_configure_sw b/native_configure_sw new file mode 100644 index 00000000000..00d2ec8f13c --- /dev/null +++ b/native_configure_sw @@ -0,0 +1,27 @@ +#!/bin/bash +level=${1?usage: $0 release/slowdebug} +builddate=`date +%Y-%m-%d` +buildtag=sw1.0.0 + # --with-jvm-variants JVM variants (separated by commas) to build + # (server,client,minimal,core,zero,custom) [server] + # --with-jvm-features "cds cmsgc compiler1 compiler2 epsilongc g1gc graal jfr jni-check jvmci jvmti management nmt parallelgc serialgc services vm-structs zgc" + # --enable-cds=no + # --with-gtest= + # --with-boot-jdk= + + bash configure \ + --with-freetype=bundled \ + --with-zlib=bundled \ + --with-native-debug-symbols=external \ + --with-version-date=$builddate \ + --with-version-opt=$buildtag \ + --disable-javac-server \ + --with-debug-level=$level \ + --with-jvm-variants=server \ + --enable-jvm-feature-cds=no --enable-jvm-feature-epsilongc=no --enable-jvm-feature-compiler1=no \ + --enable-jvm-feature-jfr=no --enable-jvm-feature-jvmci=no --enable-jvm-feature-shenandoahgc=no --enable-jvm-feature-zgc=no \ + --enable-cds-archive=no \ + --disable-warnings-as-errors \ + --with-extra-cflags=" -mieee" \ + --with-extra-cxxflags="-mieee " \ + --with-extra-ldflags=" -mieee" \ No newline at end of file diff --git a/src/hotspot/cpu/sw64/abstractInterpreter_sw64.cpp b/src/hotspot/cpu/sw64/abstractInterpreter_sw64.cpp new file mode 100644 index 00000000000..7d589c69147 --- /dev/null +++ b/src/hotspot/cpu/sw64/abstractInterpreter_sw64.cpp @@ -0,0 +1,162 @@ +/* + * Copyright (c) 2003, 2017, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "interpreter/interpreter.hpp" +#include "oops/constMethod.hpp" +#include "oops/klass.inline.hpp" +#include "oops/method.hpp" +#include "runtime/frame.inline.hpp" +#include "utilities/align.hpp" +#include "utilities/debug.hpp" +#include "utilities/macros.hpp" + + +// asm based interpreter deoptimization helpers +int AbstractInterpreter::size_activation(int max_stack, + int temps, + int extra_args, + int monitors, + int callee_params, + int callee_locals, + bool is_top_frame) { + // Note: This calculation must exactly parallel the frame setup + // in TemplateInterpreterGenerator::generate_fixed_frame. + + // fixed size of an interpreter frame: + int overhead = frame::sender_sp_offset - + frame::interpreter_frame_initial_sp_offset; + // Our locals were accounted for by the caller (or last_frame_adjust + // on the transistion) Since the callee parameters already account + // for the callee's params we only need to account for the extra + // locals. + int size = overhead + + (callee_locals - callee_params)*Interpreter::stackElementWords + + monitors * frame::interpreter_frame_monitor_size() + + temps* Interpreter::stackElementWords + extra_args; + + return size; +} + +void AbstractInterpreter::layout_activation(Method* method, + int tempcount, + int popframe_extra_args, + int moncount, + int caller_actual_parameters, + int callee_param_count, + int callee_locals, + frame* caller, + frame* interpreter_frame, + bool is_top_frame, + bool is_bottom_frame) { + // The frame interpreter_frame is guaranteed to be the right size, + // as determined by a previous call to the size_activation() method. + // It is also guaranteed to be walkable even though it is in a + // skeletal state + + int max_locals = method->max_locals() * Interpreter::stackElementWords; + int extra_locals = (method->max_locals() - method->size_of_parameters()) * + Interpreter::stackElementWords; + +#ifdef ASSERT + assert(caller->sp() == interpreter_frame->sender_sp(), "Frame not properly walkable"); +#endif + + interpreter_frame->interpreter_frame_set_method(method); + // NOTE the difference in using sender_sp and + // interpreter_frame_sender_sp interpreter_frame_sender_sp is + // the original sp of the caller (the unextended_sp) and + // sender_sp is fp+8/16 (32bit/64bit) XXX + intptr_t* locals = interpreter_frame->sender_sp() + max_locals - 1; + +#ifdef ASSERT + if (caller->is_interpreted_frame()) { + assert(locals < caller->fp() + frame::interpreter_frame_initial_sp_offset, "bad placement"); + } +#endif + + interpreter_frame->interpreter_frame_set_locals(locals); + BasicObjectLock* montop = interpreter_frame->interpreter_frame_monitor_begin(); + BasicObjectLock* monbot = montop - moncount; + interpreter_frame->interpreter_frame_set_monitor_end(monbot); + + // Set last_sp + intptr_t* esp = (intptr_t*) monbot - + tempcount*Interpreter::stackElementWords - + popframe_extra_args; + interpreter_frame->interpreter_frame_set_last_sp(esp); + + // All frames but the initial (oldest) interpreter frame we fill in have + // a value for sender_sp that allows walking the stack but isn't + // truly correct. Correct the value here. 
+ if (extra_locals != 0 && + interpreter_frame->sender_sp() == + interpreter_frame->interpreter_frame_sender_sp()) { + interpreter_frame->set_interpreter_frame_sender_sp(caller->sp() + + extra_locals); + } + *interpreter_frame->interpreter_frame_cache_addr() = + method->constants()->cache(); + *interpreter_frame->interpreter_frame_mirror_addr() = + method->method_holder()->java_mirror(); +} + +int AbstractInterpreter::BasicType_as_index(BasicType type) { + int i = 0; + switch (type) { + case T_BOOLEAN: i = 0; break; + case T_CHAR : i = 1; break; + case T_BYTE : i = 2; break; + case T_SHORT : i = 3; break; + case T_INT : i = 4; break; + case T_LONG : i = 5; break; + case T_VOID : i = 6; break; + case T_FLOAT : i = 7; break; + case T_DOUBLE : i = 8; break; + case T_OBJECT : i = 9; break; + case T_ARRAY : i = 9; break; + default : ShouldNotReachHere(); + } + assert(0 <= i && i < AbstractInterpreter::number_of_result_handlers, + "index out of bounds"); + return i; +} + +// How much stack a method activation needs in words. +int AbstractInterpreter::size_top_interpreter_activation(Method* method) { + const int entry_size = frame::interpreter_frame_monitor_size(); + + // total overhead size: entry_size + (saved rbp thru expr stack + // bottom). be sure to change this if you add/subtract anything + // to/from the overhead area + const int overhead_size = + -(frame::interpreter_frame_initial_sp_offset) + entry_size; + + const int stub_code = frame::entry_frame_after_call_words; + + const int method_stack = (method->max_locals() + method->max_stack()) * + Interpreter::stackElementWords; + return (overhead_size + method_stack + stub_code); +} diff --git a/src/hotspot/cpu/sw64/ad_encode.m4 b/src/hotspot/cpu/sw64/ad_encode.m4 new file mode 100644 index 00000000000..cd68f185f2d --- /dev/null +++ b/src/hotspot/cpu/sw64/ad_encode.m4 @@ -0,0 +1,98 @@ +dnl Copyright (c) 2014, Red Hat Inc. All rights reserved. +dnl DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +dnl +dnl This code is free software; you can redistribute it and/or modify it +dnl under the terms of the GNU General Public License version 2 only, as +dnl published by the Free Software Foundation. +dnl +dnl This code is distributed in the hope that it will be useful, but WITHOUT +dnl ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +dnl FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +dnl version 2 for more details (a copy is included in the LICENSE file that +dnl accompanied this code). +dnl +dnl You should have received a copy of the GNU General Public License version +dnl 2 along with this work; if not, write to the Free Software Foundation, +dnl Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +dnl +dnl Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +dnl or visit www.oracle.com if you need additional information or have any +dnl questions. +dnl +dnl +dnl Process this file with m4 ad_encode.m4 to generate the load/store +dnl patterns used in sw64.ad. 
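A minimal sketch of the assumed workflow for this generator: the header comment above says the expanded load/store patterns are used in sw64.ad, so the redirect target below is illustrative only, and folding the output into sw64.ad is assumed here to be a manual step rather than part of the build:

```bash
# Assumed usage (illustrative): expand the macros in ad_encode.m4 and review
# the generated enc_class load/store patterns before folding them into sw64.ad.
m4 ad_encode.m4 > ad_encode.generated
```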
+dnl +define(choose, `loadStore($1, &MacroAssembler::$3, $2, $4, + $5, $6, $7, $8);dnl + + %}')dnl +define(access, ` + $3Register $1_reg = as_$3Register($$1$$reg); + $4choose(MacroAssembler(&cbuf), $1_reg,$2,$mem->opcode(), + as_Register($mem$$base),$mem$$index,$mem$$scale,$mem$$disp)')dnl +define(load,` + enc_class sw64_enc_$2($1 dst, memory mem) %{dnl +access(dst,$2,$3)')dnl +load(iRegI,ldrsbw) +load(iRegI,ldrsb) +load(iRegI,ldrb) +load(iRegL,ldrb) +load(iRegI,ldrshw) +load(iRegI,ldrsh) +load(iRegI,ldrh) +load(iRegL,ldrh) +load(iRegI,ldrw) +load(iRegL,ldrw) +load(iRegL,ldrsw) +load(iRegL,ldr) +load(vRegF,ldrs,Float) +load(vRegD,ldrd,Float) +define(STORE,` + enc_class sw64_enc_$2($1 src, memory mem) %{dnl +access(src,$2,$3,$4)')dnl +define(STORE0,` + enc_class sw64_enc_$2`'0(memory mem) %{ + MacroAssembler _masm(&cbuf); + choose(_masm,zr,$2,$mem->opcode(), + as_$3Register($mem$$base),$mem$$index,$mem$$scale,$mem$$disp)')dnl +STORE(iRegI,strb) +STORE0(iRegI,strb) +STORE(iRegI,strh) +STORE0(iRegI,strh) +STORE(iRegI,strw) +STORE0(iRegI,strw) +STORE(iRegL,str,, +`// we sometimes get asked to store the stack pointer into the + // current thread -- we cannot do that directly on Sw64 + if (src_reg == r31_sp) { + MacroAssembler _masm(&cbuf); + assert(as_Register($mem$$base) == rthread, "unexpected store for sp"); + __ mov(rscratch2, sp); + src_reg = rscratch2; + } + ') +STORE0(iRegL,str) +STORE(vRegF,strs,Float) +STORE(vRegD,strd,Float) + + enc_class sw64_enc_strw_immn(immN src, memory mem) %{ + MacroAssembler _masm(&cbuf); + address con = (address)$src$$constant; + // need to do this the hard way until we can manage relocs + // for 32 bit constants + __ movoop(rscratch2, (jobject)con); + if (con) __ encode_heap_oop_not_null(rscratch2); + choose(_masm,rscratch2,strw,$mem->opcode(), + as_Register($mem$$base),$mem$$index,$mem$$scale,$mem$$disp) + + enc_class sw64_enc_strw_immnk(immN src, memory mem) %{ + MacroAssembler _masm(&cbuf); + address con = (address)$src$$constant; + // need to do this the hard way until we can manage relocs + // for 32 bit constants + __ movoop(rscratch2, (jobject)con); + __ encode_klass_not_null(rscratch2); + choose(_masm,rscratch2,strw,$mem->opcode(), + as_Register($mem$$base),$mem$$index,$mem$$scale,$mem$$disp) + diff --git a/src/hotspot/cpu/sw64/assembler_sw64.cpp b/src/hotspot/cpu/sw64/assembler_sw64.cpp new file mode 100644 index 00000000000..7dd45c641f7 --- /dev/null +++ b/src/hotspot/cpu/sw64/assembler_sw64.cpp @@ -0,0 +1,1458 @@ +/* + * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +#include "precompiled.hpp" +#include "asm/assembler.hpp" +#include "asm/assembler.inline.hpp" +#include "asm/macroAssembler.hpp" +#include "compiler/disassembler.hpp" +#include "memory/resourceArea.hpp" +#include "prims/methodHandles.hpp" +#include "runtime/biasedLocking.hpp" +#include "runtime/objectMonitor.hpp" +#include "runtime/os.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" +#include "runtime/vm_version.hpp" +#include "utilities/macros.hpp" + +#ifndef PRODUCT +const unsigned long Assembler::asm_bp = 0x00007fffee09ac88; +#endif + +extern "C" void entry(CodeBuffer *cb); + +#define __ _masm. +#ifdef PRODUCT +#define BLOCK_COMMENT(str) /* nothing */ +#else +#define BLOCK_COMMENT(str) block_comment(str) +#endif + +#define BIND(label) bind(label); __ BLOCK_COMMENT(#label ":") + +void entry(CodeBuffer *cb) { + + // { + // for (int i = 0; i < 256; i+=16) + // { + // printf("\"%20.20g\", ", unpack(i)); + // printf("\"%20.20g\", ", unpack(i+1)); + // } + // printf("\n"); + // } + + Assembler _masm(cb); + address entry = __ pc(); + + // Smoke test for assembler + +} + +#undef __ + +#define ADDRESSEMIT(RegType, is_xx)\ +void Address::emit(RegType ra, Assembler* as, int opcode) {\ + if (_mode == base_index_scale_disp) {\ + guarantee(_tmp != noreg, "we need a tmp reg here"); \ + guarantee(Assembler::is_simm16(_disp),"check disp range"); \ + if (_scale == times_8) {\ + as->s8addl(_index, _base, _tmp); \ + } else if (_scale == times_4) {\ + as->s4addl(_index, _base, _tmp); \ + } else if (_scale == times_2) {\ + if (_tmp != _index) {\ + as->addl(_base, _index, _tmp); \ + as->addl(_tmp, _index, _tmp); \ + } else {\ + as->addl(_index, _index, _index); \ + as->addl(_base, _index, _index); \ + }\ + } else {\ + as->addl(_base, _index, _tmp);\ + }\ + as->emit_sw2_long(opcode| as->is_xx(ra) | as->is_mdisp(_disp) | as->is_rb(_tmp));\ + } else if (_mode == base_plus_disp) { \ + if (Assembler::is_simm16(_disp)) { \ + as->emit_sw2_long(opcode | as->is_xx(ra) | as->is_mdisp(_disp) | as->is_rb(_base)); \ + } else { \ + assert(Assembler::is_simm(_disp, 32), "imm should be simm32 in MacroAssembler::li32"); \ + int16_t high = (_disp - (int16_t)(_disp))>>16; \ + int16_t low = (int16_t)(_disp); \ + as->ldih(T12, high, R0); \ + as->ldi(T12, low, T12); \ + if( ((int)high == (-32768)) && (low < 0) ) { \ + as->addw(T12, R0, T12); \ + } \ + as->addl(T12, _base, T12); \ + as->emit_sw2_long(opcode | as->is_xx(ra) | as->is_mdisp(0) | as->is_rb(T12)); \ + } \ + /* guarantee(Assembler::is_simm16(_disp),"check disp range"); */ \ + /* as->emit_sw2_long(opcode | as->is_xx(ra) | as->is_mdisp(_disp) | as->is_rb(_base));*/ \ + } else {\ + ShouldNotReachHere();\ + }\ +} +ADDRESSEMIT(Register, is_ra) +ADDRESSEMIT(FloatRegister, is_fa) +#undef ADDRESSEMIT + +// Convert the raw encoding form into the form expected by the constructor for +// Address. An index of 30 (rsp) corresponds to having no index, so convert +// that to noreg for the Address constructor. 
+Address Address::make_raw(int base, int index, int scale, int disp, relocInfo::relocType disp_reloc) { + RelocationHolder rspec; + if (disp_reloc != relocInfo::none) { + rspec = Relocation::spec_simple(disp_reloc); + } + bool valid_index = index != sp->encoding(); + if (valid_index) { + Address madr(as_Register(base), as_Register(index), (Address::ScaleFactor)scale, in_ByteSize(disp)); + madr._mode = base_index_scale_disp; + madr._rspec = rspec; + return madr; + } else { + Address madr(as_Register(base), in_ByteSize(disp)); + madr._mode = base_plus_disp; + madr._rspec = rspec; + return madr; + } +} + +int AbstractAssembler::code_fill_byte() { + return 0x00; +} + +// n.b. this is implemented in subclass MacroAssembler +void Assembler::bang_stack_with_offset(int offset) { Unimplemented(); } + + +// and now the routines called by the assembler which encapsulate the +// above encode and decode functions + +//uint32_t +//asm_util::encode_logical_immediate(bool is32, uint64_t imm) +//{ +// ShouldNotReachHere(); +// return encoding_for_logical_immediate(imm); +//} + +//unsigned Assembler::pack(double value) { +// ShouldNotReachHere(); +// float val = (float)value; +// unsigned result = encoding_for_fp_immediate(val); +// guarantee(unpack(result) == value, +// "Invalid floating-point immediate operand"); +// return result; +//} + +// Packed operands for Floating-point Move (immediate) + +//static float unpack(unsigned value) { +// ShouldNotReachHere(); +// return 0; +//} + +AddressLiteral::AddressLiteral(address target, relocInfo::relocType rtype) { + _is_lval = false; + _target = target; + switch (rtype) { + case relocInfo::oop_type: + case relocInfo::metadata_type: + // Oops are a special case. Normally they would be their own section + // but in cases like icBuffer they are literals in the code stream that + // we don't have a section for. We use none so that we get a literal address + // which is always patchable. 
+ break; + case relocInfo::external_word_type: + _rspec = external_word_Relocation::spec(target); + break; + case relocInfo::internal_word_type: + _rspec = internal_word_Relocation::spec(target); + break; + case relocInfo::opt_virtual_call_type: + _rspec = opt_virtual_call_Relocation::spec(); + break; + case relocInfo::static_call_type: + _rspec = static_call_Relocation::spec(); + break; + case relocInfo::runtime_call_type: + _rspec = runtime_call_Relocation::spec(); + break; + case relocInfo::poll_type: + case relocInfo::poll_return_type: + _rspec = Relocation::spec_simple(rtype); + break; + case relocInfo::none: + break; + default: + ShouldNotReachHere(); + break; + } +} + +// exceedingly dangerous constructor +Address::Address(int disp, address loc, relocInfo::relocType rtype) { + _base = noreg; + _index = noreg; + _scale = no_scale; + _disp = disp; + switch (rtype) { + case relocInfo::external_word_type: + _rspec = external_word_Relocation::spec(loc); + break; + case relocInfo::internal_word_type: + _rspec = internal_word_Relocation::spec(loc); + break; + case relocInfo::runtime_call_type: + // HMM + _rspec = runtime_call_Relocation::spec(); + break; + case relocInfo::poll_type: + case relocInfo::poll_return_type: + _rspec = Relocation::spec_simple(rtype); + break; + case relocInfo::none: + break; + default: + ShouldNotReachHere(); + } +} + +#ifdef ASSERT +void Assembler::check_relocation(RelocationHolder const& rspec, int format) { + address inst = inst_mark(); + assert(inst != NULL && inst <= pc(), "must point to beginning of instruction"); +// address opnd; + + Relocation* r = rspec.reloc(); + if (r->type() == relocInfo::none) { + return; + } else if (r->is_call() || format == call32_operand) { + // assert(format == imm32_operand, "cannot specify a nonzero format"); +// opnd = locate_operand(inst, call32_operand);// yj todo + } else if (r->is_data()) { +// assert(format == imm_operand || format == disp32_operand +// LP64_ONLY(|| format == narrow_oop_operand), "format ok"); +// opnd = locate_operand(inst, (WhichOperand)format);// yj todo + } else { +// assert(format == imm_operand, "cannot specify a format"); + return; + } +// assert(opnd == pc(), "must put operand where relocs can find it"); +} +#endif // ASSERT + +void Assembler::emit_sw2_long(int x) { + AbstractAssembler::emit_int32(x); +} + +void Assembler::sys_call_b( int palfn ) +{ emit_sw2_long( op_sys_call | is_palfn(palfn) ); } +void Assembler::sys_call( int palfn ) +{ sys_call_b(palfn); /* emit_sw2_long( op_sys_call | ( 0x1 << 25 ) | is_palfn(palfn) );*/ } + +void Assembler::call( Register ra, Register rb, int jmphint ) +{ emit_sw2_long( op_call | is_ra(ra) | is_rb(rb) | is_jmphint(jmphint) ); } +void Assembler::ret( Register ra, Register rb, int rethint ) +{ emit_sw2_long( op_ret | is_ra(ra) | is_rb(rb) | is_rethint(rethint) ); } +void Assembler::jmp( Register ra, Register rb, int jmphint ) +{ emit_sw2_long( op_jmp | is_ra(ra) | is_rb(rb) | is_jmphint(jmphint) ); } +void Assembler::br( Register ra, int bdisp ) +{ emit_sw2_long( op_br | is_ra(ra) | is_bdisp(bdisp) ); } +void Assembler::bsr( Register ra, int bdisp ) +{ emit_sw2_long( op_bsr | is_ra(ra) | is_bdisp(bdisp) ); } +void Assembler::memb( void ) +{ emit_sw2_long( op_memb); } +void Assembler::imemb( void ) +{ sw3_only(); emit_sw2_long( op_imemb); } +void Assembler::wmemb( void ) +{ sw4_only(); emit_sw2_long( op_wmemb); } +void Assembler::rtc( Register ra, Register rb ) +{ emit_sw2_long( op_rtc | is_ra(ra) | is_rb(rb) ); } +void Assembler::rcid( Register ra ) +{ 
emit_sw2_long( op_rcid | is_ra(ra) ); } +void Assembler::halt( void ) +{ emit_sw2_long( op_halt ); } +void Assembler::rd_f( Register ra ) +{ sw2_only(); emit_sw2_long( op_rd_f | is_ra(ra) | is_rb(R0) ); } +void Assembler::wr_f( Register ra ) +{ sw2_only(); emit_sw2_long( op_wr_f | is_ra(ra) | is_rb(R0) ); } +void Assembler::rtid( Register ra ) +{ emit_sw2_long( op_rtid | is_ra(ra) ); } +void Assembler::csrws( Register ra, int rpiindex ) +{ sw4_only(); emit_sw2_long( op_csrws | is_ra(ra) | is_rpiindex(rpiindex) ); } +void Assembler::csrwc( Register ra, int rpiindex ) +{ sw4_only(); emit_sw2_long( op_csrwc | is_ra(ra) | is_rpiindex(rpiindex) ); } +void Assembler::csrr( Register ra, int rpiindex ) +{ emit_sw2_long( op_csrr | is_ra(ra) | is_rpiindex(rpiindex) ); } +void Assembler::csrw( Register ra, int rpiindex ) +{ emit_sw2_long( op_csrw | is_ra(ra) | is_rpiindex(rpiindex) ); } +void Assembler::pri_ret( Register ra ) +{ emit_sw2_long( op_pri_ret | is_ra(ra) ); } + +void Assembler::lldw( Register ra, int atmdisp, Register rb ) +{ emit_sw2_long( op_lldw | is_ra(ra) | is_atmdisp(atmdisp) | is_rb(rb) ); } +void Assembler::lldl( Register ra, int atmdisp, Register rb ) +{ emit_sw2_long( op_lldl | is_ra(ra) | is_atmdisp(atmdisp) | is_rb(rb) ); } + +void Assembler::ldw_inc( Register ra, int atmdisp, Register rb ) +{ sw2_only(); emit_sw2_long( op_ldw_inc | is_ra(ra) | is_atmdisp(atmdisp) | is_rb(rb) ); } +void Assembler::ldl_inc( Register ra, int atmdisp, Register rb ) +{ sw2_only(); emit_sw2_long( op_ldl_inc | is_ra(ra) | is_atmdisp(atmdisp) | is_rb(rb) ); } +void Assembler::ldw_dec( Register ra, int atmdisp, Register rb ) +{ sw2_only(); emit_sw2_long( op_ldw_dec | is_ra(ra) | is_atmdisp(atmdisp) | is_rb(rb) ); } +void Assembler::ldl_dec( Register ra, int atmdisp, Register rb ) +{ sw2_only(); emit_sw2_long( op_ldl_dec | is_ra(ra) | is_atmdisp(atmdisp) | is_rb(rb) ); } +void Assembler::ldw_set( Register ra, int atmdisp, Register rb ) +{ sw2_only(); emit_sw2_long( op_ldw_set | is_ra(ra) | is_atmdisp(atmdisp) | is_rb(rb) ); } +void Assembler::ldl_set( Register ra, int atmdisp, Register rb ) +{ sw2_only(); emit_sw2_long( op_ldl_set | is_ra(ra) | is_atmdisp(atmdisp) | is_rb(rb) ); } + +void Assembler::lstw( Register ra, int atmdisp, Register rb ) +{ emit_sw2_long( op_lstw | is_ra(ra) | is_atmdisp(atmdisp) | is_rb(rb) ); } +void Assembler::lstl( Register ra, int atmdisp, Register rb ) +{ emit_sw2_long( op_lstl | is_ra(ra) | is_atmdisp(atmdisp) | is_rb(rb) ); } +void Assembler::ldw_nc( Register ra, int atmdisp, Register rb ) +{ emit_sw2_long( op_ldw_nc | is_ra(ra) | is_atmdisp(atmdisp) | is_rb(rb) ); } +void Assembler::ldl_nc( Register ra, int atmdisp, Register rb ) +{ emit_sw2_long( op_ldl_nc | is_ra(ra) | is_atmdisp(atmdisp) | is_rb(rb) ); } +void Assembler::ldd_nc( Register ra, int atmdisp, Register rb ) +{ emit_sw2_long( op_ldd_nc | is_ra(ra) | is_atmdisp(atmdisp) | is_rb(rb) ); } +void Assembler::stw_nc( Register ra, int atmdisp, Register rb ) +{ emit_sw2_long( op_stw_nc | is_ra(ra) | is_atmdisp(atmdisp) | is_rb(rb) ); } +void Assembler::stl_nc( Register ra, int atmdisp, Register rb ) +{ emit_sw2_long( op_stl_nc | is_ra(ra) | is_atmdisp(atmdisp) | is_rb(rb) ); } +void Assembler::std_nc( Register ra, int atmdisp, Register rb ) +{ emit_sw2_long( op_std_nc | is_ra(ra) | is_atmdisp(atmdisp) | is_rb(rb) ); } + +void Assembler::ldwe( FloatRegister fa, int mdisp, Register rb ) +{ emit_sw2_long( op_ldwe | is_fa(fa) | is_mdisp(mdisp) | is_rb(rb) ); } +void Assembler::ldse( FloatRegister fa, int mdisp, Register 
rb ) +{ emit_sw2_long( op_ldse | is_fa(fa) | is_mdisp(mdisp) | is_rb(rb) ); } +void Assembler::ldde( FloatRegister fa, int mdisp, Register rb ) +{ emit_sw2_long( op_ldde | is_fa(fa) | is_mdisp(mdisp) | is_rb(rb) ); } +void Assembler::vlds( FloatRegister fa, int mdisp, Register rb ) +{ emit_sw2_long( op_vlds | is_fa(fa) | is_mdisp(mdisp) | is_rb(rb) ); } +void Assembler::vldd( FloatRegister fa, int mdisp, Register rb ) +{ emit_sw2_long( op_vldd | is_fa(fa) | is_mdisp(mdisp) | is_rb(rb) ); } +void Assembler::vsts( FloatRegister fa, int mdisp, Register rb ) +{ emit_sw2_long( op_vsts | is_fa(fa) | is_mdisp(mdisp) | is_rb(rb) ); } +void Assembler::vstd( FloatRegister fa, int mdisp, Register rb ) +{ emit_sw2_long( op_vstd | is_fa(fa) | is_mdisp(mdisp) | is_rb(rb) ); } + +void Assembler::addw( Register ra, Register rb, Register rc ) +{ emit_sw2_long( op_addw | is_ra(ra) | is_rb(rb) | is_rc(rc) ); } +void Assembler::addw( Register ra, int lit, Register rc ) +{ emit_sw2_long( op_addw_l | is_ra(ra) | is_lit(lit) | is_rc(rc) ); } +void Assembler::subw( Register ra, Register rb, Register rc ) +{ emit_sw2_long( op_subw | is_ra(ra) | is_rb(rb) | is_rc(rc) ); } +void Assembler::subw( Register ra, int lit, Register rc ) +{ emit_sw2_long( op_subw_l | is_ra(ra) | is_lit(lit) | is_rc(rc) ); } +void Assembler::s4addw( Register ra, Register rb, Register rc ) +{ emit_sw2_long( op_s4addw | is_ra(ra) | is_rb(rb) | is_rc(rc) ); } +void Assembler::s4addw( Register ra, int lit, Register rc ) +{ emit_sw2_long( op_s4addw_l | is_ra(ra) | is_lit(lit) | is_rc(rc) ); } +void Assembler::s4subw( Register ra, Register rb, Register rc ) +{ emit_sw2_long( op_s4subw | is_ra(ra) | is_rb(rb) | is_rc(rc) ); } +void Assembler::s4subw( Register ra, int lit, Register rc ) +{ emit_sw2_long( op_s4subw_l | is_ra(ra) | is_lit(lit) | is_rc(rc) ); } +void Assembler::s8addw( Register ra, Register rb, Register rc ) +{ emit_sw2_long( op_s8addw | is_ra(ra) | is_rb(rb) | is_rc(rc) ); } +void Assembler::s8addw( Register ra, int lit, Register rc ) +{ emit_sw2_long( op_s8addw_l | is_ra(ra) | is_lit(lit) | is_rc(rc) ); } +void Assembler::s8subw( Register ra, Register rb, Register rc ) +{ emit_sw2_long( op_s8subw | is_ra(ra) | is_rb(rb) | is_rc(rc) ); } +void Assembler::s8subw( Register ra, int lit, Register rc ) +{ emit_sw2_long( op_s8subw_l | is_ra(ra) | is_lit(lit) | is_rc(rc) ); } + +void Assembler::addl( Register ra, Register rb, Register rc ) +{ emit_sw2_long( op_addl | is_ra(ra) | is_rb(rb) | is_rc(rc) ); } +void Assembler::addl( Register ra, int lit, Register rc ) +{ emit_sw2_long( op_addl_l | is_ra(ra) | is_lit(lit) | is_rc(rc) ); } +void Assembler::subl( Register ra, Register rb, Register rc ) +{ emit_sw2_long( op_subl | is_ra(ra) | is_rb(rb) | is_rc(rc) ); } +void Assembler::subl( Register ra, int lit, Register rc ) +{ emit_sw2_long( op_subl_l | is_ra(ra) | is_lit(lit) | is_rc(rc) ); } +void Assembler::s4addl( Register ra, Register rb, Register rc ) +{ emit_sw2_long( op_s4addl | is_ra(ra) | is_rb(rb) | is_rc(rc) ); } +void Assembler::s4addl( Register ra, int lit, Register rc ) +{ emit_sw2_long( op_s4addl_l | is_ra(ra) | is_lit(lit) | is_rc(rc) ); } +void Assembler::s4subl( Register ra, Register rb, Register rc ) +{ emit_sw2_long( op_s4subl | is_ra(ra) | is_rb(rb) | is_rc(rc) ); } +void Assembler::s4subl( Register ra, int lit, Register rc ) +{ emit_sw2_long( op_s4subl_l | is_ra(ra) | is_lit(lit) | is_rc(rc) ); } +void Assembler::s8addl( Register ra, Register rb, Register rc ) +{ emit_sw2_long( op_s8addl | is_ra(ra) | is_rb(rb) | is_rc(rc) 
); } +void Assembler::s8addl( Register ra, int lit, Register rc ) +{ emit_sw2_long( op_s8addl_l | is_ra(ra) | is_lit(lit) | is_rc(rc) ); } +void Assembler::s8subl( Register ra, Register rb, Register rc ) +{ emit_sw2_long( op_s8subl | is_ra(ra) | is_rb(rb) | is_rc(rc) ); } +void Assembler::s8subl( Register ra, int lit, Register rc ) +{ emit_sw2_long( op_s8subl_l | is_ra(ra) | is_lit(lit) | is_rc(rc) ); } + +void Assembler::mulw( Register ra, Register rb, Register rc ) +{ emit_sw2_long( op_mulw | is_ra(ra) | is_rb(rb) | is_rc(rc) ); } +void Assembler::mulw( Register ra, int lit, Register rc ) +{ emit_sw2_long( op_mulw_l | is_ra(ra) | is_lit(lit) | is_rc(rc) ); } + +void Assembler::divw( Register ra, Register rb, Register rc ) +{ sw4_only(); emit_sw2_long( op_divw | is_ra(ra) | is_rb(rb) | is_rc(rc) ); } +void Assembler::udivw( Register ra, Register rb, Register rc ) +{ sw4_only(); emit_sw2_long( op_udivw | is_ra(ra) | is_rb(rb) | is_rc(rc) ); } +void Assembler::remw( Register ra, Register rb, Register rc ) +{ sw4_only(); emit_sw2_long( op_remw | is_ra(ra) | is_rb(rb) | is_rc(rc) ); } +void Assembler::uremw( Register ra, Register rb, Register rc ) +{ sw4_only(); emit_sw2_long( op_uremw | is_ra(ra) | is_rb(rb) | is_rc(rc) ); } + +void Assembler::mull( Register ra, Register rb, Register rc ) +{ emit_sw2_long( op_mull | is_ra(ra) | is_rb(rb) | is_rc(rc) ); } +void Assembler::mull( Register ra, int lit, Register rc ) +{ emit_sw2_long( op_mull_l | is_ra(ra) | is_lit(lit) | is_rc(rc) ); } +void Assembler::umulh( Register ra, Register rb, Register rc ) +{ emit_sw2_long( op_umulh | is_ra(ra) | is_rb(rb) | is_rc(rc) ); } +void Assembler::umulh( Register ra, int lit, Register rc ) +{ emit_sw2_long( op_umulh_l | is_ra(ra) | is_lit(lit) | is_rc(rc) ); } + +void Assembler::divl( Register ra, Register rb, Register rc ) +{ sw4_only(); emit_sw2_long( op_divl | is_ra(ra) | is_rb(rb) | is_rc(rc) ); } +void Assembler::udivl( Register ra, Register rb, Register rc ) +{ sw4_only(); emit_sw2_long( op_udivl | is_ra(ra) | is_rb(rb) | is_rc(rc) ); } +void Assembler::reml( Register ra, Register rb, Register rc ) +{ sw4_only(); emit_sw2_long( op_reml | is_ra(ra) | is_rb(rb) | is_rc(rc) ); } +void Assembler::ureml( Register ra, Register rb, Register rc ) +{ sw4_only(); emit_sw2_long( op_ureml | is_ra(ra) | is_rb(rb) | is_rc(rc) ); } + +void Assembler::addpi( int apint, Register rc ) +{ sw4_only(); emit_sw2_long( op_addpi | is_apint(apint) | is_rc(rc) ); } +void Assembler::addpis( int apint, Register rc ) +{ sw4_only(); emit_sw2_long( op_addpis | is_apint(apint) | is_rc(rc) ); } + +void Assembler::cmpeq( Register ra, Register rb, Register rc ) +{ emit_sw2_long( op_cmpeq | is_ra(ra) | is_rb(rb) | is_rc(rc) ); } +void Assembler::cmpeq( Register ra, int lit, Register rc ) +{ emit_sw2_long( op_cmpeq_l | is_ra(ra) | is_lit(lit) | is_rc(rc) ); } +void Assembler::cmplt( Register ra, Register rb, Register rc ) +{ emit_sw2_long( op_cmplt | is_ra(ra) | is_rb(rb) | is_rc(rc) ); } +void Assembler::cmplt( Register ra, int lit, Register rc ) +{ emit_sw2_long( op_cmplt_l | is_ra(ra) | is_lit(lit) | is_rc(rc) ); } +void Assembler::cmple( Register ra, Register rb, Register rc ) +{ emit_sw2_long( op_cmple | is_ra(ra) | is_rb(rb) | is_rc(rc) ); } +void Assembler::cmple( Register ra, int lit, Register rc ) +{ emit_sw2_long( op_cmple_l | is_ra(ra) | is_lit(lit) | is_rc(rc) ); } +void Assembler::cmpult( Register ra, Register rb, Register rc ) +{ emit_sw2_long( op_cmpult | is_ra(ra) | is_rb(rb) | is_rc(rc) ); } +void Assembler::cmpult( Register 
ra, int lit, Register rc ) +{ emit_sw2_long( op_cmpult_l | is_ra(ra) | is_lit(lit) | is_rc(rc) ); } +void Assembler::cmpule( Register ra, Register rb, Register rc ) +{ emit_sw2_long( op_cmpule | is_ra(ra) | is_rb(rb) | is_rc(rc) ); } +void Assembler::cmpule( Register ra, int lit, Register rc ) +{ emit_sw2_long( op_cmpule_l | is_ra(ra) | is_lit(lit) | is_rc(rc) ); } + +void Assembler::sbt( Register ra, Register rb, Register rc ) +{ sw4_only(); emit_sw2_long( op_sbt | is_ra(ra) | is_rb(rb) | is_rc(rc) ); } +void Assembler::sbt( Register ra, int lit, Register rc ) +{ sw4_only(); emit_sw2_long( op_sbt_l | is_ra(ra) | is_lit(lit) | is_rc(rc) ); } +void Assembler::cbt( Register ra, Register rb, Register rc ) +{ sw4_only(); emit_sw2_long( op_cbt | is_ra(ra) | is_rb(rb) | is_rc(rc) ); } +void Assembler::cbt( Register ra, int lit, Register rc ) +{ sw4_only(); emit_sw2_long( op_cbt_l | is_ra(ra) | is_lit(lit) | is_rc(rc) ); } + +void Assembler::and_ins( Register ra, Register rb, Register rc ) +{ emit_sw2_long( op_and | is_ra(ra) | is_rb(rb) | is_rc(rc) ); } +void Assembler::and_ins( Register ra, int lit, Register rc ) +{ emit_sw2_long( op_and_l | is_ra(ra) | is_lit(lit) | is_rc(rc) ); } +void Assembler::bic( Register ra, Register rb, Register rc ) +{ emit_sw2_long( op_bic | is_ra(ra) | is_rb(rb) | is_rc(rc) ); } +void Assembler::bic( Register ra, int lit, Register rc ) +{ emit_sw2_long( op_bic_l | is_ra(ra) | is_lit(lit) | is_rc(rc) ); } +void Assembler::bis( Register ra, Register rb, Register rc ) +{ emit_sw2_long( op_bis | is_ra(ra) | is_rb(rb) | is_rc(rc) ); } +void Assembler::bis( Register ra, int lit, Register rc ) +{ emit_sw2_long( op_bis_l | is_ra(ra) | is_lit(lit) | is_rc(rc) ); } +void Assembler::ornot( Register ra, Register rb, Register rc ) +{ emit_sw2_long( op_ornot | is_ra(ra) | is_rb(rb) | is_rc(rc) ); } +void Assembler::ornot( Register ra, int lit, Register rc ) +{ emit_sw2_long( op_ornot_l | is_ra(ra) | is_lit(lit) | is_rc(rc) ); } +void Assembler::xor_ins( Register ra, Register rb, Register rc ) +{ emit_sw2_long( op_xor | is_ra(ra) | is_rb(rb) | is_rc(rc) ); } +void Assembler::xor_ins( Register ra, int lit, Register rc ) +{ emit_sw2_long( op_xor_l | is_ra(ra) | is_lit(lit) | is_rc(rc) ); } +void Assembler::eqv( Register ra, Register rb, Register rc ) +{ emit_sw2_long( op_eqv | is_ra(ra) | is_rb(rb) | is_rc(rc) ); } +void Assembler::eqv( Register ra, int lit, Register rc ) +{ emit_sw2_long( op_eqv_l | is_ra(ra) | is_lit(lit) | is_rc(rc) ); } + +void Assembler::inslb( Register ra, Register rb, Register rc ) +{ emit_sw2_long( op_inslb | is_ra(ra) | is_rb(rb) | is_rc(rc) ); } +void Assembler::inslb( Register ra, int lit, Register rc ) +{ emit_sw2_long( op_inslb_l | is_ra(ra) | is_lit(lit) | is_rc(rc) ); } +void Assembler::inslh( Register ra, Register rb, Register rc ) +{ emit_sw2_long( op_inslh | is_ra(ra) | is_rb(rb) | is_rc(rc) ); } +void Assembler::inslh( Register ra, int lit, Register rc ) +{ emit_sw2_long( op_inslh_l | is_ra(ra) | is_lit(lit) | is_rc(rc) ); } +void Assembler::inslw( Register ra, Register rb, Register rc ) +{ emit_sw2_long( op_inslw | is_ra(ra) | is_rb(rb) | is_rc(rc) ); } +void Assembler::inslw( Register ra, int lit, Register rc ) +{ emit_sw2_long( op_inslw_l | is_ra(ra) | is_lit(lit) | is_rc(rc) ); } +void Assembler::insll( Register ra, Register rb, Register rc ) +{ emit_sw2_long( op_insll | is_ra(ra) | is_rb(rb) | is_rc(rc) ); } +void Assembler::insll( Register ra, int lit, Register rc ) +{ emit_sw2_long( op_insll_l | is_ra(ra) | is_lit(lit) | is_rc(rc) ); } 
+void Assembler::inshb( Register ra, Register rb, Register rc ) +{ emit_sw2_long( op_inshb | is_ra(ra) | is_rb(rb) | is_rc(rc) ); } +void Assembler::inshb( Register ra, int lit, Register rc ) +{ emit_sw2_long( op_inshb_l | is_ra(ra) | is_lit(lit) | is_rc(rc) ); } +void Assembler::inshh( Register ra, Register rb, Register rc ) +{ emit_sw2_long( op_inshh | is_ra(ra) | is_rb(rb) | is_rc(rc) ); } +void Assembler::inshh( Register ra, int lit, Register rc ) +{ emit_sw2_long( op_inshh_l | is_ra(ra) | is_lit(lit) | is_rc(rc) ); } +void Assembler::inshw( Register ra, Register rb, Register rc ) +{ emit_sw2_long( op_inshw | is_ra(ra) | is_rb(rb) | is_rc(rc) ); } +void Assembler::inshw( Register ra, int lit, Register rc ) +{ emit_sw2_long( op_inshw_l | is_ra(ra) | is_lit(lit) | is_rc(rc) ); } +void Assembler::inshl( Register ra, Register rb, Register rc ) +{ emit_sw2_long( op_inshl | is_ra(ra) | is_rb(rb) | is_rc(rc) ); } +void Assembler::inshl( Register ra, int lit, Register rc ) +{ emit_sw2_long( op_inshl_l | is_ra(ra) | is_lit(lit) | is_rc(rc) ); } + +void Assembler::slll( Register ra, Register rb, Register rc ) +{ emit_sw2_long( op_slll | is_ra(ra) | is_rb(rb) | is_rc(rc) ); } +void Assembler::slll( Register ra, int lit, Register rc ) +{ emit_sw2_long( op_slll_l | is_ra(ra) | is_lit(lit) | is_rc(rc) ); } +void Assembler::srll( Register ra, Register rb, Register rc ) +{ emit_sw2_long( op_srll | is_ra(ra) | is_rb(rb) | is_rc(rc) ); } +void Assembler::srll( Register ra, int lit, Register rc ) +{ emit_sw2_long( op_srll_l | is_ra(ra) | is_lit(lit) | is_rc(rc) ); } +void Assembler::sral( Register ra, Register rb, Register rc ) +{ emit_sw2_long( op_sral | is_ra(ra) | is_rb(rb) | is_rc(rc) ); } +void Assembler::sral( Register ra, int lit, Register rc ) +{ emit_sw2_long( op_sral_l | is_ra(ra) | is_lit(lit) | is_rc(rc) ); } +void Assembler::roll( Register ra, Register rb, Register rc ) +{ sw4_only(); emit_sw2_long( op_roll | is_ra(ra) | is_rb(rb) | is_rc(rc) ); } +void Assembler::roll( Register ra, int lit, Register rc ) +{ sw4_only(); emit_sw2_long( op_roll_l | is_ra(ra) | is_lit(lit) | is_rc(rc) ); } +void Assembler::sllw( Register ra, Register rb, Register rc ) +{ sw4_only(); emit_sw2_long( op_sllw | is_ra(ra) | is_rb(rb) | is_rc(rc) ); } +void Assembler::sllw( Register ra, int lit, Register rc ) +{ sw4_only(); emit_sw2_long( op_sllw_l | is_ra(ra) | is_lit(lit) | is_rc(rc) ); } +void Assembler::srlw( Register ra, Register rb, Register rc ) +{ sw4_only(); emit_sw2_long( op_srlw | is_ra(ra) | is_rb(rb) | is_rc(rc) ); } +void Assembler::srlw( Register ra, int lit, Register rc ) +{ sw4_only(); emit_sw2_long( op_srlw_l | is_ra(ra) | is_lit(lit) | is_rc(rc) ); } +void Assembler::sraw( Register ra, Register rb, Register rc ) +{ sw4_only(); emit_sw2_long( op_sraw | is_ra(ra) | is_rb(rb) | is_rc(rc) ); } +void Assembler::sraw( Register ra, int lit, Register rc ) +{ sw4_only(); emit_sw2_long( op_sraw_l | is_ra(ra) | is_lit(lit) | is_rc(rc) ); } +void Assembler::rolw( Register ra, Register rb, Register rc ) +{ sw4_only(); emit_sw2_long( op_rolw | is_ra(ra) | is_rb(rb) | is_rc(rc) ); } +void Assembler::rolw( Register ra, int lit, Register rc ) +{ sw4_only(); emit_sw2_long( op_rolw_l | is_ra(ra) | is_lit(lit) | is_rc(rc) ); } + +void Assembler::extlb( Register ra, Register rb, Register rc ) +{ emit_sw2_long( op_extlb | is_ra(ra) | is_rb(rb) | is_rc(rc) ); } +void Assembler::extlb( Register ra, int lit, Register rc ) +{ emit_sw2_long( op_extlb_l | is_ra(ra) | is_lit(lit) | is_rc(rc) ); } +void Assembler::extlh( 
Register ra, Register rb, Register rc ) +{ emit_sw2_long( op_extlh | is_ra(ra) | is_rb(rb) | is_rc(rc) ); } +void Assembler::extlh( Register ra, int lit, Register rc ) +{ emit_sw2_long( op_extlh_l | is_ra(ra) | is_lit(lit) | is_rc(rc) ); } +void Assembler::extlw( Register ra, Register rb, Register rc ) +{ emit_sw2_long( op_extlw | is_ra(ra) | is_rb(rb) | is_rc(rc) ); } +void Assembler::extlw( Register ra, int lit, Register rc ) +{ emit_sw2_long( op_extlw_l | is_ra(ra) | is_lit(lit) | is_rc(rc) ); } +void Assembler::extll( Register ra, Register rb, Register rc ) +{ emit_sw2_long( op_extll | is_ra(ra) | is_rb(rb) | is_rc(rc) ); } +void Assembler::extll( Register ra, int lit, Register rc ) +{ emit_sw2_long( op_extll_l | is_ra(ra) | is_lit(lit) | is_rc(rc) ); } +void Assembler::exthb( Register ra, Register rb, Register rc ) +{ emit_sw2_long( op_exthb | is_ra(ra) | is_rb(rb) | is_rc(rc) ); } +void Assembler::exthb( Register ra, int lit, Register rc ) +{ emit_sw2_long( op_exthb_l | is_ra(ra) | is_lit(lit) | is_rc(rc) ); } +void Assembler::exthh( Register ra, Register rb, Register rc ) +{ emit_sw2_long( op_exthh | is_ra(ra) | is_rb(rb) | is_rc(rc) ); } +void Assembler::exthh( Register ra, int lit, Register rc ) +{ emit_sw2_long( op_exthh_l | is_ra(ra) | is_lit(lit) | is_rc(rc) ); } +void Assembler::exthw( Register ra, Register rb, Register rc ) +{ emit_sw2_long( op_exthw | is_ra(ra) | is_rb(rb) | is_rc(rc) ); } +void Assembler::exthw( Register ra, int lit, Register rc ) +{ emit_sw2_long( op_exthw_l | is_ra(ra) | is_lit(lit) | is_rc(rc) ); } +void Assembler::exthl( Register ra, Register rb, Register rc ) +{ emit_sw2_long( op_exthl | is_ra(ra) | is_rb(rb) | is_rc(rc) ); } +void Assembler::exthl( Register ra, int lit, Register rc ) +{ emit_sw2_long( op_exthl_l | is_ra(ra) | is_lit(lit) | is_rc(rc) ); } + +void Assembler::ctpop( Register rb, Register rc ) +{ emit_sw2_long( op_ctpop | is_rb(rb) | is_rc(rc) ); } +void Assembler::ctlz( Register rb, Register rc ) +{ emit_sw2_long( op_ctlz | is_rb(rb) | is_rc(rc) ); } +void Assembler::cttz( Register rb, Register rc ) +{ emit_sw2_long( op_cttz | is_rb(rb) | is_rc(rc) ); } + +void Assembler::revbh( Register rb, Register rc ) +{ sw4_only(); emit_sw2_long( op_revbh | is_rb(rb) | is_rc(rc) ); } +void Assembler::revbw( Register rb, Register rc ) +{ sw4_only(); emit_sw2_long( op_revbw | is_rb(rb) | is_rc(rc) ); } +void Assembler::revbl( Register rb, Register rc ) +{ sw4_only(); emit_sw2_long( op_revbl | is_rb(rb) | is_rc(rc) ); } +void Assembler::casw( Register ra, Register rb, Register rc ) +{ sw4_only(); emit_sw2_long( op_casw | is_ra(ra) | is_rb(rb) | is_rc(rc) ); } +void Assembler::casl( Register ra, Register rb, Register rc ) +{ sw4_only(); emit_sw2_long( op_casl | is_ra(ra) | is_rb(rb) | is_rc(rc) ); } + +void Assembler::masklb( Register ra, Register rb, Register rc ) +{ emit_sw2_long( op_masklb | is_ra(ra) | is_rb(rb) | is_rc(rc) ); } +void Assembler::masklb( Register ra, int lit, Register rc ) +{ emit_sw2_long( op_masklb_l | is_ra(ra) | is_lit(lit) | is_rc(rc) ); } +void Assembler::masklh( Register ra, Register rb, Register rc ) +{ emit_sw2_long( op_masklh | is_ra(ra) | is_rb(rb) | is_rc(rc) ); } +void Assembler::masklh( Register ra, int lit, Register rc ) +{ emit_sw2_long( op_masklh_l | is_ra(ra) | is_lit(lit) | is_rc(rc) ); } +void Assembler::masklw( Register ra, Register rb, Register rc ) +{ emit_sw2_long( op_masklw | is_ra(ra) | is_rb(rb) | is_rc(rc) ); } +void Assembler::masklw( Register ra, int lit, Register rc ) +{ emit_sw2_long( op_masklw_l | 
is_ra(ra) | is_lit(lit) | is_rc(rc) ); } +void Assembler::maskll( Register ra, Register rb, Register rc ) +{ emit_sw2_long( op_maskll | is_ra(ra) | is_rb(rb) | is_rc(rc) ); } +void Assembler::maskll( Register ra, int lit, Register rc ) +{ emit_sw2_long( op_maskll_l | is_ra(ra) | is_lit(lit) | is_rc(rc) ); } +void Assembler::maskhb( Register ra, Register rb, Register rc ) +{ emit_sw2_long( op_maskhb | is_ra(ra) | is_rb(rb) | is_rc(rc) ); } +void Assembler::maskhb( Register ra, int lit, Register rc ) +{ emit_sw2_long( op_maskhb_l | is_ra(ra) | is_lit(lit) | is_rc(rc) ); } +void Assembler::maskhh( Register ra, Register rb, Register rc ) +{ emit_sw2_long( op_maskhh | is_ra(ra) | is_rb(rb) | is_rc(rc) ); } +void Assembler::maskhh( Register ra, int lit, Register rc ) +{ emit_sw2_long( op_maskhh_l | is_ra(ra) | is_lit(lit) | is_rc(rc) ); } +void Assembler::maskhw( Register ra, Register rb, Register rc ) +{ emit_sw2_long( op_maskhw | is_ra(ra) | is_rb(rb) | is_rc(rc) ); } +void Assembler::maskhw( Register ra, int lit, Register rc ) +{ emit_sw2_long( op_maskhw_l | is_ra(ra) | is_lit(lit) | is_rc(rc) ); } +void Assembler::maskhl( Register ra, Register rb, Register rc ) +{ emit_sw2_long( op_maskhl | is_ra(ra) | is_rb(rb) | is_rc(rc) ); } +void Assembler::maskhl( Register ra, int lit, Register rc ) +{ emit_sw2_long( op_maskhl_l | is_ra(ra) | is_lit(lit) | is_rc(rc) ); } + +void Assembler::zap( Register ra, Register rb, Register rc ) +{ emit_sw2_long( op_zap | is_ra(ra) | is_rb(rb) | is_rc(rc) ); } +void Assembler::zap( Register ra, int lit, Register rc ) +{ emit_sw2_long( op_zap_l | is_ra(ra) | is_lit(lit) | is_rc(rc) ); } +void Assembler::zapnot( Register ra, Register rb, Register rc ) +{ emit_sw2_long( op_zapnot | is_ra(ra) | is_rb(rb) | is_rc(rc) ); } +void Assembler::zapnot( Register ra, int lit, Register rc ) +{ emit_sw2_long( op_zapnot_l | is_ra(ra) | is_lit(lit) | is_rc(rc) ); } +void Assembler::sextb( Register rb, Register rc) +{ emit_sw2_long( op_sextb | is_ra(R0) | is_rb(rb) | is_rc(rc) ); } +void Assembler::sextb( int lit, Register rc ) +{ emit_sw2_long( op_sextb_l | is_ra(R0) | is_lit(lit) | is_rc(rc) ); } +void Assembler::sexth( Register rb, Register rc ) +{ emit_sw2_long( op_sexth | is_ra(R0) | is_rb(rb) | is_rc(rc) ); } +void Assembler::sexth( int lit, Register rc ) +{ emit_sw2_long( op_sexth_l | is_ra(R0) | is_lit(lit) | is_rc(rc) ); } +void Assembler::cmpgeb( Register ra, Register rb, Register rc ) +{ emit_sw2_long( op_cmpgeb | is_ra(ra) | is_rb(rb) | is_rc(rc) ); } +void Assembler::cmpgeb( Register ra, int lit, Register rc ) +{ emit_sw2_long( op_cmpgeb_l | is_ra(ra) | is_lit(lit) | is_rc(rc) ); } +void Assembler::fimovs( FloatRegister fa, Register rc ) // For sw4a SQData +{ emit_sw2_long( op_fimovs | is_fa(fa) | is_rc(rc) ); } +void Assembler::fimovd( FloatRegister fa, Register rc ) // For sw4a SQData +{ emit_sw2_long( op_fimovd | is_fa(fa) | is_rc(rc) ); } + +void Assembler::seleq( Register ra, Register rb,Register r3, Register rc ) +{ emit_sw2_long( op_seleq | is_ra(ra) | is_rb(rb) | is_r3(r3) | is_rc(rc) ); } +void Assembler::seleq( Register ra, int lit, Register r3,Register rc ) +{ emit_sw2_long( op_seleq_l | is_ra(ra) | is_lit(lit) | is_r3(r3) | is_rc(rc) ); } +void Assembler::selge( Register ra, Register rb,Register r3, Register rc ) +{ emit_sw2_long( op_selge | is_ra(ra) | is_rb(rb) | is_r3(r3) | is_rc(rc) ); } +void Assembler::selge( Register ra, int lit, Register r3,Register rc ) +{ emit_sw2_long( op_selge_l | is_ra(ra) | is_lit(lit) | is_r3(r3) | is_rc(rc) ); } +void 
Assembler::selgt( Register ra, Register rb,Register r3, Register rc ) +{ emit_sw2_long( op_selgt | is_ra(ra) | is_rb(rb) | is_r3(r3) | is_rc(rc) ); } +void Assembler::selgt( Register ra, int lit, Register r3,Register rc ) +{ emit_sw2_long( op_selgt_l | is_ra(ra) | is_lit(lit) | is_r3(r3) | is_rc(rc) ); } +void Assembler::selle( Register ra, Register rb,Register r3, Register rc ) +{ emit_sw2_long( op_selle | is_ra(ra) | is_rb(rb) | is_r3(r3) | is_rc(rc) ); } +void Assembler::selle( Register ra, int lit, Register r3,Register rc ) +{ emit_sw2_long( op_selle_l | is_ra(ra) | is_lit(lit) | is_r3(r3) | is_rc(rc) ); } +void Assembler::sellt( Register ra, Register rb,Register r3, Register rc ) +{ emit_sw2_long( op_sellt | is_ra(ra) | is_rb(rb) | is_r3(r3) | is_rc(rc) ); } +void Assembler::sellt( Register ra, int lit, Register r3,Register rc ) +{ emit_sw2_long( op_sellt_l | is_ra(ra) | is_lit(lit) | is_r3(r3) | is_rc(rc) ); } +void Assembler::selne( Register ra, Register rb,Register r3, Register rc ) +{ emit_sw2_long( op_selne | is_ra(ra) | is_rb(rb) | is_r3(r3) | is_rc(rc) ); } +void Assembler::selne( Register ra, int lit, Register r3,Register rc ) +{ emit_sw2_long( op_selne_l | is_ra(ra) | is_lit(lit) | is_r3(r3) | is_rc(rc) ); } +void Assembler::sellbc( Register ra, Register rb,Register r3, Register rc ) +{ emit_sw2_long( op_sellbc | is_ra(ra) | is_rb(rb) | is_r3(r3) | is_rc(rc) ); } +void Assembler::sellbc( Register ra, int lit, Register r3,Register rc ) +{ emit_sw2_long( op_sellbc_l | is_ra(ra) | is_lit(lit) | is_r3(r3) | is_rc(rc) ); } +void Assembler::sellbs( Register ra, Register rb,Register r3, Register rc ) +{ emit_sw2_long( op_sellbs | is_ra(ra) | is_rb(rb) | is_r3(r3) | is_rc(rc) ); } +void Assembler::sellbs( Register ra, int lit, Register r3,Register rc ) +{ emit_sw2_long( op_sellbs_l | is_ra(ra) | is_lit(lit) | is_r3(r3) | is_rc(rc) ); } + +void Assembler::vlog( int vlog ,FloatRegister fa,FloatRegister fb,FloatRegister f3, FloatRegister fc ) +{ emit_sw2_long( op_vlog | is_vlog_h(vlog) | is_vlog_l(vlog) | is_fa(fa) | is_fb(fb) | is_f3(f3) | is_fc(fc) ); } +void Assembler::vbisw( FloatRegister fa , FloatRegister fb , FloatRegister fc ) +{ emit_sw2_long( op_vbisw | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } +void Assembler::vxorw( FloatRegister fa , FloatRegister fb , FloatRegister fc ) +{ emit_sw2_long( op_vxorw | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } +void Assembler::vandw( FloatRegister fa , FloatRegister fb , FloatRegister fc ) +{ emit_sw2_long( op_vandw | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } +void Assembler::veqvw( FloatRegister fa , FloatRegister fb , FloatRegister fc ) +{ emit_sw2_long( op_veqvw | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } +void Assembler::vornotw( FloatRegister fa , FloatRegister fb , FloatRegister fc ) +{ emit_sw2_long( op_vornotw | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } +void Assembler::vbicw( FloatRegister fa , FloatRegister fb , FloatRegister fc ) +{ emit_sw2_long( op_vbicw | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } + +void Assembler::fadds( FloatRegister fa, FloatRegister fb, FloatRegister fc ) +{ emit_sw2_long( op_fadds | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } +void Assembler::faddd( FloatRegister fa, FloatRegister fb, FloatRegister fc ) +{ emit_sw2_long( op_faddd | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } +void Assembler::fsubs( FloatRegister fa, FloatRegister fb, FloatRegister fc ) +{ emit_sw2_long( op_fsubs | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } +void Assembler::fsubd( FloatRegister fa, FloatRegister fb, FloatRegister fc ) +{ emit_sw2_long( op_fsubd | is_fa(fa) 
| is_fb(fb) | is_fc(fc) ); } +void Assembler::fmuls( FloatRegister fa, FloatRegister fb, FloatRegister fc ) +{ emit_sw2_long( op_fmuls | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } +void Assembler::fmuld( FloatRegister fa, FloatRegister fb, FloatRegister fc ) +{ emit_sw2_long( op_fmuld | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } +void Assembler::fdivs( FloatRegister fa, FloatRegister fb, FloatRegister fc ) +{ emit_sw2_long( op_fdivs | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } +void Assembler::fdivd( FloatRegister fa, FloatRegister fb, FloatRegister fc ) +{ emit_sw2_long( op_fdivd | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } +void Assembler::fsqrts( FloatRegister fb, FloatRegister fc ) +{ emit_sw2_long( op_fsqrts | is_fb(fb) | is_fc(fc) ); } +void Assembler::fsqrtd( FloatRegister fb, FloatRegister fc ) +{ emit_sw2_long( op_fsqrtd | is_fb(fb) | is_fc(fc) ); } + +void Assembler::fcmpeq( FloatRegister fa, FloatRegister fb, FloatRegister fc ) +{ emit_sw2_long( op_fcmpeq | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } +void Assembler::fcmple( FloatRegister fa, FloatRegister fb, FloatRegister fc ) +{ emit_sw2_long( op_fcmple | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } +void Assembler::fcmplt( FloatRegister fa, FloatRegister fb, FloatRegister fc ) +{ emit_sw2_long( op_fcmplt | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } +void Assembler::fcmpun( FloatRegister fa, FloatRegister fb, FloatRegister fc ) +{ emit_sw2_long( op_fcmpun | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } + +void Assembler::fcvtsd( FloatRegister fb, FloatRegister fc ) +{ emit_sw2_long( op_fcvtsd | is_fb(fb) | is_fc(fc) ); } +void Assembler::fcvtds( FloatRegister fb, FloatRegister fc ) +{ emit_sw2_long( op_fcvtds | is_fb(fb) | is_fc(fc) ); } +void Assembler::fcvtdl_g( FloatRegister fb, FloatRegister fc ) //lx_fcvtdl +{ emit_sw2_long( op_fcvtdl_g | is_fb(fb) | is_fc(fc) ); } +void Assembler::fcvtdl_p( FloatRegister fb, FloatRegister fc ) +{ emit_sw2_long( op_fcvtdl_p | is_fb(fb) | is_fc(fc) ); } +void Assembler::fcvtdl_z( FloatRegister fb, FloatRegister fc ) +{ emit_sw2_long( op_fcvtdl_z | is_fb(fb) | is_fc(fc) ); } +void Assembler::fcvtdl_n( FloatRegister fb, FloatRegister fc ) //lx_fcvtdl +{ emit_sw2_long( op_fcvtdl_n | is_fb(fb) | is_fc(fc) ); } +void Assembler::fcvtdl( FloatRegister fb, FloatRegister fc ) +{ emit_sw2_long( op_fcvtdl | is_fb(fb) | is_fc(fc) ); } +void Assembler::fcvtwl( FloatRegister fb, FloatRegister fc ) +{ emit_sw2_long( op_fcvtwl | is_fb(fb) | is_fc(fc) ); } +void Assembler::fcvtlw( FloatRegister fb, FloatRegister fc ) +{ emit_sw2_long( op_fcvtlw | is_fb(fb) | is_fc(fc) ); } +void Assembler::fcvtls( FloatRegister fb, FloatRegister fc ) +{ emit_sw2_long( op_fcvtls | is_fb(fb) | is_fc(fc) ); } +void Assembler::fcvtld( FloatRegister fb, FloatRegister fc ) +{ emit_sw2_long( op_fcvtld | is_fb(fb) | is_fc(fc) ); } + +void Assembler::fcpys( FloatRegister fa, FloatRegister fb, FloatRegister fc ) +{ emit_sw2_long( op_fcpys | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } +void Assembler::fcpyse( FloatRegister fa, FloatRegister fb, FloatRegister fc ) +{ emit_sw2_long( op_fcpyse | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } +void Assembler::fcpysn( FloatRegister fa, FloatRegister fb, FloatRegister fc ) +{ emit_sw2_long( op_fcpysn | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } +void Assembler::ifmovs( Register ra, FloatRegister fc ) +{ emit_sw2_long( op_ifmovs | is_ra(ra) | is_fc(fc) ); } +void Assembler::ifmovd( Register ra, FloatRegister fc ) +{ emit_sw2_long( op_ifmovd | is_ra(ra) | is_fc(fc) ); } +//cmov +void Assembler::cmovdl( Register rc, FloatRegister fb ) +{ 
sw4_only(); emit_sw2_long( op_cmovdl | is_fb(fb) | is_rc(rc) ); } +void Assembler::cmovdl_g( Register rc, FloatRegister fb ) +{ sw4_only(); emit_sw2_long( op_cmovdl_g | is_fb(fb) | is_rc(rc) ); } +void Assembler::cmovdl_p( Register rc, FloatRegister fb ) +{ sw4_only(); emit_sw2_long( op_cmovdl_p | is_fb(fb) | is_rc(rc) ); } +void Assembler::cmovdl_z( Register rc, FloatRegister fb ) +{ sw4_only(); emit_sw2_long( op_cmovdl_z | is_fb(fb) | is_rc(rc) ); } +void Assembler::cmovdl_n( Register rc, FloatRegister fb ) +{ sw4_only(); emit_sw2_long( op_cmovdl_n | is_fb(fb) | is_rc(rc) ); } + +void Assembler::cmovdlu( Register rc, FloatRegister fb ) +{ sw4_only(); emit_sw2_long( op_cmovdlu | is_fb(fb) | is_rc(rc) ); } +void Assembler::cmovdlu_g( Register rc, FloatRegister fb ) +{ sw4_only(); emit_sw2_long( op_cmovdlu_g | is_fb(fb) | is_rc(rc) ); } +void Assembler::cmovdlu_p( Register rc, FloatRegister fb ) +{ sw4_only(); emit_sw2_long( op_cmovdlu_p | is_fb(fb) | is_rc(rc) ); } +void Assembler::cmovdlu_z( Register rc, FloatRegister fb ) +{ sw4_only(); emit_sw2_long( op_cmovdlu_z | is_fb(fb) | is_rc(rc) ); } +void Assembler::cmovdlu_n( Register rc, FloatRegister fb ) +{ sw4_only(); emit_sw2_long( op_cmovdlu_n | is_fb(fb) | is_rc(rc) ); } + +void Assembler::cmovdw( Register rc, FloatRegister fb ) +{ sw4_only(); emit_sw2_long( op_cmovdw | is_fb(fb) | is_rc(rc) ); } +void Assembler::cmovdw_g( Register rc, FloatRegister fb ) +{ sw4_only(); emit_sw2_long( op_cmovdw_g | is_fb(fb) | is_rc(rc) ); } +void Assembler::cmovdw_p( Register rc, FloatRegister fb ) +{ sw4_only(); emit_sw2_long( op_cmovdw_p | is_fb(fb) | is_rc(rc) ); } +void Assembler::cmovdw_z( Register rc, FloatRegister fb ) +{ sw4_only(); emit_sw2_long( op_cmovdw_z | is_fb(fb) | is_rc(rc) ); } +void Assembler::cmovdw_n( Register rc, FloatRegister fb ) +{ sw4_only(); emit_sw2_long( op_cmovdw_n | is_fb(fb) | is_rc(rc) ); } + +void Assembler::cmovdwu( Register rc, FloatRegister fb ) +{ sw4_only(); emit_sw2_long( op_cmovdwu | is_fb(fb) | is_rc(rc) ); } +void Assembler::cmovdwu_g( Register rc, FloatRegister fb ) +{ sw4_only(); emit_sw2_long( op_cmovdwu_g | is_fb(fb) | is_rc(rc) ); } +void Assembler::cmovdwu_p( Register rc, FloatRegister fb ) +{ sw4_only(); emit_sw2_long( op_cmovdwu_p | is_fb(fb) | is_rc(rc) ); } +void Assembler::cmovdwu_z( Register rc, FloatRegister fb ) +{ sw4_only(); emit_sw2_long( op_cmovdwu_z | is_fb(fb) | is_rc(rc) ); } +void Assembler::cmovdwu_n( Register rc, FloatRegister fb ) +{ sw4_only(); emit_sw2_long( op_cmovdwu_n | is_fb(fb) | is_rc(rc) ); } + +void Assembler::cmovls( FloatRegister fc, Register rb ) +{ sw4_only(); emit_sw2_long( op_cmovls | is_rb(rb) | is_fc(fc) ); } +void Assembler::cmovld( FloatRegister fc, Register rb ) +{ sw4_only(); emit_sw2_long( op_cmovld | is_rb(rb) | is_fc(fc) ); } +void Assembler::cmovuls( FloatRegister fc, Register rb ) +{ sw4_only(); emit_sw2_long( op_cmovuls | is_rb(rb) | is_fc(fc) ); } +void Assembler::cmovuld( FloatRegister fc, Register rb ) +{ sw4_only(); emit_sw2_long( op_cmovuld | is_rb(rb) | is_fc(fc) ); } +void Assembler::cmovws( FloatRegister fc, Register rb ) +{ sw4_only(); emit_sw2_long( op_cmovws | is_rb(rb) | is_fc(fc) ); } +void Assembler::cmovwd( FloatRegister fc, Register rb ) +{ sw4_only(); emit_sw2_long( op_cmovwd | is_rb(rb) | is_fc(fc) ); } +void Assembler::cmovuws( FloatRegister fc, Register rb ) +{ sw4_only(); emit_sw2_long( op_cmovuws | is_rb(rb) | is_fc(fc) ); } +void Assembler::cmovuwd( FloatRegister fc, Register rb ) +{ sw4_only(); emit_sw2_long( op_cmovuwd | is_rb(rb) | 
is_fc(fc) ); } + +void Assembler::rfpcr( FloatRegister fa) +{ emit_sw2_long( op_rfpcr | is_fa(fa) ); } +void Assembler::wfpcr( FloatRegister fa) +{ emit_sw2_long( op_wfpcr | is_fa(fa) ); } + +void Assembler::setfpec0() { emit_sw2_long( op_setfpec0 ); } +void Assembler::setfpec1() { emit_sw2_long( op_setfpec1 ); } +void Assembler::setfpec2() { emit_sw2_long( op_setfpec2 ); } +void Assembler::setfpec3() { emit_sw2_long( op_setfpec3 ); } + +void Assembler::frecs( FloatRegister fa, FloatRegister fc ) +{ sw4_only(); emit_sw2_long( op_frecs | is_fa(fa) | is_fc(fc) ); } +void Assembler::frecd( FloatRegister fa, FloatRegister fc ) +{ sw4_only(); emit_sw2_long( op_frecd | is_fa(fa) | is_fc(fc) ); } +void Assembler::fris( FloatRegister fb, FloatRegister fc ) +{ sw4_only(); emit_sw2_long( op_fris | is_fb(fb) | is_fc(fc) ); } +void Assembler::fris_g( FloatRegister fb, FloatRegister fc ) +{ sw4_only(); emit_sw2_long( op_fris_g | is_fb(fb) | is_fc(fc) ); } +void Assembler::fris_p( FloatRegister fb, FloatRegister fc ) +{ sw4_only(); emit_sw2_long( op_fris_p | is_fb(fb) | is_fc(fc) ); } +void Assembler::fris_z( FloatRegister fb, FloatRegister fc ) +{ sw4_only(); emit_sw2_long( op_fris_z | is_fb(fb) | is_fc(fc) ); } +void Assembler::fris_n( FloatRegister fb, FloatRegister fc ) +{ sw4_only(); emit_sw2_long( op_fris_n | is_fb(fb) | is_fc(fc) ); } +void Assembler::frid( FloatRegister fb, FloatRegister fc ) +{ sw4_only(); emit_sw2_long( op_frid | is_fb(fb) | is_fc(fc) ); } +void Assembler::frid_g( FloatRegister fb, FloatRegister fc ) +{ sw4_only(); emit_sw2_long( op_frid_g | is_fb(fb) | is_fc(fc) ); } +void Assembler::frid_p( FloatRegister fb, FloatRegister fc ) +{ sw4_only(); emit_sw2_long( op_frid_p | is_fb(fb) | is_fc(fc) ); } +void Assembler::frid_z( FloatRegister fb, FloatRegister fc ) +{ sw4_only(); emit_sw2_long( op_frid_z | is_fb(fb) | is_fc(fc) ); } +void Assembler::frid_n( FloatRegister fb, FloatRegister fc ) +{ sw4_only(); emit_sw2_long( op_frid_n | is_fb(fb) | is_fc(fc) ); } + +void Assembler::fmas( FloatRegister fa,FloatRegister fb,FloatRegister f3, FloatRegister fc ) +{ emit_sw2_long( op_fmas | is_fa(fa) | is_fb(fb) | is_f3(f3) | is_fc(fc) ); } +void Assembler::fmad( FloatRegister fa,FloatRegister fb,FloatRegister f3, FloatRegister fc ) +{ emit_sw2_long( op_fmad | is_fa(fa) | is_fb(fb) | is_f3(f3) | is_fc(fc) ); } +void Assembler::fmss( FloatRegister fa,FloatRegister fb,FloatRegister f3, FloatRegister fc ) +{ emit_sw2_long( op_fmss | is_fa(fa) | is_fb(fb) | is_f3(f3) | is_fc(fc) ); } +void Assembler::fmsd( FloatRegister fa,FloatRegister fb,FloatRegister f3, FloatRegister fc ) +{ emit_sw2_long( op_fmsd | is_fa(fa) | is_fb(fb) | is_f3(f3) | is_fc(fc) ); } +void Assembler::fnmas( FloatRegister fa,FloatRegister fb,FloatRegister f3, FloatRegister fc ) +{ emit_sw2_long( op_fnmas | is_fa(fa) | is_fb(fb) | is_f3(f3) | is_fc(fc) ); } +void Assembler::fnmad( FloatRegister fa,FloatRegister fb,FloatRegister f3, FloatRegister fc ) +{ emit_sw2_long( op_fnmad | is_fa(fa) | is_fb(fb) | is_f3(f3) | is_fc(fc) ); } +void Assembler::fnmss( FloatRegister fa,FloatRegister fb,FloatRegister f3, FloatRegister fc ) +{ emit_sw2_long( op_fnmss | is_fa(fa) | is_fb(fb) | is_f3(f3) | is_fc(fc) ); } +void Assembler::fnmsd( FloatRegister fa,FloatRegister fb,FloatRegister f3, FloatRegister fc ) +{ emit_sw2_long( op_fnmsd | is_fa(fa) | is_fb(fb) | is_f3(f3) | is_fc(fc) ); } + +void Assembler::fseleq( FloatRegister fa,FloatRegister fb,FloatRegister f3, FloatRegister fc ) +{ emit_sw2_long( op_fseleq | is_fa(fa) | is_fb(fb) | 
is_f3(f3) | is_fc(fc) ); } +void Assembler::fselne( FloatRegister fa,FloatRegister fb,FloatRegister f3, FloatRegister fc ) +{ emit_sw2_long( op_fselne | is_fa(fa) | is_fb(fb) | is_f3(f3) | is_fc(fc) ); } +void Assembler::fsellt( FloatRegister fa,FloatRegister fb,FloatRegister f3, FloatRegister fc ) +{ emit_sw2_long( op_fsellt | is_fa(fa) | is_fb(fb) | is_f3(f3) | is_fc(fc) ); } +void Assembler::fselle( FloatRegister fa,FloatRegister fb,FloatRegister f3, FloatRegister fc ) +{ emit_sw2_long( op_fselle | is_fa(fa) | is_fb(fb) | is_f3(f3) | is_fc(fc) ); } +void Assembler::fselgt( FloatRegister fa,FloatRegister fb,FloatRegister f3, FloatRegister fc ) +{ emit_sw2_long( op_fselgt | is_fa(fa) | is_fb(fb) | is_f3(f3) | is_fc(fc) ); } +void Assembler::fselge( FloatRegister fa,FloatRegister fb,FloatRegister f3, FloatRegister fc ) +{ emit_sw2_long( op_fselge | is_fa(fa) | is_fb(fb) | is_f3(f3) | is_fc(fc) ); } + +void Assembler::vaddw( FloatRegister fa, FloatRegister fb, FloatRegister fc ) +{ emit_sw2_long( op_vaddw | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } +void Assembler::vaddw( FloatRegister fa, int lit, FloatRegister fc ) +{ emit_sw2_long( op_vaddw_l | is_fa(fa) | is_lit(lit) | is_fc(fc) ); } +void Assembler::vsubw( FloatRegister fa, FloatRegister fb, FloatRegister fc ) +{ emit_sw2_long( op_vsubw | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } +void Assembler::vsubw( FloatRegister fa, int lit, FloatRegister fc ) +{ emit_sw2_long( op_vsubw_l | is_fa(fa) | is_lit(lit) | is_fc(fc) ); } + +void Assembler::vcmpgew( FloatRegister fa, FloatRegister fb, FloatRegister fc ) +{ emit_sw2_long( op_vcmpgew | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } +void Assembler::vcmpgew( FloatRegister fa, int lit, FloatRegister fc ) +{ emit_sw2_long( op_vcmpgew_l | is_fa(fa) | is_lit(lit) | is_fc(fc) ); } +void Assembler::vcmpeqw( FloatRegister fa, FloatRegister fb, FloatRegister fc ) +{ emit_sw2_long( op_vcmpeqw | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } +void Assembler::vcmpeqw( FloatRegister fa, int lit, FloatRegister fc ) +{ emit_sw2_long( op_vcmpeqw_l | is_fa(fa) | is_lit(lit) | is_fc(fc) ); } +void Assembler::vcmplew( FloatRegister fa, FloatRegister fb, FloatRegister fc ) +{ emit_sw2_long( op_vcmplew | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } +void Assembler::vcmplew( FloatRegister fa, int lit, FloatRegister fc ) +{ emit_sw2_long( op_vcmplew_l | is_fa(fa) | is_lit(lit) | is_fc(fc) ); } +void Assembler::vcmpltw( FloatRegister fa, FloatRegister fb, FloatRegister fc ) +{ emit_sw2_long( op_vcmpltw | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } +void Assembler::vcmpltw( FloatRegister fa, int lit, FloatRegister fc ) +{ emit_sw2_long( op_vcmpltw_l | is_fa(fa) | is_lit(lit) | is_fc(fc) ); } +void Assembler::vcmpulew( FloatRegister fa, FloatRegister fb, FloatRegister fc ) +{ emit_sw2_long( op_vcmpulew | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } +void Assembler::vcmpulew( FloatRegister fa, int lit, FloatRegister fc ) +{ emit_sw2_long( op_vcmpulew_l | is_fa(fa) | is_lit(lit) | is_fc(fc) ); } +void Assembler::vcmpultw( FloatRegister fa, FloatRegister fb, FloatRegister fc ) +{ emit_sw2_long( op_vcmpultw | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } +void Assembler::vcmpultw( FloatRegister fa, int lit, FloatRegister fc ) +{ emit_sw2_long( op_vcmpultw_l | is_fa(fa) | is_lit(lit) | is_fc(fc) ); } + +void Assembler::vsllw( FloatRegister fa, FloatRegister fb, FloatRegister fc ) +{ emit_sw2_long( op_vsllw | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } +void Assembler::vsllw( FloatRegister fa, int lit, FloatRegister fc ) +{ emit_sw2_long( op_vsllw_l | is_fa(fa) | 
is_lit(lit) | is_fc(fc) ); } +void Assembler::vsrlw( FloatRegister fa, FloatRegister fb, FloatRegister fc ) +{ emit_sw2_long( op_vsrlw | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } +void Assembler::vsrlw( FloatRegister fa, int lit, FloatRegister fc ) +{ emit_sw2_long( op_vsrlw_l | is_fa(fa) | is_lit(lit) | is_fc(fc) ); } +void Assembler::vsraw( FloatRegister fa, FloatRegister fb, FloatRegister fc ) +{ emit_sw2_long( op_vsraw | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } +void Assembler::vsraw( FloatRegister fa, int lit, FloatRegister fc ) +{ emit_sw2_long( op_vsraw_l | is_fa(fa) | is_lit(lit) | is_fc(fc) ); } +void Assembler::vrolw( FloatRegister fa, FloatRegister fb, FloatRegister fc ) +{ emit_sw2_long( op_vrolw | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } +void Assembler::vrolw( FloatRegister fa, int lit, FloatRegister fc ) +{ emit_sw2_long( op_vrolw_l | is_fa(fa) | is_lit(lit) | is_fc(fc) ); } +void Assembler::sllow( FloatRegister fa, FloatRegister fb, FloatRegister fc ) +{ emit_sw2_long( op_sllow | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } +void Assembler::sllow( FloatRegister fa, int lit, FloatRegister fc ) +{ emit_sw2_long( op_sllow_l | is_fa(fa) | is_lit(lit) | is_fc(fc) ); } +void Assembler::srlow( FloatRegister fa, FloatRegister fb, FloatRegister fc ) +{ emit_sw2_long( op_srlow | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } +void Assembler::srlow( FloatRegister fa, int lit, FloatRegister fc ) +{ emit_sw2_long( op_srlow_l | is_fa(fa) | is_lit(lit) | is_fc(fc) ); } + +void Assembler::vaddl( FloatRegister fa, FloatRegister fb, FloatRegister fc ) +{ emit_sw2_long( op_vaddl | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } +void Assembler::vaddl( FloatRegister fa, int lit, FloatRegister fc ) +{ emit_sw2_long( op_vaddl_l | is_fa(fa) | is_lit(lit) | is_fc(fc) ); } +void Assembler::vsubl( FloatRegister fa, FloatRegister fb, FloatRegister fc ) +{ emit_sw2_long( op_vsubl | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } +void Assembler::vsubl( FloatRegister fa, int lit, FloatRegister fc ) +{ emit_sw2_long( op_vsubl_l | is_fa(fa) | is_lit(lit) | is_fc(fc) ); } + +void Assembler::vsllb( FloatRegister fa, FloatRegister fb, FloatRegister fc ) +{ sw4_only(); emit_sw2_long( op_vsllb | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } +void Assembler::vsllb( FloatRegister fa, int lit, FloatRegister fc ) +{ sw4_only(); emit_sw2_long( op_vsllb_l | is_fa(fa) | is_lit(lit) | is_fc(fc) ); } +void Assembler::vsrlb( FloatRegister fa, FloatRegister fb, FloatRegister fc ) +{ sw4_only(); emit_sw2_long( op_vsrlb | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } +void Assembler::vsrlb( FloatRegister fa, int lit, FloatRegister fc ) +{ sw4_only(); emit_sw2_long( op_vsrlb_l | is_fa(fa) | is_lit(lit) | is_fc(fc) ); } +void Assembler::vsrab( FloatRegister fa, FloatRegister fb, FloatRegister fc ) +{ sw4_only(); emit_sw2_long( op_vsrab | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } +void Assembler::vsrab( FloatRegister fa, int lit, FloatRegister fc ) +{ sw4_only(); emit_sw2_long( op_vsrab_l | is_fa(fa) | is_lit(lit) | is_fc(fc) ); } +void Assembler::vrolb( FloatRegister fa, FloatRegister fb, FloatRegister fc ) +{ sw4_only(); emit_sw2_long( op_vrolb | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } +void Assembler::vrolb( FloatRegister fa, int lit, FloatRegister fc ) +{ sw4_only(); emit_sw2_long( op_vrolb_l | is_fa(fa) | is_lit(lit) | is_fc(fc) ); } +void Assembler::vsllh( FloatRegister fa, FloatRegister fb, FloatRegister fc ) +{ sw4_only(); emit_sw2_long( op_vsllh | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } +void Assembler::vsllh( FloatRegister fa, int lit, FloatRegister fc ) +{ sw4_only(); 
emit_sw2_long( op_vsllh_l | is_fa(fa) | is_lit(lit) | is_fc(fc) ); } +void Assembler::vsrlh( FloatRegister fa, FloatRegister fb, FloatRegister fc ) +{ sw4_only(); emit_sw2_long( op_vsrlh | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } +void Assembler::vsrlh( FloatRegister fa, int lit, FloatRegister fc ) +{ sw4_only(); emit_sw2_long( op_vsrlh_l | is_fa(fa) | is_lit(lit) | is_fc(fc) ); } +void Assembler::vsrah( FloatRegister fa, FloatRegister fb, FloatRegister fc ) +{ sw4_only(); emit_sw2_long( op_vsrah | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } +void Assembler::vsrah( FloatRegister fa, int lit, FloatRegister fc ) +{ sw4_only(); emit_sw2_long( op_vsrah_l | is_fa(fa) | is_lit(lit) | is_fc(fc) ); } +void Assembler::vrolh( FloatRegister fa, FloatRegister fb, FloatRegister fc ) +{ sw4_only(); emit_sw2_long( op_vrolh | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } +void Assembler::vrolh( FloatRegister fa, int lit, FloatRegister fc ) +{ sw4_only(); emit_sw2_long( op_vrolh_l | is_fa(fa) | is_lit(lit) | is_fc(fc) ); } + +void Assembler::ctpopow( FloatRegister fa, FloatRegister fc ) +{ emit_sw2_long( op_ctpopow | is_fa(fa) | is_fc(fc) ); } +void Assembler::ctlzow( FloatRegister fa, FloatRegister fc ) +{ emit_sw2_long( op_ctlzow | is_fa(fa) | is_fc(fc) ); } + +void Assembler::vslll( FloatRegister fa, FloatRegister fb, FloatRegister fc ) +{ sw4_only(); emit_sw2_long( op_vslll | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } +void Assembler::vslll( FloatRegister fa, int lit, FloatRegister fc ) +{ sw4_only(); emit_sw2_long( op_vslll_l | is_fa(fa) | is_lit(lit) | is_fc(fc) ); } +void Assembler::vsrll( FloatRegister fa, FloatRegister fb, FloatRegister fc ) +{ sw4_only(); emit_sw2_long( op_vsrll | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } +void Assembler::vsrll( FloatRegister fa, int lit, FloatRegister fc ) +{ sw4_only(); emit_sw2_long( op_vsrll_l | is_fa(fa) | is_lit(lit) | is_fc(fc) ); } +void Assembler::vsral( FloatRegister fa, FloatRegister fb, FloatRegister fc ) +{ sw4_only(); emit_sw2_long( op_vsral | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } +void Assembler::vsral( FloatRegister fa, int lit, FloatRegister fc ) +{ sw4_only(); emit_sw2_long( op_vsral_l | is_fa(fa) | is_lit(lit) | is_fc(fc) ); } +void Assembler::vroll( FloatRegister fa, FloatRegister fb, FloatRegister fc ) +{ sw4_only(); emit_sw2_long( op_vroll | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } +void Assembler::vroll( FloatRegister fa, int lit, FloatRegister fc ) +{ sw4_only(); emit_sw2_long( op_vroll_l | is_fa(fa) | is_lit(lit) | is_fc(fc) ); } +void Assembler::vmaxb( FloatRegister fa, FloatRegister fb, FloatRegister fc ) +{ sw4_only(); emit_sw2_long( op_vmaxb | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } +void Assembler::vminb( FloatRegister fa, FloatRegister fb, FloatRegister fc ) +{ sw4_only(); emit_sw2_long( op_vminb | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } + +void Assembler::vucaddw( FloatRegister fa, FloatRegister fb, FloatRegister fc ) +{ emit_sw2_long( op_vucaddw | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } +void Assembler::vucaddw( FloatRegister fa, int lit, FloatRegister fc ) +{ emit_sw2_long( op_vucaddw_l | is_fa(fa) | is_lit(lit) | is_fc(fc) ); } +void Assembler::vucsubw( FloatRegister fa, FloatRegister fb, FloatRegister fc ) +{ emit_sw2_long( op_vucsubw | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } +void Assembler::vucsubw( FloatRegister fa, int lit, FloatRegister fc ) +{ emit_sw2_long( op_vucsubw_l | is_fa(fa) | is_lit(lit) | is_fc(fc) ); } +void Assembler::vucaddh( FloatRegister fa, FloatRegister fb, FloatRegister fc ) +{ emit_sw2_long( op_vucaddh | is_fa(fa) | is_fb(fb) | 
is_fc(fc) ); } +void Assembler::vucaddh( FloatRegister fa, int lit, FloatRegister fc ) +{ emit_sw2_long( op_vucaddh_l | is_fa(fa) | is_lit(lit) | is_fc(fc) ); } +void Assembler::vucsubh( FloatRegister fa, FloatRegister fb, FloatRegister fc ) +{ emit_sw2_long( op_vucsubh | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } +void Assembler::vucsubh( FloatRegister fa, int lit, FloatRegister fc ) +{ emit_sw2_long( op_vucsubh_l | is_fa(fa) | is_lit(lit) | is_fc(fc) ); } +void Assembler::vucaddb( FloatRegister fa, FloatRegister fb, FloatRegister fc ) +{ emit_sw2_long( op_vucaddb | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } +void Assembler::vucaddb( FloatRegister fa, int lit, FloatRegister fc ) +{ emit_sw2_long( op_vucaddb_l | is_fa(fa) | is_lit(lit) | is_fc(fc) ); } +void Assembler::vucsubb( FloatRegister fa, FloatRegister fb, FloatRegister fc ) +{ emit_sw2_long( op_vucsubb | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } +void Assembler::vucsubb( FloatRegister fa, int lit, FloatRegister fc ) +{ emit_sw2_long( op_vucsubb_l | is_fa(fa) | is_lit(lit) | is_fc(fc) ); } + +void Assembler::sraow( FloatRegister fa, FloatRegister fb, FloatRegister fc ) +{ sw4_only(); emit_sw2_long( op_sraow | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } +void Assembler::sraow( FloatRegister fa, int lit, FloatRegister fc ) +{ sw4_only(); emit_sw2_long(op_sraow_l | is_fa(fa) | is_lit(lit) | is_fc(fc) ); } +void Assembler::vsumw( FloatRegister fa, FloatRegister fc ) +{ sw4_only(); emit_sw2_long( op_vsumw | is_fa(fa) | is_fc(fc) ); } +void Assembler::vsuml( FloatRegister fa, FloatRegister fc ) +{ sw4_only(); emit_sw2_long( op_vsuml | is_fa(fa) | is_fc(fc) ); } +void Assembler::vcmpueqb( FloatRegister fa, FloatRegister fb, FloatRegister fc ) +{ sw4_only(); emit_sw2_long( op_vcmpueqb | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } +void Assembler::vcmpueqb( FloatRegister fa, int lit, FloatRegister fc ) +{ sw4_only(); emit_sw2_long( op_vcmpueqb_l | is_fa(fa) | is_lit(lit) | is_fc(fc) ); } +void Assembler::vcmpugtb( FloatRegister fa, FloatRegister fb, FloatRegister fc ) +{ sw4_only(); emit_sw2_long( op_vcmpugtb | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } +void Assembler::vcmpugtb( FloatRegister fa, int lit, FloatRegister fc ) +{ sw4_only(); emit_sw2_long( op_vcmpugtb_l | is_fa(fa) | is_lit(lit) | is_fc(fc) ); } +void Assembler::vmaxh( FloatRegister fa, FloatRegister fb, FloatRegister fc ) +{ sw4_only(); emit_sw2_long( op_vmaxh | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } +void Assembler::vminh( FloatRegister fa, FloatRegister fb, FloatRegister fc ) +{ sw4_only(); emit_sw2_long( op_vminh | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } +void Assembler::vmaxw( FloatRegister fa, FloatRegister fb, FloatRegister fc ) +{ sw4_only(); emit_sw2_long( op_vmaxw | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } +void Assembler::vminw( FloatRegister fa, FloatRegister fb, FloatRegister fc ) +{ sw4_only(); emit_sw2_long( op_vminw | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } +void Assembler::vmaxl( FloatRegister fa, FloatRegister fb, FloatRegister fc ) +{ sw4_only(); emit_sw2_long( op_vmaxl | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } +void Assembler::vminl( FloatRegister fa, FloatRegister fb, FloatRegister fc ) +{ sw4_only(); emit_sw2_long( op_vminl | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } +void Assembler::vumaxb( FloatRegister fa, FloatRegister fb, FloatRegister fc ) +{ sw4_only(); emit_sw2_long( op_vumaxb | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } +void Assembler::vuminb( FloatRegister fa, FloatRegister fb, FloatRegister fc ) +{ sw4_only(); emit_sw2_long( op_vuminb | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } 
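The byte/halfword vector shifts and the vector min/max forms around this point, as well as the SM3/SM4 helpers that follow, all call sw4_only() before building the instruction word, marking them as available only on newer SW cores. The guard's definition is not part of this hunk; the sketch below is only a plausible shape for such a check, and the Sw64FeaturesStub type and supports_sw4() query are invented names for illustration.

```cpp
// Hypothetical sketch of an instruction-set gate in the spirit of sw4_only(),
// assuming a VM_Version-style feature query. All names here are stand-ins;
// the real guard lives elsewhere in the sw64 port and may differ.
#include <cassert>

struct Sw64FeaturesStub {
  // Assumed query: does the current core implement the SW4-only instructions?
  static bool supports_sw4() { return true; }
};

// Called at the top of an emitter so that requesting an SW4-only instruction
// on an older core fails fast in a debug build instead of emitting bad code.
inline void sw4_only_sketch() {
  assert(Sw64FeaturesStub::supports_sw4(),
         "instruction is only available on SW4-class cores");
}
```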
+void Assembler::vumaxh( FloatRegister fa, FloatRegister fb, FloatRegister fc ) +{ sw4_only(); emit_sw2_long( op_vumaxh | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } +void Assembler::vuminh( FloatRegister fa, FloatRegister fb, FloatRegister fc ) +{ sw4_only(); emit_sw2_long( op_vuminh | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } +void Assembler::vumaxw( FloatRegister fa, FloatRegister fb, FloatRegister fc ) +{ sw4_only(); emit_sw2_long( op_vumaxw | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } +void Assembler::vuminw( FloatRegister fa, FloatRegister fb, FloatRegister fc ) +{ sw4_only(); emit_sw2_long( op_vuminw | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } +void Assembler::vumaxl( FloatRegister fa, FloatRegister fb, FloatRegister fc ) +{ sw4_only(); emit_sw2_long( op_vumaxl | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } +void Assembler::vuminl( FloatRegister fa, FloatRegister fb, FloatRegister fc ) +{ sw4_only(); emit_sw2_long( op_vuminl | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } + +void Assembler::vsm3msw( FloatRegister fa, FloatRegister fb, FloatRegister fc ) +{ sw4_only(); emit_sw2_long( op_vsm3msw | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } +void Assembler::vsm4key( FloatRegister fa, int lit, FloatRegister fc ) +{ sw4_only(); emit_sw2_long( op_vsm4key_l | is_fa(fa) | is_lit(lit) | is_fc(fc) ); } +void Assembler::vsm4r( FloatRegister fa, FloatRegister fb, FloatRegister fc ) +{ sw4_only(); emit_sw2_long( op_vsm4r | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } +void Assembler::vbinvw( FloatRegister fb, FloatRegister fc ) +{ sw4_only(); emit_sw2_long( op_vbinvw | is_fb(fb) | is_fc(fc) ); } + +void Assembler::vadds( FloatRegister fa, FloatRegister fb, FloatRegister fc ) +{ emit_sw2_long( op_vadds | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } +void Assembler::vaddd( FloatRegister fa, FloatRegister fb, FloatRegister fc ) +{ emit_sw2_long( op_vaddd | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } +void Assembler::vsubs( FloatRegister fa, FloatRegister fb, FloatRegister fc ) +{ emit_sw2_long( op_vsubs | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } +void Assembler::vsubd( FloatRegister fa, FloatRegister fb, FloatRegister fc ) +{ emit_sw2_long( op_vsubd | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } +void Assembler::vmuls( FloatRegister fa, FloatRegister fb, FloatRegister fc ) +{ emit_sw2_long( op_vmuls | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } +void Assembler::vmuld( FloatRegister fa, FloatRegister fb, FloatRegister fc ) +{ emit_sw2_long( op_vmuld | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } +void Assembler::vdivs( FloatRegister fa, FloatRegister fb, FloatRegister fc ) +{ emit_sw2_long( op_vdivs | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } +void Assembler::vdivd( FloatRegister fa, FloatRegister fb, FloatRegister fc ) +{ emit_sw2_long( op_vdivd | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } +void Assembler::vsqrts( FloatRegister fb, FloatRegister fc ) +{ emit_sw2_long( op_vsqrts | is_fb(fb) | is_fc(fc) ); } +void Assembler::vsqrtd( FloatRegister fb, FloatRegister fc ) +{ emit_sw2_long( op_vsqrtd | is_fb(fb) | is_fc(fc) ); } + +void Assembler::vfcmpeq( FloatRegister fa, FloatRegister fb, FloatRegister fc ) +{ emit_sw2_long( op_vfcmpeq | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } +void Assembler::vfcmple( FloatRegister fa, FloatRegister fb, FloatRegister fc ) +{ emit_sw2_long( op_vfcmple | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } +void Assembler::vfcmplt( FloatRegister fa, FloatRegister fb, FloatRegister fc ) +{ emit_sw2_long( op_vfcmplt | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } +void Assembler::vfcmpun( FloatRegister fa, FloatRegister fb, FloatRegister fc ) +{ emit_sw2_long( 
op_vfcmpun | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } +void Assembler::vcpys( FloatRegister fa, FloatRegister fb, FloatRegister fc ) +{ emit_sw2_long( op_vcpys | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } +void Assembler::vfmov( FloatRegister fa, FloatRegister fc ) +{ emit_sw2_long( op_vcpys | is_fa(fa) | is_fb(fa) | is_fc(fc) ); } +void Assembler::vcpyse( FloatRegister fa, FloatRegister fb, FloatRegister fc ) +{ emit_sw2_long( op_vcpyse | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } +void Assembler::vcpysn( FloatRegister fa, FloatRegister fb, FloatRegister fc ) +{ emit_sw2_long( op_vcpysn | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } + +void Assembler::vsums( FloatRegister fa, FloatRegister fc ) +{ sw4_only(); emit_sw2_long( op_vsums | is_fa(fa) | is_fc(fc) ); } +void Assembler::vsumd( FloatRegister fa, FloatRegister fc ) +{ sw4_only(); emit_sw2_long( op_vsumd | is_fa(fa) | is_fc(fc) ); } +void Assembler::vfcvtsd( FloatRegister fb, FloatRegister fc ) +{ sw4_only(); emit_sw2_long( op_vfcvtsd | is_fb(fb) | is_fc(fc) ); } +void Assembler::vfcvtds( FloatRegister fb, FloatRegister fc ) +{ sw4_only(); emit_sw2_long( op_vfcvtds | is_fb(fb) | is_fc(fc) ); } +void Assembler::vfcvtls( FloatRegister fb, FloatRegister fc ) +{ sw4_only(); emit_sw2_long( op_vfcvtls | is_fb(fb) | is_fc(fc) ); } +void Assembler::vfcvtld( FloatRegister fb, FloatRegister fc ) +{ sw4_only(); emit_sw2_long( op_vfcvtld | is_fb(fb) | is_fc(fc) ); } +void Assembler::vfcvtdl( FloatRegister fb, FloatRegister fc ) +{ sw4_only(); emit_sw2_long( op_vfcvtdl | is_fb(fb) | is_fc(fc) ); } +void Assembler::vfcvtdl_g( FloatRegister fb, FloatRegister fc ) +{ sw4_only(); emit_sw2_long( op_vfcvtdl_g | is_fb(fb) | is_fc(fc) ); } +void Assembler::vfcvtdl_p( FloatRegister fb, FloatRegister fc ) +{ sw4_only(); emit_sw2_long( op_vfcvtdl_p | is_fb(fb) | is_fc(fc) ); } +void Assembler::vfcvtdl_z( FloatRegister fb, FloatRegister fc ) +{ sw4_only(); emit_sw2_long( op_vfcvtdl_z | is_fb(fb) | is_fc(fc) ); } +void Assembler::vfcvtdl_n( FloatRegister fb, FloatRegister fc ) +{ sw4_only(); emit_sw2_long( op_vfcvtdl_n | is_fb(fb) | is_fc(fc) ); } + +void Assembler::vfris( FloatRegister fb, FloatRegister fc ) +{ sw4_only(); emit_sw2_long( op_vfris | is_fb(fb) | is_fc(fc) ); } +void Assembler::vfris_g( FloatRegister fb, FloatRegister fc ) +{ sw4_only(); emit_sw2_long( op_vfris_g | is_fb(fb) | is_fc(fc) ); } +void Assembler::vfris_p( FloatRegister fb, FloatRegister fc ) +{ sw4_only(); emit_sw2_long( op_vfris_p | is_fb(fb) | is_fc(fc) ); } +void Assembler::vfris_z( FloatRegister fb, FloatRegister fc ) +{ sw4_only(); emit_sw2_long( op_vfris_z | is_fb(fb) | is_fc(fc) ); } +void Assembler::vfris_n( FloatRegister fb, FloatRegister fc ) +{ sw4_only(); emit_sw2_long( op_vfris_n | is_fb(fb) | is_fc(fc) ); } +void Assembler::vfrid( FloatRegister fb, FloatRegister fc ) +{ sw4_only(); emit_sw2_long( op_vfrid | is_fb(fb) | is_fc(fc) ); } +void Assembler::vfrid_g( FloatRegister fb, FloatRegister fc ) +{ sw4_only(); emit_sw2_long( op_vfrid_g | is_fb(fb) | is_fc(fc) ); } +void Assembler::vfrid_p( FloatRegister fb, FloatRegister fc ) +{ sw4_only(); emit_sw2_long( op_vfrid_p | is_fb(fb) | is_fc(fc) ); } +void Assembler::vfrid_z( FloatRegister fb, FloatRegister fc ) +{ sw4_only(); emit_sw2_long( op_vfrid_z | is_fb(fb) | is_fc(fc) ); } +void Assembler::vfrid_n( FloatRegister fb, FloatRegister fc ) +{ sw4_only(); emit_sw2_long( op_vfrid_n | is_fb(fb) | is_fc(fc) ); } +void Assembler::vfrecs( FloatRegister fa, FloatRegister fc ) +{ sw4_only(); emit_sw2_long( op_vfrecs | is_fa(fa) | is_fc(fc) ); } 
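Every emitter above follows the same composition: a fixed opcode constant (op_vcpys, op_vfcvtdl_z, ...) is OR'ed with per-operand field encoders such as is_fa(), is_fb(), is_fc() or is_lit(), and the resulting 32-bit word is written out through emit_sw2_long(). The register/literal overload pairs (for example vaddw(fa, fb, fc) versus vaddw(fa, lit, fc)) differ only in starting from the _l opcode constant and substituting is_lit(lit) for the is_fb(fb) field. The standalone sketch below illustrates that OR-composition; only the major-opcode placement mirrors the OP() macro defined later in assembler_sw64.hpp, while the operand bit positions are assumptions made for the example.

```cpp
// Minimal standalone illustration of the opcode|field composition used by the
// emitters above. Only the major-opcode placement (bits 31..26) follows the
// OP() macro from assembler_sw64.hpp; the operand bit positions below are
// assumed for the sake of the example, not the authoritative SW64 layout.
#include <cstdint>
#include <cstdio>

static uint32_t op_major(uint32_t x) { return (x & 0x3F) << 26; } // like OP(x)
static uint32_t enc_fa(uint32_t r)   { return (r & 0x1F) << 21; } // assumed fa slot
static uint32_t enc_fb(uint32_t r)   { return (r & 0x1F) << 16; } // assumed fb slot
static uint32_t enc_fc(uint32_t r)   { return  r & 0x1F;        } // assumed fc slot

int main() {
  // "Emit" one hypothetical three-register instruction the same way the
  // Assembler methods do: a constant opcode OR'ed with the encoded operands.
  uint32_t word = op_major(0x1A) | enc_fa(4) | enc_fb(5) | enc_fc(6);
  std::printf("encoded instruction word: 0x%08X\n", word);
  return 0;
}
```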
+void Assembler::vfrecd( FloatRegister fa, FloatRegister fc ) +{ sw4_only(); emit_sw2_long( op_vfrecd | is_fa(fa) | is_fc(fc) ); } +void Assembler::vmaxs( FloatRegister fa, FloatRegister fb, FloatRegister fc ) +{ sw4_only(); emit_sw2_long( op_vmaxs | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } +void Assembler::vmins( FloatRegister fa, FloatRegister fb, FloatRegister fc ) +{ sw4_only(); emit_sw2_long( op_vmins | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } +void Assembler::vmaxd( FloatRegister fa, FloatRegister fb, FloatRegister fc ) +{ sw4_only(); emit_sw2_long( op_vmaxd | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } +void Assembler::vmind( FloatRegister fa, FloatRegister fb, FloatRegister fc ) +{ sw4_only(); emit_sw2_long( op_vmind | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } + +void Assembler::vmas( FloatRegister fa,FloatRegister fb,FloatRegister f3, FloatRegister fc ) +{ emit_sw2_long( op_vmas | is_fa(fa) | is_fb(fb) | is_f3(f3) | is_fc(fc) ); } +void Assembler::vmad( FloatRegister fa,FloatRegister fb,FloatRegister f3, FloatRegister fc ) +{ emit_sw2_long( op_vmad | is_fa(fa) | is_fb(fb) | is_f3(f3) | is_fc(fc) ); } +void Assembler::vmss( FloatRegister fa,FloatRegister fb,FloatRegister f3, FloatRegister fc ) +{ emit_sw2_long( op_vmss | is_fa(fa) | is_fb(fb) | is_f3(f3) | is_fc(fc) ); } +void Assembler::vmsd( FloatRegister fa,FloatRegister fb,FloatRegister f3, FloatRegister fc ) +{ emit_sw2_long( op_vmsd | is_fa(fa) | is_fb(fb) | is_f3(f3) | is_fc(fc) ); } +void Assembler::vnmas( FloatRegister fa,FloatRegister fb,FloatRegister f3, FloatRegister fc ) +{ emit_sw2_long( op_vnmas | is_fa(fa) | is_fb(fb) | is_f3(f3) | is_fc(fc) ); } +void Assembler::vnmad( FloatRegister fa,FloatRegister fb,FloatRegister f3, FloatRegister fc ) +{ emit_sw2_long( op_vnmad | is_fa(fa) | is_fb(fb) | is_f3(f3) | is_fc(fc) ); } +void Assembler::vnmss( FloatRegister fa,FloatRegister fb,FloatRegister f3, FloatRegister fc ) +{ emit_sw2_long( op_vnmss | is_fa(fa) | is_fb(fb) | is_f3(f3) | is_fc(fc) ); } +void Assembler::vnmsd( FloatRegister fa,FloatRegister fb,FloatRegister f3, FloatRegister fc ) +{ emit_sw2_long( op_vnmsd | is_fa(fa) | is_fb(fb) | is_f3(f3) | is_fc(fc) ); } + +void Assembler::vfseleq( FloatRegister fa,FloatRegister fb,FloatRegister f3, FloatRegister fc ) +{ emit_sw2_long( op_vfseleq | is_fa(fa) | is_fb(fb) | is_f3(f3) | is_fc(fc) ); } +void Assembler::vfsellt( FloatRegister fa,FloatRegister fb,FloatRegister f3, FloatRegister fc ) +{ emit_sw2_long( op_vfsellt | is_fa(fa) | is_fb(fb) | is_f3(f3) | is_fc(fc) ); } +void Assembler::vfselle( FloatRegister fa,FloatRegister fb,FloatRegister f3, FloatRegister fc ) +{ emit_sw2_long( op_vfselle | is_fa(fa) | is_fb(fb) | is_f3(f3) | is_fc(fc) ); } +void Assembler::vseleqw( FloatRegister fa,FloatRegister fb,FloatRegister f3, FloatRegister fc ) +{ emit_sw2_long( op_vseleqw | is_fa(fa) | is_fb(fb) | is_f3(f3) | is_fc(fc) ); } +void Assembler::vseleqw( FloatRegister fa,FloatRegister fb, int fmalit, FloatRegister fc ) +{ emit_sw2_long( op_vseleqw_l | is_fa(fa) | is_fb(fb) | is_fmalit(fmalit) | is_fc(fc) ); } +void Assembler::vsellbcw( FloatRegister fa,FloatRegister fb,FloatRegister f3, FloatRegister fc ) +{ emit_sw2_long( op_vsellbcw | is_fa(fa) | is_fb(fb) | is_f3(f3) | is_fc(fc) ); } +void Assembler::vsellbcw( FloatRegister fa,FloatRegister fb, int fmalit, FloatRegister fc ) +{ emit_sw2_long( op_vsellbcw_l | is_fa(fa) | is_fb(fb) | is_fmalit(fmalit) | is_fc(fc) ); } +void Assembler::vselltw( FloatRegister fa,FloatRegister fb,FloatRegister f3, FloatRegister fc ) +{ emit_sw2_long( 
op_vselltw | is_fa(fa) | is_fb(fb) | is_f3(f3) | is_fc(fc) ); } +void Assembler::vselltw( FloatRegister fa,FloatRegister fb, int fmalit, FloatRegister fc ) +{ emit_sw2_long( op_vselltw_l | is_fa(fa) | is_fb(fb) | is_fmalit(fmalit) | is_fc(fc) ); } +void Assembler::vsellew( FloatRegister fa,FloatRegister fb,FloatRegister f3, FloatRegister fc ) +{ emit_sw2_long( op_vsellew | is_fa(fa) | is_fb(fb) | is_f3(f3) | is_fc(fc) ); } +void Assembler::vsellew( FloatRegister fa,FloatRegister fb, int fmalit, FloatRegister fc ) +{ emit_sw2_long( op_vsellew_l | is_fa(fa) | is_fb(fb) | is_fmalit(fmalit) | is_fc(fc) ); } + +void Assembler::vinsw( FloatRegister fa,FloatRegister fb, int fmalit, FloatRegister fc ) +{ emit_sw2_long( op_vinsw_l | is_fa(fa) | is_fb(fb) | is_fmalit(fmalit) | is_fc(fc) ); } +void Assembler::vinsf( FloatRegister fa,FloatRegister fb, int fmalit, FloatRegister fc ) +{ emit_sw2_long( op_vinsf_l | is_fa(fa) | is_fb(fb) | is_fmalit(fmalit) | is_fc(fc) ); } +void Assembler::vextw( FloatRegister fa, int fmalit, FloatRegister fc) +{ emit_sw2_long( op_vextw_l | is_fa(fa) | is_fmalit(fmalit) | is_fc(fc) ); } +void Assembler::vextf( FloatRegister fa, int fmalit, FloatRegister fc) +{ emit_sw2_long( op_vextf_l | is_fa(fa) | is_fmalit(fmalit) | is_fc(fc) ); } +void Assembler::vcpyw( FloatRegister fa, FloatRegister fc) +{ emit_sw2_long( op_vcpyw | is_fa(fa) | is_fc(fc) ); } +void Assembler::vcpyf( FloatRegister fa, FloatRegister fc) +{ emit_sw2_long( op_vcpyf | is_fa(fa) | is_fc(fc) ); } +void Assembler::vconw( FloatRegister fa,FloatRegister fb,FloatRegister f3, FloatRegister fc ) +{ emit_sw2_long( op_vconw | is_fa(fa) | is_fb(fb) | is_f3(f3) | is_fc(fc) ); } +void Assembler::vshfw( FloatRegister fa,FloatRegister fb,FloatRegister f3, FloatRegister fc ) +{ emit_sw2_long( op_vshfw | is_fa(fa) | is_fb(fb) | is_f3(f3) | is_fc(fc) ); } +void Assembler::vcons( FloatRegister fa,FloatRegister fb,FloatRegister f3, FloatRegister fc ) +{ emit_sw2_long( op_vcons | is_fa(fa) | is_fb(fb) | is_f3(f3) | is_fc(fc) ); } +void Assembler::vcond( FloatRegister fa,FloatRegister fb,FloatRegister f3, FloatRegister fc ) +{ emit_sw2_long( op_vcond | is_fa(fa) | is_fb(fb) | is_f3(f3) | is_fc(fc) ); } + +void Assembler::vinsb( FloatRegister fa,FloatRegister fb, int fmalit, FloatRegister fc ) +{ sw4_only(); emit_sw2_long( op_vinsb_l | is_fa(fa) | is_fb(fb) | is_fmalit(fmalit) | is_fc(fc) ); } +void Assembler::vinsh( FloatRegister fa,FloatRegister fb, int fmalit, FloatRegister fc ) +{ sw4_only(); emit_sw2_long( op_vinsh_l | is_fa(fa) | is_fb(fb) | is_fmalit(fmalit) | is_fc(fc) ); } +void Assembler::vinsectlh( FloatRegister fa, FloatRegister fb, FloatRegister fc ) +{ sw4_only(); emit_sw2_long( op_vinsectlh | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } +void Assembler::vinsectlw( FloatRegister fa, FloatRegister fb, FloatRegister fc ) +{ sw4_only(); emit_sw2_long( op_vinsectlw | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } +void Assembler::vinsectll( FloatRegister fa, FloatRegister fb, FloatRegister fc ) +{ sw4_only(); emit_sw2_long( op_vinsectll | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } +void Assembler::vinsectlb( FloatRegister fa, FloatRegister fb, FloatRegister fc ) +{ sw4_only(); emit_sw2_long( op_vinsectlb | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } +void Assembler::vshfq( FloatRegister fa,FloatRegister fb, int fmalit, FloatRegister fc ) +{ sw4_only(); emit_sw2_long( op_vshfq_l | is_fa(fa) | is_fb(fb) | is_fmalit(fmalit) | is_fc(fc) ); } +void Assembler::vshfqb( FloatRegister fa, FloatRegister fb, FloatRegister fc ) +{ sw4_only(); 
emit_sw2_long( op_vshfqb | is_fa(fa) | is_fb(fb) | is_fc(fc) ); }
+void Assembler::vcpyb( FloatRegister fa, FloatRegister fc )
+{ sw4_only(); emit_sw2_long( op_vcpyb | is_fa(fa) | is_fc(fc) ); }
+void Assembler::vcpyh( FloatRegister fa, FloatRegister fc )
+{ sw4_only(); emit_sw2_long( op_vcpyh | is_fa(fa) | is_fc(fc) ); }
+void Assembler::vsm3r( FloatRegister fa,FloatRegister fb, int fmalit, FloatRegister fc )
+{ sw4_only(); emit_sw2_long( op_vsm3r_l | is_fa(fa) | is_fb(fb) | is_fmalit(fmalit) | is_fc(fc) ); }
+void Assembler::vfcvtsh( FloatRegister fa,FloatRegister fb, int fmalit, FloatRegister fc )
+{ sw4_only(); emit_sw2_long( op_vfcvtsh_l | is_fa(fa) | is_fb(fb) | is_fmalit(fmalit) | is_fc(fc) ); }
+void Assembler::vfcvths( FloatRegister fa,FloatRegister fb, int fmalit, FloatRegister fc )
+{ sw4_only(); emit_sw2_long( op_vfcvths_l | is_fa(fa) | is_fb(fb) | is_fmalit(fmalit) | is_fc(fc) ); }
+
+void Assembler::vldw_u( FloatRegister fa, int atmdisp, Register rb )
+{ emit_sw2_long( op_vldw_u | is_fa(fa) | is_atmdisp(atmdisp) | is_rb(rb) ); }
+void Assembler::vstw_u( FloatRegister fa, int atmdisp, Register rb )
+{ emit_sw2_long( op_vstw_u | is_fa(fa) | is_atmdisp(atmdisp) | is_rb(rb) ); }
+void Assembler::vlds_u( FloatRegister fa, int atmdisp, Register rb )
+{ emit_sw2_long( op_vlds_u | is_fa(fa) | is_atmdisp(atmdisp) | is_rb(rb) ); }
+void Assembler::vsts_u( FloatRegister fa, int atmdisp, Register rb )
+{ emit_sw2_long( op_vsts_u | is_fa(fa) | is_atmdisp(atmdisp) | is_rb(rb) ); }
+void Assembler::vldd_u( FloatRegister fa, int atmdisp, Register rb )
+{ emit_sw2_long( op_vldd_u | is_fa(fa) | is_atmdisp(atmdisp) | is_rb(rb) ); }
+void Assembler::vstd_u( FloatRegister fa, int atmdisp, Register rb )
+{ emit_sw2_long( op_vstd_u | is_fa(fa) | is_atmdisp(atmdisp) | is_rb(rb) ); }
+void Assembler::vstw_ul( FloatRegister fa, int atmdisp, Register rb )
+{ emit_sw2_long( op_vstw_ul | is_fa(fa) | is_atmdisp(atmdisp) | is_rb(rb) ); }
+void Assembler::vstw_uh( FloatRegister fa, int atmdisp, Register rb )
+{ emit_sw2_long( op_vstw_uh | is_fa(fa) | is_atmdisp(atmdisp) | is_rb(rb) ); }
+void Assembler::vsts_ul( FloatRegister fa, int atmdisp, Register rb )
+{ emit_sw2_long( op_vsts_ul | is_fa(fa) | is_atmdisp(atmdisp) | is_rb(rb) ); }
+void Assembler::vsts_uh( FloatRegister fa, int atmdisp, Register rb )
+{ emit_sw2_long( op_vsts_uh | is_fa(fa) | is_atmdisp(atmdisp) | is_rb(rb) ); }
+void Assembler::vstd_ul( FloatRegister fa, int atmdisp, Register rb )
+{ emit_sw2_long( op_vstd_ul | is_fa(fa) | is_atmdisp(atmdisp) | is_rb(rb) ); }
+void Assembler::vstd_uh( FloatRegister fa, int atmdisp, Register rb )
+{ emit_sw2_long( op_vstd_uh | is_fa(fa) | is_atmdisp(atmdisp) | is_rb(rb) ); }
+
+void Assembler::lbr( int palfn )
+{ sw4_only(); emit_sw2_long( op_lbr | is_palfn(palfn) ); }
+
+void Assembler::ldbu_a( Register ra, int atmdisp, Register rb )
+{ sw4_only(); emit_sw2_long( op_ldbu_a | is_ra(ra) | is_atmdisp(atmdisp) | is_rb(rb) ); }
+void Assembler::ldhu_a( Register ra, int atmdisp, Register rb )
+{ sw4_only(); emit_sw2_long( op_ldhu_a | is_ra(ra) | is_atmdisp(atmdisp) | is_rb(rb) ); }
+void Assembler::ldw_a( Register ra, int atmdisp, Register rb )
+{ sw4_only(); emit_sw2_long( op_ldw_a | is_ra(ra) | is_atmdisp(atmdisp) | is_rb(rb) ); }
+void Assembler::ldl_a( Register ra, int atmdisp, Register rb )
+{ sw4_only(); emit_sw2_long( op_ldl_a | is_ra(ra) | is_atmdisp(atmdisp) | is_rb(rb) ); }
+void Assembler::stb_a( Register ra, int atmdisp, Register rb )
+{ sw4_only(); emit_sw2_long( op_stb_a | is_ra(ra) |
is_atmdisp(atmdisp) | is_rb(rb) ); } +void Assembler::sth_a( Register ra, int atmdisp, Register rb ) +{ sw4_only(); emit_sw2_long( op_sth_a | is_ra(ra) | is_atmdisp(atmdisp) | is_rb(rb) ); } +void Assembler::stw_a( Register ra, int atmdisp, Register rb ) +{ sw4_only(); emit_sw2_long( op_stw_a | is_ra(ra) | is_atmdisp(atmdisp) | is_rb(rb) ); } +void Assembler::stl_a( Register ra, int atmdisp, Register rb ) +{ sw4_only(); emit_sw2_long( op_stl_a | is_ra(ra) | is_atmdisp(atmdisp) | is_rb(rb) ); } +void Assembler::flds_a( FloatRegister fa, int atmdisp, Register rb ) +{ sw4_only(); emit_sw2_long( op_flds_a | is_fa(fa) | is_atmdisp(atmdisp) | is_rb(rb) ); } +void Assembler::fldd_a( FloatRegister fa, int atmdisp, Register rb ) +{ sw4_only(); emit_sw2_long( op_fldd_a | is_fa(fa) | is_atmdisp(atmdisp) | is_rb(rb) ); } +void Assembler::fsts_a( FloatRegister fa, int atmdisp, Register rb ) +{ sw4_only(); emit_sw2_long( op_fsts_a | is_fa(fa) | is_atmdisp(atmdisp) | is_rb(rb) ); } +void Assembler::fstd_a( FloatRegister fa, int atmdisp, Register rb ) +{ sw4_only(); emit_sw2_long( op_fstd_a | is_fa(fa) | is_atmdisp(atmdisp) | is_rb(rb) ); } + +void Assembler::dpfhr( int th, int atmdisp, Register rb ) +{ sw4_only(); emit_sw2_long( op_dpfhr | is_th(th) | is_atmdisp(atmdisp) | is_rb(rb) ); } +void Assembler::dpfhw( int th, int atmdisp, Register rb ) +{ sw4_only(); emit_sw2_long( op_dpfhw | is_th(th) | is_atmdisp(atmdisp) | is_rb(rb) ); } + +void Assembler::ldbu( Register ra, int mdisp, Register rb ) +{ emit_sw2_long( op_ldbu | is_ra(ra) | is_mdisp(mdisp) | is_rb(rb) ); } +void Assembler::ldhu( Register ra, int mdisp, Register rb ) +{ emit_sw2_long( op_ldhu | is_ra(ra) | is_mdisp(mdisp) | is_rb(rb) ); } +void Assembler::ldw( Register ra, int mdisp, Register rb ) +{ emit_sw2_long( op_ldw | is_ra(ra) | is_mdisp(mdisp) | is_rb(rb) ); } +void Assembler::ldl( Register ra, int mdisp, Register rb ) +{ emit_sw2_long( op_ldl | is_ra(ra) | is_mdisp(mdisp) | is_rb(rb) ); } +void Assembler::ldl_u( Register ra, int mdisp, Register rb ) +{ emit_sw2_long( op_ldl_u | is_ra(ra) | is_mdisp(mdisp) | is_rb(rb) ); } + +void Assembler::pri_ld( Register ra, int ev6hwdisp, Register rb ) +{ emit_sw2_long( op_pri_ld | is_ra(ra) | is_ev6hwdisp(ev6hwdisp) | is_rb(rb) ); } + +void Assembler::flds( FloatRegister fa, int mdisp, Register rb ) +{ emit_sw2_long( op_flds | is_fa(fa) | is_mdisp(mdisp) | is_rb(rb) ); } +void Assembler::fldd( FloatRegister fa, int mdisp, Register rb ) +{ emit_sw2_long( op_fldd | is_fa(fa) | is_mdisp(mdisp) | is_rb(rb) ); } +void Assembler::stb( Register ra, int mdisp, Register rb ) +{ emit_sw2_long( op_stb | is_ra(ra) | is_mdisp(mdisp) | is_rb(rb) ); } +void Assembler::sth( Register ra, int mdisp, Register rb ) +{ emit_sw2_long( op_sth | is_ra(ra) | is_mdisp(mdisp) | is_rb(rb) ); } +void Assembler::stw( Register ra, int mdisp, Register rb ) +{ emit_sw2_long( op_stw | is_ra(ra) | is_mdisp(mdisp) | is_rb(rb) ); } +void Assembler::stl( Register ra, int mdisp, Register rb ) +{ emit_sw2_long( op_stl | is_ra(ra) | is_mdisp(mdisp) | is_rb(rb) ); } +void Assembler::stl_u( Register ra, int mdisp, Register rb ) +{ emit_sw2_long( op_stl_u | is_ra(ra) | is_mdisp(mdisp) | is_rb(rb) ); } + +void Assembler::pri_st( Register ra, int ev6hwdisp, Register rb ) +{ emit_sw2_long( op_pri_st | is_ra(ra) | is_ev6hwdisp(ev6hwdisp) | is_rb(rb) ); } + +void Assembler::fsts( FloatRegister fa, int mdisp, Register rb ) +{ emit_sw2_long( op_fsts | is_fa(fa) | is_mdisp(mdisp) | is_rb(rb) ); } +void Assembler::fstd( FloatRegister fa, int mdisp, 
Register rb ) +{ emit_sw2_long( op_fstd | is_fa(fa) | is_mdisp(mdisp) | is_rb(rb) ); } + +void Assembler::beq( Register ra, int bdisp ) +{ emit_sw2_long( op_beq | is_ra(ra) | is_bdisp(bdisp) ); } +void Assembler::bne( Register ra, int bdisp ) +{ emit_sw2_long( op_bne | is_ra(ra) | is_bdisp(bdisp) ); } +void Assembler::blt( Register ra, int bdisp ) +{ emit_sw2_long( op_blt | is_ra(ra) | is_bdisp(bdisp) ); } +void Assembler::ble( Register ra, int bdisp ) +{ emit_sw2_long( op_ble | is_ra(ra) | is_bdisp(bdisp) ); } +void Assembler::bgt( Register ra, int bdisp ) +{ emit_sw2_long( op_bgt | is_ra(ra) | is_bdisp(bdisp) ); } +void Assembler::bge( Register ra, int bdisp ) +{ emit_sw2_long( op_bge | is_ra(ra) | is_bdisp(bdisp) ); } +void Assembler::blbc( Register ra, int bdisp ) +{ emit_sw2_long( op_blbc | is_ra(ra) | is_bdisp(bdisp) ); } +void Assembler::blbs( Register ra, int bdisp ) +{ emit_sw2_long( op_blbs | is_ra(ra) | is_bdisp(bdisp) ); } +void Assembler::fbeq( FloatRegister fa, int bdisp ) +{ emit_sw2_long( op_fbeq | is_fa(fa) | is_bdisp(bdisp) ); } +void Assembler::fbne( FloatRegister fa, int bdisp ) +{ emit_sw2_long( op_fbne | is_fa(fa) | is_bdisp(bdisp) ); } +void Assembler::fblt( FloatRegister fa, int bdisp ) +{ emit_sw2_long( op_fblt | is_fa(fa) | is_bdisp(bdisp) ); } +void Assembler::fble( FloatRegister fa, int bdisp ) +{ emit_sw2_long( op_fble | is_fa(fa) | is_bdisp(bdisp) ); } +void Assembler::fbgt( FloatRegister fa, int bdisp ) +{ emit_sw2_long( op_fbgt | is_fa(fa) | is_bdisp(bdisp) ); } +void Assembler::fbge( FloatRegister fa, int bdisp ) +{ emit_sw2_long( op_fbge | is_fa(fa) | is_bdisp(bdisp) ); } + +void Assembler::ldi( Register ra, int mdisp, Register rb ) +{ emit_sw2_long( op_ldi | is_ra(ra) | is_mdisp(mdisp) | is_rb(rb) ); } +void Assembler::ldih( Register ra, int mdisp, Register rb ) +{ emit_sw2_long( op_ldih | is_ra(ra) | is_mdisp(mdisp) | is_rb(rb) ); } + +// cache control instruction +void Assembler::s_fillcs( int mdisp, Register rb ) +{ ldw( R0, mdisp, rb); } +void Assembler::s_fillde( int mdisp, Register rb ) +{ ldl( R0, mdisp, rb); } +void Assembler::fillde( int mdisp, Register rb ) +{ flds( f31, mdisp, rb); } +void Assembler::fillde_e( int mdisp, Register rb ) +{ fldd( f31, mdisp, rb); } +void Assembler::fillcs( int mdisp, Register rb ) +{ ldwe( f31, mdisp, rb); } +void Assembler::fillcs_e( int mdisp, Register rb ) +{ ldde( f31, mdisp, rb); } +void Assembler::e_fillcs( int mdisp, Register rb ) +{ ldse( f31, mdisp, rb); } +void Assembler::e_fillde( int mdisp, Register rb ) +{ vlds( f31/*V31*/, mdisp, rb); } +void Assembler::flushd( int mdisp, Register rb ) +{ ldbu( R0, mdisp, rb); } +void Assembler::evictdl( int mdisp, Register rb ) +{ ldl_u( R0, mdisp, rb); } +void Assembler::evictdg( int mdisp, Register rb ) +{ ldhu( R0, mdisp, rb); } \ No newline at end of file diff --git a/src/hotspot/cpu/sw64/assembler_sw64.hpp b/src/hotspot/cpu/sw64/assembler_sw64.hpp new file mode 100644 index 00000000000..e6a834f44be --- /dev/null +++ b/src/hotspot/cpu/sw64/assembler_sw64.hpp @@ -0,0 +1,2010 @@ +/* + * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, 2019, Red Hat Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_SW64_VM_ASSEMBLER_SW64_HPP +#define CPU_SW64_VM_ASSEMBLER_SW64_HPP + +#include "asm/register.hpp" +#include "utilities/powerOfTwo.hpp" +#include "runtime/vm_version.hpp" + +// Define some macros to help SW64 Instructions' implementation. +#define OP(x) (((x) & 0x3F) << 26) +#define PCD(oo) (OP(oo)) +#define OPMEM(oo) (OP(oo)) +#define BRA(oo) (OP(oo)) +#define OFP(oo,ff) (OP(oo) | (((ff) & 0xFF) << 5)) +#define FMA(oo,ff) (OP(oo) | (((ff) & 0x3F) << 10)) +#define MFC(oo,ff) (OP(oo) | ((ff) & 0xFFFF)) +#define OPR(oo,ff) (OP(oo) | (((ff) & 0xFF) << 5)) +#define OPRL(oo,ff) (OP(oo) | (((ff) & 0xFF) << 5)) +#define TOPR(oo,ff) (OP(oo) | (((ff) & 0x07) << 10)) +#define TOPRL(oo,ff) (OP(oo) | (((ff) & 0x07) << 10)) + +#define ATMEM(oo,h) (OP(oo) | (((h) & 0xF) << 12)) +#define PRIRET(oo,h) (OP(oo) | (((h) & 0x1) << 20)) +#define EV6HWMEM(oo,ff) (OP(oo) | (((ff) & 0xF) << 12)) +#define CSR(oo,ff) (OP(oo) | (((ff) & 0xFF) << 8)) + +#define LOGX(oo,ff) (OP(oo) | (((ff) & 0x3F) << 10)) +#define PSE_LOGX(oo,ff) (OP(oo) | (((ff) & 0x3F) << 10) | (((ff) >> 0x6) << 26 ) | 0x3E0 ) + +REGISTER_DECLARATION(Register, V0, i0); +REGISTER_DECLARATION(Register, T0, i1); +REGISTER_DECLARATION(Register, T1, i2); +REGISTER_DECLARATION(Register, T2, i3); +REGISTER_DECLARATION(Register, T3, i4); +REGISTER_DECLARATION(Register, T4, i5); +REGISTER_DECLARATION(Register, T5, i6); +REGISTER_DECLARATION(Register, T6, i7); +REGISTER_DECLARATION(Register, T7, i8); +REGISTER_DECLARATION(Register, S0, i9); +REGISTER_DECLARATION(Register, S1, i10); +REGISTER_DECLARATION(Register, S2, i11); +REGISTER_DECLARATION(Register, S3, i12); +REGISTER_DECLARATION(Register, S4, i13); +REGISTER_DECLARATION(Register, S5, i14); +REGISTER_DECLARATION(Register, FP, i15); +REGISTER_DECLARATION(Register, A0, i16); +REGISTER_DECLARATION(Register, A1, i17); +REGISTER_DECLARATION(Register, A2, i18); +REGISTER_DECLARATION(Register, A3, i19); +REGISTER_DECLARATION(Register, A4, i20); +REGISTER_DECLARATION(Register, A5, i21); +REGISTER_DECLARATION(Register, T8, i22); +REGISTER_DECLARATION(Register, T9, i23); +REGISTER_DECLARATION(Register, T10, i24); +REGISTER_DECLARATION(Register, T11, i25); +REGISTER_DECLARATION(Register, RA, i26); +REGISTER_DECLARATION(Register, T12, i27); +REGISTER_DECLARATION(Register, AT, i28); +REGISTER_DECLARATION(Register, GP, i29); +REGISTER_DECLARATION(Register, SP, i30); +REGISTER_DECLARATION(Register, R0, i31); + +REGISTER_DECLARATION(FloatRegister, F0, f0); +REGISTER_DECLARATION(FloatRegister, F1, f1); +REGISTER_DECLARATION(FloatRegister, F2, f2); +REGISTER_DECLARATION(FloatRegister, F3, f3); +REGISTER_DECLARATION(FloatRegister, F4, f4); +REGISTER_DECLARATION(FloatRegister, F5, f5); +REGISTER_DECLARATION(FloatRegister, F6, f6); +REGISTER_DECLARATION(FloatRegister, F7, f7); 
+REGISTER_DECLARATION(FloatRegister, F8, f8); +REGISTER_DECLARATION(FloatRegister, F9, f9); +REGISTER_DECLARATION(FloatRegister, F10, f10); +REGISTER_DECLARATION(FloatRegister, F11, f11); +REGISTER_DECLARATION(FloatRegister, F12, f12); +REGISTER_DECLARATION(FloatRegister, F13, f13); +REGISTER_DECLARATION(FloatRegister, F14, f14); +REGISTER_DECLARATION(FloatRegister, F15, f15); +REGISTER_DECLARATION(FloatRegister, F16, f16); +REGISTER_DECLARATION(FloatRegister, F17, f17); +REGISTER_DECLARATION(FloatRegister, F18, f18); +REGISTER_DECLARATION(FloatRegister, F19, f19); +REGISTER_DECLARATION(FloatRegister, F20, f20); +REGISTER_DECLARATION(FloatRegister, F21, f21); +REGISTER_DECLARATION(FloatRegister, F22, f22); +REGISTER_DECLARATION(FloatRegister, F23, f23); +REGISTER_DECLARATION(FloatRegister, F24, f24); +REGISTER_DECLARATION(FloatRegister, F25, f25); +REGISTER_DECLARATION(FloatRegister, F26, f26); +REGISTER_DECLARATION(FloatRegister, F27, f27); +REGISTER_DECLARATION(FloatRegister, F28, f28); +REGISTER_DECLARATION(FloatRegister, F29, f29); +REGISTER_DECLARATION(FloatRegister, F30, f30); +REGISTER_DECLARATION(FloatRegister, F31, f31); + +////REGISTER_DECLARATION(Register, c_rarg0, i0); +REGISTER_DECLARATION(Register, c_rarg0, A0); +REGISTER_DECLARATION(Register, c_rarg1, A1); +REGISTER_DECLARATION(Register, c_rarg2, A2); +REGISTER_DECLARATION(Register, c_rarg3, A3); +REGISTER_DECLARATION(Register, c_rarg4, A4); +REGISTER_DECLARATION(Register, c_rarg5, A5); + +REGISTER_DECLARATION(FloatRegister, c_farg0, F16); +REGISTER_DECLARATION(FloatRegister, c_farg1, F17); +REGISTER_DECLARATION(FloatRegister, c_farg2, F18); +REGISTER_DECLARATION(FloatRegister, c_farg3, F19); +REGISTER_DECLARATION(FloatRegister, c_farg4, F20); +REGISTER_DECLARATION(FloatRegister, c_farg5, F21); + +// Symbolically name the register arguments used by the Java calling convention. +// We have control over the convention for java so we can do what we please. +// What pleases us is to offset the java calling convention so that when +// we call a suitable jni method the arguments are lined up and we don't +// have to do much shuffling. 
+// A suitable jni method is non-static with a small number of arguments.
+//
+// |-----------------------------------------------------|
+// | c_rarg0 c_rarg1 c_rarg2 c_rarg3 c_rarg4 c_rarg5 |
+// |-----------------------------------------------------|
+// | A0 A1 A2 A3 A4 A5 |
+// |-----------------------------------------------------|
+// | j_rarg5 j_rarg0 j_rarg1 j_rarg2 j_rarg3 j_rarg4 |
+// |-----------------------------------------------------|
+
+
+REGISTER_DECLARATION(Register, j_rarg0, c_rarg1);
+REGISTER_DECLARATION(Register, j_rarg1, c_rarg2);
+REGISTER_DECLARATION(Register, j_rarg2, c_rarg3);
+REGISTER_DECLARATION(Register, j_rarg3, c_rarg4);
+REGISTER_DECLARATION(Register, j_rarg4, c_rarg5);
+REGISTER_DECLARATION(Register, j_rarg5, c_rarg0);
+
+// Java floating args are passed as per C
+
+REGISTER_DECLARATION(FloatRegister, j_farg0, F16);
+REGISTER_DECLARATION(FloatRegister, j_farg1, F17);
+REGISTER_DECLARATION(FloatRegister, j_farg2, F18);
+REGISTER_DECLARATION(FloatRegister, j_farg3, F19);
+REGISTER_DECLARATION(FloatRegister, j_farg4, F20);
+REGISTER_DECLARATION(FloatRegister, j_farg5, F21);
+
+// registers used to hold VM data either temporarily within a method
+// or across method calls
+
+// volatile (caller-save) registers
+
+// the temporaries T5, T6, T11 and the assembler temporary AT are
+// used as scratch registers
+REGISTER_DECLARATION(Register, rscratch1, T5);
+REGISTER_DECLARATION(Register, rscratch2, T6);
+REGISTER_DECLARATION(Register, rscratch3, T11);
+REGISTER_DECLARATION(Register, rscratch4, AT);
+//TODO:need delete, we should not use rscratch1_GP & rscratch2_AT, we should use rcc or rscratch4 to replace jzy
+REGISTER_DECLARATION(Register, rscratch1_GP, GP);
+REGISTER_DECLARATION(Register, rscratch2_AT, AT);
+
+
+// non-volatile (callee-save) registers are S0-S5 and FP,
+// of which the following are dedicated global state
+
+// link register
+REGISTER_DECLARATION(Register, lr, RA);
+// frame pointer
+REGISTER_DECLARATION(Register, rfp, FP);
+
+REGISTER_DECLARATION(Register, rbcp, S0);
+REGISTER_DECLARATION(Register, rlocals, S1);
+REGISTER_DECLARATION(Register, rthread, S2);
+REGISTER_DECLARATION(Register, rmethod, S3);
+REGISTER_DECLARATION(Register, rsender, S4);
+REGISTER_DECLARATION(Register, rheapbase, S5);
+
+REGISTER_DECLARATION(Register, rdispatch, T8);
+REGISTER_DECLARATION(Register, rnext, T10);
+REGISTER_DECLARATION(Register, rmonitors, T11);
+//REGISTER_DECLARATION(Register, rcpool, T12); //???
+REGISTER_DECLARATION(Register, pv, T12); // as target procedure, maybe be used as temp register + +REGISTER_DECLARATION(Register, esp, SP); +REGISTER_DECLARATION(Register, rcc, GP); + +REGISTER_DECLARATION(Register, FSR, V0); +REGISTER_DECLARATION(Register, SSR, T4); +REGISTER_DECLARATION(FloatRegister, FSF, f0); +REGISTER_DECLARATION(FloatRegister, SSF, f1); +REGISTER_DECLARATION(FloatRegister, FTF, f14); +REGISTER_DECLARATION(FloatRegister, FcmpRES, f29); //TODO:need delete jzy +REGISTER_DECLARATION(FloatRegister, fcc, f29); +REGISTER_DECLARATION(FloatRegister, fscratch1, f28); +REGISTER_DECLARATION(FloatRegister, fzero, f31); + +// x86 GPR simulation +REGISTER_DECLARATION(Register, rax, V0); +REGISTER_DECLARATION(Register, rdi, A0); +REGISTER_DECLARATION(Register, rsi, A1); +REGISTER_DECLARATION(Register, rdx, A2); +REGISTER_DECLARATION(Register, rcx, A3); +REGISTER_DECLARATION(Register, r8, A4); +REGISTER_DECLARATION(Register, r9, A5); +REGISTER_DECLARATION(Register, rbx, S3); +REGISTER_DECLARATION(Register, rbp, FP); +REGISTER_DECLARATION(Register, r12, S5); +REGISTER_DECLARATION(Register, r13, S0); +REGISTER_DECLARATION(Register, r14, S1); +REGISTER_DECLARATION(Register, r15, S2); +REGISTER_DECLARATION(Register, r10, T5); +REGISTER_DECLARATION(Register, r11, T6); +REGISTER_DECLARATION(Register, rsp, SP); + +REGISTER_DECLARATION(Register, r12_heapbase, r12); // callee-saved +REGISTER_DECLARATION(Register, r15_thread, r15); // callee-saved + +REGISTER_DECLARATION(FloatRegister, xmm0, f0);//check +REGISTER_DECLARATION(FloatRegister, xmm1, f1); +REGISTER_DECLARATION(FloatRegister, xmm2, f2); +REGISTER_DECLARATION(FloatRegister, xmm3, f3); +REGISTER_DECLARATION(FloatRegister, xmm4, f4); +REGISTER_DECLARATION(FloatRegister, xmm5, f5); +REGISTER_DECLARATION(FloatRegister, xmm6, f6); +REGISTER_DECLARATION(FloatRegister, xmm7, f7); + +#define OPT_SAFEPOINT 1 + +#define assert_cond(ARG1) assert(ARG1, #ARG1) + +class Assembler; + +class ArrayAddress; + +// Addressing modes +class Address { +public: + enum ScaleFactor { + no_scale = -1, + times_1 = 0, + times_2 = 1, + times_4 = 2, + times_8 = 3, + times_ptr = times_8 + }; + + static ScaleFactor times(int size) { + assert(size >= 1 && size <= 8 && is_power_of_2(size), "bad scale size"); + if (size == 8) return times_8; + if (size == 4) return times_4; + if (size == 2) return times_2; + return times_1; + } + + static int scale_size(ScaleFactor scale) { + assert(scale != no_scale, ""); + assert(((1 << (int)times_1) == 1 && + (1 << (int)times_2) == 2 && + (1 << (int)times_4) == 4 && + (1 << (int)times_8) == 8), ""); + return (1 << (int)scale); + } + + enum mode { base_plus_disp, base_index_scale_disp }; + + private: + Register _base; + Register _index; + Register _tmp; + ScaleFactor _scale; + long _offset; + int _disp;//why int not long? jzy + enum mode _mode; + + RelocationHolder _rspec; + + Address(int disp, address loc, relocInfo::relocType rtype); + + // Typically we use AddressLiterals we want to use their rval + // However in some situations we want the lval (effect address) of + // the item. We provide a special factory for making those lvals. + bool _is_lval; + + // If the target is far we'll need to load the ea of this to a + // register to reach it. Otherwise if near we can do PC-relative + // addressing. 
+ address _target; + + public: +// Address() +// : _base(noreg), +// _disp(0) { +// } + + Address(Register base, Register index, ScaleFactor scale, int disp = 0) + : _base (base), + _index(index), + _scale(scale), + _disp (disp), + _mode (base_index_scale_disp), + _tmp (noreg) { + assert(!index->is_valid() == (scale == Address::no_scale), + "inconsistent address"); + } + + Address(Register base, RegisterOrConstant index, ScaleFactor scale = times_1, int disp = 0) + : _base (base), + _index(index.register_or_noreg()), + _scale(scale), + _disp (disp + (index.constant_or_zero() * scale_size(scale))), + _mode (index.is_constant() ? base_plus_disp : base_index_scale_disp), + _tmp (noreg){ + if (!index.is_register()) scale = Address::no_scale; + assert(!_index->is_valid() == (scale == Address::no_scale), + "inconsistent address"); + } + + Address(Register base = noreg, int disp = 0) + : _base(base), + _index(noreg), + _scale(no_scale), + _disp(disp), + _tmp(noreg), + _mode(base_plus_disp){ + } + + // The following overloads are used in connection with the + // ByteSize type (see sizes.hpp). They simplify the use of + // ByteSize'd arguments in assembly code. + + Address(Register base, ByteSize disp) + : Address(base, in_bytes(disp)) {} + Address(Register base, Register index, ScaleFactor scale, ByteSize disp) + : Address(base, index, scale, in_bytes(disp)) {} + Address(Register base, RegisterOrConstant index, ScaleFactor scale, ByteSize disp) + : Address(base, index, scale, in_bytes(disp)) {} + + void emit(Register ra, Assembler* as, int opcode); + void emit(FloatRegister ra, Assembler* as, int opcode); + + // accessors + bool uses(Register reg) const { return _base == reg || _index == reg; } + Register base() const { return _base; } + int disp() const { return _disp; } + Register index() const { return _index; } + ScaleFactor scale() const { return _scale; } + void setTmp(Register reg) { + _tmp = reg; + } + long offset() const { + return _offset; + } + mode getMode() const { + return _mode; + } + address target() const { return _target; } + const RelocationHolder& rspec() const { return _rspec; } + static Address make_raw(int base, int index, int scale, int disp, relocInfo::relocType disp_reloc); + + private: + + RelocationHolder rspec_from_rtype(relocInfo::relocType rtype, address addr) { + switch (rtype) { + case relocInfo::external_word_type: + return external_word_Relocation::spec(addr); + case relocInfo::internal_word_type: + return internal_word_Relocation::spec(addr); + case relocInfo::opt_virtual_call_type: + return opt_virtual_call_Relocation::spec(); + case relocInfo::static_call_type: + return static_call_Relocation::spec(); + case relocInfo::runtime_call_type: + return runtime_call_Relocation::spec(); + case relocInfo::poll_type: + case relocInfo::poll_return_type: + return Relocation::spec_simple(rtype); + case relocInfo::none: + case relocInfo::oop_type: + // Oops are a special case. Normally they would be their own section + // but in cases like icBuffer they are literals in the code stream that + // we don't have a section for. We use none so that we get a literal address + // which is always patchable. 
+ return RelocationHolder(); + default: + ShouldNotReachHere(); + return RelocationHolder(); + } + } + +public: + + friend class Assembler; + friend class MacroAssembler; + friend class LIR_Assembler; // base/index/scale/disp +}; + +class Argument { + private: + int _number; + public: + enum { + n_register_parameters = 6, // 6 integer registers used to pass parameters + n_float_register_parameters = 6, // 6 float registers used to pass parameters + + n_int_register_parameters_c = 6, // r0, r1, ... r7 (c_rarg0, c_rarg1, ...) + n_float_register_parameters_c = 6, // v0, v1, ... v7 (c_farg0, c_farg1, ... ) + n_int_register_parameters_j = 6, // r1, ... r7, r0 (rj_rarg0, j_rarg1, ... + n_float_register_parameters_j = 6 // v0, v1, ... v7 (j_farg0, j_farg1, .. + }; + + Argument(int number):_number(number){ } + + int number()const {return _number;} + bool is_Register()const {return _number < n_register_parameters;} + bool is_FloatRegister()const {return _number < n_float_register_parameters;} + + Register as_Register()const { + assert(is_Register(), "must be a register argument"); + return ::as_Register(A0->encoding() + _number); + } + FloatRegister as_FloatRegister()const { + assert(is_FloatRegister(), "must be a float register argument"); + return ::as_FloatRegister(F16->encoding() + _number); + } + + Address as_caller_address()const {return Address(esp, (number() - n_register_parameters) * wordSize);} +}; + +class AddressLiteral { + friend class ArrayAddress; + RelocationHolder _rspec; + // Typically we use AddressLiterals we want to use their rval + // However in some situations we want the lval (effect address) of the item. + // We provide a special factory for making those lvals. + bool _is_lval; + + // If the target is far we'll need to load the ea of this to + // a register to reach it. Otherwise if near we can do rip + // relative addressing. + + address _target; + + protected: + // creation + AddressLiteral() + : _is_lval(false), + _target(NULL) + {} + + public: + + + AddressLiteral(address target, relocInfo::relocType rtype); + + AddressLiteral(address target, RelocationHolder const& rspec) + : _rspec(rspec), + _is_lval(false), + _target(target) + {} + + AddressLiteral addr() { + AddressLiteral ret = *this; + ret._is_lval = true; + return ret; + } + + + private: + + address target() { return _target; } + bool is_lval() { return _is_lval; } + + relocInfo::relocType reloc() const { return _rspec.type(); } + const RelocationHolder& rspec() const { return _rspec; } + + friend class Assembler; + friend class MacroAssembler; + friend class Address; + friend class LIR_Assembler; +}; + +// Convience classes +class RuntimeAddress: public AddressLiteral { + + public: + + RuntimeAddress(address target) : AddressLiteral(target, relocInfo::runtime_call_type) {} + +}; + +class ExternalAddress: public AddressLiteral { + private: + static relocInfo::relocType reloc_for_target(address target) { + // Sometimes ExternalAddress is used for values which aren't + // exactly addresses, like the card table base. + // external_word_type can't be used for values in the first page + // so just skip the reloc in that case. + return external_word_Relocation::can_be_relocated(target) ? 
relocInfo::external_word_type : relocInfo::none; + } + + public: + + ExternalAddress(address target) : AddressLiteral(target, reloc_for_target(target)) {} + +}; + +class InternalAddress: public AddressLiteral { + + public: + + InternalAddress(address target) : AddressLiteral(target, relocInfo::internal_word_type) {} +}; + +// x86 can do array addressing as a single operation since disp can be an absolute +// address amd64 can't. We create a class that expresses the concept but does extra +// magic on amd64 to get the final result + +class ArrayAddress { + private: + + AddressLiteral _base; + Address _index; + + public: + + ArrayAddress() {}; + ArrayAddress(AddressLiteral base, Address index): _base(base), _index(index) {}; + AddressLiteral base() { return _base; } + Address index() { return _index; } + +}; + +class Assembler : public AbstractAssembler { + friend class AbstractAssembler; // for the non-virtual hack + +#ifndef PRODUCT + static const unsigned long asm_bp; + + void emit_long(jint x) { + if ((unsigned long)pc() == asm_bp) + asm volatile ("nop"); + AbstractAssembler::emit_int32(x); + } +#else + void emit_long(jint x) { + AbstractAssembler::emit_int32(x); + } +#endif + +public: + enum Condition { + zero = 0x4, + notZero = 0x5, + equal = 0x4, + notEqual = 0x5, + less = 0xc, + lessEqual = 0xe, + greater = 0xf, + greaterEqual = 0xd, + below = 0x2, + belowEqual = 0x6, + above = 0x7, + aboveEqual = 0x3, + overflow = 0x0, + noOverflow = 0x1, + carrySet = 0x2, + carryClear = 0x3, + positive = 0x9, + negative = 0x8, + notNegative = 0x10, + success = 0xa, + failed = 0xb, +// // Conditional branch (immediate) +// EQ, NE, HS, CS=HS, LO, CC=LO, MI, PL, VS, VC, HI, LS, GE, LT, GT, LE, AL, NV + }; + enum ConditionLength { + bitl = 64, + bitw = 32, + bith = 16, + bitb = 8 + }; + + enum WhichOperand { + imm_operand = 0, // embedded 32-bit|64-bit immediate operand + disp32_operand = 1, // embedded 32-bit displacement or address + call32_operand = 2, // embedded 32-bit self-relative displacement + narrow_oop_operand = 3, // embedded 32-bit immediate narrow oop + _WhichOperand_limit = 4 + }; + enum { instruction_size = 4 }; + + //---< calculate length of instruction >--- + // As instruction size can't be found out easily on x86/x64, + // we just use '4' for len and maxlen. + // instruction must start at passed address + static unsigned int instr_len(unsigned char *instr) { return 4; } + + //---< longest instructions >--- + // Max instruction length is not specified in architecture documentation. + // We could use a "safe enough" estimate (15), but just default to + // instruction length guess from above. + static unsigned int instr_maxlen() { return 4; } + +// // The maximum range of a branch is fixed for the Sw64 +// // architecture. In debug mode we shrink it in order to test +// // trampolines, but not so small that branches in the interpreter +// // are out of range. 
+// static const unsigned long branch_range = NOT_DEBUG(128 * M) DEBUG_ONLY(2 * M); +// +// static bool reachable_from_branch_at(address branch, address target) { +// return uabs(target - branch) < branch_range; +// } + + // Floating-point Move (immediate) +private: + unsigned pack(double value); + +public: + + Assembler(CodeBuffer* code) : AbstractAssembler(code) { +#ifdef CHECK_DELAY + delay_state = no_delay; +#endif + } + + virtual RegisterOrConstant delayed_value_impl(intptr_t* delayed_value_addr, + Register tmp, + int offset) { + ShouldNotCallThis(); + return RegisterOrConstant(); + } + + // Stack overflow checking + virtual void bang_stack_with_offset(int offset); + + static bool operand_valid_for_logical_immediate(bool is32, uint64_t imm); + static bool operand_valid_for_add_sub_immediate(long imm); + static bool operand_valid_for_float_immediate(double imm); + + void emit_data64(jlong data, relocInfo::relocType rtype, int format = 0); + void emit_data64(jlong data, RelocationHolder const& rspec, int format = 0); + +public: + enum ops_mem { + op_call = OPMEM(0x01), + op_ret = OPMEM(0x02), + op_jmp = OPMEM(0x03), + op_ldwe = OPMEM(0x09), op_fillcs = op_ldwe, + op_ldse = OPMEM(0x0A), op_e_fillcs = op_ldse, + op_ldde = OPMEM(0x0B), op_fillcs_e = op_ldde, + op_vlds = OPMEM(0x0C), op_e_fillde = op_vlds, + op_vldd = OPMEM(0x0D), + op_vsts = OPMEM(0x0E), + op_vstd = OPMEM(0x0F), + op_ldbu = OPMEM(0x20), op_flushd = op_ldbu, + op_ldhu = OPMEM(0x21), op_evictdg = op_ldhu, + op_ldw = OPMEM(0x22), op_s_fillcs = op_ldw, + op_ldl = OPMEM(0x23), op_s_fillde = op_ldl, + op_ldl_u = OPMEM(0x24), op_evictdl = op_ldl_u, + op_flds = OPMEM(0x26), op_fillde = op_flds, + op_fldd = OPMEM(0x27), op_fillde_e = op_fldd, + op_stb = OPMEM(0x28), + op_sth = OPMEM(0x29), + op_stw = OPMEM(0x2A), + op_stl = OPMEM(0x2B), + op_stl_u = OPMEM(0x2C), + op_fsts = OPMEM(0x2E), + op_fstd = OPMEM(0x2F), + op_ldi = OPMEM(0x3E), + op_ldih = OPMEM(0x3F) + }; + + enum ops_atmem { + op_lldw = ATMEM(0x08, 0x0), + op_lldl = ATMEM(0x08, 0x1), + op_ldw_inc = ATMEM(0x08, 0x2), //SW2F + op_ldl_inc = ATMEM(0x08, 0x3), //SW2F + op_ldw_dec = ATMEM(0x08, 0x4), //SW2F + op_ldl_dec = ATMEM(0x08, 0x5), //SW2F + op_ldw_set = ATMEM(0x08, 0x6), //SW2F + op_ldl_set = ATMEM(0x08, 0x7), //SW2F + op_lstw = ATMEM(0x08, 0x8), + op_lstl = ATMEM(0x08, 0x9), + op_ldw_nc = ATMEM(0x08, 0xA), + op_ldl_nc = ATMEM(0x08, 0xB), + op_ldd_nc = ATMEM(0x08, 0xC), + op_stw_nc = ATMEM(0x08, 0xD), + op_stl_nc = ATMEM(0x08, 0xE), + op_std_nc = ATMEM(0x08, 0xF), + op_vldw_u = ATMEM(0x1C, 0x0), + op_vstw_u = ATMEM(0x1C, 0x1), + op_vlds_u = ATMEM(0x1C, 0x2), + op_vsts_u = ATMEM(0x1C, 0x3), + op_vldd_u = ATMEM(0x1C, 0x4), + op_vstd_u = ATMEM(0x1C, 0x5), + op_vstw_ul = ATMEM(0x1C, 0x8), + op_vstw_uh = ATMEM(0x1C, 0x9), + op_vsts_ul = ATMEM(0x1C, 0xA), + op_vsts_uh = ATMEM(0x1C, 0xB), + op_vstd_ul = ATMEM(0x1C, 0xC), + op_vstd_uh = ATMEM(0x1C, 0xD), + op_vldd_nc = ATMEM(0x1C, 0xE), + op_vstd_nc = ATMEM(0x1C, 0xF), + op_ldbu_a = ATMEM(0x1E, 0x0), //SW8A + op_ldhu_a = ATMEM(0x1E, 0x1), //SW8A + op_ldw_a = ATMEM(0x1E, 0x2), //SW8A + op_ldl_a = ATMEM(0x1E, 0x3), //SW8A + op_flds_a = ATMEM(0x1E, 0x4), //SW8A + op_fldd_a = ATMEM(0x1E, 0x5), //SW8A + op_stb_a = ATMEM(0x1E, 0x6), //SW8A + op_sth_a = ATMEM(0x1E, 0x7), //SW8A + op_stw_a = ATMEM(0x1E, 0x8), //SW8A + op_stl_a = ATMEM(0x1E, 0x9), //SW8A + op_fsts_a = ATMEM(0x1E, 0xA), //SW8A + op_fstd_a = ATMEM(0x1E, 0xB) //SW8A + }; + + enum ops_ev6hwmem { + op_pri_ld = EV6HWMEM(0x25, 0x0), + op_pri_st = EV6HWMEM(0x2D, 0x0), + }; + + 
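The opcode enums above fix only the opcode/function fields of each 32-bit instruction word; the register and displacement fields are OR-ed in by the is_ra/is_rb/is_mdisp helpers declared further down, exactly as nop() does with op_ldi | is_ra(R0). A minimal standalone sketch of that packing for a memory-format instruction, assuming the Alpha-style layout those helpers imply (major opcode in bits 31..26, Ra in 25..21, Rb in 20..16, signed 16-bit displacement in the low bits); the mem_format() helper here is illustrative, not the port's emit path, and treating OPMEM(0x23) for ldl as "opcode field = 0x23" is an assumption:

```c++
// Sketch: compose a memory-format SW64-style instruction word the way the
// is_ra/is_rb/is_mdisp helpers do. Field positions and the 0x23 ldl opcode
// are assumptions based on those helpers and the ops_mem enum.
#include <cstdint>
#include <cstdio>

static uint32_t u_field(uint32_t x, int hi, int lo) {
  return (x & ((1u << (hi - lo + 1)) - 1)) << lo;   // place x in bits hi..lo
}

static uint32_t mem_format(uint32_t opcode, int ra, int rb, int16_t disp) {
  return u_field(opcode, 31, 26)      // major opcode
       | u_field((uint32_t)ra, 25, 21) // destination/source register Ra
       | u_field((uint32_t)rb, 20, 16) // base register Rb
       | (uint16_t)disp;               // signed 16-bit displacement, low bits
}

int main() {
  // e.g. an "ldl r1, 16(r2)"-style word under the assumed 0x23 opcode
  std::printf("0x%08x\n", mem_format(0x23, 1, 2, 16));
  return 0;
}
```

The real assembler, of course, emits through emit_sw2_long() and relies on the ASSERT-checked u_field()/simm() helpers for range checking rather than the unchecked packing above.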
enum ops_opr { + op_addw = OPR(0x10, 0x00), + op_subw = OPR(0x10, 0x01), + op_s4addw = OPR(0x10, 0x02), + op_s4subw = OPR(0x10, 0x03), + op_s8addw = OPR(0x10, 0x04), + op_s8subw = OPR(0x10, 0x05), + op_addl = OPR(0x10, 0x08), + op_subl = OPR(0x10, 0x09), + op_s4addl = OPR(0x10, 0x0A), + op_s4subl = OPR(0x10, 0x0B), + op_s8addl = OPR(0x10, 0x0C), + op_s8subl = OPR(0x10, 0x0D), + op_mulw = OPR(0x10, 0x10), + op_divw = OPR(0x10, 0x11), //SW8A + op_udivw = OPR(0x10, 0x12), //SW8A + op_remw = OPR(0x10, 0x13), //SW8A + op_uremw = OPR(0x10, 0x14), //SW8A + op_mull = OPR(0x10, 0x18), + op_umulh = OPR(0x10, 0x19), + op_divl = OPR(0x10, 0x1A), //SW8A + op_udivl = OPR(0x10, 0x1B), //SW8A + op_reml = OPR(0x10, 0x1C), //SW8A + op_ureml = OPR(0x10, 0x1D), //SW8A + op_addpi = OPR(0x10, 0x1E), //SW8A + op_addpis = OPR(0x10, 0x1F), //SW8A + op_cmpeq = OPR(0x10, 0x28), + op_cmplt = OPR(0x10, 0x29), + op_cmple = OPR(0x10, 0x2A), + op_cmpult = OPR(0x10, 0x2B), + op_cmpule = OPR(0x10, 0x2C), + op_sbt = OPR(0x10, 0x2D), //SW8A + op_cbt = OPR(0x10, 0x2E), //SW8A + op_and = OPR(0x10, 0x38), + op_bic = OPR(0x10, 0x39), + op_bis = OPR(0x10, 0x3A), + op_ornot = OPR(0x10, 0x3B), + op_xor = OPR(0x10, 0x3C), + op_eqv = OPR(0x10, 0x3D), + op_inslb = OPR(0x10, 0x40), //0x10.40~0x10.47 + op_inslh = OPR(0x10, 0x41), + op_inslw = OPR(0x10, 0x42), + op_insll = OPR(0x10, 0x43), + op_inshb = OPR(0x10, 0x44), + op_inshh = OPR(0x10, 0x45), + op_inshw = OPR(0x10, 0x46), + op_inshl = OPR(0x10, 0x47), + op_slll = OPR(0x10, 0x48), + op_srll = OPR(0x10, 0x49), + op_sral = OPR(0x10, 0x4A), + op_roll = OPR(0x10, 0x4B), //SW8A + op_sllw = OPR(0x10, 0x4C), //SW8A + op_srlw = OPR(0x10, 0x4D), //SW8A + op_sraw = OPR(0x10, 0x4E), //SW8A + op_rolw = OPR(0x10, 0x4F), //SW8A + op_extlb = OPR(0x10, 0x50), //0x10.50~0x10.57 + op_extlh = OPR(0x10, 0x51), + op_extlw = OPR(0x10, 0x52), + op_extll = OPR(0x10, 0x53), + op_exthb = OPR(0x10, 0x54), + op_exthh = OPR(0x10, 0x55), + op_exthw = OPR(0x10, 0x56), + op_exthl = OPR(0x10, 0x57), + op_ctpop = OPR(0x10, 0x58), + op_ctlz = OPR(0x10, 0x59), + op_cttz = OPR(0x10, 0x5A), + op_revbh = OPR(0x10, 0x5B), //SW8A + op_revbw = OPR(0x10, 0x5C), //SW8A + op_revbl = OPR(0x10, 0x5D), //SW8A + op_casw = OPR(0x10, 0x5E), //SW8A + op_casl = OPR(0x10, 0x5F), //SW8A + op_masklb = OPR(0x10, 0x60), //0x10.60~0x10.67 + op_masklh = OPR(0x10, 0x61), + op_masklw = OPR(0x10, 0x62), + op_maskll = OPR(0x10, 0x63), + op_maskhb = OPR(0x10, 0x64), + op_maskhh = OPR(0x10, 0x65), + op_maskhw = OPR(0x10, 0x66), + op_maskhl = OPR(0x10, 0x67), + op_zap = OPR(0x10, 0x68), + op_zapnot = OPR(0x10, 0x69), + op_sextb = OPR(0x10, 0x6A), + op_sexth = OPR(0x10, 0x6B), + op_cmpgeb = OPR(0x10, 0x6C), //0x10.6C + op_fimovs = OPR(0x10, 0x70), + op_fimovd = OPR(0x10, 0x78), + op_cmovdl = OFP(0x10, 0x72),//SW8A + op_cmovdl_g = OFP(0x10, 0x74), + op_cmovdl_p = OFP(0x10, 0x7A), + op_cmovdl_z = OFP(0x10, 0x7C), + op_cmovdl_n = OFP(0x10, 0x80), + op_cmovdlu = OFP(0x10, 0x81), + op_cmovdlu_g= OFP(0x10, 0x82), + op_cmovdlu_p= OFP(0x10, 0x83), + op_cmovdlu_z= OFP(0x10, 0x84), + op_cmovdlu_n= OFP(0x10, 0x85), + op_cmovdw = OFP(0x10, 0x8B), + op_cmovdw_g = OFP(0x10, 0x8C), + op_cmovdw_p = OFP(0x10, 0x8D), + op_cmovdw_z = OFP(0x10, 0x8E), + op_cmovdw_n = OFP(0x10, 0x8F), + op_cmovdwu = OFP(0x10, 0x86), + op_cmovdwu_g= OFP(0x10, 0x87), + op_cmovdwu_p= OFP(0x10, 0x88), + op_cmovdwu_z= OFP(0x10, 0x89), + op_cmovdwu_n= OFP(0x10, 0x8A),//SW8A + op_seleq = TOPR(0x11, 0x0), + op_selge = TOPR(0x11, 0x1), + op_selgt = TOPR(0x11, 0x2), + op_selle = TOPR(0x11, 0x3), 
+ op_sellt = TOPR(0x11, 0x4), + op_selne = TOPR(0x11, 0x5), + op_sellbc = TOPR(0x11, 0x6), + op_sellbs = TOPR(0x11, 0x7) + }; + + enum ops_oprl{ + op_addw_l = OPRL(0x12, 0x00), + op_subw_l = OPRL(0x12, 0x01), + op_s4addw_l = OPRL(0x12, 0x02), + op_s4subw_l = OPRL(0x12, 0x03), + op_s8addw_l = OPRL(0x12, 0x04), + op_s8subw_l = OPRL(0x12, 0x05), + op_addl_l = OPRL(0x12, 0x08), + op_subl_l = OPRL(0x12, 0x09), + op_s4addl_l = OPRL(0x12, 0x0A), + op_s4subl_l = OPRL(0x12, 0x0B), + op_s8addl_l = OPRL(0x12, 0x0C), + op_s8subl_l = OPRL(0x12, 0x0D), + op_mulw_l = OPRL(0x12, 0x10), + op_mull_l = OPRL(0x12, 0x18), + op_umulh_l = OPRL(0x12, 0x19), + op_cmpeq_l = OPRL(0x12, 0x28), + op_cmplt_l = OPRL(0x12, 0x29), + op_cmple_l = OPRL(0x12, 0x2A), + op_cmpult_l = OPRL(0x12, 0x2B), + op_cmpule_l = OPRL(0x12, 0x2C), + op_sbt_l = OPRL(0x12, 0x2D), //SW8A + op_cbt_l = OPRL(0x12, 0x2E), //SW8A + op_and_l = OPRL(0x12, 0x38), + op_bic_l = OPRL(0x12, 0x39), + op_bis_l = OPRL(0x12, 0x3A), + op_ornot_l = OPRL(0x12, 0x3B), + op_xor_l = OPRL(0x12, 0x3C), + op_eqv_l = OPRL(0x12, 0x3D), + op_inslb_l = OPRL(0x12, 0x40), //0x12.40~0x12.47 + op_inslh_l = OPRL(0x12, 0x41), + op_inslw_l = OPRL(0x12, 0x42), + op_insll_l = OPRL(0x12, 0x43), + op_inshb_l = OPRL(0x12, 0x44), + op_inshh_l = OPRL(0x12, 0x45), + op_inshw_l = OPRL(0x12, 0x46), + op_inshl_l = OPRL(0x12, 0x47), + op_slll_l = OPRL(0x12, 0x48), + op_srll_l = OPRL(0x12, 0x49), + op_sral_l = OPRL(0x12, 0x4A), + op_roll_l = OPRL(0x12, 0x4B), //SW8A + op_sllw_l = OPRL(0x12, 0x4C), //SW8A + op_srlw_l = OPRL(0x12, 0x4D), //SW8A + op_sraw_l = OPRL(0x12, 0x4E), //SW8A + op_rolw_l = OPRL(0x12, 0x4F), //SW8A + op_extlb_l = OPRL(0x12, 0x50), //0x12.50~0x12.57 + op_extlh_l = OPRL(0x12, 0x51), + op_extlw_l = OPRL(0x12, 0x52), + op_extll_l = OPRL(0x12, 0x53), + op_exthb_l = OPRL(0x12, 0x54), + op_exthh_l = OPRL(0x12, 0x55), + op_exthw_l = OPRL(0x12, 0x56), + op_exthl_l = OPRL(0x12, 0x57), + op_masklb_l = OPRL(0x12, 0x60), //0x12.60~0x12.67 + op_masklh_l = OPRL(0x12, 0x61), + op_masklw_l = OPRL(0x12, 0x62), + op_maskll_l = OPRL(0x12, 0x63), + op_maskhb_l = OPRL(0x12, 0x64), + op_maskhh_l = OPRL(0x12, 0x65), + op_maskhw_l = OPRL(0x12, 0x66), + op_maskhl_l = OPRL(0x12, 0x67), + op_zap_l = OPRL(0x12, 0x68), + op_zapnot_l = OPRL(0x12, 0x69), + op_sextb_l = OPRL(0x12, 0x6A), + op_sexth_l = OPRL(0x12, 0x6B), + op_cmpgeb_l = OPRL(0x12, 0x6C), //0x12.6C + op_seleq_l = TOPRL(0x13, 0x0), + op_selge_l = TOPRL(0x13, 0x1), + op_selgt_l = TOPRL(0x13, 0x2), + op_selle_l = TOPRL(0x13, 0x3), + op_sellt_l = TOPRL(0x13, 0x4), + op_selne_l = TOPRL(0x13, 0x5), + op_sellbc_l = TOPRL(0x13, 0x6), + op_sellbs_l = TOPRL(0x13, 0x7) + }; + + enum ops_bra { + op_br = BRA(0x04), + op_bsr = BRA(0x05), + op_beq = BRA(0x30), + op_bne = BRA(0x31), + op_blt = BRA(0x32), + op_ble = BRA(0x33), + op_bgt = BRA(0x34), + op_bge = BRA(0x35), + op_blbc = BRA(0x36), + op_blbs = BRA(0x37), + op_fbeq = BRA(0x38), + op_fbne = BRA(0x39), + op_fblt = BRA(0x3A), + op_fble = BRA(0x3B), + op_fbgt = BRA(0x3C), + op_fbge = BRA(0x3D), + op_lbr = BRA(0x1D), //SW8A + }; + + enum ops_fp { + op_fadds = OFP(0x18, 0x00), + op_faddd = OFP(0x18, 0x01), + op_fsubs = OFP(0x18, 0x02), + op_fsubd = OFP(0x18, 0x03), + op_fmuls = OFP(0x18, 0x04), + op_fmuld = OFP(0x18, 0x05), + op_fdivs = OFP(0x18, 0x06), + op_fdivd = OFP(0x18, 0x07), + op_fsqrts = OFP(0x18, 0x08), + op_fsqrtd = OFP(0x18, 0x09), + op_fcmpeq = OFP(0x18, 0x10), + op_fcmple = OFP(0x18, 0x11), + op_fcmplt = OFP(0x18, 0x12), + op_fcmpun = OFP(0x18, 0x13), + op_fcvtsd = OFP(0x18, 0x20), + 
op_fcvtds = OFP(0x18, 0x21), + op_fcvtdl_g = OFP(0x18, 0x22), //lx_fcvtdl + op_fcvtdl_p = OFP(0x18, 0x23), + op_fcvtdl_z = OFP(0x18, 0x24), + op_fcvtdl_n = OFP(0x18, 0x25), //lx_fcvtdl + op_fcvtdl = OFP(0x18, 0x27), + op_fcvtwl = OFP(0x18, 0x28), + op_fcvtlw = OFP(0x18, 0x29), + op_fcvtls = OFP(0x18, 0x2D), + op_fcvtld = OFP(0x18, 0x2F), + op_fcpys = OFP(0x18, 0x30), + op_fcpyse = OFP(0x18, 0x31), + op_fcpysn = OFP(0x18, 0x32), + op_ifmovs = OFP(0x18, 0x40), + op_ifmovd = OFP(0x18, 0x41), + op_cmovls = OFP(0x18, 0x48),//SW8A + op_cmovld = OFP(0x18, 0x4A), + op_cmovuls = OFP(0x18, 0x4C), + op_cmovuld = OFP(0x18, 0x4E), + op_cmovws = OFP(0x18, 0x49), + op_cmovwd = OFP(0x18, 0x4B), + op_cmovuws = OFP(0x18, 0x4D), + op_cmovuwd = OFP(0x18, 0x4F),//SW8A + op_rfpcr = OFP(0x18, 0x50), + op_wfpcr = OFP(0x18, 0x51), + op_setfpec0 = OFP(0x18, 0x54), + op_setfpec1 = OFP(0x18, 0x55), + op_setfpec2 = OFP(0x18, 0x56), + op_setfpec3 = OFP(0x18, 0x57), + op_frecs = OFP(0x18, 0x58), //SW8A + op_frecd = OFP(0x18, 0x59), //SW8A + op_fris = OFP(0x18, 0x5A), //SW8A + op_fris_g = OFP(0x18, 0x5B), //SW8A + op_fris_p = OFP(0x18, 0x5C), //SW8A + op_fris_z = OFP(0x18, 0x5D), //SW8A + op_fris_n = OFP(0x18, 0x5F), //SW8A + op_frid = OFP(0x18, 0x60), //SW8A + op_frid_g = OFP(0x18, 0x61), //SW8A + op_frid_p = OFP(0x18, 0x62), //SW8A + op_frid_z = OFP(0x18, 0x63), //SW8A + op_frid_n = OFP(0x18, 0x64), //SW8A + op_vaddw = OFP(0x1A, 0x00), + op_vsubw = OFP(0x1A, 0x01), + op_vcmpgew = OFP(0x1A, 0x02), + op_vcmpeqw = OFP(0x1A, 0x03), + op_vcmplew = OFP(0x1A, 0x04), + op_vcmpltw = OFP(0x1A, 0x05), + op_vcmpulew = OFP(0x1A, 0x06), + op_vcmpultw = OFP(0x1A, 0x07), + op_vsllw = OFP(0x1A, 0x08), + op_vsrlw = OFP(0x1A, 0x09), + op_vsraw = OFP(0x1A, 0x0A), + op_vrolw = OFP(0x1A, 0x0B), + op_sllow = OFP(0x1A, 0x0C), + op_srlow = OFP(0x1A, 0x0D), + op_vaddl = OFP(0x1A, 0x0E), + op_vsubl = OFP(0x1A, 0x0F), + op_vsllb = OFP(0x1A, 0x10), //SW8A + op_vsrlb = OFP(0x1A, 0x11), //SW8A + op_vsrab = OFP(0x1A, 0x12), //SW8A + op_vrolb = OFP(0x1A, 0x13), //SW8A + op_vsllh = OFP(0x1A, 0x14), //SW8A + op_vsrlh = OFP(0x1A, 0x15), //SW8A + op_vsrah = OFP(0x1A, 0x16), //SW8A + op_vrolh = OFP(0x1A, 0x17), //SW8A + op_ctpopow = OFP(0x1A, 0x18), + op_ctlzow = OFP(0x1A, 0x19), + op_vslll = OFP(0x1A, 0x1A), //SW8A + op_vsrll = OFP(0x1A, 0x1B), //SW8A + op_vsral = OFP(0x1A, 0x1C), //SW8A + op_vroll = OFP(0x1A, 0x1D), //SW8A + op_vmaxb = OFP(0x1A, 0x1E), //SW8A + op_vminb = OFP(0x1A, 0x1F), //SW8A + op_vucaddw = OFP(0x1A, 0x40), + op_vucsubw = OFP(0x1A, 0x41), + op_vucaddh = OFP(0x1A, 0x42), + op_vucsubh = OFP(0x1A, 0x43), + op_vucaddb = OFP(0x1A, 0x44), + op_vucsubb = OFP(0x1A, 0x45), + op_sraow = OFP(0x1A, 0x46), //SW8A + op_vsumw = OFP(0x1A, 0x47), //SW8A + op_vsuml = OFP(0x1A, 0x48), //SW8A + op_vsm4r = OFP(0x1A, 0x49), //SW8A, ENCRYPT + op_vbinvw = OFP(0x1A, 0x4A), //SW8A, ENCRYPT + op_vcmpueqb = OFP(0x1A, 0x4B), //SW8A + op_vcmpugtb = OFP(0x1A, 0x4C), //SW8A + op_vsm3msw = OFP(0x1A, 0x4D), //SW8A, ENCRYPT + op_vmaxh = OFP(0x1A, 0x50), //SW8A + op_vminh = OFP(0x1A, 0x51), //SW8A + op_vmaxw = OFP(0x1A, 0x52), //SW8A + op_vminw = OFP(0x1A, 0x53), //SW8A + op_vmaxl = OFP(0x1A, 0x54), //SW8A + op_vminl = OFP(0x1A, 0x55), //SW8A + op_vumaxb = OFP(0x1A, 0x56), //SW8A + op_vuminb = OFP(0x1A, 0x57), //SW8A + op_vumaxh = OFP(0x1A, 0x58), //SW8A + op_vuminh = OFP(0x1A, 0x59), //SW8A + op_vumaxw = OFP(0x1A, 0x5A), //SW8A + op_vuminw = OFP(0x1A, 0x5B), //SW8A + op_vumaxl = OFP(0x1A, 0x5C), //SW8A + op_vuminl = OFP(0x1A, 0x5D), //SW8A + op_vadds = OFP(0x1A, 0x80), 
+ op_vaddd = OFP(0x1A, 0x81), + op_vsubs = OFP(0x1A, 0x82), + op_vsubd = OFP(0x1A, 0x83), + op_vmuls = OFP(0x1A, 0x84), + op_vmuld = OFP(0x1A, 0x85), + op_vdivs = OFP(0x1A, 0x86), + op_vdivd = OFP(0x1A, 0x87), + op_vsqrts = OFP(0x1A, 0x88), + op_vsqrtd = OFP(0x1A, 0x89), + op_vfcmpeq = OFP(0x1A, 0x8C), + op_vfcmple = OFP(0x1A, 0x8D), + op_vfcmplt = OFP(0x1A, 0x8E), + op_vfcmpun = OFP(0x1A, 0x8F), + op_vcpys = OFP(0x1A, 0x90), + op_vcpyse = OFP(0x1A, 0x91), + op_vcpysn = OFP(0x1A, 0x92), + op_vsums = OFP(0x1A, 0x93), //SW8A + op_vsumd = OFP(0x1A, 0x94), //SW8A + op_vfcvtsd = OFP(0x1A, 0x95), //SW8A + op_vfcvtds = OFP(0x1A, 0x96), //SW8A + op_vfcvtls = OFP(0x1A, 0x99), //SW8A + op_vfcvtld = OFP(0x1A, 0x9A), //SW8A + op_vfcvtdl = OFP(0x1A, 0x9B), //SW8A + op_vfcvtdl_g = OFP(0x1A, 0x9C), //SW8A + op_vfcvtdl_p = OFP(0x1A, 0x9D), //SW8A + op_vfcvtdl_z = OFP(0x1A, 0x9E), //SW8A + op_vfcvtdl_n = OFP(0x1A, 0x9F), //SW8A + op_vfris = OFP(0x1A, 0xA0), //SW8A + op_vfris_g = OFP(0x1A, 0xA1), //SW8A + op_vfris_p = OFP(0x1A, 0xA2), //SW8A + op_vfris_z = OFP(0x1A, 0xA3), //SW8A + op_vfris_n = OFP(0x1A, 0xA4), //SW8A + op_vfrid = OFP(0x1A, 0xA5), //SW8A + op_vfrid_g = OFP(0x1A, 0xA6), //SW8A + op_vfrid_p = OFP(0x1A, 0xA7), //SW8A + op_vfrid_z = OFP(0x1A, 0xA8), //SW8A + op_vfrid_n = OFP(0x1A, 0xA9), //SW8A + op_vfrecs = OFP(0x1A, 0xAA), //SW8A + op_vfrecd = OFP(0x1A, 0xAB), //SW8A + op_vmaxs = OFP(0x1A, 0xAC), //SW8A + op_vmins = OFP(0x1A, 0xAD), //SW8A + op_vmaxd = OFP(0x1A, 0xAE), //SW8A + op_vmind = OFP(0x1A, 0xAF), //SW8A + }; + + enum ops_fpl { + op_vaddw_l = OFP(0x1A, 0x20), + op_vsubw_l = OFP(0x1A, 0x21), + op_vcmpgew_l = OFP(0x1A, 0x22), + op_vcmpeqw_l = OFP(0x1A, 0x23), + op_vcmplew_l = OFP(0x1A, 0x24), + op_vcmpltw_l = OFP(0x1A, 0x25), + op_vcmpulew_l = OFP(0x1A, 0x26), + op_vcmpultw_l = OFP(0x1A, 0x27), + op_vsllw_l = OFP(0x1A, 0x28), + op_vsrlw_l = OFP(0x1A, 0x29), + op_vsraw_l = OFP(0x1A, 0x2A), + op_vrolw_l = OFP(0x1A, 0x2B), + op_sllow_l = OFP(0x1A, 0x2C), + op_srlow_l = OFP(0x1A, 0x2D), + op_vaddl_l = OFP(0x1A, 0x2E), + op_vsubl_l = OFP(0x1A, 0x2F), + op_vsllb_l = OFP(0x1A, 0x30), //SW8A + op_vsrlb_l = OFP(0x1A, 0x31), //SW8A + op_vsrab_l = OFP(0x1A, 0x32), //SW8A + op_vrolb_l = OFP(0x1A, 0x33), //SW8A + op_vsllh_l = OFP(0x1A, 0x34), //SW8A + op_vsrlh_l = OFP(0x1A, 0x35), //SW8A + op_vsrah_l = OFP(0x1A, 0x36), //SW8A + op_vrolh_l = OFP(0x1A, 0x37), //SW8A + op_vslll_l = OFP(0x1A, 0x3A), //SW8A + op_vsrll_l = OFP(0x1A, 0x3B), //SW8A + op_vsral_l = OFP(0x1A, 0x3C), //SW8A + op_vroll_l = OFP(0x1A, 0x3D), //SW8A + op_vucaddw_l = OFP(0x1A, 0x60), + op_vucsubw_l = OFP(0x1A, 0x61), + op_vucaddh_l = OFP(0x1A, 0x62), + op_vucsubh_l = OFP(0x1A, 0x63), + op_vucaddb_l = OFP(0x1A, 0x64), + op_vucsubb_l = OFP(0x1A, 0x65), + op_sraow_l = OFP(0x1A, 0x66), //SW8A + op_vsm4key_l = OFP(0x1A, 0x68), //SW8A, ENCRYPT + op_vcmpueqb_l = OFP(0x1A, 0x6B), //SW8A + op_vcmpugtb_l = OFP(0x1A, 0x6C), //SW8A + op_vfcvtsh_l = OFP(0x1B, 0x35), //SW8A + op_vfcvths_l = OFP(0x1B, 0x36) //SW8A + }; + + enum ops_fma { + op_fmas = FMA(0x19, 0x00), + op_fmad = FMA(0x19, 0x01), + op_fmss = FMA(0x19, 0x02), + op_fmsd = FMA(0x19, 0x03), + op_fnmas = FMA(0x19, 0x04), + op_fnmad = FMA(0x19, 0x05), + op_fnmss = FMA(0x19, 0x06), + op_fnmsd = FMA(0x19, 0x07), + op_fseleq = FMA(0x19, 0x10), + op_fselne = FMA(0x19, 0x11), + op_fsellt = FMA(0x19, 0x12), + op_fselle = FMA(0x19, 0x13), + op_fselgt = FMA(0x19, 0x14), + op_fselge = FMA(0x19, 0x15), + op_vmas = FMA(0x1B, 0x00), + op_vmad = FMA(0x1B, 0x01), + op_vmss = FMA(0x1B, 0x02), + op_vmsd 
= FMA(0x1B, 0x03), + op_vnmas = FMA(0x1B, 0x04), + op_vnmad = FMA(0x1B, 0x05), + op_vnmss = FMA(0x1B, 0x06), + op_vnmsd = FMA(0x1B, 0x07), + op_vfseleq = FMA(0x1B, 0x10), + op_vfsellt = FMA(0x1B, 0x12), + op_vfselle = FMA(0x1B, 0x13), + op_vseleqw = FMA(0x1B, 0x18), + op_vsellbcw = FMA(0x1B, 0x19), + op_vselltw = FMA(0x1B, 0x1A), + op_vsellew = FMA(0x1B, 0x1B), + op_vcpyw = FMA(0x1B, 0x24), + op_vcpyf = FMA(0x1B, 0x25), + op_vconw = FMA(0x1B, 0x26), + op_vshfw = FMA(0x1B, 0x27), + op_vcons = FMA(0x1B, 0x28), + op_vcond = FMA(0x1B, 0x29), + op_vinsectlh = FMA(0x1B, 0x2C), //SW8A + op_vinsectlw = FMA(0x1B, 0x2D), //SW8A + op_vinsectll = FMA(0x1B, 0x2E), //SW8A + op_vinsectlb = FMA(0x1B, 0x2F), //SW8A + op_vshfqb = FMA(0x1B, 0x31), //SW8A + op_vcpyb = FMA(0x1B, 0x32), //SW8A + op_vcpyh = FMA(0x1B, 0x33) //SW8A + }; + + enum ops_fmal { + op_vinsw_l = FMA(0x1B, 0x20), + op_vinsf_l = FMA(0x1B, 0x21), + op_vextw_l = FMA(0x1B, 0x22), + op_vextf_l = FMA(0x1B, 0x23), + op_vinsb_l = FMA(0x1B, 0x2A), //SW8A + op_vinsh_l = FMA(0x1B, 0x2B), //SW8A + op_vshfq_l = FMA(0x1B, 0x30), //SW8A + op_vsm3r_l = FMA(0x1B, 0x34), //SW8A, ENCRYPT + op_vseleqw_l = FMA(0x1B, 0x38), + op_vsellbcw_l = FMA(0x1B, 0x39), + op_vselltw_l = FMA(0x1B, 0x3A), + op_vsellew_l = FMA(0x1B, 0x3B) + }; + + enum ops_extra { + op_sys_call = PCD(0x00), + op_memb = MFC(0x06, 0x0000), + op_imemb = MFC(0x06, 0x0001), //SW8A + op_wmemb = MFC(0x06, 0x0002), //SW8A + op_rtc = MFC(0x06, 0x0020), + op_rcid = MFC(0x06, 0x0040), + op_halt = MFC(0x06, 0x0080), + op_rd_f = MFC(0x06, 0x1000), //SW2F + op_wr_f = MFC(0x06, 0x1020), //SW2F + op_rtid = MFC(0x06, 0x1040), + op_csrws = CSR(0x06, 0xFC), //SW8A + op_csrwc = CSR(0x06, 0xFD), //SW8A + op_csrr = CSR(0x06, 0xFE), + op_csrw = CSR(0x06, 0xFF), + op_pri_ret = PRIRET(0x07, 0x0), + op_vlog = LOGX(0x14, 0x00), + op_vbisw = PSE_LOGX(0x14, 0x30), + op_vxorw = PSE_LOGX(0x14, 0x3c), + op_vandw = PSE_LOGX(0x14, 0xc0), + op_veqvw = PSE_LOGX(0x14, 0xc3), + op_vornotw = PSE_LOGX(0x14, 0xf3), + op_vbicw = PSE_LOGX(0x14, 0xfc), + op_dpfhr = ATMEM(0x1E, 0xE), //SW6B + op_dpfhw = ATMEM(0x1E, 0xF), //SW6B + }; + + // compute inverse of simm + static int inv_simm(int x, int nbits) { + return (int)(x << (32 - nbits)) >> (32 - nbits); + } + + static int inv_simm16( int x ) { return inv_simm(x, 16); } //ZHJ20110307 modified + + // inverse of u_field + static int inv_u_field(int x, int hi_bit, int lo_bit) { + juint r = juint(x) >> lo_bit; + r &= fmask( hi_bit, lo_bit); + return int(r); + } + + static int sw2_op(int inst) {return (int)(inst & OP(-1)); } + static int sw2_arith_op(int inst) {return (int)(inst & OPR(-1, -1)); } + static int sw2_mfc_op(int inst) {return (int)(inst & MFC(-1, -1)); } + + static Register sw2_ra( int x ) { return as_Register(inv_u_field(x, 25, 21)); } + static Register sw2_rb( int x ) { return as_Register(inv_u_field(x, 20, 16)); } + static Register sw2_rc( int x ) { return as_Register(inv_u_field(x, 4, 0)); } + static int sw2_mdisp( int x ) { return inv_simm16(x); } + + static int fmask(uint32_t hi_bit, uint32_t lo_bit) { + assert( hi_bit >= lo_bit && hi_bit < 32, "bad bits"); + return (1 << ( hi_bit-lo_bit + 1 )) - 1; + } + +#ifdef ASSERT + static int u_field(int x, int hi_bit, int lo_bit) { + assert( ( x & ~fmask(hi_bit, lo_bit)) == 0, + "value out of range"); + int r = x << lo_bit; + assert( inv_u_field(r, hi_bit, lo_bit) == x, "just checking"); + return r; + } +#else + // make sure this is inlined as it will reduce code size significantly + #define u_field(x, hi_bit, lo_bit) ((x) << 
(lo_bit)) +#endif + + static int opcode(int insn) { return (insn>>26)&0x3f; } + static int rs(int insn) { return (insn>>21)&0x1f; } + static int rt(int insn) { return (insn>>16)&0x1f; } + static int imm_off(int insn) { return (short)bitfield(insn, 0, 16); } + + // the plain int register fields. + static int is_ra (Register ra) { return u_field ( ra->encoding(), 25, 21 ); }; + static int is_rb (Register rb) { return u_field ( rb->encoding(), 20, 16 ); }; + static int is_rc (Register rc) { return u_field ( rc->encoding(), 4, 0 ); }; + /* for the third operand of ternary operands integer insn. */ + static int is_r3 (Register r3) { return u_field ( r3->encoding(), 9, 5 ); }; + /* th th fields for dpfhr and dpfhw instructions */ + static int is_th (int th) { return u_field ( th, 25, 21 ); }; + + //the plain fp register fields. + static int is_fa (FloatRegister fa) { return u_field ( fa->encoding(), 25, 21 ); }; + static int is_fb (FloatRegister fb) { return u_field ( fb->encoding(), 20, 16 ); }; + static int is_fc (FloatRegister fc) { return u_field ( fc->encoding(), 4, 0 ); }; + /* the plain fp register fields */ + static int is_f3 (FloatRegister f3) { return u_field ( f3->encoding(), 9, 5 ); }; + + static void assert_signed_range(intptr_t x, int nbits) { + assert(nbits == 32 || (-(1 << nbits-1) <= x && x < ( 1 << nbits-1)), + "value out of range"); + } + + // signed immediate, in low bits, nbits long + static int simm(int x, int nbits) { + assert_signed_range(x, nbits); + return x & (( 1 << nbits ) - 1); + } + static int simm2(int64_t val, int msb, int lsb) { + int nbits = msb - lsb + 1; + int64_t chk = val >> (nbits - 1); + guarantee (chk == -1 || chk == 0, "Field too big for insn"); + unsigned uval = val; + unsigned mask = checked_cast(right_n_bits(nbits)); + uval &= mask; + uval <<= lsb; + return uval; + } + inline void check_delay() { +# ifdef CHECK_DELAY + guarantee(delay_state != at_delay_slot, "must say delayed() when filling delay slot"); + delay_state = no_delay; +# endif + } + + void emit_sw2_long(int); // shadows AbstractAssembler::emit_long + + void nop(int i = 1) { assert(i > 0, "count > 0"); for (; i > 0 ; i--) emit_sw2_long( op_ldi | is_ra(R0) ); } + + /* the unsigned 8-bit literal of operate format insns. */ + static int is_lit (int lit) { return u_field ( lit ,20, 13 ); }; + + /* the signed 13-bit literal of operate format insns. */ + static int is_apint (int apint) { return simm2 ( apint, 25, 13 ); }; + + /* the signed 16-bit displacement of memory format insns. from here + we can't tell what relocation should be used, so don't use a default. */ + static int is_mdisp (int mdisp) { return simm ( mdisp ,16 ); }; + + /* the signed "23-bit" aligned displacement of branch format insns. */ + static int is_bdisp (int bdisp) { return simm ( bdisp ,21 ); }; + + /* the 26-bit palcode function */ + static int is_palfn (int palfn) { return simm ( palfn, 26 ); }; + /* the optional signed "16-bit" aligned displacement of the jmp/jsr hint */ + static int is_jmphint (int jmphint) { return simm ( jmphint, 16); }; + + /* the optional hint to ret/jsr_coroutine */ + static int is_rethint (int rethint) { return simm ( rethint, 16); }; + /* the 12-bit displacement for the ev[46] hw_{ return u_field (ld,st} (pal1b/pal1f) insns. 
*/ + static int is_ev6hwdisp (int ev6hwdisp) { return simm ( ev6hwdisp, 12 ); }; + + /* sw2 simd settle instruction lit */ + static int is_fmalit (int fmalit) { return u_field ( fmalit ,9 ,5 ); };//v1.1 + + static int is_rpiindex (int rpiindex) { return u_field ( rpiindex ,7, 0 ); }; + + static int is_atmdisp ( int atmdisp ) { return u_field ( atmdisp, 10, 0 ); }; + + static int is_vlog_h ( int vlog ) { return u_field ( (vlog & 0xff) >>6 , 27, 26 ); }; + static int is_vlog_l ( int vlog ) { return u_field ( vlog & 0x3f , 15, 10 ); }; + + void flush() { +#ifdef CHECK_DELAY + guarantee( delay_state == no_delay, "ending code with a delay slot"); +#endif + AbstractAssembler::flush(); + } + + void assert_not_delayed() { +#ifdef CHECK_DELAY + assert_not_delayed("next instruction should not be a delay slot"); +#endif + } + + void assert_not_delayed(const char* msg) { +#ifdef CHECK_DELAY + if(delay_state != no_delay){ + tty->print_cr("%s:%d, pc: %lx", __func__, __LINE__, pc()); + } + assert(delay_state == no_delay, msg); +#endif + } + +protected: +#ifdef ASSERT + void check_relocation(RelocationHolder const& rspec, int format); +#endif + + // instruction only in sw2, including sw2f, sw4a, sw6a + static void sw2_only() { assert( VM_Version::sw2only(), "This instruction only works on sw2f, sw4a or sw6a"); } + // instruction only in sw3, including sw6b + static void sw3_only() { assert( VM_Version::sw3only(), "This instruction only works on sw6b"); } + static void sw4_only() { assert( VM_Version::sw4only(), "This instruction only works on sw8a"); } + +public: + // SW64 common helper functions + static bool operand_valid_for_simple_type_instruction_immediate(int imm) { return is_lit(imm); } + static bool operand_valid_for_storage_type_instruction_immediate(int imm) { return is_simm16(imm); } + + // SW64 Generic instructions + void sys_call_b( int palfn ); + void sys_call ( int palfn ); + void call ( Register ra, Register rb, int jmphint ); + void ret ( Register ra, Register rb, int rethint ); + void jmp ( Register ra, Register rb, int jmphint ); + void br ( Register ra, int bdisp ); + void bsr ( Register ra, int bdisp ); + void memb ( void ); + void imemb ( void ); + void wmemb ( void ); + void rtc ( Register ra, Register rb ); + void rcid ( Register ra); + void halt ( void); + void rd_f ( Register ra ); //SW2F + void wr_f ( Register ra ); //SW2F + void rtid ( Register ra); + void csrws ( Register ra, int rpiindex );//SW8A + void csrwc ( Register ra, int rpiindex );//SW8A + void csrr ( Register ra, int rpiindex ); + void csrw ( Register ra, int rpiindex ); + void pri_ret ( Register ra ); + void lldw ( Register ra, int atmdisp, Register rb ); + void lldl ( Register ra, int atmdisp, Register rb ); + void ldw_inc ( Register ra, int atmdisp, Register rb ); //SW2F + void ldl_inc ( Register ra, int atmdisp, Register rb ); //SW2F + void ldw_dec ( Register ra, int atmdisp, Register rb ); //SW2F + void ldl_dec ( Register ra, int atmdisp, Register rb ); //SW2F + void ldw_set ( Register ra, int atmdisp, Register rb ); //SW2F + void ldl_set ( Register ra, int atmdisp, Register rb ); //SW2F + void lstw ( Register ra, int atmdisp, Register rb ); + void lstl ( Register ra, int atmdisp, Register rb ); + void ldw_nc ( Register ra, int atmdisp, Register rb ); + void ldl_nc ( Register ra, int atmdisp, Register rb ); + void ldd_nc ( Register ra, int atmdisp, Register rb ); + void stw_nc ( Register ra, int atmdisp, Register rb ); + void stl_nc ( Register ra, int atmdisp, Register rb ); + void std_nc ( Register ra, int 
atmdisp, Register rb ); + void ldwe ( FloatRegister fa, int mdisp, Register rb ); + void ldse ( FloatRegister fa, int mdisp, Register rb ); + void ldde ( FloatRegister fa, int mdisp, Register rb ); + void vlds ( FloatRegister fa, int mdisp, Register rb ); + void vldd ( FloatRegister fa, int mdisp, Register rb ); + void vsts ( FloatRegister fa, int mdisp, Register rb ); + void vstd ( FloatRegister fa, int mdisp, Register rb ); + + void addw ( Register ra, Register rb, Register rc ); + void addw ( Register ra, int lit, Register rc ); + void subw ( Register ra, Register rb, Register rc ); + void subw ( Register ra, int lit, Register rc ); + void s4addw ( Register ra, Register rb, Register rc ); + void s4addw ( Register ra, int lit, Register rc ); + void s4subw ( Register ra, Register rb, Register rc ); + void s4subw ( Register ra, int lit, Register rc ); + void s8addw ( Register ra, Register rb, Register rc ); + void s8addw ( Register ra, int lit, Register rc ); + void s8subw ( Register ra, Register rb, Register rc ); + void s8subw ( Register ra, int lit, Register rc ); + void addl ( Register ra, Register rb, Register rc ); + void addl ( Register ra, int lit, Register rc ); + void subl ( Register ra, Register rb, Register rc ); + void subl ( Register ra, int lit, Register rc ); + void s4addl ( Register ra, Register rb, Register rc ); + void s4addl ( Register ra, int lit, Register rc ); + void s4subl ( Register ra, Register rb, Register rc ); + void s4subl ( Register ra, int lit, Register rc ); + void s8addl ( Register ra, Register rb, Register rc ); + void s8addl ( Register ra, int lit, Register rc ); + void s8subl ( Register ra, Register rb, Register rc ); + void s8subl ( Register ra, int lit, Register rc ); + void mulw ( Register ra, Register rb, Register rc ); + void mulw ( Register ra, int lit, Register rc ); + void divw ( Register ra, Register rb, Register rc ); //SW6B + void udivw ( Register ra, Register rb, Register rc ); //SW6B + void remw ( Register ra, Register rb, Register rc ); //SW6B + void uremw ( Register ra, Register rb, Register rc ); //SW6B + void mull ( Register ra, Register rb, Register rc ); + void mull ( Register ra, int lit, Register rc ); + void umulh ( Register ra, Register rb, Register rc ); + void umulh ( Register ra, int lit, Register rc ); + void divl ( Register ra, Register rb, Register rc ); //SW6B + void udivl ( Register ra, Register rb, Register rc ); //SW6B + void reml ( Register ra, Register rb, Register rc ); //SW6B + void ureml ( Register ra, Register rb, Register rc ); //SW6B + void addpi ( int apint, Register rc ); //SW6B + void addpis ( int apint, Register rc ); //SW6B + + void cmpeq ( Register ra, Register rb, Register rc ); + void cmpeq ( Register ra, int lit, Register rc ); + void cmplt ( Register ra, Register rb, Register rc ); + void cmplt ( Register ra, int lit, Register rc ); + void cmple ( Register ra, Register rb, Register rc ); + void cmple ( Register ra, int lit, Register rc ); + void cmpult ( Register ra, Register rb, Register rc ); + void cmpult ( Register ra, int lit, Register rc ); + void cmpule ( Register ra, Register rb, Register rc ); + void cmpule ( Register ra, int lit, Register rc ); + void sbt ( Register ra, Register rb, Register rc ); + void sbt ( Register ra, int lit, Register rc ); + void cbt ( Register ra, Register rb, Register rc ); + void cbt ( Register ra, int lit, Register rc ); + void and_ins ( Register ra, Register rb, Register rc ); + void and_ins ( Register ra, int lit, Register rc ); + void bic ( Register ra, Register 
rb, Register rc ); + void bic ( Register ra, int lit, Register rc ); + void bis ( Register ra, Register rb, Register rc ); + void bis ( Register ra, int lit, Register rc ); + void ornot ( Register ra, Register rb, Register rc ); + void ornot ( Register ra, int lit, Register rc ); + void xor_ins ( Register ra, Register rb, Register rc ); + void xor_ins ( Register ra, int lit, Register rc ); + void eqv ( Register ra, Register rb, Register rc ); + void eqv ( Register ra, int lit, Register rc ); + void inslb ( Register ra, Register rb, Register rc ); + void inslb ( Register ra, int lit, Register rc ); + void inslh ( Register ra, Register rb, Register rc ); + void inslh ( Register ra, int lit, Register rc ); + void inslw ( Register ra, Register rb, Register rc ); + void inslw ( Register ra, int lit, Register rc ); + void insll ( Register ra, Register rb, Register rc ); + void insll ( Register ra, int lit, Register rc ); + void inshb ( Register ra, Register rb, Register rc ); + void inshb ( Register ra, int lit, Register rc ); + void inshh ( Register ra, Register rb, Register rc ); + void inshh ( Register ra, int lit, Register rc ); + void inshw ( Register ra, Register rb, Register rc ); + void inshw ( Register ra, int lit, Register rc ); + void inshl ( Register ra, Register rb, Register rc ); + void inshl ( Register ra, int lit, Register rc ); + void slll ( Register ra, Register rb, Register rc ); + void slll ( Register ra, int lit, Register rc ); + void srll ( Register ra, Register rb, Register rc ); + void srll ( Register ra, int lit, Register rc ); + void sral ( Register ra, Register rb, Register rc ); + void sral ( Register ra, int lit, Register rc ); + void roll ( Register ra, Register rb, Register rc ); + void roll ( Register ra, int lit, Register rc ); + void sllw ( Register ra, Register rb, Register rc ); + void sllw ( Register ra, int lit, Register rc ); + void srlw ( Register ra, Register rb, Register rc ); + void srlw ( Register ra, int lit, Register rc ); + void sraw ( Register ra, Register rb, Register rc ); + void sraw ( Register ra, int lit, Register rc ); + void rolw ( Register ra, Register rb, Register rc ); + void rolw ( Register ra, int lit, Register rc ); + void extlb ( Register ra, Register rb, Register rc ); + void extlb ( Register ra, int lit, Register rc ); + void extlh ( Register ra, Register rb, Register rc ); + void extlh ( Register ra, int lit, Register rc ); + void extlw ( Register ra, Register rb, Register rc ); + void extlw ( Register ra, int lit, Register rc ); + void extll ( Register ra, Register rb, Register rc ); + void extll ( Register ra, int lit, Register rc ); + void exthb ( Register ra, Register rb, Register rc ); + void exthb ( Register ra, int lit, Register rc ); + void exthh ( Register ra, Register rb, Register rc ); + void exthh ( Register ra, int lit, Register rc ); + void exthw ( Register ra, Register rb, Register rc ); + void exthw ( Register ra, int lit, Register rc ); + void exthl ( Register ra, Register rb, Register rc ); + void exthl ( Register ra, int lit, Register rc ); + void ctpop ( Register rb, Register rc ); + void ctlz ( Register rb, Register rc ); + void cttz ( Register rb, Register rc ); + void revbh ( Register rb, Register rc ); + void revbw ( Register rb, Register rc ); + void revbl ( Register rb, Register rc ); + void casw ( Register ra, Register rb, Register rc ); + void casl ( Register ra, Register rb, Register rc ); + void masklb ( Register ra, Register rb, Register rc ); + void masklb ( Register ra, int lit, Register rc ); + 
void masklh ( Register ra, Register rb, Register rc ); + void masklh ( Register ra, int lit, Register rc ); + void masklw ( Register ra, Register rb, Register rc ); + void masklw ( Register ra, int lit, Register rc ); + void maskll ( Register ra, Register rb, Register rc ); + void maskll ( Register ra, int lit, Register rc ); + void maskhb ( Register ra, Register rb, Register rc ); + void maskhb ( Register ra, int lit, Register rc ); + void maskhh ( Register ra, Register rb, Register rc ); + void maskhh ( Register ra, int lit, Register rc ); + void maskhw ( Register ra, Register rb, Register rc ); + void maskhw ( Register ra, int lit, Register rc ); + void maskhl ( Register ra, Register rb, Register rc ); + void maskhl ( Register ra, int lit, Register rc ); + void zap ( Register ra, Register rb, Register rc ); + void zap ( Register ra, int lit, Register rc ); + void zapnot ( Register ra, Register rb, Register rc ); + void zapnot ( Register ra, int lit, Register rc ); + void sextb ( Register rb, Register rc); + void sextb ( int lit, Register rc ); + void sexth ( Register rb, Register rc ); + void sexth ( int lit, Register rc ); + void cmpgeb ( Register ra, Register rb, Register rc ); + void cmpgeb ( Register ra, int lit, Register rc ); + void fimovs ( FloatRegister fa, Register rc ); // For sw4a SQData + void fimovd ( FloatRegister fa, Register rc ); // For sw4a SQData + void seleq ( Register ra, Register rb,Register r3, Register rc ); + void seleq ( Register ra, int lit, Register r3,Register rc ); + void selge ( Register ra, Register rb,Register r3, Register rc ); + void selge ( Register ra, int lit, Register r3,Register rc ); + void selgt ( Register ra, Register rb,Register r3, Register rc ); + void selgt ( Register ra, int lit, Register r3,Register rc ); + void selle ( Register ra, Register rb,Register r3, Register rc ); + void selle ( Register ra, int lit, Register r3,Register rc ); + void sellt ( Register ra, Register rb,Register r3, Register rc ); + void sellt ( Register ra, int lit, Register r3,Register rc ); + void selne ( Register ra, Register rb,Register r3, Register rc ); + void selne ( Register ra, int lit, Register r3,Register rc ); + void sellbc ( Register ra, Register rb,Register r3, Register rc ); + void sellbc ( Register ra, int lit, Register r3,Register rc ); + void sellbs ( Register ra, Register rb,Register r3, Register rc ); + void sellbs ( Register ra, int lit, Register r3,Register rc ); + + void vlog ( int vlog, FloatRegister fa,FloatRegister fb,FloatRegister f3, FloatRegister fc ); + void vbisw ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); + void vxorw ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); + void vandw ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); + void veqvw ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); + void vornotw ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); + void vbicw ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); + + void fadds ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); + void faddd ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); + void fsubs ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); + void fsubd ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); + void fmuls ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); + void fmuld ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); + void fdivs ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); + void fdivd ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); + void 
fsqrts ( FloatRegister fb, FloatRegister fc ); + void fsqrtd ( FloatRegister fb, FloatRegister fc ); + void fcmpeq ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); + void fcmple ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); + void fcmplt ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); + void fcmpun ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); + void fcvtsd ( FloatRegister fb, FloatRegister fc ); + void fcvtds ( FloatRegister fb, FloatRegister fc ); + void fcvtdl_g ( FloatRegister fb, FloatRegister fc ); //lx_fcvtdl + void fcvtdl_p ( FloatRegister fb, FloatRegister fc ); + void fcvtdl_z ( FloatRegister fb, FloatRegister fc ); + void fcvtdl_n ( FloatRegister fb, FloatRegister fc ); //lx_fcvtdl + void fcvtdl ( FloatRegister fb, FloatRegister fc ); + void fcvtwl ( FloatRegister fb, FloatRegister fc ); + void fcvtlw ( FloatRegister fb, FloatRegister fc ); + void fcvtls ( FloatRegister fb, FloatRegister fc ); + void fcvtld ( FloatRegister fb, FloatRegister fc ); + void fcpys ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); + void fcpyse ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); + void fcpysn ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); + void ifmovs ( Register ra, FloatRegister fc ); // For sw4a SQData + void ifmovd ( Register ra, FloatRegister fc ); // For sw4a SQData + //cmov + void cmovdl ( Register rc, FloatRegister fb ); + void cmovdl_g ( Register rc, FloatRegister fb ); + void cmovdl_p ( Register rc, FloatRegister fb ); + void cmovdl_z ( Register rc, FloatRegister fb ); + void cmovdl_n ( Register rc, FloatRegister fb ); + + void cmovdlu ( Register rc, FloatRegister fb ); + void cmovdlu_g ( Register rc, FloatRegister fb ); + void cmovdlu_p ( Register rc, FloatRegister fb ); + void cmovdlu_z ( Register rc, FloatRegister fb ); + void cmovdlu_n ( Register rc, FloatRegister fb ); + + void cmovdw ( Register rc, FloatRegister fb ); + void cmovdw_g ( Register rc, FloatRegister fb ); + void cmovdw_p ( Register rc, FloatRegister fb ); + void cmovdw_z ( Register rc, FloatRegister fb ); + void cmovdw_n ( Register rc, FloatRegister fb ); + + void cmovdwu ( Register rc, FloatRegister fb ); + void cmovdwu_g ( Register rc, FloatRegister fb ); + void cmovdwu_p ( Register rc, FloatRegister fb ); + void cmovdwu_z ( Register rc, FloatRegister fb ); + void cmovdwu_n ( Register rc, FloatRegister fb ); + + void cmovls ( FloatRegister fc, Register rb ); + void cmovld ( FloatRegister fc, Register rb ); + void cmovuls ( FloatRegister fc, Register rb ); + void cmovuld ( FloatRegister fc, Register rb ); + void cmovws ( FloatRegister fc, Register rb ); + void cmovwd ( FloatRegister fc, Register rb ); + void cmovuws ( FloatRegister fc, Register rb ); + void cmovuwd ( FloatRegister fc, Register rb ); + + void rfpcr ( FloatRegister fa); + void wfpcr ( FloatRegister fa); + void setfpec0 (); + void setfpec1 (); + void setfpec2 (); + void setfpec3 (); + void frecs ( FloatRegister fa, FloatRegister fc ); + void frecd ( FloatRegister fa, FloatRegister fc ); + void fris ( FloatRegister fb, FloatRegister fc ); + void fris_g ( FloatRegister fb, FloatRegister fc ); + void fris_p ( FloatRegister fb, FloatRegister fc ); + void fris_z ( FloatRegister fb, FloatRegister fc ); + void fris_n ( FloatRegister fb, FloatRegister fc ); + void frid ( FloatRegister fb, FloatRegister fc ); + void frid_g ( FloatRegister fb, FloatRegister fc ); + void frid_p ( FloatRegister fb, FloatRegister fc ); + void frid_z ( FloatRegister fb, FloatRegister fc ); + void frid_n ( 
FloatRegister fb, FloatRegister fc ); + void fmas ( FloatRegister fa, FloatRegister fb, FloatRegister f3, FloatRegister fc ); + void fmad ( FloatRegister fa, FloatRegister fb, FloatRegister f3, FloatRegister fc ); + void fmss ( FloatRegister fa, FloatRegister fb, FloatRegister f3, FloatRegister fc ); + void fmsd ( FloatRegister fa, FloatRegister fb, FloatRegister f3, FloatRegister fc ); + void fnmas ( FloatRegister fa, FloatRegister fb, FloatRegister f3, FloatRegister fc ); + void fnmad ( FloatRegister fa, FloatRegister fb, FloatRegister f3, FloatRegister fc ); + void fnmss ( FloatRegister fa, FloatRegister fb, FloatRegister f3, FloatRegister fc ); + void fnmsd ( FloatRegister fa, FloatRegister fb, FloatRegister f3, FloatRegister fc ); + void fseleq ( FloatRegister fa, FloatRegister fb, FloatRegister f3, FloatRegister fc ); + void fselne ( FloatRegister fa, FloatRegister fb, FloatRegister f3, FloatRegister fc ); + void fsellt ( FloatRegister fa, FloatRegister fb, FloatRegister f3, FloatRegister fc ); + void fselle ( FloatRegister fa, FloatRegister fb, FloatRegister f3, FloatRegister fc ); + void fselgt ( FloatRegister fa, FloatRegister fb, FloatRegister f3, FloatRegister fc ); + void fselge ( FloatRegister fa, FloatRegister fb, FloatRegister f3, FloatRegister fc ); + + void vaddw ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); + void vaddw ( FloatRegister fa, int lit, FloatRegister fc ); + void vsubw ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); + void vsubw ( FloatRegister fa, int lit, FloatRegister fc ); + void vcmpgew ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); + void vcmpgew ( FloatRegister fa, int lit, FloatRegister fc ); + void vcmpeqw ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); + void vcmpeqw ( FloatRegister fa, int lit, FloatRegister fc ); + void vcmplew ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); + void vcmplew ( FloatRegister fa, int lit, FloatRegister fc ); + void vcmpltw ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); + void vcmpltw ( FloatRegister fa, int lit, FloatRegister fc ); + void vcmpulew ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); + void vcmpulew ( FloatRegister fa, int lit, FloatRegister fc ); + void vcmpultw ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); + void vcmpultw ( FloatRegister fa, int lit, FloatRegister fc ); + void vsllw ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); + void vsllw ( FloatRegister fa, int lit, FloatRegister fc ); + void vsrlw ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); + void vsrlw ( FloatRegister fa, int lit, FloatRegister fc ); + void vsraw ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); + void vsraw ( FloatRegister fa, int lit, FloatRegister fc ); + void vrolw ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); + void vrolw ( FloatRegister fa, int lit, FloatRegister fc ); + void sllow ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); + void sllow ( FloatRegister fa, int lit, FloatRegister fc ); + void srlow ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); + void srlow ( FloatRegister fa, int lit, FloatRegister fc ); + void vaddl ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); + void vaddl ( FloatRegister fa, int lit, FloatRegister fc ); + void vsubl ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); + void vsubl ( FloatRegister fa, int lit, FloatRegister fc ); + void vsllb ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); + void vsllb ( FloatRegister fa, int 
lit, FloatRegister fc ); + void vsrlb ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); + void vsrlb ( FloatRegister fa, int lit, FloatRegister fc ); + void vsrab ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); + void vsrab ( FloatRegister fa, int lit, FloatRegister fc ); + void vrolb ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); + void vrolb ( FloatRegister fa, int lit, FloatRegister fc ); + void vsllh ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); + void vsllh ( FloatRegister fa, int lit, FloatRegister fc ); + void vsrlh ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); + void vsrlh ( FloatRegister fa, int lit, FloatRegister fc ); + void vsrah ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); + void vsrah ( FloatRegister fa, int lit, FloatRegister fc ); + void vrolh ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); + void vrolh ( FloatRegister fa, int lit, FloatRegister fc ); + void ctpopow ( FloatRegister fa, FloatRegister fc ); + void ctlzow ( FloatRegister fa, FloatRegister fc ); + void vslll ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); + void vslll ( FloatRegister fa, int lit, FloatRegister fc ); + void vsrll ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); + void vsrll ( FloatRegister fa, int lit, FloatRegister fc ); + void vsral ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); + void vsral ( FloatRegister fa, int lit, FloatRegister fc ); + void vroll ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); + void vroll ( FloatRegister fa, int lit, FloatRegister fc ); + void vmaxb ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); + void vminb ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); + + void vucaddw ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); + void vucaddw ( FloatRegister fa, int lit, FloatRegister fc ); + void vucsubw ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); + void vucsubw ( FloatRegister fa, int lit, FloatRegister fc ); + void vucaddh ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); + void vucaddh ( FloatRegister fa, int lit, FloatRegister fc ); + void vucsubh ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); + void vucsubh ( FloatRegister fa, int lit, FloatRegister fc ); + void vucaddb ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); + void vucaddb ( FloatRegister fa, int lit, FloatRegister fc ); + void vucsubb ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); + void vucsubb ( FloatRegister fa, int lit, FloatRegister fc ); + void sraow ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); + void sraow ( FloatRegister fa, int lit, FloatRegister fc ); + void vsumw ( FloatRegister fa, FloatRegister fc ); + void vsuml ( FloatRegister fa, FloatRegister fc ); + void vcmpueqb ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); + void vcmpueqb ( FloatRegister fa, int lit, FloatRegister fc ); + void vcmpugtb ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); + void vcmpugtb ( FloatRegister fa, int lit, FloatRegister fc ); + void vmaxh ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); + void vminh ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); + void vmaxw ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); + void vminw ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); + void vmaxl ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); + void vminl ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); + void vumaxb ( FloatRegister fa, FloatRegister 
fb, FloatRegister fc ); + void vuminb ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); + void vumaxh ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); + void vuminh ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); + void vumaxw ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); + void vuminw ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); + void vumaxl ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); + void vuminl ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); + + void vsm3msw ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); + void vsm4key ( FloatRegister fa, int lit, FloatRegister fc ); + void vsm4r ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); + void vbinvw ( FloatRegister fb, FloatRegister fc ); + + void vadds ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); + void vaddd ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); + void vsubs ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); + void vsubd ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); + void vmuls ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); + void vmuld ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); + void vdivs ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); + void vdivd ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); + void vsqrts ( FloatRegister fb, FloatRegister fc ); + void vsqrtd ( FloatRegister fb, FloatRegister fc ); + void vfcmpeq ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); + void vfcmple ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); + void vfcmplt ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); + void vfcmpun ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); + void vcpys ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); + void vfmov ( FloatRegister fa, FloatRegister fc ); + void vcpyse ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); + void vcpysn ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); + void vsums ( FloatRegister fa, FloatRegister fc ); + void vsumd ( FloatRegister fa, FloatRegister fc ); + void vfrecs ( FloatRegister fa, FloatRegister fc ); + void vfrecd ( FloatRegister fa, FloatRegister fc ); + void vfcvtsd ( FloatRegister fb, FloatRegister fc ); + void vfcvtds ( FloatRegister fb, FloatRegister fc ); + void vfcvtls ( FloatRegister fb, FloatRegister fc ); + void vfcvtld ( FloatRegister fb, FloatRegister fc ); + void vfcvtdl ( FloatRegister fb, FloatRegister fc ); + void vfcvtdl_g ( FloatRegister fb, FloatRegister fc ); + void vfcvtdl_p ( FloatRegister fb, FloatRegister fc ); + void vfcvtdl_z ( FloatRegister fb, FloatRegister fc ); + void vfcvtdl_n ( FloatRegister fb, FloatRegister fc ); + void vfris ( FloatRegister fb, FloatRegister fc ); + void vfris_g ( FloatRegister fb, FloatRegister fc ); + void vfris_p ( FloatRegister fb, FloatRegister fc ); + void vfris_z ( FloatRegister fb, FloatRegister fc ); + void vfris_n ( FloatRegister fb, FloatRegister fc ); + void vfrid ( FloatRegister fb, FloatRegister fc ); + void vfrid_g ( FloatRegister fb, FloatRegister fc ); + void vfrid_p ( FloatRegister fb, FloatRegister fc ); + void vfrid_z ( FloatRegister fb, FloatRegister fc ); + void vfrid_n ( FloatRegister fb, FloatRegister fc ); + void vmaxs ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); + void vmins ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); + void vmaxd ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); + void vmind ( FloatRegister fa, FloatRegister 
fb, FloatRegister fc ); + + void vmas ( FloatRegister fa, FloatRegister fb, FloatRegister f3, FloatRegister fc ); + void vmad ( FloatRegister fa, FloatRegister fb, FloatRegister f3, FloatRegister fc ); + void vmss ( FloatRegister fa, FloatRegister fb, FloatRegister f3, FloatRegister fc ); + void vmsd ( FloatRegister fa, FloatRegister fb, FloatRegister f3, FloatRegister fc ); + void vnmas ( FloatRegister fa, FloatRegister fb, FloatRegister f3, FloatRegister fc ); + void vnmad ( FloatRegister fa, FloatRegister fb, FloatRegister f3, FloatRegister fc ); + void vnmss ( FloatRegister fa, FloatRegister fb, FloatRegister f3, FloatRegister fc ); + void vnmsd ( FloatRegister fa, FloatRegister fb, FloatRegister f3, FloatRegister fc ); + void vfseleq ( FloatRegister fa, FloatRegister fb, FloatRegister f3, FloatRegister fc ); + void vfsellt ( FloatRegister fa, FloatRegister fb, FloatRegister f3, FloatRegister fc ); + void vfselle ( FloatRegister fa, FloatRegister fb, FloatRegister f3, FloatRegister fc ); + void vseleqw ( FloatRegister fa, FloatRegister fb, FloatRegister f3, FloatRegister fc ); + void vseleqw ( FloatRegister fa, FloatRegister fb, int fmalit, FloatRegister fc ); + void vsellbcw ( FloatRegister fa, FloatRegister fb, FloatRegister f3, FloatRegister fc ); + void vsellbcw ( FloatRegister fa, FloatRegister fb, int fmalit, FloatRegister fc ); + void vselltw ( FloatRegister fa, FloatRegister fb, FloatRegister f3, FloatRegister fc ); + void vselltw ( FloatRegister fa, FloatRegister fb, int fmalit, FloatRegister fc ); + void vsellew ( FloatRegister fa, FloatRegister fb, FloatRegister f3, FloatRegister fc ); + void vsellew ( FloatRegister fa, FloatRegister fb, int fmalit, FloatRegister fc ); + void vinsw ( FloatRegister fa, FloatRegister fb, int fmalit, FloatRegister fc ); + void vinsf ( FloatRegister fa, FloatRegister fb, int fmalit, FloatRegister fc ); + void vextw ( FloatRegister fa, int fmalit, FloatRegister fc); + void vextf ( FloatRegister fa, int fmalit, FloatRegister fc); + void vcpyw ( FloatRegister fa, FloatRegister fc); + void vcpyf ( FloatRegister fa, FloatRegister fc); + void vconw ( FloatRegister va, FloatRegister vb, FloatRegister fc, FloatRegister vd ); + void vshfw ( FloatRegister va, FloatRegister vb, FloatRegister fc, FloatRegister vd ); + void vcons ( FloatRegister va, FloatRegister vb, FloatRegister fc, FloatRegister vd ); + void vcond ( FloatRegister va, FloatRegister vb, FloatRegister fc, FloatRegister vd ); + void vinsb ( FloatRegister fa, FloatRegister fb, int fmalit, FloatRegister fc ); + void vinsh ( FloatRegister fa, FloatRegister fb, int fmalit, FloatRegister fc ); + void vinsectlh ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); + void vinsectlw ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); + void vinsectll ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); + void vinsectlb ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); + void vshfq ( FloatRegister fa, FloatRegister fb, int fmalit, FloatRegister fc ); + void vshfqb ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); + void vcpyb ( FloatRegister fa, FloatRegister fc ); + void vcpyh ( FloatRegister fa, FloatRegister fc ); + void vsm3r ( FloatRegister fa, FloatRegister fb, int fmalit, FloatRegister fc ); + void vfcvtsh ( FloatRegister fa, FloatRegister fb, int fmalit, FloatRegister fc ); + void vfcvths ( FloatRegister fa, FloatRegister fb, int fmalit, FloatRegister fc ); + + void vldw_u ( FloatRegister fa, int atmdisp, Register rb ); + void vstw_u ( FloatRegister fa, int 
atmdisp, Register rb ); + void vlds_u ( FloatRegister fa, int atmdisp, Register rb ); + void vsts_u ( FloatRegister fa, int atmdisp, Register rb ); + void vldd_u ( FloatRegister fa, int atmdisp, Register rb ); + void vstd_u ( FloatRegister fa, int atmdisp, Register rb ); + void vstw_ul ( FloatRegister fa, int atmdisp, Register rb ); + void vstw_uh ( FloatRegister fa, int atmdisp, Register rb ); + void vsts_ul ( FloatRegister fa, int atmdisp, Register rb ); + void vsts_uh ( FloatRegister fa, int atmdisp, Register rb ); + void vstd_ul ( FloatRegister fa, int atmdisp, Register rb ); + void vstd_uh ( FloatRegister fa, int atmdisp, Register rb ); + void lbr ( int palfn ); + void ldbu_a ( Register ra, int atmdisp, Register rb ); + void ldhu_a ( Register ra, int atmdisp, Register rb ); + void ldw_a ( Register ra, int atmdisp, Register rb ); + void ldl_a ( Register ra, int atmdisp, Register rb ); + void stb_a ( Register ra, int atmdisp, Register rb ); + void sth_a ( Register ra, int atmdisp, Register rb ); + void stw_a ( Register ra, int atmdisp, Register rb ); + void stl_a ( Register ra, int atmdisp, Register rb ); + void flds_a ( FloatRegister fa, int atmdisp, Register rb ); + void fldd_a ( FloatRegister fa, int atmdisp, Register rb ); + void fsts_a ( FloatRegister fa, int atmdisp, Register rb ); + void fstd_a ( FloatRegister fa, int atmdisp, Register rb ); + void dpfhr ( int th, int atmdisp, Register rb ); + void dpfhw ( int th, int atmdisp, Register rb ); + void ldbu ( Register ra, int mdisp, Register rb ); + void ldhu ( Register ra, int mdisp, Register rb ); + void ldw ( Register ra, int mdisp, Register rb ); + void ldl ( Register ra, int mdisp, Register rb ); + void ldl_u ( Register ra, int mdisp, Register rb ); + void pri_ld ( Register ra, int ev6hwdisp, Register rb ); + void flds ( FloatRegister fa, int mdisp, Register rb ); + void fldd ( FloatRegister fa, int mdisp, Register rb ); + void stb ( Register ra, int mdisp, Register rb ); + void sth ( Register ra, int mdisp, Register rb ); + void stw ( Register ra, int mdisp, Register rb ); + void stl ( Register ra, int mdisp, Register rb ); + void stl_u ( Register ra, int mdisp, Register rb ); + void pri_st ( Register ra, int ev6hwdisp, Register rb ); + void fsts ( FloatRegister fa, int mdisp, Register rb ); + void fstd ( FloatRegister fa, int mdisp, Register rb ); + void beq ( Register ra, int bdisp ); + void bne ( Register ra, int bdisp ); + void blt ( Register ra, int bdisp ); + void ble ( Register ra, int bdisp ); + void bgt ( Register ra, int bdisp ); + void bge ( Register ra, int bdisp ); + void blbc ( Register ra, int bdisp ); + void blbs ( Register ra, int bdisp ); + void fbeq ( FloatRegister fa, int bdisp ); + void fbne ( FloatRegister fa, int bdisp ); + void fblt ( FloatRegister fa, int bdisp ); + void fble ( FloatRegister fa, int bdisp ); + void fbgt ( FloatRegister fa, int bdisp ); + void fbge ( FloatRegister fa, int bdisp ); + void ldi ( Register ra, int mdisp, Register rb ); + void ldih ( Register ra, int mdisp, Register rb ); + + // cache control instruction + void s_fillcs ( int mdisp, Register rb ); + void s_fillde ( int mdisp, Register rb ); + void fillde ( int mdisp, Register rb ); + void fillde_e ( int mdisp, Register rb ); + void fillcs ( int mdisp, Register rb ); + void fillcs_e ( int mdisp, Register rb ); + void e_fillcs ( int mdisp, Register rb ); + void e_fillde ( int mdisp, Register rb ); + void flushd ( int mdisp, Register rb ); + void evictdl ( int mdisp, Register rb ); + void evictdg ( int mdisp, Register rb ); + + 
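Most of the vector declarations above come in pairs, a register form and an immediate "lit" form, and the scalar loads/stores take a signed displacement plus a base register in Alpha style. The mock below, which is not part of the patch itself, only illustrates those call shapes; `MockAssembler`, the register structs, and the printed syntax are hypothetical stand-ins, not the real SW64 encoder.

```cpp
// Illustrative mock of the call shapes implied by the declarations above.
// Nothing here emits real SW64 instructions.
#include <cstdio>

struct FloatRegister { int encoding; };
struct Register      { int encoding; };

struct MockAssembler {
  // register form: the middle operand supplies the shift amount
  void vsllh(FloatRegister fa, FloatRegister fb, FloatRegister fc) {
    std::printf("vsllh v%d, v%d, v%d\n", fa.encoding, fb.encoding, fc.encoding);
  }
  // immediate ("lit") form of the same mnemonic
  void vsllh(FloatRegister fa, int lit, FloatRegister fc) {
    std::printf("vsllh v%d, #%d, v%d\n", fa.encoding, lit, fc.encoding);
  }
  // displacement-plus-base memory form used by ldw/stw/flds/fstd above
  void ldw(Register ra, int mdisp, Register rb) {
    std::printf("ldw r%d, %d(r%d)\n", ra.encoding, mdisp, rb.encoding);
  }
};

int main() {
  MockAssembler as;
  FloatRegister v0{0}, v1{1}, v2{2};
  Register r1{1}, r2{2};
  as.vsllh(v0, v1, v2);  // shift count taken from a register
  as.vsllh(v0, 3, v2);   // shift count as an immediate
  as.ldw(r1, 16, r2);    // load the word at 16(r2) into r1
  return 0;
}
```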
//jzy just for compiling, maybe delete in future + static address locate_operand(address inst, WhichOperand which) { assert(false, "unimplement locate_operand:jzy"); return inst;} + static address locate_next_instruction(address inst) { assert(false, "unimplement locate_next_instruction:jzy"); return inst;} + static bool is_polling_page_far() { assert(false, "unimplement is_polling_page_far:jzy");; return false; } + void clflush(Address addr) { assert(false, "unimplement clflush:jzy"); } +}; + +// Invert a condition +inline const Assembler::Condition operator~(const Assembler::Condition cond) { + return Assembler::Condition(int(cond) ^ 1); +} + +class BiasedLockingCounters; + +extern "C" void das(uint64_t start, int len); + +#endif // CPU_SW64_VM_ASSEMBLER_SW64_HPP diff --git a/src/hotspot/cpu/sw64/assembler_sw64.inline.hpp b/src/hotspot/cpu/sw64/assembler_sw64.inline.hpp new file mode 100644 index 00000000000..c8dbb843cfc --- /dev/null +++ b/src/hotspot/cpu/sw64/assembler_sw64.inline.hpp @@ -0,0 +1,33 @@ +/* + * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_SW64_VM_ASSEMBLER_SW64_INLINE_HPP +#define CPU_SW64_VM_ASSEMBLER_SW64_INLINE_HPP + +#include "asm/assembler.inline.hpp" +#include "asm/codeBuffer.hpp" +#include "code/codeCache.hpp" + +#endif // CPU_SW64_VM_ASSEMBLER_SW64_INLINE_HPP diff --git a/src/hotspot/cpu/sw64/bytes_sw64.hpp b/src/hotspot/cpu/sw64/bytes_sw64.hpp new file mode 100644 index 00000000000..94cb932e13f --- /dev/null +++ b/src/hotspot/cpu/sw64/bytes_sw64.hpp @@ -0,0 +1,131 @@ +/* + * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_SW64_VM_BYTES_SW64_HPP +#define CPU_SW64_VM_BYTES_SW64_HPP + +#include "memory/allocation.hpp" + +class Bytes: AllStatic { +public: + // Efficient reading and writing of unaligned unsigned data in platform-specific byte ordering + // (SW64 is not assumed to handle all unaligned accesses in hardware; the 16-bit accessors and the stores below check alignment explicitly) + static inline u2 get_native_u2(address p) { + if ((intptr_t)p & 0x1) { + return ((u2)p[1] << 8) | (u2)p[0]; + } else { + return *(u2*)p; + } + } + + static inline u4 get_native_u4(address p) { + return *(u4*)p; + } + + static inline u8 get_native_u8(address p) { + return *(u8*)p; + } + + // split potentially unaligned stores into smaller aligned accesses + static inline void put_native_u2(address p, u2 x) { + if((intptr_t)p & 0x1) { + p[0] = (u_char)(x); + p[1] = (u_char)(x>>8); + } else { + *(u2*)p = x; + } + } + + static inline void put_native_u4(address p, u4 x) { + // refer to sparc implementation. + // Note that sparc is big-endian, while sw64 is little-endian + switch ( intptr_t(p) & 3 ) { + case 0: *(u4*)p = x; + break; + + case 2: ((u2*)p)[1] = x >> 16; + ((u2*)p)[0] = x; + break; + + default: ((u1*)p)[3] = x >> 24; + ((u1*)p)[2] = x >> 16; + ((u1*)p)[1] = x >> 8; + ((u1*)p)[0] = x; + break; + } + } + + static inline void put_native_u8(address p, u8 x) { + // refer to sparc implementation. + // Note that sparc is big-endian, while sw64 is little-endian + switch ( intptr_t(p) & 7 ) { + case 0: *(u8*)p = x; + break; + + case 4: ((u4*)p)[1] = x >> 32; + ((u4*)p)[0] = x; + break; + + case 2: ((u2*)p)[3] = x >> 48; + ((u2*)p)[2] = x >> 32; + ((u2*)p)[1] = x >> 16; + ((u2*)p)[0] = x; + break; + + default: ((u1*)p)[7] = x >> 56; + ((u1*)p)[6] = x >> 48; + ((u1*)p)[5] = x >> 40; + ((u1*)p)[4] = x >> 32; + ((u1*)p)[3] = x >> 24; + ((u1*)p)[2] = x >> 16; + ((u1*)p)[1] = x >> 8; + ((u1*)p)[0] = x; + } + } + + + // Efficient reading and writing of unaligned unsigned data in Java + // byte ordering (i.e. big-endian ordering). Byte-order reversal is + // needed since SW64 CPUs use little-endian format.
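Java class-file data is big-endian while SW64 is little-endian, and, judging from the split stores above, SW64 is not relied on to handle arbitrary unaligned accesses. A rough standalone sketch of the same two ideas in plain C++ (illustrative names only, not the HotSpot u1/u2/address types):

```cpp
#include <cstdint>
#include <cstring>

// Store a 16-bit value to a possibly unaligned address on a little-endian CPU,
// splitting into byte stores when the pointer is odd.
static inline void put_u2_unaligned(uint8_t* p, uint16_t x) {
  if (reinterpret_cast<uintptr_t>(p) & 0x1) {
    p[0] = static_cast<uint8_t>(x);       // low byte first (little-endian)
    p[1] = static_cast<uint8_t>(x >> 8);
  } else {
    std::memcpy(p, &x, sizeof(x));        // aligned: a normal 16-bit store
  }
}

// Java data is big-endian, so reading it on a little-endian CPU is a byte swap.
static inline uint16_t get_java_u2(const uint8_t* p) {
  return static_cast<uint16_t>((p[0] << 8) | p[1]);
}
```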
+ static inline u2 get_Java_u2(address p) { return swap_u2(get_native_u2(p)); } + static inline u4 get_Java_u4(address p) { return swap_u4(get_native_u4(p)); } + static inline u8 get_Java_u8(address p) { return swap_u8(get_native_u8(p)); } + + static inline void put_Java_u2(address p, u2 x) { put_native_u2(p, swap_u2(x)); } + static inline void put_Java_u4(address p, u4 x) { put_native_u4(p, swap_u4(x)); } + static inline void put_Java_u8(address p, u8 x) { put_native_u8(p, swap_u8(x)); } + + + // Efficient swapping of byte ordering + static inline u2 swap_u2(u2 x); // compiler-dependent implementation + static inline u4 swap_u4(u4 x); // compiler-dependent implementation + static inline u8 swap_u8(u8 x); +}; + + +//// The following header contains the implementations of swap_u2, swap_u4, and swap_u8[_base] +#include OS_CPU_HEADER(bytes) + +#endif // CPU_SW64_VM_BYTES_SW64_HPP diff --git a/src/hotspot/cpu/sw64/c2_MacroAssembler_sw64.cpp b/src/hotspot/cpu/sw64/c2_MacroAssembler_sw64.cpp new file mode 100644 index 00000000000..a3c3f48902e --- /dev/null +++ b/src/hotspot/cpu/sw64/c2_MacroAssembler_sw64.cpp @@ -0,0 +1,125 @@ +/* + * Copyright (c) 2003, 2017, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/assembler.hpp" +#include "asm/assembler.inline.hpp" +#include "opto/c2_MacroAssembler.hpp" +#include "opto/intrinsicnode.hpp" +#include "opto/subnode.hpp" + +#ifdef PRODUCT +#define BLOCK_COMMENT(str) /* nothing */ +#define STOP(error) stop(error) +#else +#define BLOCK_COMMENT(str) block_comment(str) +#define STOP(error) block_comment(error); stop(error) +#endif + +#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") + +// Compare strings. 
+void C2_MacroAssembler::string_compare(Register str1, Register str2, + Register cnt1, Register cnt2, Register result, Register tmp1, Register tmp2, + FloatRegister vtmp1, FloatRegister vtmp2, FloatRegister vtmp3, int ae) { + should_not_reach_here("string_compare"); +} + +// Search for str1 in str2 and return index or -1 +void C2_MacroAssembler::string_indexof(Register str2, Register str1, + Register cnt2, Register cnt1, + Register tmp1, Register tmp2, + Register tmp3, Register tmp4, + Register tmp5, Register tmp6, + int icnt1, Register result, int ae) { + should_not_reach_here("string_indexof"); +} + +void C2_MacroAssembler::string_indexof_char(Register str1, Register cnt1, + Register ch, Register result, + Register tmp1, Register tmp2, Register tmp3) +{ + should_not_reach_here("string_indexof_char"); +} + +// This method checks if provided byte array contains byte with highest bit set. +void C2_MacroAssembler::has_negatives(Register ary1, Register len, Register result) { + // a1: byte array + // a2: len + // v0: result + //ShortBranchVerifier sbv(this); + Register tmp1 = rscratch3; + assert_different_registers(ary1, len, result, tmp1); + //assert_different_registers(vec1, vec2); + Label TRUE_LABEL, FALSE_LABEL, DONE, COMPARE_CHAR, COMPARE_VECTORS, COMPARE_BYTE; + + // len == 0 + //testl(len, len); + jcc(Assembler::zero, FALSE_LABEL, len); + + movwu(result, len); // copy + + // Compare 4-byte vectors + andw(len, 0xfffffffc, len); // vector count (in bytes) + jcc(Assembler::zero, COMPARE_CHAR, len); + + lea(ary1, Address(ary1, len, Address::times_1)); + negptr(len); + + bind(COMPARE_VECTORS); + ldwu(tmp1, Address(ary1, len, Address::times_1)); + andw(tmp1, 0x80808080, tmp1); + jcc(Assembler::notZero, TRUE_LABEL, tmp1); + addptr(len, 4, len); + jcc(Assembler::notZero, COMPARE_VECTORS, len); + + // Compare trailing char (final 2 bytes), if any + bind(COMPARE_CHAR); + testl(result, 0x2); // tail char + jcc(Assembler::zero, COMPARE_BYTE); + load_unsigned_short(tmp1, Address(ary1, 0)); + andw(tmp1, 0x00008080, tmp1); + jcc(Assembler::notZero, TRUE_LABEL, tmp1); + subptr(result, 2, result); + lea(ary1, Address(ary1, 2)); + + bind(COMPARE_BYTE); + testw(result, 0x1); // tail byte + jcc(Assembler::zero, FALSE_LABEL); + load_unsigned_byte(tmp1, Address(ary1, 0)); + andw(tmp1, 0x00000080, tmp1); + jcc(Assembler::notEqual, TRUE_LABEL, tmp1); + jmp(FALSE_LABEL); + + bind(TRUE_LABEL); + mov_immediate32u(result, 1); // return true + jmp(DONE); + + bind(FALSE_LABEL); + mov_immediate32u(result, 0); // return false + + // That's it + bind(DONE); +} \ No newline at end of file diff --git a/src/hotspot/cpu/sw64/c2_MacroAssembler_sw64.hpp b/src/hotspot/cpu/sw64/c2_MacroAssembler_sw64.hpp new file mode 100644 index 00000000000..f74ffef36ea --- /dev/null +++ b/src/hotspot/cpu/sw64/c2_MacroAssembler_sw64.hpp @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2003, 2017, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
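The `has_negatives` routine above uses the usual 0x80808080 mask trick: AND-ing a 4-byte word with the mask leaves a nonzero result exactly when some byte has its high (sign) bit set, and a 2-byte and 1-byte tail are then handled separately. A plain-C++ restatement of that logic, illustrative only and not the HotSpot code path:

```cpp
#include <cstddef>
#include <cstdint>
#include <cstring>

// Returns true if any byte in ary[0..len) has its high bit set.
static bool has_negatives_scalar(const uint8_t* ary, size_t len) {
  size_t i = 0;
  for (; i + 4 <= len; i += 4) {
    uint32_t v;
    std::memcpy(&v, ary + i, 4);          // 4-byte chunk
    if (v & 0x80808080u) return true;     // some byte in this word is >= 0x80
  }
  if (i + 2 <= len) {                     // 2-byte tail
    uint16_t v;
    std::memcpy(&v, ary + i, 2);
    if (v & 0x8080u) return true;
    i += 2;
  }
  return i < len && (ary[i] & 0x80u);     // final odd byte, if any
}
```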
See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_SW64_C2_MACROASSEMBLER_SW64_HPP +#define CPU_SW64_C2_MACROASSEMBLER_SW64_HPP + +// C2_MacroAssembler contains high-level macros for C2 + +public: + +void string_compare(Register str1, Register str2, + Register cnt1, Register cnt2, Register result, + Register tmp1, Register tmp2, FloatRegister vtmp1, + FloatRegister vtmp2, FloatRegister vtmp3, int ae); + +void string_indexof(Register str1, Register str2, + Register cnt1, Register cnt2, + Register tmp1, Register tmp2, + Register tmp3, Register tmp4, + Register tmp5, Register tmp6, + int int_cnt1, Register result, int ae); + +void string_indexof_char(Register str1, Register cnt1, + Register ch, Register result, + Register tmp1, Register tmp2, Register tmp3); + +void has_negatives(Register ary1, Register len, Register result); + +#endif // CPU_SW64_C2_MACROASSEMBLER_SW64_HPP diff --git a/src/hotspot/cpu/sw64/c2_globals_sw64.hpp b/src/hotspot/cpu/sw64/c2_globals_sw64.hpp new file mode 100644 index 00000000000..19b737baf0e --- /dev/null +++ b/src/hotspot/cpu/sw64/c2_globals_sw64.hpp @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2000, 2017, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_SW64_VM_C2_GLOBALS_SW64_HPP +#define CPU_SW64_VM_C2_GLOBALS_SW64_HPP + +#include "utilities/globalDefinitions.hpp" +#include "utilities/macros.hpp" + +// Sets the default values for platform dependent flags used by the server compiler. +// (see c2_globals.hpp). Alpha-sorted. 
+ +define_pd_global(bool, BackgroundCompilation, true); +define_pd_global(bool, CICompileOSR, true); +define_pd_global(bool, InlineIntrinsics, true); +define_pd_global(bool, PreferInterpreterNativeStubs, false); +define_pd_global(bool, ProfileTraps, true); +define_pd_global(bool, UseOnStackReplacement, true); +define_pd_global(bool, ProfileInterpreter, true); +define_pd_global(bool, TieredCompilation, COMPILER1_PRESENT(true) NOT_COMPILER1(false)); +define_pd_global(intx, CompileThreshold, 10000); + +define_pd_global(intx, OnStackReplacePercentage, 140); +define_pd_global(intx, ConditionalMoveLimit, 3); +define_pd_global(intx, FLOATPRESSURE, 32); +define_pd_global(intx, FreqInlineSize, 325); +define_pd_global(intx, MinJumpTableSize, 10); +define_pd_global(intx, INTPRESSURE, 22); //TODO check (value 24 will cause compile skiped) +define_pd_global(intx, InteriorEntryAlignment, 16); +define_pd_global(intx, NewSizeThreadIncrease, ScaleForWordSize(4*K)); +define_pd_global(intx, LoopUnrollLimit, 60); +define_pd_global(intx, LoopPercentProfileLimit, 30); +// InitialCodeCacheSize derived from specjbb2000 run. +define_pd_global(intx, InitialCodeCacheSize, 2496*K); // Integral multiple of CodeCacheExpansionSize +define_pd_global(intx, CodeCacheExpansionSize, 64*K); + +// Ergonomics related flags +define_pd_global(uint64_t,MaxRAM, 128ULL*G); +define_pd_global(intx, RegisterCostAreaRatio, 16000); + +// Peephole and CISC spilling both break the graph, and so makes the +// scheduler sick. +define_pd_global(bool, OptoPeephole, false); +define_pd_global(bool, UseCISCSpill, false); +define_pd_global(bool, OptoScheduling, false); +define_pd_global(bool, OptoBundling, false); +define_pd_global(bool, OptoRegScheduling, false); +define_pd_global(bool, SuperWordLoopUnrollAnalysis, true); +define_pd_global(bool, IdealizeClearArrayNode, true); + +define_pd_global(intx, ReservedCodeCacheSize, 248*M); +define_pd_global(intx, NonProfiledCodeHeapSize, 21*M); +define_pd_global(intx, ProfiledCodeHeapSize, 22*M); +define_pd_global(intx, NonNMethodCodeHeapSize, 5*M ); +define_pd_global(uintx, CodeCacheMinBlockLength, 6); +define_pd_global(uintx, CodeCacheMinimumUseSpace, 400*K); + +// Ergonomics related flags +define_pd_global(bool, NeverActAsServerClassMachine, false); + +define_pd_global(bool, TrapBasedRangeChecks, false); // Not needed. + +#endif // CPU_SW64_VM_C2_GLOBALS_SW64_HPP diff --git a/src/hotspot/cpu/sw64/c2_init_sw64.cpp b/src/hotspot/cpu/sw64/c2_init_sw64.cpp new file mode 100644 index 00000000000..9ec3a2e7f3a --- /dev/null +++ b/src/hotspot/cpu/sw64/c2_init_sw64.cpp @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "opto/compile.hpp" +#include "opto/node.hpp" +#include "runtime/vm_version.hpp" + +// processor dependent initialization for i486 + +void Compile::pd_compiler2_init() { + guarantee(CodeEntryAlignment >= InteriorEntryAlignment, "" ); + // QQQ presumably all 64bit cpu's support this. Seems like the ifdef could + // simply be left out. +} diff --git a/src/hotspot/cpu/sw64/c2_safepointPollStubTable_sw64.cpp b/src/hotspot/cpu/sw64/c2_safepointPollStubTable_sw64.cpp new file mode 100644 index 00000000000..ec90a00a193 --- /dev/null +++ b/src/hotspot/cpu/sw64/c2_safepointPollStubTable_sw64.cpp @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "opto/compile.hpp" +#include "opto/node.hpp" +#include "opto/output.hpp" +#include "runtime/sharedRuntime.hpp" + +#define __ masm. +void C2SafepointPollStubTable::emit_stub_impl(MacroAssembler& masm, C2SafepointPollStub* entry) const { + assert(SharedRuntime::polling_page_return_handler_blob() != NULL, + "polling page return stub not created yet"); + address stub = SharedRuntime::polling_page_return_handler_blob()->entry_point(); + + RuntimeAddress callback_addr(stub); + + __ bind(entry->_stub_label); + InternalAddress safepoint_pc(masm.pc() - masm.offset() + entry->_safepoint_offset); + + __ lea(rscratch1, safepoint_pc); + __ stptr(rscratch1, Address(rthread, JavaThread::saved_exception_pc_offset())); + + __ jump(callback_addr); +} +#undef __ diff --git a/src/hotspot/cpu/sw64/cas.m4 b/src/hotspot/cpu/sw64/cas.m4 new file mode 100644 index 00000000000..2f7b1ff9ee5 --- /dev/null +++ b/src/hotspot/cpu/sw64/cas.m4 @@ -0,0 +1,142 @@ +dnl Copyright (c) 2016, Red Hat Inc. All rights reserved. +dnl DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +dnl +dnl This code is free software; you can redistribute it and/or modify it +dnl under the terms of the GNU General Public License version 2 only, as +dnl published by the Free Software Foundation. 
+dnl +dnl This code is distributed in the hope that it will be useful, but WITHOUT +dnl ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +dnl FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +dnl version 2 for more details (a copy is included in the LICENSE file that +dnl accompanied this code). +dnl +dnl You should have received a copy of the GNU General Public License version +dnl 2 along with this work; if not, write to the Free Software Foundation, +dnl Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +dnl +dnl Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +dnl or visit www.oracle.com if you need additional information or have any +dnl questions. +dnl +dnl +dnl Process this file with m4 cas.m4 to generate the CAE and wCAS +dnl instructions used in sw64.ad. +dnl + +// BEGIN This section of the file is automatically generated. Do not edit -------------- + +// Sundry CAS operations. Note that release is always true, +// regardless of the memory ordering of the CAS. This is because we +// need the volatile case to be sequentially consistent but there is +// no trailing StoreLoad barrier emitted by C2. Unfortunately we +// can't check the type of memory ordering here, so we always emit a +// STLXR. + +// This section is generated from sw64_ad_cas.m4 + + +define(`CAS_INSN', +` +instruct compareAndExchange$1$5(iReg$2NoSp res, indirect mem, iReg$2 oldval, iReg$2 newval, rFlagsReg cr) %{ + match(Set res (CompareAndExchange$1 mem (Binary oldval newval))); + ifelse($5,Acq,' predicate(needs_acquiring_load_exclusive(n)); + ins_cost(VOLATILE_REF_COST);`,' ins_cost(2 * VOLATILE_REF_COST);`) + effect(TEMP_DEF res, KILL cr); + format %{ + "cmpxchg $res = $mem, $oldval, $newval\t# ($3, weak) if $mem == $oldval then $mem <-- $newval" + %} + ins_encode %{ + __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, + Assembler::$4, /*acquire*/ ifelse($5,Acq,true,false), /*release*/ true, + /*weak*/ false, $res$$Register); + %} + ins_pipe(pipe_slow); +%}')dnl +define(`CAS_INSN4', +` +instruct compareAndExchange$1$7(iReg$2NoSp res, indirect mem, iReg$2 oldval, iReg$2 newval, rFlagsReg cr) %{ + match(Set res (CompareAndExchange$1 mem (Binary oldval newval))); + ifelse($7,Acq,' predicate(needs_acquiring_load_exclusive(n)); + ins_cost(VOLATILE_REF_COST);`,' ins_cost(2 * VOLATILE_REF_COST);`) + effect(TEMP_DEF res, KILL cr); + format %{ + "cmpxchg $res = $mem, $oldval, $newval\t# ($3, weak) if $mem == $oldval then $mem <-- $newval" + %} + ins_encode %{ + __ $5(rscratch2, $oldval$$Register); + __ cmpxchg($mem$$Register, rscratch2, $newval$$Register, + Assembler::$4, /*acquire*/ ifelse($5,Acq,true,false), /*release*/ true, + /*weak*/ false, $res$$Register); + __ $6($res$$Register, $res$$Register); + %} + ins_pipe(pipe_slow); +%}')dnl +CAS_INSN4(B,I,byte,byte,uxtbw,sxtbw) +CAS_INSN4(S,I,short,halfword,uxthw,sxthw) +CAS_INSN(I,I,int,word) +CAS_INSN(L,L,long,xword) +CAS_INSN(N,N,narrow oop,word) +CAS_INSN(P,P,ptr,xword) +dnl +dnl CAS_INSN4(B,I,byte,byte,uxtbw,sxtbw,Acq) +dnl CAS_INSN4(S,I,short,halfword,uxthw,sxthw,Acq) +dnl CAS_INSN(I,I,int,word,Acq) +dnl CAS_INSN(L,L,long,xword,Acq) +dnl CAS_INSN(N,N,narrow oop,word,Acq) +dnl CAS_INSN(P,P,ptr,xword,Acq) +dnl +define(`CAS_INSN2', +` +instruct weakCompareAndSwap$1$6(iRegINoSp res, indirect mem, iReg$2 oldval, iReg$2 newval, rFlagsReg cr) %{ + match(Set res (WeakCompareAndSwap$1 mem (Binary oldval newval))); + ifelse($6,Acq,' predicate(needs_acquiring_load_exclusive(n)); + 
ins_cost(VOLATILE_REF_COST);`,' ins_cost(2 * VOLATILE_REF_COST);`) + effect(KILL cr); + format %{ + "cmpxchg $res = $mem, $oldval, $newval\t# ($3, weak) if $mem == $oldval then $mem <-- $newval" + "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)" + %} + ins_encode %{ + __ uxt$5(rscratch2, $oldval$$Register); + __ cmpxchg($mem$$Register, rscratch2, $newval$$Register, + Assembler::$4, /*acquire*/ ifelse($6,Acq,true,false), /*release*/ true, + /*weak*/ true, noreg); + __ csetw($res$$Register, Assembler::EQ); + %} + ins_pipe(pipe_slow); +%}')dnl +define(`CAS_INSN3', +` +instruct weakCompareAndSwap$1$5(iRegINoSp res, indirect mem, iReg$2 oldval, iReg$2 newval, rFlagsReg cr) %{ + match(Set res (WeakCompareAndSwap$1 mem (Binary oldval newval))); + ifelse($5,Acq,' predicate(needs_acquiring_load_exclusive(n)); + ins_cost(VOLATILE_REF_COST);`,' ins_cost(2 * VOLATILE_REF_COST);`) + effect(KILL cr); + format %{ + "cmpxchg $res = $mem, $oldval, $newval\t# ($3, weak) if $mem == $oldval then $mem <-- $newval" + "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)" + %} + ins_encode %{ + __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, + Assembler::$4, /*acquire*/ ifelse($5,Acq,true,false), /*release*/ true, + /*weak*/ true, noreg); + __ csetw($res$$Register, Assembler::EQ); + %} + ins_pipe(pipe_slow); +%}')dnl +CAS_INSN2(B,I,byte,byte,bw) +CAS_INSN2(S,I,short,halfword,hw) +CAS_INSN3(I,I,int,word) +CAS_INSN3(L,L,long,xword) +CAS_INSN3(N,N,narrow oop,word) +CAS_INSN3(P,P,ptr,xword) +dnl CAS_INSN2(B,I,byte,byte,bw,Acq) +dnl CAS_INSN2(S,I,short,halfword,hw,Acq) +dnl CAS_INSN3(I,I,int,word,Acq) +dnl CAS_INSN3(L,L,long,xword,Acq) +dnl CAS_INSN3(N,N,narrow oop,word,Acq) +dnl CAS_INSN3(P,P,ptr,xword,Acq) +dnl + +// END This section of the file is automatically generated. Do not edit -------------- diff --git a/src/hotspot/cpu/sw64/codeBuffer_sw64.hpp b/src/hotspot/cpu/sw64/codeBuffer_sw64.hpp new file mode 100644 index 00000000000..0afd63cca74 --- /dev/null +++ b/src/hotspot/cpu/sw64/codeBuffer_sw64.hpp @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
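For readers new to the CAS shapes generated by cas.m4 above: the `compareAndExchange*` (strong) forms return the value witnessed in memory and fail only if it really differs from the expected value, while the `weakCompareAndSwap*` forms just report success or failure and are allowed to fail spuriously; as the template comment notes, release semantics are always applied. A rough C++11 `std::atomic` analogue, not tied to the SW64 `cmpxchg` implementation:

```cpp
#include <atomic>

// compareAndExchange-style: returns the value observed in memory;
// the update happened iff the returned value equals 'expected'.
long strong_cas(std::atomic<long>& mem, long expected, long desired) {
  mem.compare_exchange_strong(expected, desired,
                              std::memory_order_acq_rel,
                              std::memory_order_acquire);
  return expected;   // compare_exchange_* writes the observed value back here
}

// weakCompareAndSwap-style: may fail spuriously, so callers retry.
bool weak_cas(std::atomic<long>& mem, long expected, long desired) {
  return mem.compare_exchange_weak(expected, desired,
                                   std::memory_order_acq_rel,
                                   std::memory_order_acquire);
}
```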
+ * + */ + +#ifndef CPU_SW64_VM_CODEBUFFER_SW64_HPP +#define CPU_SW64_VM_CODEBUFFER_SW64_HPP + +private: + void pd_initialize() {} + +public: + void flush_bundle(bool start_new_bundle) {} + +#endif // CPU_SW64_VM_CODEBUFFER_SW64_HPP diff --git a/src/hotspot/cpu/sw64/compiledIC_sw64.cpp b/src/hotspot/cpu/sw64/compiledIC_sw64.cpp new file mode 100644 index 00000000000..878b40f7785 --- /dev/null +++ b/src/hotspot/cpu/sw64/compiledIC_sw64.cpp @@ -0,0 +1,154 @@ +/* + * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, 2018, Red Hat Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "code/codeCache.hpp" +#include "code/compiledIC.hpp" +#include "code/icBuffer.hpp" +#include "code/nmethod.hpp" +#include "memory/resourceArea.hpp" +#include "runtime/mutexLocker.hpp" +#include "runtime/safepoint.hpp" + +// ---------------------------------------------------------------------------- + +#define __ _masm. +address CompiledStaticCall::emit_to_interp_stub(CodeBuffer &cbuf, address mark) { + // Stub is fixed up when the corresponding call is converted from + // calling compiled code to calling interpreted code. + // movq rbx, 0 + // jmp -5 # to self + Register rbx = rmethod; + + if (mark == NULL) { + mark = cbuf.insts_mark(); // Get mark within main instrs section. + } + + // Note that the code buffer's insts_mark is always relative to insts. + // That's why we must use the macroassembler to generate a stub. + MacroAssembler _masm(&cbuf); + //__ stop("emit_to_interp_stub :not check jzy"); + address base = __ start_a_stub(to_interp_stub_size()); + if (base == NULL) { + return NULL; // CodeBuffer::expand failed. + } + // Static stub relocation stores the instruction address of the call. + __ relocate(static_stub_Relocation::spec(mark)); + // Static stub relocation also tags the Method* in the code-stream. + //__ movl(rbx, R0); // Method is zapped till fixup time. + __ prepare_patch_li48(rbx, 0); + + // This is recognized as unresolved by relocs/nativeinst/ic code. + __ relocate(relocInfo::runtime_call_type); + cbuf.set_insts_mark(); + address call_pc = (address)-1; + __ patchable_jump(call_pc); + __ align(16); + + assert(__ pc() - base <= to_interp_stub_size(), "wrong stub size"); + + // Update current stubs pointer and restore insts_end. 
+ __ end_a_stub(); + return base; +} +#undef __ + +int CompiledStaticCall::to_interp_stub_size() {//TODO:check jzy + int size = 4 * 4 + NativeCall::instruction_size; // sizeof(prepare_patch_li48) + NativeCall::instruction_size + return round_to(size, 16); +} + +int CompiledStaticCall::to_trampoline_stub_size() {//Unimplemented(); + // x86 doesn't use trampolines. + return 0; +} + +// Relocation entries for call stub, compiled java to interpreter. +int CompiledStaticCall::reloc_to_interp_stub() {//Unimplemented(); + return 16; // todo:not check jzy +} + +void CompiledDirectStaticCall::set_to_interpreted(const methodHandle& callee, address entry) { + address stub = find_stub(); + guarantee(stub != NULL, "stub not found"); + if (TraceICs) { + ResourceMark rm; + tty->print_cr("CompiledDirectStaticCall@" INTPTR_FORMAT ": set_to_interpreted %s", + p2i(instruction_address()), + callee->name_and_sig_as_C_string()); + } + + // Creation also verifies the object. + NativeMovConstReg* method_holder = nativeMovConstReg_at(stub); + NativeJump* jump = nativeJump_at(method_holder->next_instruction_address()); + verify_mt_safe(callee, entry, method_holder, jump); + + // Update stub. + method_holder->set_data((intptr_t)callee()); + jump->set_jump_destination(entry); + + // Update jump to call. + set_destination_mt_safe(stub); +} + +void CompiledDirectStaticCall::set_stub_to_clean(static_stub_Relocation* static_stub) { + assert(CompiledICLocker::is_safe(static_stub->addr()), "mt unsafe call"); + // Reset stub. + address stub = static_stub->addr(); + assert(stub != NULL, "stub not found"); + // Creation also verifies the object. + NativeMovConstReg* method_holder = nativeMovConstReg_at(stub); + method_holder->set_data(0); + NativeJump* jump = nativeJump_at(method_holder->next_instruction_address()); + jump->set_jump_destination((address)-1); +} + + +//----------------------------------------------------------------------------- +// Non-product mode code +#ifndef PRODUCT + +void CompiledDirectStaticCall::verify() { + // Verify call. + _call->verify(); + _call->verify_alignment(); + +#ifdef ASSERT + CodeBlob *cb = CodeCache::find_blob_unsafe((address) _call); + assert(cb != NULL, "sanity"); +#endif + + // Verify stub. + address stub = find_stub(); + assert(stub != NULL, "no stub found for static call"); + // Creation also verifies the object. + NativeMovConstReg* method_holder = nativeMovConstReg_at(stub); + NativeJump* jump = nativeJump_at(method_holder->next_instruction_address()); + + // Verify state. + assert(is_clean() || is_call_to_compiled() || is_call_to_interpreted(), "sanity check"); +} +#endif // !PRODUCT diff --git a/src/hotspot/cpu/sw64/copy_sw64.hpp b/src/hotspot/cpu/sw64/copy_sw64.hpp new file mode 100644 index 00000000000..fa140fdf2a7 --- /dev/null +++ b/src/hotspot/cpu/sw64/copy_sw64.hpp @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2003, 2016, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_SW64_VM_COPY_SW64_HPP +#define CPU_SW64_VM_COPY_SW64_HPP + +#include OS_CPU_HEADER(copy) + +static void pd_fill_to_words(HeapWord* tohw, size_t count, juint value) { + julong* to = (julong*) tohw; + julong v = ((julong) value << 32) | value; + while (count-- > 0) { + *to++ = v; + } +} + +static void pd_fill_to_aligned_words(HeapWord* tohw, size_t count, juint value) { + pd_fill_to_words(tohw, count, value); +} + +static void pd_fill_to_bytes(void* to, size_t count, jubyte value) { + (void)memset(to, value, count); +} + +static void pd_zero_to_words(HeapWord* tohw, size_t count) { + pd_fill_to_words(tohw, count, 0); +} + +static void pd_zero_to_bytes(void* to, size_t count) { + (void)memset(to, 0, count); +} + +#endif // CPU_SW64_VM_COPY_SW64_HPP diff --git a/src/hotspot/cpu/sw64/depChecker_sw64.hpp b/src/hotspot/cpu/sw64/depChecker_sw64.hpp new file mode 100644 index 00000000000..c0afd0ba30d --- /dev/null +++ b/src/hotspot/cpu/sw64/depChecker_sw64.hpp @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_SW64_VM_DEPCHECKER_SW64_HPP +#define CPU_SW64_VM_DEPCHECKER_SW64_HPP + +// Nothing to do on sw64 + +#endif // CPU_SW64_VM_DEPCHECKER_SW64_HPP diff --git a/src/hotspot/cpu/sw64/disassembler_sw64.hpp b/src/hotspot/cpu/sw64/disassembler_sw64.hpp new file mode 100644 index 00000000000..65e646ed81a --- /dev/null +++ b/src/hotspot/cpu/sw64/disassembler_sw64.hpp @@ -0,0 +1,57 @@ +/* + * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_SW64_VM_DISASSEMBLER_SW64_HPP +#define CPU_SW64_VM_DISASSEMBLER_SW64_HPP + + static int pd_instruction_alignment() { + return sizeof(int); + } + + static const char* pd_cpu_opts() { + return "sw64only"; + } + + // Returns address of n-th instruction preceding addr, + // NULL if no preceding instruction can be found. + // On CISC architectures, it is difficult to impossible to step + // backwards in the instruction stream. Therefore just return NULL. + // It might be beneficial to check "is_readable" as we do on ppc and s390. + static address find_prev_instr(address addr, int n_instr) { + return NULL; + } + + // special-case instruction decoding. + // There may be cases where the binutils disassembler doesn't do + // the perfect job. In those cases, decode_instruction0 may kick in + // and do it right. + // If nothing had to be done, just return "here", otherwise return "here + instr_len(here)" + static address decode_instruction0(address here, outputStream* st, address virtual_begin = NULL) { + return here; + } + + // platform-specific instruction annotations (like value of loaded constants) + static void annotate(address pc, outputStream* st) { }; +#endif // CPU_SW64_VM_DISASSEMBLER_SW64_HPP diff --git a/src/hotspot/cpu/sw64/foreign_globals_sw64.cpp b/src/hotspot/cpu/sw64/foreign_globals_sw64.cpp new file mode 100644 index 00000000000..c230816044d --- /dev/null +++ b/src/hotspot/cpu/sw64/foreign_globals_sw64.cpp @@ -0,0 +1,114 @@ +/* + * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ */ + +#include "precompiled.hpp" +#include "runtime/jniHandles.hpp" +#include "runtime/jniHandles.inline.hpp" +#include "oops/typeArrayOop.inline.hpp" +#include "prims/foreign_globals.hpp" +#include "prims/foreign_globals.inline.hpp" + +bool ABIDescriptor::is_volatile_reg(Register reg) const { + return _integer_argument_registers.contains(reg) + || _integer_additional_volatile_registers.contains(reg); +} + +bool ABIDescriptor::is_volatile_reg(FloatRegister reg) const { + return _vector_argument_registers.contains(reg) + || _vector_additional_volatile_registers.contains(reg); +} + +#define INTEGER_TYPE 0 +#define VECTOR_TYPE 1 + +const ABIDescriptor ForeignGlobals::parse_abi_descriptor_impl(jobject jabi) const { + oop abi_oop = JNIHandles::resolve_non_null(jabi); + ABIDescriptor abi; + + objArrayOop inputStorage = cast(abi_oop->obj_field(ABI.inputStorage_offset)); + loadArray(inputStorage, INTEGER_TYPE, abi._integer_argument_registers, as_Register); + loadArray(inputStorage, VECTOR_TYPE, abi._vector_argument_registers, as_FloatRegister); + + objArrayOop outputStorage = cast(abi_oop->obj_field(ABI.outputStorage_offset)); + loadArray(outputStorage, INTEGER_TYPE, abi._integer_return_registers, as_Register); + loadArray(outputStorage, VECTOR_TYPE, abi._vector_return_registers, as_FloatRegister); + + objArrayOop volatileStorage = cast(abi_oop->obj_field(ABI.volatileStorage_offset)); + loadArray(volatileStorage, INTEGER_TYPE, abi._integer_additional_volatile_registers, as_Register); + loadArray(volatileStorage, VECTOR_TYPE, abi._vector_additional_volatile_registers, as_FloatRegister); + + abi._stack_alignment_bytes = abi_oop->int_field(ABI.stackAlignment_offset); + abi._shadow_space_bytes = abi_oop->int_field(ABI.shadowSpace_offset); + + return abi; +} + +const BufferLayout ForeignGlobals::parse_buffer_layout_impl(jobject jlayout) const { + oop layout_oop = JNIHandles::resolve_non_null(jlayout); + BufferLayout layout; + + layout.stack_args_bytes = layout_oop->long_field(BL.stack_args_bytes_offset); + layout.stack_args = layout_oop->long_field(BL.stack_args_offset); + layout.arguments_next_pc = layout_oop->long_field(BL.arguments_next_pc_offset); + + typeArrayOop input_offsets = cast(layout_oop->obj_field(BL.input_type_offsets_offset)); + layout.arguments_integer = (size_t) input_offsets->long_at(INTEGER_TYPE); + layout.arguments_vector = (size_t) input_offsets->long_at(VECTOR_TYPE); + + typeArrayOop output_offsets = cast(layout_oop->obj_field(BL.output_type_offsets_offset)); + layout.returns_integer = (size_t) output_offsets->long_at(INTEGER_TYPE); + layout.returns_vector = (size_t) output_offsets->long_at(VECTOR_TYPE); + + layout.buffer_size = layout_oop->long_field(BL.size_offset); + + return layout; +} + +const CallRegs ForeignGlobals::parse_call_regs_impl(jobject jconv) const { + oop conv_oop = JNIHandles::resolve_non_null(jconv); + objArrayOop arg_regs_oop = cast(conv_oop->obj_field(CallConvOffsets.arg_regs_offset)); + objArrayOop ret_regs_oop = cast(conv_oop->obj_field(CallConvOffsets.ret_regs_offset)); + + CallRegs result; + result._args_length = arg_regs_oop->length(); + result._arg_regs = NEW_RESOURCE_ARRAY(VMReg, result._args_length); + + result._rets_length = ret_regs_oop->length(); + result._ret_regs = NEW_RESOURCE_ARRAY(VMReg, result._rets_length); + + for (int i = 0; i < result._args_length; i++) { + oop storage = arg_regs_oop->obj_at(i); + jint index = storage->int_field(VMS.index_offset); + jint type = storage->int_field(VMS.type_offset); + result._arg_regs[i] = 
VMRegImpl::vmStorageToVMReg(type, index); + } + + for (int i = 0; i < result._rets_length; i++) { + oop storage = ret_regs_oop->obj_at(i); + jint index = storage->int_field(VMS.index_offset); + jint type = storage->int_field(VMS.type_offset); + result._ret_regs[i] = VMRegImpl::vmStorageToVMReg(type, index); + } + + return result; +} \ No newline at end of file diff --git a/src/hotspot/cpu/sw64/foreign_globals_sw64.hpp b/src/hotspot/cpu/sw64/foreign_globals_sw64.hpp new file mode 100644 index 00000000000..ed7bbe9cf62 --- /dev/null +++ b/src/hotspot/cpu/sw64/foreign_globals_sw64.hpp @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +#ifndef CPU_SW64_VM_FOREIGN_GLOBALS_SW64_HPP +#define CPU_SW64_VM_FOREIGN_GLOBALS_SW64_HPP + +#include "asm/macroAssembler.hpp" +#include "utilities/growableArray.hpp" + +constexpr size_t float_reg_size = 16; // bytes + +struct ABIDescriptor { + GrowableArray _integer_argument_registers; + GrowableArray _integer_return_registers; + GrowableArray _vector_argument_registers; + GrowableArray _vector_return_registers; + + GrowableArray _integer_additional_volatile_registers; + GrowableArray _vector_additional_volatile_registers; + + int32_t _stack_alignment_bytes; + int32_t _shadow_space_bytes; + + bool is_volatile_reg(Register reg) const; + bool is_volatile_reg(FloatRegister reg) const; +}; + +struct BufferLayout { + size_t stack_args_bytes; + size_t stack_args; + size_t arguments_vector; + size_t arguments_integer; + size_t arguments_next_pc; + size_t returns_vector; + size_t returns_integer; + size_t buffer_size; +}; + +#endif // CPU_SW64_VM_FOREIGN_GLOBALS_SW64_HPP diff --git a/src/hotspot/cpu/sw64/frame_sw64.cpp b/src/hotspot/cpu/sw64/frame_sw64.cpp new file mode 100644 index 00000000000..703e8f7c8ec --- /dev/null +++ b/src/hotspot/cpu/sw64/frame_sw64.cpp @@ -0,0 +1,836 @@ +/* + * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "compiler/oopMap.hpp" +#include "interpreter/interpreter.hpp" +#include "memory/resourceArea.hpp" +#include "memory/universe.hpp" +#include "oops/markWord.hpp" +#include "oops/method.hpp" +#include "oops/oop.inline.hpp" +#include "prims/methodHandles.hpp" +#include "runtime/frame.inline.hpp" +#include "runtime/handles.inline.hpp" +#include "runtime/javaCalls.hpp" +#include "runtime/monitorChunk.hpp" +#include "runtime/signature.hpp" +#include "runtime/stackWatermarkSet.hpp" +#include "runtime/stubCodeGenerator.hpp" +#include "runtime/stubRoutines.hpp" +#include "vmreg_sw64.inline.hpp" +#include "utilities/formatBuffer.hpp" + +#ifdef COMPILER1 +#include "c1/c1_Runtime1.hpp" +#include "runtime/vframeArray.hpp" +#endif + +#ifdef ASSERT +void RegisterMap::check_location_valid() { +} +#endif + +// Profiling/safepoint support + +bool frame::safe_for_sender(JavaThread *thread) { + address sp = (address)_sp; + address fp = (address)_fp; + address unextended_sp = (address)_unextended_sp; + + // consider stack guards when trying to determine "safe" stack pointers + // sp must be within the usable part of the stack (not in guards) + if (!thread->is_in_usable_stack(sp)) { + return false; + } + + // unextended sp must be within the stack and above or equal sp + if (!thread->is_in_stack_range_incl(unextended_sp, sp)) { + return false; + } + + // an fp must be within the stack and above (but not equal) sp + // second evaluation on fp+ is added to handle situation where fp is -1 + bool fp_safe = ((fp!=NULL) && thread->is_in_stack_range_excl(fp, sp) && + thread->is_in_full_stack_checked(fp + (return_addr_offset * sizeof(void*)))); //TODO 20240722 + + // We know sp/unextended_sp are safe only fp is questionable here + + // If the current frame is known to the code cache then we can attempt to + // construct the sender and do some validation of it. This goes a long way + // toward eliminating issues when we get in frame construction code + + if (_cb != NULL ) { + + // First check if frame is complete and tester is reliable + // Unfortunately we can only check frame complete for runtime stubs and nmethod + // other generic buffer blobs are more problematic so we just assume they are + // ok. adapter blobs never have a frame complete and are never ok. + + if (!_cb->is_frame_complete_at(_pc)) { + if (_cb->is_compiled() || _cb->is_adapter_blob() || _cb->is_runtime_stub()) { + return false; + } + } + + // Could just be some random pointer within the codeBlob + if (!_cb->code_contains(_pc)) { + return false; + } + + // Entry frame checks + if (is_entry_frame()) { + // an entry frame must have a valid fp. 
+ return fp_safe && is_entry_frame_valid(thread); + } else if (is_optimized_entry_frame()) { + return fp_safe; + } + + intptr_t* sender_sp = NULL; + intptr_t* sender_unextended_sp = NULL; + address sender_pc = NULL; + intptr_t* saved_fp = NULL; + + if (is_interpreted_frame()) { + // fp must be safe + if (!fp_safe) { + return false; + } + + sender_pc = (address) this->fp()[return_addr_offset]; + // for interpreted frames, the value below is the sender "raw" sp, + // which can be different from the sender unextended sp (the sp seen + // by the sender) because of current frame local variables + sender_sp = (intptr_t*) addr_at(sender_sp_offset); + sender_unextended_sp = (intptr_t*) this->fp()[interpreter_frame_sender_sp_offset]; + saved_fp = (intptr_t*) this->fp()[link_offset]; + + } else { + // must be some sort of compiled/runtime frame + // fp does not have to be safe (although it could be check for c1?) + + // check for a valid frame_size, otherwise we are unlikely to get a valid sender_pc + if (_cb->frame_size() <= 0) { + return false; +} + + sender_sp = _unextended_sp + _cb->frame_size(); + // Is sender_sp safe? + if (!thread->is_in_full_stack_checked((address)sender_sp)) { + return false; + } + sender_unextended_sp = sender_sp; + // On Intel the return_address is always the word on the stack + sender_pc = (address) *(sender_sp-1); + // Note: frame::sender_sp_offset is only valid for compiled frame + saved_fp = (intptr_t*) *(sender_sp - frame::sender_sp_offset); + } + + + // If the potential sender is the interpreter then we can do some more checking + if (Interpreter::contains(sender_pc)) { + + // ebp is always saved in a recognizable place in any code we generate. However + // only if the sender is interpreted/call_stub (c1 too?) are we certain that the saved ebp + // is really a frame pointer. 
+ + bool saved_fp_safe = ((address)saved_fp < thread->stack_base()) && (saved_fp > sender_sp); + + if (!thread->is_in_stack_range_excl((address)saved_fp, (address)sender_sp)) { + return false; + } + + // construct the potential sender + + frame sender(sender_sp, sender_unextended_sp, saved_fp, sender_pc); + + return sender.is_interpreted_frame_valid(thread); + + } + + // We must always be able to find a recognizable pc + CodeBlob* sender_blob = CodeCache::find_blob_unsafe(sender_pc); + if (sender_pc == NULL || sender_blob == NULL) { + return false; + } + + // Could be a zombie method + if (sender_blob->is_zombie() || sender_blob->is_unloaded()) { + return false; + } + + // Could just be some random pointer within the codeBlob + if (!sender_blob->code_contains(sender_pc)) { + return false; + } + + // We should never be able to see an adapter if the current frame is something from code cache + if (sender_blob->is_adapter_blob()) { + return false; + } + + // Could be the call_stub + if (StubRoutines::returns_to_call_stub(sender_pc)) { + bool saved_fp_safe = ((address)saved_fp < thread->stack_base()) && (saved_fp > sender_sp); + + if (!saved_fp_safe) { + return false; + } + + // construct the potential sender + + frame sender(sender_sp, sender_unextended_sp, saved_fp, sender_pc); + + // Validate the JavaCallWrapper an entry frame must have + address jcw = (address)sender.entry_frame_call_wrapper(); + + return thread->is_in_stack_range_excl(jcw, (address)sender.fp()); + } else if (sender_blob->is_optimized_entry_blob()) { + return false; + } + + CompiledMethod* nm = sender_blob->as_compiled_method_or_null(); + if (nm != NULL) { + if (nm->is_deopt_mh_entry(sender_pc) || nm->is_deopt_entry(sender_pc) || + nm->method()->is_method_handle_intrinsic()) { + return false; + } + } + + // If the frame size is 0 something (or less) is bad because every nmethod has a non-zero frame size + // because the return address counts against the callee's frame. + + if (sender_blob->frame_size() <= 0) { + assert(!sender_blob->is_compiled(), "should count return address at least"); + return false; + } + + // We should never be able to see anything here except an nmethod. If something in the + // code cache (current frame) is called by an entity within the code cache that entity + // should not be anything but the call stub (already covered), the interpreter (already covered) + // or an nmethod. + + if (!sender_blob->is_compiled()) { + return false; + } + + // Could put some more validation for the potential non-interpreted sender + // frame we'd create by calling sender if I could think of any. Wait for next crash in forte... + + // One idea is seeing if the sender_pc we have is one that we'd expect to call to current cb + + // We've validated the potential sender that would be created + return true; + } + + // Must be native-compiled frame. Since sender will try and use fp to find + // linkages it must be safe + + if (!fp_safe) { + return false; + } + + // Will the pc we fetch be non-zero (which we'll find at the oldest frame) + + if ( (address) this->fp()[return_addr_offset] == NULL) return false; + + + // could try and do some more potential verification of native frame if we could think of some... 
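safe_for_sender() above relies on two flavours of stack-range test: an inclusive one (unextended_sp may equal sp) and an exclusive one (fp must sit strictly above sp). The standalone sketch below models that distinction on a downward-growing stack; MockThread and all addresses are invented.

```cpp
// Standalone sketch (not HotSpot code): the inclusive vs. exclusive
// stack-range tests that safe_for_sender() uses to sanity-check sp,
// unextended_sp and fp. Addresses and sizes are invented.
#include <cstdint>
#include <cstdio>

struct MockThread {
  uintptr_t stack_base;  // highest address; the stack grows downwards
  uintptr_t stack_end;   // lowest usable address, above the guard zone

  bool is_in_usable_stack(uintptr_t p) const {
    return p >= stack_end && p < stack_base;
  }
  // inclusive: addr may be equal to limit (unextended_sp >= sp)
  bool is_in_stack_range_incl(uintptr_t addr, uintptr_t limit) const {
    return is_in_usable_stack(addr) && addr >= limit;
  }
  // exclusive: addr must be strictly above limit (fp > sp)
  bool is_in_stack_range_excl(uintptr_t addr, uintptr_t limit) const {
    return is_in_usable_stack(addr) && addr > limit;
  }
};

int main() {
  MockThread t{0x7f0000100000, 0x7f0000000000};
  uintptr_t sp = 0x7f0000040000;
  uintptr_t unextended_sp = sp;      // equal is acceptable for unextended_sp
  uintptr_t fp = sp + 0x80;          // must be strictly above sp
  std::printf("unextended_sp ok: %d\n", (int)t.is_in_stack_range_incl(unextended_sp, sp));
  std::printf("fp ok:            %d\n", (int)t.is_in_stack_range_excl(fp, sp));
}
```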
+ + return true; + +} + + +void frame::patch_pc(Thread* thread, address pc) { + assert(_cb == CodeCache::find_blob(pc), "unexpected pc"); + address* pc_addr = &(((address*) sp())[-1]); + if (TracePcPatching) { + tty->print_cr("patch_pc at address " INTPTR_FORMAT " [" INTPTR_FORMAT " -> " INTPTR_FORMAT "]", + p2i(pc_addr), p2i(*pc_addr), p2i(pc)); + } + // Either the return address is the original one or we are going to + // patch in the same address that's already there. + assert(_pc == *pc_addr || pc == *pc_addr, "must be"); + *pc_addr = pc; + address original_pc = CompiledMethod::get_deopt_original_pc(this); + if (original_pc != NULL) { + assert(original_pc == _pc, "expected original PC to be stored before patching"); + _deopt_state = is_deoptimized; + // leave _pc as is + } else { + _deopt_state = not_deoptimized; + _pc = pc; + } +} + +bool frame::is_interpreted_frame() const { + return Interpreter::contains(pc()); +} + +int frame::frame_size(RegisterMap* map) const { + frame sender = this->sender(map); + return sender.sp() - sp(); +} + +intptr_t* frame::entry_frame_argument_at(int offset) const { + // convert offset to index to deal with tsi + int index = (Interpreter::expr_offset_in_bytes(offset)/wordSize); + // Entry frame's arguments are always in relation to unextended_sp() + return &unextended_sp()[index]; +} + +// sender_sp + +intptr_t* frame::interpreter_frame_sender_sp() const { + assert(is_interpreted_frame(), "interpreted frame expected"); + return (intptr_t*) at(interpreter_frame_sender_sp_offset); +} + +void frame::set_interpreter_frame_sender_sp(intptr_t* sender_sp) { + assert(is_interpreted_frame(), "interpreted frame expected"); + ptr_at_put(interpreter_frame_sender_sp_offset, (intptr_t) sender_sp); +} + + +// monitor elements + +BasicObjectLock* frame::interpreter_frame_monitor_begin() const { + return (BasicObjectLock*) addr_at(interpreter_frame_monitor_block_bottom_offset); +} + +BasicObjectLock* frame::interpreter_frame_monitor_end() const { + BasicObjectLock* result = (BasicObjectLock*) *addr_at(interpreter_frame_monitor_block_top_offset); + // make sure the pointer points inside the frame + assert(sp() <= (intptr_t*) result, "monitor end should be above the stack pointer"); + assert((intptr_t*) result < fp(), "monitor end should be strictly below the frame pointer"); + return result; +} + +void frame::interpreter_frame_set_monitor_end(BasicObjectLock* value) { + *((BasicObjectLock**)addr_at(interpreter_frame_monitor_block_top_offset)) = value; +} + +// Used by template based interpreter deoptimization +void frame::interpreter_frame_set_last_sp(intptr_t* sp) { + *((intptr_t**)addr_at(interpreter_frame_last_sp_offset)) = sp; +} + +frame frame::sender_for_entry_frame(RegisterMap* map) const { + assert(map != NULL, "map must be set"); + // Java frame called from C; skip all C frames and return top C + // frame of that chunk as the sender + JavaFrameAnchor* jfa = entry_frame_call_wrapper()->anchor(); + assert(!entry_frame_is_first(), "next Java fp must be non zero"); + assert(jfa->last_Java_sp() > sp(), "must be above this frame on stack"); + // Since we are walking the stack now this nested anchor is obviously walkable + // even if it wasn't when it was stacked. 
+ jfa->make_walkable(); + map->clear(); + assert(map->include_argument_oops(), "should be set by clear"); + frame fr(jfa->last_Java_sp(), jfa->last_Java_fp(), jfa->last_Java_pc()); + + return fr; +} + +OptimizedEntryBlob::FrameData* OptimizedEntryBlob::frame_data_for_frame(const frame& frame) const { + assert(frame.is_optimized_entry_frame(), "wrong frame"); + // need unextended_sp here, since normal sp is wrong for interpreter callees + return reinterpret_cast( + reinterpret_cast(frame.unextended_sp()) + in_bytes(_frame_data_offset)); +} + +bool frame::optimized_entry_frame_is_first() const { + assert(is_optimized_entry_frame(), "must be optimzed entry frame"); + OptimizedEntryBlob* blob = _cb->as_optimized_entry_blob(); + JavaFrameAnchor* jfa = blob->jfa_for_frame(*this); + return jfa->last_Java_sp() == NULL; +} + +frame frame::sender_for_optimized_entry_frame(RegisterMap* map) const { + assert(map != NULL, "map must be set"); + OptimizedEntryBlob* blob = _cb->as_optimized_entry_blob(); + // Java frame called from C; skip all C frames and return top C + // frame of that chunk as the sender + JavaFrameAnchor* jfa = blob->jfa_for_frame(*this); + assert(!optimized_entry_frame_is_first(), "must have a frame anchor to go back to"); + assert(jfa->last_Java_sp() > sp(), "must be above this frame on stack"); + // Since we are walking the stack now this nested anchor is obviously walkable + // even if it wasn't when it was stacked. + jfa->make_walkable(); + map->clear(); + assert(map->include_argument_oops(), "should be set by clear"); + vmassert(jfa->last_Java_pc() != NULL, "not walkable"); + frame fr(jfa->last_Java_sp(), jfa->last_Java_fp(), jfa->last_Java_pc()); + + return fr; +} + + +//------------------------------------------------------------------------------ +// frame::verify_deopt_original_pc +// +// Verifies the calculated original PC of a deoptimization PC for the +// given unextended SP. +#ifdef ASSERT +void frame::verify_deopt_original_pc(CompiledMethod* nm, intptr_t* unextended_sp) { + frame fr; + + // This is ugly but it's better than to change {get,set}_original_pc + // to take an SP value as argument. And it's only a debugging + // method anyway. + fr._unextended_sp = unextended_sp; + + address original_pc = nm->get_original_pc(&fr); + assert(nm->insts_contains_inclusive(original_pc), + "original PC must be in the main code section of the the compiled method (or must be immediately following it)"); +} +#endif + +//------------------------------------------------------------------------------ +// frame::adjust_unextended_sp +#ifdef ASSERT +void frame::adjust_unextended_sp() { + // On sw64, sites calling method handle intrinsics and lambda forms are treated + // as any other call site. Therefore, no special action is needed when we are + // returning to any of these call sites. + + if (_cb != NULL) { + CompiledMethod* sender_cm = _cb->as_compiled_method_or_null(); + if (sender_cm != NULL) { + // If the sender PC is a deoptimization point, get the original PC. + if (sender_cm->is_deopt_entry(_pc) || + sender_cm->is_deopt_mh_entry(_pc)) { + verify_deopt_original_pc(sender_cm, _unextended_sp); + } + } + } +} +#endif + +//------------------------------------------------------------------------------ +// frame::update_map_with_saved_link +void frame::update_map_with_saved_link(RegisterMap* map, intptr_t** link_addr) { + // The interpreter and compiler(s) always save EBP/RBP in a known + // location on entry. 
We must record where that location is + // so this if EBP/RBP was live on callout from c2 we can find + // the saved copy no matter what it called. + + // Since the interpreter always saves EBP/RBP if we record where it is then + // we don't have to always save EBP/RBP on entry and exit to c2 compiled + // code, on entry will be enough. + map->set_location(rfp->as_VMReg(), (address) link_addr); + + // this is weird "H" ought to be at a higher address however the + // oopMaps seems to have the "H" regs at the same address and the + // vanilla register. + // XXXX make this go away + if (true) { + map->set_location(rfp->as_VMReg()->next(), (address) link_addr); + } + +} + + +//------------------------------------------------------------------------------ +// frame::sender_for_interpreter_frame +frame frame::sender_for_interpreter_frame(RegisterMap* map) const { + // SP is the raw SP from the sender after adapter or interpreter + // extension. + intptr_t* sender_sp = this->sender_sp(); + + // This is the sp before any possible extension (adapter/locals). + intptr_t* unextended_sp = interpreter_frame_sender_sp(); + +#if COMPILER2_OR_JVMCI + if (map->update_map()) { + update_map_with_saved_link(map, (intptr_t**) addr_at(link_offset)); + } +#endif // COMPILER2_OR_JVMCI + + return frame(sender_sp, unextended_sp, link(), sender_pc()); +} + + +//------------------------------------------------------------------------------ +// frame::sender_for_compiled_frame +frame frame::sender_for_compiled_frame(RegisterMap* map) const { + assert(map != NULL, "map must be set"); + + // frame owned by optimizing compiler + assert(_cb->frame_size() >= 0, "must have non-zero frame size"); + intptr_t* sender_sp = unextended_sp() + _cb->frame_size(); + intptr_t* unextended_sp = sender_sp; + + // On Intel the return_address is always the word on the stack + address sender_pc = (address) *(sender_sp-1); + + // This is the saved value of EBP which may or may not really be an FP. + // It is only an FP if the sender is an interpreter frame (or C1?). + intptr_t** saved_fp_addr = (intptr_t**) (sender_sp - frame::sender_sp_offset); + + if (map->update_map()) { + // Tell GC to use argument oopmaps for some runtime stubs that need it. + // For C1, the runtime stub might not have oop maps, so set this flag + // outside of update_register_map. + map->set_include_argument_oops(_cb->caller_must_gc_arguments(map->thread())); + if (_cb->oop_maps() != NULL) { + OopMapSet::update_register_map(this, map); + } + + // Since the prolog does the save and restore of EBP there is no oopmap + // for it so we must fill in its location as if there was an oopmap entry + // since if our caller was compiled code there could be live jvm state in it. + update_map_with_saved_link(map, saved_fp_addr); + } + + assert(sender_sp != sp(), "must have changed"); + return frame(sender_sp, unextended_sp, *saved_fp_addr, sender_pc); +} + + +//------------------------------------------------------------------------------ +// frame::sender +frame frame::sender_raw(RegisterMap* map) const { + // Default is we done have to follow them. 
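sender_for_compiled_frame() above recovers the caller purely from the callee's CodeBlob frame size and two fixed slots below the sender sp: the return pc at sender_sp[-1] and the saved frame pointer at sender_sp[-sender_sp_offset]. A standalone sketch of that arithmetic, using a plain word array as a stand-in for the stack and made-up values:

```cpp
// Standalone sketch (illustrative only): locating a compiled frame's sender,
// mirroring sender_for_compiled_frame(). A word array stands in for the
// stack; frame_size is a made-up value, sender_sp_offset matches the enum.
#include <cstdint>
#include <cstdio>

int main() {
  const int sender_sp_offset = 2;              // link slot, then return pc slot
  intptr_t stack[16] = {0};

  intptr_t* unextended_sp = &stack[0];         // callee's unextended sp
  const int frame_size = 6;                    // in words, from the CodeBlob

  // Pretend the caller left its return pc and saved fp just below sender_sp.
  stack[frame_size - 1] = 0x1234;                              // return pc slot
  stack[frame_size - sender_sp_offset] = (intptr_t)&stack[12]; // saved fp slot

  intptr_t* sender_sp = unextended_sp + frame_size;
  uintptr_t sender_pc = (uintptr_t)*(sender_sp - 1);
  intptr_t* saved_fp  = (intptr_t*)*(sender_sp - sender_sp_offset);

  std::printf("sender_pc = 0x%lx, saved_fp = %p\n",
              (unsigned long)sender_pc, (void*)saved_fp);
}
```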
The sender_for_xxx will + // update it accordingly + map->set_include_argument_oops(false); + + if (is_entry_frame()) return sender_for_entry_frame(map); + if (is_optimized_entry_frame()) return sender_for_optimized_entry_frame(map); + if (is_interpreted_frame()) return sender_for_interpreter_frame(map); + assert(_cb == CodeCache::find_blob(pc()),"Must be the same"); + + if (_cb != NULL) { + return sender_for_compiled_frame(map); + } + // Must be native-compiled frame, i.e. the marshaling code for native + // methods that exists in the core system. + return frame(sender_sp(), link(), sender_pc()); +} + +frame frame::sender(RegisterMap* map) const { + frame result = sender_raw(map); + + if (map->process_frames()) { + StackWatermarkSet::on_iteration(map->thread(), result); + } + + return result; +} + +bool frame::is_interpreted_frame_valid(JavaThread* thread) const { + assert(is_interpreted_frame(), "Not an interpreted frame"); + // These are reasonable sanity checks + if (fp() == 0 || (intptr_t(fp()) & (wordSize-1)) != 0) { + return false; + } + if (sp() == 0 || (intptr_t(sp()) & (wordSize-1)) != 0) { + return false; + } + if (fp() + interpreter_frame_initial_sp_offset < sp()) { + return false; + } + // These are hacks to keep us out of trouble. + // The problem with these is that they mask other problems + if (fp() <= sp()) { // this attempts to deal with unsigned comparison above + return false; + } + + // do some validation of frame elements + // first the method + + Method* m = safe_interpreter_frame_method(); + + // validate the method we'd find in this potential sender + if (!Method::is_valid_method(m)) return false; + + // stack frames shouldn't be much larger than max_stack elements + // this test requires the use the unextended_sp which is the sp as seen by + // the current frame, and not sp which is the "raw" pc which could point + // further because of local variables of the callee method inserted after + // method arguments + if (fp() - unextended_sp() > 1024 + m->max_stack()*Interpreter::stackElementSize) { + return false; + } + + // validate bci/bcp + + address bcp = interpreter_frame_bcp(); + if (m->validate_bci_from_bcp(bcp) < 0) { + return false; + } + + // validate ConstantPoolCache* + ConstantPoolCache* cp = *interpreter_frame_cache_addr(); + if (MetaspaceObj::is_valid(cp) == false) return false; + + // validate locals + + address locals = (address) *interpreter_frame_locals_addr(); + + return thread->is_in_stack_range_incl(locals, (address)fp()); +} + +BasicType frame::interpreter_frame_result(oop* oop_result, jvalue* value_result) { + assert(is_interpreted_frame(), "interpreted frame expected"); + Method* method = interpreter_frame_method(); + BasicType type = method->result_type(); + + intptr_t* tos_addr; + if (method->is_native()) { + // Prior to calling into the runtime to report the method_exit the possible + // return value is pushed to the native stack. If the result is a jfloat/jdouble + // then ST0 is saved before EAX/EDX. See the note in generate_native_result + tos_addr = (intptr_t*)sp(); + if (type == T_FLOAT || type == T_DOUBLE) { + // QQQ seems like this code is equivalent on the two platforms + // This is times two because we do a push(ltos) after pushing XMM0 + // and that takes two interpreter stack slots. 
+ tos_addr += 2 * Interpreter::stackElementWords; + } + } else { + tos_addr = (intptr_t*)interpreter_frame_tos_address(); + } + + switch (type) { + case T_OBJECT : + case T_ARRAY : { + oop obj; + if (method->is_native()) { + obj = cast_to_oop(at(interpreter_frame_oop_temp_offset)); + } else { + oop* obj_p = (oop*)tos_addr; + obj = (obj_p == NULL) ? (oop)NULL : *obj_p; + } + assert(Universe::is_in_heap_or_null(obj), "sanity check"); + *oop_result = obj; + break; + } + case T_BOOLEAN : value_result->z = *(jboolean*)tos_addr; break; + case T_BYTE : value_result->b = *(jbyte*)tos_addr; break; + case T_CHAR : value_result->c = *(jchar*)tos_addr; break; + case T_SHORT : value_result->s = *(jshort*)tos_addr; break; + case T_INT : value_result->i = *(jint*)tos_addr; break; + case T_LONG : value_result->j = *(jlong*)tos_addr; break; + case T_FLOAT : value_result->f = *(jfloat*)tos_addr; break; +// yj not sure +// case T_FLOAT : { +// if (method->is_native()) { +// jdouble d = *(jdouble*)tos_addr; // Result was in ST0 so need to convert to jfloat +// value_result->f = (jfloat)d; +// } else { +// value_result->f = *(jfloat*)tos_addr; +// } +// break; +// } + case T_DOUBLE : value_result->d = *(jdouble*)tos_addr; break; + case T_VOID : /* Nothing to do */ break; + default : ShouldNotReachHere(); + } + + return type; +} + + +intptr_t* frame::interpreter_frame_tos_at(jint offset) const { + int index = (Interpreter::expr_offset_in_bytes(offset)/wordSize); + return &interpreter_frame_tos_address()[index]; +} + +#ifndef PRODUCT + +#define DESCRIBE_FP_OFFSET(name) \ + values.describe(frame_no, fp() + frame::name##_offset, #name) + +void frame::describe_pd(FrameValues& values, int frame_no) { + if (is_interpreted_frame()) { + DESCRIBE_FP_OFFSET(interpreter_frame_sender_sp); + DESCRIBE_FP_OFFSET(interpreter_frame_last_sp); + DESCRIBE_FP_OFFSET(interpreter_frame_method); + DESCRIBE_FP_OFFSET(interpreter_frame_mirror); + DESCRIBE_FP_OFFSET(interpreter_frame_mdp); + DESCRIBE_FP_OFFSET(interpreter_frame_cache); + DESCRIBE_FP_OFFSET(interpreter_frame_locals); + DESCRIBE_FP_OFFSET(interpreter_frame_bcp); + DESCRIBE_FP_OFFSET(interpreter_frame_initial_sp); + } else if (is_entry_frame()) { + // This could be more descriptive if we use the enum in + // stubGenerator to map to real names but it's most important to + // claim these frame slots so the error checking works. + for (int i = 0; i < entry_frame_after_call_words; i++) { + values.describe(frame_no, fp() - i, err_msg("call_stub word fp - %d", i)); + } + } +} +#endif // !PRODUCT + +intptr_t *frame::initial_deoptimization_info() { + // used to reset the saved FP + return fp(); +} + +intptr_t* frame::real_fp() const { + if (_cb != NULL) { + // use the frame size if valid + int size = _cb->frame_size(); + if (size > 0) { + return unextended_sp() + size; + } + } + // else rely on fp() + assert(! 
is_compiled_frame(), "unknown compiled frame size"); + return fp(); +} + +void frame::init(intptr_t* sp, intptr_t* fp, address pc) { + assert(pc != NULL, "no pc?"); + _cb = CodeCache::find_blob(pc); + if (sp == NULL && _cb != NULL) { + sp = fp - _cb->frame_size() + 2; + } + _sp = sp; + _unextended_sp = sp; + _fp = fp; + _pc = pc; + + adjust_unextended_sp(); + + address original_pc = CompiledMethod::get_deopt_original_pc(this); + if (original_pc != NULL) { + _pc = original_pc; + _deopt_state = is_deoptimized; + } else { + _deopt_state = not_deoptimized; + } + +// _is_c_frame = false; +// _sender_fp_for_c_frame = NULL; +// _sender_address_for_c_frame = NULL; +// init_sender_for_c_frame(); +} + +void frame::init_sender_for_c_frame() { + if (is_java_frame() || + is_native_frame() || + is_runtime_frame() || + is_stub_frame()) { + _is_c_frame = false; + return; + } + + _is_c_frame = true; + + if (_fp == NULL) return; + bool stop_flag = false; + address pinsn = _pc ; + while ((_sender_fp_for_c_frame == NULL || _sender_address_for_c_frame == NULL) && (*((int *) pinsn)) && !stop_flag) { + int insn = *((int *) pinsn); + if (_sender_fp_for_c_frame == NULL && (insn & 0xffff0000) == 0xadfe0000) { // stl fp,yy(sp) + int yy = (insn & 0x0000ffff) / 8; + _sender_fp_for_c_frame = (intptr_t *) (*(_fp + yy)); + } else if ( _sender_address_for_c_frame == NULL && (insn & 0xffff0000) == 0xaf5e0000) { // stl ra,xx(sp) + int xx = (insn & 0x0000ffff) / 8; + _sender_address_for_c_frame = (address) (*(_fp + xx)); + } else if ((insn & 0xffff0000) == 0xffbb0000){ // ldih gp,zz(t12) + stop_flag = true; + } + pinsn -= 4; + // scan function to _pc + } +} + +void frame::init_sender_for_c_frame(address f_start_pc) { + do{ + int insn = *((int *) f_start_pc); + if ( _sender_address_for_c_frame == NULL && (insn & 0xffff0000) == 0xaf5e0000) { // stl ra,xx(sp) + int xx = (insn & 0x0000ffff) / 8; + _sender_address_for_c_frame = (address) (*(_sp + xx)); + } else if (_sender_fp_for_c_frame == NULL && (insn & 0xffff0000) == 0xadfe0000) { // stl fp,yy(sp) + int yy = (insn & 0x0000ffff) / 8; + _sender_fp_for_c_frame = (intptr_t *) (*(_sp + yy)); + } + f_start_pc += 4; + // scan function to _pc + } while ((_sender_fp_for_c_frame == NULL || _sender_address_for_c_frame == NULL) && (*((int *) f_start_pc))); +} + +// when thread stop before stl ra at stack +void frame::fixRa(const void* ucVoid) { + if (!_is_c_frame) return; + if (_sender_address_for_c_frame != NULL) { + return; + } else { + const ucontext_t *uc = (const ucontext_t *) ucVoid; + if (uc != NULL) { + _sender_address_for_c_frame = os::ucontext_get_ra(uc); + } else { + _sender_address_for_c_frame = NULL; + } + } +} + + +intptr_t* frame::sender_sp() const { +// if (_is_c_frame) { +// return _sender_fp_for_c_frame;// for sw C frame, sp is always the same as fp +// } else { + return addr_at(sender_sp_offset); +// } +} + +intptr_t* frame::link() const { +// if (_is_c_frame) +// return _sender_fp_for_c_frame; +// else + return (intptr_t*) *(intptr_t **)addr_at(link_offset); +} + +address frame::sender_pc() const { +// if (_is_c_frame) +// return _sender_address_for_c_frame; +// else { + return *sender_pc_addr(); +// } +} + +#ifndef PRODUCT +// This is a generic constructor which is only used by pns() in debug.cpp. +frame::frame(void* sp, void* fp, void* pc) { + init((intptr_t*)sp, (intptr_t*)fp, (address)pc); +} + +void frame::pd_ps() {} +#endif + +void JavaFrameAnchor::make_walkable() { + // last frame set? + if (last_Java_sp() == NULL) return; + // already walkable? 
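init_sender_for_c_frame() above recognises the prologue stores of a C frame by masking the top 16 bits of each SW64 instruction (stl fp,yy(sp), stl ra,xx(sp), ldih gp,zz(t12)) and dividing the 16-bit displacement by 8 to obtain a stack-slot index. A standalone decoder sketch of exactly that masking; the opcode masks are the ones used by the scanner above, while Decoded/Kind and main() are purely illustrative.

```cpp
// Standalone sketch: decoding the SW64 prologue stores that
// init_sender_for_c_frame() scans for.
#include <cstdint>
#include <cstdio>

enum class Kind { StoreFp, StoreRa, LoadGp, Other };
struct Decoded { Kind kind; int slot; };

Decoded decode(uint32_t insn) {
  uint32_t hi   = insn & 0xffff0000u;
  int      slot = (int)(insn & 0x0000ffffu) / 8;  // byte displacement -> 8-byte slot
  if (hi == 0xadfe0000u) return {Kind::StoreFp, slot};   // stl fp,yy(sp)
  if (hi == 0xaf5e0000u) return {Kind::StoreRa, slot};   // stl ra,xx(sp)
  if (hi == 0xffbb0000u) return {Kind::LoadGp,  0};      // ldih gp,zz(t12) -> stop scanning
  return {Kind::Other, 0};
}

int main() {
  uint32_t sample = 0xadfe0010u;   // would match "stl fp,16(sp)" -> slot 2
  Decoded d = decode(sample);
  std::printf("kind=%d slot=%d\n", (int)d.kind, d.slot);
}
```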
+ if (walkable()) return;//assert(false, "to check here: yj"); + vmassert(last_Java_sp() != NULL, "not called from Java code?"); + vmassert(last_Java_pc() == NULL, "already walkable"); + _last_Java_pc = (address)_last_Java_sp[-1]; + vmassert(walkable(), "something went wrong"); +} diff --git a/src/hotspot/cpu/sw64/frame_sw64.hpp b/src/hotspot/cpu/sw64/frame_sw64.hpp new file mode 100644 index 00000000000..0a8f671f248 --- /dev/null +++ b/src/hotspot/cpu/sw64/frame_sw64.hpp @@ -0,0 +1,161 @@ +/* + * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_SW64_VM_FRAME_SW64_HPP +#define CPU_SW64_VM_FRAME_SW64_HPP + +#include "runtime/synchronizer.hpp" + +// A frame represents a physical stack frame (an activation). Frames can be +// C or Java frames, and the Java frames can be interpreted or compiled. +// In contrast, vframes represent source-level activations, so that one physical frame +// can correspond to multiple source level frames because of inlining. +// A frame is comprised of {pc, fp, sp} +// ------------------------------ Asm interpreter ---------------------------------------- +// Layout of asm interpreter frame: +// [expression stack ] * <- sp +// [monitors ] \ +// ... 
| monitor block size +// [monitors ] / +// [monitor block size ] +// [byte code pointer ] = bcp() bcp_offset +// [pointer to locals ] = locals() locals_offset +// [constant pool cache ] = cache() cache_offset +// [methodData ] = mdp() mdx_offset +// [Method* ] = method() method_offset +// [last sp ] = last_sp() last_sp_offset +// [old stack pointer ] (sender_sp) sender_sp_offset +// [old frame pointer ] <- fp = link() +// [return pc ] +// [oop temp ] (only for native calls) +// [locals and parameters ] +// <- sender sp +// ------------------------------ Asm interpreter ---------------------------------------- + + public: + enum { + pc_return_offset = 0, + // All frames + link_offset = 0, + return_addr_offset = 1, + // non-interpreter frames + sender_sp_offset = 2, + + // Interpreter frames + interpreter_frame_result_handler_offset = 3, // for native calls only + interpreter_frame_oop_temp_offset = 2, // for native calls only + + interpreter_frame_sender_sp_offset = -1, + // outgoing sp before a call to an invoked method + interpreter_frame_last_sp_offset = interpreter_frame_sender_sp_offset - 1, + interpreter_frame_method_offset = interpreter_frame_last_sp_offset - 1, + interpreter_frame_mirror_offset = interpreter_frame_method_offset - 1, + interpreter_frame_mdp_offset = interpreter_frame_mirror_offset - 1, + interpreter_frame_cache_offset = interpreter_frame_mdp_offset - 1, + interpreter_frame_locals_offset = interpreter_frame_cache_offset - 1, + interpreter_frame_bcp_offset = interpreter_frame_locals_offset - 1, + interpreter_frame_initial_sp_offset = interpreter_frame_bcp_offset - 1, + + interpreter_frame_monitor_block_top_offset = interpreter_frame_initial_sp_offset, + interpreter_frame_monitor_block_bottom_offset = interpreter_frame_initial_sp_offset, + + // Entry frames + // n.b. these values are determined by the layout defined in + // stubGenerator for the Java call stub + entry_frame_after_call_words = 21, + entry_frame_call_wrapper_offset = -6, //generate_call_stub's call_wrapper_off + + arg_reg_save_area_bytes = 0 + }; + + intptr_t ptr_at(int offset) const { + return *ptr_at_addr(offset); + } + + void ptr_at_put(int offset, intptr_t value) { + *ptr_at_addr(offset) = value; + } + + private: + // an additional field beyond _sp and _pc: + intptr_t* _fp; // frame pointer + + bool _is_c_frame; + intptr_t* _sender_fp_for_c_frame; + address _sender_address_for_c_frame; + void init_sender_for_c_frame(); + + // The interpreter and adapters will extend the frame of the caller. + // Since oopMaps are based on the sp of the caller before extension + // we need to know that value. However in order to compute the address + // of the return address we need the real "raw" sp. Since sparc already + // uses sp() to mean "raw" sp and unextended_sp() to mean the caller's + // original sp we use that convention. 
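The layout comment and offset enum above describe the interpreter frame as a run of word slots at fixed negative offsets from fp. The standalone sketch below shows how those offsets resolve to slot addresses; the offset values mirror the enum in this header, while the backing array and fp value are fabricated.

```cpp
// Standalone sketch: resolving the interpreter-frame slots described in the
// layout comment above as negative word offsets from fp.
#include <cstdint>
#include <cstdio>

enum {
  interpreter_frame_sender_sp_offset  = -1,
  interpreter_frame_last_sp_offset    = interpreter_frame_sender_sp_offset - 1,
  interpreter_frame_method_offset     = interpreter_frame_last_sp_offset   - 1,
  interpreter_frame_mirror_offset     = interpreter_frame_method_offset    - 1,
  interpreter_frame_mdp_offset        = interpreter_frame_mirror_offset    - 1,
  interpreter_frame_cache_offset      = interpreter_frame_mdp_offset       - 1,
  interpreter_frame_locals_offset     = interpreter_frame_cache_offset     - 1,
  interpreter_frame_bcp_offset        = interpreter_frame_locals_offset    - 1,
  interpreter_frame_initial_sp_offset = interpreter_frame_bcp_offset       - 1
};

int main() {
  intptr_t storage[32] = {0};
  intptr_t* fp = &storage[16];                                   // pretend frame pointer
  intptr_t* method_slot = fp + interpreter_frame_method_offset;  // fp[-3]
  intptr_t* bcp_slot    = fp + interpreter_frame_bcp_offset;     // fp[-8]
  std::printf("method slot at fp[%d], bcp slot at fp[%d]\n",
              (int)(method_slot - fp), (int)(bcp_slot - fp));
}
```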
+ + intptr_t* _unextended_sp; + void adjust_unextended_sp() NOT_DEBUG_RETURN; + + intptr_t* ptr_at_addr(int offset) const { + return (intptr_t*) addr_at(offset); + } + +#ifdef ASSERT + // Used in frame::sender_for_{interpreter,compiled}_frame + static void verify_deopt_original_pc(CompiledMethod* nm, intptr_t* unextended_sp); +#endif + + public: + // Constructors + + frame(intptr_t* sp, intptr_t* fp, address pc); + + frame(intptr_t* sp, intptr_t* unextended_sp, intptr_t* fp, address pc); + + frame(intptr_t* sp, intptr_t* fp); + + void init(intptr_t* sp, intptr_t* fp, address pc); + + // accessors for the instance variables + // Note: not necessarily the real 'frame pointer' (see real_fp) + intptr_t* fp() const { return _fp; } + + inline address* sender_pc_addr() const; + + // expression stack tos if we are nested in a java call + intptr_t* interpreter_frame_last_sp() const; + void fixRa(const void* ucVoid); + void init_sender_for_c_frame(address f_start_pc); + + // helper to update a map with callee-saved RBP + static void update_map_with_saved_link(RegisterMap* map, intptr_t** link_addr); + + // deoptimization support + void interpreter_frame_set_last_sp(intptr_t* sp); + + static jint interpreter_frame_expression_stack_direction() { return -1; } + + // returns the sending frame, without applying any barriers + frame sender_raw(RegisterMap* map) const; + +#endif // CPU_SW64_VM_FRAME_SW64_HPP \ No newline at end of file diff --git a/src/hotspot/cpu/sw64/frame_sw64.inline.hpp b/src/hotspot/cpu/sw64/frame_sw64.inline.hpp new file mode 100644 index 00000000000..c507d31e409 --- /dev/null +++ b/src/hotspot/cpu/sw64/frame_sw64.inline.hpp @@ -0,0 +1,224 @@ +/* + * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef CPU_SW64_VM_FRAME_SW64_INLINE_HPP +#define CPU_SW64_VM_FRAME_SW64_INLINE_HPP + +#include "code/codeCache.hpp" +#include "code/vmreg.inline.hpp" +#include "runtime/registerMap.hpp" + +// Inline functions for Sw64 frames: + +// Constructors: + +inline frame::frame() { + _pc = NULL; + _sp = NULL; + _unextended_sp = NULL; + _fp = NULL; + _cb = NULL; + _deopt_state = unknown; +} + +inline frame::frame(intptr_t* sp, intptr_t* fp, address pc) { + init(sp, fp, pc); +} + +inline frame::frame(intptr_t* sp, intptr_t* unextended_sp, intptr_t* fp, address pc) { + _sp = sp; + _unextended_sp = unextended_sp; + _fp = fp; + _pc = pc; + assert(pc != NULL, "no pc?"); + _cb = CodeCache::find_blob(pc); + adjust_unextended_sp(); + + address original_pc = CompiledMethod::get_deopt_original_pc(this); + if (original_pc != NULL) { + _pc = original_pc; + _deopt_state = is_deoptimized; + } else { + if (_cb->is_deoptimization_stub()) { + _deopt_state = is_deoptimized; + } else { + _deopt_state = not_deoptimized; + } + } +// _is_c_frame = false; +} + +inline frame::frame(intptr_t* sp, intptr_t* fp) { + _sp = sp; + _unextended_sp = sp; + _fp = fp; + _pc = (address)(sp[-1]); + Unimplemented(); //ZHJ + + // Here's a sticky one. This constructor can be called via AsyncGetCallTrace + // when last_Java_sp is non-null but the pc fetched is junk. If we are truly + // unlucky the junk value could be to a zombied method and we'll die on the + // find_blob call. This is also why we can have no asserts on the validity + // of the pc we find here. AsyncGetCallTrace -> pd_get_top_frame_for_signal_handler + // -> pd_last_frame should use a specialized version of pd_last_frame which could + // call a specilaized frame constructor instead of this one. + // Then we could use the assert below. However this assert is of somewhat dubious + // value. + // assert(_pc != NULL, "no pc?"); + + _cb = CodeCache::find_blob(_pc); + adjust_unextended_sp(); + + address original_pc = CompiledMethod::get_deopt_original_pc(this); + if (original_pc != NULL) { + _pc = original_pc; + _deopt_state = is_deoptimized; + } else { + _deopt_state = not_deoptimized; + } +} + +// Accessors + +inline bool frame::equal(frame other) const { + bool ret = sp() == other.sp() + && unextended_sp() == other.unextended_sp() + && fp() == other.fp() + && pc() == other.pc(); + assert(!ret || ret && cb() == other.cb() && _deopt_state == other._deopt_state, "inconsistent construction"); + return ret; +} + +// Return unique id for this frame. The id must have a value where we can distinguish +// identity and younger/older relationship. NULL represents an invalid (incomparable) +// frame. +inline intptr_t* frame::id(void) const { return unextended_sp(); } + +// Return true if the frame is older (less recent activation) than the frame represented by id +inline bool frame::is_older(intptr_t* id) const { assert(this->id() != NULL && id != NULL, "NULL frame id"); + return this->id() > id ; } + +inline intptr_t* frame::link_or_null() const { + intptr_t** ptr = (intptr_t **)addr_at(link_offset); + return os::is_readable_pointer(ptr) ? 
*ptr : NULL; +} + +//inline intptr_t* frame::link() const { return (intptr_t*) *(intptr_t **)addr_at(link_offset); } + + +inline intptr_t* frame::unextended_sp() const { return _unextended_sp; } + +// Return address: + +inline address* frame::sender_pc_addr() const { return (address*) addr_at( return_addr_offset); } +//inline address frame::sender_pc() const { return *sender_pc_addr(); } +// +//inline intptr_t* frame::sender_sp() const { return addr_at(sender_sp_offset); } + +inline intptr_t** frame::interpreter_frame_locals_addr() const { + return (intptr_t**)addr_at(interpreter_frame_locals_offset); +} + +inline intptr_t* frame::interpreter_frame_last_sp() const { + return *(intptr_t**)addr_at(interpreter_frame_last_sp_offset); +} + +inline intptr_t* frame::interpreter_frame_bcp_addr() const { + return (intptr_t*)addr_at(interpreter_frame_bcp_offset); +} + +inline intptr_t* frame::interpreter_frame_mdp_addr() const { + return (intptr_t*)addr_at(interpreter_frame_mdp_offset); +} + + +// Constant pool cache + +inline ConstantPoolCache** frame::interpreter_frame_cache_addr() const { + return (ConstantPoolCache**)addr_at(interpreter_frame_cache_offset); +} + +// Method + +inline Method** frame::interpreter_frame_method_addr() const { + return (Method**)addr_at(interpreter_frame_method_offset); +} + +// Mirror + +inline oop* frame::interpreter_frame_mirror_addr() const { + return (oop*)addr_at(interpreter_frame_mirror_offset); +} + +// top of expression stack +inline intptr_t* frame::interpreter_frame_tos_address() const { + intptr_t* last_sp = interpreter_frame_last_sp(); + if (last_sp == NULL) { + return sp(); + } else { + // sp() may have been extended or shrunk by an adapter. At least + // check that we don't fall behind the legal region. + // For top deoptimized frame last_sp == interpreter_frame_monitor_end. + assert(last_sp <= (intptr_t*) interpreter_frame_monitor_end(), "bad tos"); + return last_sp; + } +} + +inline oop* frame::interpreter_frame_temp_oop_addr() const { + return (oop *)(fp() + interpreter_frame_oop_temp_offset); +} + +inline int frame::interpreter_frame_monitor_size() { + return BasicObjectLock::size(); +} + + +// expression stack +// (the max_stack arguments are used by the GC; see class FrameClosure) + +inline intptr_t* frame::interpreter_frame_expression_stack() const { + intptr_t* monitor_end = (intptr_t*) interpreter_frame_monitor_end(); + return monitor_end-1; +} + + +// Entry frames + +inline JavaCallWrapper** frame::entry_frame_call_wrapper_addr() const { + return (JavaCallWrapper**)addr_at(entry_frame_call_wrapper_offset); +} + + +// Compiled frames + +inline oop frame::saved_oop_result(RegisterMap* map) const { + return *((oop*) map->location(V0->as_VMReg())); +} + +inline void frame::set_saved_oop_result(RegisterMap* map, oop obj) { + *((oop*) map->location(V0->as_VMReg())) = obj; +} + +#endif // CPU_SW64_VM_FRAME_SW64_INLINE_HPP diff --git a/src/hotspot/cpu/sw64/gc/g1/g1BarrierSetAssembler_sw64.cpp b/src/hotspot/cpu/sw64/gc/g1/g1BarrierSetAssembler_sw64.cpp new file mode 100644 index 00000000000..56b3e486bd6 --- /dev/null +++ b/src/hotspot/cpu/sw64/gc/g1/g1BarrierSetAssembler_sw64.cpp @@ -0,0 +1,589 @@ +/* + * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "gc/g1/g1BarrierSet.hpp" +#include "gc/g1/g1BarrierSetAssembler.hpp" +#include "gc/g1/g1BarrierSetRuntime.hpp" +#include "gc/g1/g1CardTable.hpp" +#include "gc/g1/g1ThreadLocalData.hpp" +#include "gc/g1/heapRegion.hpp" +#include "gc/shared/collectedHeap.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/thread.hpp" +#include "interpreter/interp_masm.hpp" +#include "utilities/debug.hpp" +#include "runtime/sharedRuntime.hpp" +#ifdef COMPILER1 +#include "c1/c1_LIRAssembler.hpp" +#include "c1/c1_MacroAssembler.hpp" +#include "gc/g1/c1/g1BarrierSetC1.hpp" +#endif + +#define __ masm-> + +void G1BarrierSetAssembler::gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators, + Register addr, Register count) {SCOPEMARK_NAME(G1BarrierSetAssembler::gen_write_ref_array_pre_barrier, masm) + bool dest_uninitialized = (decorators & IS_DEST_UNINITIALIZED) != 0; +// __ stop("TODO:should check gen_write_ref_array_pre_barrier jzy"); +// ShouldNotReachHere(); + if (!dest_uninitialized) { + Register thread = rthread; + + Label filtered; + Address in_progress(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset())); + // Is marking active? + if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { + __ cmpw(in_progress, 0); + } else { + assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); + __ cmpb(in_progress, 0); + } + + __ jcc(Assembler::equal, filtered); + + __ pushad(); // push registers + + if (count == c_rarg0) { + if (addr == c_rarg1) { + // exactly backwards!! 
+ __ xchgptr(c_rarg1, c_rarg0); + } else { + __ movl(c_rarg1, count); + __ movl(c_rarg0, addr); + } + } else { + __ movl(c_rarg0, addr); + __ movl(c_rarg1, count); + } + if (UseCompressedOops) { + __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_pre_narrow_oop_entry), 2); + } else { + __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_pre_oop_entry), 2); + } + + __ popad(); + + __ bind(filtered); + } +} + +void G1BarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, + Register addr, Register count, Register tmp) { + __ pushad(); // push registers (overkill) +// __ stop("should check:gen_write_ref_array_post_barrier jzy"); + if (c_rarg0 == count) { // On win64 c_rarg0 == rcx ?jzy + assert_different_registers(c_rarg1, addr); + __ movl(c_rarg1, count); + __ movl(c_rarg0, addr); + } else { + assert_different_registers(c_rarg0, count); + __ movl(c_rarg0, addr); + __ movl(c_rarg1, count); + } + __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_post_entry), 2); + + __ popad(); +} + +void G1BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, + Register dst, Address src, Register tmp1, Register tmp_thread) { + bool on_oop = is_reference_type(type); + // __ stop("TODO:check load_at jzy"); + bool on_weak = (decorators & ON_WEAK_OOP_REF) != 0; + bool on_phantom = (decorators & ON_PHANTOM_OOP_REF) != 0; + bool on_reference = on_weak || on_phantom; + ModRefBarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread); + if (on_oop && on_reference) { + const Register thread = rthread; + __ enter(); + //__ sys_call(0xabc); + //__ br(R0, -1); + + // Generate the G1 pre-barrier code to log the value of + // the referent field in an SATB buffer. + g1_write_barrier_pre(masm /* masm */, + noreg /* obj */, + dst /* pre_val */, + thread /* thread */, + tmp1 /* tmp */, + true /* tosca_live */, + true /* expand_call */); + __ leave(); + } +} + +void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm, + Register obj, + Register pre_val, + Register thread, + Register tmp, + bool tosca_live, + bool expand_call) { + // If expand_call is true then we expand the call_VM_leaf macro + // directly to skip generating the check by + // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp. + + assert(thread == rthread, "must be"); + const Register rax = V0; + Label done; + Label runtime; + assert(pre_val != noreg, "check this code"); + + if (obj != noreg) { + assert_different_registers(obj, pre_val, tmp); + assert(pre_val != rax, "check this code"); + } + + Address in_progress(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset())); + Address index(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset())); + Address buffer(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset())); + + // Is marking active? + if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { + __ cmpw(in_progress, 0); + } else { + assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); + __ ldbu(rscratch4, in_progress); + __ sextb(rscratch4, rcc); +// __ cmpb(in_progress, 0); + } + __ jcc(Assembler::equal, done); + + // Do we need to load the previous value? + if (obj != noreg) { + __ load_heap_oop(pre_val, Address(obj, 0), noreg, noreg, AS_RAW); + } + + // Is the previous value null? 
+ __ cmpptr(pre_val, (int32_t) NULL_WORD); + __ jcc(Assembler::equal, done); + + // Can we store original value in the thread's buffer? + // Is index == 0? + // (The index field is typed as size_t.) + + __ ldptr(tmp, index); // tmp := *index_adr + __ cmpptr(tmp, 0); // tmp == 0? + __ jcc(Assembler::equal, runtime); // If yes, goto runtime//sny beq(tmp, runtime); + + __ subptr(tmp, wordSize, tmp); // tmp := tmp - wordSize + __ stptr(tmp, index); // *index_adr := tmp + __ addptr(tmp, buffer); // tmp := tmp + *buffer_adr tmp=T5 + + // Record the previous value + __ stptr(pre_val, Address(tmp, 0)); + __ jmp(done); + + __ bind(runtime); + // save the live input values + if(tosca_live) __ push(rax); + + if (obj != noreg && obj != rax) + __ push(obj); + + if (pre_val != rax) + __ push(pre_val); + + // Calling the runtime using the regular call_VM_leaf mechanism generates + // code (generated by InterpreterMacroAssember::call_VM_leaf_base) + // that checks that the *(ebp+frame::interpreter_frame_last_sp) == NULL. + // + // If we care generating the pre-barrier without a frame (e.g. in the + // intrinsified Reference.get() routine) then ebp might be pointing to + // the caller frame and so this check will most likely fail at runtime. + // + // Expanding the call directly bypasses the generation of the check. + // So when we do not have have a full interpreter frame on the stack + // expand_call should be passed true. + + + if (expand_call) { + assert(pre_val != c_rarg1, "smashed arg"); + + if (c_rarg1 != thread) { + __ movl(c_rarg1, thread); + } + if (c_rarg0 != pre_val) { + __ movl(c_rarg0, pre_val); + } + + __ MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), 2); + } else { + __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), pre_val, thread); + } + + // save the live input values + if (pre_val != rax) + __ pop(pre_val); + + if (obj != noreg && obj != rax) + __ pop(obj); + + if(tosca_live) __ pop(rax); + + __ bind(done); +} + +void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm, + Register store_addr, + Register new_val, + Register thread, + Register tmp, + Register tmp2) { + // Generated code assumes that buffer index is pointer sized. + STATIC_ASSERT(in_bytes(SATBMarkQueue::byte_width_of_index()) == sizeof(intptr_t)); + + assert(thread == rthread, "must be"); + assert(tmp != AT, "must be"); + assert(tmp2 != AT, "must be"); + Address queue_index(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset())); + Address buffer(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset())); + + CardTableBarrierSet* ct = + barrier_set_cast(BarrierSet::barrier_set()); + // assert(sizeof(*ct->card_table()->byte_map_base()) == sizeof(jbyte), "adjust this code"); + + Label done; + Label runtime; + + // Does store cross heap regions? + +// __ movl(rscratch4, store_addr); + __ xorptr(store_addr, new_val, rscratch4); + __ srll(rscratch4, HeapRegion::LogOfHRGrainBytes, rscratch4); + __ jcc(Assembler::equal, done, rscratch4); + + // crosses regions, storing NULL? + + __ cmpptr(new_val, (int32_t) NULL_WORD); + __ jcc(Assembler::equal, done); + + // storing region crossing non-NULL, is card already dirty? + + assert(sizeof(*ct->card_table()->byte_map_base()) == sizeof(jbyte), "adjust this code"); //dx? 
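g1_write_barrier_pre() above emits the SATB fast path: when marking is active and the previous value is non-null, the value is pushed into the thread-local SATB buffer by decrementing the byte index by wordSize, and only a full buffer (index == 0) falls back to the runtime call. A standalone model of that fast path; SatbQueue and try_enqueue_pre_value() are illustrative stand-ins for the thread-local queue fields used above.

```cpp
// Standalone sketch (not HotSpot code): the SATB fast path emitted by
// g1_write_barrier_pre().
#include <cstddef>
#include <cstdio>

struct SatbQueue {
  void** buffer;   // thread-local buffer base
  size_t index;    // byte index, counts down; 0 means the buffer is full
  bool   active;   // is concurrent marking active?
};

// Returns true if handled on the fast path, false if the runtime call
// (G1BarrierSetRuntime::write_ref_field_pre_entry) would be needed.
bool try_enqueue_pre_value(SatbQueue& q, void* pre_val) {
  if (!q.active || pre_val == nullptr) return true;  // nothing to record
  if (q.index == 0) return false;                    // buffer full -> slow path
  q.index -= sizeof(void*);                          // tmp := tmp - wordSize
  *(void**)((char*)q.buffer + q.index) = pre_val;    // record the previous value
  return true;
}

int main() {
  void* slots[4] = {nullptr, nullptr, nullptr, nullptr};
  SatbQueue q{slots, sizeof(slots), true};
  int dummy = 0;
  std::printf("fast path: %d, new index: %zu\n",
              (int)try_enqueue_pre_value(q, &dummy), q.index);
}
```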
+ + const Register card_addr = tmp; + const Register cardtable = tmp2; + + __ movl(card_addr, store_addr); + __ srll(card_addr, CardTable::card_shift, card_addr); + // Do not use ExternalAddress to load 'byte_map_base', since 'byte_map_base' is NOT + // a valid address and therefore is not properly handled by the relocation code. + __ mov_immediate64(cardtable, (intptr_t)ct->card_table()->byte_map_base()); + __ addptr(card_addr, cardtable, card_addr); + + __ cmpb(Address(card_addr, 0), (int)G1CardTable::g1_young_card_val()); + __ jcc(Assembler::equal, done); + + assert((int)CardTable::dirty_card_val() == 0, "must be 0"); //dx? + + //__ membar(Assembler::Membar_mask_bits(Assembler::StoreLoad)); + __ memb(); + __ cmpb(Address(card_addr, 0), (int)G1CardTable::dirty_card_val()); + __ jcc(Assembler::equal, done); + + + // storing a region crossing, non-NULL oop, card is clean. + // dirty card and log. + __ mov_immediate32(rscratch4, (int)G1CardTable::dirty_card_val()); + __ stb(rscratch4, Address(card_addr, 0));//movb + + __ ldws(rcc, queue_index); + __ beq_l(rcc, runtime); + __ jcc(Assembler::equal, runtime); + __ subl(rcc, wordSize, rcc); + __ stw (rcc, queue_index); //LSP!! + __ ldptr(tmp2, buffer); + __ ldl(rscratch4, queue_index);//?sny ldw + __ addl(tmp2, rscratch4, tmp2); + __ stl(card_addr, Address(tmp2, 0)); + + __ jmp(done); + + __ bind(runtime); + // save the live input values + __ push(store_addr); + + __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), card_addr, thread); + + __ pop(store_addr); + + __ bind(done); +} + +void G1BarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, + Address dst, Register val, Register tmp1, Register tmp2) { + bool in_heap = (decorators & IN_HEAP) != 0; + bool as_normal = (decorators & AS_NORMAL) != 0; + assert((decorators & IS_DEST_UNINITIALIZED) == 0, "unsupported"); + // __ stop("TODO:check oop_store_at jzy"); + bool needs_pre_barrier = as_normal; + bool needs_post_barrier = val != noreg && in_heap; + + Register tmp3 = r8; //need different? x86 uses r8 + Register thread = rthread; + assert_different_registers(tmp1, tmp2, tmp3, thread); //need this check? jzy + + // flatten object address if needed + // We do it regardless of precise because we need the registers + if (dst.index() == noreg && dst.disp() == 0) { + if (dst.base() != tmp1) { + __ movl(tmp1, dst.base()); // ! + } + } else { + __ lea(tmp1, dst); + } + + + if (needs_pre_barrier) { + g1_write_barrier_pre(masm /*masm*/, + tmp1 /* obj */, + tmp2 /* pre_val */, + thread /* thread */, + tmp3 /* tmp */, + val != noreg /* tosca_live */, + false /* expand_call */); + } + if (val == noreg) { + BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp1, 0), val, noreg, noreg); + } else { + Register new_val = val; + if (needs_post_barrier) { + // G1 barrier needs uncompressed oop for region cross check. 
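g1_write_barrier_post() above first asks whether the store crosses heap regions (xor of the two addresses shifted by LogOfHRGrainBytes) and then locates the card byte as byte_map_base + (store_addr >> card_shift) before dirtying and enqueueing it. The sketch below reproduces just that address arithmetic; card_shift and the region-size log are illustrative values, and a plain array plus a modulo stands in for the biased byte map.

```cpp
// Standalone sketch: the address arithmetic behind g1_write_barrier_post().
// In HotSpot the constants come from CardTable::card_shift and
// HeapRegion::LogOfHRGrainBytes.
#include <cstdint>
#include <cstdio>

int main() {
  const int card_shift            = 9;    // e.g. 512-byte cards
  const int log_of_hr_grain_bytes = 21;   // e.g. 2 MiB heap regions

  uintptr_t store_addr = 0x00007f4200012340;
  uintptr_t new_val    = 0x00007f4200645678;

  // "Does store cross heap regions?" -- xor the two addresses and shift
  // away the in-region offset bits; non-zero means different regions.
  bool crosses = ((store_addr ^ new_val) >> log_of_hr_grain_bytes) != 0;

  // The card for an address is byte_map_base + (addr >> card_shift).
  uint8_t cards[1u << 16] = {0};
  size_t card_index = (store_addr >> card_shift) % sizeof(cards);
  const uint8_t dirty_card_val = 0;       // CardTable::dirty_card_val() is 0
  cards[card_index] = dirty_card_val;     // dirty the card before enqueueing it

  std::printf("crosses region: %d, card index: %zu\n", (int)crosses, card_index);
}
```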
+ if (UseCompressedOops) { + new_val = tmp2; + __ movl(new_val, val); + } + } + BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp1, 0), val, noreg, noreg); + if (needs_post_barrier) { + g1_write_barrier_post(masm /*masm*/, + tmp1 /* store_adr */, + new_val /* new_val */, + thread /* thread */, + tmp3 /* tmp */, + tmp2 /* tmp2 */); + } +} + +} + +#ifdef COMPILER1 + +#undef __ +#define __ ce->masm()-> + +void G1BarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrierStub* stub) { + G1BarrierSetC1* bs = (G1BarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1(); + // At this point we know that marking is in progress. + // If do_load() is true then we have to emit the + // load of the previous value; otherwise it has already + // been loaded into _pre_val. + // __ stop("TODO:check gen_pre_barrier_stub jzy"); + __ bind(*stub->entry()); + assert(stub->pre_val()->is_register(), "Precondition."); + + Register pre_val_reg = stub->pre_val()->as_register(); + + if (stub->do_load()) { + ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false /*wide*/, false /*unaligned*/); + } + + __ cmpptr(pre_val_reg, (int32_t)NULL_WORD); + __ jcc(Assembler::equal, *stub->continuation()); + ce->store_parameter(stub->pre_val()->as_register(), 0); + __ call(RuntimeAddress(bs->pre_barrier_c1_runtime_code_blob()->code_begin())); + __ jmp(*stub->continuation()); + +} + +void G1BarrierSetAssembler::gen_post_barrier_stub(LIR_Assembler* ce, G1PostBarrierStub* stub) { + G1BarrierSetC1* bs = (G1BarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1(); + // __ stop("TODO:check gen_post_barrier_stub jzy"); + __ bind(*stub->entry()); + assert(stub->addr()->is_register(), "Precondition."); + assert(stub->new_val()->is_register(), "Precondition."); + Register new_val_reg = stub->new_val()->as_register(); + __ cmpptr(new_val_reg, (int32_t) NULL_WORD); + __ jcc(Assembler::equal, *stub->continuation()); + ce->store_parameter(stub->addr()->as_pointer_register(), 0); + __ call(RuntimeAddress(bs->post_barrier_c1_runtime_code_blob()->code_begin())); + __ jmp(*stub->continuation()); +} + +#undef __ + +#define __ sasm-> + +void G1BarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm) { + // Generated code assumes that buffer index is pointer sized. + STATIC_ASSERT(in_bytes(SATBMarkQueue::byte_width_of_index()) == sizeof(intptr_t)); + + __ prologue("g1_pre_barrier", false); + // arg0 : previous value of memory + const Register rax = V0; + const Register rdx = T0; + const Register rcx = T1; + // __ stop("TODO:check generate_c1_pre_barrier_runtime_stub jzy"); + __ push(rax); + __ push(rdx); + + const Register pre_val = rax; + const Register thread = rthread; + const Register tmp = rdx; + + + Address queue_active(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset())); + Address queue_index(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset())); + Address buffer(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset())); + + Label done; + Label runtime; + + // Is marking still active? + if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { + __ cmpw(queue_active, 0); + } else { + assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); + __ cmpb(queue_active, 0); + } + __ jcc(Assembler::equal, done); + + // Can we store original value in the thread's buffer? 
+ + __ ldptr(tmp, queue_index); + __ jcc(Assembler::zero, runtime, tmp); + __ subptr(tmp, wordSize, tmp); + __ stl(tmp, queue_index); + __ addptr(tmp, buffer, tmp); + + // prev_val (rax) + __ load_parameter(0, pre_val); + __ stl(pre_val, Address(tmp, 0)); + __ jmp(done); + + __ bind(runtime); + + __ save_live_registers_no_oop_map(true); + + // load the pre-value + __ load_parameter(0, rcx); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), rcx, thread); + + __ restore_live_registers(true); + + __ bind(done); + + __ pop(rdx); + __ pop(rax); + + __ epilogue(); +} + +void G1BarrierSetAssembler::generate_c1_post_barrier_runtime_stub(StubAssembler* sasm) { + __ prologue("g1_post_barrier", false); + + // arg0: store_address + Address store_addr(rbp, 2*BytesPerWord); + // __ stop("TODO:check generate_c1_post_barrier_runtime_stub jzy"); + CardTableBarrierSet* ct = + barrier_set_cast(BarrierSet::barrier_set()); + // assert(sizeof(*ct->card_table()->byte_map_base()) == sizeof(jbyte), "adjust this code"); + + Label done; + Label enqueued; + Label runtime; + + // At this point we know new_value is non-NULL and the new_value crosses regions. + // Must check to see if card is already dirty + const Register rax = V0; + const Register rdx = T0; + const Register rcx = T1; + + const Register thread = NOT_LP64(rax) LP64_ONLY(rthread); + + Address queue_index(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset())); + Address buffer(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset())); + + __ push(rax); + __ push(rcx); + + const Register cardtable = rax; + const Register card_addr = rcx; + + __ load_parameter(0, card_addr); + __ srll(card_addr, CardTable::card_shift, card_addr); + // Do not use ExternalAddress to load 'byte_map_base', since 'byte_map_base' is NOT + // a valid address and therefore is not properly handled by the relocation code. + __ mov_immediate64(cardtable, (intptr_t)ct->card_table()->byte_map_base()); + __ addptr(card_addr, cardtable, card_addr); + + + __ cmpb(Address(card_addr, 0), (int)G1CardTable::g1_young_card_val()); + __ jcc(Assembler::equal, done); + + //__ membar(Assembler::Membar_mask_bits(Assembler::StoreLoad)); + __ memb(); + __ cmpb(Address(card_addr, 0), (int)CardTable::dirty_card_val()); + __ jcc(Assembler::equal, done); + + // storing region crossing non-NULL, card is clean. + // dirty card and log. + const Register tmp = rdx; + __ push(rdx); + + __ mov_immediate32(tmp, (int)CardTable::dirty_card_val()); + __ stb(tmp, Address(card_addr, 0)); + + __ ldptr(tmp, queue_index); + __ jcc(Assembler::zero, runtime, tmp); + __ subptr(tmp, wordSize, tmp); + __ stl(tmp, queue_index); + __ addptr(tmp, buffer, tmp); + __ stl(card_addr, Address(tmp, 0)); + __ jmp(enqueued); + + __ bind(runtime); + + __ save_live_registers_no_oop_map(true); + + __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), card_addr, thread); + + __ restore_live_registers(true); + + __ bind(enqueued); + __ pop(rdx); + + __ bind(done); + __ pop(rcx); + __ pop(rax); + + __ epilogue(); +} + +#undef __ + +#endif // COMPILER1 diff --git a/src/hotspot/cpu/sw64/gc/g1/g1BarrierSetAssembler_sw64.hpp b/src/hotspot/cpu/sw64/gc/g1/g1BarrierSetAssembler_sw64.hpp new file mode 100644 index 00000000000..4320e5caaa5 --- /dev/null +++ b/src/hotspot/cpu/sw64/gc/g1/g1BarrierSetAssembler_sw64.hpp @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. 
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_SW64_GC_G1_G1BARRIERSETASSEMBLER_SW64_HPP +#define CPU_SW64_GC_G1_G1BARRIERSETASSEMBLER_SW64_HPP + +#include "asm/macroAssembler.hpp" +#include "gc/shared/modRefBarrierSetAssembler.hpp" +#include "utilities/macros.hpp" + +class LIR_Assembler; +class StubAssembler; +class G1PreBarrierStub; +class G1PostBarrierStub; + +class G1BarrierSetAssembler: public ModRefBarrierSetAssembler { +protected: + void gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators, + Register addr, Register count); + void gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, + Register start, Register count, Register tmp); + + void g1_write_barrier_pre(MacroAssembler* masm, + Register obj, + Register pre_val, + Register thread, + Register tmp, + bool tosca_live, + bool expand_call); + + void g1_write_barrier_post(MacroAssembler* masm, + Register store_addr, + Register new_val, + Register thread, + Register tmp, + Register tmp2); + + virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, + Address dst, Register val, Register tmp1, Register tmp2); + +public: +#ifdef COMPILER1 + void gen_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrierStub* stub); + void gen_post_barrier_stub(LIR_Assembler* ce, G1PostBarrierStub* stub); + + void generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm); + void generate_c1_post_barrier_runtime_stub(StubAssembler* sasm); +#endif + + void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, + Register dst, Address src, Register tmp1, Register tmp_thread); +}; + +#endif // CPU_SW64_GC_G1_G1BARRIERSETASSEMBLER_SW64_HPP diff --git a/src/hotspot/cpu/sw64/gc/g1/g1Globals_sw64.hpp b/src/hotspot/cpu/sw64/gc/g1/g1Globals_sw64.hpp new file mode 100644 index 00000000000..fc30d603135 --- /dev/null +++ b/src/hotspot/cpu/sw64/gc/g1/g1Globals_sw64.hpp @@ -0,0 +1,29 @@ +/* + * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +#ifndef CPU_SW64_GC_G1_G1GLOBALS_SW64_HPP +#define CPU_SW64_GC_G1_G1GLOBALS_SW64_HPP + +const size_t G1MergeHeapRootsPrefetchCacheSize = 16; //need check dx + +#endif // CPU_SW64_GC_G1_G1GLOBALS_SW64_HPP diff --git a/src/hotspot/cpu/sw64/gc/shared/barrierSetAssembler_sw64.cpp b/src/hotspot/cpu/sw64/gc/shared/barrierSetAssembler_sw64.cpp new file mode 100644 index 00000000000..c054d5e30be --- /dev/null +++ b/src/hotspot/cpu/sw64/gc/shared/barrierSetAssembler_sw64.cpp @@ -0,0 +1,316 @@ +/* + * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "classfile/classLoaderData.hpp" +#include "gc/shared/barrierSet.hpp" +#include "gc/shared/barrierSetAssembler.hpp" +#include "gc/shared/barrierSetNMethod.hpp" +#include "gc/shared/collectedHeap.hpp" +#include "interpreter/interp_masm.hpp" +#include "memory/universe.hpp" +#include "runtime/jniHandles.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" +#include "runtime/thread.hpp" + +#define __ masm-> +#ifdef PRODUCT +#define BLOCK_COMMENT(str) /* nothing */ +#else +#define BLOCK_COMMENT(str) { char line[1024];sprintf(line,"%s:%s:%d",str,__FILE__, __LINE__); __ block_comment(line);} +#endif +#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") + +void BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, + Register dst, Address src, Register tmp1, Register tmp_thread) {SCOPEMARK_NAME(BarrierSetAssembler::load_at, masm) + bool in_heap = (decorators & IN_HEAP) != 0; + bool in_native = (decorators & IN_NATIVE) != 0; + bool is_not_null = (decorators & IS_NOT_NULL) != 0; + bool atomic = (decorators & MO_RELAXED) != 0; + + switch (type) { + case T_OBJECT: + case T_ARRAY: { + if (in_heap) { + if (UseCompressedOops) { + __ ldwu(dst, src); + if (is_not_null) { + __ decode_heap_oop_not_null(dst); + } else { + __ decode_heap_oop(dst); // + } + } else { + __ ldl(dst, src); + } + } else { + assert(in_native, "why else?"); + __ ldl(dst, src); + } + break; + } + case T_BOOLEAN: __ load_unsigned_byte(dst, src); break; + case T_BYTE: __ load_signed_byte64(dst, src); break; + case T_CHAR: __ load_unsigned_short(dst, src); break; + case T_SHORT: __ load_signed_short(dst, src); break; + case T_INT: __ ldws (dst, src); break; + case T_ADDRESS: __ ldl (dst, src); break; + case T_FLOAT: + assert(dst == noreg, "only to ftos"); + __ load_float(FSF, src); + break; + case T_DOUBLE: + assert(dst == noreg, "only to dtos"); + __ load_double(FSF, src); + break; + case T_LONG: + assert(dst == noreg, "only to ltos"); + __ ldl(FSR, src); + break; + default: Unimplemented(); + } +} + +void BarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, + Address dst, Register val, Register tmp1, Register tmp2) { + bool in_heap = (decorators & IN_HEAP) != 0; + bool in_native = (decorators & IN_NATIVE) != 0; + bool is_not_null = (decorators & IS_NOT_NULL) != 0; + bool atomic = (decorators & MO_RELAXED) != 0; + + switch (type) { + case T_OBJECT: + case T_ARRAY: { + if (in_heap) { + if (val == noreg) { + assert(!is_not_null, "inconsistent access"); + + if (UseCompressedOops) { + __ stw(R0, dst); + } else { + __ stl(R0, dst); + } + + } else { + + if (UseCompressedOops) { + assert(!dst.uses(val), "not enough registers"); + if (is_not_null) { + __ encode_heap_oop_not_null(val); + } else { + __ encode_heap_oop(val); + } + __ stw(val, dst); + } else { + __ stl(val, dst); + } + } + } else { + assert(in_native, "why else?"); + assert(val != noreg, "not supported"); + __ stl(val, dst); + } + break; + } + case T_BOOLEAN: + __ andw(val, 0x1, val); // boolean is true if LSB is 1 + __ stb(val, dst); + break; + case T_BYTE: + __ stb(val, dst); + break; + case T_SHORT: + __ sth(val, dst); + break; + case T_CHAR: + __ sth(val, dst); + break; + case T_INT: + __ stw(val, dst); + break; + case T_LONG: + assert(val == noreg, "only tos"); + __ stl(FSR, dst); + break; + case T_FLOAT: + assert(val == noreg, "only tos"); + __ store_float(FSF, dst); + break; + case T_DOUBLE: + assert(val == noreg, "only 
tos"); + __ store_double(FSF, dst); + break; + case T_ADDRESS: + __ stptr(val, dst); + break; + default: Unimplemented(); + } +} + +// yj todo: below +void BarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env, + Register obj, Register tmp, Label& slowpath) { + const int32_t inverted_jweak_mask = ~static_cast(JNIHandles::weak_tag_mask); + STATIC_ASSERT(inverted_jweak_mask == -2); // otherwise check this code + __ andptr(obj, inverted_jweak_mask, obj); + __ ldptr(obj, Address(obj, 0)); // *obj +} + +void BarrierSetAssembler::tlab_allocate(MacroAssembler* masm, + Register thread, Register obj, + Register var_size_in_bytes, + int con_size_in_bytes, + Register t1, + Register t2, + Label& slow_case) { + assert_different_registers(obj, t1, t2); + assert_different_registers(obj, var_size_in_bytes, t1); + Register end = t2; + if (!thread->is_valid()) { + thread = rthread; + } + + __ verify_tlab(); + + __ ldptr(obj, Address(thread, JavaThread::tlab_top_offset())); + if (var_size_in_bytes == noreg) { + __ lea(end, Address(obj, con_size_in_bytes)); + } else { + __ lea(end, Address(obj, var_size_in_bytes, Address::times_1)); + } + __ cmpptr(end, Address(thread, JavaThread::tlab_end_offset())); + __ jcc(Assembler::above, slow_case); + + // update the tlab top pointer + __ stptr(end, Address(thread, JavaThread::tlab_top_offset())); + + // recover var_size_in_bytes if necessary + if (var_size_in_bytes == end) { + __ subptr(var_size_in_bytes, obj, var_size_in_bytes); + } + __ verify_tlab(); +} + +// Defines obj, preserves var_size_in_bytes +void BarrierSetAssembler::eden_allocate(MacroAssembler* masm, + Register thread, Register obj, + Register var_size_in_bytes, + int con_size_in_bytes, + Register t1, + Label& slow_case) { + Register rax = V0; + assert(obj == rax, "obj must be in rax, for cmpxchg"); + assert_different_registers(obj, var_size_in_bytes, t1); + if (!Universe::heap()->supports_inline_contig_alloc()) { + __ jmp(slow_case); + } else { + Register end = t1; + Label retry; + __ bind(retry); + ExternalAddress heap_top((address) Universe::heap()->top_addr()); + __ ldptr(obj, heap_top); + if (var_size_in_bytes == noreg) { + __ lea(end, Address(obj, con_size_in_bytes)); + } else { + __ lea(end, Address(obj, var_size_in_bytes, Address::times_1)); + } + // if end < obj then we wrapped around => object too long => slow case + __ cmpptr(end, obj); + __ jcc(Assembler::below, slow_case); + __ cmpptr(end, ExternalAddress((address) Universe::heap()->end_addr())); + __ jcc(Assembler::above, slow_case); + // Compare obj with the top addr, and if still equal, store the new top addr in + // end at the address of the top addr pointer. Sets ZF if was equal, and clears + // it otherwise. Use lock prefix for atomicity on MPs. 
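The `cmpxchgptr` that follows implements the classic bump-the-pointer race: compute the new top and publish it only if `top` still holds the value read at the start of the attempt, retrying otherwise. A self-contained sketch of the same loop, using `std::atomic<char*>` as a stand-in for the heap top pointer:

```c++
#include <atomic>
#include <cstddef>

// Returns the start of the allocated block, or nullptr for the slow case.
static char* eden_allocate(std::atomic<char*>& top, char* eden_end, size_t size) {
  char* obj = top.load();
  for (;;) {
    char* new_top = obj + size;
    if (new_top < obj || new_top > eden_end) {
      return nullptr;                       // wrapped around or eden exhausted
    }
    // cmpxchg: install new_top only if top is still obj; on failure obj is
    // refreshed with the current top and the allocation is retried.
    if (top.compare_exchange_weak(obj, new_top)) {
      return obj;                           // this thread owns [obj, new_top)
    }
  }
}
```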
+ __ cmpxchgptr(end, heap_top, obj, rscratch2);//AT==0 should retry, it's special TODO:Fixme jzy + __ jcc(Assembler::failed, retry); + incr_allocated_bytes(masm, thread, var_size_in_bytes, con_size_in_bytes); + } +} + +void BarrierSetAssembler::incr_allocated_bytes(MacroAssembler* masm, Register thread, + Register var_size_in_bytes, + int con_size_in_bytes) { + if (!thread->is_valid()) { + thread = rthread; + } + + __ ldl(rscratch4, Address(thread, in_bytes(JavaThread::allocated_bytes_offset()))); + if (var_size_in_bytes->is_valid()) { + __ addl(rscratch4, var_size_in_bytes, rscratch4); + } else { + __ addl(rscratch4, con_size_in_bytes, rscratch4); + } + __ stl(rscratch4, Address(thread, in_bytes(JavaThread::allocated_bytes_offset()))); +} + +void BarrierSetAssembler::nmethod_entry_barrier(MacroAssembler* masm) { + BarrierSetNMethod* bs_nm = BarrierSet::barrier_set()->barrier_set_nmethod(); + if (bs_nm == NULL) { + return; + } + + Label continuation; + Register thread = rthread; + Address disarmed_addr(thread, in_bytes(bs_nm->thread_disarmed_offset())); + __ align(8); + __ cmpw(disarmed_addr, 0); + __ jcc(Assembler::equal, continuation); + __ call(RuntimeAddress(StubRoutines::sw64::method_entry_barrier())); + __ bind(continuation); + +} + +void BarrierSetAssembler::c2i_entry_barrier(MacroAssembler* masm) { + BarrierSetNMethod* bs = BarrierSet::barrier_set()->barrier_set_nmethod(); + if (bs == NULL) { + return; + } + + Label bad_call; + __ cmpptr(rbx, 0); // rbx contains the incoming method for c2i adapters. + __ jcc(Assembler::equal, bad_call); + + Register tmp1 = rscratch1; + Register tmp2 = rscratch2; + + // Pointer chase to the method holder to find out if the method is concurrently unloading. + Label method_live; + __ load_method_holder_cld(tmp1, rbx); + + // Is it a strong CLD? // dx: need check + __ ldw(tmp2, Address(rscratch1, ClassLoaderData::keep_alive_offset())); + __ cmpptr(tmp2, 0); + __ jcc(Assembler::greater, method_live); + + // Is it a weak but alive CLD? + __ ldptr(tmp1, Address(tmp1, ClassLoaderData::holder_offset())); + __ resolve_weak_handle(tmp1, tmp2); + __ cmpptr(tmp1, 0); + __ jcc(Assembler::notEqual, method_live); + + __ bind(bad_call); + __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); + __ bind(method_live); +} \ No newline at end of file diff --git a/src/hotspot/cpu/sw64/gc/shared/barrierSetAssembler_sw64.hpp b/src/hotspot/cpu/sw64/gc/shared/barrierSetAssembler_sw64.hpp new file mode 100644 index 00000000000..006c0678bfe --- /dev/null +++ b/src/hotspot/cpu/sw64/gc/shared/barrierSetAssembler_sw64.hpp @@ -0,0 +1,80 @@ +/* + * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_SW64_GC_SHARED_BARRIERSETASSEMBLER_SW64_HPP +#define CPU_SW64_GC_SHARED_BARRIERSETASSEMBLER_SW64_HPP + +#include "asm/macroAssembler.hpp" +#include "memory/allocation.hpp" +#include "oops/access.hpp" + +class BarrierSetAssembler: public CHeapObj { +private: + void incr_allocated_bytes(MacroAssembler* masm, Register thread, + Register var_size_in_bytes, int con_size_in_bytes); + +public: + virtual void arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, BasicType type, + Register src, Register dst, Register count) {} + virtual void arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, BasicType type, + Register start, Register end, Register tmp) {} + virtual void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, + Register dst, Address src, Register tmp1, Register tmp_thread); + virtual void store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, + Address dst, Register val, Register tmp1, Register tmp2); + + virtual void resolve(MacroAssembler* masm, DecoratorSet decorators, Register obj) { + // Default implementation does not need to do anything. + } + + virtual void try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env, + Register obj, Register tmp, Label& slowpath); + + virtual void tlab_allocate(MacroAssembler* masm, + Register thread, // Current thread + Register obj, // result: pointer to object after successful allocation + Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise + int con_size_in_bytes, // object size in bytes if known at compile time + Register t1, // temp register + Register t2, // temp register + Label& slow_case // continuation point if fast allocation fails + ); + + void eden_allocate(MacroAssembler* masm, + Register thread, // Current thread + Register obj, // result: pointer to object after successful allocation + Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise + int con_size_in_bytes, // object size in bytes if known at compile time + Register t1, // temp register + Label& slow_case // continuation point if fast allocation fails + ); + virtual void barrier_stubs_init() {} + + virtual void nmethod_entry_barrier(MacroAssembler* masm); + + virtual void c2i_entry_barrier(MacroAssembler* masm); + +}; + +#endif // CPU_SW64_GC_SHARED_BARRIERSETASSEMBLER_SW64_HPP diff --git a/src/hotspot/cpu/sw64/gc/shared/barrierSetNMethod_sw64.cpp b/src/hotspot/cpu/sw64/gc/shared/barrierSetNMethod_sw64.cpp new file mode 100644 index 00000000000..eb3350155c9 --- /dev/null +++ b/src/hotspot/cpu/sw64/gc/shared/barrierSetNMethod_sw64.cpp @@ -0,0 +1,153 @@ +/* + * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "code/codeCache.hpp" +#include "code/nativeInst.hpp" +#include "gc/shared/barrierSetNMethod.hpp" +#include "logging/log.hpp" +#include "memory/resourceArea.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/thread.hpp" +#include "utilities/align.hpp" +#include "utilities/debug.hpp" + +class NativeNMethodCmpBarrier: public NativeInstruction { +public: + enum Intel_specific_constants { + instruction_code = 0x81, + instruction_size = 8, + imm_offset = 4, + instruction_rex_prefix = 0x40 | 0x41, + instruction_modrm = 0x7f // [r15 + offset] + }; + + address instruction_address() const { return addr_at(0); } + address immediate_address() const { return addr_at(imm_offset); } + + jint get_immedate() const { return int_at(imm_offset); } + void set_immediate(jint imm) { set_int_at(imm_offset, imm); } + void verify() const; +}; + +void NativeNMethodCmpBarrier::verify() const { + if (((uintptr_t) instruction_address()) & 0x7) { + fatal("Not properly aligned"); + } + + int prefix = ubyte_at(0); + if (prefix != instruction_rex_prefix) { + tty->print_cr("Addr: " INTPTR_FORMAT " Prefix: 0x%x", p2i(instruction_address()), + prefix); + fatal("not a cmp barrier"); + } + + int inst = ubyte_at(1); + if (inst != instruction_code) { + tty->print_cr("Addr: " INTPTR_FORMAT " Code: 0x%x", p2i(instruction_address()), + inst); + fatal("not a cmp barrier"); + } + + int modrm = ubyte_at(2); + if (modrm != instruction_modrm) { + tty->print_cr("Addr: " INTPTR_FORMAT " mod/rm: 0x%x", p2i(instruction_address()), + modrm); + fatal("not a cmp barrier"); + } +} + +void BarrierSetNMethod::deoptimize(nmethod* nm, address* return_address_ptr) { + /* + * [ callers frame ] + * [ callers return address ] <- callers rsp + * [ callers rbp ] <- callers rbp + * [ callers frame slots ] + * [ return_address ] <- return_address_ptr + * [ cookie ] <- used to write the new rsp (callers rsp) + * [ stub rbp ] + * [ stub stuff ] + */ + + address* stub_rbp = return_address_ptr - 2; + address* callers_rsp = return_address_ptr + nm->frame_size(); /* points to callers return_address now */ + address* callers_rbp = callers_rsp - 1; // 1 to move to the callers return address, 1 more to move to the rbp + address* cookie = return_address_ptr - 1; + + LogTarget(Trace, nmethod, barrier) out; + if (out.is_enabled()) { + JavaThread* jth = JavaThread::current(); + ResourceMark mark; + log_trace(nmethod, barrier)("deoptimize(nmethod: %p, return_addr: %p, osr: %d, thread: %p(%s), making rsp: %p) -> %p", + nm, (address *) return_address_ptr, nm->is_osr_method(), jth, + jth->get_thread_name(), callers_rsp, nm->verified_entry_point()); + } + + assert(nm->frame_size() >= 3, "invariant"); + assert(*cookie == (address) -1, "invariant"); + + // Preserve caller rbp. + *stub_rbp = *callers_rbp; + + // At the cookie address put the callers rsp. + *cookie = (address) callers_rsp; // should point to the return address + + // In the slot that used to be the callers rbp we put the address that our stub needs to jump to at the end. + // Overwriting the caller rbp should be okay since our stub rbp has the same value. 
+ address* jmp_addr_ptr = callers_rbp; + *jmp_addr_ptr = SharedRuntime::get_handle_wrong_method_stub(); +} + +// This is the offset of the entry barrier from where the frame is completed. +// If any code changes between the end of the verified entry where the entry +// barrier resides, and the completion of the frame, then +// NativeNMethodCmpBarrier::verify() will immediately complain when it does +// not find the expected native instruction at this offset, which needs updating. +// Note that this offset is invariant of PreserveFramePointer. +static const int entry_barrier_offset = -19; + +static NativeNMethodCmpBarrier* native_nmethod_barrier(nmethod* nm) { + address barrier_address = nm->code_begin() + nm->frame_complete_offset() + entry_barrier_offset; + NativeNMethodCmpBarrier* barrier = reinterpret_cast(barrier_address); + debug_only(barrier->verify()); + return barrier; +} + +void BarrierSetNMethod::disarm(nmethod* nm) { + if (!supports_entry_barrier(nm)) { + return; + } + + NativeNMethodCmpBarrier* cmp = native_nmethod_barrier(nm); + cmp->set_immediate(disarmed_value()); +} + +bool BarrierSetNMethod::is_armed(nmethod* nm) { + if (!supports_entry_barrier(nm)) { + return false; + } + + NativeNMethodCmpBarrier* cmp = native_nmethod_barrier(nm); + return (disarmed_value() != cmp->get_immedate()); +} diff --git a/src/hotspot/cpu/sw64/gc/shared/cardTableBarrierSetAssembler_sw64.cpp b/src/hotspot/cpu/sw64/gc/shared/cardTableBarrierSetAssembler_sw64.cpp new file mode 100644 index 00000000000..8c547d6ca2d --- /dev/null +++ b/src/hotspot/cpu/sw64/gc/shared/cardTableBarrierSetAssembler_sw64.cpp @@ -0,0 +1,144 @@ +/* + * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "gc/shared/barrierSet.hpp" +#include "gc/shared/cardTable.hpp" +#include "gc/shared/cardTableBarrierSet.hpp" +#include "gc/shared/cardTableBarrierSetAssembler.hpp" +#include "interpreter/interp_masm.hpp" +#include "gc/shared/gc_globals.hpp" + +#define __ masm-> +#ifdef PRODUCT +#define BLOCK_COMMENT(str) /* nothing */ +#else +#define BLOCK_COMMENT(str) { char line[1024];sprintf(line,"%s:%s:%d",str,__FILE__, __LINE__); __ block_comment(line);} +#endif + +#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") + +#define TIMES_OOP (UseCompressedOops ? 
Address::times_4 : Address::times_8) + +void CardTableBarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, + Register addr, Register count, Register tmp) {SCOPEMARK_NAME(CardTableBarrierSetAssembler::gen_write_ref_array_post_barrier, masm) +// ShouldNotReachHere(); + BarrierSet *bs = BarrierSet::barrier_set(); + CardTableBarrierSet* ctbs = barrier_set_cast(bs); + CardTable* ct = ctbs->card_table(); + // assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "adjust this code"); + intptr_t disp = (intptr_t) ct->byte_map_base(); + + Label L_loop, L_done; + const Register end = count; + assert_different_registers(addr, end); + + __ testw(count, count); + __ jcc(Assembler::zero, L_done); // zero count - nothing to do + + + __ lea(end, Address(addr, count, TIMES_OOP, 0)); // end == addr+count*oop_size + __ subptr(end, BytesPerHeapOop, end); // end - 1 to make inclusive + __ srll(addr, CardTable::card_shift, addr); + __ srll(end, CardTable::card_shift, end); + __ subptr(end, addr, end); // end --> cards count + + __ mov_immediate64(tmp, disp); + __ addptr(addr, tmp, addr); + __ bind(L_loop); + __ stb(R0, Address(addr, count, Address::times_1)); + __ decrement(count); + __ jcc(Assembler::greaterEqual, L_loop, count); + + __ bind(L_done); +} + +void CardTableBarrierSetAssembler::store_check(MacroAssembler* masm, Register obj, Address dst) {SCOPEMARK_NAME(CardTableBarrierSetAssembler::store_check, masm) + // Does a store check for the oop in register obj. The content of + // register obj is destroyed afterwards. + BarrierSet* bs = BarrierSet::barrier_set(); + + CardTableBarrierSet* ctbs = barrier_set_cast(bs); + CardTable* ct = ctbs->card_table(); + // assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "adjust this code"); + + __ srll(obj, CardTable::card_shift, obj); + + Address card_addr; + + // The calculation for byte_map_base is as follows: + // byte_map_base = _byte_map - (uintptr_t(low_bound) >> card_shift); + // So this essentially converts an address to a displacement and it will + // never need to be relocated. On 64bit however the value may be too + // large for a 32bit displacement. + intptr_t byte_map_base = (intptr_t)ct->byte_map_base(); + if (__ is_simm32(byte_map_base)) { + card_addr = Address(noreg, obj, Address::times_1, byte_map_base); + } else { + // By doing it as an ExternalAddress 'byte_map_base' could be converted to a rip-relative + // displacement and done in a single instruction given favorable mapping and a + // smarter version of as_Address. However, 'ExternalAddress' generates a relocation + // entry and that entry is not properly handled by the relocation code. 
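Because `byte_map_base` already folds in the `-(low_bound >> card_shift)` bias described above, the generated code only has to shift the store address and add that base; and since `dirty_card_val()` is zero, the stub can store the zero register `R0` directly. The same computation in plain C++ (the card size is illustrative):

```c++
#include <cstdint>

typedef uint8_t card_t;
static card_t*      byte_map_base;      // _byte_map - (low_bound >> card_shift)
static const int    card_shift = 9;     // one card per 512-byte block (typical)
static const card_t dirty_card = 0;     // CardTable::dirty_card_val()

static void store_check(void* field_addr, bool use_cond_card_mark) {
  card_t* card = byte_map_base + ((uintptr_t)field_addr >> card_shift);
  if (use_cond_card_mark && *card == dirty_card) {
    return;                             // skip the write if the card is already dirty
  }
  *card = dirty_card;                   // equivalent to the __ stb(R0, card_addr) above
}
```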
+ AddressLiteral cardtable((address)byte_map_base, relocInfo::none); + Address index(noreg, obj, Address::times_1); + card_addr = __ as_Address(ArrayAddress(cardtable, index), rscratch4); + } + int dirty = CardTable::dirty_card_val(); + if (UseCondCardMark) { + Label L_already_dirty; + __ cmpb(card_addr, dirty); + __ jcc(Assembler::equal, L_already_dirty); +// __ movw(temp, dirty); +// __ stb(temp, card_addr); + __ stb(R0, card_addr); + __ bind(L_already_dirty); + } else { +// __ movw(temp, dirty); +// __ stb(temp, card_addr); + __ stb(R0, card_addr); + } +} + +void CardTableBarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, + Address dst, Register val, Register tmp1, Register tmp2) {SCOPEMARK_NAME(CardTableBarrierSetAssembler::oop_store_at, masm) + bool in_heap = (decorators & IN_HEAP) != 0; + + bool is_array = (decorators & IS_ARRAY) != 0; + bool on_anonymous = (decorators & ON_UNKNOWN_OOP_REF) != 0; + bool precise = is_array || on_anonymous; + + bool needs_post_barrier = val != noreg && in_heap; + + BarrierSetAssembler::store_at(masm, decorators, type, dst, val, noreg, noreg); + if (needs_post_barrier) { + // flatten object address if needed + if (!precise || (dst.index() == noreg && dst.disp() == 0)) { + store_check(masm, dst.base(), dst); + } else { + __ lea(tmp1, dst); + store_check(masm, tmp1, dst); + } + } +} diff --git a/src/hotspot/cpu/sw64/gc/shared/cardTableBarrierSetAssembler_sw64.hpp b/src/hotspot/cpu/sw64/gc/shared/cardTableBarrierSetAssembler_sw64.hpp new file mode 100644 index 00000000000..55b8d224f17 --- /dev/null +++ b/src/hotspot/cpu/sw64/gc/shared/cardTableBarrierSetAssembler_sw64.hpp @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef CPU_SW64_GC_SHARED_CARDTABLEBARRIERSETASSEMBLER_SW64_HPP +#define CPU_SW64_GC_SHARED_CARDTABLEBARRIERSETASSEMBLER_SW64_HPP + +#include "asm/macroAssembler.hpp" +#include "gc/shared/modRefBarrierSetAssembler.hpp" + +class CardTableBarrierSetAssembler: public ModRefBarrierSetAssembler { +protected: + void store_check(MacroAssembler* masm, Register obj, Address dst); + + virtual void gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, + Register start, Register count, Register tmp); + virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, + Address dst, Register val, Register tmp1, Register tmp2); + +}; + +#endif // #ifndef CPU_SW64_GC_SHARED_CARDTABLEBARRIERSETASSEMBLER_SW64_HPP diff --git a/src/hotspot/cpu/sw64/gc/shared/modRefBarrierSetAssembler_sw64.cpp b/src/hotspot/cpu/sw64/gc/shared/modRefBarrierSetAssembler_sw64.cpp new file mode 100644 index 00000000000..700b1606b22 --- /dev/null +++ b/src/hotspot/cpu/sw64/gc/shared/modRefBarrierSetAssembler_sw64.cpp @@ -0,0 +1,83 @@ +/* + * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "gc/shared/modRefBarrierSetAssembler.hpp" + +#define __ masm-> + +void ModRefBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, BasicType type, + Register src, Register dst, Register count) {SCOPEMARK_NAME(ModRefBarrierSetAssembler::arraycopy_prologue, masm) + bool checkcast = (decorators & ARRAYCOPY_CHECKCAST) != 0; + bool disjoint = (decorators & ARRAYCOPY_DISJOINT) != 0; + bool obj_int = type == T_OBJECT && UseCompressedOops; +// ShouldNotReachHere(); + //use T11 is very special, related to stubGenerator_sw64::array_copy* TODO:refactor jzy + if (is_reference_type(type)) { + if (!checkcast) { + if (!obj_int) { + // Save count for barrier + __ movl(T11, count); + } else if (disjoint) { + // Save dst in r11 in the disjoint case + __ movl(T11, dst); + } + } + gen_write_ref_array_pre_barrier(masm, decorators, dst, count); + } +} + +void ModRefBarrierSetAssembler::arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, BasicType type, + Register src, Register dst, Register count) {SCOPEMARK_NAME(ModRefBarrierSetAssembler::arraycopy_epilogue, masm) + bool checkcast = (decorators & ARRAYCOPY_CHECKCAST) != 0; + bool disjoint = (decorators & ARRAYCOPY_DISJOINT) != 0; + bool obj_int = type == T_OBJECT && UseCompressedOops; + Register tmp = V0; +// ShouldNotReachHere(); + if (is_reference_type(type)) { + if (!checkcast) { + if (!obj_int) { + // Save count for barrier + count = T11; + } else if (disjoint) { + // Use the saved dst in the disjoint case + dst = T11; + } + } else { + assert_different_registers(src, dst, count, rscratch1);//need this? jzy + tmp = rscratch1; + } + gen_write_ref_array_post_barrier(masm, decorators, dst, count, tmp); + } +} + +void ModRefBarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, + Address dst, Register val, Register tmp1, Register tmp2) { + if (is_reference_type(type)) { + oop_store_at(masm, decorators, type, dst, val, tmp1, tmp2); + } else { + BarrierSetAssembler::store_at(masm, decorators, type, dst, val, tmp1, tmp2); + } +} diff --git a/src/hotspot/cpu/sw64/gc/shared/modRefBarrierSetAssembler_sw64.hpp b/src/hotspot/cpu/sw64/gc/shared/modRefBarrierSetAssembler_sw64.hpp new file mode 100644 index 00000000000..22bfed475d9 --- /dev/null +++ b/src/hotspot/cpu/sw64/gc/shared/modRefBarrierSetAssembler_sw64.hpp @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef CPU_SW64_GC_SHARED_MODREFBARRIERSETASSEMBLER_SW64_HPP +#define CPU_SW64_GC_SHARED_MODREFBARRIERSETASSEMBLER_SW64_HPP + +#include "asm/macroAssembler.hpp" +#include "gc/shared/barrierSetAssembler.hpp" + +// The ModRefBarrierSetAssembler filters away accesses on BasicTypes other +// than T_OBJECT/T_ARRAY (oops). The oop accesses call one of the protected +// accesses, which are overridden in the concrete BarrierSetAssembler. + +class ModRefBarrierSetAssembler: public BarrierSetAssembler { +protected: + virtual void gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators, + Register addr, Register count) {} + virtual void gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, + Register start, Register count, Register tmp) {} + + virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, + Address dst, Register val, Register tmp1, Register tmp2) = 0; + +public: + virtual void arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, BasicType type, + Register src, Register dst, Register count); + virtual void arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, BasicType type, + Register start, Register count, Register tmp); + virtual void store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, + Address dst, Register val, Register tmp1, Register tmp2); +}; + +#endif // CPU_SW64_GC_SHARED_MODREFBARRIERSETASSEMBLER_SW64_HPP diff --git a/src/hotspot/cpu/sw64/globalDefinitions_sw64.hpp b/src/hotspot/cpu/sw64/globalDefinitions_sw64.hpp new file mode 100644 index 00000000000..169528339b3 --- /dev/null +++ b/src/hotspot/cpu/sw64/globalDefinitions_sw64.hpp @@ -0,0 +1,66 @@ +/* + * Copyright (c) 1999, 2015, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, 2015, Red Hat Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ +#include "metaprogramming/enableIf.hpp" + +#ifndef CPU_SW64_VM_GLOBALDEFINITIONS_SW64_HPP +#define CPU_SW64_VM_GLOBALDEFINITIONS_SW64_HPP +// Size of SW Instructions +const int BytesPerInstWord = 4; + +const int StackAlignmentInBytes = (2*wordSize); + +// Indicates whether the C calling conventions require that +// 32-bit integer argument values are extended to 64 bits. 
+const bool CCallingConventionRequiresIntsAsLongs = false; + +#define CPU_MULTI_COPY_ATOMIC + +// true if x is a power of 2, false otherwise +//template bool is_power_of_2(T x) ; +template ::value)> +constexpr bool is_power_of_2(T x); + +inline intptr_t mask_bits (intptr_t x, intptr_t m); + +// returns integer round-up to the nearest multiple of s (s must be a power of two) +inline intptr_t round_to(intptr_t x, uintx s) { + #ifdef ASSERT + if (!is_power_of_2(s)) fatal("s must be a power of 2"); + #endif + const uintx m = s - 1; + return mask_bits(x + m, ~m); +} + +#define SUPPORTS_NATIVE_CX8 + +#define SUPPORT_RESERVED_STACK_AREA + +#if INCLUDE_JVMCI +#define COMPRESSED_CLASS_POINTERS_DEPENDS_ON_COMPRESSED_OOPS EnableJVMCI // todo check +#else +#define COMPRESSED_CLASS_POINTERS_DEPENDS_ON_COMPRESSED_OOPS false +#endif + +#endif // CPU_SW64_VM_GLOBALDEFINITIONS_SW64_HPP diff --git a/src/hotspot/cpu/sw64/globals_sw64.hpp b/src/hotspot/cpu/sw64/globals_sw64.hpp new file mode 100644 index 00000000000..d915c00c761 --- /dev/null +++ b/src/hotspot/cpu/sw64/globals_sw64.hpp @@ -0,0 +1,177 @@ +/* + * Copyright (c) 2000, 2017, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, Red Hat Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_SW64_VM_GLOBALS_SW64_HPP +#define CPU_SW64_VM_GLOBALS_SW64_HPP + +#include "utilities/globalDefinitions.hpp" +#include "utilities/macros.hpp" + +// Sets the default values for platform dependent flags used by the runtime system. +// (see globals.hpp) + +define_pd_global(bool, ImplicitNullChecks, true); // Generate code for implicit null checks +define_pd_global(bool, TrapBasedNullChecks, false); +define_pd_global(bool, UncommonNullCast, true); // Uncommon-trap NULLs past to check cast + +define_pd_global(uintx, CodeCacheSegmentSize, 64 COMPILER1_AND_COMPILER2_PRESENT(+64)); // Tiered compilation has large code-entry alignment. +define_pd_global(intx, CodeEntryAlignment, 16); +define_pd_global(intx, OptoLoopAlignment, 16); +define_pd_global(intx, InlineFrequencyCount, 100); + +#define DEFAULT_STACK_YELLOW_PAGES (2) +#define DEFAULT_STACK_RED_PAGES (1) +// Java_java_net_SocketOutputStream_socketWrite0() uses a 64k buffer on the +// stack if compiled for unix and LP64. To pass stack overflow tests we need +// 20 shadow pages. 
+#define DEFAULT_STACK_SHADOW_PAGES (20 DEBUG_ONLY(+5)) +#define DEFAULT_STACK_RESERVED_PAGES (1) + +#define MIN_STACK_YELLOW_PAGES DEFAULT_STACK_YELLOW_PAGES +#define MIN_STACK_RED_PAGES DEFAULT_STACK_RED_PAGES +#define MIN_STACK_SHADOW_PAGES DEFAULT_STACK_SHADOW_PAGES +#define MIN_STACK_RESERVED_PAGES (0) + +define_pd_global(intx, StackYellowPages, DEFAULT_STACK_YELLOW_PAGES); +define_pd_global(intx, StackRedPages, DEFAULT_STACK_RED_PAGES); +define_pd_global(intx, StackShadowPages, DEFAULT_STACK_SHADOW_PAGES); +////define_pd_global(intx, StackShadowPages, 3 DEBUG_ONLY(+1)); +define_pd_global(intx, StackReservedPages, DEFAULT_STACK_RESERVED_PAGES); + +define_pd_global(uintx, TLABSize, 0); +define_pd_global(uintx, NewSize, 1024 * K); +define_pd_global(intx, PreInflateSpin, 10); + +define_pd_global(intx, PrefetchCopyIntervalInBytes, -1); +define_pd_global(intx, PrefetchScanIntervalInBytes, -1); +define_pd_global(intx, PrefetchFieldsAhead, -1); + +define_pd_global(bool, RewriteBytecodes, true); +define_pd_global(bool, RewriteFrequentPairs, true); + +////define_pd_global(bool, PreserveFramePointer, false); + +define_pd_global(uintx, TypeProfileLevel, 111); + +define_pd_global(bool, PreserveFramePointer, false); +// Only c2 cares about this at the moment +define_pd_global(intx, AllocatePrefetchStyle, 2); +define_pd_global(intx, AllocatePrefetchDistance, -1); +define_pd_global(bool, CompactStrings, true); + +// Clear short arrays bigger than one word in an arch-specific way +define_pd_global(intx, InitArrayShortSize, 4096 * BytesPerLong); +define_pd_global(intx, InlineSmallCode, 4000); + +////#if defined(COMPILER1) || defined(COMPILER2) +////define_pd_global(intx, InlineSmallCode, 1000); +////#endif + +#define ARCH_FLAGS(develop, \ + product, \ + notproduct, \ + range, \ + constraint) \ + \ + product(bool, UseSW8A, false, \ + "Use SW8A on Shenwei CPUs") \ + product(bool, UseAddpi, false, \ + "Use addpi of SW8A's instructions") \ + product(bool, UseCAS, false, \ + "Use CASx of SW8A's instructions") \ + product(bool, UseWmemb, false, \ + "Use wmemb on SW8A CPU") \ + product(bool, NearCpool, true, \ + "constant pool is close to instructions") \ + product(bool, TraceSignalHandling, false, \ + "Trace signal handling") \ + product(bool, UseBarriersForVolatile, false, \ + "Use memory barriers to implement volatile accesses") \ + product(bool, UseNeon, false, \ + "Use Neon for CRC32 computation") \ + product(bool, UseCRC32, false, \ + "Use CRC32 instructions for CRC32 computation") \ + product(bool, UseSIMDForMemoryOps, false, \ + "Use SIMD instructions in generated memory move code") \ + product(bool, UseSIMDForArrayEquals, true, \ + "Use SIMD instructions in generated array equals code") \ + product(bool, UseSimpleArrayEquals, false, \ + "Use simpliest and shortest implementation for array equals") \ + product(bool, AvoidUnalignedAccesses, false, \ + "Avoid generating unaligned memory accesses") \ + product(bool, UseLSE, false, \ + "Use LSE instructions") \ + product(bool, UseBlockZeroing, true, \ + "Use DC ZVA for block zeroing") \ + product(intx, BlockZeroingLowLimit, 256, \ + "Minimum size in bytes when block zeroing will be used") \ + range(1, max_jint) \ + product(bool, TraceTraps, false, "Trace all traps the signal handler")\ + product(int, SoftwarePrefetchHintDistance, -1, \ + "Use prfm hint with specified distance in compiled code." 
\ + "Value -1 means off.") \ + range(-1, 4096) \ + product(bool, UseSW6B, false, \ + "Use SW6B on Shenwei CPUs") \ + \ + product(bool, UseSimdForward, false, \ + "arraycopy disjoint stubs with SIMD instructions") \ + product(bool, UseSimdBackward, false, \ + "arraycopy conjoint stubs with SIMD instructions") \ + product(bool, UseSimdLongOop, false, \ + "conjoint oop copy with SIMD instructions") \ + /* product(bool, UseCodeCacheAllocOpt, true, */ \ + /* "Allocate code cache within 32-bit memory address space") */ \ + \ + product(bool, UseCountLeadingZerosInstruction, true, \ + "Use count leading zeros instruction") \ + \ + product(bool, UseCountTrailingZerosInstruction, false, \ + "Use count trailing zeros instruction") \ + \ + product(bool, FastIntDiv, false, \ + "make Integer division faster") \ + \ + product(bool, FastLongDiv, false, \ + "make Long division faster") \ + \ + product(bool, FastIntRem, false, \ + "make Integer remainder faster") \ + \ + product(bool, FastLongRem, false, \ + "make Long remainder faster") \ + \ + product(bool, SafePatch, true, \ + "make patch operations is atomic") \ + \ + product(bool, FRegisterConflict, true, \ + "When FRegisterConflict is true, prevent source and destination FloatRegisters from being the same. " \ + "When FRegisterConflict is false, ignore the conflict") \ + product(bool, UseSetfpec, false, \ + "true for 9906, false for 9916") \ + product(bool, UseGetLongIntrinsic, false, \ + "Use Unsafe.getLong intrinsic") + +#endif // CPU_SW64_VM_GLOBALS_SW64_HPP diff --git a/src/hotspot/cpu/sw64/icBuffer_sw64.cpp b/src/hotspot/cpu/sw64/icBuffer_sw64.cpp new file mode 100644 index 00000000000..6a3866eef02 --- /dev/null +++ b/src/hotspot/cpu/sw64/icBuffer_sw64.cpp @@ -0,0 +1,91 @@ +/* + * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "code/icBuffer.hpp" +#include "gc/shared/collectedHeap.inline.hpp" +#include "interpreter/bytecodes.hpp" +#include "memory/resourceArea.hpp" +#include "nativeInst_sw64.hpp" +#include "oops/oop.inline.hpp" + +int InlineCacheBuffer::ic_stub_code_size() { + return NativeMovConstReg::instruction_size + NativeJump::instruction_size; +} + + + +void InlineCacheBuffer::assemble_ic_buffer_code(address code_begin, void* cached_value, address entry_point) {//Unimplemented(); + ResourceMark rm; + CodeBuffer code(code_begin, ic_stub_code_size()); + MacroAssembler* masm = new MacroAssembler(&code); + // note: even though the code contains an embedded value, we do not need reloc info + // because + // (1) the value is old (i.e., doesn't matter for scavenges) + // (2) these ICStubs are removed *before* a GC happens, so the roots disappear + // assert(cached_value == NULL || cached_oop->is_perm(), "must be perm oop"); + + // TODO:confirm jzy + // 1. need flush? + // 2. need relocate? + #define __ masm-> + __ prepare_patch_li48(V0, (long)cached_value); //sw64.ad #frame + + __ patchable_jump(entry_point); + __ flush(); +#undef __ +} + + +address InlineCacheBuffer::ic_buffer_entry_point(address code_begin) { + NativeMovConstReg* move = nativeMovConstReg_at(code_begin); // creation also verifies the object + address jmp = move->next_instruction_address(); + NativeInstruction* ni = nativeInstruction_at(jmp); + if (ni->is_jump()) { + NativeJump* jump = nativeJump_at(jmp); + return jump->jump_destination(); + } else { + fatal("no a ic buffer entry"); + return NULL; + } +} + + +void* InlineCacheBuffer::ic_buffer_cached_value(address code_begin) { + // creation also verifies the object + NativeMovConstReg* move = nativeMovConstReg_at(code_begin); + // Verifies the jump + address jmp = move->next_instruction_address(); + NativeInstruction* ni = nativeInstruction_at(jmp); + if (ni->is_jump()) { + NativeJump* jump = nativeJump_at(jmp); + } else { + fatal("no a ic buffer entry"); + } + void* o = (void*)move->data(); + return o; +} diff --git a/src/hotspot/cpu/sw64/icache_sw64.cpp b/src/hotspot/cpu/sw64/icache_sw64.cpp new file mode 100644 index 00000000000..5fb940774f4 --- /dev/null +++ b/src/hotspot/cpu/sw64/icache_sw64.cpp @@ -0,0 +1,96 @@ +/* + * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "runtime/icache.hpp" + +#define __ _masm-> +extern void sw64TestHook(); + +void ICache::initialize() { + sw64TestHook(); +} + +void ICacheStubGenerator::generate_icache_flush(ICache::flush_icache_stub_t* flush_icache_stub) { + StubCodeMark mark(this, "ICache", "flush_icache_stub"); + + Register rax = V0; + address start = __ pc(); + + const Register addr = c_rarg0; + const Register lines = c_rarg1; + const Register magic = c_rarg2; + + Label flush_line, done; + + __ jcc(Assembler::zero, done, lines); + + // Force ordering wrt cflush. + // Other fence and sync instructions won't do the job. + __ memb(); + + __ bind(flush_line); + __ clflush(Address(addr, 0)); + __ addptr(addr, ICache::line_size, addr); + __ decrementw(lines); + __ jcc(Assembler::notZero, flush_line, lines); + + __ memb(); + + __ bind(done); + + __ movl(rax, magic); // Handshake with caller to make sure it happened! + __ ret_sw(); + + // Must be set here so StubCodeMark destructor can call the flush stub. + *flush_icache_stub = (ICache::flush_icache_stub_t)start; +} + +void ICache::call_flush_stub(address start, int lines) { + //in fact, the current os implementation simply flush all ICACHE&DCACHE +// sysmips(3, 0, 0, 0); +// __asm__ __volatile__ ("ldi $0,266"); +// __asm__ __volatile__ ("sys_call 0x83"); +} + +void ICache::invalidate_word(address addr) { + //cacheflush(addr, 4, ICACHE); + +// sysmips(3, 0, 0, 0); +// __asm__ __volatile__ ("ldi $0,266"); +// __asm__ __volatile__ ("sys_call 0x83"); +} + +void ICache::invalidate_range(address start, int nbytes) { +// sysmips(3, 0, 0, 0); +// __asm__ __volatile__ ("ldi $0,266"); +// __asm__ __volatile__ ("sys_call 0x83"); +} + +void ICache::invalidate_all() { +// __asm__ __volatile__ ("ldi $0,266"); +// __asm__ __volatile__ ("sys_call 0x83"); +} diff --git a/src/hotspot/cpu/sw64/icache_sw64.hpp b/src/hotspot/cpu/sw64/icache_sw64.hpp new file mode 100644 index 00000000000..22bfdb90a5c --- /dev/null +++ b/src/hotspot/cpu/sw64/icache_sw64.hpp @@ -0,0 +1,52 @@ +/* + * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_SW64_VM_ICACHE_SW64_HPP +#define CPU_SW64_VM_ICACHE_SW64_HPP + +// Interface for updating the instruction cache. Whenever the VM +// modifies code, part of the processor instruction cache potentially +// has to be flushed. 
+ +class ICache : public AbstractICache { + public: + enum { + stub_size = 0, // Size of the icache flush stub in bytes + line_size = 32, // flush instruction affects a dword + log2_line_size = 5 // log2(line_size) + }; + static void initialize(); + + static void call_flush_stub(address start, int lines); + + static void invalidate_word(address addr); + + static void invalidate_range(address start, int nbytes); + + static void invalidate_all(); + +}; + +#endif // CPU_SW64_VM_ICACHE_SW64_HPP diff --git a/src/hotspot/cpu/sw64/interp_masm_sw64.cpp b/src/hotspot/cpu/sw64/interp_masm_sw64.cpp new file mode 100644 index 00000000000..6ae67e84bfe --- /dev/null +++ b/src/hotspot/cpu/sw64/interp_masm_sw64.cpp @@ -0,0 +1,1921 @@ +/* + * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "interp_masm_sw64.hpp" +#include "interpreter/interpreter.hpp" +#include "interpreter/interpreterRuntime.hpp" +#include "logging/log.hpp" +#include "oops/arrayOop.hpp" +#include "oops/markWord.hpp" +#include "oops/methodData.hpp" +#include "oops/method.hpp" +#include "prims/jvmtiExport.hpp" +#include "prims/jvmtiThreadState.hpp" +#include "runtime/basicLock.hpp" +#include "runtime/biasedLocking.hpp" +#include "runtime/frame.inline.hpp" +#include "runtime/safepointMechanism.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/thread.inline.hpp" +#include "utilities/powerOfTwo.hpp" + +// Implementation of InterpreterMacroAssembler + +void InterpreterMacroAssembler::jump_to_entry(address entry) { + assert(entry, "Entry must have been generated by now"); + jump(RuntimeAddress(entry)); +} + +void InterpreterMacroAssembler::profile_obj_type(Register obj, const Address& mdo_addr) { + Label update, next, none; + + assert_different_registers(obj, AT, mdo_addr.base(), mdo_addr.index()); + + interp_verify_oop(obj, atos); + + jcc(Assembler::notZero, update, obj); + ldptr(AT, mdo_addr); + orptr(AT, TypeEntries::null_seen, AT); + stptr(AT, mdo_addr); + jmp(next); + + bind(update); + load_klass(obj, obj); + + ldptr(AT, mdo_addr); + xorptr(obj, AT, obj); + testptr(obj, TypeEntries::type_klass_mask); + jcc(Assembler::zero, next); // klass seen before, nothing to + // do. The unknown bit may have been + // set already but no need to check. + + testptr(obj, TypeEntries::type_unknown); + jcc(Assembler::notZero, next); // already unknown. Nothing to do anymore. 
+ + jcc(Assembler::zero, none, AT); + cmpptr(AT, TypeEntries::null_seen); + jcc(Assembler::equal, none); + // There is a chance that the checks above + // fail if another thread has just set the + // profiling to this obj's klass + + xorptr(obj, AT, obj); + ldptr(AT, mdo_addr); + xorptr(obj, AT, obj); + testptr(obj, TypeEntries::type_klass_mask); + jcc(Assembler::zero, next); + + // different than before. Cannot keep accurate profile. + orptr(AT, TypeEntries::type_unknown, AT); + stptr(AT, mdo_addr); + jmp(next); + + bind(none); + // first time here. Set profile type. + stptr(obj, mdo_addr); +#ifdef ASSERT + andptr(obj, TypeEntries::type_mask, obj); + verify_klass_ptr(obj); +#endif + + bind(next); +} + +void InterpreterMacroAssembler::profile_arguments_type(Register mdp, Register callee, Register tmp, bool is_virtual) { + if (!ProfileInterpreter) { + return; + } + + if (MethodData::profile_arguments() || MethodData::profile_return()) { + Label profile_continue; + + test_method_data_pointer(mdp, profile_continue); + + int off_to_start = is_virtual ? in_bytes(VirtualCallData::virtual_call_data_size()) : in_bytes(CounterData::counter_data_size()); + + cmpb(Address(mdp, in_bytes(DataLayout::tag_offset()) - off_to_start), is_virtual ? DataLayout::virtual_call_type_data_tag : DataLayout::call_type_data_tag); + jcc(Assembler::notEqual, profile_continue); + + if (MethodData::profile_arguments()) { + Label done; + int off_to_args = in_bytes(TypeEntriesAtCall::args_data_offset()); + addptr(mdp, off_to_args, mdp); + + for (int i = 0; i < TypeProfileArgsLimit; i++) { + if (i > 0 || MethodData::profile_return()) { + // If return value type is profiled we may have no argument to profile + ldptr(tmp, Address(mdp, in_bytes(TypeEntriesAtCall::cell_count_offset())-off_to_args)); + subw(tmp, i*TypeStackSlotEntries::per_arg_count(), tmp); + cmpw(tmp, TypeStackSlotEntries::per_arg_count()); + jcc(Assembler::less, done); + } + ldptr(tmp, Address(callee, Method::const_offset())); + ldhu(tmp, Address(tmp, ConstMethod::size_of_parameters_offset())); + // stack offset o (zero based) from the start of the argument + // list, for n arguments translates into offset n - o - 1 from + // the end of the argument list + ldptr(AT, Address(mdp, in_bytes(TypeEntriesAtCall::stack_slot_offset(i))-off_to_args)); + subw(tmp, AT, tmp); + subw(tmp, 1, tmp); + Address arg_addr = argument_address(tmp); + ldptr(tmp, arg_addr); + + Address mdo_arg_addr(mdp, in_bytes(TypeEntriesAtCall::argument_type_offset(i))-off_to_args); + profile_obj_type(tmp, mdo_arg_addr); + + int to_add = in_bytes(TypeStackSlotEntries::per_arg_size()); + addptr(mdp, to_add, mdp); + off_to_args += to_add; + } + + if (MethodData::profile_return()) { + ldptr(tmp, Address(mdp, in_bytes(TypeEntriesAtCall::cell_count_offset())-off_to_args)); + subw(tmp, TypeProfileArgsLimit*TypeStackSlotEntries::per_arg_count(), tmp); + } + + bind(done); + + if (MethodData::profile_return()) { + // We're right after the type profile for the last + // argument. tmp is the number of cells left in the + // CallTypeData/VirtualCallTypeData to reach its end. Non null + // if there's a return to profile. 
+ assert(ReturnTypeEntry::static_cell_count() < TypeStackSlotEntries::per_arg_count(), "can't move past ret type"); + slll(tmp, log2i_exact((int)DataLayout::cell_size), tmp); + //addw(tmp, 0, tmp); + addptr(mdp, tmp, mdp); + } + stptr(mdp, Address(rfp, frame::interpreter_frame_mdp_offset * wordSize)); + } else { + assert(MethodData::profile_return(), "either profile call args or call ret"); + update_mdp_by_constant(mdp, in_bytes(TypeEntriesAtCall::return_only_size())); + } + + // mdp points right after the end of the + // CallTypeData/VirtualCallTypeData, right after the cells for the + // return value type if there's one + + bind(profile_continue); + } +} + +void InterpreterMacroAssembler::profile_return_type(Register mdp, Register ret, Register tmp) { + assert_different_registers(mdp, ret, tmp, _bcp_register); + if (ProfileInterpreter && MethodData::profile_return()) { + Label profile_continue; + + test_method_data_pointer(mdp, profile_continue); + + if (MethodData::profile_return_jsr292_only()) { + assert(Method::intrinsic_id_size_in_bytes() == 2, "assuming Method::_intrinsic_id is u2"); + + // If we don't profile all invoke bytecodes we must make sure + // it's a bytecode we indeed profile. We can't go back to the + // begining of the ProfileData we intend to update to check its + // type because we're right after it and we don't known its + // length + Label do_profile; + cmpb(Address(_bcp_register, 0), Bytecodes::_invokedynamic); + jcc(Assembler::equal, do_profile); + cmpb(Address(_bcp_register, 0), Bytecodes::_invokehandle); + jcc(Assembler::equal, do_profile); + get_method(tmp); + cmph(Address(tmp, Method::intrinsic_id_offset_in_bytes()), static_cast(vmIntrinsics::_compiledLambdaForm)); + jcc(Assembler::notEqual, profile_continue); + + bind(do_profile); + } + + Address mdo_ret_addr(mdp, -in_bytes(ReturnTypeEntry::size())); + movl(tmp, ret); + profile_obj_type(tmp, mdo_ret_addr); + + bind(profile_continue); + } +} + +void InterpreterMacroAssembler::profile_parameters_type(Register mdp, Register tmp1, Register tmp2) { + if (ProfileInterpreter && MethodData::profile_parameters()) { + Label profile_continue; + + test_method_data_pointer(mdp, profile_continue); + + // Load the offset of the area within the MDO used for + // parameters. If it's negative we're not profiling any parameters + ldws(tmp1, Address(mdp, in_bytes(MethodData::parameters_type_data_di_offset()) - in_bytes(MethodData::data_offset()))); + jcc(Assembler::negative, profile_continue, tmp1); + + // Compute a pointer to the area for parameters from the offset + // and move the pointer to the slot for the last + // parameters. Collect profiling from last parameter down. 
+ // mdo start + parameters offset + array length - 1 + addptr(mdp, tmp1, mdp); + ldptr(tmp1, Address(mdp, ArrayData::array_len_offset())); + decrementl(tmp1, TypeStackSlotEntries::per_arg_count()); + + Label loop; + bind(loop); + + int off_base = in_bytes(ParametersTypeData::stack_slot_offset(0)); + int type_base = in_bytes(ParametersTypeData::type_offset(0)); + Address::ScaleFactor per_arg_scale = Address::times(DataLayout::cell_size); + Address arg_off(mdp, tmp1, per_arg_scale, off_base); + Address arg_type(mdp, tmp1, per_arg_scale, type_base); + + // load offset on the stack from the slot for this parameter + ldptr(tmp2, arg_off); + negptr(tmp2); + // read the parameter from the local area + ldptr(tmp2, Address(rlocals, tmp2, Interpreter::stackElementScale())); + + // profile the parameter + profile_obj_type(tmp2, arg_type); + + // go to next parameter + decrementl(tmp1, TypeStackSlotEntries::per_arg_count()); + jcc(Assembler::positive, loop, tmp1); + + bind(profile_continue); + } +} + +void InterpreterMacroAssembler::call_VM_leaf_base(address entry_point, + int number_of_arguments) {SCOPEMARK_NAME(InterpreterMacroAssembler::call_VM_leaf_base, this) + // interpreter specific + // + // Note: No need to save/restore bcp & locals (r13 & r14) pointer + // since these are callee saved registers and no blocking/ + // GC can happen in leaf calls. + // Further Note: DO NOT save/restore bcp/locals. If a caller has + // already saved them so that it can use BCP/LVP as temporaries + // then a save/restore here will DESTROY the copy the caller + // saved! There used to be a save_bcp() that only happened in + // the ASSERT path (no restore_bcp). Which caused bizarre failures + // when jvm built with ASSERTs. +#ifdef ASSERT + { + Label L; + cmpptr(Address(rfp, frame::interpreter_frame_last_sp_offset * wordSize), (int32_t)NULL_WORD); + jcc(Assembler::equal, L); + stop("InterpreterMacroAssembler::call_VM_leaf_base:" + " last_sp != NULL"); + bind(L); + } +#endif + // super call + MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments); + // interpreter specific + // LP64: Used to ASSERT that BCP/LVP were equal to frame's bcp/locals + // but since they may not have been saved (and we don't want to + // save them here (see note above) the assert is invalid. +} + +void InterpreterMacroAssembler::call_VM_base(Register oop_result, + Register java_thread, + Register last_java_sp, + address entry_point, + int number_of_arguments, + bool check_exceptions) {SCOPEMARK_NAME(InterpreterMacroAssembler::call_VM_base, this) + // interpreter specific + // + // Note: Could avoid restoring locals ptr (callee saved) - however doesn't + // really make a difference for these runtime calls, since they are + // slow anyway. Btw., bcp must be saved/restored since it may change + // due to GC. + save_bcp(); +#ifdef ASSERT + { + Label L; + cmpptr(Address(rfp, frame::interpreter_frame_last_sp_offset * wordSize), (int32_t)NULL_WORD); + jcc(Assembler::equal, L); + stop("InterpreterMacroAssembler::call_VM_base:" + " last_sp != NULL"); + bind(L); + } +#endif /* ASSERT */ + // super call + MacroAssembler::call_VM_base(oop_result, noreg, last_java_sp, + entry_point, number_of_arguments, + check_exceptions); + // interpreter specific + restore_bcp(); + restore_locals(); +} + +void InterpreterMacroAssembler::check_and_handle_popframe(Register java_thread) { + if (JvmtiExport::can_pop_frame()) { + Label L; + // Initiate popframe handling only if it is not already being + // processed. 
If the flag has the popframe_processing bit set, it + // means that this code is called *during* popframe handling - we + // don't want to reenter. + // This method is only called just after the call into the vm in + // call_VM_base, so the arg registers are available. + Register pop_cond = c_rarg0; + ldw(pop_cond, Address(java_thread, JavaThread::popframe_condition_offset())); + testw(pop_cond, JavaThread::popframe_pending_bit); + jcc(Assembler::zero, L); + testw(pop_cond, JavaThread::popframe_processing_bit); + jcc(Assembler::notZero, L); + // Call Interpreter::remove_activation_preserving_args_entry() to get the + // address of the same-named entrypoint in the generated interpreter code. + call_VM_leaf(CAST_FROM_FN_PTR(address, Interpreter::remove_activation_preserving_args_entry)); + jmp(V0); + bind(L); + } +} + +void InterpreterMacroAssembler::load_earlyret_value(TosState state) { + const Register rcx = T11; + const Register rax = V0; + Register thread = rthread; + ldptr(rcx, Address(thread, JavaThread::jvmti_thread_state_offset())); + const Address tos_addr(rcx, JvmtiThreadState::earlyret_tos_offset()); + const Address oop_addr(rcx, JvmtiThreadState::earlyret_oop_offset()); + const Address val_addr(rcx, JvmtiThreadState::earlyret_value_offset()); + switch (state) { + case atos: ldptr(rax, oop_addr); + stptr(R0, oop_addr); + interp_verify_oop(rax, state); break; + case ltos: ldptr(rax, val_addr); break; + case btos: // fall through + case ztos: // fall through + case ctos: // fall through + case stos: // fall through + case itos: ldws(rax, val_addr); break; + case ftos: load_float(FSF, val_addr); break; + case dtos: load_double(FSF, val_addr); break; + case vtos: /* nothing to do */ break; + default : ShouldNotReachHere(); + } + // Clean up tos value in the thread object + movw(AT, (int) ilgl); + stw(AT, tos_addr); + stptr(R0, val_addr); +} + + +void InterpreterMacroAssembler::check_and_handle_earlyret(Register java_thread) { + if (JvmtiExport::can_force_early_return()) { + Label L; + Register tmp = c_rarg0; + + ldptr(tmp, Address(rthread, JavaThread::jvmti_thread_state_offset())); + testptr(tmp, tmp); + jcc(Assembler::zero, L); // if (thread->jvmti_thread_state() == NULL) exit; + + // Initiate earlyret handling only if it is not already being processed. + // If the flag has the earlyret_processing bit set, it means that this code + // is called *during* earlyret handling - we don't want to reenter. + ldwu(tmp, Address(tmp, JvmtiThreadState::earlyret_state_offset())); + cmpw(tmp, JvmtiThreadState::earlyret_pending); + jcc(Assembler::notEqual, L); + + // Call Interpreter::remove_activation_early_entry() to get the address of the + // same-named entrypoint in the generated interpreter code. 
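+    // The entry point comes back in V0 (used throughout this port as the
+    // return-value register), and the code below jumps straight to it,
+    // mirroring check_and_handle_popframe() above.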
+ ldptr(tmp, Address(rthread, JavaThread::jvmti_thread_state_offset())); + ldws(tmp, Address(tmp, JvmtiThreadState::earlyret_tos_offset())); + call_VM_leaf(CAST_FROM_FN_PTR(address, Interpreter::remove_activation_early_entry), tmp); + jmp(V0); + bind(L); + } +} + +void InterpreterMacroAssembler::get_unsigned_2_byte_index_at_bcp(Register reg, + int bcp_offset) { + assert(bcp_offset >= 0, "bcp is still pointing to start of bytecode"); + ldbu(AT, bcp_offset, rbcp); + ldbu(reg, bcp_offset + 1, rbcp); + slll(AT, 8, AT); + bis(reg, AT, reg); +} + +void InterpreterMacroAssembler::get_cache_index_at_bcp(Register index, + int bcp_offset, + size_t index_size) {SCOPEMARK_NAME(get_cache_index_at_bcp, this) + assert(bcp_offset > 0, "bcp is still pointing to start of bytecode"); + if (index_size == sizeof(u2)) { + ldhu_unaligned(index, Address(rbcp, bcp_offset)); + } else if (index_size == sizeof(u4)) { + ldw(index, Address(rbcp, bcp_offset)); + // Check if the secondary index definition is still ~x, otherwise + // we have to change the following assembler code to calculate the + // plain index. + assert(ConstantPool::decode_invokedynamic_index(~123) == 123, "else change next line"); + notw(index, index); + } else if (index_size == sizeof(u1)) { + ldbu(index, Address(rbcp, bcp_offset)); + } else { + ShouldNotReachHere(); + } +} + +void InterpreterMacroAssembler::get_cache_and_index_at_bcp(Register cache, + Register index, + int bcp_offset, + size_t index_size) {SCOPEMARK_NAME(get_cache_and_index_at_bcp, this) + assert_different_registers(cache, index); + get_cache_index_at_bcp(index, bcp_offset, index_size); + ldptr(cache, Address(rfp, frame::interpreter_frame_cache_offset * wordSize)); + assert(sizeof(ConstantPoolCacheEntry) == 4 * wordSize, "adjust code below"); + // convert from field index to ConstantPoolCacheEntry index + assert(exact_log2(in_words(ConstantPoolCacheEntry::size())) == 2, "else change next line"); + slll(index, 2, index); + memb(); +} + +void InterpreterMacroAssembler::get_cache_and_index_and_bytecode_at_bcp(Register cache, + Register index, + Register bytecode, + int byte_no, + int bcp_offset, + size_t index_size) {SCOPEMARK_NAME(get_cache_and_index_and_bytecode_at_bcp, this) + get_cache_and_index_at_bcp(cache, index, bcp_offset, index_size); + // We use a 32-bit load here since the layout of 64-bit words on + // little-endian machines allow us that. 
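+  // In effect (sketch): load the 32-bit _indices word of the cache entry,
+  // then bytecode = (indices >> ((1 + byte_no) * BitsPerByte))
+  //                 & ConstantPoolCacheEntry::bytecode_1_mask.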
+ lea(bytecode, Address(cache, index, Address::times_ptr, ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::indices_offset())); + memb(); + ldwu(bytecode, Address(bytecode,0)); + memb(); + const int shift_count = (1 + byte_no) * BitsPerByte; + assert((byte_no == TemplateTable::f1_byte && shift_count == ConstantPoolCacheEntry::bytecode_1_shift) || + (byte_no == TemplateTable::f2_byte && shift_count == ConstantPoolCacheEntry::bytecode_2_shift), + "correct shift count"); + srll(bytecode, shift_count, bytecode); + assert(ConstantPoolCacheEntry::bytecode_1_mask == ConstantPoolCacheEntry::bytecode_2_mask, "common mask"); + andw(bytecode, ConstantPoolCacheEntry::bytecode_1_mask, bytecode); +} + +void InterpreterMacroAssembler::get_cache_entry_pointer_at_bcp(Register cache, + Register tmp, + int bcp_offset, + size_t index_size) { + assert_different_registers(cache, tmp); + get_cache_index_at_bcp(tmp, bcp_offset, index_size); + assert(sizeof(ConstantPoolCacheEntry) == 4 * wordSize, "adjust code below"); + // convert from field index to ConstantPoolCacheEntry index + // and from word offset to byte offset + assert(exact_log2(in_bytes(ConstantPoolCacheEntry::size_in_bytes())) == 2 + LogBytesPerWord, "else change next line"); + slll(tmp, 2 + LogBytesPerWord, tmp); + ldptr(cache, Address(rfp, frame::interpreter_frame_cache_offset * wordSize)); + // skip past the header + addptr(cache, in_bytes(ConstantPoolCache::base_offset()), cache); + addptr(cache, tmp, cache); +} + +// Load object from cpool->resolved_references(index) +void InterpreterMacroAssembler::load_resolved_reference_at_index(Register result, + Register index, + Register tmp) { + assert_different_registers(result, index); + + get_constant_pool(result); + // load pointer for resolved_references[] objArray + ldptr(result, Address(result, ConstantPool::cache_offset_in_bytes())); + ldptr(result, Address(result, ConstantPoolCache::resolved_references_offset_in_bytes())); + resolve_oop_handle(result, tmp); + load_heap_oop(result, Address(result, index, + UseCompressedOops ? Address::times_4 : Address::times_ptr, + arrayOopDesc::base_offset_in_bytes(T_OBJECT)), tmp); +} + +// load cpool->resolved_klass_at(index) +void InterpreterMacroAssembler::load_resolved_klass_at_index(Register klass, + Register cpool, + Register index) { + assert_different_registers(cpool, index); + + ldhu(index, Address(cpool, index, Address::times_ptr, sizeof(ConstantPool))); + memb(); + Register resolved_klasses = cpool; + ldptr(resolved_klasses, Address(cpool, ConstantPool::resolved_klasses_offset_in_bytes())); + memb(); + ldptr(klass, Address(resolved_klasses, index, Address::times_ptr, Array::base_offset_in_bytes())); +} + +void InterpreterMacroAssembler::load_resolved_method_at_index(int byte_no, + Register method, + Register cache, + Register index) { + assert_different_registers(cache, index); + + const int method_offset = in_bytes( + ConstantPoolCache::base_offset() + + ((byte_no == TemplateTable::f2_byte) + ? ConstantPoolCacheEntry::f2_offset() + : ConstantPoolCacheEntry::f1_offset())); + + ldptr(method, Address(cache, index, Address::times_ptr, method_offset)); // get f1 Method* +} + +// Generate a subtype check: branch to ok_is_subtype if sub_klass is a +// subtype of super_klass. 
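+// On failure the generated code falls through (after recording the failed
+// typecheck in the profile), so the caller must emit the error path itself.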
+// +// Args: +// FSR: superklass +// Rsub_klass: subklass +// +// Kills: +// T0, T1 +void InterpreterMacroAssembler::gen_subtype_check(Register Rsub_klass, + Label& ok_is_subtype) {SCOPEMARK_NAME(gen_subtype_check, this) + Register rcx = c_rarg4; + Register rdi = T0; + Register rax = FSR; + assert(Rsub_klass != FSR, "FSR holds superklass"); + assert(Rsub_klass != rlocals, "s1 holds locals"); + assert(Rsub_klass != rbcp, "s0 holds bcp"); + assert(Rsub_klass != rcx, "T1 holds 2ndary super array length"); + assert(Rsub_klass != rdi, "T0 holds 2ndary super array scan ptr"); + + // Profile the not-null value's klass. + profile_typecheck(rcx, Rsub_klass, rdi); // blows rcx, reloads rdi + +// Do the check. + check_klass_subtype(Rsub_klass, rax, rcx, ok_is_subtype); // blows rcx + +// Profile the failure of the check. + profile_typecheck_failed(rcx); // blows rcx +} + +// Java Expression Stack + +void InterpreterMacroAssembler::pop_ptr(Register r) { + assert(r != esp, "current not consider esp"); + ldptr(r, Address(esp, 0)); + addl(esp, Interpreter::stackElementSize, esp); +} + +void InterpreterMacroAssembler::push_ptr(Register r) { + assert(r != esp, "current not consider esp"); + subl(esp, Interpreter::stackElementSize, esp); + stptr(r, Address(esp, 0)); +} + +void InterpreterMacroAssembler::push_i(Register r) { + assert(r != esp, "current not consider esp"); + // For compatibility reason, don't change to sw. + movw(r, r);//clear high 32-bits zero + subl(esp, Interpreter::stackElementSize, esp); + stl(r, Address(esp, 0)); +} + + +//void InterpreterMacroAssembler::push_i_or_ptr(Register r) { +// assert(r != esp, "current not consider esp"); +// // For compatibility reason, don't change to sw. +// movw(r, r); +// stl(r, Address(esp, - Interpreter::stackElementSize));//clear high 32-bits zero +// subl(esp, Interpreter::stackElementSize, esp); +//} + +void InterpreterMacroAssembler::push_f(FloatRegister r) { + subl(esp, Interpreter::stackElementSize, esp); + fsts(r, Address(esp, 0)); +} + +void InterpreterMacroAssembler::pop_f(FloatRegister r) { + flds(r, Address(esp, 0)); + addl(esp, Interpreter::stackElementSize, esp); +} + +void InterpreterMacroAssembler::push_d(FloatRegister r) { + subl(esp, 2 * Interpreter::stackElementSize, esp); + fstd(r, Address(esp, 0)); +} + +void InterpreterMacroAssembler::pop_d(FloatRegister r) { + fldd(r, Address(esp, 0)); + addl(esp, 2 * Interpreter::stackElementSize, esp); +} + +void InterpreterMacroAssembler::pop_i(Register r) { + assert(r != esp, "current not consider esp"); + ldws(r, Address(esp, 0)); + addl(esp, Interpreter::stackElementSize, esp); +} + +void InterpreterMacroAssembler::pop_l(Register r) { + assert(r != esp, "current not consider esp"); + ldptr(r, Address(esp, 0)); + addl(esp, 2 * Interpreter::stackElementSize, esp); +} + +void InterpreterMacroAssembler::push_l(Register r) { + assert(r != esp, "current not consider esp"); + subl(esp, 2 * Interpreter::stackElementSize, esp); + stptr(R0, Address(esp, Interpreter::stackElementSize)); + stptr(r, Address(esp, 0)); +} + +void InterpreterMacroAssembler::pop(TosState state) { + switch (state) { + case atos: pop_ptr(); break; + case btos: + case ztos: + case ctos: + case stos: + case itos: pop_i(); break; + case ltos: pop_l(); break; + case ftos: pop_f(); break; + case dtos: pop_d(); break; + case vtos: /* nothing to do */ break; + default: ShouldNotReachHere(); + } + interp_verify_oop(FSR, state); +} + +//FSR=V0,SSR=T4 +void InterpreterMacroAssembler::push(TosState state) { + interp_verify_oop(FSR, 
state); + switch (state) { + case atos: push_ptr(); break; + case btos: + case ztos: + case ctos: + case stos: + case itos: push_i(); break; + case ltos: push_l(); break; + case ftos: push_f(); break; + case dtos: push_d(); break; + case vtos: /* nothing to do */ break; + default : ShouldNotReachHere(); + } +} + +// Helpers for swap and dup +void InterpreterMacroAssembler::load_ptr(int n, Register val) { + ldptr(val, Address(esp, Interpreter::expr_offset_in_bytes(n))); +} + +void InterpreterMacroAssembler::store_ptr(int n, Register val) { + stptr(val, Address(esp, Interpreter::expr_offset_in_bytes(n))); +} + +void InterpreterMacroAssembler::prepare_to_jump_from_interpreted() {SCOPEMARK_NAME(prepare_to_jump_from_interpreted, this) + // set sender sp + movl(rsender, esp); + // record last_sp + stptr(esp, Address(rfp, frame::interpreter_frame_last_sp_offset * wordSize)); +} + + +// Jump to from_interpreted entry of a call unless single stepping is possible +// in this thread in which case we must call the i2i entry +void InterpreterMacroAssembler::jump_from_interpreted(Register method, Register temp) {SCOPEMARK_NAME(jump_from_interpreted, this) + prepare_to_jump_from_interpreted(); + + if (JvmtiExport::can_post_interpreter_events()) { + Label run_compiled_code; + // JVMTI events, such as single-stepping, are implemented partly by avoiding running + // compiled code in threads for which the event is enabled. Check here for + // interp_only_mode if these events CAN be enabled. + // interp_only is an int, on little endian it is sufficient to test the byte only + // Is a cmpl faster? + ldbu(AT, Address(rthread, JavaThread::interp_only_mode_offset())); + jcc(Assembler::zero, run_compiled_code, AT); + jmp(Address(method, Method::interpreter_entry_offset())); + bind(run_compiled_code); + } + + jmp(Address(method, Method::from_interpreted_offset())); +} + +// The following two routines provide a hook so that an implementation +// can schedule the dispatch in two parts. sw64 does not do this. 
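+// Accordingly, dispatch_prolog() below is a no-op and dispatch_epilog()
+// just performs the complete dispatch via dispatch_next().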
+void InterpreterMacroAssembler::dispatch_prolog(TosState state, int step) { + // Nothing sw64 specific to be done here +} + +void InterpreterMacroAssembler::dispatch_epilog(TosState state, int step) { + dispatch_next(state, step); +} + +void InterpreterMacroAssembler::dispatch_base(TosState state, + address* table, + bool verifyoop, + bool generate_poll) { + // it's a convention that the bytecode to dispatch to in rnext + if (VerifyActivationFrameSize) { + Label L; + subptr(rfp, esp, rscratch1); + int32_t min_frame_size = + (frame::link_offset - frame::interpreter_frame_initial_sp_offset) * + wordSize; + cmpptr(rscratch1, (int32_t)min_frame_size); + jcc(Assembler::greaterEqual, L); + stop("broken stack frame"); + bind(L); + } + if (verifyoop) { + interp_verify_oop(FSR, state); + } + + address* const safepoint_table = Interpreter::safept_table(state); + Label no_safepoint, dispatch; + if (table != safepoint_table && generate_poll) { + NOT_PRODUCT(block_comment("Thread-local Safepoint poll")); + testb(Address(rthread, JavaThread::polling_word_offset()), SafepointMechanism::poll_bit()); + jcc(Assembler::zero, no_safepoint); + lea(rscratch1, ExternalAddress((address)safepoint_table)); + jmp(dispatch); + } + + bind(no_safepoint); + lea(rscratch1, ExternalAddress((address)table)); + bind(dispatch); + jmp(Address(rscratch1, rnext, Address::times_8)); //set rnext like dispatch_next +} + +void InterpreterMacroAssembler::dispatch_only(TosState state, bool generate_poll) { + dispatch_base(state, Interpreter::dispatch_table(state), true, generate_poll); +} + +void InterpreterMacroAssembler::dispatch_only_normal(TosState state) { + dispatch_base(state, Interpreter::normal_table(state)); +} + +void InterpreterMacroAssembler::dispatch_only_noverify(TosState state) { + dispatch_base(state, Interpreter::normal_table(state), false); +} + + +void InterpreterMacroAssembler::dispatch_next(TosState state, int step, bool generate_poll) {SCOPEMARK_NAME(InterpreterMacroAssembler::dispatch_next, this) + // load next bytecode (load before advancing rbcp to prevent AGI) + load_unsigned_byte(rnext, Address(rbcp, step));//use rnext in dispatch_base + // advance rbcp + incrementl(rbcp, step); + dispatch_base(state, Interpreter::dispatch_table(state), true, generate_poll); +} + +void InterpreterMacroAssembler::dispatch_via(TosState state, address* table) { + // load current bytecode + load_unsigned_byte(rnext, Address(rbcp, 0)); + dispatch_base(state, table); +} + +void InterpreterMacroAssembler::narrow(Register result) { + + const Register rcx = T9; + // Get method->_constMethod->_result_type + ldptr(rcx, Address(rfp, frame::interpreter_frame_method_offset * wordSize)); + ldptr(rcx, Address(rcx, Method::const_offset())); + load_unsigned_byte(rcx, Address(rcx, ConstMethod::result_type_offset())); + + Label done, notBool, notByte, notChar; + + // common case first + cmpw(rcx, T_INT); + jcc(Assembler::equal, done); + + // mask integer result to narrower return type. + cmpw(rcx, T_BOOLEAN); + jcc(Assembler::notEqual, notBool); + andw(result, 0x1, result); + jmp(done); + + bind(notBool); + cmpw(rcx, T_BYTE); + jcc(Assembler::notEqual, notByte); + sextb(result, result); //TODO jzy 64-bits? 32-bits? 
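+    // (sextb here, like sexth for the short case below, is assumed to
+    //  sign-extend through bit 63, Alpha-style, so the TODO about
+    //  32- vs. 64-bit width should not matter for the int result.)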
+ jmp(done); + + bind(notByte); + cmpw(rcx, T_CHAR); + jcc(Assembler::notEqual, notChar); + zapnot(result, 0x3, result); + jmp(done); + + bind(notChar); + // cmpw(rcx, T_SHORT); // all that's left + // jcc(Assembler::notEqual, done); + sexth(result, result); + + // Nothing to do for T_INT + bind(done); +} + +// remove activation +// +// Unlock the receiver if this is a synchronized method. +// Unlock any Java monitors from syncronized blocks. +// Remove the activation from the stack. +// +// If there are locked Java monitors +// If throw_monitor_exception +// throws IllegalMonitorStateException +// Else if install_monitor_exception +// installs IllegalMonitorStateException +// Else +// no error processing +void InterpreterMacroAssembler::remove_activation( + TosState state, + Register ret_addr, + bool throw_monitor_exception, + bool install_monitor_exception, + bool notify_jvmdi) {SCOPEMARK_NAME(remove_activation, this) + // Note: Registers V0, T4 and f0, f1 may be in use for the + // result check if synchronized method + Label unlocked, unlock, no_unlock; + + const Register rbx = T2; + const Register rcx = T3; + const Register robj = c_rarg1; + const Register rmon = c_rarg1; + + // The below poll is for the stack watermark barrier. It allows fixing up frames lazily, + // that would normally not be safe to use. Such bad returns into unsafe territory of + // the stack, will call InterpreterRuntime::at_unwind. + Label slow_path; + Label fast_path; + safepoint_poll(slow_path, rthread, rscratch1, true /* at_return */, false /* acquire */, false /* in_nmethod */); + jmp(fast_path); + bind(slow_path); + push(state); + set_last_Java_frame(rsp, rbp, (address)pc() , rscratch3); // need check dx + super_call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::at_unwind), rthread); + reset_last_Java_frame(rthread, true); + pop(state); + bind(fast_path); + + // get the value of _do_not_unlock_if_synchronized into rdx + const Address do_not_unlock_if_synchronized(rthread, + in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); + ldbu(rbx, do_not_unlock_if_synchronized); + stb(R0, do_not_unlock_if_synchronized); // reset the flag + + // get method access flags + ldptr(rcx, Address(rfp, frame::interpreter_frame_method_offset * wordSize)); + ldw(rcx, Address(rcx, Method::access_flags_offset())); + testw(rcx, JVM_ACC_SYNCHRONIZED); + jcc(Assembler::zero, unlocked); + + // Don't unlock anything if the _do_not_unlock_if_synchronized flag + // is set. + jcc(Assembler::notZero, no_unlock, rbx); + + // unlock monitor + push(state); // save result + + // BasicObjectLock will be first in list, since this is a + // synchronized method. However, need to check that the object has + // not been unlocked by an explicit monitorexit bytecode. + const Address monitor(rfp, frame::interpreter_frame_initial_sp_offset * + wordSize - (int) sizeof(BasicObjectLock)); + // We use c_rarg1 so that if we go slow path it will be the correct + // register for unlock_object to pass to VM directly + lea(robj, monitor); // address of first monitor + + ldptr(FSR, Address(robj, BasicObjectLock::obj_offset_in_bytes())); + jcc(Assembler::notZero, unlock, FSR); + + pop(state); + if (throw_monitor_exception) { + // Entry already unlocked, need to throw exception + call_VM(noreg, CAST_FROM_FN_PTR(address, + InterpreterRuntime::throw_illegal_monitor_state_exception)); + should_not_reach_here("throw_illegal_monitor_state_exception"); + } else { + // Monitor already unlocked during a stack unroll. 
If requested, + // install an illegal_monitor_state_exception. Continue with + // stack unrolling. + if (install_monitor_exception) { + call_VM(noreg, CAST_FROM_FN_PTR(address, + InterpreterRuntime::new_illegal_monitor_state_exception)); + } + jmp(unlocked); + } + + bind(unlock); + unlock_object(robj); + pop(state); + + // Check that for block-structured locking (i.e., that all locked + // objects has been unlocked) + bind(unlocked); + + // FSR, rdx: Might contain return value + + // Check that all monitors are unlocked + { + Label loop, exception, entry, restart; + const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; + const Address monitor_block_top( + rfp, frame::interpreter_frame_monitor_block_top_offset * wordSize); + const Address monitor_block_bot( + rfp, frame::interpreter_frame_initial_sp_offset * wordSize); + + bind(restart); + // We use c_rarg1 so that if we go slow path it will be the correct + // register for unlock_object to pass to VM directly + ldptr(rmon, monitor_block_top); // points to current entry, starting + // with top-most entry + lea(rbx, monitor_block_bot); // points to word before bottom of + // monitor block + jmp(entry); + + // Entry already locked, need to throw exception + bind(exception); + + if (throw_monitor_exception) { + // Throw exception + MacroAssembler::call_VM(noreg, + CAST_FROM_FN_PTR(address, InterpreterRuntime:: + throw_illegal_monitor_state_exception)); + should_not_reach_here("892 throw_illegal_monitor_state_exception"); + } else { + // Stack unrolling. Unlock object and install illegal_monitor_exception. + // Unlock does not block, so don't have to worry about the frame. + // We don't have to preserve c_rarg1 since we are going to throw an exception. + + push(state); + movl(robj, rmon); // nop if robj and rmon are the same + unlock_object(robj); + pop(state); + + if (install_monitor_exception) { + call_VM(noreg, CAST_FROM_FN_PTR(address, + InterpreterRuntime:: + new_illegal_monitor_state_exception)); + } + + jmp(restart); + } + + bind(loop); + // check if current entry is used + ldptr(rcc, Address(rmon, BasicObjectLock::obj_offset_in_bytes())); + jcc(Assembler::notZero, exception); + + addptr(rmon, entry_size, rmon); // otherwise advance to next entry + bind(entry); + cmpptr(rmon, rbx); // check if bottom reached + jcc(Assembler::notEqual, loop); // if not at bottom then check this entry + } + + bind(no_unlock); + + // jvmti support + if (notify_jvmdi) { + notify_method_exit(state, NotifyJVMTI); // preserve TOSCA + } else { + notify_method_exit(state, SkipNotifyJVMTI); // preserve TOSCA + } + + // remove activation + // get sender sp + ldptr(rbx, + Address(rfp, frame::interpreter_frame_sender_sp_offset * wordSize)); + if (StackReservedPages > 0) { + // testing if reserved zone needs to be re-enabled + Label no_reserved_zone_enabling; + + ldwu(AT, Address(rthread, JavaThread::stack_guard_state_offset())); + cmpw(AT, StackOverflow::stack_guard_enabled); + jcc(Assembler::equal, no_reserved_zone_enabling); + + ldptr(AT, Address(rthread, JavaThread::reserved_stack_activation_offset())); + cmpptr(rbx, AT); + jcc(Assembler::lessEqual, no_reserved_zone_enabling); + + call_VM_leaf( + CAST_FROM_FN_PTR(address, SharedRuntime::enable_stack_reserved_zone), rthread); + call_VM(noreg, CAST_FROM_FN_PTR(address, + InterpreterRuntime::throw_delayed_StackOverflowError)); + should_not_reach_here("throw_delayed_StackOverflowError"); + + bind(no_reserved_zone_enabling); + } + leave(); // remove frame anchor + movl(ret_addr, RA); // get return 
address TODO:jzy + movl(esp, rbx); // set sp to sender sp +} + +void InterpreterMacroAssembler::get_method_counters(Register method, + Register mcs, Label& skip) { + Label has_counters; + ldptr(mcs, Address(method, Method::method_counters_offset())); + testptr(mcs, mcs); + jcc(Assembler::notZero, has_counters); + call_VM(noreg, CAST_FROM_FN_PTR(address, + InterpreterRuntime::build_method_counters), method); + ldptr(mcs, Address(method,Method::method_counters_offset())); + testptr(mcs, mcs); + jcc(Assembler::zero, skip); // No MethodCounters allocated, OutOfMemory + bind(has_counters); +} + +// Lock object +// +// Args: +// rdx, c_rarg1: BasicObjectLock to be used for locking +// +// Kills: +// rax, rbx +void InterpreterMacroAssembler::lock_object(Register lock_reg) { + assert(lock_reg == c_rarg1, + "The argument is only for looks. It must be c_rarg1"); + + const Register rax = T2; + const Register rbx = T1; + + if (UseHeavyMonitors) { + call_VM(noreg, + CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), + lock_reg); + } else { + Label done; + + const Register swap_reg = rax; // Must use rax for cmpxchg instruction + const Register tmp_reg = rbx; // Will be passed to biased_locking_enter to avoid a + // problematic case where tmp_reg = no_reg. + const Register obj_reg = c_rarg3; // Will contain the oop + + const int obj_offset = BasicObjectLock::obj_offset_in_bytes(); + const int lock_offset = BasicObjectLock::lock_offset_in_bytes (); + const int mark_offset = lock_offset + + BasicLock::displaced_header_offset_in_bytes(); + + Label slow_case; + + // Load object pointer into obj_reg + ldptr(obj_reg, Address(lock_reg, obj_offset)); + + if (DiagnoseSyncOnValueBasedClasses != 0) { + load_klass(tmp_reg, obj_reg); + ldw(tmp_reg, Address(tmp_reg, Klass::access_flags_offset())); + testw(tmp_reg, JVM_ACC_IS_VALUE_BASED_CLASS); + jcc(Assembler::notZero, slow_case); + } //dx + + if (UseBiasedLocking) { + biased_locking_enter(lock_reg, obj_reg, swap_reg, tmp_reg, false, done, &slow_case); + } + + // Load immediate 1 into swap_reg %rax + ldi(swap_reg, (int32_t)1, R0); + + // Load (object->mark() | 1) into swap_reg %rax + ldptr(AT, Address(obj_reg, oopDesc::mark_offset_in_bytes())); + orptr(swap_reg, AT, swap_reg); + + // Save (object->mark() | 1) into BasicLock's displaced header + stptr(swap_reg, Address(lock_reg, mark_offset)); + + assert(lock_offset == 0, + "displaced header must be first word in BasicObjectLock"); + + cmpxchg(lock_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()), swap_reg);// + + if (PrintBiasedLockingStatistics) { + Label L; + jcc(Assembler::failed, L, AT); + atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, tmp_reg, rscratch1); + jmp(done); + bind(L); + } else { + jcc(Assembler::success, done, AT); + } + + const int zero_bits = 7;// yj todo: 7 or 3?? 7 means lsb 3 bits must be same, while 3 mean 2 bits + + // Test if the oopMark is an obvious stack pointer, i.e., + // 1) (mark & zero_bits) == 0, and + // 2) esp <= mark < mark + os::pagesize() + // + // These 3 tests can be done by evaluating the following + // expression: ((mark - esp) & (zero_bits - os::vm_page_size())), + // assuming both stack pointer and pagesize have their + // least significant bits clear. 
+ // NOTE: the oopMark is in swap_reg %rax as the result of cmpxchg + subptr(swap_reg, esp, swap_reg); + andptr(swap_reg, zero_bits - os::vm_page_size(), swap_reg); + + // Save the test result, for recursive case, the result is zero + stptr(swap_reg, Address(lock_reg, mark_offset)); + + if (PrintBiasedLockingStatistics) { + Label L; + jcc(Assembler::notZero, L, swap_reg); + atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, tmp_reg, rscratch1); + jmp(done); + bind(L); + } else { + jcc(Assembler::zero, done, swap_reg); + } + + bind(slow_case); + + // Call the runtime routine for slow case + call_VM(noreg, + CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), + lock_reg); + + bind(done); + } +} + + +// Unlocks an object. Used in monitorexit bytecode and +// remove_activation. Throws an IllegalMonitorException if object is +// not locked by current thread. +// +// Args: +// c_rarg1: BasicObjectLock for lock +// +// Kills: +// rax +// c_rarg0, c_rarg1, c_rarg2, c_rarg3, ... (param regs) +// rscratch1, rscratch2 (scratch regs) +void InterpreterMacroAssembler::unlock_object(Register lock_reg) { + assert(lock_reg == c_rarg1, + "The argument is only for looks. It must be c_rarg1"); + + if (UseHeavyMonitors) { + call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), lock_reg); + } else { + Label done; + + const Register swap_reg = T2; + const Register header_reg = c_rarg2; // Will contain the old oopMark + const Register obj_reg = c_rarg3; // Will contain the oop + + save_bcp(); // Save in case of exception + + // Convert from BasicObjectLock structure to object and BasicLock structure + // Store the BasicLock address into %T2 + lea(swap_reg, Address(lock_reg, BasicObjectLock::lock_offset_in_bytes())); + + // Load oop into obj_reg(%c_rarg3) + ldptr(obj_reg, Address(lock_reg, BasicObjectLock::obj_offset_in_bytes())); + + // Free entry + stptr(R0, Address(lock_reg, BasicObjectLock::obj_offset_in_bytes())); + + if (UseBiasedLocking) { + biased_locking_exit(obj_reg, header_reg, done); + } + + // Load the old header from BasicLock structure + ldptr(header_reg, Address(swap_reg, + BasicLock::displaced_header_offset_in_bytes())); + + // Test for recursion + testptr(header_reg, header_reg); + + // zero for recursive case + jcc(Assembler::zero, done); + + // Atomic swap back the old header + cmpxchg(header_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()), swap_reg); + + // zero for recursive case + jcc(Assembler::success, done); + + // Call the runtime routine for slow case. + stptr(obj_reg, Address(lock_reg, BasicObjectLock::obj_offset_in_bytes())); // restore obj + + call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), lock_reg); + + bind(done); + + restore_bcp(); + } +} + +void InterpreterMacroAssembler::test_method_data_pointer(Register mdp, + Label& zero_continue) { + assert(ProfileInterpreter, "must be profiling interpreter"); + ldptr(mdp, Address(rfp, frame::interpreter_frame_mdp_offset * wordSize)); + jcc(Assembler::zero, zero_continue, mdp); +} + +// Set the method data pointer for the current bcp. +void InterpreterMacroAssembler::set_method_data_pointer_for_bcp() { + assert(ProfileInterpreter, "must be profiling interpreter"); + Label set_mdp; + const Register rax = V0;//TODO:why not save? jzy + const Register rbx = T9; +// V0 and T0 will be used as two temporary registers. + + subl(esp, 2 * wordSize, esp); + stl(rax, wordSize, esp); + stl(rbx, 0, esp); + + get_method(rbx); + // Test MDO to avoid the call if it is NULL. 
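+  // Net effect (sketch): when the method has an MDO,
+  //   mdp = (address)MDO + in_bytes(MethodData::data_offset())
+  //         + InterpreterRuntime::bcp_to_di(method, bcp),
+  // and NULL is stored into the frame's mdp slot otherwise.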
+ ldptr(rax, Address(rbx, in_bytes(Method::method_data_offset()))); + testptr(rax, rax); + jcc(Assembler::zero, set_mdp); + // rbx: method + // _bcp_register: bcp + call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::bcp_to_di), rbx, _bcp_register); + // rax: mdi + // mdo is guaranteed to be non-zero here, we checked for it before the call. + get_method(rbx); + ldptr(rbx, Address(rbx, in_bytes(Method::method_data_offset()))); + addptr(rbx, in_bytes(MethodData::data_offset()), rbx); + addptr(rax, rbx, rax); + bind(set_mdp); + stptr(rax, Address(rfp, frame::interpreter_frame_mdp_offset * wordSize)); //TODO check? lsp + ldl(rax, wordSize, esp); + ldl(rbx, 0, esp); + addl(esp, 2 * wordSize, esp); +} +//TODO:why not save c_rarg0 c_rarg1 +void InterpreterMacroAssembler::verify_method_data_pointer() { + assert(ProfileInterpreter, "must be profiling interpreter"); +#ifdef ASSERT + Label verify_continue; + push(rax); + push(rbx); + Register arg3_reg = c_rarg3; + Register arg2_reg = c_rarg2; + push(arg3_reg); + push(arg2_reg); + test_method_data_pointer(arg3_reg, verify_continue); // If mdp is zero, continue + get_method(rbx); + + // If the mdp is valid, it will point to a DataLayout header which is + // consistent with the bcp. The converse is highly probable also. + load_unsigned_short(arg2_reg, + Address(arg3_reg, in_bytes(DataLayout::bci_offset()))); + addptr(arg2_reg, Address(rbx, Method::const_offset())); + lea(arg2_reg, Address(arg2_reg, ConstMethod::codes_offset())); + cmpptr(arg2_reg, _bcp_register); + jcc(Assembler::equal, verify_continue); + // rbx: method + // _bcp_register: bcp + // c_rarg3: mdp + call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::verify_mdp), + rbx, _bcp_register, arg3_reg); + bind(verify_continue); + pop(arg2_reg); + pop(arg3_reg); + pop(rbx); + pop(rax); +#endif // ASSERT +} + + +void InterpreterMacroAssembler::set_mdp_data_at(Register mdp_in, + int constant, + Register value) { + assert(ProfileInterpreter, "must be profiling interpreter"); + Address data(mdp_in, constant); + stptr(value, data); +} + + +void InterpreterMacroAssembler::increment_mdp_data_at(Register mdp_in, + int constant, + bool decrement) { + // Counter address + Address data(mdp_in, constant); + + increment_mdp_data_at(data, decrement); +} + +void InterpreterMacroAssembler::increment_mdp_data_at(Address data, + bool decrement) { + //TODO check lsp??? + assert(ProfileInterpreter, "must be profiling interpreter"); + // %%% this does 64bit counters at best it is wasting space + // at worst it is a rare bug when counters overflow + + if (decrement) { + // Decrement the register. + ldptr(rscratch4, data); + ldi(rscratch4, (int32_t) -DataLayout::counter_increment, rscratch4); + // If the decrement causes the counter to overflow, stay negative + Label L; + jcc(Assembler::greaterEqual, L, rscratch4); + stptr(rscratch4, data); + bind(L); + } else { + assert(DataLayout::counter_increment == 1, + "flow-free idiom only works with 1"); + ldptr(rscratch4, data); + // Increment the register. + ldi(rscratch1, DataLayout::counter_increment, rscratch4); + // If the increment causes the counter to overflow, pull back by 1. 
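+    // Saturating bump, roughly:
+    //   next = *data + DataLayout::counter_increment;
+    //   if (next >= *data) *data = next;   // unsigned: skip store on wrap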
+ Label L; + cmpult(rscratch1, rscratch4, rcc); + bne_l(rcc, L); + stptr(rscratch1, data); + bind(L); + } +} + + +void InterpreterMacroAssembler::increment_mdp_data_at(Register mdp_in, + Register reg, + int constant, + bool decrement) {SCOPEMARK_NAME(InterpreterMacroAssembler::increment_mdp_data_at, this) + assert_different_registers(mdp_in, reg, rscratch1, rscratch4); + Address data(mdp_in, reg, Address::times_1, constant); + + increment_mdp_data_at(data, decrement); +} + +void InterpreterMacroAssembler::set_mdp_flag_at(Register mdp_in, + int flag_byte_constant) { + assert(ProfileInterpreter, "must be profiling interpreter"); + int header_offset = in_bytes(DataLayout::flags_offset()); + int header_bits = flag_byte_constant; + // Set the flag + ldbu(rscratch4, Address(mdp_in, header_offset)); + bis(rscratch4, header_bits, rscratch4); + stb(rscratch4, Address(mdp_in, header_offset)); +} + + + +void InterpreterMacroAssembler::test_mdp_data_at(Register mdp_in, + int offset, + Register value, + Register test_value_out, + Label& not_equal_continue) { + assert(ProfileInterpreter, "must be profiling interpreter"); + if (test_value_out == noreg) { + cmpptr(value, Address(mdp_in, offset)); + } else { + // Put the test value into a register, so caller can use it: + ldptr(test_value_out, Address(mdp_in, offset)); + cmpptr(test_value_out, value); + } + jcc(Assembler::notEqual, not_equal_continue); +} + + +void InterpreterMacroAssembler::update_mdp_by_offset(Register mdp_in, + int offset_of_disp) { + assert(ProfileInterpreter, "must be profiling interpreter"); + Address disp_address(mdp_in, offset_of_disp); + addptr(mdp_in, disp_address); + stptr(mdp_in, Address(rfp, frame::interpreter_frame_mdp_offset * wordSize)); +} + + +void InterpreterMacroAssembler::update_mdp_by_offset(Register mdp_in, + Register reg, + int offset_of_disp) { + assert(ProfileInterpreter, "must be profiling interpreter"); + Address disp_address(mdp_in, reg, Address::times_1, offset_of_disp); + addptr(mdp_in, disp_address); + stptr(mdp_in, Address(rfp, frame::interpreter_frame_mdp_offset * wordSize)); +} + + +void InterpreterMacroAssembler::update_mdp_by_constant(Register mdp_in, + int constant) { + assert(ProfileInterpreter, "must be profiling interpreter"); + addptr(mdp_in, constant, mdp_in); + stptr(mdp_in, Address(rfp, frame::interpreter_frame_mdp_offset * wordSize)); +} + + +void InterpreterMacroAssembler::update_mdp_for_ret(Register return_bci) { + assert(ProfileInterpreter, "must be profiling interpreter"); + push(return_bci); // save/restore across call_VM + call_VM(noreg, + CAST_FROM_FN_PTR(address, InterpreterRuntime::update_mdp_for_ret), + return_bci); + pop(return_bci); +} + + +void InterpreterMacroAssembler::profile_taken_branch(Register mdp, + Register bumped_count) { + if (ProfileInterpreter) { + Label profile_continue; + + // If no method data exists, go to profile_continue. + // Otherwise, assign to mdp + test_method_data_pointer(mdp, profile_continue); + + // We are taking a branch. Increment the taken count. 
+ // We inline increment_mdp_data_at to return bumped_count in a register + //increment_mdp_data_at(mdp, in_bytes(JumpData::taken_offset())); + Address data(mdp, in_bytes(JumpData::taken_offset())); + ldptr(rscratch4, data); + assert(DataLayout::counter_increment == 1, + "flow-free idiom only works with 1"); + // yj: we learn aarch64 here to test overflow + Label L; + ldi(bumped_count, DataLayout::counter_increment, rscratch4); + cmpult(bumped_count, rscratch4, rcc); + bne_l(rcc, L); + stptr(bumped_count, data); + bind(L); +// stptr(bumped_count, in_bytes(JumpData::taken_offset()), mdp); // Store back out + // The method data pointer needs to be updated to reflect the new target. + update_mdp_by_offset(mdp, in_bytes(JumpData::displacement_offset())); + bind(profile_continue); + } +} + + +void InterpreterMacroAssembler::profile_not_taken_branch(Register mdp) { + if (ProfileInterpreter) { + Label profile_continue; + + // If no method data exists, go to profile_continue. + test_method_data_pointer(mdp, profile_continue); + + // We are taking a branch. Increment the not taken count. + increment_mdp_data_at(mdp, in_bytes(BranchData::not_taken_offset())); + + // The method data pointer needs to be updated to correspond to + // the next bytecode + update_mdp_by_constant(mdp, in_bytes(BranchData::branch_data_size())); + bind(profile_continue); + } +} + +void InterpreterMacroAssembler::profile_call(Register mdp) { + if (ProfileInterpreter) { + Label profile_continue; + + // If no method data exists, go to profile_continue. + test_method_data_pointer(mdp, profile_continue); + + // We are making a call. Increment the count. + increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); + + // The method data pointer needs to be updated to reflect the new target. + update_mdp_by_constant(mdp, in_bytes(CounterData::counter_data_size())); + bind(profile_continue); + } +} + + +void InterpreterMacroAssembler::profile_final_call(Register mdp) { + if (ProfileInterpreter) { + Label profile_continue; + + // If no method data exists, go to profile_continue. + test_method_data_pointer(mdp, profile_continue); + + // We are making a call. Increment the count. + increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); + + // The method data pointer needs to be updated to reflect the new target. + update_mdp_by_constant(mdp, + in_bytes(VirtualCallData:: + virtual_call_data_size())); + bind(profile_continue); + } +} + + +void InterpreterMacroAssembler::profile_virtual_call(Register receiver, + Register mdp, + Register reg2, + bool receiver_can_be_null) { + if (ProfileInterpreter) { + Label profile_continue; + + // If no method data exists, go to profile_continue. + test_method_data_pointer(mdp, profile_continue); + + Label skip_receiver_profile; + if (receiver_can_be_null) { + Label not_null; + jcc(Assembler::notZero, not_null, receiver); + // We are making a call. Increment the count for null receiver. + increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); + jmp(skip_receiver_profile); + bind(not_null); + } + + // Record the receiver type. + record_klass_in_profile(receiver, mdp, reg2, true); + bind(skip_receiver_profile); + + // The method data pointer needs to be updated to reflect the new target. + update_mdp_by_constant(mdp, in_bytes(VirtualCallData::virtual_call_data_size())); + bind(profile_continue); + } +} + +// This routine creates a state machine for updating the multi-row +// type profile at a virtual call site (or other type-sensitive bytecode). 
+// The machine visits each row (of receiver/count) until the receiver type +// is found, or until it runs out of rows. At the same time, it remembers +// the location of the first empty row. (An empty row records null for its +// receiver, and can be allocated for a newly-observed receiver type.) +// Because there are two degrees of freedom in the state, a simple linear +// search will not work; it must be a decision tree. Hence this helper +// function is recursive, to generate the required tree structured code. +// It's the interpreter, so we are trading off code space for speed. +// See below for example code. +void InterpreterMacroAssembler::record_klass_in_profile_helper( + Register receiver, Register mdp, + Register reg2, int start_row, + Label& done, bool is_virtual_call) { + if (TypeProfileWidth == 0) { + if (is_virtual_call) { + increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); + } +#if INCLUDE_JVMCI + else if (EnableJVMCI) { + increment_mdp_data_at(mdp, in_bytes(ReceiverTypeData::nonprofiled_receiver_count_offset())); + } +#endif // INCLUDE_JVMCI + } else { + int non_profiled_offset = -1; + if (is_virtual_call) { + non_profiled_offset = in_bytes(CounterData::count_offset()); + } +#if INCLUDE_JVMCI + else if (EnableJVMCI) { + non_profiled_offset = in_bytes(ReceiverTypeData::nonprofiled_receiver_count_offset()); + } +#endif // INCLUDE_JVMCI + + record_item_in_profile_helper(receiver, mdp, reg2, 0, done, TypeProfileWidth, + &VirtualCallData::receiver_offset, &VirtualCallData::receiver_count_offset, non_profiled_offset); + } +} + +void InterpreterMacroAssembler::record_item_in_profile_helper(Register item, Register mdp, + Register reg2, int start_row, Label& done, int total_rows, + OffsetFunction item_offset_fn, OffsetFunction item_count_offset_fn, + int non_profiled_offset) { + int last_row = total_rows - 1; + assert(start_row <= last_row, "must be work left to do"); + // Test this row for both the item and for null. + // Take any of three different outcomes: + // 1. found item => increment count and goto done + // 2. found null => keep looking for case 1, maybe allocate this cell + // 3. found something else => keep looking for cases 1 and 2 + // Case 3 is handled by a recursive call. + for (int row = start_row; row <= last_row; row++) { + Label next_test; + bool test_for_null_also = (row == start_row); + + // See if the item is item[n]. + int item_offset = in_bytes(item_offset_fn(row)); + test_mdp_data_at(mdp, item_offset, item, + (test_for_null_also ? reg2 : noreg), + next_test); + // (Reg2 now contains the item from the CallData.) + + // The item is item[n]. Increment count[n]. + int count_offset = in_bytes(item_count_offset_fn(row)); + increment_mdp_data_at(mdp, count_offset); + jmp(done); + bind(next_test); + + if (test_for_null_also) { + // Failed the equality check on item[n]... Test for null. + testptr(reg2, reg2); + if (start_row == last_row) { + // The only thing left to do is handle the null case. + if (non_profiled_offset >= 0) { + Label found_null; + jcc(Assembler::zero, found_null); + // Item did not match any saved item and there is no empty row for it. + // Increment total counter to indicate polymorphic case. + increment_mdp_data_at(mdp, non_profiled_offset); + jmp(done); + bind(found_null); + } else { + jcc(Assembler::notZero, done); + } + break; + } + + Label found_null; + // Since null is rare, make it be the branch-taken case. + jcc(Assembler::zero, found_null); + + // Put all the "Case 3" tests here. 
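+      // (The recursive call below generates the nested decision tree for
+      //  rows start_row+1 .. last_row; see the worked three-row example in
+      //  the comment following this function.)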
+ record_item_in_profile_helper(item, mdp, reg2, start_row + 1, done, total_rows, + item_offset_fn, item_count_offset_fn, non_profiled_offset); + + // Found a null. Keep searching for a matching item, + // but remember that this is an empty (unused) slot. + bind(found_null); + } + } + + // In the fall-through case, we found no matching item, but we + // observed the item[start_row] is NULL. + + // Fill in the item field and increment the count. + int item_offset = in_bytes(item_offset_fn(start_row)); + set_mdp_data_at(mdp, item_offset, item); + int count_offset = in_bytes(item_count_offset_fn(start_row)); + movw(reg2, DataLayout::counter_increment); + set_mdp_data_at(mdp, count_offset, reg2); + if (start_row > 0) { + jmp(done); + } +} + +// Example state machine code for three profile rows: +// // main copy of decision tree, rooted at row[1] +// if (row[0].rec == rec) { row[0].incr(); goto done; } +// if (row[0].rec != NULL) { +// // inner copy of decision tree, rooted at row[1] +// if (row[1].rec == rec) { row[1].incr(); goto done; } +// if (row[1].rec != NULL) { +// // degenerate decision tree, rooted at row[2] +// if (row[2].rec == rec) { row[2].incr(); goto done; } +// if (row[2].rec != NULL) { count.incr(); goto done; } // overflow +// row[2].init(rec); goto done; +// } else { +// // remember row[1] is empty +// if (row[2].rec == rec) { row[2].incr(); goto done; } +// row[1].init(rec); goto done; +// } +// } else { +// // remember row[0] is empty +// if (row[1].rec == rec) { row[1].incr(); goto done; } +// if (row[2].rec == rec) { row[2].incr(); goto done; } +// row[0].init(rec); goto done; +// } +// done: + +void InterpreterMacroAssembler::record_klass_in_profile(Register receiver, + Register mdp, Register reg2, + bool is_virtual_call) { + assert(ProfileInterpreter, "must be profiling"); + Label done; + + record_klass_in_profile_helper(receiver, mdp, reg2, 0, done, is_virtual_call); + + bind (done); +} + +void InterpreterMacroAssembler::profile_ret(Register return_bci, + Register mdp) { + if (ProfileInterpreter) { + Label profile_continue; + uint row; + + // If no method data exists, go to profile_continue. + test_method_data_pointer(mdp, profile_continue); + + // Update the total ret count. + increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); + + for (row = 0; row < RetData::row_limit(); row++) { + Label next_test; + + // See if return_bci is equal to bci[n]: + test_mdp_data_at(mdp, + in_bytes(RetData::bci_offset(row)), + return_bci, noreg, + next_test); + + // return_bci is equal to bci[n]. Increment the count. + increment_mdp_data_at(mdp, in_bytes(RetData::bci_count_offset(row))); + + // The method data pointer needs to be updated to reflect the new target. + update_mdp_by_offset(mdp, + in_bytes(RetData::bci_displacement_offset(row))); + jmp(profile_continue); + bind(next_test); + } + + update_mdp_for_ret(return_bci); + + bind(profile_continue); + } +} + + +void InterpreterMacroAssembler::profile_null_seen(Register mdp) { + if (ProfileInterpreter) { + Label profile_continue; + + // If no method data exists, go to profile_continue. + test_method_data_pointer(mdp, profile_continue); + + set_mdp_flag_at(mdp, BitData::null_seen_byte_constant()); + + // The method data pointer needs to be updated. 
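+    // With TypeProfileCasts the cell at this bci is laid out as a full
+    // VirtualCallData (receiver-type rows follow the header), so the pointer
+    // must advance by the larger size; otherwise only a BitData is skipped.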
+ int mdp_delta = in_bytes(BitData::bit_data_size()); + if (TypeProfileCasts) { + mdp_delta = in_bytes(VirtualCallData::virtual_call_data_size()); + } + update_mdp_by_constant(mdp, mdp_delta); + + bind(profile_continue); + } +} + + +void InterpreterMacroAssembler::profile_typecheck_failed(Register mdp) { + if (ProfileInterpreter && TypeProfileCasts) { + Label profile_continue; + + // If no method data exists, go to profile_continue. + test_method_data_pointer(mdp, profile_continue); + + int count_offset = in_bytes(CounterData::count_offset()); + // Back up the address, since we have already bumped the mdp. + count_offset -= in_bytes(VirtualCallData::virtual_call_data_size()); + + // *Decrement* the counter. We expect to see zero or small negatives. + increment_mdp_data_at(mdp, count_offset, true); + + bind (profile_continue); + } +} + + +void InterpreterMacroAssembler::profile_typecheck(Register mdp, Register klass, Register reg2) { + if (ProfileInterpreter) { + Label profile_continue; + + // If no method data exists, go to profile_continue. + test_method_data_pointer(mdp, profile_continue); + + // The method data pointer needs to be updated. + int mdp_delta = in_bytes(BitData::bit_data_size()); + if (TypeProfileCasts) { + mdp_delta = in_bytes(VirtualCallData::virtual_call_data_size()); + + // Record the object type. + record_klass_in_profile(klass, mdp, reg2, false); + } + update_mdp_by_constant(mdp, mdp_delta); + + bind(profile_continue); + } +} + + +void InterpreterMacroAssembler::profile_switch_default(Register mdp) { + if (ProfileInterpreter) { + Label profile_continue; + + // If no method data exists, go to profile_continue. + test_method_data_pointer(mdp, profile_continue); + + // Update the default case count + increment_mdp_data_at(mdp, + in_bytes(MultiBranchData::default_count_offset())); + + // The method data pointer needs to be updated. + update_mdp_by_offset(mdp, + in_bytes(MultiBranchData:: + default_displacement_offset())); + + bind(profile_continue); + } +} + + +void InterpreterMacroAssembler::profile_switch_case(Register index, + Register mdp, + Register reg2) { + if (ProfileInterpreter) { + Label profile_continue; + + // If no method data exists, go to profile_continue. + test_method_data_pointer(mdp, profile_continue); + + // Build the base (index * per_case_size_in_bytes()) + + // case_array_offset_in_bytes() + movw(reg2, in_bytes(MultiBranchData::per_case_size())); + mull(index, reg2, index); + addptr(index, in_bytes(MultiBranchData::case_array_offset()), index); + + // Update the case count + increment_mdp_data_at(mdp, + index, + in_bytes(MultiBranchData::relative_count_offset())); + + // The method data pointer needs to be updated. + update_mdp_by_offset(mdp, + index, + in_bytes(MultiBranchData:: + relative_displacement_offset())); + + bind(profile_continue); + } +} + + + +void InterpreterMacroAssembler::_interp_verify_oop(Register reg, TosState state, const char* file, int line) { + if (state == atos) { + MacroAssembler::_verify_oop_checked(reg, "broken oop", file, line); + } +} + +void InterpreterMacroAssembler::verify_FPU(int stack_depth, TosState state) { ; } + +// Jump if ((*counter_addr += increment) & mask) satisfies the condition. 
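+// The counter is read as a 32-bit value (unless it was preloaded into
+// 'scratch'), bumped by 'increment' and written back; it is then AND-ed with
+// the 32-bit value at 'mask', and the branch to 'where' is taken when the
+// masked result satisfies 'cond'. 'scratch' must not alias rcc, which is
+// clobbered by the mask load.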
+void InterpreterMacroAssembler::increment_mask_and_jump(Address counter_addr, + int increment, Address mask, + Register scratch, bool preloaded, + Condition cond, Label* where) { + assert_different_registers(scratch, rcc); + if (!preloaded) { + ldwu(scratch, counter_addr); + } + incrementw(scratch, increment); + stw(scratch, counter_addr); + ldwu(rcc, mask); + andw(scratch, rcc, scratch); + if (where != NULL) { + jcc(cond, *where, scratch); + } +} + +void InterpreterMacroAssembler::notify_method_entry() {SCOPEMARK_NAME(notify_method_entry, this) + // Whenever JVMTI is interp_only_mode, method entry/exit events are sent to + // track stack depth. If it is possible to enter interp_only_mode we add + // the code to check if the event should be sent. + Register rdx = rscratch4; + Register rarg = c_rarg1; + if (JvmtiExport::can_post_interpreter_events()) { + Label L; + get_thread(rthread); + ldw(rdx, Address(rthread, JavaThread::interp_only_mode_offset())); + jcc(Assembler::zero, L, rdx); + call_VM(noreg, CAST_FROM_FN_PTR(address, + InterpreterRuntime::post_method_entry)); + bind(L); + } + + { + SkipIfEqual skip_if(this, &DTraceMethodProbes, 0); + get_method(rarg); + call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry), + rthread, rarg); + } + + // RedefineClasses() tracing support for obsolete method entry + if (log_is_enabled(Trace, redefine, class, obsolete)) { + NOT_LP64(get_thread(rthread);) + get_method(rarg); + call_VM_leaf( + CAST_FROM_FN_PTR(address, SharedRuntime::rc_trace_method_entry), + rthread, rarg); + } +} + + +void InterpreterMacroAssembler::notify_method_exit( + TosState state, NotifyMethodExitMode mode) {SCOPEMARK_NAME(notify_method_exit, this) + // Whenever JVMTI is interp_only_mode, method entry/exit events are sent to + // track stack depth. If it is possible to enter interp_only_mode we add + // the code to check if the event should be sent. + Register rdx = rscratch4; + Register rarg = c_rarg1; + if (mode == NotifyJVMTI && JvmtiExport::can_post_interpreter_events()) { + Label L; + // Note: frame::interpreter_frame_result has a dependency on how the + // method result is saved across the call to post_method_exit. If this + // is changed then the interpreter_frame_result implementation will + // need to be updated too. + + // template interpreter will leave the result on the top of the stack. + push(state); + ldw(rdx, Address(rthread, JavaThread::interp_only_mode_offset())); + jcc(Assembler::zero, L, rdx); + call_VM(noreg, + CAST_FROM_FN_PTR(address, InterpreterRuntime::post_method_exit)); + bind(L); + pop(state); + } + + { + SkipIfEqual skip(this, &DTraceMethodProbes, false); + push(state); + get_thread(rthread); + get_method(rarg); + call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit), + rthread, rarg); + pop(state); + } +} diff --git a/src/hotspot/cpu/sw64/interp_masm_sw64.hpp b/src/hotspot/cpu/sw64/interp_masm_sw64.hpp new file mode 100644 index 00000000000..b999bf09f83 --- /dev/null +++ b/src/hotspot/cpu/sw64/interp_masm_sw64.hpp @@ -0,0 +1,304 @@ +/* + * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, 2015, Red Hat Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_SW64_VM_INTERP_MASM_SW64_64_HPP +#define CPU_SW64_VM_INTERP_MASM_SW64_64_HPP + +#include "asm/macroAssembler.hpp" +#include "interpreter/invocationCounter.hpp" +#include "oops/method.hpp" +#include "runtime/frame.hpp" + +// This file specializes the assember with interpreter-specific macros + +typedef ByteSize (*OffsetFunction)(uint); + +class InterpreterMacroAssembler: public MacroAssembler { + protected: + + // Interpreter specific version of call_VM_base + using MacroAssembler::call_VM_leaf_base; + + // Interpreter specific version of call_VM_base + virtual void call_VM_leaf_base(address entry_point, + int number_of_arguments); + + virtual void call_VM_base(Register oop_result, + Register java_thread, + Register last_java_sp, + address entry_point, + int number_of_arguments, + bool check_exceptions); + + // base routine for all dispatches + void dispatch_base(TosState state, address* table, bool verifyoop = true, bool generate_poll = false); + + public: + InterpreterMacroAssembler(CodeBuffer* code) : MacroAssembler(code), + _locals_register(rlocals), + _bcp_register(rbcp) {} + + void jump_to_entry(address entry); + + virtual void check_and_handle_popframe(Register java_thread); + virtual void check_and_handle_earlyret(Register java_thread); + + void load_earlyret_value(TosState state); + + // Interpreter-specific registers + void save_bcp() {assert(_bcp_register == rbcp, "_bcp_register should rbcp"); + stptr(_bcp_register, Address(rfp, frame::interpreter_frame_bcp_offset * wordSize)); + } + + void restore_bcp() {assert(_bcp_register == rbcp, "_bcp_register should rbcp"); + ldptr(_bcp_register, Address(rfp, frame::interpreter_frame_bcp_offset * wordSize)); + } + + void restore_locals() {assert(_locals_register == rlocals, "_locals_register should rlocals"); + ldptr(_locals_register, Address(rfp, frame::interpreter_frame_locals_offset * wordSize)); + } + + // Helpers for runtime call arguments/results + void get_method(Register reg) { + ldptr(reg, Address(rfp, frame::interpreter_frame_method_offset * wordSize)); + } + + void get_const(Register reg) { + get_method(reg); + ldptr(reg, Address(reg, Method::const_offset())); + } + + void get_constant_pool(Register reg) { + get_const(reg); + ldptr(reg, Address(reg, ConstMethod::constants_offset())); + } + + void get_constant_pool_cache(Register reg) { + get_constant_pool(reg); + ldptr(reg, Address(reg, ConstantPool::cache_offset_in_bytes())); + } + + void get_cpool_and_tags(Register cpool, Register tags) { + get_constant_pool(cpool); + ldptr(tags, Address(cpool, ConstantPool::tags_offset_in_bytes())); + } + + void get_unsigned_2_byte_index_at_bcp(Register reg, int bcp_offset); + void get_cache_and_index_at_bcp(Register cache, + Register index, + int bcp_offset, + size_t index_size = sizeof(u2)); + void 
get_cache_and_index_and_bytecode_at_bcp(Register cache, + Register index, + Register bytecode, + int byte_no, + int bcp_offset, + size_t index_size = sizeof(u2)); + void get_cache_entry_pointer_at_bcp(Register cache, + Register tmp, + int bcp_offset, + size_t index_size = sizeof(u2)); + void get_cache_index_at_bcp(Register index, + int bcp_offset, + size_t index_size = sizeof(u2)); + + // load cpool->resolved_references(index); + void load_resolved_reference_at_index(Register result, Register index, Register tmp = rscratch2); + + // load cpool->resolved_klass_at(index) + void load_resolved_klass_at_index(Register klass, // contains the Klass on return + Register cpool, // the constant pool (corrupted on return) + Register index); // the constant pool index (corrupted on return) + + void load_resolved_method_at_index(int byte_no, + Register method, + Register cache, + Register index); + + void pop_ptr(Register r = FSR); + void pop_i(Register r = FSR); + void push_ptr(Register r = FSR); + void push_i(Register r = FSR); + +// // push_i_or_ptr is provided for when explicitly allowing either a ptr or +// // an int might have some advantage, while still documenting the fact that a +// // ptr might be pushed to the stack. +// void push_i_or_ptr(Register r = rax); + + void push_f(FloatRegister r = FSF); + void pop_f(FloatRegister r = FSF); + void pop_d(FloatRegister r = FSF); + void push_d(FloatRegister r = FSF); + + void pop_l(Register r = FSR); + void push_l(Register r = FSR); + + void pop(Register r) { ((MacroAssembler*)this)->pop(r); } + void push(Register r) { ((MacroAssembler*)this)->push(r); } + void push(int32_t imm ) { ((MacroAssembler*)this)->push(imm); } + + void pop(TosState state); // transition vtos -> state + void push(TosState state); // transition state -> vtos + +// void pop(RegSet regs, Register stack) { ((MacroAssembler*)this)->pop(regs, stack); } +// void push(RegSet regs, Register stack) { ((MacroAssembler*)this)->push(regs, stack); } + + void empty_expression_stack() { + ldptr(esp, Address(rfp, frame::interpreter_frame_monitor_block_top_offset * wordSize)); + // NULL last_sp until next java call + stptr(R0, Address(rfp, frame::interpreter_frame_last_sp_offset * wordSize)); + } + + // Helpers for swap and dup + void load_ptr(int n, Register val); + void store_ptr(int n, Register val); + + // Generate a subtype check: branch to ok_is_subtype if sub_klass is + // a subtype of super_klass. + void gen_subtype_check( Register sub_klass, Label &ok_is_subtype ); + + // Dispatching + void dispatch_prolog(TosState state, int step = 0); + void dispatch_epilog(TosState state, int step = 0); + // dispatch via rbx (assume rbx is loaded already) + void dispatch_only(TosState state, bool generate_poll = false); + // dispatch normal table via rbx (assume rbx is loaded already) + void dispatch_only_normal(TosState state); + void dispatch_only_noverify(TosState state); + // load rbx from [_bcp_register + step] and dispatch via rbx + void dispatch_next(TosState state, int step = 0, bool generate_poll = false); + // load rbx from [_bcp_register] and dispatch via rbx and table + void dispatch_via (TosState state, address* table); + + // jump to an invoked target + void prepare_to_jump_from_interpreted(); + void jump_from_interpreted(Register method, Register temp); + + // narrow int return value + void narrow(Register result); + + // Returning from interpreted functions + // + // Removes the current activation (incl. unlocking of monitors) + // and sets up the return address. 
This code is also used for + // exception unwindwing. In that case, we do not want to throw + // IllegalMonitorStateExceptions, since that might get us into an + // infinite rethrow exception loop. + // Additionally this code is used for popFrame and earlyReturn. + // In popFrame case we want to skip throwing an exception, + // installing an exception, and notifying jvmdi. + // In earlyReturn case we only want to skip throwing an exception + // and installing an exception. + void remove_activation(TosState state, Register ret_addr, + bool throw_monitor_exception = true, + bool install_monitor_exception = true, + bool notify_jvmdi = true); + void get_method_counters(Register method, Register mcs, Label& skip); + + // Object locking + void lock_object (Register lock_reg); + void unlock_object(Register lock_reg); + + // Interpreter profiling operations + void set_method_data_pointer_for_bcp(); + void test_method_data_pointer(Register mdp, Label& zero_continue); + void verify_method_data_pointer(); + + void set_mdp_data_at(Register mdp_in, int constant, Register value); + void increment_mdp_data_at(Address data, bool decrement = false); + void increment_mdp_data_at(Register mdp_in, int constant, + bool decrement = false); + void increment_mdp_data_at(Register mdp_in, Register reg, int constant, + bool decrement = false); + void increment_mask_and_jump(Address counter_addr, + int increment, Address mask, + Register scratch, bool preloaded, + Condition cond, Label* where); + void set_mdp_flag_at(Register mdp_in, int flag_constant); + void test_mdp_data_at(Register mdp_in, int offset, Register value, + Register test_value_out, + Label& not_equal_continue); + + void record_klass_in_profile(Register receiver, Register mdp, + Register reg2, bool is_virtual_call); + void record_klass_in_profile_helper(Register receiver, Register mdp, + Register reg2, int start_row, + Label& done, bool is_virtual_call); + void record_item_in_profile_helper(Register item, Register mdp, + Register reg2, int start_row, Label& done, int total_rows, + OffsetFunction item_offset_fn, OffsetFunction item_count_offset_fn, + int non_profiled_offset); + + void update_mdp_by_offset(Register mdp_in, int offset_of_offset); + void update_mdp_by_offset(Register mdp_in, Register reg, int offset_of_disp); + void update_mdp_by_constant(Register mdp_in, int constant); + void update_mdp_for_ret(Register return_bci); + + void profile_taken_branch(Register mdp, Register bumped_count); + void profile_not_taken_branch(Register mdp); + void profile_call(Register mdp); + void profile_final_call(Register mdp); + void profile_virtual_call(Register receiver, Register mdp, + Register scratch2, + bool receiver_can_be_null = false); + void profile_called_method(Register method, Register mdp, Register reg2) NOT_JVMCI_RETURN; + void profile_ret(Register return_bci, Register mdp); + void profile_null_seen(Register mdp); + void profile_typecheck(Register mdp, Register klass, Register scratch); + void profile_typecheck_failed(Register mdp); + void profile_switch_default(Register mdp); + void profile_switch_case(Register index_in_scratch, Register mdp, + Register scratch2); + + // Debugging + // only if +VerifyOops && state == atos +#define interp_verify_oop(reg, state) _interp_verify_oop(reg, state, __FILE__, __LINE__); + void _interp_verify_oop(Register reg, TosState state, const char* file, int line); + // only if +VerifyFPU && (state == ftos || state == dtos) + void verify_FPU(int stack_depth, TosState state = ftos); + + typedef enum { NotifyJVMTI, 
SkipNotifyJVMTI } NotifyMethodExitMode; + + // support for jvmti/dtrace + void notify_method_entry(); + void notify_method_exit(TosState state, NotifyMethodExitMode mode); + + void get_2_byte_integer_at_bcp(Register reg, Register tmp, int offset); + void get_4_byte_integer_at_bcp(Register reg, Register tmp, int offset); + + private: + + Register _locals_register; // register that contains the pointer to the locals + Register _bcp_register; // register that contains the bcp + + public: + void profile_obj_type(Register obj, const Address& mdo_addr); + void profile_arguments_type(Register mdp, Register callee, Register tmp, bool is_virtual); + void profile_return_type(Register mdp, Register ret, Register tmp); + void profile_parameters_type(Register mdp, Register tmp1, Register tmp2); + +}; + +#endif // CPU_SW64_VM_INTERP_MASM_SW64_64_HPP diff --git a/src/hotspot/cpu/sw64/interpreterRT_sw64.cpp b/src/hotspot/cpu/sw64/interpreterRT_sw64.cpp new file mode 100644 index 00000000000..f45b04d5046 --- /dev/null +++ b/src/hotspot/cpu/sw64/interpreterRT_sw64.cpp @@ -0,0 +1,323 @@ +/* + * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "interpreter/interp_masm.hpp" +#include "interpreter/interpreter.hpp" +#include "interpreter/interpreterRuntime.hpp" +#include "memory/allocation.inline.hpp" +#include "oops/method.hpp" +#include "oops/oop.inline.hpp" +#include "runtime/handles.inline.hpp" +#include "runtime/icache.hpp" +#include "runtime/interfaceSupport.inline.hpp" +#include "runtime/signature.hpp" + +#define __ _masm-> +#ifdef PRODUCT +#define BLOCK_COMMENT(str) /* nothing */ +#else +#define BLOCK_COMMENT(str) { char line[1024];sprintf(line,"%s:%s:%d",str,__FILE__, __LINE__); __ block_comment(line);} +#endif + +#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") + +// Implementation of SignatureHandlerGenerator + +InterpreterRuntime::SignatureHandlerGenerator::SignatureHandlerGenerator(const methodHandle& method, CodeBuffer* buffer) : + NativeSignatureIterator(method) { + _masm = new MacroAssembler(buffer); + _num_args = (method->is_static() ? 1 : 0); + _stack_offset = 0; // don't overwrite return address + _floatreg_start_index = FloatRegisterImpl::float_arg_base + 1; //because a0(16) must be env in JNI, so float parameter register should start 17. 
same reason in generator_slow_signature +} + +Register InterpreterRuntime::SignatureHandlerGenerator::from() { return rlocals; } +Register InterpreterRuntime::SignatureHandlerGenerator::to() { return esp; } +Register InterpreterRuntime::SignatureHandlerGenerator::temp() { return rscratch1; } + +void InterpreterRuntime::SignatureHandlerGenerator::pass_int() { + const Address src(from(), Interpreter::local_offset_in_bytes(offset())); + switch (_num_args) { + case 0: + __ ldws(c_rarg1, src); + _num_args++; + break; + case 1: + __ ldws(c_rarg2, src); + _num_args++; + break; + case 2: + __ ldws(c_rarg3, src); + _num_args++; + break; + case 3: + __ ldws(c_rarg4, src); + _num_args++; + break; + case 4: + __ ldws(c_rarg5, src); + _num_args++; + break; + default: + __ ldws(V0, src); + __ stw(V0, Address(to(), _stack_offset)); + _stack_offset += wordSize; + break; + } +} + +void InterpreterRuntime::SignatureHandlerGenerator::pass_long() { + const Address src(from(), Interpreter::local_offset_in_bytes(offset() + 1)); + + switch (_num_args) { + case 0: + __ ldptr(c_rarg1, src); + _num_args++; + break; + case 1: + __ ldptr(c_rarg2, src); + _num_args++; + break; + case 2: + __ ldptr(c_rarg3, src); + _num_args++; + break; + case 3: + __ ldptr(c_rarg4, src); + _num_args++; + break; + case 4: + __ ldptr(c_rarg5, src); + _num_args++; + break; + default: + __ ldptr(V0, src); + __ stptr(V0, Address(to(), _stack_offset)); + _stack_offset += wordSize; + break; + } +} + +void InterpreterRuntime::SignatureHandlerGenerator::pass_float() { + const Address src(from(), Interpreter::local_offset_in_bytes(offset())); + + if (_num_args < Argument::n_float_register_parameters_c-1) { + __ flds(as_FloatRegister(_floatreg_start_index + _num_args), src); + _num_args++; + } else { + __ ldws(V0, src); + __ stw(V0, Address(to(), _stack_offset)); + _stack_offset += wordSize; + } +} + +void InterpreterRuntime::SignatureHandlerGenerator::pass_double() { + const Address src(from(), Interpreter::local_offset_in_bytes(offset() + 1)); + + if (_num_args < Argument::n_float_register_parameters_c-1) { + __ fldd(as_FloatRegister(_floatreg_start_index + _num_args), src); + _num_args++; + } else { + __ ldptr(V0, src); + __ stptr(V0, Address(to(), _stack_offset)); + _stack_offset += wordSize; + } +} + +void InterpreterRuntime::SignatureHandlerGenerator::pass_object() { + const Address src(from(), Interpreter::local_offset_in_bytes(offset())); + Register rax = V0; + + switch (_num_args) { + case 0: + assert(offset() == 0, "argument register 1 can only be (non-null) receiver"); + __ lea(c_rarg1, src); + _num_args++; + break; + case 1: + __ lea(rax, src); + __ movl(c_rarg2, R0); + __ cmpptr(src, R0); + __ cmove(Assembler::notEqual, c_rarg2, rax, c_rarg2); + _num_args++; + break; + case 2: + __ lea(rax, src); + __ movl(c_rarg3, R0); + __ cmpptr(src, R0); + __ cmove(Assembler::notEqual, c_rarg3, rax, c_rarg3); + _num_args++; + break; + case 3: + __ lea(rax, src); + __ movl(c_rarg4, R0); + __ cmpptr(src, R0); + __ cmove(Assembler::notEqual, c_rarg4, rax, c_rarg4); + _num_args++; + break; + case 4: + __ lea(rax, src); + __ movl(c_rarg5, R0); + __ cmpptr(src, R0); + __ cmove(Assembler::notEqual, c_rarg5, rax, c_rarg5); + _num_args++; + break; + default: + __ lea(rax, src); + __ movl(temp(), R0); + __ cmpptr(src, R0); + __ cmove(Assembler::notEqual, temp(), rax, temp()); + __ stptr(temp(), Address(to(), _stack_offset)); + _stack_offset += wordSize; + break; + } +} + +void InterpreterRuntime::SignatureHandlerGenerator::generate(uint64_t fingerprint) { 
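+  // iterate(fingerprint) walks the signature encoded in 'fingerprint' and
+  // calls the pass_* routines above for each Java argument, moving it from
+  // the locals area into the SW64 C calling convention: integer and oop
+  // arguments go to c_rarg1..c_rarg5 (c_rarg0 is reserved for the JNIEnv),
+  // floating-point arguments start at _floatreg_start_index, and any
+  // remaining arguments are spilled to the native stack. The address of the
+  // result handler for the method's return type is then left in V0.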
+ // generate code to handle arguments + iterate(fingerprint); + + // return result handler + __ lea(V0, ExternalAddress(Interpreter::result_handler(method()->result_type()))); + // return + __ ret_sw(); + + __ flush(); +} + + +// Implementation of SignatureHandlerLibrary + +void SignatureHandlerLibrary::pd_set_handler(address handler) {} + + +class SlowSignatureHandler + : public NativeSignatureIterator { + private: + address _from; + intptr_t* _to; + intptr_t* _reg_args; + intptr_t* _fp_identifiers; + unsigned int _num_args; + + virtual void pass_int() + { + jint from_obj = *(jint *)(_from+Interpreter::local_offset_in_bytes(0)); + _from -= Interpreter::stackElementSize; + + if (_num_args < Argument::n_int_register_parameters_c-1) { + *_reg_args++ = from_obj; + _num_args++; + } else { + *_to++ = from_obj; + } + } + + virtual void pass_long() + { + intptr_t from_obj = *(intptr_t*)(_from+Interpreter::local_offset_in_bytes(1)); + _from -= 2*Interpreter::stackElementSize; + + if (_num_args < Argument::n_int_register_parameters_c-1) { + *_reg_args++ = from_obj; + _num_args++; + } else { + *_to++ = from_obj; + } + } + + virtual void pass_object() + { + intptr_t *from_addr = (intptr_t*)(_from + Interpreter::local_offset_in_bytes(0)); + _from -= Interpreter::stackElementSize; + if (_num_args < Argument::n_int_register_parameters_c-1) { + *_reg_args++ = (*from_addr == 0) ? NULL : (intptr_t) from_addr; + _num_args++; + } else { + *_to++ = (*from_addr == 0) ? NULL : (intptr_t) from_addr; + } + } + + virtual void pass_float() + { + jint from_obj = *(jint *)(_from+Interpreter::local_offset_in_bytes(0)); + _from -= Interpreter::stackElementSize; + + if (_num_args < Argument::n_float_register_parameters_c-1) { + assert((_num_args*2) < BitsPerWord, "_num_args*2 is out of range"); + *_reg_args++ = from_obj; + *_fp_identifiers |= ((intptr_t)0x01 << (_num_args*2)); // mark as float + _num_args++; + } else { + *_to++ = from_obj; + } + } + + virtual void pass_double() + { + intptr_t from_obj = *(intptr_t*)(_from+Interpreter::local_offset_in_bytes(1)); + _from -= 2*Interpreter::stackElementSize; + + if (_num_args < Argument::n_float_register_parameters_c-1) { + assert((_num_args*2) < BitsPerWord, "_num_args*2 is out of range"); + *_reg_args++ = from_obj; + *_fp_identifiers |= ((intptr_t)0x3 << (_num_args*2)); // mark as double + _num_args++; + } else { + *_to++ = from_obj; + } + } + + public: + SlowSignatureHandler(const methodHandle& method, address from, intptr_t* to) + : NativeSignatureIterator(method) + { + _from = from; + _to = to; + + _reg_args = to - (method->is_static() ? 6 : 7); + _fp_identifiers = to - 2; + *(int*) _fp_identifiers = 0; + _num_args = (method->is_static() ? 1 : 0); + } +}; + + +JRT_ENTRY(address, + InterpreterRuntime::slow_signature_handler(JavaThread* current, + Method* method, + intptr_t* from, + intptr_t* to)) + methodHandle m(current, (Method*)method); + assert(m->is_native(), "sanity check"); + + // handle arguments + SlowSignatureHandler(m, (address)from, to).iterate((uint64_t)CONST64(-1));//sw doesn't need to modify 'to' position + + // return result handler + return Interpreter::result_handler(m->result_type()); +JRT_END diff --git a/src/hotspot/cpu/sw64/interpreterRT_sw64.hpp b/src/hotspot/cpu/sw64/interpreterRT_sw64.hpp new file mode 100644 index 00000000000..a9de9f6c5b5 --- /dev/null +++ b/src/hotspot/cpu/sw64/interpreterRT_sw64.hpp @@ -0,0 +1,60 @@ +/* + * Copyright (c) 1998, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. 
All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_SW64_VM_INTERPRETERRT_SW64_HPP +#define CPU_SW64_VM_INTERPRETERRT_SW64_HPP + +// This is included in the middle of class Interpreter. +// Do not include files here. + +// native method calls + +class SignatureHandlerGenerator: public NativeSignatureIterator { + private: + MacroAssembler* _masm; + unsigned int _num_args; + int _stack_offset; + int _floatreg_start_index; //because a0(16) must be env in JNI, so float parameter register should start 17. same reason in generator_slow_signature + + void pass_int(); + void pass_long(); + void pass_float(); + void pass_double(); + void pass_object(); + + public: + // Creation + SignatureHandlerGenerator(const methodHandle& method, CodeBuffer* buffer); + + // Code generation + void generate(uint64_t fingerprint); + + // Code generation support + static Register from(); + static Register to(); + static Register temp(); +}; + +#endif // CPU_SW64_VM_INTERPRETERRT_SW64_HPP diff --git a/src/hotspot/cpu/sw64/javaFrameAnchor_sw64.hpp b/src/hotspot/cpu/sw64/javaFrameAnchor_sw64.hpp new file mode 100644 index 00000000000..4111c5a1c3a --- /dev/null +++ b/src/hotspot/cpu/sw64/javaFrameAnchor_sw64.hpp @@ -0,0 +1,83 @@ +/* + * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef CPU_SW64_VM_JAVAFRAMEANCHOR_SW64_HPP +#define CPU_SW64_VM_JAVAFRAMEANCHOR_SW64_HPP + +private: + + // FP value associated with _last_Java_sp: + intptr_t* volatile _last_Java_fp; // pointer is volatile not what it points to + +public: + // Each arch must define reset, save, restore + // These are used by objects that only care about: + // 1 - initializing a new state (thread creation, javaCalls) + // 2 - saving a current state (javaCalls) + // 3 - restoring an old state (javaCalls) + + void clear(void) { + // clearing _last_Java_sp must be first + _last_Java_sp = NULL; + OrderAccess::release(); + _last_Java_fp = NULL; + _last_Java_pc = NULL; + } + + void copy(JavaFrameAnchor* src) { + // In order to make sure the transition state is valid for "this" + // We must clear _last_Java_sp before copying the rest of the new data + // + // Hack Alert: Temporary bugfix for 4717480/4721647 + // To act like previous version (pd_cache_state) don't NULL _last_Java_sp + // unless the value is changing + // + if (_last_Java_sp != src->_last_Java_sp) { + _last_Java_sp = NULL; + OrderAccess::release(); + } + _last_Java_fp = src->_last_Java_fp; + _last_Java_pc = src->_last_Java_pc; + // Must be last so profiler will always see valid frame if has_last_frame() is true + _last_Java_sp = src->_last_Java_sp; + } + + bool walkable(void) { return _last_Java_sp != NULL && _last_Java_pc != NULL; } + + void make_walkable(); + + intptr_t* last_Java_sp(void) const { return _last_Java_sp; } + + address last_Java_pc(void) { return _last_Java_pc; } + + static ByteSize last_Java_fp_offset() { return byte_offset_of(JavaFrameAnchor, _last_Java_fp); } + +public: + + void set_last_Java_sp(intptr_t* sp) { _last_Java_sp = sp; OrderAccess::release(); } + + intptr_t* last_Java_fp(void) { return _last_Java_fp; } + +#endif // CPU_SW64_VM_JAVAFRAMEANCHOR_SW64_HPP diff --git a/src/hotspot/cpu/sw64/jniFastGetField_sw64.cpp b/src/hotspot/cpu/sw64/jniFastGetField_sw64.cpp new file mode 100644 index 00000000000..9cf8180c0f6 --- /dev/null +++ b/src/hotspot/cpu/sw64/jniFastGetField_sw64.cpp @@ -0,0 +1,243 @@ +/* + * Copyright (c) 2004, 2017, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "code/codeBlob.hpp" +#include "gc/shared/barrierSet.hpp" +#include "gc/shared/barrierSetAssembler.hpp" +#include "memory/resourceArea.hpp" +#include "prims/jniFastGetField.hpp" +#include "prims/jvm_misc.hpp" +#include "prims/jvmtiExport.hpp" +#include "runtime/safepoint.hpp" + +#define __ masm-> + +#define BUFFER_SIZE 30*wordSize + +#ifdef PRODUCT +#define BLOCK_COMMENT(str) /* nothing */ +#else +#define BLOCK_COMMENT(str) { char line[1024];sprintf(line,"%s:%s:%d",str,__FILE__, __LINE__); __ block_comment(line);} +#endif +#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") +// Instead of issuing a LoadLoad barrier we create an address +// dependency between loads; this might be more efficient. + +// Common register usage: +// r0/v0: result +// c_rarg0: jni env +// c_rarg1: obj +// c_rarg2: jfield id + +#define BUFFER_SIZE 30*wordSize + +static const Register rtmp = T1; +static const Register robj = T2; +static const Register rcounter = T3; +static const Register roffset = T4; +static const Register rcounter_addr = T5; + +address JNI_FastGetField::generate_fast_get_int_field0(BasicType type) { + const char *name = NULL; + switch (type) { + case T_BOOLEAN: name = "jni_fast_GetBooleanField"; break; + case T_BYTE: name = "jni_fast_GetByteField"; break; + case T_CHAR: name = "jni_fast_GetCharField"; break; + case T_SHORT: name = "jni_fast_GetShortField"; break; + case T_INT: name = "jni_fast_GetIntField"; break; + case T_LONG: name = "jni_fast_GetLongField"; break; + default: ShouldNotReachHere(); + } + ResourceMark rm; + BufferBlob* blob = BufferBlob::create(name, BUFFER_SIZE); + CodeBuffer cbuf(blob); + MacroAssembler* masm = new MacroAssembler(&cbuf); + address fast_entry = __ pc(); + + Label slow; + + ExternalAddress counter(SafepointSynchronize::safepoint_counter_addr()); + __ ldwu (rcounter, counter); + __ movl (robj, c_rarg1); + __ testb (rcounter, 1); + __ jcc (Assembler::notZero, slow); + + if (JvmtiExport::can_post_field_access()) { + // Check to see if a field access watch has been set before we + // take the fast path. + assert_different_registers(rscratch1, robj, rcounter); // cmp32 clobbers rscratch1! + __ memb(); + __ cmpw(ExternalAddress((address) JvmtiExport::get_field_access_count_addr()), 0); + __ jcc(Assembler::notZero, slow); + } + + __ movl (roffset, c_rarg2); + __ srll(roffset, 2, roffset); // offset + + // Both robj and rtmp are clobbered by try_resolve_jobject_in_native. 
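+  // The fast path is speculative: the safepoint counter was loaded into
+  // rcounter above, the jobject in robj is resolved to a raw oop here, the
+  // field is loaded, and the counter is re-read and compared below. If it
+  // changed (a safepoint may have started or completed, so the object may
+  // have moved), the loaded value is discarded and the slow-path JNI
+  // accessor is called instead.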
+ BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); + bs->try_resolve_jobject_in_native(masm, /* jni_env */ c_rarg0, robj, rtmp, slow); + DEBUG_ONLY(__ movw(rtmp, 0xDEADC0DE);) + + Register rax = V0; + assert(count < LIST_CAPACITY, "LIST_CAPACITY too small"); + speculative_load_pclist[count] = __ pc(); + switch (type) { + case T_BOOLEAN: __ ldbu (rax, Address(robj, roffset, Address::times_1)); break; + case T_BYTE: __ ldbu (AT, Address(robj, roffset, Address::times_1));__ sextb(AT, rax); break; + case T_CHAR: __ ldhu (rax, Address(robj, roffset, Address::times_1)); break; + case T_SHORT: __ ldhu (AT, Address(robj, roffset, Address::times_1));__ sexth(AT, rax); break; + case T_INT: __ ldws (rax, Address(robj, roffset, Address::times_1)); break; + case T_LONG: __ ldl (rax, Address(robj, roffset, Address::times_1)); break; + default: ShouldNotReachHere(); + } + + + __ lea(rcounter_addr, counter); + __ memb(); + __ cmpw (rcounter, Address(rcounter_addr, 0)); + __ jcc (Assembler::notEqual, slow); + + __ ret_sw(); + + slowcase_entry_pclist[count++] = __ pc(); + __ bind (slow); + address slow_case_addr = NULL; + switch (type) { + case T_BOOLEAN: slow_case_addr = jni_GetBooleanField_addr(); break; + case T_BYTE: slow_case_addr = jni_GetByteField_addr(); break; + case T_CHAR: slow_case_addr = jni_GetCharField_addr(); break; + case T_SHORT: slow_case_addr = jni_GetShortField_addr(); break; + case T_INT: slow_case_addr = jni_GetIntField_addr(); break; + case T_LONG: slow_case_addr = jni_GetLongField_addr(); break; + default: break; + } + // tail call + __ jump (ExternalAddress(slow_case_addr)); + + __ flush (); + + return fast_entry; +} + +address JNI_FastGetField::generate_fast_get_boolean_field() { + return generate_fast_get_int_field0(T_BOOLEAN); +} + +address JNI_FastGetField::generate_fast_get_byte_field() { + return generate_fast_get_int_field0(T_BYTE); +} + +address JNI_FastGetField::generate_fast_get_char_field() { + return generate_fast_get_int_field0(T_CHAR); +} + +address JNI_FastGetField::generate_fast_get_short_field() { + return generate_fast_get_int_field0(T_SHORT); +} + +address JNI_FastGetField::generate_fast_get_int_field() { + return generate_fast_get_int_field0(T_INT); +} + +address JNI_FastGetField::generate_fast_get_long_field() { + return generate_fast_get_int_field0(T_LONG); +} + +address JNI_FastGetField::generate_fast_get_float_field0(BasicType type) { + const char *name =NULL; + switch (type) { + case T_FLOAT: name = "jni_fast_GetFloatField"; break; + case T_DOUBLE: name = "jni_fast_GetDoubleField"; break; + default: ShouldNotReachHere(); + } + ResourceMark rm; + BufferBlob* blob = BufferBlob::create(name, BUFFER_SIZE); + CodeBuffer cbuf(blob); + MacroAssembler* masm = new MacroAssembler(&cbuf); + address fast_entry = __ pc(); + + Label slow; + + ExternalAddress counter(SafepointSynchronize::safepoint_counter_addr()); + __ ldwu (rcounter, counter); + __ movl (robj, c_rarg1); + __ testb (rcounter, 1); + __ jcc (Assembler::notZero, slow); + + if (JvmtiExport::can_post_field_access()) { + // Check to see if a field access watch has been set before we + // take the fast path. + __ cmpw(ExternalAddress((address) JvmtiExport::get_field_access_count_addr()), 0); + __ jcc(Assembler::notZero, slow); + } + + // Both robj and rtmp are clobbered by try_resolve_jobject_in_native. 
+ BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); + bs->try_resolve_jobject_in_native(masm, /* jni_env */ c_rarg0, robj, rtmp, slow); + DEBUG_ONLY(__ movw(rtmp, 0xDEADC0DE);) + + __ movl (roffset, c_rarg2); + __ srll (roffset, 2, roffset); // offset + + assert(count < LIST_CAPACITY, "LIST_CAPACITY too small"); + speculative_load_pclist[count] = __ pc(); + switch (type) { + case T_FLOAT: __ load_float (FSF, Address(robj, roffset, Address::times_1)); break; + case T_DOUBLE: __ load_double(FSF, Address(robj, roffset, Address::times_1)); break; + default: ShouldNotReachHere(); + } + + __ lea(rcounter_addr, counter); + __ cmpw (rcounter, Address(rcounter_addr, 0)); + __ jcc (Assembler::notEqual, slow); + + __ ret_sw(); + + slowcase_entry_pclist[count++] = __ pc(); + __ bind (slow); + address slow_case_addr = NULL; + switch (type) { + case T_FLOAT: slow_case_addr = jni_GetFloatField_addr(); break; + case T_DOUBLE: slow_case_addr = jni_GetDoubleField_addr(); break; + default: break; + } + // tail call + __ jump (ExternalAddress(slow_case_addr)); + + __ flush (); + + return fast_entry; +} + +address JNI_FastGetField::generate_fast_get_float_field() { + return generate_fast_get_float_field0(T_FLOAT); +} + +address JNI_FastGetField::generate_fast_get_double_field() { + return generate_fast_get_float_field0(T_DOUBLE); +} diff --git a/src/hotspot/cpu/sw64/jniTypes_sw64.hpp b/src/hotspot/cpu/sw64/jniTypes_sw64.hpp new file mode 100644 index 00000000000..118a21b416b --- /dev/null +++ b/src/hotspot/cpu/sw64/jniTypes_sw64.hpp @@ -0,0 +1,123 @@ +/* + * Copyright (c) 1998, 2017, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_SW64_VM_JNITYPES_SW64_HPP +#define CPU_SW64_VM_JNITYPES_SW64_HPP + +#include "jni.h" +#include "memory/allocation.hpp" +#include "oops/oop.hpp" + +// This file holds platform-dependent routines used to write primitive jni +// types to the array of arguments passed into JavaCalls::call + +class JNITypes : AllStatic { + // These functions write a java primitive type (in native format) + // to a java stack slot array to be passed as an argument to JavaCalls:calls. + // I.e., they are functionally 'push' operations if they have a 'pos' + // formal parameter. Note that jlong's and jdouble's are written + // _in reverse_ of the order in which they appear in the interpreter + // stack. 
This is because call stubs (see stubGenerator_sparc.cpp) + // reverse the argument list constructed by JavaCallArguments (see + // javaCalls.hpp). + +private: + + // 32bit Helper routines. + static inline void put_int2r(jint *from, intptr_t *to) { *(jint *)(to++) = from[1]; + *(jint *)(to ) = from[0]; } + static inline void put_int2r(jint *from, intptr_t *to, int& pos) { put_int2r(from, to + pos); pos += 2; } + +public: + // Ints are stored in native format in one JavaCallArgument slot at *to. + static inline void put_int(jint from, intptr_t *to) { *(intptr_t *)(to + 0 ) = from; } + static inline void put_int(jint from, intptr_t *to, int& pos) { *(intptr_t *)(to + pos++) = from; } + static inline void put_int(jint *from, intptr_t *to, int& pos) { *(intptr_t *)(to + pos++) = *from; } + + // Longs are stored in native format in one JavaCallArgument slot at + // *(to). + // In theory, *(to + 1) is an empty slot. But, for several Java2D testing programs (TestBorderLayout, SwingTest), + // *(to+1). + static inline void put_long(jlong from, intptr_t *to) { + *(jlong*) (to + 1) = from; + *(jlong*) (to) = from; + } + + static inline void put_long(jlong from, intptr_t *to, int& pos) { + *(jlong*) (to + 1 + pos) = from; + *(jlong*) (to + pos) = from; + pos += 2; + } + + static inline void put_long(jlong *from, intptr_t *to, int& pos) { + *(jlong*) (to + 1 + pos) = *from; + *(jlong*) (to + pos) = *from; + pos += 2; + } + + // Oops are stored in native format in one JavaCallArgument slot at *to. + static inline void put_obj(const Handle& from_handle, intptr_t *to, int& pos) { *(to + pos++) = (intptr_t)from_handle.raw_value(); } + static inline void put_obj(jobject from_handle, intptr_t *to, int& pos) { *(to + pos++) = (intptr_t)from_handle; } + + // Floats are stored in native format in one JavaCallArgument slot at *to. + static inline void put_float(jfloat from, intptr_t *to) { *(jfloat *)(to + 0 ) = from; } + static inline void put_float(jfloat from, intptr_t *to, int& pos) { *(jfloat *)(to + pos++) = from; } + static inline void put_float(jfloat *from, intptr_t *to, int& pos) { *(jfloat *)(to + pos++) = *from; } + +#undef _JNI_SLOT_OFFSET +#define _JNI_SLOT_OFFSET 0 + // Doubles are stored in native word format in one JavaCallArgument + // slot at *(to). + // In theory, *(to + 1) is an empty slot. But, for several Java2D testing programs (TestBorderLayout, SwingTest), + // *(to + 1) must contains a copy of the long value. Otherwise it will corrupts. + static inline void put_double(jdouble from, intptr_t *to) { + *(jdouble*) (to + 1) = from; + *(jdouble*) (to) = from; + } + + static inline void put_double(jdouble from, intptr_t *to, int& pos) { + *(jdouble*) (to + 1 + pos) = from; + *(jdouble*) (to + pos) = from; + pos += 2; + } + + static inline void put_double(jdouble *from, intptr_t *to, int& pos) { + *(jdouble*) (to + 1 + pos) = *from; + *(jdouble*) (to + pos) = *from; + pos += 2; + } + + // The get_xxx routines, on the other hand, actually _do_ fetch + // java primitive types from the interpreter stack. + // No need to worry about alignment on Intel. 
+ static inline jint get_int (intptr_t *from) { return *(jint *) from; } + static inline jlong get_long (intptr_t *from) { return *(jlong *) (from + _JNI_SLOT_OFFSET); } + static inline oop get_obj (intptr_t *from) { return *(oop *) from; } + static inline jfloat get_float (intptr_t *from) { return *(jfloat *) from; } + static inline jdouble get_double(intptr_t *from) { return *(jdouble *)(from + _JNI_SLOT_OFFSET); } +#undef _JNI_SLOT_OFFSET +}; + +#endif // CPU_SW64_VM_JNITYPES_SW64_HPP diff --git a/src/hotspot/cpu/sw64/jvmciCodeInstaller_sw64.cpp b/src/hotspot/cpu/sw64/jvmciCodeInstaller_sw64.cpp new file mode 100644 index 00000000000..84bfa3d6a38 --- /dev/null +++ b/src/hotspot/cpu/sw64/jvmciCodeInstaller_sw64.cpp @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2015, 2018, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ */ + +#include "jvmci/jvmci.hpp" +#include "jvmci/jvmciCodeInstaller.hpp" +#include "jvmci/jvmciRuntime.hpp" +#include "jvmci/jvmciCompilerToVM.hpp" +#include "jvmci/jvmciJavaClasses.hpp" +#include "oops/oop.inline.hpp" +#include "runtime/handles.inline.hpp" +#include "runtime/jniHandles.hpp" +#include "runtime/sharedRuntime.hpp" +#include "vmreg_sw64.inline.hpp" + +jint CodeInstaller::pd_next_offset(NativeInstruction* inst, jint pc_offset, Handle method, TRAPS) { + Unimplemented(); + return 0; +} + +void CodeInstaller::pd_patch_OopConstant(int pc_offset, Handle constant, TRAPS) { + Unimplemented(); +} + +void CodeInstaller::pd_patch_MetaspaceConstant(int pc_offset, Handle constant, TRAPS) { + Unimplemented(); +} + +void CodeInstaller::pd_patch_DataSectionReference(int pc_offset, int data_offset, TRAPS) { + Unimplemented(); +} + +void CodeInstaller::pd_relocate_ForeignCall(NativeInstruction* inst, jlong foreign_call_destination, TRAPS) { + Unimplemented(); +} + +void CodeInstaller::pd_relocate_JavaMethod(CodeBuffer &cbuf, Handle hotspot_method, jint pc_offset, TRAPS) { + Unimplemented(); +} + +void CodeInstaller::pd_relocate_poll(address pc, jint mark, TRAPS) { + Unimplemented(); +} + +// convert JVMCI register indices (as used in oop maps) to HotSpot registers +VMReg CodeInstaller::get_hotspot_reg(jint jvmci_reg, TRAPS) { + return NULL; +} + +bool CodeInstaller::is_general_purpose_reg(VMReg hotspotRegister) { + return false; +} \ No newline at end of file diff --git a/src/hotspot/cpu/sw64/macroAssembler_sw64.cpp b/src/hotspot/cpu/sw64/macroAssembler_sw64.cpp new file mode 100644 index 00000000000..f768d8a94b9 --- /dev/null +++ b/src/hotspot/cpu/sw64/macroAssembler_sw64.cpp @@ -0,0 +1,5261 @@ +/* + * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "jvm.h" +#include "asm/assembler.hpp" +#include "asm/assembler.inline.hpp" +#include "compiler/compiler_globals.hpp" +#include "code/nativeInst.hpp" +#include "compiler/disassembler.hpp" +#include "gc/shared/barrierSet.hpp" +#include "gc/shared/barrierSetAssembler.hpp" +#include "gc/shared/collectedHeap.inline.hpp" +#include "gc/shared/tlab_globals.hpp" +#include "interpreter/bytecodeHistogram.hpp" +#include "interpreter/interpreter.hpp" +#include "memory/resourceArea.hpp" +#include "memory/universe.hpp" +#include "oops/accessDecorators.hpp" +#include "oops/compressedOops.inline.hpp" +#include "oops/klass.inline.hpp" +#include "prims/methodHandles.hpp" +#include "runtime/biasedLocking.hpp" +#include "runtime/flags/flagSetting.hpp" +#include "runtime/interfaceSupport.inline.hpp" +#include "runtime/jniHandles.hpp" +#include "runtime/objectMonitor.hpp" +#include "runtime/os.hpp" +#include "runtime/safepoint.hpp" +#include "runtime/safepointMechanism.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" +#include "runtime/thread.hpp" +#include "utilities/macros.hpp" +#include "utilities/globalDefinitions_gcc.hpp" +//#include "crc32c.h" + +#ifdef PRODUCT +#define BLOCK_COMMENT(str) /* nothing */ +#define STOP(error) stop(error) +#else +#define BLOCK_COMMENT(str) block_comment(str) +#define STOP(error) block_comment(error); stop(error) +#endif + +#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") + +static Assembler::Condition reverse[] = { + Assembler::noOverflow /* overflow = 0x0 */ , + Assembler::overflow /* noOverflow = 0x1 */ , + Assembler::aboveEqual /* carrySet = 0x2, below = 0x2 */ , + Assembler::below /* aboveEqual = 0x3, carryClear = 0x3 */ , + Assembler::notZero /* zero = 0x4, equal = 0x4 */ , + Assembler::zero /* notZero = 0x5, notEqual = 0x5 */ , + Assembler::above /* belowEqual = 0x6 */ , + Assembler::belowEqual /* above = 0x7 */ , + Assembler::positive /* negative = 0x8 */ , + Assembler::negative /* positive = 0x9 */ , + Assembler::failed /* success = 0xa */ , + Assembler::success /* failed = 0xb */ , + Assembler::greaterEqual /* less = 0xc */ , + Assembler::less /* greaterEqual = 0xd */ , + Assembler::greater /* lessEqual = 0xe */ , + Assembler::lessEqual /* greater = 0xf, */ +}; + +Address MacroAssembler::as_Address(AddressLiteral adr) { + // we can be absolute or disp based on the instruction type + // jmp/call are displacements others are absolute + assert(!adr.is_lval(), "must be rval"); + // assert(reachable(adr), "must be"); + return Address((int32_t)(intptr_t)(adr.target() - pc()), adr.target(), adr.reloc()); +} + +Address MacroAssembler::as_Address(ArrayAddress adr, Register base_reg) { + AddressLiteral base = adr.base(); + lea(base_reg, base); + Address index = adr.index(); + assert(index._disp == 0, "must not have disp"); // maybe it can? 
+ Address array(base_reg, index._index, index._scale, index._disp); + return array; +} + +void MacroAssembler::call_VM_leaf_base(address entry_point, + int number_of_arguments) {SCOPEMARK_NAME(MacroAssembler::call_VM_base, this) + Label E, L; + + testptr(esp, 0xf, rcc); + jcc(Assembler::zero, L, rcc); + + subptr(esp, 8, esp); + call(RuntimeAddress(entry_point)); + addptr(esp, 8, esp); + jmp(E); + + bind(L); + call(RuntimeAddress(entry_point)); + bind(E); +} + +void MacroAssembler::call_VM_leaf_base(address entry_point, + int number_of_arguments, + Label *retaddr, Register rscratch) {SCOPEMARK_NAME(MacroAssembler::call_VM_base-label, this) + /*Label E, L, exit; + + testptr(esp, 0xf, rcc); + jcc(Assembler::zero, L, rcc); + + mov_immediate64(rscratch1, 0xf7f7f7f7); + push(rscratch1); + jmp(E); + + bind(L); + //TODO:assert(esp[0] != 0xf7f7f7f7) jzy + + bind(E); + call(RuntimeAddress(entry_point)); + if (retaddr) + bind(*retaddr); + mov_immediate64(rscratch1, 0xf7f7f7f7); + ldl(rscratch2, esp, 0); + cmpl(rscratch1, rscratch2); + jcc(Assembler::notEqual, exit); + addptr(esp, 8, esp); + bind(exit);*/ + + //TODO:different from x86, stack not aligned is OK? jzy + call(RuntimeAddress(entry_point), retaddr, rscratch); +} + +int MacroAssembler::corrected_idivq(Register result, Register ra, Register rb, + bool want_remainder, Register scratch) +{ + ShouldNotReachHere(); + int idivq_offset = offset(); + + return idivq_offset; +} + +void MacroAssembler::decrementw(ExternalAddress dst, int value, Register tmp1, Register tmp2){ + incrementw(dst, -value, tmp1, tmp2); +} + +void MacroAssembler::decrementw(Address dst, int value, Register tmp) +{ + incrementw(dst, -value, tmp); +} + +void MacroAssembler::decrementw(Register reg, int value) +{ + decrementl(reg, value); + zapnot(reg, 0xf, reg); +} + +void MacroAssembler::decrementl(ExternalAddress dst, int value , Register tmp1, Register tmp2){ + incrementl(dst, -value, tmp1, tmp2); +} + +void MacroAssembler::decrementl(Address dst, int value, Register tmp){ + incrementl(dst, -value, tmp); +} + +void MacroAssembler::decrementl(Register reg, int value) { + incrementl(reg, -value); +} + +/** + * x86 + * @param dst + * @param value + * @param tmp1 + * @param tmp2 + */ +void MacroAssembler::incrementw(AddressLiteral dst, int value, Register tmp1, Register tmp2) { + assert_different_registers(tmp1, tmp2); + if (!value) return; + + lea(tmp1, dst); + ldws(tmp2, Address(tmp1, 0)); + if(is_simm16(value)) { + ldi(tmp2, value, tmp2); + } else { + ShouldNotReachHere(); + } + stw(tmp2, Address(tmp1, 0)); +} + +/** + * x86 + * @param dst + * @param value + * @param tmp_not_rcc + */ +void MacroAssembler::incrementw(Address dst, int value, Register tmp_not_rcc) { + if (!value) return; + ldws(tmp_not_rcc, dst); + if(is_simm16(value)) { + ldi(tmp_not_rcc, value, tmp_not_rcc); + } else { + ShouldNotReachHere(); + } + stw(tmp_not_rcc, dst); +} + +/** + * x86 + * @param reg + * @param value + */ +void MacroAssembler::incrementw(Register reg, int value) { + incrementl(reg, value); + zapnot(reg, 0xf, reg); +} + +void MacroAssembler::incrementl(ExternalAddress dst, int value, Register tmp1, Register tmp2){ + assert_different_registers(tmp1, tmp2); + if (!value) return; + mov_immediate64(tmp1, (intptr_t)dst.target(), dst.rspec()); + ldptr(tmp2, Address(tmp1, 0)); //ldwu + if (is_simm16(value)) { + ldi(tmp2, value, tmp2); + } else { + ShouldNotReachHere(); + } + stptr(tmp2, Address(tmp1, 0)); +} + +void MacroAssembler::incrementl(Address dst, int value, Register tmp){ + if (!value) 
return; + ldptr(tmp, dst); + if(is_simm16(value)) { + ldi(tmp, value, tmp); + } else { + ShouldNotReachHere(); +// mov_immediate32(AT, value); +// addl(tmp, AT, tmp); + } + stptr(tmp, dst); +} + +void MacroAssembler::incrementl(Register reg, int value) { + if (!value) return; + if (is_simm16(value)) { + ldi(reg, value, reg); + } else { + ShouldNotReachHere(); +// mov_immediate32(AT, value); +// addl(reg, AT, reg); + } +} + +// 32bit can do a case table jump in one instruction but we no longer allow the base +// to be installed in the Address class +void MacroAssembler::jump(ArrayAddress entry, Register tmp1, Register tmp2) { + assert_different_registers(tmp1, tmp2); + lea(tmp1, entry.base()); + Address dispatch = entry.index(); + assert(dispatch._base == noreg, "must be"); + dispatch._base = tmp1; + jmp(dispatch, tmp2); +} + +/** + * x86 + * lea(Register rd, Address addr) + * sw64 + * lea(Register rd, Address addr) + * note + * No difference. No temp reg is needed and rd can be the same as addr._base or addr._index + */ +void MacroAssembler::lea(Register rd, Address addr) { + ldi(rd, addr); +} + +void MacroAssembler::lea(Register rd, AddressLiteral addr) { + mov_immediate64(rd, (intptr_t)addr.target(), addr.rspec()); +} + +void MacroAssembler::lea(Address dst, AddressLiteral addr, Register tmp_not_rcc) { + assert_different_registers(tmp_not_rcc, rcc); + lea(tmp_not_rcc, addr); + stl(tmp_not_rcc, dst, rcc); +} + +void MacroAssembler::leave() { + move(esp, rfp); + ldl(RA, wordSize, esp); + ldl(rfp, 0, esp); + addptr(esp, 2 * wordSize, esp); +} + +// Move an oop into a register. immediate is true if we want +// immediate instructions, i.e. we are not going to patch this +// instruction while the code is being executed by another thread. In +// that case we can use move immediates rather than the constant pool. +void MacroAssembler::movoop(Register dst, jobject obj, bool immediate) { + ShouldNotReachHere(); +} + +// Move a metadata address into a register.
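The increment/decrement helpers above only accept immediates that fit ldi's signed 16-bit displacement and currently fall into ShouldNotReachHere() otherwise. A minimal standalone sketch of that range check, with fits_simm16 as a hypothetical stand-in for the port's is_simm16():

```cpp
// Illustrative only, not part of the patch. fits_simm16 mirrors the is_simm16()
// check used by incrementl()/incrementw() above: ldi on sw64 encodes a signed
// 16-bit displacement, so larger immediates would need a separate
// mov_immediate path (currently ShouldNotReachHere()).
#include <cassert>
#include <cstdint>

static bool fits_simm16(int64_t value) {
  return value >= -32768 && value <= 32767;
}

int main() {
  assert(fits_simm16(1));
  assert(fits_simm16(-32768));
  assert(!fits_simm16(65536));   // would not fit in ldi's displacement field
  return 0;
}
```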
+void MacroAssembler::mov_metadata(Register dst, Metadata* obj) { + int oop_index; + if (obj) { + oop_index = oop_recorder()->find_index(obj); + } else { + oop_index = oop_recorder()->allocate_metadata_index(obj); + } + RelocationHolder rspec = metadata_Relocation::spec(oop_index); + relocate(rspec); + prepare_patch_li48(dst, (long)(obj)); +} + +void MacroAssembler::reset_last_Java_frame(bool clear_fp) { + reset_last_Java_frame(rthread, clear_fp); +} + +/*void MacroAssembler::set_last_Java_frame(Register last_java_sp, + Register last_java_fp, + address last_java_pc) {ShouldNotReachHere(); + // determine last_java_sp register + if (!last_java_sp->is_valid()) { + last_java_sp = esp; + } + + Register thread = rthread; + // last_java_fp is optional + if (last_java_fp->is_valid()) { + std(last_java_fp, Address(thread, JavaThread::last_Java_fp_offset())); + } + + // last_java_pc is optional + if (last_java_pc != NULL) { + relocate(relocInfo::internal_word_type); +// movptr(AT, (long)last_java_pc); + sd(AT, thread, in_bytes(JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset())); + } + + std(last_java_sp, Address(thread, JavaThread::last_Java_sp_offset())); +}*/ +void MacroAssembler::set_last_Java_frame(Register last_java_sp, + Register last_java_fp, + address last_java_pc, Register scratch) { + // determine last_java_sp register + if (!last_java_sp->is_valid()) { + last_java_sp = esp; + } + + Register thread = rthread; + // last_java_fp is optional + if (last_java_fp->is_valid()) { + stptr(last_java_fp, Address(thread, JavaThread::last_Java_fp_offset()), scratch); + } + + // last_java_pc is optional + if (last_java_pc != NULL) { + relocate(relocInfo::internal_word_type); + prepare_patch_li48(scratch, (long)last_java_pc); + stptr(scratch, Address(rthread, + JavaThread::frame_anchor_offset() + + JavaFrameAnchor::last_Java_pc_offset())); + } + + stptr(last_java_sp, Address(thread, JavaThread::last_Java_sp_offset()), scratch); + +} +//TODO:delete we don't need this edition jzy +/*void MacroAssembler::set_last_Java_frame(Register last_java_sp, + Register last_java_fp, + Register last_java_pc, + Register scratch) { + // determine last_java_sp register + if (!last_java_sp->is_valid()) { + last_java_sp = esp; + } + + Register thread = rthread; + // last_java_fp is optional + if (last_java_fp->is_valid()) { + stptr(last_java_fp, Address(thread, JavaThread::last_Java_fp_offset()), scratch); + } + + // last_java_pc is optional + if (last_java_pc->is_valid()) {Unimplemented(); + stptr(last_java_pc, Address(rthread, + JavaThread::frame_anchor_offset() + + JavaFrameAnchor::last_Java_pc_offset()), scratch); + } + + stptr(last_java_sp, Address(thread, JavaThread::last_Java_sp_offset()), scratch); +}*/ + + +void MacroAssembler::set_last_Java_frame(Register last_java_sp, + Register last_java_fp, + Label &L, + Register scratch, Register scratch2) {BLOCK_COMMENT("MacroAssembler::set_last_Java_frame enter"); + //br scratch,0; + //add scratch,0,scratch; this instruction need patch TODO:check jzy + assert_different_registers(scratch, scratch2); + br(scratch, 0); + int offset = 0; + if (L.is_bound()) {assert(false, "TODO:should check jzy"); + offset = (target(L) - pc()/*add instruction*/) >> 2; + } else { + L.add_patch_at(code(), locator()); + } + ldi(scratch, offset, scratch);//immediate need special flag when patch? 
jzy + stptr(scratch, Address(rthread, + JavaThread::frame_anchor_offset() + + JavaFrameAnchor::last_Java_pc_offset()), scratch2); + + set_last_Java_frame(last_java_sp, last_java_fp, NULL, scratch); BLOCK_COMMENT("MacroAssembler::set_last_Java_frame leave"); +} + +static void pass_arg0(MacroAssembler* masm, Register arg) { + if (c_rarg0 != arg ) { + masm->movl(c_rarg0, arg); + } +} + +static void pass_arg1(MacroAssembler* masm, Register arg) { + if (c_rarg1 != arg ) { + masm->movl(c_rarg1, arg); + } +} + +static void pass_arg2(MacroAssembler* masm, Register arg) { + if (c_rarg2 != arg ) { + masm->movl(c_rarg2, arg); + } +} + +void MacroAssembler::stop(const char* msg) {SCOPEMARK_NAME(MacroAssembler::stop, this); + // pusha(); // get regs on stack + if (ShowMessageBoxOnError) { + address rip = pc(); + lea(c_rarg1, InternalAddress(rip)); + } +// movq(c_rarg2, rsp); // pass pointer to regs array +// andq(rsp, -16); // align stack as required by ABI + lea(c_rarg0, ExternalAddress((address) msg)); + call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::debug64))); + brk(17); +} + +void MacroAssembler::debug_stop(const char* msg) {block_comment("debug_stop { "); + ldi(rscratch1_GP, 0); + beq(rscratch1_GP, -1);block_comment("debug_stop } "); +} + +void MacroAssembler::warn(const char* msg) { + warning("warning: %s", msg); +} + +void MacroAssembler::align(int modulus) { + while (offset() % modulus != 0) nop(); +} + +// tmp_reg1 and tmp_reg2 should be saved outside of atomic_inc32 (caller saved). +void MacroAssembler::atomic_inc32(address counter_addr, int inc, Register tmp_reg1, Register tmp_reg2) { + Label again; + SizedScope sc(this, 64); + mov(tmp_reg1, counter_addr); + memb(); + bind(again); + lldw(tmp_reg2, 0, tmp_reg1); + ldi(GP, 1, R0); + wr_f(GP); + addl(tmp_reg2, inc, tmp_reg2); + move(AT, tmp_reg2); + align(8); // must align + lstw(AT, 0, tmp_reg1); + rd_f(AT); + beq_l(AT, again); +} + +void MacroAssembler::atomic_incw(Register counter_addr, Register tmp, Register tmp2) { + ShouldNotReachHere(); +} + +// Writes to stack successive pages until offset reached to check for +// stack overflow + shadow pages. This clobbers tmp. +void MacroAssembler::bang_stack_size(Register size, Register tmp) {SCOPEMARK_NAME(bang_stack_size, this) + assert_different_registers(tmp, size, rscratch4); + movl(tmp, esp); + // Bang stack for total size given plus shadow page size. + // Bang one page at a time because large size can bang beyond yellow and + // red zones. + Label loop; + bind(loop); + mov_immediate64(rscratch4, (-os::vm_page_size())); + stw(size, Address(tmp, rscratch4)); + //mov_immediate64(rscratch4, os::vm_page_size()); + addptr(tmp, rscratch4, tmp); + addptr(size, rscratch4, size); + jcc(Assembler::greater, loop, size); + + // Bang down shadow pages too. + // At this point, (tmp-0) is the last address touched, so don't + // touch it again. (It was touched as (tmp-pagesize) but then tmp + // was post-decremented.) Skip this address by starting at i=1, and + // touch a few more pages below. N.B. It is important to touch all + // the way down including all pages in the shadow zone. + for (int i = 1; i < ((int)StackOverflow::stack_shadow_zone_size() / os::vm_page_size()); i++) { + // this could be any sized move but this is can be a debugging crumb + // so the bigger the better. 
+ mov_immediate64(rscratch4, (-i*os::vm_page_size())); + stptr(size, Address(tmp, rscratch4)); + } +} + +void MacroAssembler::reserved_stack_check() { + // testing if reserved zone needs to be enabled + Label no_reserved_zone_enabling; + Register thread = rthread; + + cmpptr(esp, Address(thread, JavaThread::reserved_stack_activation_offset())); + jcc(Assembler::below, no_reserved_zone_enabling); + + call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::enable_stack_reserved_zone), thread); + jump(RuntimeAddress(StubRoutines::throw_delayed_StackOverflowError_entry())); + should_not_reach_here("throw_delayed_StackOverflowError_entry"); + + bind(no_reserved_zone_enabling); +} + +void MacroAssembler::biased_locking_enter(Register lock_reg, + Register obj_reg, + Register swap_reg, + Register tmp_reg, + bool swap_reg_contains_mark, + Label& done, + Label* slow_case, + BiasedLockingCounters* counters) { + assert(UseBiasedLocking, "why call this otherwise?"); + assert(tmp_reg != noreg, "tmp_reg must be supplied"); + assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg); + assert(markWord::age_shift == markWord::lock_bits + markWord::biased_lock_bits, "biased locking makes assumptions about bit layout"); + Address mark_addr (obj_reg, oopDesc::mark_offset_in_bytes()); + + if (PrintBiasedLockingStatistics && counters == NULL) { + counters = BiasedLocking::counters(); + } + // Biased locking + // See whether the lock is currently biased toward our thread and + // whether the epoch is still valid + // Note that the runtime guarantees sufficient alignment of JavaThread + // pointers to allow age to be placed into low bits + // First check to see whether biasing is even enabled for this object + Label cas_label; + if (!swap_reg_contains_mark) { + ldptr(swap_reg, mark_addr); + } + bis(R0, swap_reg, tmp_reg); + andptr(tmp_reg, markWord::biased_lock_mask_in_place, tmp_reg); + cmpptr(tmp_reg, markWord::biased_lock_pattern); + jcc(Assembler::notEqual, cas_label); + // The bias pattern is present in the object's header. Need to check + // whether the bias owner and the epoch are both still current. + load_prototype_header(tmp_reg, obj_reg); + orptr(tmp_reg, rthread, tmp_reg); + xorptr(swap_reg, tmp_reg, tmp_reg); + Register header_reg = tmp_reg; + andptr(header_reg, ~((int) markWord::age_mask_in_place), header_reg); + move(rcc, header_reg); + if (counters != NULL) { + Label L; + jcc(Assembler::notZero, L, header_reg); + atomic_inc32((address)counters->biased_lock_entry_count_addr(), 1, rscratch1, rscratch2); + jmp(done); + bind(L); + } else { + jcc(Assembler::equal, done, header_reg); + } + + Label try_revoke_bias; + Label try_rebias; + + // At this point we know that the header has the bias pattern and + // that we are not the bias owner in the current epoch. We need to + // figure out more details about the state of the header in order to + // know what operations can be legally performed on the object's + // header. + + // If the low three bits in the xor result aren't clear, that means + // the prototype header is no longer biased and we have to revoke + // the bias on this object. + testptr(header_reg, markWord::biased_lock_mask_in_place); + jcc(Assembler::notZero, try_revoke_bias); + + // Biasing is still enabled for this data type. See whether the + // epoch of the current bias is still valid, meaning that the epoch + // bits of the mark word are equal to the epoch bits of the + // prototype header. (Note that the prototype header's epoch bits + // only change at a safepoint.) 
If not, attempt to rebias the object + // toward the current thread. Note that we must be absolutely sure + // that the current epoch is invalid in order to do this because + // otherwise the manipulations it performs on the mark word are + // illegal. + testptr(header_reg, markWord::epoch_mask_in_place); + jcc(Assembler::notZero, try_rebias); + + // The epoch of the current bias is still valid but we know nothing + // about the owner; it might be set or it might be clear. Try to + // acquire the bias of the object using an atomic operation. If this + // fails we will go in to the runtime to revoke the object's bias. + // Note that we first construct the presumed unbiased header so we + // don't accidentally blow away another thread's valid bias. + andptr(swap_reg, + markWord::biased_lock_mask_in_place | markWord::age_mask_in_place | markWord::epoch_mask_in_place, + swap_reg); + bis(R0, swap_reg, tmp_reg); + orptr(tmp_reg, rthread, tmp_reg); + cmpxchg(tmp_reg, mark_addr, swap_reg); + // If the biasing toward our thread failed, this means that + // another thread succeeded in biasing it toward itself and we + // need to revoke that bias. The revocation will occur in the + // interpreter runtime in the slow case. + if (slow_case != NULL) { + jcc(Assembler::failed, *slow_case); + } + if (counters != NULL) { + Label L; + jcc(Assembler::success, L); + atomic_inc32((address)counters->anonymously_biased_lock_entry_count_addr(), 1, rscratch1, rscratch2); + BIND(L); + } + jmp(done); + + bind(try_rebias); + // At this point we know the epoch has expired, meaning that the + // current "bias owner", if any, is actually invalid. Under these + // circumstances _only_, we are allowed to use the current header's + // value as the comparison value when doing the cas to acquire the + // bias in the current epoch. In other words, we allow transfer of + // the bias from one thread to another directly in this situation. + // + // FIXME: due to a lack of registers we currently blow away the age + // bits in this situation. Should attempt to preserve them. + load_prototype_header(tmp_reg, obj_reg); + orptr(tmp_reg, rthread, tmp_reg); + cmpxchg(tmp_reg, mark_addr, swap_reg); + // If the biasing toward our thread failed, then another thread + // succeeded in biasing it toward itself and we need to revoke that + // bias. The revocation will occur in the runtime in the slow case. + if (slow_case != NULL) { + jcc(Assembler::failed, *slow_case); + } + if (counters != NULL) { + Label L; + jcc(Assembler::success, L); + atomic_inc32((address) counters->rebiased_lock_entry_count_addr(), 1, rscratch1, rscratch2); + BIND(L); + } + jmp(done); + + bind(try_revoke_bias); + // The prototype mark in the klass doesn't have the bias bit set any + // more, indicating that objects of this data type are not supposed + // to be biased any more. We are going to try to reset the mark of + // this object to the prototype value and fall through to the + // CAS-based locking scheme. Note that if our CAS fails, it means + // that another thread raced us for the privilege of revoking the + // bias of this particular object, so it's okay to continue in the + // normal locking code. + // + // FIXME: due to a lack of registers we currently blow away the age + // bits in this situation. Should attempt to preserve them. 
+ load_prototype_header(tmp_reg, obj_reg); + cmpxchg(tmp_reg, mark_addr, swap_reg); + // Fall through to the normal CAS-based lock, because no matter what + // the result of the above CAS, some thread must have succeeded in + // removing the bias bit from the object's header. + jcc(Assembler::failed, cas_label, AT); + if (counters != NULL) { + atomic_inc32((address) counters->revoked_lock_entry_count_addr(), 1, rscratch1, rscratch2); + } + + bind(cas_label); + +} + +void MacroAssembler::biased_locking_exit(Register obj_reg, Register temp_reg, Label& done) { + assert(UseBiasedLocking, "why call this otherwise?"); + + // Check for biased locking unlock case, which is a no-op + // Note: we do not have to check the thread ID for two reasons. + // First, the interpreter checks for IllegalMonitorStateException at + // a higher level. Second, if the bias was revoked while we held the + // lock, the object could not be rebiased toward another thread, so + // the bias bit would be clear. + ldl(temp_reg, oopDesc::mark_offset_in_bytes(), obj_reg); + andi(temp_reg, markWord::biased_lock_mask_in_place, temp_reg); + addiu(R0, markWord::biased_lock_pattern, AT); + cmpptr(AT, temp_reg); + jcc(Assembler::equal, done); +} + +#ifdef COMPILER2 +// tmp_reg1 and tmp_reg2 should be saved outside of atomic_inc32 (caller saved). +void MacroAssembler::atomic_incw(AddressLiteral counter_addr, int inc, Register tmp_reg1) { + Label again; + assert_different_registers(tmp_reg1, rscratch1_GP, rscratch2_AT); + assert(Assembler::operand_valid_for_simple_type_instruction_immediate(inc), "exceed limit"); + Register tmp_reg2 = rscratch2_AT;//TODO:check we donot need tmp_reg2 jzy + SizedScope sc(this, 64); + mov_immediate64(tmp_reg1, (intptr_t)counter_addr.target(), counter_addr.rspec()); + memb(); + bind(again); + lldw(tmp_reg2, 0, tmp_reg1); + ldi(rscratch1_GP, 1, R0); + wr_f(rscratch1_GP); + addl(tmp_reg2, inc, tmp_reg2); + move(rscratch2_AT, tmp_reg2); + align(8); // must align + lstw(rscratch2_AT, 0, tmp_reg1); + rd_f(rscratch2_AT); + beq_l(rscratch2_AT, again); +} +// Fast_Lock and Fast_Unlock used by C2 + +// Because the transitions from emitted code to the runtime +// monitorenter/exit helper stubs are so slow it's critical that +// we inline both the stack-locking fast-path and the inflated fast path. +// +// See also: cmpFastLock and cmpFastUnlock. +// +// What follows is a specialized inline transliteration of the code +// in slow_enter() and slow_exit(). If we're concerned about I$ bloat +// another option would be to emit TrySlowEnter and TrySlowExit methods +// at startup-time. These methods would accept arguments as +// (rax,=Obj, rbx=Self, rcx=box, rdx=Scratch) and return success-failure +// indications in the icc.ZFlag. Fast_Lock and Fast_Unlock would simply +// marshal the arguments and emit calls to TrySlowEnter and TrySlowExit. +// In practice, however, the # of lock sites is bounded and is usually small. +// Besides the call overhead, TrySlowEnter and TrySlowExit might suffer +// if the processor uses simple bimodal branch predictors keyed by EIP +// Since the helper routines would be called from multiple synchronization +// sites. +// +// An even better approach would be write "MonitorEnter()" and "MonitorExit()" +// in java - using j.u.c and unsafe - and just bind the lock and unlock sites +// to those specialized methods. That'd give us a mostly platform-independent +// implementation that the JITs could optimize and inline at their pleasure. 
+// Done correctly, the only time we'd need to cross to native could would be +// to park() or unpark() threads. We'd also need a few more unsafe operators +// to (a) prevent compiler-JIT reordering of non-volatile accesses, and +// (b) explicit barriers or fence operations. +// +// TODO: +// +// * Arrange for C2 to pass "Self" into Fast_Lock and Fast_Unlock in one of the registers (scr). +// This avoids manifesting the Self pointer in the Fast_Lock and Fast_Unlock terminals. +// Given TLAB allocation, Self is usually manifested in a register, so passing it into +// the lock operators would typically be faster than reifying Self. +// +// * Ideally I'd define the primitives as: +// fast_lock (nax Obj, nax box, EAX tmp, nax scr) where box, tmp and scr are KILLED. +// fast_unlock (nax Obj, EAX box, nax tmp) where box and tmp are KILLED +// Unfortunately ADLC bugs prevent us from expressing the ideal form. +// Instead, we're stuck with a rather awkward and brittle register assignments below. +// Furthermore the register assignments are overconstrained, possibly resulting in +// sub-optimal code near the synchronization site. +// +// * Eliminate the sp-proximity tests and just use "== Self" tests instead. +// Alternately, use a better sp-proximity test. +// +// * Currently ObjectMonitor._Owner can hold either an sp value or a (THREAD *) value. +// Either one is sufficient to uniquely identify a thread. +// TODO: eliminate use of sp in _owner and use get_thread(tr) instead. +// +// * Intrinsify notify() and notifyAll() for the common cases where the +// object is locked by the calling thread but the waitlist is empty. +// avoid the expensive JNI call to JVM_Notify() and JVM_NotifyAll(). +// +// * use jccb and jmpb instead of jcc and jmp to improve code density. +// But beware of excessive branch density on AMD Opterons. +// +// * Both Fast_Lock and Fast_Unlock set the ICC.ZF to indicate success +// or failure of the fast-path. If the fast-path fails then we pass +// control to the slow-path, typically in C. In Fast_Lock and +// Fast_Unlock we often branch to DONE_LABEL, just to find that C2 +// will emit a conditional branch immediately after the node. +// So we have branches to branches and lots of ICC.ZF games. +// Instead, it might be better to have C2 pass a "FailureLabel" +// into Fast_Lock and Fast_Unlock. In the case of success, control +// will drop through the node. ICC.ZF is undefined at exit. +// In the case of failure, the node will branch directly to the +// FailureLabel + + +// obj: object to lock +// box: on-stack box address (displaced header location) - KILLED +// rax,: tmp -- KILLED +// scr: tmp -- KILLED +void MacroAssembler::fast_lock(Register objReg, Register boxReg, Register tmpReg, + Register scrReg, Register cx1Reg, Register cx2Reg, + BiasedLockingCounters* counters, + Metadata* method_data, + bool use_rtm, bool profile_rtm) { + // Ensure the register assignments are disjoint + assert(tmpReg == V0, ""); + use_rtm = false; + profile_rtm = false; + + if (use_rtm) {//TODO:sw doesnot need this, we should delete this code jzy + assert_different_registers(objReg, boxReg, tmpReg, scrReg, cx1Reg, cx2Reg); + } else { + assert(cx1Reg == noreg, ""); + assert(cx2Reg == noreg, ""); + assert_different_registers(objReg, boxReg, tmpReg, scrReg); + } + + if (counters != NULL) { + atomic_incw(ExternalAddress((address)counters->total_entry_count_addr()), 1, rscratch3);//TODO:swjdk8 use OK? 
jzy + } + + // Possible cases that we'll encounter in fast_lock + // ------------------------------------------------ + // * Inflated + // -- unlocked + // -- Locked + // = by self + // = by other + // * biased + // -- by Self + // -- by other + // * neutral + // * stack-locked + // -- by self + // = sp-proximity test hits + // = sp-proximity test generates false-negative + // -- by other + // + + Label IsInflated, DONE_LABEL; + + if (DiagnoseSyncOnValueBasedClasses != 0) { + load_klass(tmpReg, objReg); + ldw(tmpReg, Address(tmpReg, Klass::access_flags_offset())); + testw(tmpReg, JVM_ACC_IS_VALUE_BASED_CLASS); + jcc(Assembler::notZero, DONE_LABEL); + } //dx + + // it's stack-locked, biased or neutral + // TODO: optimize away redundant LDs of obj->mark and improve the markword triage + // order to reduce the number of conditional branches in the most common cases. + // Beware -- there's a subtle invariant that fetch of the markword + // at [FETCH], below, will never observe a biased encoding (*101b). + // If this invariant is not held we risk exclusion (safety) failure. + if (UseBiasedLocking && !UseOptoBiasInlining) { + biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL, counters); + } + + ldptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // [FETCH] + testptr(tmpReg, markWord::monitor_value); // inflated vs stack-locked|neutral|biased + jcc(Assembler::notZero, IsInflated); + + // Attempt stack-locking ... + orptr (tmpReg, markWord::unlocked_value, tmpReg); + stptr(tmpReg, Address(boxReg, 0)); // Anticipate successful CAS + memb(); + cmpxchg(boxReg, Address(objReg, oopDesc::mark_offset_in_bytes()), tmpReg); // Updates tmpReg + //mov_immediate32(rcc, 1); + //cmove(Assembler::success, rcc, R0, rcc, rscratch2_AT); //TODO:refactor we should refactor cmpxchg, not add this instruction jzy + + if (counters != NULL) { + Label skip; + jcc(Assembler::notZero, skip);//failed + atomic_incw(ExternalAddress((address)counters->fast_path_entry_count_addr()), 1, rscratch3); + bind(skip); + } + jcc(Assembler::zero, DONE_LABEL); // Success + + // Recursive locking. + // The object is stack-locked: markword contains stack pointer to BasicLock. + // Locked by current thread if difference with current SP is less than one page. + subptr(tmpReg, esp, tmpReg); + // Next instruction set ZFlag == 1 (Success) if difference is less then one page. + mov_immediate64(rscratch3, (intptr_t)(7 - os::vm_page_size() )); + andptr(tmpReg, rscratch3, tmpReg);//TODO:which value? jzy + move(rcc, tmpReg); + stptr(tmpReg, Address(boxReg, 0)); + if (counters != NULL) { + Label skip; + jcc(Assembler::notEqual, skip); + atomic_incw(ExternalAddress((address)counters->fast_path_entry_count_addr()), 1, rscratch3); + bind(skip); + } + jmp(DONE_LABEL); + + bind(IsInflated); + // The object is inflated. tmpReg contains pointer to ObjectMonitor* + markWord::monitor_value + + // It's inflated + movl(scrReg, tmpReg); + movl(tmpReg, R0); + + memb(); + + cmpxchg(rthread, Address(scrReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), tmpReg); + //mov_immediate32(rcc, 1); + //cmove(Assembler::success, rcc, R0, rcc, AT); //TODO:refactor we should refactor cmpxchg, not add this instruction jzy + // Unconditionally set box->_displaced_header = markWord::unused_mark(). + // Without cast to int32_t movptr will destroy r10 which is typically obj. + mov_immediate32s(rscratch3, (int32_t)intptr_t(markWord::unused_mark().value())); + stl(rscratch3, Address(boxReg, 0)); + // Intentional fall-through into DONE_LABEL ... 
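For reference, the stack-locking attempt that fast_lock drives with cmpxchg above can be modelled in isolation. This is a toy sketch, not HotSpot code: BasicLock and try_stack_lock are illustrative names, and std::atomic stands in for the generated lldl/lstl loop.

```cpp
// Toy model of fast_lock's stack-locking attempt: publish the (unlocked) mark
// word in the on-stack BasicLock, then CAS the lock's address into the
// object's mark word. A failed CAS corresponds to the "failure" result in rcc
// that sends the locking site to the slow path.
#include <atomic>
#include <cstdint>

struct BasicLock { uintptr_t displaced_header; };

bool try_stack_lock(std::atomic<uintptr_t>& mark_word, BasicLock* box) {
  uintptr_t expected = mark_word.load(std::memory_order_relaxed) | 0x1;  // unlocked pattern
  box->displaced_header = expected;                 // "anticipate successful CAS"
  return mark_word.compare_exchange_strong(expected,
                                           reinterpret_cast<uintptr_t>(box));
}

int main() {
  std::atomic<uintptr_t> mark{0x1};  // neutral, unlocked mark word
  BasicLock box{};
  return try_stack_lock(mark, &box) ? 0 : 1;
}
```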
+ // Propagate ICC.ZF from CAS above into DONE_LABEL. + + // DONE_LABEL is a hot target - we'd really like to place it at the + // start of cache line by padding with NOPs. + // See the AMD and Intel software optimization manuals for the + // most efficient "long" NOP encodings. + // Unfortunately none of our alignment mechanisms suffice. + bind(DONE_LABEL); + + // At DONE_LABEL the icc ZFlag is set as follows ... + // Fast_Unlock uses the same protocol. + // ZFlag == 1 -> Success + // ZFlag == 0 -> Failure - force control through the slow-path + +} + +// obj: object to unlock +// box: box address (displaced header location), killed. Must be EAX. +// tmp: killed, cannot be obj nor box. +// +// Some commentary on balanced locking: +// +// Fast_Lock and Fast_Unlock are emitted only for provably balanced lock sites. +// Methods that don't have provably balanced locking are forced to run in the +// interpreter - such methods won't be compiled to use fast_lock and fast_unlock. +// The interpreter provides two properties: +// I1: At return-time the interpreter automatically and quietly unlocks any +// objects acquired the current activation (frame). Recall that the +// interpreter maintains an on-stack list of locks currently held by +// a frame. +// I2: If a method attempts to unlock an object that is not held by the +// the frame the interpreter throws IMSX. +// +// Lets say A(), which has provably balanced locking, acquires O and then calls B(). +// B() doesn't have provably balanced locking so it runs in the interpreter. +// Control returns to A() and A() unlocks O. By I1 and I2, above, we know that O +// is still locked by A(). +// +// The only other source of unbalanced locking would be JNI. The "Java Native Interface: +// Programmer's Guide and Specification" claims that an object locked by jni_monitorenter +// should not be unlocked by "normal" java-level locking and vice-versa. The specification +// doesn't specify what will occur if a program engages in such mixed-mode locking, however. +// Arguably given that the spec legislates the JNI case as undefined our implementation +// could reasonably *avoid* checking owner in Fast_Unlock(). +// In the interest of performance we elide m->Owner==Self check in unlock. +// A perfectly viable alternative is to elide the owner check except when +// Xcheck:jni is enabled. + +void MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register tmpReg, bool use_rtm) {SCOPEMARK_NAME(MacroAssembler::fast_unlock, this) + assert(boxReg == V0, ""); + assert_different_registers(objReg, boxReg, tmpReg); + + Label DONE_LABEL, Stacked, CheckSucc; + + // Critically, the biased locking test must have precedence over + // and appear before the (box->dhw == 0) recursive stack-lock test. + if (UseBiasedLocking && !UseOptoBiasInlining) { + biased_locking_exit(objReg, tmpReg, DONE_LABEL); + } + + cmpptr(Address(boxReg, 0), R0); // Examine the displaced header + jcc (Assembler::zero, DONE_LABEL); // 0 indicates recursive stack-lock + ldptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // Examine the object's markword + testptr(tmpReg, markWord::monitor_value); // Inflated? + jcc (Assembler::zero, Stacked); + + // Despite our balanced locking property we still check that m->_owner == Self + // as java routines or native JNI code called by this thread might + // have released the lock. + // Refer to the comments in synchronizer.cpp for how we might encode extra + // state in _succ so we can avoid fetching EntryList|cxq. 
+ // + // I'd like to add more cases in fast_lock() and fast_unlock() -- + // such as recursive enter and exit -- but we have to be wary of + // I$ bloat, T$ effects and BP$ effects. + // + // If there's no contention try a 1-0 exit. That is, exit without + // a costly MEMBAR or CAS. See synchronizer.cpp for details on how + // we detect and recover from the race that the 1-0 exit admits. + // + // Conceptually Fast_Unlock() must execute a STST|LDST "release" barrier + // before it STs null into _owner, releasing the lock. Updates + // to data protected by the critical section must be visible before + // we drop the lock (and thus before any other thread could acquire + // the lock and observe the fields protected by the lock). + // IA32's memory-model is SPO, so STs are ordered with respect to + // each other and there's no need for an explicit barrier (fence). + // See also http://gee.cs.oswego.edu/dl/jmm/cookbook.html. +#ifndef _LP64 + +#else // _LP64 + + xorptr(boxReg, rthread, boxReg); + ldptr(rscratch3, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions))); + orptr(boxReg, rscratch3, rcc);//result should put in rcc + jcc (Assembler::notZero, DONE_LABEL); + ldptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq))); + ldptr(rscratch3, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList))); + orptr(boxReg, rscratch3, rcc);//result should put in rcc + jcc (Assembler::notZero, CheckSucc); + stptr(R0, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner))); + jmp (DONE_LABEL); + + + // Try to avoid passing control into the slow_path ... + Label LSuccess, LGoSlowPath ; + bind (CheckSucc); + + // The following optional optimization can be elided if necessary + // Effectively: if (succ == null) goto SlowPath + // The code reduces the window for a race, however, + // and thus benefits performance. + cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), R0); + jcc (Assembler::zero, LGoSlowPath); + + movl(boxReg, R0); + + stptr(R0, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner))); + + // Memory barrier/fence + // Dekker pivot point -- fulcrum : ST Owner; MEMBAR; LD Succ + // Instead of MFENCE we use a dummy locked add of 0 to the top-of-stack. + // This is faster on Nehalem and AMD Shanghai/Barcelona. + // See https://blogs.oracle.com/dave/entry/instruction_selection_for_volatile_fences + // We might also restructure (ST Owner=0;barrier;LD _Succ) to + // (mov box,0; xchgq box, &m->Owner; LD _succ) . + //lock(); addl(Address(rsp, 0), 0); + memb();//TODO:how to resolve this ? jzy + + cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), R0); + jcc (Assembler::notZero, LSuccess); + + // Rare inopportune interleaving - race. + // The successor vanished in the small window above. + // The lock is contended -- (cxq|EntryList) != null -- and there's no apparent successor. + // We need to ensure progress and succession. + // Try to reacquire the lock. + // If that fails then the new owner is responsible for succession and this + // thread needs to take no further action and can exit via the fast path (success). + // If the re-acquire succeeds then pass control into the slow path. + // As implemented, this latter mode is horrible because we generated more + // coherence traffic on the lock *and* artifically extended the critical section + // length while by virtue of passing control into the slow path. 
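The "ST Owner; MEMBAR; LD Succ" pivot discussed above is easier to see in a reduced form. This is a sketch under the assumption of a simplified monitor with only owner and succ fields; the memb() in the generated code is modelled with a sequentially consistent fence:

```cpp
// Toy model of the 1-0 exit: release ownership, then a full fence, then
// re-check _succ. Without the fence the load could be ordered before the
// store and a successor could be missed, which is the race described above.
#include <atomic>

struct ToyMonitor {
  std::atomic<void*> owner{nullptr};
  std::atomic<void*> succ{nullptr};
};

bool quick_exit_needs_slow_path(ToyMonitor& m) {
  m.owner.store(nullptr, std::memory_order_release);      // ST Owner = 0
  std::atomic_thread_fence(std::memory_order_seq_cst);    // MEMBAR (memb() above)
  return m.succ.load(std::memory_order_relaxed) == nullptr; // LD Succ
}

int main() {
  ToyMonitor m;
  // No successor recorded, so the exiting thread must ensure progress itself.
  return quick_exit_needs_slow_path(m) ? 0 : 1;
}
```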
+ + // box is really RAX -- the following CMPXCHG depends on that binding + // cmpxchg R,[M] is equivalent to rax = CAS(M,rax,R) + memb(); + cmpxchg(rthread, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), boxReg); + //mov_immediate32(rcc, 1); + //cmove(Assembler::success, rcc, R0, rcc, AT); //TODO:refactor we should refactor cmpxchg, not add this instruction jzy + // There's no successor so we tried to regrab the lock. + // If that didn't work, then another thread grabbed the + // lock so we're done (and exit was a success). + jcc (Assembler::notEqual, LSuccess); + // Intentional fall-through into slow-path + + BIND (LGoSlowPath); + mov_immediate32u (rcc, 1); // set ICC.ZF=0 to indicate failure + jmp (DONE_LABEL); + + BIND (LSuccess); + mov_immediate32u (rcc, 0); // set ICC.ZF=1 to indicate success + jmp (DONE_LABEL); + + + BIND (Stacked); + ldptr(tmpReg, Address (boxReg, 0)); // re-fetch + memb(); + cmpxchg(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes()), boxReg); // Uses RAX which is box + assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0"); + //mov_immediate32(rcc, 1); + //cmove(Assembler::success, rcc, R0, rcc, rscratch2_AT); //TODO:refactor we should refactor cmpxchg, not add this instruction jzy + + +#endif + BIND(DONE_LABEL); + +} +#endif // COMPILER2 +void MacroAssembler::generate_fill(BasicType t, bool aligned, + Register to, Register value, Register count, + Register rtmp) { + //ShortBranchVerifier sbv(this); //sw need this? jzy + assert_different_registers(to, value, count, rtmp); + Label L_exit; + Label L_fill_2_bytes, L_fill_4_bytes; + + int shift = -1; + switch (t) { + case T_BYTE: + shift = 2; + break; + case T_SHORT: + shift = 1; + break; + case T_INT: + shift = 0; + break; + default: ShouldNotReachHere(); + } + + if (t == T_BYTE) { + andw(value, 0xff, value); + movl(rtmp, value); + slll(rtmp, 8, rtmp); + orw(value, rtmp, value); + } + if (t == T_SHORT) { + andw(value, 0xffff, value); + } + if (t == T_BYTE || t == T_SHORT) { + movw(rtmp, value); + slll(rtmp, 16, rtmp); + orw(value, rtmp, value); + } + + cmpw(count, 2<is_valid()) { + java_thread = rthread; + } + + // determine last_java_sp register + if (!last_java_sp->is_valid()) { + last_java_sp = esp; + } + + // debugging support + assert(number_of_arguments >= 0 , "cannot have negative number of arguments"); + assert(number_of_arguments <= 4 , "6 - rthread - ?? "); + assert(java_thread == rthread , "unexpected register"); +#ifdef ASSERT + // TraceBytecodes does not use r12 but saves it over the call, so don't verify + // r12 is the heapbase. 
+ if (UseCompressedOops && !TraceBytecodes) verify_heapbase("call_VM_base: heap base corrupted?"); +#endif // ASSERT + + movl(c_rarg0, rthread); + + // set last Java frame before call + assert(last_java_sp != rfp, "can't use ebp/rbp"); + + // set last Java frame before call + address before_call_pc = (address)pc(); + set_last_Java_frame(last_java_sp, rfp, before_call_pc, rscratch1); +// Label l; +// set_last_Java_frame(last_java_sp, rfp, l, rscratch1); + // do the call +// MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments, &l); +// call(entry_point, relocInfo::runtime_call_type); + call(RuntimeAddress(entry_point)); + // reset last Java frame + reset_last_Java_frame(java_thread, true); // + + check_and_handle_popframe(java_thread); + check_and_handle_earlyret(java_thread); + + if (check_exceptions) { + // check for pending exceptions (java_thread is set upon return) + cmpptr(Address(java_thread, Thread::pending_exception_offset()), (int32_t) NULL_WORD); + + // This used to conditionally jump to forward_exception however it is + // possible if we relocate that the branch will not reach. So we must jump + // around so we can always reach + + Label ok; + jcc(Assembler::equal, ok); + jump(RuntimeAddress(StubRoutines::forward_exception_entry())); + bind(ok); + } + + // get oop result if there is one and reset the value in the thread + if (oop_result->is_valid()) { + get_vm_result(oop_result, java_thread); + } +} + +void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) {SCOPEMARK_NAME(MacroAssembler::call_VM_helper, this) + bis(R0, esp, V0); + call_VM_base(oop_result, noreg, V0, entry_point, number_of_arguments, check_exceptions); +} + +// Use this method when MacroAssembler version of call_VM_leaf_base() should be called from Interpreter. 
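call_VM_base above finishes with the usual pending-exception check. A self-contained toy model of that control flow; ToyThread, Next and after_vm_call are illustrative names, not HotSpot types:

```cpp
// Toy model: after returning from the VM entry point, a non-null
// pending_exception redirects control to the forward-exception stub instead
// of falling through to the normal return path.
struct ToyThread { void* pending_exception = nullptr; };

enum class Next { normal_return, forward_exception };

Next after_vm_call(const ToyThread& thread) {
  // Equivalent of:
  //   cmpptr(Address(java_thread, Thread::pending_exception_offset()), NULL_WORD);
  //   jcc(Assembler::equal, ok); jump(forward_exception_entry); bind(ok);
  return thread.pending_exception != nullptr ? Next::forward_exception
                                             : Next::normal_return;
}

int main() {
  ToyThread t;
  return after_vm_call(t) == Next::normal_return ? 0 : 1;
}
```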
+void MacroAssembler::call_VM_leaf0(address entry_point) { + MacroAssembler::call_VM_leaf_base(entry_point, 0); +} + +void MacroAssembler::call_VM_leaf(address entry_point, int number_of_arguments) { + call_VM_leaf_base(entry_point, number_of_arguments); +} + +void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0) { + pass_arg0(this, arg_0); + call_VM_leaf(entry_point, 1); +} + +void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1) { + assert(arg_0 != c_rarg1, "smashed arg"); + pass_arg1(this, arg_1); + pass_arg0(this, arg_0); + call_VM_leaf(entry_point, 2); +} + +void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, + Register arg_1, Register arg_2) { + assert(arg_0 != c_rarg2, "smashed arg"); + assert(arg_1 != c_rarg2, "smashed arg"); + pass_arg2(this, arg_2); + assert(arg_0 != c_rarg1, "smashed arg"); + pass_arg1(this, arg_1); + pass_arg0(this, arg_0); + call_VM_leaf(entry_point, 3); +} + +void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0) { + pass_arg0(this, arg_0); + MacroAssembler::call_VM_leaf_base(entry_point, 1); +} + +void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1) { + assert(arg_0 != c_rarg1, "smashed arg"); + pass_arg1(this, arg_1); + pass_arg0(this, arg_0); + MacroAssembler::call_VM_leaf_base(entry_point, 2); +} + +void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) { + assert(arg_0 != c_rarg2, "smashed arg"); + assert(arg_1 != c_rarg2, "smashed arg"); + pass_arg2(this, arg_2); + assert(arg_0 != c_rarg1, "smashed arg"); + pass_arg1(this, arg_1); + pass_arg0(this, arg_0); + MacroAssembler::call_VM_leaf_base(entry_point, 3); +} + +void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2, Register arg_3) { + ShouldNotReachHere(); +} + +void MacroAssembler::get_vm_result(Register oop_result, Register java_thread) { + ld(oop_result, Address(java_thread, JavaThread::vm_result_offset())); + std(R0, Address(java_thread, JavaThread::vm_result_offset())); + verify_oop_msg(oop_result, "broken oop in call_VM_base"); +} + +void MacroAssembler::get_vm_result_2(Register metadata_result, Register java_thread) { + ld(metadata_result, Address(java_thread, JavaThread::vm_result_2_offset())); + std(R0, Address(java_thread, JavaThread::vm_result_2_offset())); +} + +// these are no-ops overridden by InterpreterMacroAssembler + +void MacroAssembler::check_and_handle_earlyret(Register java_thread) { } + +void MacroAssembler::check_and_handle_popframe(Register java_thread) { } + +void MacroAssembler::cmpb(Register lh, int imm8, Register ccReg) { + if (is_uimm8(imm8)) { + ldi(ccReg, -imm8, lh); + } else { + Unimplemented(); + } +} + +void MacroAssembler::cmpb(Address addr, int imm8, Register ccReg) { + ldbu(ccReg, addr); + cmpb(ccReg, imm8, ccReg); +} + +void MacroAssembler::cmpab(Address addr, int imm8, Register ccReg) { + memb(); + ldbu(ccReg, addr); + memb(); + cmpb(ccReg, imm8, ccReg); +} + +void MacroAssembler::cmpb(AddressLiteral src1, int imm8, Register ccReg) { + mov_immediate64(ccReg, (intptr_t)src1.target(), src1.rspec()); + ldbu(ccReg, 0, ccReg); + cmpb(ccReg, imm8, ccReg); +} + +void MacroAssembler::cmph(Address addr, int imm16, Register ccReg) { + ldhu(ccReg, addr); + ldi(ccReg, -imm16, ccReg); +} + +/** + * x86 + * Assembler::cmpl(Register dst, int32_t imm32) + * sw64 + * MacroAssembler::cmpw(Register lh, int rh, Register ccReg=rcc) + * note + * lh 
is signed 64bit int in register. + * rh is a sign extended 64bit int. + */ +void MacroAssembler::cmpw(Register lh, int rh, Register ccReg) { + //if (-(1 << 16-1) <= rh && rh < ( 1 << 16-1)) { + if (Assembler::operand_valid_for_storage_type_instruction_immediate(rh)) { + ldi(ccReg, -rh, lh); + } else { + assert_different_registers(lh, ccReg); + mov_immediate64(ccReg, rh);// rh, if negative, occupies all 64 bits once sign-extended, so mov_immediate64 is needed here + subl(lh, ccReg, ccReg); + } +} + +/** + * x86 + * Assembler::cmpl(Register dst, Register src) + * sw64 + * MacroAssembler::cmpw(Register lh, Register rh, Register ccReg=rcc) + * note + * it's a convention that lh and rh are sign extended ints in 64bit regs, + */ +void MacroAssembler::cmpw(Register lh, Register rh, Register ccReg) { + assert_different_registers(lh, ccReg); + assert_different_registers(rh, ccReg); + subl(lh, rh, ccReg);// lh and rh are both ints sign-extended to 64 bits, so subl here can never overflow or underflow +} + +/** + * x86 + * Assembler::cmpl(Register dst, Address src) + * sw64 + * MacroAssembler::cmpw(Register lh, Address rh, Register ccReg=rcc) + * note + * lh holds a sign extended 64bit int. + */ +void MacroAssembler::cmpw(Register lh, Address rh, Register ccReg) { + assert_different_registers(lh, ccReg); + ldws(ccReg, rh); + subl(lh, ccReg, ccReg); +} + +/** + * x86 + * Assembler::cmpl(Address dst, int32_t imm32) + * sw64 + * MacroAssembler::cmpw(Address lh, int32_t imm, Register ccReg=rcc, Register tmp=rscratch1) + * note + * imm will be treated as a sign extended 64bit int. + */ +void MacroAssembler::cmpw(Address lh, int32_t imm, Register ccReg, Register tmp) { + assert_different_registers(ccReg, tmp); + ldws(tmp, lh); + if (imm == 0) { + movl(ccReg, tmp); + } else { + mov_immediate64(ccReg, imm); + subl(tmp, ccReg, ccReg); + } +} + +/** + * x86 + * to del + * @param lh + * @param rh + * @param ccReg + */ +void MacroAssembler::cmpw(Address lh, Register rh, Register ccReg) { + cmpw(rh, lh, ccReg); + subl(R0, ccReg, ccReg); +} + +/** + * x86 + * + * sw64 + * MacroAssembler::cmpw(AddressLiteral src1, int32_t imm, Register ccReg=rcc, Register tmp=rscratch1) + * note + * imm is a sign extended 64bit int. + * ccReg and tmp can't be the same reg. + */ +void MacroAssembler::cmpw(AddressLiteral src1, int32_t imm, Register ccReg, Register tmp) { + ldws(tmp, src1); + cmpw(tmp, imm, ccReg); +} + +void MacroAssembler::cmpw(AddressLiteral src1, Register rh, Register ccReg) { + ldws(ccReg, src1); + subl(ccReg, rh, ccReg); +} + +/** + * x86 + * + * sw64 + * + * note + * compare lh and rh as unsigned word + */ +void MacroAssembler::cmpwu(Register lh, Address rh, Register ccReg) { + ldwu(ccReg, rh); + movw(lh, lh); //as unsigned int + subl(lh, ccReg, ccReg); + addw(lh, R0, lh); +} + +void MacroAssembler::cmpl(Register lh, int rh, Register ccReg) { + // yj todo: is ldi ok here? + guarantee(-(1 << 16-1) <= rh && rh < ( 1 << 16-1), "rh value out of simm16"); + ldi(ccReg, -rh, lh); +} + +/** + * x86 + * cmpq(Register dst, Register src) + * sw64 + * cmpl_raw(Register lh, Register rh, Register ccReg=rcc) + * note + * 64bit compare and set result into ccReg.
+ * just sub lh to rh, don't consider overflow and underflow of the result, use carefully + */ +void MacroAssembler::cmpl_raw(Register lh, Register rh, Register ccReg) { + subl(lh, rh, ccReg); +} + +// use cmpl_raw ASAP +void MacroAssembler::cmpq(Register lh, Register rh, Register ccReg) { + set_cmp_insn_mark(lh, rh, true); + subl(lh, rh, ccReg); +} + +//similar to cmpl +void MacroAssembler::cmpUL(Register lh, Register rh, Register ccReg) { + set_cmp_insn_mark(lh, rh, true); + cmpule(rh, lh, ccReg); + subl(ccReg, 0x1, ccReg); +} + +void MacroAssembler::set_cmp_insn_mark(Register lh, Register rh, bool lcmp) { + cmp_insn_mark = pc(); + cmp_lh = lh; + cmp_rh = rh; + cmp_long = lcmp; +} + +void MacroAssembler::clear_cmp_insn_mark() { + cmp_insn_mark = NULL; + cmp_lh = noreg; + cmp_rh = noreg; + cmp_long = false; +} + +bool MacroAssembler::cmp_insn_marked() { + return cmp_insn_mark != NULL; +} + +// beside cmp, there can be test before jcc or nothing, and sub/add can set cc too +void MacroAssembler::jccb(Condition cc, Label& L) { + switch(cc) { + case equal: +// case zero: +// case carryClear: + beq_l(rcc, L); + clear_cmp_insn_mark(); + break; + case notEqual: +// case notZero: +// case carrySet: + bne_l(rcc, L); + clear_cmp_insn_mark(); + break; + case greaterEqual: + if (cmp_insn_marked() && cmp_long) { + InstructionMark mark(this); + code_section()->set_end(cmp_insn_mark); + cmple(cmp_rh, cmp_lh, rcc); + code_section()->set_end(inst_mark()); + bne_l(rcc, L); + } else + bge_l(rcc, L); + clear_cmp_insn_mark(); + break; + case notNegative: + bge_l(rcc, L); + clear_cmp_insn_mark(); + break; + case aboveEqual:// unsigned >= + if (cmp_insn_marked()) { + InstructionMark mark(this); + code_section()->set_end(cmp_insn_mark); + cmpule(cmp_rh, cmp_lh, rcc); + code_section()->set_end(inst_mark()); + bne_l(rcc, L); + } else + bge_l(rcc, L); + clear_cmp_insn_mark(); + break; + case greater: + if (cmp_insn_marked() && cmp_long) { + InstructionMark mark(this); + code_section()->set_end(cmp_insn_mark); + cmplt(cmp_rh, cmp_lh, rcc); + code_section()->set_end(inst_mark()); + bne_l(rcc, L); + } else + bgt_l(rcc, L); + clear_cmp_insn_mark(); + break; + case positive: + bgt_l(rcc, L); + clear_cmp_insn_mark(); + break; + case above:// unsigned > + if (cmp_insn_marked()) { + InstructionMark mark(this); + code_section()->set_end(cmp_insn_mark); + cmpult(cmp_rh, cmp_lh, rcc); + code_section()->set_end(inst_mark()); + bne_l(rcc, L); + } else + bgt_l(rcc, L); + clear_cmp_insn_mark(); + break; + case lessEqual: + if (cmp_insn_marked() && cmp_long) { + InstructionMark mark(this); + code_section()->set_end(cmp_insn_mark); + cmple(cmp_lh, cmp_rh, rcc); + code_section()->set_end(inst_mark()); + bne_l(rcc, L); + } else + ble_l(rcc, L); + clear_cmp_insn_mark(); + break; + case belowEqual: //unsigned <= + if (cmp_insn_marked()) { + InstructionMark mark(this); + code_section()->set_end(cmp_insn_mark); + cmpule(cmp_lh, cmp_rh, rcc); + code_section()->set_end(inst_mark()); + bne_l(rcc, L); + } else + ble_l(rcc, L); + clear_cmp_insn_mark(); + break; + case less: + if (cmp_insn_marked() && cmp_long) { + InstructionMark mark(this); + code_section()->set_end(cmp_insn_mark); + cmplt(cmp_lh, cmp_rh, rcc); + code_section()->set_end(inst_mark()); + bne_l(rcc, L); + } else + blt_l(rcc, L); + clear_cmp_insn_mark(); + break; + case below: // unsigned < + if (cmp_insn_marked()) { + InstructionMark mark(this); + code_section()->set_end(cmp_insn_mark); + cmpult(cmp_lh, cmp_rh, rcc); + code_section()->set_end(inst_mark()); + bne_l(rcc, L); + } 
else + blt_l(rcc, L); + clear_cmp_insn_mark(); + break; + case negative: + blt_l(rcc, L); + clear_cmp_insn_mark(); + break; + default: + Unimplemented(); + } +} + +/** + * x86 + * cmpq(Register dst, Register src) + * sw64 + * cmpl(Register lh, Register rh, Register ccReg=rcc) + * note + * 64bit compare and set result into ccReg. + * just sub lh to rh can cause overflow or underflow of the result, so this compare is more complex. + */ +void MacroAssembler::cmpl(Register lh, Register rh, Register ccReg) { + assert_different_registers(lh, ccReg); + assert_different_registers(rh, ccReg); + xorptr(lh, rh, ccReg); //check sign + bge(ccReg, 2);// if same sign, just sub + selge(lh, 1, lh, ccReg); // if not && lh >= 0, ccReg=1, else ccReg = -1 + Assembler::br(R0, 1); + subl(lh, rh, ccReg); +} + +void MacroAssembler::cmpptr(Register lh, Address rh, Register ccReg) { + ldptr(ccReg, rh); + cmpptr(lh, ccReg, ccReg); +} + +void MacroAssembler::cmpptr(Address lh, Register rh, Register ccReg) { + ldptr(ccReg, lh); + cmpptr(ccReg, rh, ccReg); +} + +void MacroAssembler::cmpptr(Address lh, int32_t rh, Register ccReg) { + ldptr(ccReg, lh); + cmpl(ccReg, rh, ccReg);//TODO:refactor jzy +} + +void MacroAssembler::cmpptr(Register lh, int rh, Register ccReg) { + guarantee(-(1 << 16-1) <= rh && rh < ( 1 << 16-1), "rh value out of simm16"); + ldi(ccReg, -rh, lh); +} + +/** + * x86: + * cmpptr(Register src1, Register src2) + * sw64: + * cmpptr(Register src1, Register src2, Register ccReg=rcc) + * note: + * Sw64 use `rcc` as default compare result reg. + * The result should be consumed by instructions (e.g. `jcc`) ASAP with no interleaving + * instructions (e.g. `stx(reg, Address)`) that will clobber rcc by default. + * Note that ldx(reg, Address) don't need temp reg. + */ +void MacroAssembler::cmpptr(Register src1, Register src2, Register ccReg) { + subl(src1, src2, ccReg); +} + +/** + * x86 + * + * sw64 + * + * note + * + */ +void MacroAssembler::cmpptr(Register lh, AddressLiteral rh, Register ccReg) { + if (rh.is_lval()) { + lea(ccReg, rh); + cmpptr(lh, ccReg, ccReg); + } else { + ldptr(ccReg, rh); + cmpptr(lh, ccReg, ccReg); + } +} + +/** + * x86: + * movq/movl/movw(Register ra, Address addr) + * sw64: + * ldl/ldl_u/ldptr/ldw/ldhu/ldbu(Register ra, Address addr) + * note: + * SW64 don't need temp reg for the load, and ra can be same with addr._base or addr._index. 
+ * + * ldl_u will 8-byte align the addr then load 64bit + * ldl will load 64bit + * ldw will sign extend the 32bit + * ldhu will zero extend the 16bit + * ldbu will zero extend the 8bit + */ +#define LDFROMADDR_DEF(LDX) \ + void MacroAssembler::LDX(Register ra, Address addr) {\ + if (addr.getMode() == Address::base_index_scale_disp) {\ + addr.setTmp(ra);\ + }\ + addr.emit(ra, this, op_##LDX);\ + } + + LDINSNLIST(LDFROMADDR_DEF) + +#undef LDFROMADDR_DEF + +/** + * x86 + * Assembler::movq/movl/movw/movb(Address, addr, Register ra) + * sw64 + * MacroAssembler::stl/stl_u/stw/sth/stb(Register ra, Address addr, Register _tmp=rcc) + * note + * ra can't same with rcc, but can be same with addr._base, or addr._index + */ +#define ST2ADDR_DEF(STX) \ + void MacroAssembler::STX(Register ra, Address addr, Register _tmp) {\ + if (addr.getMode() == Address::base_index_scale_disp) {\ + assert_different_registers(ra, _tmp);\ + addr.setTmp(_tmp);\ + }\ + addr.emit(ra, this, op_##STX);\ + } + + STINSNLIST(ST2ADDR_DEF) + +#undef ST2ADDR_DEF + +#define ADDR_DEF(FLOATINSN) \ + void MacroAssembler::FLOATINSN(FloatRegister fa, Address addr, Register _tmp) {\ + if (addr.getMode() == Address::base_index_scale_disp) {\ + addr.setTmp(_tmp);\ + }\ + addr.emit(fa, this, op_##FLOATINSN);\ + } + + FLOATINSNLIST(ADDR_DEF) + +#undef ADDR_DEF + +void MacroAssembler::cmpoop(Register src1, Register src2, Register ccReg) { + cmpptr(src1, src2, ccReg); +} + +static void change_cmxchg_result(MacroAssembler* _masm) { + _masm->mov_immediate32(rcc, 1); + _masm->cmove(Assembler::success, rcc, R0, rcc, AT); //TODO:refactor we should refactor cmpxchg, not add this instruction jzy +} +//todo scw +// if c_reg == content(dest) { c_reg = dest ; store x_reg to dest;} else { c_reg = dest; } +void MacroAssembler::cmpxchg(Register x_reg, Address dest, Register c_reg) {SCOPEMARK_NAME(MacroAssembler::cmpxchg, this); + if (UseSW6B) { + Label done, again, nequal; + + BIND(again); + memb(); + lldl(AT, dest.disp(), dest.base()); + bne_c(AT, c_reg, nequal); + + if(x_reg != AT) bis(R0, x_reg, AT); + lstl(AT, dest.disp(), dest.base()); + beq_l(AT, again); + beq_l(R0, done); + + // not xchged + BIND(nequal); + // memb(); + if(AT != c_reg) bis(R0, AT, c_reg); + bis(R0, R0, AT); + + BIND(done); + ShouldNotReachHere(); + } else { + assert_different_registers(AT, GP, rscratch3, c_reg, dest.base()); + assert_different_registers(AT, GP, rscratch3, x_reg, dest.base()); + SizedScope sc(this, 64); + Label done, again, nequal; + + //subl(esp, 16, esp); + //stl(rscratch3, 0, esp); + + BIND(again); + lldl(rscratch3, dest.disp(), dest.base()); + cmpeq(rscratch3, c_reg, GP); + wr_f(GP); + move(AT, x_reg); + align(8); + lstl(AT, dest.disp(), dest.base()); + rd_f(AT); + beq_l(GP, nequal); + beq_l(AT, again); + // not xchged + BIND(nequal); + //if(rscratch3 != c_reg) bis(R0, rscratch3, c_reg); + bis(R0, rscratch3, c_reg); + + //ldl(rscratch3, 0, esp); + //addl(esp, 16, esp); + //ornot(R0, AT, rcc); + + change_cmxchg_result(this); +// mov_immediate32(rcc, 1); +// cmove(Assembler::success, rcc, R0, rcc, AT); //TODO:refactor we should refactor cmpxchg, not add this instruction jzy + } +} +//todo scw +// if c_reg == content(dest) { Address(dest) = x_reg ;} else { c_reg = content(dest); } +void MacroAssembler::cmpxchg32(Register x_reg, Address dest, Register c_reg) { + if (UseSW6B) { + Label done, again, nequal; + + BIND(again); + memb(); + lldw(AT, dest.disp(), dest.base()); + bne_c(AT, c_reg, nequal); + + if(x_reg != AT) bis(R0, x_reg, AT); + lstw(AT, dest.disp(), 
dest.base()); + beq_l(AT, again); + beq_l(R0, done); + + // not xchged + BIND(nequal); + // memb(); + if(AT != c_reg) bis(R0, AT, c_reg); + bis(R0, R0, AT); + + BIND(done); + ShouldNotReachHere(); + } else { + assert_different_registers(AT, GP, rscratch3, c_reg, dest.base()); + assert_different_registers(AT, GP, rscratch3, x_reg, dest.base()); + SizedScope sc(this, 64); + Label done, again, nequal; + + //subl(esp, 16, esp); + //stl(rscratch3, 0, esp); + + BIND(again); + lldw(rscratch3, dest.disp(), dest.base()); + cmpeq(rscratch3, c_reg, GP); + wr_f(GP); + move(AT, x_reg); + align(8); + lstw(AT, dest.disp(), dest.base()); + rd_f(AT); + beq_l(GP, nequal); + beq_l(AT, again); + // not xchged + BIND(nequal); + bis(R0, rscratch3, c_reg); + + //ldl(rscratch3, 0, esp); + //addl(esp, 16, esp); + //xor_ins(AT, R0, rcc); + //ornot(R0, AT, rcc); + change_cmxchg_result(this); + //mov_immediate32(rcc, 1); + //cmove(Assembler::success, rcc, R0, rcc, AT); //TODO:refactor we should refactor cmpxchg, not add this instruction jzy + } +} + +void MacroAssembler::fill_to_size(address start, int size) { + if (pc() - start > size) should_not_reach_here("size expanded!"); + while (pc() - start < size) + nop(); +} + +//If oldval == *dest then store newval into dest +void MacroAssembler::storeLcon(Register oldval, Address dest, Register newval) { + //guarantee((dest.index()->encoding() == sp->encoding()), "impossible encoding storeLCon 1"); + //guarantee((dest.disp() == 0), "impossible encoding storeLCon 2"); + //guarantee((dest.index()->encoding() == sp->encoding() && dest.disp() == 0), "impossible encoding storeLCon"); + SizedScope sc(this, 64); + assert_different_registers(AT, GP, oldval, newval, dest.base()); + Label again, failure; + BIND(again); + lldl(AT, dest.disp(), dest.base()); + cmpeq(AT, oldval, GP); + wr_f(GP); + move(AT,newval); + align(8); + lstl(AT, dest.disp(), dest.base()); + rd_f(AT); + beq_l(GP, failure); + beq_l(AT, again); + BIND(failure); + //xor_ins(AT, R0, rcc);//need it ?? + change_cmxchg_result(this); + //mov_immediate32(rcc, 1); + //cmove(Assembler::success, rcc, R0, rcc, AT); //TODO:refactor we should refactor cmpxchg, not add this instruction jzy +} + +void MacroAssembler::storeIcon(Register oldval, Address dest, Register newval) { + //guarantee((dest.index() == sp && dest.disp() == 0), "impossible encoding storeICon"); + SizedScope sc(this, 64); + assert_different_registers(AT, GP, oldval, newval, dest.base()); + Label again, failure; + BIND(again); + lldw(AT, dest.disp(), dest.base()); + cmpeq(AT, oldval, GP); + wr_f(GP); + move(AT,newval); + align(8); + lstw(AT, dest.disp(), dest.base()); + rd_f(AT); + beq_l(GP, failure); + beq_l(AT, again); + + BIND(failure); + //xor_ins(AT, R0, rcc);// need it? 
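+  // At this point AT holds the lstw status (non-zero when the conditional store
+  // succeeded); change_cmxchg_result() additionally materializes the outcome in
+  // rcc via cmove(Assembler::success, ...), so callers may branch with
+  // jcc(Assembler::success/failed) (which tests AT) or consume rcc directly.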
+ change_cmxchg_result(this); + //mov_immediate32(rcc, 1); + //cmove(Assembler::success, rcc, R0, rcc, AT); //TODO:refactor we should refactor cmpxchg, not add this instruction jzy +} + +void MacroAssembler::cmpxchgptr(Register xreg, AddressLiteral adr, Register creg, Register tmp) { + assert_different_registers(tmp, AT); + lea(tmp, adr); + cmpxchg(xreg, Address(tmp, 0), creg); +} + +void MacroAssembler::xchgptr(Register src1, Register src2) { + movl(rscratch2, src1); + movl(src1, src2); + movl(src2, rscratch2); +} + +int MacroAssembler::corrected_idivl(Register result, Register ra, Register rb, + bool want_remainder, Register scratch) +{ + ShouldNotReachHere(); + int idivl_offset = offset(); + return idivl_offset; +} + +void MacroAssembler::enter() { + subptr(esp, 16, esp); + stl(rfp, 0, esp); + stl(RA, 8, esp); + movl(rfp, esp); +} + +void MacroAssembler::load_float(FloatRegister rd, AddressLiteral addr, Register tmp) { + mov_immediate64(tmp, (intptr_t)addr.target(), addr.rspec()); + flds(rd, 0, tmp); +} + +void MacroAssembler::load_double(FloatRegister rd, AddressLiteral addr, Register tmp) { + mov_immediate64(tmp, (intptr_t)addr.target(), addr.rspec()); + fldd(rd, 0, tmp); +} + +void MacroAssembler::load_float(FloatRegister fa, Address src, Register tmp) { + flds(fa, src, tmp); +} + +void MacroAssembler::load_double(FloatRegister fa, Address src, Register tmp) { + fldd(fa, src, tmp); +} + +void MacroAssembler::store_float(FloatRegister fa, Address src, Register tmp) { + fsts(fa, src, tmp); +} + +void MacroAssembler::store_double(FloatRegister fa, Address src, Register tmp) { + fstd(fa, src, tmp); +} + +void MacroAssembler::jump(AddressLiteral addr, Register tmp) { + mov_address64(T12, (intptr_t)addr.target(), addr.rspec()); + Assembler::jmp(tmp, T12, 0);// set ra=AT for debug +} + +void MacroAssembler::jump(RuntimeAddress addr, Register tmp) { + mov_address64(T12, (intptr_t)addr.target(), addr.rspec()); + Assembler::jmp(tmp, T12, 0);// set ra=AT for debug +} + +//TODO:check right jzy +void MacroAssembler::jump_cc(Condition cc, AddressLiteral dst, Register ccReg, Register tmp) { + Label skip; + jcc(reverse[cc], skip, ccReg); + jump(dst, tmp); + bind(skip); +} + +void MacroAssembler::jcc(Condition cc, Label& L, Register ccReg, ConditionLength cl) { + if (cl == bitw) { + subw(ccReg, 0, ccReg); + } + switch(cc) { +// case equal: + case zero: + beq_l(ccReg, L); + break; +// case notEqual: + case notZero: + bne_l(ccReg, L); + break; + case greaterEqual: + case aboveEqual: + case notNegative: + bge_l(ccReg, L); + break; + case greater: + case positive: + case above://unsigned> + bgt_l(ccReg, L); + break; + case lessEqual: + case belowEqual: //unsigned<= + ble_l(ccReg, L); + break; + case less: + case below: + case negative: + blt_l(ccReg, L); + break; + case success:// for cas success + bne_l(AT, L); + break; + case failed:// for cas failed + beq_l(AT, L); + break; + default: + Unimplemented(); + } +} + +void MacroAssembler::cmpws(int cc, Register op1, Register op2, Register ccReg) { + switch((Condition)cc) { + case equal: + cmpeq(op1, op2, ccReg); + break; + case notEqual: + subw(op1, op2, ccReg); + break; + case greater: + cmplt(op2, op1, ccReg); + break; + case greaterEqual: + cmple(op2, op1, ccReg); + break; + case less: + cmplt(op1, op2, ccReg); + break; + case lessEqual: + cmple(op1, op2, ccReg); + break; + default: + Unimplemented(); + } +} + +void MacroAssembler::cmpls(int cc, Register op1, Register op2, Register ccReg) { + switch((Condition)cc) { + case equal: + cmpeq(op1, op2, 
ccReg); + break; + case notEqual: + subl(op1, op2, ccReg); + break; + case greater: + cmplt(op2, op1, ccReg); + break; + case greaterEqual: + cmple(op2, op1, ccReg); + break; + case less: + cmplt(op1, op2, ccReg); + break; + case lessEqual: + cmple(op1, op2, ccReg); + break; + default: + Unimplemented(); + } +} + +void MacroAssembler::cmpwu(int cc, Register op1, Register op2, Register ccReg) { + switch((Condition)cc) { + case equal: + cmpeq(op1, op2, ccReg); + break; + case notEqual: + subw(op1, op2, ccReg);//TODO:refactor jzy use subl to replace? + break; + case above://unsigned> + cmpult(op2, op1, ccReg); + break; + case aboveEqual: + cmpule(op2, op1, ccReg); + break; + case below: + cmpult(op1, op2, ccReg); + break; + case belowEqual: //unsigned<= + cmpule(op1, op2, ccReg); + break; + default: + Unimplemented(); + } +} + +void MacroAssembler::cmplu(int cc, Register op1, Register op2, Register ccReg) { + switch((Condition)cc) { + case equal: + cmpeq(op1, op2, ccReg); + break; + case notEqual: + subl(op1, op2, ccReg); + break; + case above://unsigned> + cmpult(op2, op1, ccReg); + break; + case aboveEqual: + cmpule(op2, op1, ccReg); + break; + case below: + cmpult(op1, op2, ccReg); + break; + case belowEqual: //unsigned<= + cmpule(op1, op2, ccReg); + break; + default: + Unimplemented(); + } +} + +void MacroAssembler::cmpfs(int cc, FloatRegister op1, FloatRegister op2, FloatRegister ccReg, bool is_order) {SCOPEMARK_NAME(MacroAssembler::cmpfs, this); + switch((Condition)cc) { + case equal: + fcmpeq(op1, op2, ccReg); + break; + case notEqual: + fcmpeq(op1, op2, ccReg); + fcmpeq(ccReg, fzero, ccReg); + break; + case greater: + c_olt_s(op2, op1); + break; + case greaterEqual: + c_ole_s(op2, op1); + break; + case less: + block_comment("less;;"); + if (is_order) { + c_olt_s(op1, op2); + } else { + c_ult_s(op1, op2); + } + break; + case lessEqual: + block_comment("lessEqual;;"); + if (is_order) { + c_ole_s(op1, op2); + } else { + c_ule_s(op1, op2); + } + break; + } +} + +void MacroAssembler::cmpfd(int cc, FloatRegister op1, FloatRegister op2, FloatRegister ccReg, bool is_order) {SCOPEMARK_NAME(MacroAssembler::cmpfd, this); + switch((Condition)cc) { + case equal: + fcmpeq(op1, op2, ccReg); + break; + case notEqual: + //TODO:performance jzy +// mov_immediate64(rscratch3, 1); +// ifmovd(rscratch3, fscratch1); +// fcmpeq(op1, op2, ccReg); +// fseleq(ccReg, fscratch1, fzero, ccReg); + fcmpeq(op1, op2, ccReg); + fcmpeq(ccReg, fzero, ccReg); + break; + case greater: + c_olt_d(op2, op1); + break; + case greaterEqual: + c_ole_d(op2, op1); + break; + case less: + block_comment("less;;"); + if (is_order) { + c_olt_d(op1, op2); + } else { + c_ult_d(op1, op2); + } + break; + case lessEqual: + if (is_order) { + c_ole_d(op1, op2); + } else { + c_ule_d(op1, op2); + } + break; + } +} + +void MacroAssembler::load_unsigned_short(Register dst, Address src) { + ldhu(dst, src); +} + +void MacroAssembler::load_unsigned_byte(Register dst, Address src) { + ldbu(dst, src); +} + +void MacroAssembler::load_signed_short(Register rd, Address addr) { + ldhu(rd, addr); + sexth(rd, rd); +} + +void MacroAssembler::load_signed_byte32(Register rd, Address addr, Register tmp) { + ldbu(rd, addr); + sextb(rd, rd); + movw(rd, rd); +} + +void MacroAssembler::load_signed_byte64(Register rd, Address addr, Register tmp) { + ldbu(rd, addr); + sextb(rd, rd); +} + +void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2) { + switch (size_in_bytes) { + case 8: ld(dst, src); break; + 
case 4: lw(dst, src); break; + case 2: if (is_signed) load_signed_short(dst, src); else load_unsigned_short(dst, src); break; + case 1: if (is_signed) load_signed_byte32( dst, src); else load_unsigned_byte( dst, src); break; + default: ShouldNotReachHere(); + } +} + +void MacroAssembler::store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2) { + ShouldNotReachHere(); +} + +void MacroAssembler::null_check(Register reg, int offset) { + if (needs_explicit_null_check(offset)) { + // provoke OS NULL exception if reg = NULL by + // accessing M[reg] w/o changing any registers + // NOTE: this is plenty to provoke a segv + cmpptr(V0, Address(reg, 0)); + } else { + // nothing to do, (later) access of M[reg + offset] + // will provoke OS NULL exception if reg = NULL + } +} + +void MacroAssembler::unimplemented(const char* what) { + const char* buf = NULL; + { + ResourceMark rm; + stringStream ss; + ss.print("unimplemented: %s", what); + buf = code_string(ss.as_string()); + } + stop(buf); +} + +void MacroAssembler::pop_CPU_state(bool restore_vectors) { + ShouldNotReachHere(); +} + +void MacroAssembler::push_CPU_state(bool save_vectors) { + ShouldNotReachHere(); +} + +void MacroAssembler::reset_last_Java_frame(Register java_thread, bool clear_fp) { + // determine java_thread register + if (!java_thread->is_valid()) { + java_thread = rthread; + } + // we must set sp to zero to clear frame + stl(R0, in_bytes(JavaThread::last_Java_sp_offset()), java_thread); + if(clear_fp) { + stl(R0, in_bytes(JavaThread::last_Java_fp_offset()), java_thread); + } + + // Always clear the pc because it could have been set by make_walkable() + stl(R0, in_bytes(JavaThread::last_Java_pc_offset()), java_thread); +} + +void MacroAssembler::round_to(Register reg, int modulus) { + addptr(reg, modulus - 1, reg); + andptr(reg, -modulus, reg); +} + +/*void MacroAssembler::set_last_Java_frame(Register java_thread, + Register last_java_sp, + Register last_java_fp, + address last_java_pc) {ShouldNotReachHere(); + // determine java_thread register + if (!java_thread->is_valid()) { + java_thread = rthread; + } + // determine last_java_sp register + if (!last_java_sp->is_valid()) { + last_java_sp = esp; + } + + // last_java_fp is optional + if (last_java_fp->is_valid()) { + stptr(last_java_fp, Address(java_thread, JavaThread::last_Java_fp_offset())); + } + + if (last_java_pc != NULL) { + lea(Address(java_thread, + JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset()), + InternalAddress(last_java_pc), AT); + } else { + ShouldNotReachHere(); + } + stptr(last_java_sp, Address(java_thread, JavaThread::last_Java_sp_offset())); +}*/ + +void MacroAssembler::testb(Register lh, int imm8, Register ccReg) { + if (is_uimm8(imm8)) { + and_ins(lh, imm8, ccReg); + } else { + Unimplemented(); + } +} + +void MacroAssembler::testb(Register lh, Register rh, Register ccReg) { + ShouldNotReachHere(); +} + +void MacroAssembler::testb(Address addr, int imm8, Register ccReg) { + ldbu(ccReg, addr); + if (is_uimm8(imm8)) { + and_ins(ccReg, imm8, ccReg); + } else { + Unimplemented(); + } +} + +/** + * x86 + * Assembler::testl(Register dst, int32_t imm32) + * sw64 + * testw(Register lh, int rh, Register ccReg, Register scratch) + * note + * test 32bit of lh and rh. 
the msb32 of lh don't cares + */ +void MacroAssembler::testw(Register lh, int rh, Register ccReg, Register scratch) { + andw(lh, rh, ccReg, scratch); +} + +void MacroAssembler::testw(Register lh, Register rh, Register ccReg) { + andw(lh, rh, ccReg); +} + +void MacroAssembler::testl(Register lh, long rh, Register res, Register scratch) { + testptr(lh, rh, res, scratch); +} + +void MacroAssembler::testl(Register lh, Register rh, Register ccReg) { + testptr(lh, rh, ccReg); +} + +void MacroAssembler::testptr(Register lh, long rh, Register res, Register scratch) { + andptr(lh, rh, res, scratch); +} + +void MacroAssembler::testptr(Register lh, Register rh, Register ccReg) { + and_ins(lh, rh, ccReg); +} + +void MacroAssembler::resolve_jobject(Register value, + Register thread, + Register tmp) {SCOPEMARK_NAME(MacroAssembler::resolve_jobject, this); + assert_different_registers(value, thread, tmp); + Label done, not_weak; + testptr(value, value); + jcc(Assembler::zero, done); // Use NULL as-is. + testptr(value, JNIHandles::weak_tag_mask); // Test for jweak tag. + jcc(Assembler::zero, not_weak); + // Resolve jweak. + access_load_at(T_OBJECT, IN_NATIVE | ON_PHANTOM_OOP_REF, + value, Address(value, -JNIHandles::weak_tag_value), tmp, thread); + verify_oop(value); + jmp(done); + bind(not_weak); + // Resolve (untagged) jobject. + access_load_at(T_OBJECT, IN_NATIVE, value, Address(value, 0), tmp, thread); + verify_oop(value); + bind(done); +} + +// Defines obj, preserves var_size_in_bytes, okay for t2 == var_size_in_bytes. +void MacroAssembler::tlab_allocate(Register thread, Register obj, + Register var_size_in_bytes, + int con_size_in_bytes, + Register t1, + Register t2, + Label& slow_case) { + BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); + bs->tlab_allocate(this, thread, obj, var_size_in_bytes, con_size_in_bytes, t1, t2, slow_case); +} + +// Defines obj, preserves var_size_in_bytes +void MacroAssembler::eden_allocate(Register thread, Register obj, + Register var_size_in_bytes, + int con_size_in_bytes, + Register t1, + Label& slow_case) {SCOPEMARK_NAME(eden_allocate, this) + BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); + bs->eden_allocate(this, thread, obj, var_size_in_bytes, con_size_in_bytes, t1, slow_case); +} + +// Zero words; len is in bytes +// Destroys all registers except addr +// len must be a nonzero multiple of wordSize +void MacroAssembler::zero_memory(Register addr, Register len, Register t1) { + should_not_reach_here("zero_memory"); +} + +// Look up the method for a megamorphic invokeinterface call. +// The target method is determined by . +// The receiver klass is in recv_klass. +// On success, the result will be in method_result, and execution falls through. +// On failure, execution transfers to the given label. 
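+// A minimal call-site sketch (illustrative only; the register assignments and the
+// receiver register are assumptions, not taken from this file; __ stands for the
+// MacroAssembler of a stub/interpreter generator):
+//   Label L_no_such_interface;
+//   __ load_klass(T0, receiver);                        // recv_klass
+//   __ lookup_interface_method(T0, T1 /* intf_klass */, itable_index,
+//                              V0 /* method_result */, T2 /* scan_temp */,
+//                              L_no_such_interface);
+//   // on success, falls through with the Method* in V0 (method_result)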
+void MacroAssembler::lookup_interface_method(Register recv_klass, + Register intf_klass, + RegisterOrConstant itable_index, + Register method_result, + Register scan_temp, + Label& L_no_such_interface, + bool return_method) { + assert_different_registers(recv_klass, intf_klass, scan_temp, rcc); + assert_different_registers(method_result, intf_klass, scan_temp, rcc); + assert(recv_klass != method_result || !return_method, + "recv_klass can be destroyed when method isn't needed"); + + assert(itable_index.is_constant() || itable_index.as_register() == method_result, + "caller must use same register for non-constant itable index as for method"); + + // Compute start of first itableOffsetEntry (which is at the end of the vtable) + int vtable_base = in_bytes(Klass::vtable_start_offset()); + int itentry_off = itableMethodEntry::method_offset_in_bytes(); + int scan_step = itableOffsetEntry::size() * wordSize; + int vte_size = vtableEntry::size_in_bytes(); + Address::ScaleFactor times_vte_scale = Address::times_ptr; + assert(vte_size == wordSize, "else adjust times_vte_scale"); + + ldws(scan_temp, Address(recv_klass, Klass::vtable_length_offset())); + + // %%% Could store the aligned, prescaled offset in the klassoop. + lea(scan_temp, Address(recv_klass, scan_temp, times_vte_scale, vtable_base)); + + if (return_method) { + // Adjust recv_klass by scaled itable_index, so we can free itable_index. + assert(itableMethodEntry::size() * wordSize == wordSize, "adjust the scaling in the code below"); + lea(recv_klass, Address(recv_klass, itable_index, Address::times_ptr, itentry_off)); + } + + // for (scan = klass->itable(); scan->interface() != NULL; scan += scan_step) { + // if (scan->interface() == intf) { + // result = (klass + scan->offset() + itable_index); + // } + // } + Label search, found_method; + + for (int peel = 1; peel >= 0; peel--) { + ldptr(method_result, Address(scan_temp, itableOffsetEntry::interface_offset_in_bytes())); + cmpptr(intf_klass, method_result); + + if (peel) { + jcc(Assembler::equal, found_method); + } else { + jcc(Assembler::notEqual, search); + // (invert the test to fall through to found_method...) + } + + if (!peel) break; + + bind(search); + + // Check that the previous entry is non-null. A null entry means that + // the receiver class doesn't implement the interface, and wasn't the + // same as when the caller was compiled. + jcc(Assembler::zero, L_no_such_interface, method_result); + addptr(scan_temp, scan_step, scan_temp); + } + + bind(found_method); + + if (return_method) { + // Got a hit. 
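+    // scan_temp points at the matching itableOffsetEntry; its offset field,
+    // added to recv_klass (which already includes the scaled itable_index),
+    // yields the address of the target Method*.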
+ ldws(scan_temp, Address(scan_temp, itableOffsetEntry::offset_offset_in_bytes())); + ldptr(method_result, Address(recv_klass, scan_temp, Address::times_1)); + } +} + + +// virtual method calling +void MacroAssembler::lookup_virtual_method(Register recv_klass, + RegisterOrConstant vtable_index, + Register method_result) { + const int base = in_bytes(Klass::vtable_start_offset()); + assert(vtableEntry::size() * wordSize == wordSize, "else adjust the scaling in the code below"); + Address vtable_entry_addr(recv_klass, + vtable_index, Address::times_ptr, + base + vtableEntry::method_offset_in_bytes()); + ldptr(method_result, vtable_entry_addr); +} + + +void MacroAssembler::check_klass_subtype(Register sub_klass, + Register super_klass, + Register temp_reg, + Label& L_success) { + Label L_failure; + check_klass_subtype_fast_path(sub_klass, super_klass, temp_reg, &L_success, &L_failure, NULL); + check_klass_subtype_slow_path(sub_klass, super_klass, temp_reg, noreg, &L_success, NULL); + bind(L_failure); +} + + +void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass, + Register super_klass, + Register temp_reg, + Label* L_success, + Label* L_failure, + Label* L_slow_path, + RegisterOrConstant super_check_offset) { + assert_different_registers(sub_klass, super_klass, temp_reg); + bool must_load_sco = (super_check_offset.constant_or_zero() == -1); + if (super_check_offset.is_register()) { + assert_different_registers(sub_klass, super_klass, + super_check_offset.as_register()); + } else if (must_load_sco) { + assert(temp_reg != noreg, "supply either a temp or a register offset"); + } + + Label L_fallthrough; + int label_nulls = 0; + if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; } + if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; } + if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; } + assert(label_nulls <= 1, "at most one NULL in the batch"); + + int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); + int sco_offset = in_bytes(Klass::super_check_offset_offset()); + Address super_check_offset_addr(super_klass, sco_offset); + + // Hacked jcc, which "knows" that L_fallthrough, at least, is in + // range of a jccb. If this routine grows larger, reconsider at + // least some of these. +#define local_jcc(assembler_cond, label) \ + if (&(label) == &L_fallthrough) jcc(assembler_cond, label); \ + else jcc( assembler_cond, label) /*omit semi*/ + + // Hacked jmp, which may only be used just before L_fallthrough. +#define final_jmp(label) \ + if (&(label) == &L_fallthrough) { /*do nothing*/ } \ + else jmp(label) /*omit semi*/ + + // If the pointers are equal, we are done (e.g., String[] elements). + // This self-check enables sharing of secondary supertype arrays among + // non-primary types such as array-of-interface. Otherwise, each such + // type would need its own customized SSA. + // We move this check to the front of the fast path because many + // type checks are in fact trivially successful in this manner, + // so we get a nicely predicted branch right at the start of the check. + cmpptr(sub_klass, super_klass); + local_jcc(Assembler::equal, *L_success); + + // Check the supertype display: + if (must_load_sco) { + // Positive movl does right thing on LP64. 
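+    // (super_check_offset is a 32-bit, always non-negative field, so the
+    // sign-extending ldws below matches x86's zero-extending movl here)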
+ ldws(temp_reg, super_check_offset_addr); + super_check_offset = RegisterOrConstant(temp_reg); + } + Address super_check_addr(sub_klass, super_check_offset, Address::times_1, 0); + cmpptr(super_klass, super_check_addr); // load displayed supertype + + // This check has worked decisively for primary supers. + // Secondary supers are sought in the super_cache ('super_cache_addr'). + // (Secondary supers are interfaces and very deeply nested subtypes.) + // This works in the same check above because of a tricky aliasing + // between the super_cache and the primary super display elements. + // (The 'super_check_addr' can address either, as the case requires.) + // Note that the cache is updated below if it does not help us find + // what we need immediately. + // So if it was a primary super, we can just fail immediately. + // Otherwise, it's the slow path for us (no success at this point). + + if (super_check_offset.is_register()) { + local_jcc(Assembler::equal, *L_success); + cmpw(super_check_offset.as_register(), sc_offset); + if (L_failure == &L_fallthrough) { + local_jcc(Assembler::equal, *L_slow_path); + } else { + local_jcc(Assembler::notEqual, *L_failure); + final_jmp(*L_slow_path); + } + } else if (super_check_offset.as_constant() == sc_offset) { + // Need a slow path; fast failure is impossible. + if (L_slow_path == &L_fallthrough) { + local_jcc(Assembler::equal, *L_success); + } else { + local_jcc(Assembler::notEqual, *L_slow_path); + final_jmp(*L_success); + } + } else { + // No slow path; it's a fast decision. + if (L_failure == &L_fallthrough) { + local_jcc(Assembler::equal, *L_success); + } else { + local_jcc(Assembler::notEqual, *L_failure); + final_jmp(*L_success); + } + } + + bind(L_fallthrough); + +#undef local_jcc +#undef final_jmp +} + + +void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass, + Register super_klass, + Register temp_reg, + Register temp2_reg, + Label* L_success, + Label* L_failure, + bool set_cond_codes) { + if (temp2_reg == noreg) temp2_reg = AT; + assert_different_registers(temp_reg, noreg); + assert_different_registers(sub_klass, super_klass, temp_reg, temp2_reg); + + const Register rcx = temp2_reg; + const Register rdi = temp_reg; + + Label L_fallthrough; + int label_nulls = 0; + if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; } + if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; } + assert(label_nulls <= 1, "at most one NULL in the batch"); + + // a couple of useful fields in sub_klass: + int ss_offset = in_bytes(Klass::secondary_supers_offset()); + int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); + Address secondary_supers_addr(sub_klass, ss_offset); + Address super_cache_addr( sub_klass, sc_offset); + + // Do a linear scan of the secondary super-klass chain. + // This code is rarely used, so simplicity is a virtue here. + // The repne_scan instruction uses fixed registers, which we must spill. + // Don't worry too much about pre-existing connections with the input regs. + +#ifndef PRODUCT + int* pst_counter = &SharedRuntime::_partial_subtype_ctr; + ExternalAddress pst_counter_addr((address) pst_counter); + lea(rcx, pst_counter_addr); + ldws(rdi, Address(rcx, 0)); + addw(rdi, 1, rdi); + stw(rdi, Address(rcx, 0)); +#endif //PRODUCT + + // We will consult the secondary-super array. + ldptr(rdi, secondary_supers_addr); + // Load the array length. (Positive movl does right thing on LP64.) + ldws(rcx, Address(rdi, Array::length_offset_in_bytes())); + // Skip to start of data. 
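+  // (the explicit Loop below replaces x86's repne scasq scan; rcx/rdi are just
+  // the temp registers aliased above, not fixed machine registers on SW64)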
+ addptr(rdi, Array::base_offset_in_bytes(), rdi); + + // Scan RCX words at [RDI] for an occurrence of super_klass. + Label Loop, found; + bind(Loop); + jcc(Assembler::zero, *L_failure, rcx); + cmpptr(Address(rdi, 0), super_klass); + jcc(Assembler::equal, found); + addptr(rdi, 1 * wordSize, rdi); + subw(rcx, 1, rcx); + jmp(Loop); + + bind(found); + // Success. Cache the super we found and proceed in triumph. + stptr(super_klass, super_cache_addr); + if (L_success != &L_fallthrough) { + jmp(*L_success); + } + + bind(L_fallthrough); +} + +void MacroAssembler::clinit_barrier(Register klass, Register thread, Label* L_fast_path, Label* L_slow_path) { + assert(L_fast_path != NULL || L_slow_path != NULL, "at least one is required"); + + Label L_fallthrough; + if (L_fast_path == NULL) { + L_fast_path = &L_fallthrough; + } else if (L_slow_path == NULL) { + L_slow_path = &L_fallthrough; + } + + // Fast path check: class is fully initialized + cmpb(Address(klass, InstanceKlass::init_state_offset()), InstanceKlass::fully_initialized); + jcc(Assembler::equal, *L_fast_path); + + // Fast path check: current thread is initializer thread + cmpptr(thread, Address(klass, InstanceKlass::init_thread_offset())); + if (L_slow_path == &L_fallthrough) { + jcc(Assembler::equal, *L_fast_path); + bind(*L_slow_path); + } else if (L_fast_path == &L_fallthrough) { + jcc(Assembler::notEqual, *L_slow_path); + bind(*L_fast_path); + } else { + Unimplemented(); + } +} + +void MacroAssembler::_verify_oop(Register reg, const char* s, const char* file, int line) { + if (!VerifyOops) return; + + // Pass register number to verify_oop_subroutine + const char * b = NULL; + {//name(), s, file, line); + b = code_string(ss.as_string()); + } + block_comment("verify_oop {"); + const Register rax = V0; + //push(rscratch1); // trash by call, sw doesn't trash rscratch1 + push(rax); // save rax, + push(reg); // pass register argument + ExternalAddress buffer((address) b); + lea(rax, buffer.addr()); + push(rax); + // call indirectly to solve generation ordering problem + ldptr(rax, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address())); + push(RA); + call(rax); + pop(RA); + pop(R0);//just pop + pop(reg); + pop(rax); + // Caller pops the arguments (oop, message) and restores rax, r10 + block_comment("} verify_oop"); +} + +Address MacroAssembler::argument_address(RegisterOrConstant arg_slot, + int extra_slot_offset) { + // cf. TemplateTable::prepare_invoke(), if (load_receiver). + int stackElementSize = Interpreter::stackElementSize; + int offset = Interpreter::expr_offset_in_bytes(extra_slot_offset+0); +#ifdef ASSERT + int offset1 = Interpreter::expr_offset_in_bytes(extra_slot_offset+1); + assert(offset1 - offset == stackElementSize, "correct arithmetic"); +#endif + Register scale_reg = noreg; + Address::ScaleFactor scale_factor = Address::no_scale; + if (arg_slot.is_constant()) { + offset += arg_slot.as_constant() * stackElementSize; + } else { + scale_reg = arg_slot.as_register(); + scale_factor = Address::times(stackElementSize); + } + // offset += wordSize; // return PC is on stack // yj todo: we don't push PC on stack?? 
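+  // Illustrative use (assumed caller, not taken from this file):
+  //   ldptr(dst, argument_address(RegisterOrConstant(2)))
+  // loads from the slot 2 * Interpreter::stackElementSize bytes above
+  // Interpreter::expr_offset_in_bytes(extra_slot_offset) on the Java expression stack.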
+ return Address(esp, scale_reg, scale_factor, offset); +} + + +void MacroAssembler::_verify_oop_addr(Address addr, const char* s, const char* file, int line) { + ShouldNotReachHere(); +} + +void MacroAssembler::verify_tlab() { +#ifdef ASSERT + if (UseTLAB && VerifyOops) { + Label next, ok; + Register thread_reg = rthread; + Register t1 = rscratch1; + Register t2 = rscratch2; + get_thread(thread_reg); + //push(t1); + ldptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset()))); + cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_start_offset()))); + jcc(Assembler::aboveEqual, next); + STOP("assert(top >= start)"); + should_not_reach_here("assert(top >= start)"); + + bind(next); + ldptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_end_offset()))); + cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset()))); + jcc(Assembler::aboveEqual, ok); + STOP("assert(top <= end)"); + should_not_reach_here("assert(top <= end)"); + + bind(ok); + //pop(t1); + } +#endif +} + +void MacroAssembler::verify_FPU(int stack_depth, const char* s) { + //Unimplemented(); +} + +// ((OopHandle)result).resolve(); +void MacroAssembler::resolve_oop_handle(Register result, Register tmp) {//warn("TODO:modify resolve_oop_handle jzy"); + assert_different_registers(result, tmp); + // OopHandle::resolve is an indirection. + access_load_at(T_OBJECT, IN_NATIVE, result, Address(result, 0), tmp, noreg); +} + +// ((WeakHandle)result).resolve(); +void MacroAssembler::resolve_weak_handle(Register rresult, Register rtmp) { //todo: need check dx + assert_different_registers(rresult, rtmp); + Label resolved; + + // A null weak handle resolves to null. + cmpptr(rresult, 0); + jcc(Assembler::equal, resolved); + + // Only 64 bit platforms support GCs that require a tmp register + // Only IN_HEAP loads require a thread_tmp register + // WeakHandle::resolve is an indirection like jweak. 
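+  // (same decorator set as the jweak path of resolve_jobject above:
+  // IN_NATIVE | ON_PHANTOM_OOP_REF, applied through the barrier set)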
+ access_load_at(T_OBJECT, IN_NATIVE | ON_PHANTOM_OOP_REF, + rresult, Address(rresult, 0), rtmp, /*tmp_thread*/noreg); + bind(resolved); +} + +void MacroAssembler::load_mirror(Register mirror, Register method, Register tmp) { + // get mirror + const int mirror_offset = in_bytes(Klass::java_mirror_offset()); + load_method_holder(mirror, method); + ldl(mirror, Address(mirror, mirror_offset)); + resolve_oop_handle(mirror, tmp); +} + +void MacroAssembler::load_method_holder_cld(Register rresult, Register rmethod) { + load_method_holder(rresult, rmethod); + ldptr(rresult, Address(rresult, InstanceKlass::class_loader_data_offset())); +} + +void MacroAssembler::load_method_holder(Register holder, Register method) { + ldptr(holder, Address(method, Method::const_offset())); // ConstMethod* + ldptr(holder, Address(holder, ConstMethod::constants_offset())); // ConstantPool* + ldptr(holder, Address(holder, ConstantPool::pool_holder_offset_in_bytes())); // InstanceKlass* +} + +void MacroAssembler::load_klass(Register dst, Register src) { + if (UseCompressedClassPointers) { + ldwu(dst, Address(src, oopDesc::klass_offset_in_bytes())); + decode_klass_not_null(dst); + } else { + ldptr(dst, Address(src, oopDesc::klass_offset_in_bytes())); + } +} + +void MacroAssembler::load_prototype_header(Register dst, Register src) { + load_klass(dst, src); + ld(dst, Address(dst, Klass::prototype_header_offset())); +} + +void MacroAssembler::store_klass(Register dst, Register src) { + if (UseCompressedClassPointers) { + encode_klass_not_null(src); + stw(src, oopDesc::klass_offset_in_bytes(), dst); + } else { + stl(src, oopDesc::klass_offset_in_bytes(), dst); + } +} + +void MacroAssembler::access_load_at(BasicType type, DecoratorSet decorators, + Register dst, Address src, + Register tmp1, Register thread_tmp) {SCOPEMARK_NAME(MacroAssembler::access_load_at, this) + BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); + decorators = AccessInternal::decorator_fixup(decorators); + bool as_raw = (decorators & AS_RAW) != 0; + if (as_raw) { + bs->BarrierSetAssembler::load_at(this, decorators, type, dst, src, tmp1, thread_tmp); + } else { + bs->load_at(this, decorators, type, dst, src, tmp1, thread_tmp); + } +} + +void MacroAssembler::access_store_at(BasicType type, DecoratorSet decorators, + Address dst, Register src, + Register tmp1, Register thread_tmp) {SCOPEMARK_NAME(MacroAssembler::access_store_at, this) + BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); + decorators = AccessInternal::decorator_fixup(decorators); + bool as_raw = (decorators & AS_RAW) != 0; + if (as_raw) { + bs->BarrierSetAssembler::store_at(this, decorators, type, dst, src, tmp1, thread_tmp); + } else { + bs->store_at(this, decorators, type, dst, src, tmp1, thread_tmp); + } +} + +void MacroAssembler::load_heap_oop(Register dst, Address src, Register tmp1, + Register thread_tmp, DecoratorSet decorators) { + access_load_at(T_OBJECT, IN_HEAP | decorators, dst, src, tmp1, thread_tmp); +} + +void MacroAssembler::load_heap_oop_not_null(Register dst, Address src, Register tmp1, + Register thread_tmp, DecoratorSet decorators) { + access_load_at(T_OBJECT, IN_HEAP | IS_NOT_NULL | decorators, dst, src, tmp1, thread_tmp); +} + +void MacroAssembler::store_heap_oop(Address dst, Register src, Register tmp1, + Register thread_tmp, DecoratorSet decorators) { + access_store_at(T_OBJECT, IN_HEAP | decorators, dst, src, tmp1, thread_tmp); +} + +// Used for storing NULLs. 
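+// e.g. store_heap_oop_null(Address(obj, field_offset)) clears an oop field
+// through the barrier set (access_store_at with T_OBJECT, IN_HEAP and noreg as
+// the source); obj/field_offset are illustrative names.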
+void MacroAssembler::store_heap_oop_null(Address dst) { + access_store_at(T_OBJECT, IN_HEAP, dst, noreg, noreg, noreg); +} + +void MacroAssembler::store_klass_gap(Register dst, Register src) { + if (UseCompressedClassPointers) { + stw(src, oopDesc::klass_gap_offset_in_bytes(), dst); + } +} + +#ifdef ASSERT +void MacroAssembler::verify_heapbase(const char* msg) {SCOPEMARK_NAME(MacroAssembler::verify_heapbase, this) + assert (UseCompressedOops || UseCompressedClassPointers, "should be compressed"); + assert (Universe::heap() != NULL, "java heap should be initialized"); + if (CheckCompressedOops) { + Label ok; +// push(1 << rscratch1->encoding(), sp); +// push(rscratch1); // cmpptr trashes rscratch1 + cmpptr(rheapbase, ExternalAddress((address)CompressedOops::ptrs_base_addr())); + jcc(Assembler::equal, ok); + STOP(msg); + bind(ok); +// pop(1 << rscratch1->encoding(), sp); +// pop(rscratch1); + } +} +#endif + + +// Algorithm must match CompressedOops::encode. +void MacroAssembler::encode_heap_oop(Register dst, Register src) { +#ifdef ASSERT + verify_heapbase("MacroAssembler::encode_heap_oop:heap base corrupted?"); +#endif + verify_oop_msg(src, "broken oop in encode_heap_oop"); + if (CompressedOops::base() == NULL) { + if (CompressedOops::shift() != 0) { + assert (LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong"); + srll(src, LogMinObjAlignmentInBytes, dst); + } else { + if (dst != src) bis(R0, src, dst); + } + } else { + if (dst == src) { + seleq(dst, rheapbase, dst, dst); + subl(dst, rheapbase, dst); + if (CompressedOops::shift() != 0) { + assert (LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong"); + srll(dst, LogMinObjAlignmentInBytes, dst); + } + } else { + subl(src, rheapbase, dst); + if (CompressedOops::shift() != 0) { + assert (LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong"); + srll(dst, LogMinObjAlignmentInBytes, dst); + } + seleq(src, R0, dst, dst); + } + } +} + +void MacroAssembler::encode_heap_oop_not_null(Register r) { +// stop("encode_heap_oop_not_null not check lsp"); +#ifdef ASSERT + verify_heapbase("MacroAssembler::encode_heap_oop_not_null: heap base corrupted?"); + if (CheckCompressedOops) { + Label ok; + testl(r, r); + jcc(Assembler::notEqual, ok); + STOP("null oop passed to encode_heap_oop_not_null"); + bind(ok); + } + +#endif + verify_oop_msg(r, "broken oop in encode_heap_oop_not_null"); + if (CompressedOops::base() != NULL) { + subl(r, rheapbase, r); + } + if (CompressedOops::shift() != 0) { + assert (LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong"); + srll(r, LogMinObjAlignmentInBytes, r); + } +} + +void MacroAssembler::encode_heap_oop_not_null(Register dst, Register src) { +// stop("encode_heap_oop_not_null 2 not check lsp"); +#ifdef ASSERT + verify_heapbase("MacroAssembler::encode_heap_oop_not_null2: heap base corrupted?"); + if (CheckCompressedOops) { + Label ok; + testl(src, src); + jcc(Assembler::notEqual, ok); + STOP("null oop passed to encode_heap_oop_not_null2"); + bind(ok); + } +#endif + verify_oop_msg(src, "broken oop in encode_heap_oop_not_null2"); + if (dst != src) { + movl(dst, src); + } + if (CompressedOops::base() != NULL) { + subl(dst, rheapbase, dst); + } + if (CompressedOops::shift() != 0) { + assert (LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong"); + srll(dst, LogMinObjAlignmentInBytes, dst); + } +} + +void MacroAssembler::decode_heap_oop(Register dst, Register src) { +#ifdef ASSERT + 
verify_heapbase("MacroAssembler::decode_heap_oop: heap base corrupted?"); +#endif + if (CompressedOops::base() == NULL) { + if (CompressedOops::shift() != 0) { + assert (LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong"); + if (dst != src) nop(); // DON'T DELETE THIS GUY. + slll(src, LogMinObjAlignmentInBytes, dst); + } else { + if (dst != src) bis(R0, src, dst); + } + } else { + if (dst == src) { + if (dst != AT) bis(R0, dst, AT); + if (CompressedOops::shift() != 0) { + assert (LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong"); + slll(dst, LogMinObjAlignmentInBytes, dst); + } + addl(dst, rheapbase, dst); + seleq(AT, R0, dst, dst); + } else { + if (CompressedOops::shift() != 0) { + assert (LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong"); + slll(src, LogMinObjAlignmentInBytes, dst); + addl(dst, rheapbase, dst); + } else { + addl(src, rheapbase, dst); + } + seleq(src, R0, dst, dst); + } + } + verify_oop_msg(dst, "broken oop in decode_heap_oop"); +} + +void MacroAssembler::decode_heap_oop_not_null(Register r) { + assert (UseCompressedOops, "should only be used for compressed headers"); + assert (Universe::heap() != NULL, "java heap should be initialized"); + // Cannot assert, unverified entry point counts instructions (see .ad file) + // vtableStubs also counts instructions in pd_code_size_limit. + // Also do not verify_oop as this is called by verify_oop. + if (CompressedOops::shift() != 0) { + assert(LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong"); + slll(r, LogMinObjAlignmentInBytes, r); + if (CompressedOops::base() != NULL) { + addl(r, rheapbase, r); + } + } else { + assert (CompressedOops::base() == NULL, "sanity"); + } +} + +void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) { +// stop("decode_heap_oop_not_null 2 not check lsp"); + // Note: it will change flags + assert (UseCompressedOops, "should only be used for compressed headers"); + assert (Universe::heap() != NULL, "java heap should be initialized"); + // Cannot assert, unverified entry point counts instructions (see .ad file) + // vtableStubs also counts instructions in pd_code_size_limit. + // Also do not verify_oop as this is called by verify_oop. 
+ if (CompressedOops::shift() != 0) { + assert(LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong"); + if (LogMinObjAlignmentInBytes == Address::times_8) { + lea(dst, Address(rheapbase, src, Address::times_8, 0)); + } else { + if (dst != src) { + movl(dst, src); + } + slll(dst, LogMinObjAlignmentInBytes, dst); + if (CompressedOops::base() != NULL) { + addl(dst, rheapbase, dst); + } + } + } else { + assert (CompressedOops::base() == NULL, "sanity"); + if (dst != src) { + movl(dst, src); + } + } +} + +void MacroAssembler::encode_klass_not_null(Register r) { + if (CompressedKlassPointers::base() != NULL) { + assert(r != rscratch3, "Encoding a klass in rcc"); + set64(rscratch3, (int64_t)CompressedKlassPointers::base()); + subl(r, rscratch3, r); + } + if (CompressedKlassPointers::shift() != 0) { + assert (LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong"); + srll(r, LogKlassAlignmentInBytes, r); + } +// if (CompressedKlassPointers::base() != NULL) { +// reinit_heapbase(); +// } +} + +void MacroAssembler::encode_klass_not_null(Register dst, Register src) { + if (dst == src) { + encode_klass_not_null(src); + } else { + if (CompressedKlassPointers::base() != NULL) { + set64(dst, (int64_t)CompressedKlassPointers::base()); + subl(src, dst, dst); + } else { + movl(dst, src); + } + if (CompressedKlassPointers::shift() != 0) { + assert (LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong"); + srll(src, LogKlassAlignmentInBytes, dst); + } + } +} + +// !!! If the instructions that get generated here change then function +// instr_size_for_decode_klass_not_null() needs to get updated. +void MacroAssembler::decode_klass_not_null(Register r) { +// stop("decode_klass_not_null not check lsp"); + // Note: it will change flags + assert (UseCompressedClassPointers, "should only be used for compressed headers"); + // Cannot assert, unverified entry point counts instructions (see .ad file) + // vtableStubs also counts instructions in pd_code_size_limit. + // Also do not verify_oop as this is called by verify_oop. + if (CompressedKlassPointers::shift() != 0) { + assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong"); + slll(r, LogKlassAlignmentInBytes, r); + } + if (CompressedKlassPointers::base() != NULL) { + set64(rheapbase, (int64_t)CompressedKlassPointers::base()); + addl(r, rheapbase, r); + reinit_heapbase(); + } +} + +void MacroAssembler::decode_klass_not_null(Register dst, Register src) { +// stop("decode_klass_not_null 2 not check lsp"); + // Note: it will change flags + assert (UseCompressedClassPointers, "should only be used for compressed headers"); + if (dst == src) { + decode_klass_not_null(dst); + } else { + // Cannot assert, unverified entry point counts instructions (see .ad file) + // vtableStubs also counts instructions in pd_code_size_limit. + // Also do not verify_oop as this is called by verify_oop. 
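+    // Unlike decode_klass_not_null(r) above, this variant materializes
+    // CompressedKlassPointers::base() in dst rather than in rheapbase, so
+    // rheapbase stays intact and no reinit_heapbase() is needed here.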
+ set64(dst, (int64_t)CompressedKlassPointers::base()); + if (CompressedKlassPointers::shift() != 0) { + assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong"); + assert(LogKlassAlignmentInBytes == Address::times_8, "klass not aligned on 64bits?"); + lea(dst, Address(dst, src, Address::times_8, 0)); + } else { + addl(dst, src, dst); + } + } +} + +void MacroAssembler::emit_data(RelocationHolder const& rspec, int format) { + if (rspec.type() != relocInfo::none) { +#ifdef ASSERT + //check_relocation(rspec, format);//sw will be wrong +#endif + if (format == call32_operand){ + ShouldNotReachHere(); + code_section()->relocate(code_section()->end(), rspec, disp32_operand); + } + else + code_section()->relocate(code_section()->end(), rspec, format); + } +} + +void MacroAssembler::mov_narrow_oop(Register dst, int32_t imm32, RelocationHolder const& rspec){ + emit_data(rspec, narrow_oop_operand); + int16_t high = (imm32 - (int16_t)(imm32))>>16; + int16_t low = (int16_t)(imm32); + ldih(dst, high, R0); + ldi(dst, low, dst); + // if imm32=0x0000ffff, ldih/ldi will result in 0x10000ffff, so we must zapnot + zapnot(dst, 0xf, dst); +} + + +void MacroAssembler::set_narrow_oop(Register dst, jobject obj) { +// should_not_reach_here(); +// stop("set_narrow_oop not check lsp"); + assert(UseCompressedOops, "should only be used for compressed header"); + assert(oop_recorder() != NULL, "this assembler needs an OopRecorder"); + + int oop_index = oop_recorder()->find_index(obj); + RelocationHolder rspec = oop_Relocation::spec(oop_index); + + relocate(rspec, Assembler::narrow_oop_operand); + prepare_patch_li48(dst, oop_index); +} + +void MacroAssembler::set_narrow_oop(Address dst, jobject obj) { + should_not_reach_here("set_narrow_oop"); +} + +void MacroAssembler::cmp_narrow_oop(Register dst, jobject obj, Register ccReg) { + should_not_reach_here("cmp_narrow_oop"); +} + +void MacroAssembler::cmp_narrow_oop(Address dst, jobject obj, Register ccReg) { + should_not_reach_here("cmp_narrow_oop"); +} + +void MacroAssembler::set_narrow_klass(Register dst, Klass* k) { +// stop("set_narrow_klass not check lsp"); + assert(UseCompressedClassPointers, "should only be used for compressed header"); + assert(oop_recorder() != NULL, "this assembler needs an OopRecorder"); + + int klass_index = oop_recorder()->find_index(k); + RelocationHolder rspec = metadata_Relocation::spec(klass_index); + + mov_narrow_oop(dst, CompressedKlassPointers::encode(k), rspec); +} + +void MacroAssembler::set_narrow_klass(Address dst, Klass* k) { + should_not_reach_here("set_narrow_klass"); +} + +void MacroAssembler::cmp_narrow_klass(Register dst, Klass* k, Register ccReg) { + should_not_reach_here("cmp_narrow_klass"); +} + +void MacroAssembler::cmp_narrow_klass(Address dst, Klass* k, Register ccReg) { + should_not_reach_here("cmp_narrow_klass"); +} + +void MacroAssembler::reinit_heapbase() { + if (UseCompressedOops) { + if (Universe::heap() != NULL) { + if (CompressedOops::base() == NULL) { + movl(rheapbase, R0); + } else { + mov_immediate64(rheapbase, (int64_t)CompressedOops::base()); + } + } else { + ldptr(rheapbase, ExternalAddress((address)CompressedOops::ptrs_base_addr())); + } + } +} + +void MacroAssembler::arrays_equals(Register a1, Register a2, Register tmp3, + Register tmp4, Register tmp5, Register result, + Register cnt1, int elem_size) { + should_not_reach_here("arrays_equals not implement"); +} + +// Intrinsic for sun/nio/cs/ISO_8859_1$Encoder.implEncodeISOArray and +// java/lang/StringUTF16.compress. 
+void MacroAssembler::encode_iso_array(Register src, Register dst, + Register len, Register result, + FloatRegister Vtmp1, FloatRegister Vtmp2, + FloatRegister Vtmp3, FloatRegister Vtmp4) +{ + should_not_reach_here("encode_iso_array not implement"); +} + +/** + * Helpers for multiply_to_len(). + */ +void MacroAssembler::add2_with_carry(Register final_dest_hi, Register dest_hi, Register dest_lo, + Register src1, Register src2) { + ShouldNotReachHere(); +} + +/** + * Multiply 64 bit by 64 bit first loop. + */ +void MacroAssembler::multiply_64_x_64_loop(Register x, Register xstart, Register x_xstart, + Register y, Register y_idx, Register z, + Register carry, Register product, + Register idx, Register kdx) { + ShouldNotReachHere(); +} + +/** + * Multiply 128 bit by 128. Unrolled inner loop. + * + */ +void MacroAssembler::multiply_128_x_128_loop(Register y, Register z, + Register carry, Register carry2, + Register idx, Register jdx, + Register yz_idx1, Register yz_idx2, + Register tmp, Register tmp3, Register tmp4, + Register tmp6, Register product_hi) { + ShouldNotReachHere(); +} + +/** + * Code for BigInteger::multiplyToLen() instrinsic. + * + * i0: x + * i1: xlen + * i2: y + * i3: ylen + * i4: z + * i5: zlen + * i10: tmp1 + * i11: tmp2 + * i12: tmp3 + * i13: tmp4 + * i14: tmp5 + * i15: tmp6 + * i16: tmp7 + * + */ +void MacroAssembler::multiply_to_len(Register x, Register xlen, Register y, Register ylen, + Register z, Register zlen, + Register tmp1, Register tmp2, Register tmp3, Register tmp4, + Register tmp5, Register tmp6, Register product_hi) { + ShouldNotReachHere(); +} + +// Code for BigInteger::mulAdd instrinsic +// out = i0 +// in = i1 +// offset = i2 (already out.length-offset) +// len = i3 +// k = i4 +// +// pseudo code from java implementation: +// carry = 0; +// offset = out.length-offset - 1; +// for (int j=len-1; j >= 0; j--) { +// product = (in[j] & LONG_MASK) * kLong + (out[offset] & LONG_MASK) + carry; +// out[offset--] = (int)product; +// carry = product >>> 32; +// } +// return (int)carry; +void MacroAssembler::mul_add(Register out, Register in, Register offset, + Register len, Register k) { + ShouldNotReachHere(); +} + +/** + * Emits code to update CRC-32 with a byte value according to constants in table + * + * @param [in,out]crc Register containing the crc. + * @param [in]val Register containing the byte to fold into the CRC. + * @param [in]table Register containing the table of crc constants. + * + * uint32_t crc; + * val = crc_table[(val ^ crc) & 0xFF]; + * crc = val ^ (crc >> 8); + * + */ +void MacroAssembler::update_byte_crc32(Register crc, Register val, Register table) { + xorr(val, crc, val); + and_ins(val, 0xFF, val); + srll(crc, 8, crc); // unsigned shift +// zapnot(crc, 0xF, crc); + + dsll(AT, val, Address::times_4); + addl(table, AT, AT); + ldw(AT, 0, AT); + zapnot(AT, 0xF, AT); + xorr(crc, AT, crc); +} + +/** + * @param crc register containing existing CRC (32-bit) + * @param buf register pointing to input byte buffer (byte*) + * @param len register containing number of bytes + * @param table register that will contain address of CRC table + * @param tmp scratch register + */ +void MacroAssembler::kernel_crc32(Register crc, Register buf, Register len, + Register table0, Register table1, Register table2, Register table3, + Register tmp, Register tmp2, Register tmp3) { + ShouldNotReachHere(); +} + +// Compress char[] array to byte[]. 
+void MacroAssembler::char_array_compress(Register src, Register dst, Register len, + FloatRegister tmp1Reg, FloatRegister tmp2Reg, + FloatRegister tmp3Reg, FloatRegister tmp4Reg, + Register result) { + should_not_reach_here("char_array_compress"); +} + +// Inflate byte[] array to char[]. +void MacroAssembler::byte_array_inflate(Register src, Register dst, Register len, + FloatRegister vtmp1, FloatRegister vtmp2, FloatRegister vtmp3, + Register tmp4) { + should_not_reach_here("byte_array_inflate"); +} + +SkipIfEqual::SkipIfEqual( + MacroAssembler* masm, const bool* flag_addr, bool value) { + _masm = masm; + _masm->mov(AT, (address)flag_addr); + _masm->ldbu(AT, 0, AT); + _masm->addiu(AT, -value, AT); + _masm->beq_l(AT,_label); +} + +SkipIfEqual::~SkipIfEqual() { + _masm->bind(_label); +} + +// get_thread() can be called anywhere inside generated code so we +// need to save whatever non-callee save context might get clobbered +// by the call to JavaThread::sw64_get_thread_helper() or, indeed, +// the call setup code. +// +// sw64_get_thread_helper() clobbers only i0, i1, and flags. +// +void MacroAssembler::get_thread(Register thread) { + pushad(thread); + MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, Thread::current), 0); + + int off;//depending on the sd sequence in pushad(); + + /* + * in [assembler_sw64.cpp] pushad(), F12 is inserted between A7 and T0. + * Therefore, the offsets before A7 need to be adjusted by 8 bytes. + * + * NOTE: I have tried removing the push action of F12 from pushad(), but failed. + * Maybe other modules in Hotspot depend on this special layout. + */ + move(thread, V0); + popad(thread); +} + +//--------------------------------------------------------------------------------------------------------------- + +Register temp_regs[] = {T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, A0, A1, A2, A3, A4, A5, GP, V0, AT}; +void MacroAssembler::saveTRegisters(){ + int i, index; + + // Fixed-point registers + int len = sizeof(temp_regs) / sizeof(temp_regs[0]); + + addiu(esp, -1 * len * wordSize, esp); + for (i = 0, index = len - 1; i < len; i++) { + stl(temp_regs[i], index * wordSize, esp); + index--; //index not equal i + } +} + +void MacroAssembler::restoreTRegisters(){ + int i, index; + /* Fixed-point registers */ + int len = sizeof(temp_regs) / sizeof(temp_regs[0]); + for (i = len-1, index = 0; i >= 0; i--) { + ldl(temp_regs[i], index * wordSize, esp); + index++; + } + addiu(esp, index * wordSize, esp); +} + +Register caller_saved_registers[] = {V0, T0, T1, T2, T3, T4, T5, T6, T7, rfp, A0, A1, A2, A3, A4, A5, T8, T9, T10, T11, RA, T12, AT, GP}; + +// In SW64, F0~23 are all caller-saved registers +FloatRegister caller_saved_fpu_registers[] = {f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, f10, f11, f12, f13, f14, f15, F16, F17, f18, f19, f20, f21, f22, f23}; + +////We preserve all caller-saved register +//void MacroAssembler::pushad(Register skip){ +// int i, index; +// +// // Fixed-point registers +// int len = sizeof(caller_saved_registers) / sizeof(caller_saved_registers[0]); +// //int stack_len = skip == noreg ? 
len : (len-1); +// +// for (i = 0, index = 1; i < len; i++) { +// if (skip != caller_saved_registers[i]) { +// stl(caller_saved_registers[i], -1 * index * wordSize, esp); +// index++; //index not equal i +// } +// } +// addiu(esp, -1 * (index-1) * wordSize, esp); +// +// /* Floating-point registers */ +// len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]); +// addiu(esp, -1 * len * wordSize, esp); +// for (i = 0; i < len; i++) { +// fstd(caller_saved_fpu_registers[i], (len - i - 1) * wordSize, esp); +// } +//}; +// +//void MacroAssembler::popad(Register skip){ +// int i, index; +// +// /* Floating-point registers */ +// int len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]); +// for (i = 0; i < len; i++) { +// fldd(caller_saved_fpu_registers[i], (len - i - 1) * wordSize, esp); +// } +// addiu(esp, len * wordSize, esp); +// +// /* Fixed-point registers */ +// len = sizeof(caller_saved_registers) / sizeof(caller_saved_registers[0]); +// for (i = len-1, index = 0; i >= 0; i--) { +// if (skip != caller_saved_registers[i]) { +// ldl(caller_saved_registers[i], index * wordSize, esp); +// index++; +// } +// } +// addiu(esp, index * wordSize, esp); +//}; + +void MacroAssembler::pushad(Register skip){ + int i, index; + // Fixed-point registers + int len = sizeof(caller_saved_registers) / sizeof(caller_saved_registers[0]); + subl(sp, 1 * len * wordSize, sp); + for (i = 0; i < len; i++) { + if (skip != caller_saved_registers[i]){ + stl(caller_saved_registers[i], (len - i - 1) * wordSize, sp); + } + } + + // Floating-point registers + len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]); + subl(sp, 1 * len * wordSize, sp); + for (i = 0; i < len; i++) { + fstd(caller_saved_fpu_registers[i], (len - i - 1) * wordSize, sp); + } +}; + +void MacroAssembler::popad(Register skip){ + int i, index; + + /* Floating-point registers */ + int len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]); + for (i = 0; i < len; i++) { + fldd(caller_saved_fpu_registers[i], (len - i - 1) * wordSize, esp); + } + addiu(esp, len * wordSize, esp); + + /* Fixed-point registers */ + len = sizeof(caller_saved_registers) / sizeof(caller_saved_registers[0]); + for (i = 0; i < len; i++) { + if (skip != caller_saved_registers[i]) { + ldl(caller_saved_registers[i], (len - i - 1) * wordSize, esp); + } + } + addiu(esp, len * wordSize, esp); +}; + +void MacroAssembler::notw(Register rd, Register rs) { + ornot(R0, rs, rd); +// zapnot(rd, 0xf, rd); +} + +/** + * x86 + * Assembler::movl(Register dst, Address src) + * sw64 + * MacroAssembler::ldws(Register rd, Address addr) + * note + * load 32bit into reg. + * for x86 the reg can be viewed just as 32bit, they don't care the msb32 since their instructions can operate 32bit directly. + * for sw64 the msb32 cares and ldws sign extend into msb32. + * it's recommend to use ldws to substitue movl when transplanting, except for the ocassions mentioned in ldwu + */ +void MacroAssembler::ldws(Register rd, Address addr) { + ldw(rd, addr); +} + +/** + * x86 + * Assembler::movl(Register dst, Address src) + * sw64 + * MacroAssembler::ldwu(Register rd, Address addr) + * note + * load 32bit into reg. + * for x86 the reg can be viewed just as 32bit, they don't care the msb32 since their instructions can operate 32bit directly. + * for sw64 the msb32 cares and ldwu zero the msb32. 
+ * if rd is loaded as a flag, a status, a mode, following by a test, a and, we must use ldwu + */ +void MacroAssembler::ldwu(Register rd, Address addr) { + ldw(rd, addr); + zapnot(rd, 0xf, rd); +} + +void MacroAssembler::ldptr(Register rd, Address addr, Register tmp) { + ldl(rd, addr); +} + +/** + * x86 + * MacroAssembler::movptr(Address dst, Register src) + * sw64 + * MacroAssembler::stptr(Register rd, Address addr, Register tmp=rcc) + * note + * rd can't be same with tmp + */ +void MacroAssembler::stptr(Register rd, Address addr, Register tmp) { + assert_different_registers(rd, tmp); + stl(rd, addr, tmp); +} + +void MacroAssembler::addptr(Register rd, Address addr) { + assert_different_registers(rd, rcc); + ldptr(rcc, addr); + addptr(rd, rcc, rd); +} + +/** + * x86 + * no corresponding + * sw64 + * MacroAssembler::ldws(Register rd, AddressLiteral addr) + * note + * use ldws ASAP + */ +void MacroAssembler::ldws(Register rd, AddressLiteral addr) { + mov_immediate64(rd, (intptr_t)addr.target(), addr.rspec()); + ldw(rd, 0, rd); +} + +/** + * x86 + * Assembler:: + * sw64 + * MacroAssembler::ldwu(Register rd, AddressLiteral addr) + * note + * use when load a flag/status/mode + */ +void MacroAssembler::ldwu(Register rd, AddressLiteral addr) { + ldws(rd, addr); + zapnot(rd, 0xf, rd); +} + +/** + * x86 + * movptr + * sw64 + * ldptr + * note + * same + */ +void MacroAssembler::ldptr(Register rd, AddressLiteral addr) { + mov_immediate64(rd, (intptr_t)addr.target(), addr.rspec()); + ldl(rd, 0, rd); +} + +/** + * x86 + * jmp + * sw64 + * jmp(Address rd, Register tmp=T12) + * note + * sw use t12 as jump target, especially when jump into runtime + */ +void MacroAssembler::jmp(Address rd, Register tmp) { + ldl(T12, rd); + Assembler::jmp(tmp, T12, 0);// set ra=AT for debug +} + +/** + * x86 + * jmp + * sw64 + * jmp(Register rd, Register tmp=T12); + * note + * sw use AT as link reg for debug + */ +void MacroAssembler::jmp(Register rd, Register tmp) { + assert_different_registers(rd, tmp); + if (rd != T12) + movl(T12, rd); + Assembler::jmp(tmp, T12, 0);// set ra=tmp for debug +} + +void MacroAssembler::jmp(Label& lbl) { + beq_l(R0, lbl); +} + +/** + * x86 + * Assembler::movzwl(Register dst, Address src) + * sw64 + * MacroAssembler::ldhu_unaligned(Register rd, Address addr, Register tmp=rcc) + * note + * load and zero-extend a 16bit into a reg. + * movzwl and ldhu_unaligned are all little endian, so maybe have to swap in some occasion. + * x86 zero-extends a 16bit into 32bit, sw64 zero-extends 16bit into a 64bit reg. + * tmp can't be same with rd. + */ +void MacroAssembler::ldhu_unaligned(Register rd, Address addr, Register tmp) { + assert_different_registers(rd, tmp); + lea(tmp, addr); + Assembler::ldbu(rd, 1, tmp); + slll(rd, 8, rd); + Assembler::ldbu(tmp, 0, tmp); + bis(tmp, rd, rd); +} + +/** + * x86 + * Assembler::movzwl(Register dst, Address src) + * sw64 + * MacroAssembler::ldhu_unaligned_be(Register rd, Address addr, Register tmp=rcc) + * note + * load and zero-extend a 16bit into a reg. + * movzwl is little endian, so have to bswapl after movzwl. + * ldhu_unaligned_be is big endian, so don't have to swap. + * x86 zero-extend a 16bit into 32bit, we zero-extend into a 64bit reg. + * tmp can't be same with rd. 
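+ * e.g. with addr[0] = 0x12 and addr[1] = 0x34 the result is
+ * rd = 0x0000000000001234 (big-endian byte pairing, zero-extended to 64 bits).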
+ */ +void MacroAssembler::ldhu_unaligned_be(Register rd, Address addr, Register tmp) { +// Assembler::ldhu(rd, addr);// unaligned exception may occur here + assert_different_registers(rd, tmp); + lea(tmp, addr); + Assembler::ldbu(rd, 1, tmp); + Assembler::ldbu(tmp, 0, tmp); + slll(tmp, 8, tmp); + bis(tmp, rd, rd); +} + +void MacroAssembler::cmove(Condition cc, Register dst, Register src1, Register src2, Register ccReg) { + switch(cc) { +// case equal: + case zero: + seleq(ccReg, src1, src2, dst); + break; +// case notEqual: + case notZero: + selne(ccReg, src1, src2, dst); + break; + case greaterEqual: + case aboveEqual: + selge(ccReg, src1, src2, dst); + break; + case greater: + case positive: + selgt(ccReg, src1, src2, dst); + break; + case lessEqual: + selle(ccReg, src1, src2, dst); + break; + case less: + case below: + sellt(ccReg, src1, src2, dst); + break; + + case success: + selne(ccReg, src1, src2, dst); + break; + + case failed: + ShouldNotReachHere(); + break; + default: + Unimplemented(); + } +} + +// Patch any kind of instruction; there may be several instructions. +// Return the total length (in bytes) of the instructions. +int MacroAssembler::pd_patch_instruction_size(address branch, address target) { + ShouldNotReachHere(); + return 0; +} + +int MacroAssembler::patch_oop(address insn_addr, address o) { + ShouldNotReachHere(); + return 0; +} + +int MacroAssembler::patch_narrow_klass(address insn_addr, narrowKlass n) { + ShouldNotReachHere(); + return 0; +} + +void MacroAssembler::prepare_patch_li48(Register rd, long imm) { + assert_not_delayed(); + assert(is_simm16(imm >> 32), "Not a 48-bit address"); + + int16_t msb_l, lsb_h, lsb_l; + NativeInstruction::imm48_split(imm, msb_l, lsb_h, lsb_l); + block_comment(";;li48 {"); + ldi(rd, msb_l, R0); + slll(rd, 32, rd); + ldih(rd, lsb_h, rd); + ldi(rd, lsb_l, rd); + char buf[50]; + sprintf(buf, "0x%lx }", imm); + block_comment(buf); +} + +address MacroAssembler::target_addr_for_insn(address insn_addr, unsigned insn) { + ShouldNotReachHere(); + return 0; +} + +//without check, maybe fixed +int MacroAssembler::patched_branch(int dest_pos, int inst, int inst_pos) { + int m; // mask for displacement field + int v; // new value for displacement field + const int word_aligned_ones = -4; + + switch (sw2_op(inst)) { + default: ShouldNotReachHere(); + case op_ret: + case op_jmp: + case op_call: m = wdisp(word_aligned_ones, 0, 16); v = wdisp( dest_pos, inst_pos+4, 16); break; + case op_br: + case op_bsr: + case op_beq: + case op_bne: + case op_blt: + case op_ble: + case op_bgt: + case op_bge: + case op_blbc: + case op_blbs: + case op_fbeq: + case op_fbne: + case op_fblt: + case op_fble: + case op_fbgt: + case op_fbge: m = wdisp(word_aligned_ones, 0, 21); v = wdisp( dest_pos, inst_pos+4, 21); break; + case op_ldi: m = simm(-1, 16); v = simm(dest_pos-inst_pos, 16); break; + } + + return inst & ~m | v; +} + +/* +// used registers : T0, T1 +void MacroAssembler::verify_oop_subroutine() { + // RA: ra + // A0: char* error message + // A1: oop object to verify + + Label exit, error; + // increment counter + mov(T0, (long)StubRoutines::verify_oop_count_addr()); + ldw(AT, 0, T0); + addiu(AT, 1, AT); + stw(AT, 0, T0); + + // make sure object is 'reasonable' + beq_l(A1, exit); // if obj is NULL it is ok + + // Check if the oop is in the right area of memory + //const int oop_mask = Universe::verify_oop_mask(); + //const int oop_bits = Universe::verify_oop_bits(); + const uintptr_t oop_mask = Universe::verify_oop_mask(); + const uintptr_t oop_bits = 
Universe::verify_oop_bits(); + if (Assembler::is_simm8(oop_mask)) { + and_ins(A1, oop_mask, T0); + } else { + mov(AT, oop_mask); + and_ins(A1, AT, T0); + } + if (Assembler::is_simm8(oop_bits)) { + cmpeq(T0, oop_bits, AT); + beq(AT, offset(target(error))); + } else { + mov(AT, oop_bits); + bne_c(T0, AT, error); + } + + // make sure klass is 'reasonable' + //add for compressedoops + reinit_heapbase(); + //add for compressedoops + load_klass(T0, A1); + beq_l(T0, error); // if klass is NULL it is broken + // return if everything seems ok + BIND(exit); + + ret_sw(); + + // handle errors + BIND(error); + pushad(); + call_patch(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type); + popad(); + ret_sw(); +}*/ + +// MacroAssembler protected routines needed to implement +// public methods + +//void MacroAssembler::mov(Register r, Address dest) { +// code_section()->relocate(pc(), dest.rspec()); +// u_int64_t imm64 = (u_int64_t)dest.target(); +//// movptr(r, imm64); +//} + +// Move a constant pointer into r. In Sw64 mode the virtual +// address space is 48 bits in size, so we only need three +// instructions to create a patchable instruction sequence that can +// reach anywhere. +//void MacroAssembler::movptr(Register r, long imm64) { +// assert_not_delayed(); +// assert(is_simm16(imm64 >> 32), "Not a 48-bit address"); +// +// int16_t msb_l, lsb_h, lsb_l; +// imm48_split(imm64, msb_l, lsb_h, lsb_l); +// ldi(r, msb_l, R0); +// slll(r, 32, r); +// ldih(r, lsb_h, r); +// ldi(r, lsb_l, r); +//} + +// must get argument(a double) in F16/F17 +//void MacroAssembler::trigfunc(char trig, bool preserve_cpu_regs, int num_fpu_regs_in_use) { +//We need to preseve the register which maybe modified during the Call +void MacroAssembler::trigfunc(char trig, int num_fpu_regs_in_use) { +//save all modified register here +//FIXME, in the disassembly of tirgfunc, only used V0,T4,T12, SP,RA,so we ony save V0,T4,T12 + pushad(); +//we should preserve the stack space before we call + addiu(esp, -wordSize * 2, esp); + switch (trig){ + case 's' : + call_patch( CAST_FROM_FN_PTR(address, SharedRuntime::dsin), relocInfo::runtime_call_type ); + break; + case 'c': + call_patch( CAST_FROM_FN_PTR(address, SharedRuntime::dcos), relocInfo::runtime_call_type ); + break; + case 't': + call_patch( CAST_FROM_FN_PTR(address, SharedRuntime::dtan), relocInfo::runtime_call_type ); + break; + default:assert (false, "bad intrinsic"); + break; + } + addiu(esp, wordSize * 2, esp); + popad(); +} + +/** + * x86 + * Assembler::movl(Address dst, int32_t imm32) + * sw64 + * MacroAssembler::stw(int src, Address dst, Register tmp=rcc) + * note + * store a imm32 to a Address. only support base_plus_disp type Address. tmp can be any reg. 
+ */ +void MacroAssembler::stw(int imm32, Address dst, Register tmp) { + if (dst.getMode() == Address::base_plus_disp) { + mov_immediate32(tmp, imm32); + stw(tmp, dst); + } else { + ShouldNotReachHere(); + } +} + +void MacroAssembler::mov_immediate64(Register dst, u_int64_t imm64) { +// int32_t lsb32 = (int32_t) (imm64); +// int32_t msb32 = (int32_t) ((imm64 - lsb32) >> 32); +// int16_t msb_h = (msb32-(int16_t)msb32) >> 16; +// int16_t msb_l = (int16_t)msb32; +// int16_t lsb_h = (lsb32-(int16_t)lsb32) >> 16; +// int16_t lsb_l = (int16_t)lsb32; +// block_comment(";;imm64 {"); +// if(msb_h == 0) { +// ldi(dst, msb_l, R0); +// } else { +// ldih(dst, msb_h, R0); +// if(msb_l != 0) +// ldi(dst, msb_l, dst); +// } +// slll(dst, 32, dst); +// if( ((int)lsb_h == -32768) && (lsb_l < 0) ) { +// ldih(dst, 0x4000, dst);// yj todo +// ldih(dst, 0x4000, dst); +// ldi(dst, lsb_l, dst); +// } else { +// ldih(dst, lsb_h, dst); +// ldi(dst, lsb_l, dst); +// } +// char buf[50]; +// sprintf(buf, "0x%lx }", imm64); +// block_comment(buf); + int32_t lo = (int32_t) (imm64); + int32_t hi = (int32_t) ((imm64 - lo) >> 32); + + int16_t lo_h16 = (lo - (int16_t)(lo))>>16; + int16_t lo_l16 = (int16_t)(lo); + int16_t hi_h16 = (hi - (int16_t)(hi))>>16; + int16_t hi_l16 = (int16_t)(hi); + block_comment(";;imm64 {"); + if ( is_simm16(imm64) ) { + ldi(dst, imm64, R0); + } else if ( hi != 0 ) { + if ( is_simm16(hi) ) { + ldi(dst, hi, R0); + } else { + ldih(dst, hi_h16, R0); + if (hi_l16 != 0) + ldi(dst, hi_l16, dst); + } + slll(dst, 32, dst); + if ( lo != 0 ) { + if ( ((int)lo_h16 == -32768) && ((int)lo_l16 < 0)) { + // original val was in range 0x7FFF8000..0x7FFFFFFF + ldih(dst, 0x4000, dst); + ldih(dst, 0x4000, dst); + if (lo_l16 != 0) + ldi(dst, lo_l16, dst); + } else { + ldih(dst, lo_h16, dst); + if (lo_l16 != 0) + ldi(dst, lo_l16, dst); + } + } + } else if ( (hi == 0) && (lo != 0) ) { + if ( ((int)lo_h16 == -32768) && ((int)lo_l16 < 0)) { + // original val was in range 0x7FFF8000..0x7FFFFFFF + /* ldih(d, lo_h16, R0); + * ldi(d, lo_l16, d); + * addw(d, 0, d); */ + ldih(dst, 0x4000, R0); + ldih(dst, 0x4000, dst); + if (lo_l16 != 0) + ldi(dst, lo_l16, dst); + } else { + ldih(dst, lo_h16, R0); + if (lo_l16 != 0) + ldi(dst, lo_l16, dst); + } + } else { + tty->print_cr("value = 0x%lx", imm64); + guarantee(false, "Not supported yet in set64!"); + } + char buf[50]; + sprintf(buf, "0x%lx }", imm64); + block_comment(buf); +} + +/** + * x86 + * Assembler::mov_literal64(Register, long, RelocationHolder const&) + * sw64 + * MacroAssembler::mov_immediate64(Register dst, u_int64_t imm64, RelocationHolder const& rspec, int format + * note + * x86's imm64 is just following the opcode, while sw64 is split and embeded in the ldi/sll/ldih/ldi seq. + * x86's imm64 format is set when mov_literal64 invoke emit_data64. sw's formate is set here. + */ +void MacroAssembler::mov_immediate64(Register dst, u_int64_t imm64, RelocationHolder const& rspec, int format) { + InstructionMark im(this); + assert(inst_mark() != NULL, "must be inside InstructionMark"); + // Do not use AbstractAssembler::relocate, which is not intended for + // embedded words. Instead, relocate to the enclosing instruction. 
+ code_section()->relocate(inst_mark(), rspec, format); +#ifdef ASSERT + check_relocation(rspec, format); +#endif + + assert(imm64 <= ((intptr_t(1) << 48) - 1), "imm64 is too large"); + prepare_patch_li48(dst, imm64); +} + +void MacroAssembler::mov_address64(Register dst, u_int64_t imm64, RelocationHolder const &rspec, int format) { + InstructionMark im(this); + assert(inst_mark() != NULL, "must be inside InstructionMark"); + // Do not use AbstractAssembler::relocate, which is not intended for + // embedded words. Instead, relocate to the enclosing instruction. + code_section()->relocate(inst_mark(), rspec, format); +#ifdef ASSERT + check_relocation(rspec, format); +#endif + if (SafePatch) { + if (offset() % 8 == 0) { + nop(); + br(T12, 2); + emit_int64((long) imm64); + ldl(T12, 0, T12); + } else { + br(T12, 2); + emit_int64((long) imm64); + ldl(T12, 0, T12); + nop(); + } + } else { + assert(imm64 <= ((intptr_t(1) << 48) - 1), "imm64 is too large"); + prepare_patch_li48(dst, imm64); + } +} +// zero extend imm32 into dst +void MacroAssembler::mov_immediate32(Register dst, int imm32) { + if (imm32>=0 && imm32 < (1<<15)) { + // if imm32=0x0000ffff, ldi will result in 0xf..fffff since it's sign extened + // so imm32 must less then 1<<15, not 1<<16 + ldi(dst, imm32, R0); + } else { + int16_t high = (imm32 - (int16_t)(imm32))>>16; + int16_t low = (int16_t)(imm32); + ldih(dst, high, R0); + ldi(dst, low, dst); + // if imm32=0x0000ffff, ldih/ldi will result in 0x10000ffff, so we must zapnot + zapnot(dst, 0xf, dst); + } +} + +// zero extend imm32 into dst +void MacroAssembler::mov_immediate32u(Register dst, int imm32) { + if (imm32>=0 && imm32 < (1<<15)) { + // if imm32=0x0000ffff, ldi will result in 0xf..fffff since it's sign extened + // so imm32 must less then 1<<15, not 1<<16 + ldi(dst, imm32, R0); + } else { + int16_t high = (imm32 - (int16_t)(imm32))>>16; + int16_t low = (int16_t)(imm32); + ldih(dst, high, R0); + ldi(dst, low, dst); + // if imm32=0x7fffffff, high=0x8000, low=0xffff ldih/ldi will result in 0xffffffff 7fffffff, so we must zapnot +// if( ((int)high == (-32768)) && (low < 0) ) //TODO CHECK lsp: if((imm32<0) || (((int)high == (-32768)) && (low < 0))) + zapnot(dst, 0xf, dst); + } +} +// signed extend imm32 into dst +void MacroAssembler::mov_immediate32s(Register dst, int imm32) { +// if (imm32>=0 && imm32 < (1<<15)) { +// // if imm32=0x0000ffff, ldi will result in 0xf..fffff since it's sign extened +// // so imm32 must less then 1<<15, not 1<<16 +// ldi(dst, imm32, R0); +// } else { +// int16_t high = (imm32 - (int16_t)(imm32))>>16; +// int16_t low = (int16_t)(imm32); +// ldih(dst, high, R0); +// ldi(dst, low, dst); +// // if imm32=0x7fffffff, high=0x8000,low=0xffff ldih/ldi will result in 0xffffffff 7fffffff, so we must addw +// if( ((int)high == (-32768)) && (low < 0) ) +// addw(dst, R0, dst); +// } + assert(is_simm(imm32, 32), "imm should be simm32 in MacroAssembler::li32"); + int16_t high = (imm32 - (int16_t)(imm32))>>16; + int16_t low = (int16_t)(imm32); + if(is_simm16(imm32)){ + ldi(dst, imm32, R0); + } else { + ldih(dst, high, R0); + ldi(dst, low, dst); + if( ((int)high == (-32768)) && (low < 0) ) + addw(dst, R0, dst); + } +} + +void MacroAssembler::hswap(Register reg) { + if (UseSW6B) { + revbh(reg, reg); + sexth(reg, reg); + } else { + srll(reg, 8, AT); + slll(reg, 24, reg); + addw(reg, 0, reg); + sral(reg, 16, reg); + or_ins(reg, AT, reg); + } +} + +void MacroAssembler::huswap(Register reg) { + if (UseSW6B) { + revbh(reg, reg); + } else { + srll(reg, 8, AT); + slll(reg, 8, 
reg); + zapnot(reg, 0x2, reg); + or_ins(reg, AT, reg); + } +} + +// something funny to do this will only one more register AT +// 32 bits +void MacroAssembler::swap(Register reg) { + if (UseSW6B) { + revbw(reg, reg); + addw(reg, 0, reg); + } else { + assert_different_registers(reg, AT); + zapnot(reg, 0xf, reg); + srll(reg, 8, AT); + slll(reg, 24, reg); + or_ins(reg, AT, reg); + srll(AT, 16, AT); + xor_ins(AT, reg, AT); + and_ins(AT, 0xff, AT); + xor_ins(reg, AT, reg); + slll(AT, 16, AT); + xor_ins(reg, AT, reg); + addw(reg, 0x0, reg); + } +} + +void MacroAssembler::bswapw(Register reg) { + swap(reg); +} + +void MacroAssembler::boundary_test(FloatRegister ft, Register res){ + Register tmp1 = AT; + Register tmp2 = GP; + fimovd(ft,tmp1); + slll(tmp1, 0x1, tmp2); + srll(tmp2, 53, tmp2); + ldi(tmp1, 2047, R0); + subl(tmp2, tmp1, res); +} + +void MacroAssembler::set64(Register d, long value) { + // yj todo: check and merge with mov_immediate64 + assert_not_delayed(); + + int32_t lo = (int32_t) (value); + int32_t hi = (int32_t) ((value - lo) >> 32); + + int16_t lo_h16 = (lo - (int16_t)(lo))>>16; + int16_t lo_l16 = (int16_t)(lo); + int16_t hi_h16 = (hi - (int16_t)(hi))>>16; + int16_t hi_l16 = (int16_t)(hi); + + if ( is_simm16(value) ) { + ldi(d, value, R0); + } else if ( hi != 0 ) { + if ( is_simm16(hi) ) { + ldi(d, hi, R0); + } else { + ldih(d, hi_h16, R0); + if (hi_l16 != 0) + ldi(d, hi_l16, d); + } + slll(d, 32, d); + if ( lo != 0 ) { + if ( ((int)lo_h16 == -32768) && ((int)lo_l16 < 0)) { + // original val was in range 0x7FFF8000..0x7FFFFFFF + ldih(d, 0x4000, d); + ldih(d, 0x4000, d); + if (lo_l16 != 0) + ldi(d, lo_l16, d); + } else { + ldih(d, lo_h16, d); + if (lo_l16 != 0) + ldi(d, lo_l16, d); + } + } + } else if ( (hi == 0) && (lo != 0) ) { + if ( ((int)lo_h16 == -32768) && ((int)lo_l16 < 0)) { + // original val was in range 0x7FFF8000..0x7FFFFFFF + /* ldih(d, lo_h16, R0); + * ldi(d, lo_l16, d); + * addw(d, 0, d); */ + ldih(d, 0x4000, R0); + ldih(d, 0x4000, d); + if (lo_l16 != 0) + ldi(d, lo_l16, d); + } else { + ldih(d, lo_h16, R0); + if (lo_l16 != 0) + ldi(d, lo_l16, d); + } + } else { + tty->print_cr("value = 0x%lx", value); + guarantee(false, "Not supported yet in set64!"); + } +} + +void MacroAssembler::push(int32_t imm32) { + assert(imm32==NULL_WORD, "we don't support imm other than 0"); + subl(esp, 8, esp); + stl(R0, 0, esp); +} + +void MacroAssembler::push(Register src) { + subl(esp, 8, esp); + stl(src, 0, esp); +} + +void MacroAssembler::pop(Register dst) { + ldl(dst, 0, esp); + addl(esp, 8, esp); +} + +void MacroAssembler::push2(Register reg1, Register reg2) { + addiu(esp, -16, esp); + stl(reg2, 0, esp); + stl(reg1, 8, esp); +} + +void MacroAssembler::pusha() { + ShouldNotReachHere(); +} + +void MacroAssembler::popa() { + ShouldNotReachHere(); +} + +void MacroAssembler::add(Register Rd, Register Rn, RegisterOrConstant increment) { + ShouldNotReachHere(); +} + +void MacroAssembler::sub(Register Rd, Register Rn, RegisterOrConstant decrement) { + ShouldNotReachHere(); +} + +// this simulates the behaviour of the x86 cmpxchg instruction using a +// load linked/store conditional pair. we use the acquire/release +// versions of these instructions so that we flush pending writes as +// per Java semantics. + +// n.b the x86 version assumes the old value to be compared against is +// in rax and updates rax with the value located in memory if the +// cmpxchg fails. 
we supply a register for the old value explicitly + +// the sw64 load linked/store conditional instructions do not +// accept an offset. so, unlike x86, we must provide a plain register +// to identify the memory word to be compared/exchanged rather than a +// register+offset Address. + +void MacroAssembler::cmpxchgptr(Register oldv, Register newv, Register addr, Register tmp, + Label &succeed, Label *fail) { + ShouldNotReachHere(); +} + +void MacroAssembler::cmpxchg_obj_header(Register oldv, Register newv, Register obj, Register tmp, + Label &succeed, Label *fail) { + ShouldNotReachHere(); +} + +void MacroAssembler::cmpxchgw(Register oldv, Register newv, Register addr, Register tmp, + Label &succeed, Label *fail) { + ShouldNotReachHere(); +} + +#ifndef PRODUCT +extern "C" void findpc(intptr_t x); +#endif + +void MacroAssembler::debug(char* msg) { + if ( ShowMessageBoxOnError ) { + JavaThreadState saved_state = JavaThread::current()->thread_state(); + JavaThread::current()->set_thread_state(_thread_in_vm); + { + // In order to get locks work, we need to fake a in_VM state + ttyLocker ttyl; + ::tty->print_cr("EXECUTION STOPPED: %s\n", msg); + if (CountBytecodes || TraceBytecodes || StopInterpreterAt) { + BytecodeCounter::print(); + } + + } + ThreadStateTransition::transition(JavaThread::current(), _thread_in_vm, saved_state); + } + else { + ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg); + assert(false, "DEBUG MESSAGE: %s", msg); + } +} + +void MacroAssembler::debug64(char* msg, int64_t pc, int64_t regs[]) +{ + //::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg); + if ( ShowMessageBoxOnError ) { + JavaThreadState saved_state = JavaThread::current()->thread_state(); + JavaThread::current()->set_thread_state(_thread_in_vm); +#ifndef PRODUCT + // In order to get locks work, we need to fake a in_VM state + ttyLocker ttyl; + ::tty->print_cr("EXECUTION STOPPED: %s\n", msg); + if (CountBytecodes || TraceBytecodes || StopInterpreterAt) { + BytecodeCounter::print(); + } +#endif + } + fatal("DEBUG MESSAGE: %s", msg); +} + +// The java_calling_convention describes stack locations as ideal slots on +// a frame with no abi restrictions. Si +// nce we must observe abi restrictions +// (like the placement of the register window) the slots must be biased by +// the following value. +int reg2offset_in(VMReg r) { + // Account for saved rbp and return address + // This should really be in_preserve_stack_slots + return (r->reg2stack() + 4) * VMRegImpl::stack_slot_size; +} + +int reg2offset_out(VMReg r) { + return (r->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size; +} + +// A long move +void MacroAssembler::long_move(VMRegPair src, VMRegPair dst, Register tmp_reg) { + + // The calling conventions assures us that each VMregpair is either + // all really one physical register or adjacent stack slots. + // This greatly simplifies the cases here compared to sparc. 
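+  // Concretely, the four cases handled below are: reg -> reg (movl),
+  // reg -> outgoing stack slot (stl to Address(rsp, reg2offset_out(...))),
+  // incoming stack slot -> reg (ldl from the frame pointer), and
+  // stack -> stack, bounced through tmp_reg.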
+ + if (src.is_single_phys_reg() ) { + if (dst.is_single_phys_reg()) { + if (dst.first() != src.first()) { + movl(dst.first()->as_Register(), src.first()->as_Register()); + } + } else { + assert(dst.is_single_reg(), "not a stack pair"); + stl(src.first()->as_Register(), Address(rsp, reg2offset_out(dst.first()))); + } + } else if (dst.is_single_phys_reg()) { + assert(src.is_single_reg(), "not a stack pair"); + ldl(dst.first()->as_Register(), Address(rbp, reg2offset_out(src.first()))); + } else { + assert(src.is_single_reg() && dst.is_single_reg(), "not stack pairs"); + ldl(tmp_reg, Address(rbp, reg2offset_in(src.first()))); + stl(tmp_reg, Address(rsp, reg2offset_out(dst.first()))); + } +} + +// A double move +void MacroAssembler::double_move(VMRegPair src, VMRegPair dst, Register tmp_reg) { + + // The calling conventions assures us that each VMregpair is either + // all really one physical register or adjacent stack slots. + // This greatly simplifies the cases here compared to sparc. + + if (src.is_single_phys_reg() ) { + if (dst.is_single_phys_reg()) { + // In theory these overlap but the ordering is such that this is likely a nop + if ( src.first() != dst.first()) { + fmovd(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister()); + } + } else { + assert(dst.is_single_reg(), "not a stack pair"); + store_double(src.first()->as_FloatRegister(), Address(rsp, reg2offset_out(dst.first()))); + } + } else if (dst.is_single_phys_reg()) { + assert(src.is_single_reg(), "not a stack pair"); + load_double(dst.first()->as_FloatRegister(), Address(rbp, reg2offset_out(src.first()))); + } else { + assert(src.is_single_reg() && dst.is_single_reg(), "not stack pairs"); + ldl(tmp_reg, Address(rbp, reg2offset_in(src.first()))); + stl(tmp_reg, Address(rsp, reg2offset_out(dst.first()))); + } +} + +// A float arg may have to do float reg int reg conversion +void MacroAssembler::float_move(VMRegPair src, VMRegPair dst, Register tmp_reg) { + assert(!src.second()->is_valid() && !dst.second()->is_valid(), "bad float_move"); + + // The calling conventions assures us that each VMregpair is either + // all really one physical register or adjacent stack slots. + // This greatly simplifies the cases here compared to sparc. + + if (src.first()->is_stack()) { + if (dst.first()->is_stack()) { + ldwu (tmp_reg, Address(rfp, reg2offset_in(src.first())));//TODO:check jzy + stptr(tmp_reg, Address(rsp, reg2offset_out(dst.first()))); + } else { + // stack to reg + assert(dst.first()->is_FloatRegister(), "only expect float registers as parameters"); + load_float(dst.first()->as_FloatRegister(), Address(rfp, reg2offset_in(src.first()))); + } + } else if (dst.first()->is_stack()) { + // reg to stack + assert(src.first()->is_FloatRegister(), "only expect xmm registers as parameters"); + store_float(src.first()->as_FloatRegister(), Address(esp, reg2offset_out(dst.first()))); + } else { + // reg to reg + // In theory these overlap but the ordering is such that this is likely a nop + if ( src.first() != dst.first()) { + fmovs(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister()); + } + } +} + +// On 64 bit we will store integer like items to the stack as +// 64 bits items (sparc abi) even though java would only store +// 32bits for a parameter. 
On 32bit it will simply be 32 bits +// So this routine will do 32->32 on 32bit and 32->64 on 64bit +void MacroAssembler::move32_64(VMRegPair src, VMRegPair dst, Register tmp_reg) { + if (src.first()->is_stack()) { + if (dst.first()->is_stack()) { + // stack to stack + ldws(tmp_reg, Address(rbp, reg2offset_in(src.first()))); + stl(tmp_reg, Address(rsp, reg2offset_out(dst.first()))); + } else { + // stack to reg + ldws(dst.first()->as_Register(), Address(rfp, reg2offset_in(src.first()))); + } + } else if (dst.first()->is_stack()) { + // reg to stack + // Do we really have to sign extend??? + // movslq(src.first()->as_Register(), src.first()->as_Register()); + stl(src.first()->as_Register(), Address(rsp, reg2offset_out(dst.first()))); + } else { + // Do we really have to sign extend??? + // movslq(dst.first()->as_Register(), src.first()->as_Register()); + if (dst.first() != src.first()) { + movl(dst.first()->as_Register(), src.first()->as_Register()); + } + } +} + +void MacroAssembler::move_ptr(VMRegPair src, VMRegPair dst, Register tmp_reg) { + if (src.first()->is_stack()) { + if (dst.first()->is_stack()) { + // stack to stack + ldl(tmp_reg, Address(rbp, reg2offset_in(src.first()))); + stl(tmp_reg, Address(rsp, reg2offset_out(dst.first()))); + } else { + // stack to reg + ldl(dst.first()->as_Register(), Address(rbp, reg2offset_in(src.first()))); + } + } else if (dst.first()->is_stack()) { + // reg to stack + stl(src.first()->as_Register(), Address(rsp, reg2offset_out(dst.first()))); + } else { + if (dst.first() != src.first()) { + movl(dst.first()->as_Register(), src.first()->as_Register()); + } + } +} + +// An oop arg. Must pass a handle not the oop itself +void MacroAssembler::object_move(OopMap* map, + int oop_handle_offset, + int framesize_in_slots, + VMRegPair src, + VMRegPair dst, + bool is_receiver, + int* receiver_offset, Register tmp_reg) { + + // must pass a handle. First figure out the location we use as a handle + + Register rHandle = dst.first()->is_stack() ? 
tmp_reg : dst.first()->as_Register(); + + // See if oop is NULL if it is we need no handle + + if (src.first()->is_stack()) { + + // Oop is already on the stack as an argument + int offset_in_older_frame = src.first()->reg2stack() + SharedRuntime::out_preserve_stack_slots(); + map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + framesize_in_slots)); + if (is_receiver) { + *receiver_offset = (offset_in_older_frame + framesize_in_slots) * VMRegImpl::stack_slot_size; + } + + cmpptr(Address(rbp, reg2offset_in(src.first())), R0); + lea(rHandle, Address(rbp, reg2offset_in(src.first()))); + // conditionally move a NULL + ldptr(rscratch3, Address(rbp, reg2offset_in(src.first()))); + cmove(Assembler::equal, rHandle, rscratch3, rHandle); + } else { + + // Oop is in an a register we must store it to the space we reserve + // on the stack for oop_handles and pass a handle if oop is non-NULL + + const Register rOop = src.first()->as_Register(); + int oop_slot; + if (rOop == j_rarg0) + oop_slot = 0; + else if (rOop == j_rarg1) + oop_slot = 1; + else if (rOop == j_rarg2) + oop_slot = 2; + else if (rOop == j_rarg3) + oop_slot = 3; + else if (rOop == j_rarg4) + oop_slot = 4; + else { + assert(rOop == j_rarg5, "wrong register"); + oop_slot = 5; + } + + oop_slot = oop_slot * VMRegImpl::slots_per_word + oop_handle_offset; + int offset = oop_slot*VMRegImpl::stack_slot_size; + + map->set_oop(VMRegImpl::stack2reg(oop_slot)); + // Store oop in handle area, may be NULL + stptr(rOop, Address(rsp, offset)); + if (is_receiver) { + *receiver_offset = offset; + } + + cmpptr(rOop, R0); + lea(rHandle, Address(rsp, offset)); + // conditionally move a NULL from the handle area where it was just stored + ldptr(rscratch3, Address(rsp, offset)); + cmove(Assembler::equal, rHandle, rscratch3, rHandle); + } + + // If arg is on the stack then place it otherwise it is already in correct reg. + if (dst.first()->is_stack()) { + stptr(rHandle, Address(rsp, reg2offset_out(dst.first()))); + } +} + +void MacroAssembler::push_call_clobbered_registers() { + ShouldNotReachHere(); +} + +void MacroAssembler::pop_call_clobbered_registers() { + ShouldNotReachHere(); +} + +Address MacroAssembler::spill_address(int size, int offset, Register tmp) +{ + ShouldNotReachHere(); + Register base = sp; + return Address(base, offset); +} + +// Checks whether offset is aligned. +// Returns true if it is, else false. +bool MacroAssembler::merge_alignment_check(Register base, + size_t size, + long cur_offset, + long prev_offset) const { + ShouldNotReachHere(); + return 0; +} + +// Checks whether current and previous loads/stores can be merged. +// Returns true if it can be merged, else false. +bool MacroAssembler::ldst_can_merge(Register rt, + const Address &adr, + size_t cur_size_in_bytes, + bool is_store) const { + ShouldNotReachHere(); + return 0; +} + +// Merge current load/store with previous load/store into ldp/stp. +void MacroAssembler::merge_ldst(Register rt, + const Address &adr, + size_t cur_size_in_bytes, + bool is_store) { + ShouldNotReachHere(); +} + +/** + * Emits code to update CRC-32 with a 32-bit value according to tables 0 to 3 + * + * @param [in,out]crc Register containing the crc. + * @param [in]v Register containing the 32-bit to fold into the CRC. + * @param [in]table0 Register containing table 0 of crc constants. + * @param [in]table1 Register containing table 1 of crc constants. + * @param [in]table2 Register containing table 2 of crc constants. + * @param [in]table3 Register containing table 3 of crc constants. 
+ * + * uint32_t crc; + * v = crc ^ v + * crc = table3[v&0xff]^table2[(v>>8)&0xff]^table1[(v>>16)&0xff]^table0[v>>24] + * + */ +void MacroAssembler::update_word_crc32(Register crc, Register v, Register tmp, + Register table0, Register table1, Register table2, Register table3, + bool upper) { + ShouldNotReachHere(); +} + +void MacroAssembler::kernel_crc32_using_crc32(Register crc, Register buf, + Register len, Register tmp0, Register tmp1, Register tmp2, + Register tmp3) { + ShouldNotReachHere(); +} + +void MacroAssembler::kernel_crc32c_using_crc32c(Register crc, Register buf, + Register len, Register tmp0, Register tmp1, Register tmp2, + Register tmp3) { + ShouldNotReachHere(); +} + +/** + * @param crc register containing existing CRC (32-bit) + * @param buf register pointing to input byte buffer (byte*) + * @param len register containing number of bytes + * @param table register that will contain address of CRC table + * @param tmp scratch register + */ +void MacroAssembler::kernel_crc32c(Register crc, Register buf, Register len, + Register table0, Register table1, Register table2, Register table3, + Register tmp, Register tmp2, Register tmp3) { + kernel_crc32c_using_crc32c(crc, buf, len, table0, table1, table2, table3); +} + +void MacroAssembler::cmp_klass(Register oop, Register trial_klass, Register tmp) { + ShouldNotReachHere(); +} + +Address MacroAssembler::allocate_metadata_address(Metadata* obj) { + ShouldNotReachHere(); + int index = oop_recorder()->allocate_metadata_index(obj); + RelocationHolder rspec = metadata_Relocation::spec(index); + return Address(); +} + +Address MacroAssembler::constant_oop_address(jobject obj) { + ShouldNotReachHere(); + int oop_index = oop_recorder()->find_index(obj); + return Address(); +} + +// Move the address of the polling page into dest. +void MacroAssembler::get_polling_page(Register dest, address page, relocInfo::relocType rtype) { + should_not_reach_here("get_polling_page"); +} + +// Move the address of the polling page into r, then read the polling +// page. +address MacroAssembler::read_polling_page(Register r, address page, relocInfo::relocType rtype) { + get_polling_page(r, page, rtype); + return read_polling_page(r, rtype); +} + +// Read the polling page. The address of the polling page must +// already be in r. +address MacroAssembler::read_polling_page(Register r, relocInfo::relocType rtype) { + should_not_reach_here("read_polling_page"); + return 0; +} + +void MacroAssembler::adrp(Register reg1, const Address &dest, unsigned long &byte_offset) { + should_not_reach_here("adrp"); +} + +void MacroAssembler::load_byte_map_base(Register reg) { + should_not_reach_here("load_byte_map_base"); +} + +void MacroAssembler::build_frame(int framesize) { + should_not_reach_here("build_frame"); +} + +void MacroAssembler::remove_frame(int framesize) { + should_not_reach_here("remove_frame"); +} + +typedef void (MacroAssembler::* chr_insn)(Register Rt, const Address &adr); + +typedef void (MacroAssembler::* chr_insn)(Register Rt, const Address &adr); +typedef void (MacroAssembler::* uxt_insn)(Register Rd, Register Rn); + +// Compare Strings + +// For Strings we're passed the address of the first characters in a1 +// and a2 and the length in cnt1. +// elem_size is the element size in bytes: either 1 or 2. +// There are two implementations. For arrays >= 8 bytes, all +// comparisons (including the final one, which may overlap) are +// performed 8 bytes at a time. For strings < 8 bytes, we compare a +// halfword, then a short, and then a byte. 
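+// An illustrative scalar sketch of that strategy, with cnt taken as a byte
+// count and load8() standing in for an 8-byte load (placeholder names only;
+// the SW64 version below is still an unimplemented stub):
+//
+//   if (cnt >= 8) {
+//     for (; cnt > 8; a1 += 8, a2 += 8, cnt -= 8)
+//       if (load8(a1) != load8(a2)) return false;
+//     return load8(a1 + cnt - 8) == load8(a2 + cnt - 8);  // final compare may overlap
+//   }
+//   // cnt < 8: finish with progressively narrower loads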
+ +void MacroAssembler::string_equals(Register a1, Register a2, + Register result, Register cnt1, int elem_size) +{ + should_not_reach_here("string_equals"); +} + + +// The size of the blocks erased by the zero_blocks stub. We must +// handle anything smaller than this ourselves in zero_words(). +const int MacroAssembler::zero_words_block_size = 8; + +// zero_words() is used by C2 ClearArray patterns. It is as small as +// possible, handling small word counts locally and delegating +// anything larger to the zero_blocks stub. It is expanded many times +// in compiled code, so it is important to keep it short. + +// ptr: Address of a buffer to be zeroed. +// cnt: Count in HeapWords. +// +// ptr, cnt, rscratch1, and rscratch2 are clobbered. +void MacroAssembler::zero_words(Register ptr, Register cnt) +{ + should_not_reach_here("zero_words"); +} + +// base: Address of a buffer to be zeroed, 8 bytes aligned. +// cnt: Immediate count in HeapWords. +#define SmallArraySize (18 * BytesPerLong) +void MacroAssembler::zero_words(Register base, u_int64_t cnt) +{ + should_not_reach_here("zero_words"); +} + +////// Zero blocks of memory by using DC ZVA. +////// +////// Aligns the base address first sufficently for DC ZVA, then uses +////// DC ZVA repeatedly for every full block. cnt is the size to be +////// zeroed in HeapWords. Returns the count of words left to be zeroed +////// in cnt. +////// +////// NOTE: This is intended to be used in the zero_blocks() stub. If +////// you want to use it elsewhere, note that cnt must be >= 2*zva_length. +////void MacroAssembler::zero_dcache_blocks(Register base, Register cnt) { +//// Register tmp = rscratch1; +//// Register tmp2 = rscratch2; +//// int zva_length = VM_Version::zva_length(); +//// Label initial_table_end, loop_zva; +//// Label fini; +//// +//// // Base must be 16 byte aligned. If not just return and let caller handle it +//// tst(base, 0x0f); +//// br(Assembler::NE, fini); +//// // Align base with ZVA length. +//// neg(tmp, base); +//// andr(tmp, tmp, zva_length - 1); +//// +//// // tmp: the number of bytes to be filled to align the base with ZVA length. +//// add(base, base, tmp); +//// sub(cnt, cnt, tmp, Assembler::ASR, 3); +//// adr(tmp2, initial_table_end); +//// sub(tmp2, tmp2, tmp, Assembler::LSR, 2); +//// br(tmp2); +//// +//// for (int i = -zva_length + 16; i < 0; i += 16) +//// stp(zr, zr, Address(base, i)); +//// BIND(initial_table_end); +//// +//// sub(cnt, cnt, zva_length >> 3); +//// BIND(loop_zva); +//// dc(Assembler::ZVA, base); +//// subs(cnt, cnt, zva_length >> 3); +//// add(base, base, zva_length); +//// br(Assembler::GE, loop_zva); +//// add(cnt, cnt, zva_length >> 3); // count not zeroed by DC ZVA +//// BIND(fini); +////} + +// base: Address of a buffer to be filled, 8 bytes aligned. +// cnt: Count in 8-byte unit. +// value: Value to be filled with. +// base will point to the end of the buffer after filling. +void MacroAssembler::fill_words(Register base, Register cnt, Register value) +{ + should_not_reach_here("fill_words"); +} + +void MacroAssembler::safepoint_poll(Label& slow_path, Register thread_reg, Register temp_reg, bool at_return, bool acquire, bool in_nmethod) {SCOPEMARK_NAME(safepoint_poll, this); + // assert(thread_reg == rthread, "should be"); + ldptr(temp_reg, Address(thread_reg, JavaThread::polling_word_offset())); + if (acquire){ + memb(); + } + if (at_return) { + // Note that when in_nmethod is set, the stack pointer is incremented before the poll. 
Therefore, + // we may safely use rsp instead to perform the stack watermark check. + cmpule(in_nmethod ? rsp : rfp, temp_reg, rcc); + beq_l(rcc, slow_path); + }else{ + testb(temp_reg, exact_log2(SafepointMechanism::poll_bit()), temp_reg); + jcc(Assembler::notZero, slow_path, temp_reg); + } +} + +// Just like safepoint_poll, but use an acquiring load for thread- +// local polling. +// +// We need an acquire here to ensure that any subsequent load of the +// global SafepointSynchronize::_state flag is ordered after this load +// of the local Thread::_polling page. We don't want this poll to +// return false (i.e. not safepointing) and a later poll of the global +// SafepointSynchronize::_state spuriously to return true. +// +// This is to avoid a race when we're in a native->Java transition +// racing the code which wakes up from a safepoint. +// +void MacroAssembler::safepoint_poll_acquire(Label& slow_path) { + should_not_reach_here("safepoint_poll_acquire"); +} + +void MacroAssembler::far_call(Address entry, CodeBuffer *cbuf, Register tmp) { + ShouldNotReachHere(); +} + +void MacroAssembler::far_jump(Address entry, CodeBuffer *cbuf, Register tmp) { + ShouldNotReachHere(); +} + +void MacroAssembler::jr(address entry) { + patchable_jump(entry); +} + +void MacroAssembler::jr(address entry, relocInfo::relocType rtype) { + switch (rtype) { + case relocInfo::runtime_call_type: + case relocInfo::none: + jr(entry); + break; + default: + { + InstructionMark im(this); + relocate(rtype); + patchable_jump(entry); + } + break; + } +} + +void MacroAssembler::patchable_jump(address target) { + if (reachable_from_cache(target)) { + nop(); + nop(); + nop(); + nop(); + beq_a(R0, target); + } else { + if (SafePatch) { + if (offset() % 8 == 0) { + nop(); + br(T12, 2); + emit_int64((long) target); + ldl(T12, 0, T12); + } else { + br(T12, 2); + emit_int64((long) target); + ldl(T12, 0, T12); + nop(); + } + } else { + prepare_patch_li48(T12, (long) target); + } + jmp(T12); + } +} + +void MacroAssembler::call_patch(address entry) { +// c/c++ code assume T12 is entry point, so we just always move entry to t12 +// maybe there is some more graceful method to handle this. FIXME +// For more info, see class NativeCall. + patchable_call(entry); +} + +void MacroAssembler::call_patch(address entry, relocInfo::relocType rtype) { + switch (rtype) { +// case relocInfo::runtime_call_type: +// patchable_call_setfpec1(entry); +// break; + case relocInfo::none: + call_patch(entry); + break; + default: + { + InstructionMark im(this); + relocate(rtype); + call_patch(entry); + } + break; + } +} + +void MacroAssembler::patchable_call(address target, Label *retAddr, Register tmp) { + if (reachable_from_cache(target)) { + nop(); + nop(); + nop(); + nop(); + bsr(RA, (int) (long) target); + } else { + if (SafePatch) { + if (offset() % 8 == 0) { + nop(); + br(T12, 2); + emit_int64((long) target); + ldl(T12, 0, T12); + } else { + br(T12, 2); + emit_int64((long) target); + ldl(T12, 0, T12); + nop(); + } + } else { + prepare_patch_li48(tmp, (long) target); + if (tmp != T12) { + movl(T12, tmp); + } + } + Assembler::call(RA, T12, 0); + if (retAddr) + bind(*retAddr); + if (UseSetfpec) + setfpec1(); + } +} + +//void MacroAssembler::patchable_call_setfpec1(address target) { +// if (reachable_from_cache(target)) { +// nop(); +// nop(); +// nop(); +// nop(); +// bsr(RA, (int)(long)target); +// } else { +//// movptr(T12, (long)target); +// //jalr_setfpec1(T12); +// jmp(T12, rscratch1); +// } +//} + +// Maybe emit a call via a trampoline. 
If the code cache is small +// trampolines won't be emitted. + +address MacroAssembler::trampoline_call(Address entry, CodeBuffer *cbuf) { + ShouldNotReachHere(); + return 0; +} + + +// Emit a trampoline stub for a call to a target which is too far away. +// +// code sequences: +// +// call-site: +// branch-and-link to or +// +// Related trampoline stub for this call site in the stub section: +// load the call target from the constant pool +// branch (LR still points to the call site above) + +address MacroAssembler::emit_trampoline_stub(int insts_call_instruction_offset, + address dest) { + ShouldNotReachHere(); + return 0; +} + +void MacroAssembler::emit_static_call_stub() { + ShouldNotReachHere(); +} + +// These two are taken from x86, but they look generally useful + +// scans count pointer sized words at [addr] for occurence of value, +// generic +void MacroAssembler::repne_scan(Register addr, Register value, Register count, + Register scratch) { + ShouldNotReachHere(); +} + +// scans count 4 byte words at [addr] for occurence of value, +// generic +void MacroAssembler::repne_scanw(Register addr, Register value, Register count, + Register scratch) { + ShouldNotReachHere(); +} diff --git a/src/hotspot/cpu/sw64/macroAssembler_sw64.hpp b/src/hotspot/cpu/sw64/macroAssembler_sw64.hpp new file mode 100644 index 00000000000..5e44ac21dce --- /dev/null +++ b/src/hotspot/cpu/sw64/macroAssembler_sw64.hpp @@ -0,0 +1,2113 @@ +/* + * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_SW64_VM_MACROASSEMBLER_SW64_HPP +#define CPU_SW64_VM_MACROASSEMBLER_SW64_HPP + +#include "asm/assembler.hpp" +#include "code/vmreg.inline.hpp" +#include "compiler/oopMap.hpp" +#include "utilities/macros.hpp" +#include "runtime/rtmLocking.hpp" +#include "runtime/vm_version.hpp" + +#ifdef PRODUCT +#define SCOPEMARK /* nothing */ +#define SCOPEMARK_NAME(name, masm) /* nothing */ +#else +#define SCOPEMARK \ +char line[200]; sprintf(line,"%s:%d",__FILE__, __LINE__);\ +ScopeMark scopeMark(_masm, line); + +#define SCOPEMARK2 \ +char line[200]; sprintf(line,"%s:%d",__FILE__, __LINE__);\ +ScopeMark scopeMark(this, line); + +#define SCOPEMARK_NAME(name, masm) \ +char line[200]; sprintf(line,"%s:%d",__FILE__, __LINE__);\ +ScopeMark scopeMark(masm, line, #name); + +#endif + +// MacroAssembler extends Assembler by frequently used macros. +// +// Instructions for which a 'better' code sequence exists depending +// on arguments should also go in here. 
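+// Typical use of the scope-marking macros defined above (illustrative; the
+// method name is a placeholder):
+//
+//   void MacroAssembler::some_stub(Register r) {
+//     SCOPEMARK_NAME(some_stub, this);  // builds a ScopeMark from __FILE__:__LINE__ and "some_stub"
+//     ...
+//   }
+//
+// In PRODUCT builds both SCOPEMARK and SCOPEMARK_NAME expand to nothing, so
+// they add no overhead to release code.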
+ +class MacroAssembler: public Assembler { + friend class LIR_Assembler; + + public: + using Assembler::offset; + + // Support for VM calls + // + // This is the base routine called by the different versions of call_VM_leaf. The interpreter + // may customize this version by overriding it for its purposes (e.g., to save/restore + // additional registers when doing a VM call). + + virtual void call_VM_leaf_base( + address entry_point, // the entry point + int number_of_arguments // the number of arguments to pop after the call + ); + + //TODO:refactor use this edition to deal with label + virtual void call_VM_leaf_base( + address entry_point, // the entry point + int number_of_arguments, // the number of arguments to pop after the call + Label *retaddr, + Register rscratch = T12 + ); + + protected: + // This is the base routine called by the different versions of call_VM. The interpreter + // may customize this version by overriding it for its purposes (e.g., to save/restore + // additional registers when doing a VM call). + // + // If no java_thread register is specified (noreg) than rthread will be used instead. call_VM_base + // returns the register which contains the thread upon return. If a thread register has been + // specified, the return value will correspond to that register. If no last_java_sp is specified + // (noreg) than rsp will be used instead. + virtual void call_VM_base( // returns the register containing the thread upon return + Register oop_result, // where an oop-result ends up if any; use noreg otherwise + Register java_thread, // the thread if computed before ; use noreg otherwise + Register last_java_sp, // to set up last_Java_frame in stubs; use noreg otherwise + address entry_point, // the entry point + int number_of_arguments, // the number of arguments (w/o thread) to pop after the call + bool check_exceptions // whether to check for pending exceptions after return + ); + + void call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions = true); + + public: + MacroAssembler(CodeBuffer* code) : Assembler(code) {} + + // These routines should emit JVMTI PopFrame and ForceEarlyReturn handling code. + // The implementation is only non-empty for the InterpreterMacroAssembler, + // as only the interpreter handles PopFrame and ForceEarlyReturn requests. + virtual void check_and_handle_popframe(Register java_thread); + virtual void check_and_handle_earlyret(Register java_thread); + + Address as_Address(AddressLiteral adr); + Address as_Address(ArrayAddress adr, Register base_reg); + + // Support for NULL-checks + // + // Generates code that causes a NULL OS exception if the content of reg is NULL. + // If the accessed location is M[reg + offset] and the offset is known, provide the + // offset. No explicit code generation is needed if the offset is within a certain + // range (0 <= offset <= page_size). + + virtual void null_check(Register reg, int offset = -1); + static bool needs_explicit_null_check(intptr_t offset); + static bool uses_implicit_null_check(void* address); + + // Required platform-specific helpers for Label::patch_instructions. + // They _shadow_ the declarations in AbstractAssembler, which are undefined. 
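+  // For example, binding a Label after its branch has been emitted goes through
+  // pd_patch_instruction() below, which uses patched_branch() to rewrite only
+  // the displacement field of the instruction word (21-bit disp for br/beq/...,
+  // 16-bit disp for ret/jmp/call, 16-bit simm for ldi) and keeps the other bits.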
+ static int pd_patch_instruction_size(address branch, address target); + static void pd_patch_instruction_aarch(address branch, address target) { + pd_patch_instruction_size(branch, target); + } + static address pd_call_destination(address branch) { + ShouldNotReachHere(); + return 0; + } + int patched_branch(int dest_pos, int inst, int inst_pos); + void pd_patch_instruction(address branch, address target, const char* file=NULL, int line=0) { + jint& stub_inst = *(jint*) branch; + stub_inst = patched_branch(target - branch, stub_inst, 0); + } + +#ifndef PRODUCT + static void pd_print_patched_instruction(address branch); +#endif + + static int patch_oop(address insn_addr, address o); + static int patch_narrow_klass(address insn_addr, narrowKlass n); + + //void li64(Register rd, long imm); + //prepare target address for patcher(li48) + void prepare_patch_li48(Register rd, long imm); + + address emit_trampoline_stub(int insts_call_instruction_offset, address target); + void emit_static_call_stub(); + + void load_unsigned_byte(Register dst, Address src); + void load_unsigned_short(Register dst, Address src); + + void load_signed_byte32(Register rd, Address addr, Register tmp=rcc); + void load_signed_byte64(Register rd, Address addr, Register tmp=rcc); + void load_signed_short(Register rd, Address addr); + + // Support for sign-extension (hi:lo = extend_sign(lo)) + void extend_sign(Register hi, Register lo); + + // Load and store values by size and signed-ness + void load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2 = noreg); + void store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2 = noreg); + + // Support for inc/dec with optimal instruction selection depending on value + + // x86_64 aliases an unqualified register/address increment and + // decrement to call incrementq and decrementq but also supports + // explicitly sized calls to incrementq/decrementq or + // incrementl/decrementl + + // for sw64 the proper convention would be to use + // increment/decrement for 64 bit operatons and + // incrementw/decrementw for 32 bit operations. so when porting + // x86_64 code we can leave calls to increment/decrement as is, + // replace incrementq/decrementq with increment/decrement and + // replace incrementl/decrementl with incrementw/decrementw. + + // n.b. increment/decrement calls with an Address destination will + // need to use a scratch register to load the value to be + // incremented. increment/decrement calls which add or subtract a + // constant value greater than 2^12 will need to use a 2nd scratch + // register to hold the constant. 
so, a register increment/decrement + // may trash rscratch2 and an address increment/decrement trash + // rscratch and rscratch2 + + void decrement(Register reg, int value = 1){decrementl(reg, value);} + void increment(Register reg, int value = 1){incrementl(reg, value);} + + void decrementw(ExternalAddress dst, int value = 1, Register tmp1 = rscratch1, Register tmp2 = rscratch2); + void decrementw(Address dst, int value = 1, Register tmp = rcc); + void decrementw(Register reg, int value = 1); + + void decrementl(ExternalAddress dst, int value = 1, Register tmp1 = rscratch1, Register tmp2 = rscratch2); + void decrementl(Address dst, int value = 1, Register tmp = rcc); + void decrementl(Register reg, int value = 1); + + void incrementw(AddressLiteral dst, int value = 1, Register tmp1 = rscratch1, Register tmp2 = rscratch2); + void incrementw(Address dst, int value = 1, Register tmp_not_rcc=rscratch1); + void incrementw(Register reg, int value = 1); + + void incrementl(ExternalAddress dst, int value = 1, Register tmp1 = rscratch1, Register tmp2 = rscratch2); + void incrementl(Address dst, int value = 1, Register tmp = rcc); + void incrementl(Register reg, int value = 1); + + + // Alignment + void align(int modulus); + + // Stack frame creation/removal + void enter(); + void leave(); + + // Support for getting the JavaThread pointer (i.e.; a reference to thread-local information) + // The pointer will be loaded into the thread register. + void get_thread(Register thread); + + // Support for argument shuffling + void move32_64(VMRegPair src, VMRegPair dst, Register tmp_reg = rax); + void long_move(VMRegPair src, VMRegPair dst, Register tmp_reg = rax); + void float_move(VMRegPair src, VMRegPair dst, Register tmp_reg = rax); + void double_move(VMRegPair src, VMRegPair dst, Register tmp_reg = rax); + void move_ptr(VMRegPair src, VMRegPair dst, Register tmp_reg = rax); + void object_move(OopMap* map, + int oop_handle_offset, + int framesize_in_slots, + VMRegPair src, + VMRegPair dst, + bool is_receiver, + int* receiver_offset, Register tmp_reg = rax); + + // Support for VM calls + // + // It is imperative that all calls into the VM are handled via the call_VM macros. + // They make sure that the stack linkage is setup correctly. call_VM's correspond + // to ENTRY/ENTRY_X entry points while call_VM_leaf's correspond to LEAF entry points. 
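+  // A representative call site (illustrative; SomeRuntime::entry is a
+  // placeholder for a real VM entry point):
+  //
+  //   call_VM(V0, CAST_FROM_FN_PTR(address, SomeRuntime::entry), A1);
+  //
+  // passes A1 as the single argument, checks for a pending exception on return
+  // (check_exceptions defaults to true) and leaves any oop result in V0;
+  // call_VM_leaf is the counterpart for LEAF entry points.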
+ + + void call_VM(Register oop_result, + address entry_point, + bool check_exceptions = true); + void call_VM(Register oop_result, + address entry_point, + Register arg_1, + bool check_exceptions = true); + void call_VM(Register oop_result, + address entry_point, + Register arg_1, Register arg_2, + bool check_exceptions = true); + void call_VM(Register oop_result, + address entry_point, + Register arg_1, Register arg_2, Register arg_3, + bool check_exceptions = true); + + // Overloadings with last_Java_sp + void call_VM(Register oop_result, + Register last_java_sp, + address entry_point, + int number_of_arguments = 0, + bool check_exceptions = true); + void call_VM(Register oop_result, + Register last_java_sp, + address entry_point, + Register arg_1, bool + check_exceptions = true); + void call_VM(Register oop_result, + Register last_java_sp, + address entry_point, + Register arg_1, Register arg_2, + bool check_exceptions = true); + void call_VM(Register oop_result, + Register last_java_sp, + address entry_point, + Register arg_1, Register arg_2, Register arg_3, + bool check_exceptions = true); + + void get_vm_result (Register oop_result, Register thread); + void get_vm_result_2(Register metadata_result, Register thread); + +// // These always tightly bind to MacroAssembler::call_VM_base +// // bypassing the virtual implementation +// void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, int number_of_arguments = 0, bool check_exceptions = true); +// void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, bool check_exceptions = true); +// void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, bool check_exceptions = true); +// void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, Register arg_3, bool check_exceptions = true); +// void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, Register arg_3, Register arg_4, bool check_exceptions = true); + + void call_VM_leaf0(address entry_point); + void call_VM_leaf(address entry_point, + int number_of_arguments = 0); + void call_VM_leaf(address entry_point, + Register arg_1); + void call_VM_leaf(address entry_point, + Register arg_1, Register arg_2); + void call_VM_leaf(address entry_point, + Register arg_1, Register arg_2, Register arg_3); + + // These always tightly bind to MacroAssembler::call_VM_leaf_base + // bypassing the virtual implementation + void super_call_VM_leaf(address entry_point); + void super_call_VM_leaf(address entry_point, Register arg_1); + void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2); + void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3); + void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3, Register arg_4); + + // last Java Frame (fills frame anchor) +// void set_last_Java_frame(Register thread, +// Register last_java_sp, +// Register last_java_fp, +// address last_java_pc); + + // thread in the default location +// void set_last_Java_frame(Register last_java_sp, +// Register last_java_fp, +// address last_java_pc); + + void set_last_Java_frame(Register last_java_sp, + Register last_java_fp, + address last_java_pc, + Register scratch); + + void set_last_Java_frame(Register last_java_sp, + Register last_java_fp, + Label &last_java_pc, + Register scratch, 
Register scratch2=rscratch2_AT); + + /*void set_last_Java_frame(Register last_java_sp, + Register last_java_fp, + Register last_java_pc, + Register scratch);*/ + + void reset_last_Java_frame(Register thread, bool clear_fp); + +//// void reset_last_Java_frame(Register thread); + + // thread in the default location (rthread) + void reset_last_Java_frame(bool clear_fp); + + // Stores +//// void store_check(Register obj); // store check for obj - register is destroyed afterwards +//// void store_check(Register obj, Address dst); // same as above, dst is exact store location (reg. is destroyed) + + void resolve_jobject(Register value, Register thread, Register tmp); + + // C 'boolean' to Java boolean: x == 0 ? 0 : 1 + void c2bool(Register x); + + // oop manipulations + void load_klass(Register dst, Register src); + void store_klass(Register dst, Register src); + + void access_load_at(BasicType type, DecoratorSet decorators, Register dst, Address src, + Register tmp1, Register tmp_thread); + + void access_store_at(BasicType type, DecoratorSet decorators, Address dst, Register src, + Register tmp1, Register tmp_thread); + + void load_heap_oop(Register dst, Address src, Register tmp1 = noreg, + Register thread_tmp = noreg, DecoratorSet decorators = 0); + void load_heap_oop_not_null(Register dst, Address src, Register tmp1 = noreg, + Register thread_tmp = noreg, DecoratorSet decorators = 0); + void store_heap_oop(Address dst, Register src, Register tmp1 = noreg, + Register tmp_thread = noreg, DecoratorSet decorators = 0); + + // currently unimplemented + // Used for storing NULL. All other oop constants should be + // stored using routines that take a jobject. + void store_heap_oop_null(Address dst); + + void load_prototype_header(Register dst, Register src); + + void store_klass_gap(Register dst, Register src); + + // This dummy is to prevent a call to store_heap_oop from + // converting a zero (like NULL) into a Register by giving + // the compiler two choices it can't resolve + +//// void store_heap_oop(Address dst, void* dummy); + + void encode_heap_oop(Register dst, Register src); + void encode_heap_oop(Register r) { encode_heap_oop(r, r); } + void decode_heap_oop(Register dst, Register src); + void decode_heap_oop(Register r) { decode_heap_oop(r, r); } + void encode_heap_oop_not_null(Register r); + void decode_heap_oop_not_null(Register r); + void encode_heap_oop_not_null(Register dst, Register src); + void decode_heap_oop_not_null(Register dst, Register src); + + void set_narrow_oop(Register dst, jobject obj); + void set_narrow_oop(Address dst, jobject obj); + void cmp_narrow_oop(Register dst, jobject obj, Register ccReg=rcc); + void cmp_narrow_oop(Address dst, jobject obj, Register ccReg=rcc); + + void emit_data(RelocationHolder const& rspec, int format); + void mov_narrow_oop(Register dst, int32_t imm32, RelocationHolder const& rspec); + + void encode_klass_not_null(Register r); + void decode_klass_not_null(Register r); + void encode_klass_not_null(Register dst, Register src); + void decode_klass_not_null(Register dst, Register src); + void set_narrow_klass(Register dst, Klass* k); + void set_narrow_klass(Address dst, Klass* k); + void cmp_narrow_klass(Register dst, Klass* k, Register ccReg=rcc); + void cmp_narrow_klass(Address dst, Klass* k, Register ccReg=rcc); + + // if heap base register is used - reinit it with the correct value + void reinit_heapbase(); + + DEBUG_ONLY(void verify_heapbase(const char* msg);) + + void push_CPU_state(bool save_vectors = false); + void pop_CPU_state(bool 
restore_vectors = false) ; + + // Round up to a power of two + void round_to(Register reg, int modulus); + + // allocation + void eden_allocate( + Register thread, // Current thread + Register obj, // result: pointer to object after successful allocation + Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise + int con_size_in_bytes, // object size in bytes if known at compile time + Register t1, // temp register + Label& slow_case // continuation point if fast allocation fails + ); + void tlab_allocate( + Register thread, // Current thread + Register obj, // result: pointer to object after successful allocation + Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise + int con_size_in_bytes, // object size in bytes if known at compile time + Register t1, // temp register + Register t2, // temp register + Label& slow_case // continuation point if fast allocation fails + ); + void zero_memory(Register addr, Register len, Register t1); + + // interface method calling + void lookup_interface_method(Register recv_klass, + Register intf_klass, + RegisterOrConstant itable_index, + Register method_result, + Register scan_temp, + Label& no_such_interface, + bool return_method = true); + + // virtual method calling + // n.b. x86 allows RegisterOrConstant for vtable_index + void lookup_virtual_method(Register recv_klass, + RegisterOrConstant vtable_index, + Register method_result); + + // Test sub_klass against super_klass, with fast and slow paths. + + // The fast path produces a tri-state answer: yes / no / maybe-slow. + // One of the three labels can be NULL, meaning take the fall-through. + // If super_check_offset is -1, the value is loaded up from super_klass. + // No registers are killed, except temp_reg. + void check_klass_subtype_fast_path(Register sub_klass, + Register super_klass, + Register temp_reg, + Label* L_success, + Label* L_failure, + Label* L_slow_path, + RegisterOrConstant super_check_offset = RegisterOrConstant(-1)); + + // The rest of the type check; must be wired to a corresponding fast path. + // It does not repeat the fast path logic, so don't use it standalone. + // The temp_reg and temp2_reg can be noreg, if no temps are available. + // Updates the sub's secondary super cache as necessary. + // If set_cond_codes, condition codes will be Z on success, NZ on failure. + void check_klass_subtype_slow_path(Register sub_klass, + Register super_klass, + Register temp_reg, + Register temp2_reg, + Label* L_success, + Label* L_failure, + bool set_cond_codes = false); + + // Simplified, combined version, good for typical uses. + // Falls through on failure. 
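+  // (chains the fast path into the slow path; branches to L_success on a hit)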
+ void check_klass_subtype(Register sub_klass, + Register super_klass, + Register temp_reg, + Label& L_success); + + void clinit_barrier(Register klass, + Register thread, + Label* L_fast_path = NULL, + Label* L_slow_path = NULL); + + Address argument_address(RegisterOrConstant arg_slot, int extra_slot_offset = 0); + + + // Debugging + + // only if +VerifyOops + void _verify_oop(Register reg, const char* s, const char* file, int line); + void _verify_oop_addr(Address addr, const char * s, const char* file, int line); + + void _verify_oop_checked(Register reg, const char* s, const char* file, int line) { + if (VerifyOops) { + _verify_oop(reg, s, file, line); + } + } + + void _verify_oop_addr_checked(Address reg, const char* s, const char* file, int line) { + if (VerifyOops) { + _verify_oop_addr(reg, s, file, line); + } + } + +// TODO: verify method and klass metadata (compare against vptr?) + void _verify_method_ptr(Register reg, const char * msg, const char * file, int line) {} + void _verify_klass_ptr(Register reg, const char * msg, const char * file, int line){} + +#define verify_oop(reg) _verify_oop_checked(reg, "broken oop " #reg, __FILE__, __LINE__) +#define verify_oop_msg(reg, msg) _verify_oop_checked(reg, "broken oop " #reg ", " #msg, __FILE__, __LINE__) +#define verify_oop_addr(addr) _verify_oop_addr_checked(addr, "broken oop addr " #addr, __FILE__, __LINE__) +#define verify_method_ptr(reg) _verify_method_ptr(reg, "broken method " #reg, __FILE__, __LINE__) +#define verify_klass_ptr(reg) _verify_klass_ptr(reg, "broken klass " #reg, __FILE__, __LINE__) + + // only if +VerifyFPU + void verify_FPU(int stack_depth, const char* s = "illegal FPU state"); + + // prints msg, dumps registers and stops execution + void stop(const char* msg); + //use for sw debug, need to refactor, like int3 in x86 platform jzy + void debug_stop(const char* msg); + + void int3() { + call(RuntimeAddress(CAST_FROM_FN_PTR(address, os::breakpoint))); + } +// // prints msg and continues + void warn(const char* msg); + + static void debug(char* msg); + static void debug64(char* msg, int64_t pc, int64_t regs[]); + +// void untested() { stop("untested"); } +// + void unimplemented(const char* what = ""); + + void should_not_reach_here(const char* what="should_not_reach_here") { stop(what); } + + // Stack overflow checking + void bang_stack_with_offset(int offset) { + // stack grows down, caller passes positive offset + assert(offset > 0, "must bang with negative offset"); + if (offset <= 32768) { + stw(R0, -offset, esp); + } else { + mov_immediate64(rscratch2, offset); + subl(esp, rscratch2, rscratch2); + stw(R0, 0, rscratch2); + } + } + + // Writes to stack successive pages until offset reached to check for + // stack overflow + shadow pages. Also, clobbers tmp + void bang_stack_size(Register size, Register tmp); + + // Check for reserved stack access in method being exited (for JIT) + void reserved_stack_check(); + + void safepoint_poll(Label& slow_path, Register thread_reg, Register temp_reg, bool at_return, bool acquire, bool in_nmethod); + void safepoint_poll_acquire(Label& slow_path); + + void verify_tlab(); + + // Biased locking support + // lock_reg and obj_reg must be loaded up with the appropriate values. + // swap_reg is killed. + // tmp_reg must be supplied and must not be rscratch1 or rscratch2 + // Optional slow case is for implementations (interpreter and C1) which branch to + // slow case directly. Leaves condition codes set for C2's Fast_Lock node. 
+ void biased_locking_enter(Register lock_reg, Register obj_reg, + Register swap_reg, Register tmp_reg, + bool swap_reg_contains_mark, + Label& done, Label* slow_case = NULL, + BiasedLockingCounters* counters = NULL); + void biased_locking_exit (Register obj_reg, Register temp_reg, Label& done); + #ifdef COMPILER2 + void atomic_incw(AddressLiteral counter_addr, int inc, Register tmp_reg1); + // Code used by cmpFastLock and cmpFastUnlock mach instructions in .ad file. + // See full desription in macroAssembler_x86.cpp. + void fast_lock(Register obj, Register box, Register tmp, + Register scr, Register cx1, Register cx2, + BiasedLockingCounters* counters, + Metadata* method_data, + bool use_rtm, bool profile_rtm); + void fast_unlock(Register obj, Register box, Register tmp, bool use_rtm); +#endif + // Fill primitive arrays + void generate_fill(BasicType t, bool aligned, + Register to, Register value, Register count, + Register rtmp); + +// gpr load store instructions + +#define LDINSNLIST(FUNC) \ + FUNC(ldbu)\ + FUNC(ldhu)\ + FUNC(ldw)\ + FUNC(ldl)\ + FUNC(ldl_u)\ + FUNC(ldi) + +#define LDFROMADDR_DEC(LDX) \ + using Assembler::LDX; \ + void LDX(Register ra, Address addr); + + LDINSNLIST(LDFROMADDR_DEC) + +#undef LDFROMADDR_DEC + +#define STINSNLIST(FUNC) \ + FUNC(stb)\ + FUNC(sth)\ + FUNC(stw)\ + FUNC(stl)\ + FUNC(stl_u) + +#define ST2ADDR_DEC(STX) \ + using Assembler::STX; \ + void STX(Register ra, Address addr, Register tmp=rcc); + + STINSNLIST(ST2ADDR_DEC) + +#undef ST2ADDR_DEC + + void stw(int, Address, Register tmp = rcc); + void stptr(Register rd, Address addr, Register tmp=rcc); + void ldhu_unaligned(Register rd, Address addr, Register tmp=rcc); + void ldhu_unaligned_be(Register rd, Address addr, Register tmp=rcc); + void ldwu(Register rd, Address addr); + void ldws(Register rd, Address addr); + void ldwu(Register rd, AddressLiteral addr); + void ldws(Register rd, AddressLiteral addr); + void ldptr(Register rd, Address addr, Register tmp=rcc); + void ldptr(Register rd, AddressLiteral addr); + + +// float register load store instructions + +#define FLOATINSNLIST(FUNC) \ + FUNC(flds)\ + FUNC(fldd)\ + FUNC(fsts)\ + FUNC(fstd) + +#define ADDR_DEC(FLOATINSN) \ + using Assembler::FLOATINSN; \ + void FLOATINSN(FloatRegister ra, Address addr, Register tmp=rcc); + + FLOATINSNLIST(ADDR_DEC) + +#undef ADDR_DEC + + void load_float(FloatRegister ra, Address src, Register tmp=rcc); + void load_float(FloatRegister rd, AddressLiteral addr, Register tmp=rcc); + void load_double(FloatRegister ra, Address src, Register tmp=rcc); + void load_double(FloatRegister rd, AddressLiteral addr, Register tmp=rcc); + void store_float(FloatRegister ra, Address src, Register tmp=rcc); + void store_double(FloatRegister ra, Address src, Register tmp=rcc); + + void lea(Register rd, Address src); + void lea(Register rd, AddressLiteral addr); + void lea(Address dst, AddressLiteral adr, Register tmp_not_rcc); + +// arithmathic instrunctions + +/** + * x86 + * Assembler::andl/orl/xorl(Register dst, int32_t imm32) + * sw64 + * MacroAssembler::andw/orw/xorw(Register lh, int rh, Register res, Register scratch=rcc) + * note + * we will clear the msb32 of res, so the msb32 of lh is no matter. 
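+ *
+ * illustrative sketch of the immediate dispatch used by the macro bodies below:
+ *    rh in [0, 2^8)   encoded directly as an 8-bit operand, e.g. andw(lh, 0xff, res)
+ *    rh in [0, 2^15)  first materialized with ldi(scratch, rh, R0)
+ *    otherwise        first materialized with mov_immediate32(scratch, rh)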
+ */ +#define LOGICINSNLIST(FUNC) \ + FUNC(andw, and_ins)\ + FUNC(orw, bis)\ + FUNC(xorw, xor_ins) + +#define ARITHINSNLIST(FUNC) \ + FUNC(addwu, addw)\ + FUNC(subwu, subw)\ + FUNC(mulwu, mulw) + + /* I introduce scratch reg in NAMEw since it's possible that lh and res could be the same reg */ +#define EXPAND_W(NAME, INSN) \ + void NAME(Register lh, int rh, Register res, Register scratch=rcc){\ + assert_different_registers(lh, scratch);\ + if (rh >=0 && rh < (1<<8)) {\ + INSN(lh, rh, res);\ + }\ + else if (rh >=0 && rh < (1<<15)) {\ + ldi(scratch, rh, R0);\ + INSN(lh, scratch, res);\ + } else {\ + mov_immediate32(scratch, rh);\ + INSN(lh, scratch, res);\ + } \ + }\ + void NAME(Register lh, Register rh, Register res){INSN(lh, rh, res); zapnot(res, 0xf, res); } + + LOGICINSNLIST(EXPAND_W) + ARITHINSNLIST(EXPAND_W) + +#undef EXPAND_W + +#undef LOGICINSNLIST +#undef ARITHINSNLIST + + +#define LOGICINSNLIST(FUNC) \ + FUNC(andptr, and_ins)\ + FUNC(orptr, bis)\ + FUNC(xorptr, xor_ins) + +#define ARITHINSNLIST(FUNC) \ + FUNC(addptr, addl)\ + FUNC(subptr, subl) + + /* I introduce scratch reg in NAMEptr since it's possible that lh and res could be the same reg */ +#define EXPAND_PTR(NAME, INSN) \ + void NAME(Register lh, long rh, Register res, Register scratch=rcc){\ + assert_different_registers(lh, scratch);\ + if (rh >=0 && rh < (1<<8))\ + INSN(lh, rh, res);\ + else if (rh >=0 && rh < (1<<15)) {\ + ldi(scratch, rh, R0);\ + INSN(lh, scratch, res);\ + } else {\ + mov_immediate64(scratch, rh);\ + INSN(lh, scratch, res);\ + }\ + }\ + void NAME(Register lh, Register rh, Register res){INSN(lh, rh, res);} + + LOGICINSNLIST(EXPAND_PTR) + ARITHINSNLIST(EXPAND_PTR) + +#undef EXPAND_PTR + + void notl (Register res) { ornot(R0, res, res); } + void notptr(Register res) { notl(res); } + void addptr(Register rd, Address addr); + void notw(Register rd, Register rs); + void negptr(Register rs) { subl(R0, rs, rs); } + +// compare instructions + + void cmpoop(Register lh, Register rh, Register ccReg=rcc); + + void cmpb(Register lh, int rh, Register ccReg=rcc); + void cmpb(Address addr, int imm8, Register ccReg=rcc); + void cmpab(Address addr, int imm8, Register ccReg=rcc); + void cmpb(AddressLiteral src1, int imm8, Register ccReg=rcc); + + void cmph(Address addr, int imm16, Register ccReg=rcc); + + void cmpw(Register lh, int rh, Register ccReg=rcc); + void cmpw(Register lh, Register rh, Register ccReg=rcc); + void cmpw(Register lh, Address rh, Register ccReg=rcc); + void cmpw(Address lh, Register rh, Register ccReg=rcc); + void cmpw(Address lh, int32_t imm, Register ccReg=rcc, Register tmp=rscratch1); + void cmpw(AddressLiteral src1, int32_t imm, Register ccReg=rcc, Register tmp=rscratch1); + void cmpw(AddressLiteral src1, Register rh, Register ccReg=rcc); + void cmpwu(Register lh, Address rh, Register ccReg=rcc); + void cmpws(int cc, Register op1, Register op2, Register ccReg=rcc); + void cmpls(int cc, Register op1, Register op2, Register ccReg=rcc); + void cmpwu(int cc, Register op1, Register op2, Register ccReg=rcc); + void cmplu(int cc, Register op1, Register op2, Register ccReg=rcc); + void cmpfs(int cc, FloatRegister op1, FloatRegister op2, FloatRegister ccReg=FcmpRES, bool is_order = false); + void cmpfd(int cc, FloatRegister op1, FloatRegister op2, FloatRegister ccReg=FcmpRES, bool is_order = false); + void cmpfcc(int cc, FloatRegister op1, FloatRegister op2); + void cmpdcc(int cc, FloatRegister op1, FloatRegister op2); + + void cmpl(Register lh, int rh, Register ccReg=rcc); + void cmpl(Register lh, Register 
rh, Register ccReg=rcc); + void cmpl_raw(Register lh, Register rh, Register ccReg=rcc); + void cmpq(Register lh, Register rh, Register ccReg=rcc); + void cmpUL(Register lh, Register rh, Register ccReg); + + address cmp_insn_mark = NULL; + Register cmp_lh ; + Register cmp_rh; + bool cmp_long; + void set_cmp_insn_mark(Register lh, Register rh, bool lcmp=false); + void clear_cmp_insn_mark(); + bool cmp_insn_marked(); + void jccb(Condition cc, Label& L); + + void cmpptr(Register lh, int rh, Register ccReg=rcc); + void cmpptr(Register lh, Register rh, Register ccReg=rcc); + void cmpptr(Register lh, Address rh, Register ccReg=rcc); + void cmpptr(Address lh, Register rh, Register ccReg=rcc); + void cmpptr(Address lh, int32_t rh, Register ccReg=rcc); + void cmpptr(Register lh, AddressLiteral rh, Register ccReg=rcc); + + void cmpxchgptr(Register xreg, AddressLiteral adr, Register creg, Register tmp); + + void jump(AddressLiteral addr, Register tmp=AT); //scw tmp=T12 + void jump(RuntimeAddress addr, Register tmp=AT); //scw tmp=T12 + void jump(ArrayAddress entry, Register tmp1, Register tmp2); + void jump_cc(Condition cc, AddressLiteral dst, Register ccReg=rcc, Register tmp=rscratch1); + + void call(Register entry, Label *retAddr = NULL); + void call(Register entry, address& retAddr); + void call(AddressLiteral addr, Label *retAddr = NULL, Register tmp=T12); //scw tmp=T12 + void call(RuntimeAddress addr, Label *retAddr = NULL, Register tmp=T12); //scw tmp=T12 + // void call(Address addr) { Assembler::call(addr); } + + void jmp(Label& lbl); + void jmp(Address rd, Register tmp=AT); + void jmp(Register rd, Register tmp=AT); + + void jcc(Condition cc, Label& L, Register ccReg=rcc, ConditionLength cl = bitl); +// void jccb(Condition cc, Label& L, Register ccReg=rcc); + + // Helper functions for statistics gathering. + // Unconditional atomic increment. + void atomic_incw(Register counter_addr, Register tmp, Register tmp2); + + void testb(Address addr, int imm8, Register ccReg=rcc); + void testb(Register lh, int rh, Register res=rcc); + void testb(Register lh, Register rh, Register res=rcc); + void testw(Register lh, int rh, Register res=rcc, Register scratch=rcc); + void testw(Register lh, Register rh, Register res=rcc); + void testl(Register lh, long rh, Register res=rcc, Register scratch=rcc); + void testl(Register lh, Register rh, Register ccReg=rcc); + void testptr(Register lh, long rh, Register res=rcc, Register scratch=rcc); + void testptr(Register lh, Register rh, Register ccReg=rcc); + + void inline fmovs(FloatRegister dst, FloatRegister src) { + fcpys(src, src, dst); + } + void inline fmovd(FloatRegister dst, FloatRegister src) { + fcpys(src, src, dst); + } + + // swap the two byte of the low 16-bit halfword + // this directive will use AT, be sure the high 16-bit of reg is zero + void hswap(Register reg); + void huswap(Register reg); + + // convert big endian integer to little endian integer + void swap(Register reg); + void bswapw(Register reg); + + /** + * if c_reg == *dest then *dest <= x_reg, + * else c_reg <= *dest. 
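+ * (i.e. a compare-and-swap: c_reg holds the expected value, x_reg the value to
+ * install; on failure the current memory value comes back in c_reg)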
+ * The AT indicate if xchg occurred, 1 for xchged, else 0 + * @param x_reg + * @param dest + * @param c_reg + */ + void cmpxchg(Register x_reg, Address dest, Register c_reg); + void cmpxchg32(Register x_reg, Address dest, Register c_reg); + void fill_to_size(address start, int size); + + /** + * if oldval == *dest then *dest <= newval + * @param oldval + * @param dest + * @param newval + */ + void storeLcon(Register oldval, Address dest, Register newval); + void storeIcon(Register oldval, Address dest, Register newval); + void boundary_test(FloatRegister ft, Register res); + + // test if x is within signed immediate range for nbits + static bool is_uimm(intptr_t x, int nbits) { return intptr_t(0) <= x && x < ( intptr_t(1) << nbits ); } + // test if 0 <= x <= 255 + static bool is_uimm8(intptr_t x) { return is_uimm(x, 8); } + + // Various forms of CAS + + void cmpxchg_obj_header(Register oldv, Register newv, Register obj, Register tmp, + Label &suceed, Label *fail); + void cmpxchgptr(Register oldv, Register newv, Register addr, Register tmp, + Label &suceed, Label *fail); + + void cmpxchgw(Register oldv, Register newv, Register addr, Register tmp, + Label &suceed, Label *fail); + + void atomic_add(Register prev, RegisterOrConstant incr, Register addr); + void atomic_addw(Register prev, RegisterOrConstant incr, Register addr); + void atomic_addal(Register prev, RegisterOrConstant incr, Register addr); + void atomic_addalw(Register prev, RegisterOrConstant incr, Register addr); + + void atomic_xchg(Register prev, Register newv, Register addr); + void atomic_xchgw(Register prev, Register newv, Register addr); + void atomic_xchgal(Register prev, Register newv, Register addr); + void atomic_xchgalw(Register prev, Register newv, Register addr); + +public: + // Calls + + address trampoline_call(Address entry, CodeBuffer *cbuf = NULL); + + static bool far_branches() { + ShouldNotReachHere(); + return 0; + } + + // Jumps that can reach anywhere in the code cache. + // Trashes tmp. + void far_call(Address entry, CodeBuffer *cbuf = NULL, Register tmp = rscratch1); + void far_jump(Address entry, CodeBuffer *cbuf = NULL, Register tmp = rscratch1); + + static int far_branch_size() { + ShouldNotReachHere(); + return 0; + } + + // Emit the CompiledIC call idiom + void ic_call(address entry, jint method_index = 0); + +public: + + // Data + + // dst = src1 if rcc match cc, else dst = src2 + void cmove(Condition cc, Register dst, Register src1, Register src2, Register ccReg=rcc); + + void mov_metadata(Register dst, Metadata* obj); + Address allocate_metadata_address(Metadata* obj); + Address constant_oop_address(jobject obj); + + void movoop(Register dst, jobject obj, bool immediate = false); + + // CRC32 code for java.util.zip.CRC32::updateBytes() instrinsic. + void kernel_crc32(Register crc, Register buf, Register len, + Register table0, Register table1, Register table2, Register table3, + Register tmp, Register tmp2, Register tmp3); + // CRC32 code for java.util.zip.CRC32C::updateBytes() instrinsic. 
+ void kernel_crc32c(Register crc, Register buf, Register len, + Register table0, Register table1, Register table2, Register table3, + Register tmp, Register tmp2, Register tmp3); + + // Stack push and pop individual 64 bit registers + void push(Register src); + void push(int32_t imm32); + void pop(Register dst); + void push2(Register reg1, Register reg2); + void push (FloatRegister reg) { subl(esp, 8, esp); fstd(reg, 0, esp); } + void pop (FloatRegister reg) { fldd(reg, 0, esp); addl(esp, 8, esp); } + + // push all registers onto the stack + void pusha(); + void popa(); + + void pushptr(Address src, Register tmp = rcc) { ldptr(rcc, src); push(rcc);} + void repne_scan(Register addr, Register value, Register count, + Register scratch); + void repne_scanw(Register addr, Register value, Register count, + Register scratch); + + void add(Register Rd, Register Rn, RegisterOrConstant increment); + void sub(Register Rd, Register Rn, RegisterOrConstant decrement); + + void adrp(Register reg1, const Address &dest, unsigned long &byte_offset); + + void tableswitch(Register index, jint lowbound, jint highbound, + Label &jumptable, Label &jumptable_end, int stride = 1) { + ShouldNotReachHere(); + } + + // Form an address from base + offset in Rd. Rd may or may not + // actually be used: you must use the Address that is returned. It + // is up to you to ensure that the shift provided matches the size + // of your data. + Address form_address(Register Rd, Register base, long byte_offset, int shift); + + // Return true iff an address is within the 48-bit Sw64 address + // space. + bool is_valid_Sw64_address(address a) { + return ((uint64_t)a >> 48) == 0; + } + + // Load the base of the cardtable byte map into reg. + void load_byte_map_base(Register reg); + + // Prolog generator routines to support switch between x86 code and + // generated ARM code + + // routine to generate an x86 prolog for a stub function which + // bootstraps into the generated ARM code which directly follows the + // stub + // + +public: + + address read_polling_page(Register r, address page, relocInfo::relocType rtype); + address read_polling_page(Register r, relocInfo::relocType rtype); + void get_polling_page(Register dest, address page, relocInfo::relocType rtype); + + // CRC32 code for java.util.zip.CRC32::updateBytes() instrinsic. 
+ void update_byte_crc32(Register crc, Register val, Register table); + void update_word_crc32(Register crc, Register v, Register tmp, + Register table0, Register table1, Register table2, Register table3, + bool upper = false); + + void arrays_equals(Register a1, Register a2, Register result, Register cnt1, + Register tmp1, Register tmp2, Register tmp3, int elem_size); + + void string_equals(Register a1, Register a2, Register result, Register cnt1, + int elem_size); + + void fill_words(Register base, Register cnt, Register value); + void zero_words(Register base, u_int64_t cnt); + void zero_words(Register ptr, Register cnt); +//// void zero_dcache_blocks(Register base, Register cnt); + + static const int zero_words_block_size; + + void byte_array_inflate(Register src, Register dst, Register len, + FloatRegister vtmp1, FloatRegister vtmp2, + FloatRegister vtmp3, Register tmp4); + + void char_array_compress(Register src, Register dst, Register len, + FloatRegister tmp1Reg, FloatRegister tmp2Reg, + FloatRegister tmp3Reg, FloatRegister tmp4Reg, + Register result); + + void encode_iso_array(Register src, Register dst, + Register len, Register result, + FloatRegister Vtmp1, FloatRegister Vtmp2, + FloatRegister Vtmp3, FloatRegister Vtmp4); + + void fast_log(FloatRegister vtmp0, FloatRegister vtmp1, FloatRegister vtmp2, + FloatRegister vtmp3, FloatRegister vtmp4, FloatRegister vtmp5, + FloatRegister tmpC1, FloatRegister tmpC2, FloatRegister tmpC3, + FloatRegister tmpC4, Register tmp1, Register tmp2, + Register tmp3, Register tmp4, Register tmp5); + void generate_dsin_dcos(bool isCos, address npio2_hw, address two_over_pi, + address pio2, address dsin_coef, address dcos_coef); + private: + // begin trigonometric functions support block + void generate__ieee754_rem_pio2(address npio2_hw, address two_over_pi, address pio2); + void generate__kernel_rem_pio2(address two_over_pi, address pio2); + void generate_kernel_sin(FloatRegister x, bool iyIsOne, address dsin_coef); + void generate_kernel_cos(FloatRegister x, address dcos_coef); + // end trigonometric functions support block + void add2_with_carry(Register final_dest_hi, Register dest_hi, Register dest_lo, + Register src1, Register src2); + void add2_with_carry(Register dest_hi, Register dest_lo, Register src1, Register src2) { + add2_with_carry(dest_hi, dest_hi, dest_lo, src1, src2); + } + void multiply_64_x_64_loop(Register x, Register xstart, Register x_xstart, + Register y, Register y_idx, Register z, + Register carry, Register product, + Register idx, Register kdx); + void multiply_128_x_128_loop(Register y, Register z, + Register carry, Register carry2, + Register idx, Register jdx, + Register yz_idx1, Register yz_idx2, + Register tmp, Register tmp3, Register tmp4, + Register tmp7, Register product_hi); + void kernel_crc32_using_crc32(Register crc, Register buf, + Register len, Register tmp0, Register tmp1, Register tmp2, + Register tmp3); + void kernel_crc32c_using_crc32c(Register crc, Register buf, + Register len, Register tmp0, Register tmp1, Register tmp2, + Register tmp3); +public: + void multiply_to_len(Register x, Register xlen, Register y, Register ylen, Register z, + Register zlen, Register tmp1, Register tmp2, Register tmp3, + Register tmp4, Register tmp5, Register tmp6, Register tmp7); + void mul_add(Register out, Register in, Register offs, Register len, Register k); + // ISB may be needed because of a safepoint + void maybe_isb() { ShouldNotReachHere();} + +private: + // Returns an address on the stack which is reachable with a 
ldr/str of size + // Uses rscratch2 if the address is not directly reachable + Address spill_address(int size, int offset, Register tmp=rscratch2); + + bool merge_alignment_check(Register base, size_t size, long cur_offset, long prev_offset) const; + + // Check whether two loads/stores can be merged into ldp/stp. + bool ldst_can_merge(Register rx, const Address &adr, size_t cur_size_in_bytes, bool is_store) const; + + // Merge current load/store with previous load/store into ldp/stp. + void merge_ldst(Register rx, const Address &adr, size_t cur_size_in_bytes, bool is_store); + + // Try to merge two loads/stores into ldp/stp. If success, returns true else false. + bool try_merge_ldst(Register rt, const Address &adr, size_t cur_size_in_bytes, bool is_store); + +public: + + // True if an XOR can be used to expand narrow klass references. + bool use_XOR_for_compressed_class_base; + + +// void andw(Register lh, int rh, Register res, Register scratch=rcc); +// void andw(Register lh, Register rh, Register res); +// void andptr(Register lh, long rh, Register res, Register scratch=rcc); +// void andptr(Register lh, Register rh, Register res); + + + void addiu32(Register rs, int imm, Register rt, Register cc = GP) { + if (imm >= 0 && is_uimm8(imm)) { + addw(rs, imm, rt); + } else if (imm < 0 && is_uimm8(-imm)) { + subw(rs, -imm, rt); + } else { + ldi(cc, imm, R0); + addw(rs, cc, rt); + } + } + + void addiu(Register rs, int imm, Register rt, Register cc = GP) { + if (imm >= 0 && is_uimm8(imm)) { + addl(rs, imm, rt); + } else if (imm < 0 && is_uimm8(-imm)) { + subl(rs, -imm, rt); + } else { + ldi(cc, imm, R0); + addl(rs, cc, rt); + } + } + + void ori(Register rs, int imm, Register rt, Register cc = GP) { + if (is_uimm8(imm)) { + bis(rs, imm, rt); + } else { + ldi(cc, imm, R0); + bis(rs, cc, rt); + } + } + + void andi(Register rs, int imm, Register rt, Register cc = GP) { + if (is_uimm8(imm)) { + and_ins(rs, imm, rt); + } else { + ldi(cc, imm, R0); + and_ins(rs, cc, rt); + } + } + + void idiv_sw(Register rs, Register rt, Register rd){ + if(rt == R0){ + ShouldNotReachHere(); + }else{ + FloatRegister fsrc1 = f22; + FloatRegister fsrc2 = f23; + FloatRegister fdest = f24; + ifmovd(rs, fsrc1); + ifmovd(rt, fsrc2); + fcvtld(fsrc1, fsrc1); + fcvtld(fsrc2, fsrc2); + fdivd(fsrc1, fsrc2, fdest); + fcvtdl_z(fdest, fdest); + fcvtlw(fdest, fsrc1); + fimovs(fsrc1, rd); + } + } + + void irem_sw(Register rs, Register rt, Register rd){ + if(rt == R0){ + ShouldNotReachHere(); + }else{ + FloatRegister fsrc1 = f22; + FloatRegister fsrc2 = f23; + FloatRegister fdest = f24; + Register tem1 = rscratch3; + Register tem2 = rscratch4; + ifmovd(rs, fsrc1); + ifmovd(rt, fsrc2); + fcvtld(fsrc1, fsrc1); + fcvtld(fsrc2, fsrc2); + fdivd(fsrc1, fsrc2, fdest); + fcvtdl_z(fdest, fdest); + fimovd(fdest, tem1); + mulw(tem1, rt, tem2); + subw(rs, tem2, rd); + } + } + + void ldiv_sw(Register rs, Register rt, Register rd){ + if(rt == R0){ + ShouldNotReachHere(); + }else{ + FloatRegister fsrc1 = f22; + FloatRegister fsrc2 = f23; + FloatRegister fdest = f24; + ifmovd(rs, fsrc1); + ifmovd(rt, fsrc2); + fcvtld(fsrc1, fsrc1); + fcvtld(fsrc2, fsrc2); + fdivd(fsrc1, fsrc2, fdest); + fcvtdl_z(fdest, fdest); + fimovd(fdest, rd); + } + } + + void lrem_sw(Register rs, Register rt, Register rd){ + if(rt == R0){ + ShouldNotReachHere(); + }else{ + FloatRegister fsrc1 = f22; + FloatRegister fsrc2 = f23; + FloatRegister fdest = f24; + Register tem1 = rscratch3; + Register tem2 = rscratch4; + ifmovd(rs, fsrc1); + ifmovd(rt, fsrc2); + fcvtld(fsrc1, fsrc1); + 
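+      // long remainder via FP: with both operands converted to double, form the
+      // quotient with fdivd, truncate it toward zero with fcvtdl_z, then rebuild
+      // the remainder as rd = rs - q * rt (mull/subl below); presumably exact
+      // only while the operands fit in a double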
fcvtld(fsrc2, fsrc2); + fdivd(fsrc1, fsrc2, fdest); + fcvtdl_z(fdest, fdest); + fimovd(fdest, tem1); + mull(tem1, rt, tem2); + subl(rs, tem2, rd); + } + } + + void add_s(FloatRegister fd, FloatRegister fs, FloatRegister ft) { + if (FRegisterConflict) { + assert_different_registers(fs, f28); + assert_different_registers(ft, f28); + if (fs == ft && ft == fd){ + fadds(fs, ft, f28); + fcpys(f28, f28, fd); + }else if (fs == fd){ + fcpys(fs, fs, f28); + fadds(f28, ft, fd); + }else if (ft == fd){ + fcpys(ft, ft, f28); + fadds(fs, f28, fd); + }else{ + fadds(fs, ft, fd); + } + } else + fadds(fs, ft, fd); + } + + void sub_s(FloatRegister fd, FloatRegister fs, FloatRegister ft) { + if (FRegisterConflict) { + assert_different_registers(fs, f28); + assert_different_registers(ft, f28); + if (fs == ft && ft == fd){ + fsubs(fs, ft, f28); + fcpys(f28, f28, fd); + }else if (fs == fd){ + fcpys(fs, fs, f28); + fsubs(f28, ft, fd); + }else if (ft == fd){ + fcpys(ft, ft, f28); + fsubs(fs, f28, fd); + }else{ + fsubs(fs, ft, fd); + } + } else + fsubs(fs, ft, fd); + } + + void mul_s(FloatRegister fd, FloatRegister fs, FloatRegister ft) { + if (FRegisterConflict) { + assert_different_registers(fs, f28); + assert_different_registers(ft, f28); + if (fs == ft && ft == fd){ + fmuls(fs, ft, f28); + fcpys(f28, f28, fd); + }else if (fs == fd){ + fcpys(fs, fs, f28); + fmuls(f28, ft, fd); + }else if (ft == fd){ + fcpys(ft, ft, f28); + fmuls(fs, f28, fd); + }else{ + fmuls(fs, ft, fd); + } + } else + fmuls(fs, ft, fd); + } + + void div_s(FloatRegister fd, FloatRegister fs, FloatRegister ft) { + if (FRegisterConflict) { + assert_different_registers(fs, f28); + assert_different_registers(ft, f28); + if (fs == ft && ft == fd){ + fdivs(fs, ft, f28); + fcpys(f28, f28, fd); + }else if (fs == fd){ + fcpys(fs, fs, f28); + fdivs(f28, ft, fd); + }else if (ft == fd){ + fcpys(ft, ft, f28); + fdivs(fs, f28, fd); + }else{ + fdivs(fs, ft, fd); + } + } else + fdivs(fs, ft, fd); + } + + void add_d(FloatRegister fd, FloatRegister fs, FloatRegister ft) { + if (FRegisterConflict) { + assert_different_registers(fs, f28); + assert_different_registers(ft, f28); + if (fs == ft && ft == fd){ + faddd(fs, ft, f28); + fcpys(f28, f28, fd); + }else if (fs == fd){ + fcpys(fs, fs, f28); + faddd(f28, ft, fd); + }else if (ft == fd){ + fcpys(ft, ft, f28); + faddd(fs, f28, fd); + }else{ + faddd(fs, ft, fd); + } + } else + faddd(fs, ft, fd); + } + + void sub_d(FloatRegister fd, FloatRegister fs, FloatRegister ft) { + if (FRegisterConflict) { + assert_different_registers(fs, f28); + assert_different_registers(ft, f28); + if (fs == ft && ft == fd){ + fsubd(fs, ft, f28); + fcpys(f28, f28, fd); + }else if (fs == fd){ + fcpys(fs, fs, f28); + fsubd(f28, ft, fd); + }else if (ft == fd){ + fcpys(ft, ft, f28); + fsubd(fs, f28, fd); + }else{ + fsubd(fs, ft, fd); + } + } else + fsubd(fs, ft, fd); + } + + void mul_d(FloatRegister fd, FloatRegister fs, FloatRegister ft) { + if (FRegisterConflict) { + assert_different_registers(fs, f28); + assert_different_registers(ft, f28); + if (fs == ft && ft == fd){ + fmuld(fs, ft, f28); + fcpys(f28, f28, fd); + }else if (fs == fd){ + fcpys(fs, fs, f28); + fmuld(f28, ft, fd); + }else if (ft == fd){ + fcpys(ft, ft, f28); + fmuld(fs, f28, fd); + }else{ + fmuld(fs, ft, fd); + } + } else + fmuld(fs, ft, fd); + } + + void div_d(FloatRegister fd, FloatRegister fs, FloatRegister ft) { + if (FRegisterConflict) { + assert_different_registers(fs, f28); + assert_different_registers(ft, f28); + if (fs == ft && ft == fd){ + fdivd(fs, ft, f28); + 
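+        // fd aliases both sources here, so the quotient was formed in the f28
+        // scratch above and is copied into fd below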
fcpys(f28, f28, fd); + }else if (fs == fd){ + fcpys(fs, fs, f28); + fdivd(f28, ft, fd); + }else if (ft == fd){ + fcpys(ft, ft, f28); + fdivd(fs, f28, fd); + }else{ + fdivd(fs, ft, fd); + } + } else + fdivd(fs, ft, fd); + } + + void sqrt_s(FloatRegister fd, FloatRegister fs) { + if (FRegisterConflict) { + if(fs == fd){ + mov_s(f28, fs); + fsqrts(f28, fd); + } else + fsqrts(fs, fd); + } else + fsqrts(fs, fd); + } + + void sqrt_d(FloatRegister fd, FloatRegister fs) { + if (FRegisterConflict) { + if (fs == fd) { + mov_d(f28, fs); + fsqrtd(f28,fd); + } else + fsqrtd(fs, fd); + } else + fsqrtd(fs, fd); + } + + void cvt_s_l(FloatRegister fd, FloatRegister fs) { + if (FRegisterConflict) { + assert_different_registers(fs, f28); + assert_different_registers(fd, f28); + if (fs == fd){ + fcpys(fs, fs, f28); + fcvtls(f28, fd); + }else{ + fcvtls(fs, fd); + } + } else + fcvtls(fs, fd); + } + + void cvt_d_l(FloatRegister fd, FloatRegister fs) { + if (FRegisterConflict) { + assert_different_registers(fs, f28); + assert_different_registers(fd, f28); + if (fs == fd){ + fcpys(fs, fs, f28); + fcvtld(f28, fd); + }else{ + fcvtld(fs, fd); + } + } else + fcvtld(fs, fd); + } + + void cvt_d_s(FloatRegister fd, FloatRegister fs) { + if (FRegisterConflict) { + assert_different_registers(fs, f28); + assert_different_registers(fd, f28); + if (fs == fd){ + fcpys(fs, fs, f28); + fcvtsd(f28, fd); + }else{ + fcvtsd(fs, fd); + } + } else + fcvtsd(fs, fd); + } + + void cvt_s_d(FloatRegister fd, FloatRegister fs) { + if (FRegisterConflict) { + assert_different_registers(fs, f28); + assert_different_registers(fd, f28); + if (fs == fd){ + fcpys(fs, fs, f28); + fcvtds(f28, fd); + } else + fcvtds(fs, fd); + } else + fcvtds(fs, fd); + } + + void c_un_s (FloatRegister fs, FloatRegister ft) { + assert_different_registers(fs, FcmpRES); + assert_different_registers(ft, FcmpRES); + fcmpun(fs, ft, FcmpRES); + } + void c_eq_s (FloatRegister fs, FloatRegister ft) { + assert_different_registers(fs, FcmpRES); + assert_different_registers(ft, FcmpRES); + fcmpeq(fs, ft, FcmpRES); + } + void c_ueq_s (FloatRegister fs, FloatRegister ft) { + assert_different_registers(fs, FcmpRES); + assert_different_registers(ft, FcmpRES); + fcmpun(fs, ft, FcmpRES); + fbne(FcmpRES, 1); + fcmpeq(fs, ft, FcmpRES); + } + + void c_ult_s (FloatRegister fs, FloatRegister ft) { + assert_different_registers(fs, FcmpRES); + assert_different_registers(ft, FcmpRES); + fcmpun(fs, ft, FcmpRES); + fbne(FcmpRES, 1); + fcmplt(fs, ft, FcmpRES); + } + + void c_olt_s (FloatRegister fs, FloatRegister ft) { + assert_different_registers(fs, f28, FcmpRES); + assert_different_registers(ft, f28, FcmpRES); + fcmpun(fs, ft, f28); + fcmpeq(f28, f31, FcmpRES); + fbeq(FcmpRES, 1); + fcmplt(fs, ft, FcmpRES); + } + + void c_ult_d (FloatRegister fs, FloatRegister ft) { + assert_different_registers(fs, FcmpRES); + assert_different_registers(ft, FcmpRES); + fcmpun(fs, ft, FcmpRES); + fbne(FcmpRES, 1); + fcmplt(fs, ft, FcmpRES); + } + + void c_olt_d (FloatRegister fs, FloatRegister ft) { + assert_different_registers(fs, f28, FcmpRES); + assert_different_registers(ft, f28, FcmpRES); + fcmpun(fs, ft, f28); + fcmpeq(f28, f31, FcmpRES); + fbeq(FcmpRES, 1); + fcmplt(fs, ft, FcmpRES); + } + + void c_ole_s (FloatRegister fs, FloatRegister ft) { + assert_different_registers(fs, f28, FcmpRES); + assert_different_registers(ft, f28, FcmpRES); + fcmpun(fs, ft, f28); + fcmpeq(f28, f31, FcmpRES); + fbeq(FcmpRES, 1); + fcmple(fs, ft, FcmpRES); + } + + void c_ule_s (FloatRegister fs, FloatRegister ft) { + 
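+    // unordered-or-less-or-equal: fcmpun checks for a NaN operand first; when it
+    // hits, the fbne below presumably skips the fcmple so the non-zero
+    // "unordered" value left in FcmpRES stands as the (true) result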
assert_different_registers(fs, FcmpRES); + assert_different_registers(ft, FcmpRES); + fcmpun(fs, ft, FcmpRES); + fbne(FcmpRES, 1); + fcmple(fs, ft, FcmpRES); + } + + void c_un_d (FloatRegister fs, FloatRegister ft) { + assert_different_registers(fs, FcmpRES); + assert_different_registers(ft, FcmpRES); + fcmpun(fs, ft, FcmpRES); + } + void c_eq_d (FloatRegister fs, FloatRegister ft) { + assert_different_registers(fs, FcmpRES); + assert_different_registers(ft, FcmpRES); + fcmpeq(fs, ft, FcmpRES); + } + void c_ueq_d (FloatRegister fs, FloatRegister ft) { + assert_different_registers(fs, FcmpRES); + assert_different_registers(ft, FcmpRES); + fcmpun(fs, ft, FcmpRES); + fbne(FcmpRES, 1); + fcmpeq(fs, ft, FcmpRES); + } + + void c_ole_d (FloatRegister fs, FloatRegister ft) { + assert_different_registers(fs, f28, FcmpRES); + assert_different_registers(ft, f28, FcmpRES); + fcmpun(fs, ft, f28); + fcmpeq(f28, f31, FcmpRES); + fbeq(FcmpRES, 1); + fcmple(fs, ft, FcmpRES); + } + + void c_ule_d (FloatRegister fs, FloatRegister ft) { + assert_different_registers(fs, FcmpRES); + assert_different_registers(ft, FcmpRES); + fcmpun(fs, ft, FcmpRES); + fbne(FcmpRES, 1); + fcmple(fs, ft, FcmpRES); + } + + // Frame creation and destruction shared between JITs. + void build_frame(int framesize); + void remove_frame(int framesize); + +public: +// void mov(Register dst, Address a); + void mov_immediate64(Register dst, u_int64_t imm64, RelocationHolder const& rspec, int format = 0); + void mov_address64(Register dst, u_int64_t imm64, RelocationHolder const& rspec, int format = 0); + void mov_immediate64(Register dst, u_int64_t imm64); + void mov_immediate32(Register dst, int imm32); + void mov_immediate32u(Register dst, int imm32); + void mov_immediate32s(Register dst, int imm32); + void set64(Register d, long value); + void push_RA_call(Register entry, Register tmp=T12) { + if (entry != tmp) movl(tmp, entry); + + br(RA, 0); + addl(RA, 4 * BytesPerInt, RA); + subl(rsp, wordSize, rsp); + stl(RA, 0, rsp); + Assembler::call(RA, tmp, (int)0); + addl(rsp, wordSize, rsp); + if(UseSetfpec) + setfpec1(); + } + + static void imm48_split(long imm48, int16_t &msb_l, int16_t &lsb_h, int16_t &lsb_l) { + int32_t lsb32 = (int32_t) ((intptr_t) imm48); + int32_t msb32 = (int32_t) (((intptr_t) imm48 - lsb32) >> 32); + + msb_l = (int16_t) msb32; + lsb_h = (lsb32 - (int16_t) lsb32) >> 16; + lsb_l = (int16_t) lsb32; + guarantee((msb_l >= 0x0 && msb_l < 0x7fff) || (msb_l == 0x7fff && lsb32 >= 0x0 && lsb32 < 0x7fff8000), "wrong number in li48 "); + if (lsb32 >= 0x7fff8000) + msb_l = msb_l + 1; + } + +// void push(RegSet regs, Register stack) { if (regs.bits()) push(regs.bits(), stack); } +// void pop(RegSet regs, Register stack) { if (regs.bits()) pop(regs.bits(), stack); } + + // Push and pop everything that might be clobbered by a native + // runtime call except rscratch1 and rscratch2. (They are always + // scratch, so we don't have to protect them.) Only save the lower + // 64 bits of each vector register. + void push_call_clobbered_registers(); + void pop_call_clobbered_registers(); + + // Helper functions for statistics gathering. 
+ void atomic_inc32(address counter_addr, int inc, Register tmp_reg1, Register tmp_reg2); + + // now mov instructions for loading absolute addresses and 32 or + // 64 bit integers + + inline void mov(Register dst, address addr) + { + mov_immediate64(dst, (u_int64_t)addr); + } + + inline void mov(Register dst, u_int64_t imm64) + { + mov_immediate64(dst, imm64); + } + + inline void movws(Register dst, u_int32_t imm32) + { + mov_immediate32(dst, imm32); + movws(dst, dst); + } + + /** + * x86 + * movslq(Register dst, Register src) + * sw64 + * movws(Register dst, Register src) + * note + * sign extend 32bit to 64bit + */ + inline void movws(Register dst, Register src) + { + addw(src, R0, dst); + } + + inline void movws(Register dst, Address src) + { ShouldNotReachHere(); + ldw(dst, src); + } + + inline void movl(Register dst, u_int64_t imm64) + { + mov_immediate64(dst, imm64); + } + + inline void movws(Register dst, int32_t imm32) + { + mov_immediate32s(dst, imm32); + } + + inline void movwu(Register dst, u_int32_t imm32) + { + mov_immediate32u(dst, imm32); + } + + inline void movw(Register dst, u_int32_t imm32) + { + mov_immediate32(dst, imm32); + } + + inline void movw(Register dst, Register src) + { + zapnot(src, 0xf, dst); + } + + inline void movwu(Register dst, Register src) + { + zapnot(src, 0xf, dst); + } + + inline void movw(Register dst, ExternalAddress addr, Register tmp=rcc) + { + mov_immediate64(tmp, (intptr_t)addr.target(), addr.rspec()); + zapnot(tmp, 0xf, dst); + } + + inline void movw(ExternalAddress addr, Register src, Register tmp=rcc) + { + mov_immediate64(tmp, (intptr_t)addr.target(), addr.rspec()); + stw(src, Address(tmp, 0)); + } + + inline void mov(Register dst, long l) + { + mov(dst, (u_int64_t)l); + } + + inline void mov(Register dst, int i) + { + mov(dst, (long)i); + } + + void mov(Register dst, RegisterOrConstant src) { + ShouldNotReachHere(); + } + + +public: + + // Can we reach target using jal/j from anywhere + // in the code cache (because code can be relocated)? 
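+  // (answered with a blanket "no" below, so callers presumably always fall back
+  // to the far / patchable call sequences rather than a short jal/j)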
+ bool reachable_from_cache(address target) { + return false; + } + // Argument ops + inline void store_int_argument(Register s, Argument &a) {ShouldNotReachHere(); + if(a.is_Register()) { + move(a.as_Register(), s); + } else { + sw(s, a.as_caller_address()); + } + } + + void sign_extend_short(Register reg) { sexth(reg, reg); } + void sign_extend_byte (Register reg) { sextb(reg, reg); } + + void trigfunc(char trig, int num_fpu_regs_in_use = 1); + + void subu32(Register rd, Register rs, Register rt) { subw(rs, rt, rd); } + void dsub (Register rd, Register rs, Register rt) { subl(rs, rt, rd); } + void addu32(Register rd, Register rs, Register rt) { addw(rs, rt, rd); } + void daddu (Register rd, Register rs, Register rt) { addl(rs, rt, rd); } + void dadd (Register rd, Register rs, Register rt) { addl(rs, rt, rd); } + //move should replace with movl jzy + void move(Register rd, Register rs) { movl(rd, rs); } + void movl(Register rd, Register rs) { if (rs != rd) bis(R0, rs, rd); } + void stbool(bool boolconst, Address dst, Register tmp = rscratch1) { + ldi(tmp, (int) boolconst, R0); + if(sizeof(bool) == 1) + stb(tmp, dst); + else if(sizeof(bool) == 2) + sth(tmp, dst); + else if(sizeof(bool) == 4) + stw(tmp, dst); + else + ShouldNotReachHere(); + } + + void mov_s(FloatRegister fd, FloatRegister fs) { fcpys(fs, fs, fd); } + void mov_d(FloatRegister fd, FloatRegister fs) { fcpys(fs, fs, fd); } + void abs_d(FloatRegister fd, FloatRegister fs) { fcpys(f31, fs, fd); } + + void brk (int code) { sys_call(0x80); } + + void dsll(Register rd, Register rt , int sa) { slll(rt, sa, rd); } + void dsrl(Register rd, Register rt , int sa) { srll(rt, sa, rd); } + void sll (Register rt, int sa, Register rd) { slll(rt, sa&0x1f, rd); addw(rd, 0, rd); } + void sllv(Register rd, Register rt, Register rs, Register cc = GP) { and_ins(rs, 0x1f, cc); slll(rt, cc, rd); addw(rd, 0, rd); } + void sra (Register rd, Register rt, int sa) { addw(rt, 0, rd); sral(rt, sa&0x1f, rd); } + void srav(Register rd, Register rt, Register rs, Register cc = GP) { and_ins(rs, 0x1f, cc); addw(rt, 0, rd); sral(rd, cc, rd); } + void srlv(Register rd, Register rt, Register rs, Register cc = GP) { and_ins(rs, 0x1f, cc); zapnot(rt, 0xf, rd); srll(rd, cc, rd); addw(rd, 0x0, rd); } + + void lbu (Register rt, Address src) { ldbu(rt, src.disp(), src.base()); } +// void lb (Register rt, Address src) { lb(rt, src.disp(), src.base()); } +// void lb (Register rt, int off, Register base) { ldbu(rt, off, base); sextb(rt, rt); } +// void lh (Register rt, Address src) { ldh(rt, src.disp(), src.base()); } +// void ldh (Register rt, int off, Register base) { ldhu(rt, Address(base, off)); sexth(rt, rt); } + void lhu (Register rt, Address src) { ldhu(rt, src); } +// void lhu (Register rt, Register base, int off) { ldhu(rt, Address(base, off)); } + void lw (Register rt, Address src) { ldw(rt, src.disp(), src.base()); } +// void ldwu(Register rt, int off, Register base) { ldw(rt, off, base); zapnot(rt, 0xF, rt); } + void ld (Register rt, Address src) { ldl(rt, src.disp(), src.base()); } + void sb (Register rt, Address dst) { stb(rt, dst.disp(), dst.base()); } + void sb (Register rt, Register base, int off) { stb(rt, off, base); } + + // ld_ptr will perform lw for 32 bit VMs and ld for 64 bit VMs + inline void ld_ptr(Register rt, Address a){ + ldl(rt, a.disp(), a.base()); + } + inline void st_ptr(Register rt, Address a){ + stl(rt, a.disp(), a.base()); + } + +// void lwc1(FloatRegister rt, Address src) { lwc1(rt, src.base(), src.disp()); } +// void lwc1(FloatRegister 
ft, Register base, int off) { flds(ft, off, base); } +// void ldc1(FloatRegister rt, Address src) { ldc1(rt, src.base(), src.disp()); } +// void ldc1(FloatRegister ft, Register base, int off) { fldd(ft, off, base); } + void lw (Register rt, Register base, int off) { ldw(rt, off, base); } + void ld (Register rt, Register base, int off) { ldl(rt, off, base); } +// void swc1(FloatRegister ft, Register base, int off) { fsts(ft, off, base); } +// void sdc1(FloatRegister ft, Register base, int off) { fstd(ft, off, base); } + void sw (Register rt, Register base, int off) { stw(rt, off, base); } + void sd (Register rt, Register base, int off) { stl(rt, off, base); } + +// void fflds(FloatRegister rt, Address src) { flds(rt, src.disp(), src.base()); } +// void ffldd(FloatRegister rt, Address src) { fldd(rt, src.disp(), src.base()); } +// void ffsts(FloatRegister rt, Address dst) { fsts(rt, dst.disp(), dst.base()); } +// void ffstd(FloatRegister rt, Address dst) { fstd(rt, dst.disp(), dst.base()); } + + void sw(Register rt, Address dst) { + Register src = rt; + Register base = dst.base(); + int disp = dst.disp(); + + if( Assembler::is_simm16(disp) ) { + stw(src, disp, base); + } else { + mov_immediate32(AT, disp); + addl(base, AT, AT); + stw(src, 0, AT); + } + } + + void std(Register rt, Address dst) { + Register src = rt; + Register base = dst.base(); + int disp = dst.disp(); + + if(is_simm16(disp)) { + stl(src, disp, base); + } else { + mov_immediate32(AT, disp); + addl(base, AT, AT); + stl(src, 0, AT); + } + } + + void empty_FPU_stack(){/*need implemented*/}; + + inline void store_ptr_argument(Register s, Argument &a) {ShouldNotReachHere(); + if(a.is_Register()) { + move(a.as_Register(), s); + } else { + st_ptr(s, a.as_caller_address()); + } + } + + inline void store_float_argument(FloatRegister s, Argument &a) {ShouldNotReachHere(); + if(a.is_Register()) { + fcpys(s, s, a.as_FloatRegister()); + } else { + fsts(s, a.as_caller_address()); + } + } + + inline void store_double_argument(FloatRegister s, Argument &a) {ShouldNotReachHere(); + if(a.is_Register()) { + fcpys(s, s, a.as_FloatRegister()); + } else { + fstd(s, a.as_caller_address()); + } + } + + void load( int width, Register ra, int mdisp, Register rb ){ + if(width == 0) ldbu( ra, mdisp, rb ); + else if(width == 1) ldhu( ra, mdisp, rb ); + else if(width == 2) ldw( ra, mdisp, rb ); + else ldl( ra, mdisp, rb ); + } + + void store( int width, Register ra, int mdisp, Register rb ){ + if(width == 0) stb( ra, mdisp, rb ); + else if(width == 1) sth( ra, mdisp, rb ); + else if(width == 2) stw( ra, mdisp, rb ); + else stl( ra, mdisp, rb ); + } + + //get the offset field of jump/branch instruction + //sw64 for condition branch instruction the disp is 21 bits + int offset(address entry) { + assert(is_simm21((entry - pc() - 4) / 4), "change this code"); + if (!is_simm21((entry - pc() - 4) / 4)) { + tty->print_cr("!!! is_simm21: %x", (unsigned int)((entry - pc() - 4) / 4)); + } + return (entry - pc() - 4) / 4; + } + + /** + * oop_maps->add_gc_map use offset to compute map + * but sw should put setfpec1 after call where will call gcc's code in, + * so sw should not use default offset method + * + * lbl is label which use to calculate return address + * offset is codebuffer's offset. 
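+ *
+ * e.g. for a bound call-return label the value computed below is simply
+ *    lbl.loc_pos() - (start - code_section()->start()),
+ * i.e. the label position taken relative to the start address passed in.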
+ */ + int offset(Label &lbl, address start) { + assert(lbl.is_bound(), "need bound"); + int off = lbl.loc_pos() - (start - code_section()->start()); + return off; + } + + + void beq_a (Register a, address entry) { beq(a, offset(entry)); } + void beq_l (Register a, Label& L) { beq(a, offset(target(L))); } + void beq_c (Register rs, Register rt, Label& L, Register cc = GP) { + if ( rt == R0 ) { + beq(rs, offset(target(L))); + } else if (rs == R0) { + beq(rt, offset(target(L))); + } else { + cmpeq(rs, rt, cc); + bne(cc, offset(target(L))); + } + } + void bne_l (Register a, Label& L) { bne(a, offset(target(L))); } + void bne_c (Register rs, Register rt, Label& L, Register cc = GP) { + if ( rt == R0 ) { + bne(rs, offset(target(L))); + } else if (rs == R0) { + bne(rt, offset(target(L))); + } else { + cmpeq(rs, rt, cc); + beq(cc, offset(target(L))); + } + } + + void bgtz(Register rs, address entry) { bgt(rs, offset(entry)); } + void blez(Register rs, address entry) { ble(rs, offset(entry)); } + + void bge_l( Register a, Label& L ) { bge( a, offset(target(L))); } + void bgt_l( Register a, Label& L ) { bgt( a, offset(target(L))); } + void ble_l( Register a, Label& L ) { ble( a, offset(target(L))); } + void blt_l( Register a, Label& L ) { blt( a, offset(target(L))); } + void sltu (Register rd, Register rs, Register rt) { cmpult(rs, rt, rd); } + void slti(Register rt, Register rs, int imm, Register cc = GP) { ldi(cc, imm, R0); cmplt(rs, cc, rt); } + + void ffbeq(FloatRegister rs, Label& L) { fbeq(rs, offset(target(L))); } + void ffbne(FloatRegister rs, Label& L) { fbne(rs, offset(target(L))); } + + //we need 2 fun to save and resotre general register + void pushad(Register skip = noreg); + void popad(Register skip = noreg); + void saveTRegisters(); + void restoreTRegisters(); + + void ret_sw() { Assembler::ret(R0, RA, 0);} + void ret() { Assembler::ret(R0, RA, 0);} + + //TODO:to implement + void xchgptr(Register src1, Address src2) { stop("unimplement xchgptr: jzy"); } + void xchgptr(Register src1, Register src2); + void xchgw (Register src1, Address src2) { stop("unimplement xchgw: jzy");} + + void cmpxchgq(Register src1, Address src2) { stop("unimplement cmpxchgq: jzy"); } + void cmpxchgw(Register src1, Address src2) { stop("unimplement cmpxchgw: jzy"); } + void cmpxchgb(Register src1, Address src2) { stop("unimplement cmpxchgb: jzy"); } + void lock() { memb(); } + + void xaddw (Address src1, Register src2) { stop("unimplement xaddw: jzy");} + void xaddptr (Address src1, Register src2) { stop("unimplement xaddptr: jzy");} + // Jumps + void jr(Register rs) { Assembler::jmp(rscratch2, rs, 0); } + void jr(address entry); + void jr(address entry, relocInfo::relocType rtype); + + void patchable_jump(address target); + + void jalr(Register rd, Register rs) { ShouldNotReachHere(); } + void jalr(Register rs) { jalr(RA, rs); } + void jalr() { jalr(T12); } + + void jalr_setfpec1(Register rd, Register rs) { ShouldNotReachHere(); } + void jalr_setfpec1(Register rs) { jalr_setfpec1(RA, rs); } + + // Calls + void call_patch(address entry); + void call_patch(address entry, relocInfo::relocType rtype); + + void patchable_call_setfpec1(address target); + void patchable_call(address target, Label *retAddr = NULL, Register tmp=T12); + + inline void xorr ( Register rd, Register rs, Register rt ) { xor_ins(rs, rt, rd); } + inline void andnot ( Register ra, Register rb, Register rc ) { bic( ra, rb, rc ); } + inline void andnot ( Register ra, int lit, Register rc ) { bic( ra, lit, rc ); } + inline void or_ins ( Register 
ra, Register rb, Register rc ) { bis( ra, rb, rc ); } + inline void or_ins ( Register ra, int lit, Register rc ) { bis( ra, lit, rc ); } + + // Generalized Test Bit And Branch, including a "far" variety which + // spans more than 32KiB. + void tbr(Condition cond, Register Rt, int bitpos, Label &dest, bool far = false) { + ShouldNotReachHere(); + } + + // idiv variant which deals with MINLONG as dividend and -1 as divisor + int corrected_idivl(Register result, Register ra, Register rb, + bool want_remainder, Register tmp = rscratch1); + int corrected_idivq(Register result, Register ra, Register rb, + bool want_remainder, Register tmp = rscratch1); + + static address target_addr_for_insn(address insn_addr, unsigned insn); + static address target_addr_for_insn(address insn_addr) { + unsigned insn = *(unsigned*)insn_addr; + return target_addr_for_insn(insn_addr, insn); + } + + static void assert_signed_word_disp_range(intptr_t x, int nbits) { + assert( (x & 3) == 0, "not word aligned"); + assert_signed_range(x, nbits + 2); + } + + static intptr_t inv_wdisp( int x, intptr_t pos, int nbits ) { + int pre_sign_extend = x & (( 1 << nbits ) - 1); + int r = pre_sign_extend >= ( 1 << (nbits-1) ) + ? pre_sign_extend | ~(( 1 << nbits ) - 1) + : pre_sign_extend; + return (r << 2) + pos; + } + + static int wdisp( intptr_t x, intptr_t off, int nbits ) { + intptr_t xx = x - off; + assert_signed_word_disp_range(xx, nbits); + int r = (xx >> 2) & (( 1 << nbits ) - 1); + assert( inv_wdisp( r, off, nbits ) == x, "inverse not inverse"); + return r; + } + + // signed immediate, in low bits, nbits long + static int simm(int x, int nbits) { + assert_signed_range(x, nbits); + return x & (( 1 << nbits ) - 1); + } + + // void verify_oop_subroutine(); + + void cmp_klass(Register oop, Register trial_klass, Register tmp); + + void resolve_oop_handle(Register result, Register tmp = rscratch1); + + void resolve_weak_handle(Register result, Register tmp); + + void load_mirror(Register mirror, Register method, Register tmp = rscratch1); + + void load_method_holder_cld(Register rresult, Register rmethod); + + void load_method_holder(Register holder, Register method); +}; + +class ScopeMark { +private: + MacroAssembler* _masm; + char _begin[300]; + char _end[300]; +public: + + ScopeMark(MacroAssembler* masm, const char* position, const char* comment = "") : _masm(masm) { + if (comment == "") { + ::sprintf(_begin, "%s{", position); + ::sprintf(_end, "%s}", position); + } else { + ::sprintf(_begin, "%s %s %s enter", position, "{", comment); + ::sprintf(_end, "%s leave }", position); + } + + _masm->block_comment(_begin); + } + + ~ScopeMark() { + _masm->block_comment(_end); + } +}; + +class SizedScope { +private: + int _size; + MacroAssembler* _masm; + address _start; +public: + SizedScope(MacroAssembler* masm, int size) { + _masm = masm; + _size = size; + _start = _masm->pc(); + } + ~SizedScope() { + if (_masm->pc() - _start > _size) Unimplemented(); + while (_masm->pc() - _start < _size) _masm->nop(); + } +}; + +#ifdef ASSERT +inline bool AbstractAssembler::pd_check_instruction_mark() { return true; } +#endif + +/** + * class SkipIfEqual: + * + * Instantiating this class will result in assembly code being output that will + * jump around any code emitted between the creation of the instance and it's + * automatic destruction at the end of a scope block, depending on the value of + * the flag passed to the constructor, which will be checked at run-time. 
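+ *
+ * Illustrative use (the flag name here is only an example):
+ *    { SkipIfEqual skip(masm, &DTraceMethodProbes, false);
+ *      // code emitted here is jumped over when the flag is false
+ *    }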
+ */ +class SkipIfEqual { + private: + MacroAssembler* _masm; + Label _label; + + public: + SkipIfEqual(MacroAssembler*, const bool* flag_addr, bool value); + ~SkipIfEqual(); +}; + +struct tableswitch { + Register _reg; + int _insn_index; jint _first_key; jint _last_key; + Label _after; + Label _branches; +}; + +#endif // CPU_SW64_VM_MACROASSEMBLER_SW64_HPP diff --git a/src/hotspot/cpu/sw64/macroAssembler_sw64.inline.hpp b/src/hotspot/cpu/sw64/macroAssembler_sw64.inline.hpp new file mode 100644 index 00000000000..1b6d78b16a6 --- /dev/null +++ b/src/hotspot/cpu/sw64/macroAssembler_sw64.inline.hpp @@ -0,0 +1,35 @@ +/* + * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_SW64_VM_MACROASSEMBLER_SW64_INLINE_HPP +#define CPU_SW64_VM_MACROASSEMBLER_SW64_INLINE_HPP + +#include "asm/assembler.hpp" + +#ifndef PRODUCT + +#endif // ndef PRODUCT + +#endif // CPU_SW64_VM_MACROASSEMBLER_SW64_INLINE_HPP diff --git a/src/hotspot/cpu/sw64/macroAssembler_sw64_log.cpp b/src/hotspot/cpu/sw64/macroAssembler_sw64_log.cpp new file mode 100644 index 00000000000..2ce5a623780 --- /dev/null +++ b/src/hotspot/cpu/sw64/macroAssembler_sw64_log.cpp @@ -0,0 +1,262 @@ +/* Copyright (c) 2018, Cavium. All rights reserved. (By BELLSOFT) + * Copyright (c) 2016, Intel Corporation. + * Intel Math Library (LIBM) Source Code + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "asm/assembler.hpp" +#include "asm/assembler.inline.hpp" +#include "macroAssembler_sw64.hpp" + +// Algorithm idea is taken from x86 hotspot intrinsic and adapted for AARCH64. +// +// For mathematical background please refer to the following literature: +// +// Tang, Ping-Tak Peter. +// Table-driven implementation of the logarithm function +// in IEEE floating-point arithmetic. +// ACM Transactions on Mathematical Software (TOMS) 16, no. 4, 1990: 378-400. + +/******************************************************************************/ +// ALGORITHM DESCRIPTION - LOG() +// --------------------- +// +// x=2^k * mx, mx in [1,2) +// +// Get B~1/mx based on the output of frecpe instruction (B0) +// B = int((B0*2^7+0.5))/2^7 +// +// Reduced argument: r=B*mx-1.0 (computed accurately in high and low parts) +// +// Result: k*log(2) - log(B) + p(r) if |x-1| >= small value (2^-6) and +// p(r) is a degree 7 polynomial +// -log(B) read from data table (high, low parts) +// Result is formed from high and low parts +// +// Special cases: +// 1. log(NaN) = quiet NaN +// 2. log(+INF) = that INF +// 3. log(0) = -INF +// 4. log(1) = +0 +// 5. log(x) = NaN if x < -0, including -INF +// +/******************************************************************************/ + +// Table with p(r) polynomial coefficients +// and table representation of logarithm values (hi and low parts) +__attribute__ ((aligned(64))) juint _L_tbl[] = +{ + // coefficients of p(r) polynomial: + // _coeff[] + 0x00000000UL, 0xbfd00000UL, // C1_0 = -0.25 + 0x92492492UL, 0x3fc24924UL, // C1_1 = 0.14285714285714285 + 0x55555555UL, 0x3fd55555UL, // C2_0 = 0.3333333333333333 + 0x3d6fb175UL, 0xbfc5555eUL, // C2_1 = -0.16666772842235003 + 0x00000000UL, 0xbfe00000UL, // C3_0 = -0.5 + 0x9999999aUL, 0x3fc99999UL, // C3_1 = 0.2 + // _log2[] + 0xfefa3800UL, 0x3fa62e42UL, // C4_0 = 0.043321698784993146 + 0x93c76730UL, 0x3ceef357UL, // C4_1 = 3.436201886692732e-15 + // _L_tbl[] with logarithm values (hi and low parts) + 0xfefa3800UL, 0x3fe62e42UL, 0x93c76730UL, 0x3d2ef357UL, 0xaa241800UL, + 0x3fe5ee82UL, 0x0cda46beUL, 0x3d220238UL, 0x5c364800UL, 0x3fe5af40UL, + 0xac10c9fbUL, 0x3d2dfa63UL, 0x26bb8c00UL, 0x3fe5707aUL, 0xff3303ddUL, + 0x3d09980bUL, 0x26867800UL, 0x3fe5322eUL, 0x5d257531UL, 0x3d05ccc4UL, + 0x835a5000UL, 0x3fe4f45aUL, 0x6d93b8fbUL, 0xbd2e6c51UL, 0x6f970c00UL, + 0x3fe4b6fdUL, 0xed4c541cUL, 0x3cef7115UL, 0x27e8a400UL, 0x3fe47a15UL, + 0xf94d60aaUL, 0xbd22cb6aUL, 0xf2f92400UL, 0x3fe43d9fUL, 0x481051f7UL, + 0xbcfd984fUL, 0x2125cc00UL, 0x3fe4019cUL, 0x30f0c74cUL, 0xbd26ce79UL, + 0x0c36c000UL, 0x3fe3c608UL, 0x7cfe13c2UL, 0xbd02b736UL, 0x17197800UL, + 0x3fe38ae2UL, 0xbb5569a4UL, 0xbd218b7aUL, 0xad9d8c00UL, 0x3fe35028UL, + 0x9527e6acUL, 0x3d10b83fUL, 0x44340800UL, 0x3fe315daUL, 0xc5a0ed9cUL, + 0xbd274e93UL, 0x57b0e000UL, 0x3fe2dbf5UL, 0x07b9dc11UL, 0xbd17a6e5UL, + 0x6d0ec000UL, 0x3fe2a278UL, 0xe797882dUL, 0x3d206d2bUL, 0x1134dc00UL, + 0x3fe26962UL, 0x05226250UL, 0xbd0b61f1UL, 0xd8bebc00UL, 0x3fe230b0UL, + 0x6e48667bUL, 0x3d12fc06UL, 0x5fc61800UL, 0x3fe1f863UL, 0xc9fe81d3UL, + 0xbd2a7242UL, 0x49ae6000UL, 0x3fe1c078UL, 0xed70e667UL, 0x3cccacdeUL, + 0x40f23c00UL, 0x3fe188eeUL, 0xf8ab4650UL, 0x3d14cc4eUL, 0xf6f29800UL, + 0x3fe151c3UL, 0xa293ae49UL, 0xbd2edd97UL, 0x23c75c00UL, 0x3fe11af8UL, + 0xbb9ddcb2UL, 0xbd258647UL, 0x8611cc00UL, 0x3fe0e489UL, 0x07801742UL, + 0x3d1c2998UL, 0xe2d05400UL, 0x3fe0ae76UL, 0x887e7e27UL, 0x3d1f486bUL, + 0x0533c400UL, 0x3fe078bfUL, 0x41edf5fdUL, 0x3d268122UL, 0xbe760400UL, + 
0x3fe04360UL, 0xe79539e0UL, 0xbd04c45fUL, 0xe5b20800UL, 0x3fe00e5aUL, + 0xb1727b1cUL, 0xbd053ba3UL, 0xaf7a4800UL, 0x3fdfb358UL, 0x3c164935UL, + 0x3d0085faUL, 0xee031800UL, 0x3fdf4aa7UL, 0x6f014a8bUL, 0x3d12cde5UL, + 0x56b41000UL, 0x3fdee2a1UL, 0x5a470251UL, 0x3d2f27f4UL, 0xc3ddb000UL, + 0x3fde7b42UL, 0x5372bd08UL, 0xbd246550UL, 0x1a272800UL, 0x3fde148aUL, + 0x07322938UL, 0xbd1326b2UL, 0x484c9800UL, 0x3fddae75UL, 0x60dc616aUL, + 0xbd1ea42dUL, 0x46def800UL, 0x3fdd4902UL, 0xe9a767a8UL, 0x3d235bafUL, + 0x18064800UL, 0x3fdce42fUL, 0x3ec7a6b0UL, 0xbd0797c3UL, 0xc7455800UL, + 0x3fdc7ff9UL, 0xc15249aeUL, 0xbd29b6ddUL, 0x693fa000UL, 0x3fdc1c60UL, + 0x7fe8e180UL, 0x3d2cec80UL, 0x1b80e000UL, 0x3fdbb961UL, 0xf40a666dUL, + 0x3d27d85bUL, 0x04462800UL, 0x3fdb56faUL, 0x2d841995UL, 0x3d109525UL, + 0x5248d000UL, 0x3fdaf529UL, 0x52774458UL, 0xbd217cc5UL, 0x3c8ad800UL, + 0x3fda93edUL, 0xbea77a5dUL, 0x3d1e36f2UL, 0x0224f800UL, 0x3fda3344UL, + 0x7f9d79f5UL, 0x3d23c645UL, 0xea15f000UL, 0x3fd9d32bUL, 0x10d0c0b0UL, + 0xbd26279eUL, 0x43135800UL, 0x3fd973a3UL, 0xa502d9f0UL, 0xbd152313UL, + 0x635bf800UL, 0x3fd914a8UL, 0x2ee6307dUL, 0xbd1766b5UL, 0xa88b3000UL, + 0x3fd8b639UL, 0xe5e70470UL, 0xbd205ae1UL, 0x776dc800UL, 0x3fd85855UL, + 0x3333778aUL, 0x3d2fd56fUL, 0x3bd81800UL, 0x3fd7fafaUL, 0xc812566aUL, + 0xbd272090UL, 0x687cf800UL, 0x3fd79e26UL, 0x2efd1778UL, 0x3d29ec7dUL, + 0x76c67800UL, 0x3fd741d8UL, 0x49dc60b3UL, 0x3d2d8b09UL, 0xe6af1800UL, + 0x3fd6e60eUL, 0x7c222d87UL, 0x3d172165UL, 0x3e9c6800UL, 0x3fd68ac8UL, + 0x2756eba0UL, 0x3d20a0d3UL, 0x0b3ab000UL, 0x3fd63003UL, 0xe731ae00UL, + 0xbd2db623UL, 0xdf596000UL, 0x3fd5d5bdUL, 0x08a465dcUL, 0xbd0a0b2aUL, + 0x53c8d000UL, 0x3fd57bf7UL, 0xee5d40efUL, 0x3d1fadedUL, 0x0738a000UL, + 0x3fd522aeUL, 0x8164c759UL, 0x3d2ebe70UL, 0x9e173000UL, 0x3fd4c9e0UL, + 0x1b0ad8a4UL, 0xbd2e2089UL, 0xc271c800UL, 0x3fd4718dUL, 0x0967d675UL, + 0xbd2f27ceUL, 0x23d5e800UL, 0x3fd419b4UL, 0xec90e09dUL, 0x3d08e436UL, + 0x77333000UL, 0x3fd3c252UL, 0xb606bd5cUL, 0x3d183b54UL, 0x76be1000UL, + 0x3fd36b67UL, 0xb0f177c8UL, 0x3d116ecdUL, 0xe1d36000UL, 0x3fd314f1UL, + 0xd3213cb8UL, 0xbd28e27aUL, 0x7cdc9000UL, 0x3fd2bef0UL, 0x4a5004f4UL, + 0x3d2a9cfaUL, 0x1134d800UL, 0x3fd26962UL, 0xdf5bb3b6UL, 0x3d2c93c1UL, + 0x6d0eb800UL, 0x3fd21445UL, 0xba46baeaUL, 0x3d0a87deUL, 0x635a6800UL, + 0x3fd1bf99UL, 0x5147bdb7UL, 0x3d2ca6edUL, 0xcbacf800UL, 0x3fd16b5cUL, + 0xf7a51681UL, 0x3d2b9acdUL, 0x8227e800UL, 0x3fd1178eUL, 0x63a5f01cUL, + 0xbd2c210eUL, 0x67616000UL, 0x3fd0c42dUL, 0x163ceae9UL, 0x3d27188bUL, + 0x604d5800UL, 0x3fd07138UL, 0x16ed4e91UL, 0x3cf89cdbUL, 0x5626c800UL, + 0x3fd01eaeUL, 0x1485e94aUL, 0xbd16f08cUL, 0x6cb3b000UL, 0x3fcf991cUL, + 0xca0cdf30UL, 0x3d1bcbecUL, 0xe4dd0000UL, 0x3fcef5adUL, 0x65bb8e11UL, + 0xbcca2115UL, 0xffe71000UL, 0x3fce530eUL, 0x6041f430UL, 0x3cc21227UL, + 0xb0d49000UL, 0x3fcdb13dUL, 0xf715b035UL, 0xbd2aff2aUL, 0xf2656000UL, + 0x3fcd1037UL, 0x75b6f6e4UL, 0xbd084a7eUL, 0xc6f01000UL, 0x3fcc6ffbUL, + 0xc5962bd2UL, 0xbcf1ec72UL, 0x383be000UL, 0x3fcbd087UL, 0x595412b6UL, + 0xbd2d4bc4UL, 0x575bd000UL, 0x3fcb31d8UL, 0x4eace1aaUL, 0xbd0c358dUL, + 0x3c8ae000UL, 0x3fca93edUL, 0x50562169UL, 0xbd287243UL, 0x07089000UL, + 0x3fc9f6c4UL, 0x6865817aUL, 0x3d29904dUL, 0xdcf70000UL, 0x3fc95a5aUL, + 0x58a0ff6fUL, 0x3d07f228UL, 0xeb390000UL, 0x3fc8beafUL, 0xaae92cd1UL, + 0xbd073d54UL, 0x6551a000UL, 0x3fc823c1UL, 0x9a631e83UL, 0x3d1e0ddbUL, + 0x85445000UL, 0x3fc7898dUL, 0x70914305UL, 0xbd1c6610UL, 0x8b757000UL, + 0x3fc6f012UL, 0xe59c21e1UL, 0xbd25118dUL, 0xbe8c1000UL, 0x3fc6574eUL, + 0x2c3c2e78UL, 
0x3d19cf8bUL, 0x6b544000UL, 0x3fc5bf40UL, 0xeb68981cUL, + 0xbd127023UL, 0xe4a1b000UL, 0x3fc527e5UL, 0xe5697dc7UL, 0x3d2633e8UL, + 0x8333b000UL, 0x3fc4913dUL, 0x54fdb678UL, 0x3d258379UL, 0xa5993000UL, + 0x3fc3fb45UL, 0x7e6a354dUL, 0xbd2cd1d8UL, 0xb0159000UL, 0x3fc365fcUL, + 0x234b7289UL, 0x3cc62fa8UL, 0x0c868000UL, 0x3fc2d161UL, 0xcb81b4a1UL, + 0x3d039d6cUL, 0x2a49c000UL, 0x3fc23d71UL, 0x8fd3df5cUL, 0x3d100d23UL, + 0x7e23f000UL, 0x3fc1aa2bUL, 0x44389934UL, 0x3d2ca78eUL, 0x8227e000UL, + 0x3fc1178eUL, 0xce2d07f2UL, 0x3d21ef78UL, 0xb59e4000UL, 0x3fc08598UL, + 0x7009902cUL, 0xbd27e5ddUL, 0x39dbe000UL, 0x3fbfe891UL, 0x4fa10afdUL, + 0xbd2534d6UL, 0x830a2000UL, 0x3fbec739UL, 0xafe645e0UL, 0xbd2dc068UL, + 0x63844000UL, 0x3fbda727UL, 0x1fa71733UL, 0x3d1a8940UL, 0x01bc4000UL, + 0x3fbc8858UL, 0xc65aacd3UL, 0x3d2646d1UL, 0x8dad6000UL, 0x3fbb6ac8UL, + 0x2bf768e5UL, 0xbd139080UL, 0x40b1c000UL, 0x3fba4e76UL, 0xb94407c8UL, + 0xbd0e42b6UL, 0x5d594000UL, 0x3fb9335eUL, 0x3abd47daUL, 0x3d23115cUL, + 0x2f40e000UL, 0x3fb8197eUL, 0xf96ffdf7UL, 0x3d0f80dcUL, 0x0aeac000UL, + 0x3fb700d3UL, 0xa99ded32UL, 0x3cec1e8dUL, 0x4d97a000UL, 0x3fb5e95aUL, + 0x3c5d1d1eUL, 0xbd2c6906UL, 0x5d208000UL, 0x3fb4d311UL, 0x82f4e1efUL, + 0xbcf53a25UL, 0xa7d1e000UL, 0x3fb3bdf5UL, 0xa5db4ed7UL, 0x3d2cc85eUL, + 0xa4472000UL, 0x3fb2aa04UL, 0xae9c697dUL, 0xbd20b6e8UL, 0xd1466000UL, + 0x3fb1973bUL, 0x560d9e9bUL, 0xbd25325dUL, 0xb59e4000UL, 0x3fb08598UL, + 0x7009902cUL, 0xbd17e5ddUL, 0xc006c000UL, 0x3faeea31UL, 0x4fc93b7bUL, + 0xbd0e113eUL, 0xcdddc000UL, 0x3faccb73UL, 0x47d82807UL, 0xbd1a68f2UL, + 0xd0fb0000UL, 0x3faaaef2UL, 0x353bb42eUL, 0x3d20fc1aUL, 0x149fc000UL, + 0x3fa894aaUL, 0xd05a267dUL, 0xbd197995UL, 0xf2d4c000UL, 0x3fa67c94UL, + 0xec19afa2UL, 0xbd029efbUL, 0xd42e0000UL, 0x3fa466aeUL, 0x75bdfd28UL, + 0xbd2c1673UL, 0x2f8d0000UL, 0x3fa252f3UL, 0xe021b67bUL, 0x3d283e9aUL, + 0x89e74000UL, 0x3fa0415dUL, 0x5cf1d753UL, 0x3d0111c0UL, 0xec148000UL, + 0x3f9c63d2UL, 0x3f9eb2f3UL, 0x3d2578c6UL, 0x28c90000UL, 0x3f984925UL, + 0x325a0c34UL, 0xbd2aa0baUL, 0x25980000UL, 0x3f9432a9UL, 0x928637feUL, + 0x3d098139UL, 0x58938000UL, 0x3f902056UL, 0x06e2f7d2UL, 0xbd23dc5bUL, + 0xa3890000UL, 0x3f882448UL, 0xda74f640UL, 0xbd275577UL, 0x75890000UL, + 0x3f801015UL, 0x999d2be8UL, 0xbd10c76bUL, 0x59580000UL, 0x3f700805UL, + 0xcb31c67bUL, 0x3d2166afUL, 0x00000000UL, 0x00000000UL, 0x00000000UL, + 0x80000000UL +}; + +// BEGIN dlog PSEUDO CODE: +// double dlog(double X) { +// // p(r) polynomial coefficients initialized from _L_tbl table +// double C1_0 = _L_tbl[0]; +// double C1_1 = _L_tbl[1]; +// double C2_0 = _L_tbl[2]; +// double C2_1 = _L_tbl[3]; +// double C3_0 = _L_tbl[4]; +// double C3_1 = _L_tbl[5]; +// double C4_0 = _L_tbl[6]; +// double C4_1 = _L_tbl[7]; +// // NOTE: operations with coefficients above are mostly vectorized in assembly +// // Check corner cases first +// if (X == 1.0d || AS_LONG_BITS(X) + 0x0010000000000000 <= 0x0010000000000000) { +// // NOTE: AS_LONG_BITS(X) + 0x0010000000000000 <= 0x0010000000000000 means +// // that X < 0 or X >= 0x7FF0000000000000 (0x7FF* is NaN or INF) +// if (X < 0 || X is NaN) return NaN; +// if (X == 1.0d) return 0.0d; +// if (X == 0.0d) return -INFINITY; +// if (X is INFINITY) return INFINITY; +// } +// // double representation is 2^exponent * mantissa +// // split X into two multipliers: 2^exponent and 1.0 * mantissa +// // pseudo function: zeroExponent(X) return value of X with exponent == 0 +// float vtmp5 = 1/(float)(zeroExponent(X)); // reciprocal estimate +// // pseudo function: HI16(X) returns high 16 bits of 
double value +// int hiWord = HI16(X); +// double vtmp1 = (double) 0x77F0 << 48 | mantissa(X); +// hiWord -= 16; +// if (AS_LONG_BITS(hiWord) > 0x8000) { +// // SMALL_VALUE branch +// vtmp0 = vtmp1 = vtmp0 * AS_DOUBLE_BITS(0x47F0000000000000); +// hiWord = HI16(vtmp1); +// vtmp0 = AS_DOUBLE_BITS(AS_LONG_BITS(vtmp0) |= 0x3FF0000000000000); +// vtmp5 = (double) (1/(float)vtmp0); +// vtmp1 <<= 12; +// vtmp1 >>= 12; +// } +// // MAIN branch +// double vtmp3 = AS_LONG_BITS(vtmp1) & 0xffffe00000000000; // hi part +// int intB0 = AS_INT_BITS(vtmp5) + 0x8000; +// double vtmp0 = AS_DOUBLE_BITS(0xffffe00000000000 & (intB0<<29)); +// int index = (intB0 >> 16) && 0xFF; +// double hiTableValue = _L_tbl[8+index]; // vtmp2[0] +// double lowTableValue = _L_tbl[16+index]; // vtmp2[1] +// vtmp5 = AS_DOUBLE_BITS(hiWord & 0x7FF0 - 0x3FE0); // 0x3FE = 1023 << 4 +// vtmp1 -= vtmp3; // low part +// vtmp3 = vtmp3*vtmp0 - 1.0; +// hiTableValue += C4_0 * vtmp5; +// lowTableValue += C4_1 * vtmp5; +// double r = vtmp1 * vtmp0 + vtmp3; // r = B*mx-1.0, computed in hi and low parts +// vtmp0 = hiTableValue + r; +// hiTableValue -= vtmp0; +// double i2 = r*r; +// double i3 = i2*r; +// double p7 = C3_0*i2 + C2_0*i3 + C1_0*i2*i2 + C3_1*i3*i2 + C2_1*i3*i3 +// + C1_1*i3*i2*i2; // degree 7 polynomial +// return p7 + (vtmp0 + ((r + hiTableValue) + lowTableValue)); +// } +// +// END dlog PSEUDO CODE + + +// Generate log(X). X passed in register f0. Return log(X) into f0. +// Generator parameters: 10 temporary FPU registers and temporary general +// purpose registers +void MacroAssembler::fast_log(FloatRegister vtmp0, FloatRegister vtmp1, + FloatRegister vtmp2, FloatRegister vtmp3, + FloatRegister vtmp4, FloatRegister vtmp5, + FloatRegister C1, FloatRegister C2, + FloatRegister C3, FloatRegister C4, + Register tmp1, Register tmp2, Register tmp3, + Register tmp4, Register tmp5) { + ShouldNotReachHere(); +} diff --git a/src/hotspot/cpu/sw64/macroAssembler_sw64_trig.cpp b/src/hotspot/cpu/sw64/macroAssembler_sw64_trig.cpp new file mode 100644 index 00000000000..d790586d6d9 --- /dev/null +++ b/src/hotspot/cpu/sw64/macroAssembler_sw64_trig.cpp @@ -0,0 +1,710 @@ +/* Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2018, Cavium. All rights reserved. (By BELLSOFT) + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "asm/assembler.hpp" +#include "asm/assembler.inline.hpp" +#include "runtime/stubRoutines.hpp" +#include "macroAssembler_sw64.hpp" + +// The following code is a optimized version of fdlibm sin/cos implementation +// (C code is in share/runtime/sharedRuntimeTrig.cpp) adapted for SW64. + +// Please refer to sin/cos approximation via polynomial and +// trigonometric argument reduction techniques to the following literature: +// +// [1] Muller, Jean-Michel, Nicolas Brisebarre, Florent De Dinechin, +// Claude-Pierre Jeannerod, Vincent Lefevre, Guillaume Melquiond, +// Nathalie Revol, Damien Stehlé, and Serge Torres: +// Handbook of floating-point arithmetic. +// Springer Science & Business Media, 2009. +// [2] K. C. Ng +// Argument Reduction for Huge Arguments: Good to the Last Bit +// July 13, 1992, SunPro +// +// HOW TO READ THIS CODE: +// This code consists of several functions. Each function has following header: +// 1) Description +// 2) C-pseudo code with differences from fdlibm marked by comments starting +// with "NOTE". Check unmodified fdlibm code in +// share/runtime/SharedRuntimeTrig.cpp +// 3) Brief textual description of changes between fdlibm and current +// implementation along with optimization notes (if applicable) +// 4) Assumptions, input and output +// 5) (Optional) additional notes about intrinsic implementation +// Each function is separated in blocks which follow the pseudo-code structure +// +// HIGH-LEVEL ALGORITHM DESCRIPTION: +// - entry point: generate_dsin_dcos(...); +// - check corner cases: NaN, INF, tiny argument. +// - check if |x| < Pi/4. Then approximate sin/cos via polynomial (kernel_sin/kernel_cos) +// -- else proceed to argument reduction routine (__ieee754_rem_pio2) and +// use reduced argument to get result via kernel_sin/kernel_cos +// +// HIGH-LEVEL CHANGES BETWEEN INTRINSICS AND FDLIBM: +// 1) two_over_pi table fdlibm representation is int[], while intrinsic version +// has these int values converted to double representation to load converted +// double values directly (see stubRoutines_aarch4::_two_over_pi) +// 2) Several loops are unrolled and vectorized: see comments in code after +// labels: SKIP_F_LOAD, RECOMP_FOR1_CHECK, RECOMP_FOR2 +// 3) fdlibm npio2_hw table now has "prefix" with constants used in +// calculation. These constants are loaded from npio2_hw table instead of +// constructing it in code (see stubRoutines_sw64.cpp) +// 4) Polynomial coefficients for sin and cos are moved to table sin_coef +// and cos_coef to use the same optimization as in 3). It allows to load most of +// required constants via single instruction +// +// +// +///* __ieee754_rem_pio2(x,y) +// * +// * returns the remainder of x rem pi/2 in y[0]+y[1] (i.e. 
like x div pi/2) +// * x is input argument, y[] is hi and low parts of reduced argument (x) +// * uses __kernel_rem_pio2() +// */ +// // use tables(see stubRoutines_sw64.cpp): two_over_pi and modified npio2_hw +// +// BEGIN __ieee754_rem_pio2 PSEUDO CODE +// +//static int __ieee754_rem_pio2(double x, double *y) { +// double z,w,t,r,fn; +// double tx[3]; +// int e0,i,j,nx,n,ix,hx,i0; +// +// i0 = ((*(int*)&two24A)>>30)^1; /* high word index */ +// hx = *(i0+(int*)&x); /* high word of x */ +// ix = hx&0x7fffffff; +// if(ix<0x4002d97c) { /* |x| < 3pi/4, special case with n=+-1 */ +// if(hx>0) { +// z = x - pio2_1; +// if(ix!=0x3ff921fb) { /* 33+53 bit pi is good enough */ +// y[0] = z - pio2_1t; +// y[1] = (z-y[0])-pio2_1t; +// } else { /* near pi/2, use 33+33+53 bit pi */ +// z -= pio2_2; +// y[0] = z - pio2_2t; +// y[1] = (z-y[0])-pio2_2t; +// } +// return 1; +// } else { /* negative x */ +// z = x + pio2_1; +// if(ix!=0x3ff921fb) { /* 33+53 bit pi is good enough */ +// y[0] = z + pio2_1t; +// y[1] = (z-y[0])+pio2_1t; +// } else { /* near pi/2, use 33+33+53 bit pi */ +// z += pio2_2; +// y[0] = z + pio2_2t; +// y[1] = (z-y[0])+pio2_2t; +// } +// return -1; +// } +// } +// if(ix<=0x413921fb) { /* |x| ~<= 2^19*(pi/2), medium size */ +// t = fabsd(x); +// n = (int) (t*invpio2+half); +// fn = (double)n; +// r = t-fn*pio2_1; +// w = fn*pio2_1t; /* 1st round good to 85 bit */ +// // NOTE: y[0] = r-w; is moved from if/else below to be before "if" +// y[0] = r-w; +// if(n<32&&ix!=npio2_hw[n-1]) { +// // y[0] = r-w; /* quick check no cancellation */ // NOTE: moved earlier +// } else { +// j = ix>>20; +// // y[0] = r-w; // NOTE: moved earlier +// i = j-(((*(i0+(int*)&y[0]))>>20)&0x7ff); +// if(i>16) { /* 2nd iteration needed, good to 118 */ +// t = r; +// w = fn*pio2_2; +// r = t-w; +// w = fn*pio2_2t-((t-r)-w); +// y[0] = r-w; +// i = j-(((*(i0+(int*)&y[0]))>>20)&0x7ff); +// if(i>49) { /* 3rd iteration need, 151 bits acc */ +// t = r; /* will cover all possible cases */ +// w = fn*pio2_3; +// r = t-w; +// w = fn*pio2_3t-((t-r)-w); +// y[0] = r-w; +// } +// } +// } +// y[1] = (r-y[0])-w; +// if(hx<0) {y[0] = -y[0]; y[1] = -y[1]; return -n;} +// else return n; +// } +// /* +// * all other (large) arguments +// */ +// // NOTE: this check is removed, because it was checked in dsin/dcos +// // if(ix>=0x7ff00000) { /* x is inf or NaN */ +// // y[0]=y[1]=x-x; return 0; +// // } +// /* set z = scalbn(|x|,ilogb(x)-23) */ +// *(1-i0+(int*)&z) = *(1-i0+(int*)&x); +// e0 = (ix>>20)-1046; /* e0 = ilogb(z)-23; */ +// *(i0+(int*)&z) = ix - (e0<<20); +// +// // NOTE: "for" loop below in unrolled. See comments in asm code +// for(i=0;i<2;i++) { +// tx[i] = (double)((int)(z)); +// z = (z-tx[i])*two24A; +// } +// +// tx[2] = z; +// nx = 3; +// +// // NOTE: while(tx[nx-1]==zeroA) nx--; is unrolled. See comments in asm code +// while(tx[nx-1]==zeroA) nx--; /* skip zero term */ +// +// n = __kernel_rem_pio2(tx,y,e0,nx,2,two_over_pi); +// if(hx<0) {y[0] = -y[0]; y[1] = -y[1]; return -n;} +// return n; +//} +// +// END __ieee754_rem_pio2 PSEUDO CODE +// +// Changes between fdlibm and intrinsic for __ieee754_rem_pio2: +// 1. INF/NaN check for huge argument is removed in comparison with fdlibm +// code, because this check is already done in dcos/dsin code +// 2. Most constants are now loaded from table instead of direct initialization +// 3. Two loops are unrolled +// Assumptions: +// 1. Assume |X| >= PI/4 +// 2. Assume rscratch1 = 0x3fe921fb00000000 (~ PI/4) +// 3. Assume ix = i3 +// Input and output: +// 1. 
Input: X = i0 +// 2. Return n in i2, y[0] == y0 == f4, y[1] == y1 == f5 +// NOTE: general purpose register names match local variable names in C code +// NOTE: fpu registers are actively reused. See comments in code about their usage +void MacroAssembler::generate__ieee754_rem_pio2(address npio2_hw, + address two_over_pi, address pio2) { + ShouldNotReachHere(); +} + +///* +// * __kernel_rem_pio2(x,y,e0,nx,prec,ipio2) +// * double x[],y[]; int e0,nx,prec; int ipio2[]; +// * +// * __kernel_rem_pio2 return the last three digits of N with +// * y = x - N*pi/2 +// * so that |y| < pi/2. +// * +// * The method is to compute the integer (mod 8) and fraction parts of +// * (2/pi)*x without doing the full multiplication. In general we +// * skip the part of the product that are known to be a huge integer ( +// * more accurately, = 0 mod 8 ). Thus the number of operations are +// * independent of the exponent of the input. +// * +// * NOTE: 2/pi int representation is converted to double +// * // (2/pi) is represented by an array of 24-bit integers in ipio2[]. +// * +// * Input parameters: +// * x[] The input value (must be positive) is broken into nx +// * pieces of 24-bit integers in double precision format. +// * x[i] will be the i-th 24 bit of x. The scaled exponent +// * of x[0] is given in input parameter e0 (i.e., x[0]*2^e0 +// * match x's up to 24 bits. +// * +// * Example of breaking a double positive z into x[0]+x[1]+x[2]: +// * e0 = ilogb(z)-23 +// * z = scalbn(z,-e0) +// * for i = 0,1,2 +// * x[i] = floor(z) +// * z = (z-x[i])*2**24 +// * +// * +// * y[] ouput result in an array of double precision numbers. +// * The dimension of y[] is: +// * 24-bit precision 1 +// * 53-bit precision 2 +// * 64-bit precision 2 +// * 113-bit precision 3 +// * The actual value is the sum of them. Thus for 113-bit +// * precsion, one may have to do something like: +// * +// * long double t,w,r_head, r_tail; +// * t = (long double)y[2] + (long double)y[1]; +// * w = (long double)y[0]; +// * r_head = t+w; +// * r_tail = w - (r_head - t); +// * +// * e0 The exponent of x[0] +// * +// * nx dimension of x[] +// * +// * prec an interger indicating the precision: +// * 0 24 bits (single) +// * 1 53 bits (double) +// * 2 64 bits (extended) +// * 3 113 bits (quad) +// * +// * NOTE: ipio2[] array below is converted to double representation +// * //ipio2[] +// * // integer array, contains the (24*i)-th to (24*i+23)-th +// * // bit of 2/pi after binary point. The corresponding +// * // floating value is +// * +// * ipio2[i] * 2^(-24(i+1)). +// * +// * Here is the description of some local variables: +// * +// * jk jk+1 is the initial number of terms of ipio2[] needed +// * in the computation. The recommended value is 2,3,4, +// * 6 for single, double, extended,and quad. +// * +// * jz local integer variable indicating the number of +// * terms of ipio2[] used. +// * +// * jx nx - 1 +// * +// * jv index for pointing to the suitable ipio2[] for the +// * computation. In general, we want +// * ( 2^e0*x[0] * ipio2[jv-1]*2^(-24jv) )/8 +// * is an integer. Thus +// * e0-3-24*jv >= 0 or (e0-3)/24 >= jv +// * Hence jv = max(0,(e0-3)/24). +// * +// * jp jp+1 is the number of terms in PIo2[] needed, jp = jk. +// * +// * q[] double array with integral value, representing the +// * 24-bits chunk of the product of x and 2/pi. +// * +// * q0 the corresponding exponent of q[0]. Note that the +// * exponent for q[i] would be q0-24*i. +// * +// * PIo2[] double precision array, obtained by cutting pi/2 +// * into 24 bits chunks. 
+// * +// * f[] ipio2[] in floating point +// * +// * iq[] integer array by breaking up q[] in 24-bits chunk. +// * +// * fq[] final product of x*(2/pi) in fq[0],..,fq[jk] +// * +// * ih integer. If >0 it indicates q[] is >= 0.5, hence +// * it also indicates the *sign* of the result. +// * +// */ +// +// Use PIo2 table(see stubRoutines_sw64.cpp) +// +// BEGIN __kernel_rem_pio2 PSEUDO CODE +// +//static int __kernel_rem_pio2(double *x, double *y, int e0, int nx, int prec, /* NOTE: converted to double */ const double *ipio2 // const int *ipio2) { +// int jz,jx,jv,jp,jk,carry,n,iq[20],i,j,k,m,q0,ih; +// double z,fw,f[20],fq[20],q[20]; +// +// /* initialize jk*/ +// // jk = init_jk[prec]; // NOTE: prec==2 for double. jk is always 4. +// jp = jk; // NOTE: always 4 +// +// /* determine jx,jv,q0, note that 3>q0 */ +// jx = nx-1; +// jv = (e0-3)/24; if(jv<0) jv=0; +// q0 = e0-24*(jv+1); +// +// /* set up f[0] to f[jx+jk] where f[jx+jk] = ipio2[jv+jk] */ +// j = jv-jx; m = jx+jk; +// +// // NOTE: split into two for-loops: one with zeroB and one with ipio2[j]. It +// // allows the use of wider loads/stores +// for(i=0;i<=m;i++,j++) f[i] = (j<0)? zeroB : /* NOTE: converted to double */ ipio2[j]; //(double) ipio2[j]; +// +// // NOTE: unrolled and vectorized "for". See comments in asm code +// /* compute q[0],q[1],...q[jk] */ +// for (i=0;i<=jk;i++) { +// for(j=0,fw=0.0;j<=jx;j++) fw += x[j]*f[jx+i-j]; q[i] = fw; +// } +// +// jz = jk; +//recompute: +// /* distill q[] into iq[] reversingly */ +// for(i=0,j=jz,z=q[jz];j>0;i++,j--) { +// fw = (double)((int)(twon24* z)); +// iq[i] = (int)(z-two24B*fw); +// z = q[j-1]+fw; +// } +// +// /* compute n */ +// z = scalbnA(z,q0); /* actual value of z */ +// z -= 8.0*floor(z*0.125); /* trim off integer >= 8 */ +// n = (int) z; +// z -= (double)n; +// ih = 0; +// if(q0>0) { /* need iq[jz-1] to determine n */ +// i = (iq[jz-1]>>(24-q0)); n += i; +// iq[jz-1] -= i<<(24-q0); +// ih = iq[jz-1]>>(23-q0); +// } +// else if(q0==0) ih = iq[jz-1]>>23; +// else if(z>=0.5) ih=2; +// +// if(ih>0) { /* q > 0.5 */ +// n += 1; carry = 0; +// for(i=0;i0) { /* rare case: chance is 1 in 12 */ +// switch(q0) { +// case 1: +// iq[jz-1] &= 0x7fffff; break; +// case 2: +// iq[jz-1] &= 0x3fffff; break; +// } +// } +// if(ih==2) { +// z = one - z; +// if(carry!=0) z -= scalbnA(one,q0); +// } +// } +// +// /* check if recomputation is needed */ +// if(z==zeroB) { +// j = 0; +// for (i=jz-1;i>=jk;i--) j |= iq[i]; +// if(j==0) { /* need recomputation */ +// for(k=1;iq[jk-k]==0;k++); /* k = no. 
of terms needed */ +// +// for(i=jz+1;i<=jz+k;i++) { /* add q[jz+1] to q[jz+k] */ +// f[jx+i] = /* NOTE: converted to double */ ipio2[jv+i]; //(double) ipio2[jv+i]; +// for(j=0,fw=0.0;j<=jx;j++) fw += x[j]*f[jx+i-j]; +// q[i] = fw; +// } +// jz += k; +// goto recompute; +// } +// } +// +// /* chop off zero terms */ +// if(z==0.0) { +// jz -= 1; q0 -= 24; +// while(iq[jz]==0) { jz--; q0-=24;} +// } else { /* break z into 24-bit if necessary */ +// z = scalbnA(z,-q0); +// if(z>=two24B) { +// fw = (double)((int)(twon24*z)); +// iq[jz] = (int)(z-two24B*fw); +// jz += 1; q0 += 24; +// iq[jz] = (int) fw; +// } else iq[jz] = (int) z ; +// } +// +// /* convert integer "bit" chunk to floating-point value */ +// fw = scalbnA(one,q0); +// for(i=jz;i>=0;i--) { +// q[i] = fw*(double)iq[i]; fw*=twon24; +// } +// +// /* compute PIo2[0,...,jp]*q[jz,...,0] */ +// for(i=jz;i>=0;i--) { +// for(fw=0.0,k=0;k<=jp&&k<=jz-i;k++) fw += PIo2[k]*q[i+k]; +// fq[jz-i] = fw; +// } +// +// // NOTE: switch below is eliminated, because prec is always 2 for doubles +// /* compress fq[] into y[] */ +// //switch(prec) { +// //case 0: +// // fw = 0.0; +// // for (i=jz;i>=0;i--) fw += fq[i]; +// // y[0] = (ih==0)? fw: -fw; +// // break; +// //case 1: +// //case 2: +// fw = 0.0; +// for (i=jz;i>=0;i--) fw += fq[i]; +// y[0] = (ih==0)? fw: -fw; +// fw = fq[0]-fw; +// for (i=1;i<=jz;i++) fw += fq[i]; +// y[1] = (ih==0)? fw: -fw; +// // break; +// //case 3: /* painful */ +// // for (i=jz;i>0;i--) { +// // fw = fq[i-1]+fq[i]; +// // fq[i] += fq[i-1]-fw; +// // fq[i-1] = fw; +// // } +// // for (i=jz;i>1;i--) { +// // fw = fq[i-1]+fq[i]; +// // fq[i] += fq[i-1]-fw; +// // fq[i-1] = fw; +// // } +// // for (fw=0.0,i=jz;i>=2;i--) fw += fq[i]; +// // if(ih==0) { +// // y[0] = fq[0]; y[1] = fq[1]; y[2] = fw; +// // } else { +// // y[0] = -fq[0]; y[1] = -fq[1]; y[2] = -fw; +// // } +// //} +// return n&7; +//} +// +// END __kernel_rem_pio2 PSEUDO CODE +// +// Changes between fdlibm and intrinsic: +// 1. One loop is unrolled and vectorized (see comments in code) +// 2. One loop is split into 2 loops (see comments in code) +// 3. Non-double code is removed(last switch). Sevaral variables became +// constants because of that (see comments in code) +// 4. Use of jx, which is nx-1 instead of nx +// Assumptions: +// 1. Assume |X| >= PI/4 +// Input and output: +// 1. Input: X = i0, jx == nx - 1 == i6, e0 == rscratch1 +// 2. Return n in i2, y[0] == y0 == f4, y[1] == y1 == f5 +// NOTE: general purpose register names match local variable names in C code +// NOTE: fpu registers are actively reused. See comments in code about their usage +void MacroAssembler::generate__kernel_rem_pio2(address two_over_pi, address pio2) { + ShouldNotReachHere(); +} + +///* __kernel_sin( x, y, iy) +// * kernel sin function on [-pi/4, pi/4], pi/4 ~ 0.7854 +// * Input x is assumed to be bounded by ~pi/4 in magnitude. +// * Input y is the tail of x. +// * Input iy indicates whether y is 0. (if iy=0, y assume to be 0). +// * +// * Algorithm +// * 1. Since sin(-x) = -sin(x), we need only to consider positive x. +// * 2. if x < 2^-27 (hx<0x3e400000 0), return x with inexact if x!=0. +// * 3. sin(x) is approximated by a polynomial of degree 13 on +// * [0,pi/4] +// * 3 13 +// * sin(x) ~ x + S1*x + ... + S6*x +// * where +// * +// * |sin(x) 2 4 6 8 10 12 | -58 +// * |----- - (1+S1*x +S2*x +S3*x +S4*x +S5*x +S6*x )| <= 2 +// * | x | +// * +// * 4. 
sin(x+y) = sin(x) + sin'(x')*y +// * ~ sin(x) + (1-x*x/2)*y +// * For better accuracy, let +// * 3 2 2 2 2 +// * r = x *(S2+x *(S3+x *(S4+x *(S5+x *S6)))) +// * then 3 2 +// * sin(x) = x + (S1*x + (x *(r-y/2)+y)) +// */ +//static const double +//S1 = -1.66666666666666324348e-01, /* 0xBFC55555, 0x55555549 */ +//S2 = 8.33333333332248946124e-03, /* 0x3F811111, 0x1110F8A6 */ +//S3 = -1.98412698298579493134e-04, /* 0xBF2A01A0, 0x19C161D5 */ +//S4 = 2.75573137070700676789e-06, /* 0x3EC71DE3, 0x57B1FE7D */ +//S5 = -2.50507602534068634195e-08, /* 0xBE5AE5E6, 0x8A2B9CEB */ +//S6 = 1.58969099521155010221e-10; /* 0x3DE5D93A, 0x5ACFD57C */ +// +// NOTE: S1..S6 were moved into a table: StubRoutines::sw64::_dsin_coef +// +// BEGIN __kernel_sin PSEUDO CODE +// +//static double __kernel_sin(double x, double y, bool iy) +//{ +// double z,r,v; +// +// // NOTE: not needed. moved to dsin/dcos +// //int ix; +// //ix = high(x)&0x7fffffff; /* high word of x */ +// +// // NOTE: moved to dsin/dcos +// //if(ix<0x3e400000) /* |x| < 2**-27 */ +// // {if((int)x==0) return x;} /* generate inexact */ +// +// z = x*x; +// v = z*x; +// r = S2+z*(S3+z*(S4+z*(S5+z*S6))); +// if(iy==0) return x+v*(S1+z*r); +// else return x-((z*(half*y-v*r)-y)-v*S1); +//} +// +// END __kernel_sin PSEUDO CODE +// +// Changes between fdlibm and intrinsic: +// 1. Removed |x| < 2**-27 check, because if was done earlier in dsin/dcos +// 2. Constants are now loaded from table dsin_coef +// 3. C code parameter "int iy" was modified to "bool iyIsOne", because +// iy is always 0 or 1. Also, iyIsOne branch was moved into +// generation phase instead of taking it during code execution +// Input ans output: +// 1. Input for generated function: X argument = x +// 2. Input for generator: x = register to read argument from, iyIsOne +// = flag to use low argument low part or not, dsin_coef = coefficients +// table address +// 3. Return sin(x) value in f0 +void MacroAssembler::generate_kernel_sin(FloatRegister x, bool iyIsOne, + address dsin_coef) { + ShouldNotReachHere(); +} + +///* +// * __kernel_cos( x, y ) +// * kernel cos function on [-pi/4, pi/4], pi/4 ~ 0.785398164 +// * Input x is assumed to be bounded by ~pi/4 in magnitude. +// * Input y is the tail of x. +// * +// * Algorithm +// * 1. Since cos(-x) = cos(x), we need only to consider positive x. +// * 2. if x < 2^-27 (hx<0x3e400000 0), return 1 with inexact if x!=0. +// * 3. cos(x) is approximated by a polynomial of degree 14 on +// * [0,pi/4] +// * 4 14 +// * cos(x) ~ 1 - x*x/2 + C1*x + ... + C6*x +// * where the remez error is +// * +// * | 2 4 6 8 10 12 14 | -58 +// * |cos(x)-(1-.5*x +C1*x +C2*x +C3*x +C4*x +C5*x +C6*x )| <= 2 +// * | | +// * +// * 4 6 8 10 12 14 +// * 4. let r = C1*x +C2*x +C3*x +C4*x +C5*x +C6*x , then +// * cos(x) = 1 - x*x/2 + r +// * since cos(x+y) ~ cos(x) - sin(x)*y +// * ~ cos(x) - x*y, +// * a correction term is necessary in cos(x) and hence +// * cos(x+y) = 1 - (x*x/2 - (r - x*y)) +// * For better accuracy when x > 0.3, let qx = |x|/4 with +// * the last 32 bits mask off, and if x > 0.78125, let qx = 0.28125. +// * Then +// * cos(x+y) = (1-qx) - ((x*x/2-qx) - (r-x*y)). +// * Note that 1-qx and (x*x/2-qx) is EXACT here, and the +// * magnitude of the latter is at least a quarter of x*x/2, +// * thus, reducing the rounding error in the subtraction. 
+// */ +// +//static const double +//C1 = 4.16666666666666019037e-02, /* 0x3FA55555, 0x5555554C */ +//C2 = -1.38888888888741095749e-03, /* 0xBF56C16C, 0x16C15177 */ +//C3 = 2.48015872894767294178e-05, /* 0x3EFA01A0, 0x19CB1590 */ +//C4 = -2.75573143513906633035e-07, /* 0xBE927E4F, 0x809C52AD */ +//C5 = 2.08757232129817482790e-09, /* 0x3E21EE9E, 0xBDB4B1C4 */ +//C6 = -1.13596475577881948265e-11; /* 0xBDA8FAE9, 0xBE8838D4 */ +// +// NOTE: C1..C6 were moved into a table: StubRoutines::sw64::_dcos_coef +// +// BEGIN __kernel_cos PSEUDO CODE +// +//static double __kernel_cos(double x, double y) +//{ +// double a,h,z,r,qx=0; +// +// // NOTE: ix is already initialized in dsin/dcos. Reuse value from register +// //int ix; +// //ix = high(x)&0x7fffffff; /* ix = |x|'s high word*/ +// +// // NOTE: moved to dsin/dcos +// //if(ix<0x3e400000) { /* if x < 2**27 */ +// // if(((int)x)==0) return one; /* generate inexact */ +// //} +// +// z = x*x; +// r = z*(C1+z*(C2+z*(C3+z*(C4+z*(C5+z*C6))))); +// if(ix < 0x3FD33333) /* if |x| < 0.3 */ +// return one - (0.5*z - (z*r - x*y)); +// else { +// if(ix > 0x3fe90000) { /* x > 0.78125 */ +// qx = 0.28125; +// } else { +// set_high(&qx, ix-0x00200000); /* x/4 */ +// set_low(&qx, 0); +// } +// h = 0.5*z-qx; +// a = one-qx; +// return a - (h - (z*r-x*y)); +// } +//} +// +// END __kernel_cos PSEUDO CODE +// +// Changes between fdlibm and intrinsic: +// 1. Removed |x| < 2**-27 check, because if was done earlier in dsin/dcos +// 2. Constants are now loaded from table dcos_coef +// Input and output: +// 1. Input for generated function: X argument = x +// 2. Input for generator: x = register to read argument from, dcos_coef +// = coefficients table address +// 2. Return cos(x) value in f0 +void MacroAssembler::generate_kernel_cos(FloatRegister x, address dcos_coef) { + ShouldNotReachHere(); +} + +// generate_dsin_dcos creates stub for dsin and dcos +// Generation is done via single call because dsin and dcos code is almost the +// same(see C code below). These functions work as follows: +// 1) handle corner cases: |x| ~< pi/4, x is NaN or INF, |x| < 2**-27 +// 2) perform argument reduction if required +// 3) call kernel_sin or kernel_cos which approximate sin/cos via polynomial +// +// BEGIN dsin/dcos PSEUDO CODE +// +//dsin_dcos(jdouble x, bool isCos) { +// double y[2],z=0.0; +// int n, ix; +// +// /* High word of x. */ +// ix = high(x); +// +// /* |x| ~< pi/4 */ +// ix &= 0x7fffffff; +// if(ix <= 0x3fe921fb) return isCos ? __kernel_cos : __kernel_sin(x,z,0); +// +// /* sin/cos(Inf or NaN) is NaN */ +// else if (ix>=0x7ff00000) return x-x; +// else if (ix<0x3e400000) { /* if ix < 2**27 */ +// if(((int)x)==0) return isCos ? one : x; /* generate inexact */ +// } +// /* argument reduction needed */ +// else { +// n = __ieee754_rem_pio2(x,y); +// switch(n&3) { +// case 0: return isCos ? __kernel_cos(y[0],y[1]) : __kernel_sin(y[0],y[1], true); +// case 1: return isCos ? -__kernel_sin(y[0],y[1],true) : __kernel_cos(y[0],y[1]); +// case 2: return isCos ? -__kernel_cos(y[0],y[1]) : -__kernel_sin(y[0],y[1], true); +// default: +// return isCos ? __kernel_sin(y[0],y[1],1) : -__kernel_cos(y[0],y[1]); +// } +// } +//} +// END dsin/dcos PSEUDO CODE +// +// Changes between fdlibm and intrinsic: +// 1. Moved ix < 2**27 from kernel_sin/kernel_cos into dsin/dcos +// 2. Final switch use equivalent bit checks(tbz/tbnz) +// Input ans output: +// 1. Input for generated function: X = i0 +// 2. 
Input for generator: isCos = generate sin or cos, npio2_hw = address +// of npio2_hw table, two_over_pi = address of two_over_pi table, +// pio2 = address if pio2 table, dsin_coef = address if dsin_coef table, +// dcos_coef = address of dcos_coef table +// 3. Return result in f0 +// NOTE: general purpose register names match local variable names in C code +void MacroAssembler::generate_dsin_dcos(bool isCos, address npio2_hw, + address two_over_pi, address pio2, address dsin_coef, address dcos_coef) { + ShouldNotReachHere(); +} diff --git a/src/hotspot/cpu/sw64/matcher_sw64.hpp b/src/hotspot/cpu/sw64/matcher_sw64.hpp new file mode 100644 index 00000000000..3ac2c8f843e --- /dev/null +++ b/src/hotspot/cpu/sw64/matcher_sw64.hpp @@ -0,0 +1,156 @@ +/* + * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_SW64_MATCHER_SW64_HPP +#define CPU_SW64_MATCHER_SW64_HPP + + // Defined within class Matcher + + // The ecx parameter to rep stosq for the ClearArray node is in words. + static const bool init_array_count_is_in_bytes = false; + + // Whether this platform implements the scalable vector feature + static const bool implements_scalable_vector = false; + + static constexpr bool supports_scalable_vector() { + return false; + } + + // sw64 supports misaligned vectors store/load. + static constexpr bool misaligned_vectors_ok() { + return false; + } + + // Whether code generation need accurate ConvI2L types. + static const bool convi2l_type_required = true; + + // Do the processor's shift instructions only use the low 5/6 bits + // of the count for 32/64 bit ints? If not we need to do the masking + // ourselves. + static const bool need_masked_shift_count = false; + + // Does the CPU require late expand (see block.cpp for description of late expand)? + static const bool require_postalloc_expand = false; + + // sw64 supports generic vector operands: vec and legVec. + static const bool supports_generic_vector_operands = false; + + static constexpr bool isSimpleConstant64(jlong value) { + // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?. + //return value == (int) value; // Cf. storeImmL and immL32. + + // Probably always true, even if a temp register is required. + return true; + } + + // No additional cost for CMOVL. 
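+  // (Reading note, not SW64-specific tuning: a non-zero cost here would
+  //  discourage C2 from turning long-typed if/else diamonds into conditional
+  //  moves; returning 0 keeps the default heuristics.)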
+ static constexpr int long_cmove_cost() { return 0; } + + // No CMOVF/CMOVD with SSE2 + static int float_cmove_cost() { return ConditionalMoveLimit; } + + static bool narrow_oop_use_complex_address() { + assert(UseCompressedOops, "only for compressed oops code"); +// return (LogMinObjAlignmentInBytes <= 3); + //Unimplemented(); + return false; + } + + static bool narrow_klass_use_complex_address() { + assert(UseCompressedClassPointers, "only for compressed klass code"); +// return (LogKlassAlignmentInBytes <= 3); + //Unimplemented(); + return false; + } + + // Prefer ConN+DecodeN over ConP. + static const bool const_oop_prefer_decode() { + // Prefer ConN+DecodeN over ConP. + return true; + } + + // Prefer ConP over ConNKlass+DecodeNKlass. + static const bool const_klass_prefer_decode() { + // TODO: Either support matching DecodeNKlass (heap-based) in operand + // or condisider the following: + // Prefer ConNKlass+DecodeNKlass over ConP in simple compressed klass mode. + //return Universe::narrow_klass_base() == NULL; + return true; + } + + // Is it better to copy float constants, or load them directly from memory? + // Intel can load a float constant from a direct address, requiring no + // extra registers. Most RISCs will have to materialize an address into a + // register first, so they would do better to copy the constant from stack. + static const bool rematerialize_float_constants = true; + + // If CPU can load and store mis-aligned doubles directly then no fixup is + // needed. Else we split the double into 2 integer pieces and move it + // piece-by-piece. Only happens when passing doubles into C code as the + // Java calling convention forces doubles to be aligned. + static const bool misaligned_doubles_ok = true; + + // Advertise here if the CPU requires explicit rounding operations to implement strictfp mode. + static const bool strict_fp_requires_explicit_rounding = false; + + // Are floats converted to double when stored to stack during deoptimization? + static constexpr bool float_in_double() { + return true; //swjdk8 is return true lsp?? + } + + // Do ints take an entire long register or just half? + static const bool int_in_long = true; + + + // Does the CPU supports vector variable shift instructions? + static bool supports_vector_variable_shifts(void) { + return false; + } + + // Does the CPU supports vector variable rotate instructions? + static constexpr bool supports_vector_variable_rotates(void) { + return true; + } + + // Does the CPU supports vector unsigned comparison instructions? + static const bool supports_vector_comparison_unsigned(int vlen, BasicType bt) { + return false; + } + + // Some microarchitectures have mask registers used on vectors + static const bool has_predicated_vectors(void) { + return false; + } + + // true means we have fast l2f convers + // false means that conversion is done by runtime call + // This is UltraSparc specific, true just means we have fast l2f conversion + static constexpr bool convL2FSupported(void) { + return true; + } + + // Implements a variant of EncodeISOArrayNode that encode ASCII only + static const bool supports_encode_ascii_array = false; + +#endif // CPU_SW64_MATCHER_SW64_HPP diff --git a/src/hotspot/cpu/sw64/methodHandles_sw64.cpp b/src/hotspot/cpu/sw64/methodHandles_sw64.cpp new file mode 100644 index 00000000000..86807e3d3ed --- /dev/null +++ b/src/hotspot/cpu/sw64/methodHandles_sw64.cpp @@ -0,0 +1,646 @@ +/* + * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved. 
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "jvm.h" +#include "asm/macroAssembler.hpp" +#include "compiler/disassembler.hpp" +#include "classfile/vmClasses.hpp" +#include "classfile/javaClasses.inline.hpp" +#include "interpreter/interpreter.hpp" +#include "interpreter/interpreterRuntime.hpp" +#include "logging/log.hpp" +#include "logging/logStream.hpp" +#include "memory/allocation.inline.hpp" +#include "prims/jvmtiExport.hpp" +#include "prims/methodHandles.hpp" +#include "runtime/flags/flagSetting.hpp" +#include "runtime/frame.inline.hpp" +#include "runtime/stubRoutines.hpp" +#include "utilities/formatBuffer.hpp" +#include "utilities/preserveException.hpp" + +#define __ Disassembler::hook(__FILE__, __LINE__, _masm)-> + +#ifdef PRODUCT +#define BLOCK_COMMENT(str) /* nothing */ +#else +#define BLOCK_COMMENT(str) __ block_comment(str) +#endif + +#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") + +void MethodHandles::load_klass_from_Class(MacroAssembler* _masm, Register klass_reg) {SCOPEMARK_NAME(MethodHandles::load_klass_from_Class, _masm) + if (VerifyMethodHandles) + verify_klass(_masm, klass_reg, VM_CLASS_ID(java_lang_Class), + "MH argument is a Class"); + __ ldptr(klass_reg, Address(klass_reg, java_lang_Class::klass_offset())); +} + +#ifdef ASSERT +static int check_nonzero(const char* xname, int x) { + assert(x != 0, "%s should be nonzero", xname); + return x; +} +#define NONZERO(x) check_nonzero(#x, x) +#else //ASSERT +#define NONZERO(x) (x) +#endif //ASSERT + +#ifdef ASSERT +void MethodHandles::verify_klass(MacroAssembler* _masm, + Register obj, vmClassID klass_id, + const char* error_message) {SCOPEMARK_NAME(MethodHandles::verify_klass, _masm) + InstanceKlass** klass_addr = vmClasses::klass_addr_at(klass_id); + Klass* klass = vmClasses::klass_at(klass_id); + Register temp = rdi; + Register temp2 = noreg; + temp2 = rscratch3; // used by MacroAssembler::cmpptr + Label L_ok, L_bad; + BLOCK_COMMENT("verify_klass {"); + __ verify_oop(obj); + __ jcc(Assembler::zero, L_bad, obj); + __ push(temp); if (temp2 != noreg) __ push(temp2); +#define UNPUSH { if (temp2 != noreg) __ pop(temp2); __ pop(temp); } + __ load_klass(temp, obj); + __ cmpptr(temp, ExternalAddress((address) klass_addr)); + __ jcc(Assembler::equal, L_ok); + int super_check_offset = klass->super_check_offset(); //long-> int may be a problem? need modify? 
jzy + __ ldptr(temp, Address(temp, super_check_offset)); + __ cmpptr(temp, ExternalAddress((address) klass_addr)); + __ jcc(Assembler::equal, L_ok); + UNPUSH; + __ BIND(L_bad); + __ stop(error_message); + __ BIND(L_ok); + UNPUSH; + BLOCK_COMMENT("} verify_klass"); +} + +void MethodHandles::verify_ref_kind(MacroAssembler* _masm, int ref_kind, Register member_reg, Register temp) {SCOPEMARK_NAME(MethodHandles::verify_ref_kind, _masm) + Label L; + BLOCK_COMMENT("verify_ref_kind {"); + __ ldwu(temp, Address(member_reg, NONZERO(java_lang_invoke_MemberName::flags_offset()))); + __ srll(temp, java_lang_invoke_MemberName::MN_REFERENCE_KIND_SHIFT, temp); + __ andw(temp, java_lang_invoke_MemberName::MN_REFERENCE_KIND_MASK, temp); + __ cmpw(temp, ref_kind); + __ jcc(Assembler::equal, L); + { char* buf = NEW_C_HEAP_ARRAY(char, 100, mtInternal); + jio_snprintf(buf, 100, "verify_ref_kind expected %x", ref_kind); + if (ref_kind == JVM_REF_invokeVirtual || + ref_kind == JVM_REF_invokeSpecial) + // could do this for all ref_kinds, but would explode assembly code size + trace_method_handle(_masm, buf); + __ stop(buf); + } + BLOCK_COMMENT("} verify_ref_kind"); + __ BIND(L); +} + +#endif //ASSERT + +void MethodHandles::jump_from_method_handle(MacroAssembler* _masm, Register method, Register temp, + bool for_compiler_entry) {SCOPEMARK_NAME(MethodHandles::jump_from_method_handle, _masm) + assert(method == rmethod, "interpreter calling convention"); + + Label L_no_such_method; + __ jcc(Assembler::zero, L_no_such_method, rmethod); + + __ verify_method_ptr(method); + + if (!for_compiler_entry && JvmtiExport::can_post_interpreter_events()) { + Label run_compiled_code; + // JVMTI events, such as single-stepping, are implemented partly by avoiding running + // compiled code in threads for which the event is enabled. Check here for + // interp_only_mode if these events CAN be enabled. + //Register rthread = rthread; + // interp_only is an int, on little endian it is sufficient to test the byte only + // Is a cmpl faster? + __ cmpb(Address(rthread, JavaThread::interp_only_mode_offset()), 0); + __ jcc(Assembler::zero, run_compiled_code); + __ jmp(Address(method, Method::interpreter_entry_offset())); + __ BIND(run_compiled_code); + } + + const ByteSize entry_offset = for_compiler_entry ? Method::from_compiled_offset() : + Method::from_interpreted_offset(); + __ jmp(Address(method, entry_offset)); + + __ bind(L_no_such_method); + __ jump(RuntimeAddress(StubRoutines::throw_AbstractMethodError_entry())); +} + +void MethodHandles::jump_to_lambda_form(MacroAssembler* _masm, + Register recv, Register method_temp, + Register temp2, + bool for_compiler_entry) { + BLOCK_COMMENT("jump_to_lambda_form {"); + // This is the initial entry point of a lazy method handle. + // After type checking, it picks up the invoker from the LambdaForm. 
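+  // Reading aid (summarizes the loads emitted below; nothing extra is emitted):
+  //   recv (MethodHandle) .form     -> LambdaForm
+  //   LambdaForm          .vmentry  -> MemberName
+  //   MemberName          .method   -> ResolvedMethodName
+  //   ResolvedMethodName  .vmtarget -> Method*  (left in method_temp == rmethod)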
+ assert_different_registers(recv, method_temp, temp2, rscratch3); + assert(recv != noreg, "required register"); + assert(method_temp == rmethod, "required register for loading method"); + + //NOT_PRODUCT({ FlagSetting fs(TraceMethodHandles, true); trace_method_handle(_masm, "LZMH"); }); + + // Load the invoker, as MH -> MH.form -> LF.vmentry + __ verify_oop(recv); + __ load_heap_oop(method_temp, Address(recv, NONZERO(java_lang_invoke_MethodHandle::form_offset())), temp2); + __ verify_oop(method_temp); + __ load_heap_oop(method_temp, Address(method_temp, NONZERO(java_lang_invoke_LambdaForm::vmentry_offset())), temp2); + __ verify_oop(method_temp); + __ load_heap_oop(method_temp, Address(method_temp, NONZERO(java_lang_invoke_MemberName::method_offset())), temp2); + __ verify_oop(method_temp); + __ access_load_at(T_ADDRESS, IN_HEAP, method_temp, + Address(method_temp, NONZERO(java_lang_invoke_ResolvedMethodName::vmtarget_offset())), + noreg, noreg); + + if (VerifyMethodHandles && !for_compiler_entry) { + // make sure recv is already on stack + __ ldptr(temp2, Address(method_temp, Method::const_offset())); + __ load_sized_value(temp2, + Address(temp2, ConstMethod::size_of_parameters_offset()), + sizeof(u2), /*is_signed*/ false); + // assert(sizeof(u2) == sizeof(Method::_size_of_parameters), ""); + Label L; + __ ldl(rscratch3, __ argument_address(temp2, -1)); + __ cmpoop(recv, rscratch3); + __ jcc(Assembler::equal, L); + __ ldptr(V0, __ argument_address(temp2, -1)); + __ stop("receiver not on stack"); + __ BIND(L); + } + + jump_from_method_handle(_masm, method_temp, temp2, for_compiler_entry); + BLOCK_COMMENT("} jump_to_lambda_form"); +} + + +// Code generation +address MethodHandles::generate_method_handle_interpreter_entry(MacroAssembler* _masm, + vmIntrinsics::ID iid) {SCOPEMARK_NAME(MethodHandles::generate_method_handle_interpreter_entry, _masm) + const bool not_for_compiler_entry = false; // this is the interpreter entry + assert(is_signature_polymorphic(iid), "expected invoke iid"); + if (iid == vmIntrinsics::_invokeGeneric || + iid == vmIntrinsics::_compiledLambdaForm) { + // Perhaps surprisingly, the symbolic references visible to Java are not directly used. + // They are linked to Java-generated adapters via MethodHandleNatives.linkMethod. + // They all allow an appendix argument. + __ stop("empty stubs make SG sick"); + return NULL; + } + + // No need in interpreter entry for linkToNative for now. + // Interpreter calls compiled entry through i2c. 
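+  // (i2c = interpreter-to-compiled entry adapter; linkToNative call sites are
+  //  therefore never dispatched through this interpreter entry, which is why
+  //  the branch below simply stops.)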
+ if (iid == vmIntrinsics::_linkToNative) { + __ stop("No need in interpreter entry for linkToNative for now."); + return NULL; + } + + // rsi/r13: sender SP (must preserve; see prepare_to_jump_from_interpreted) + // rbx: Method* + // rdx: argument locator (parameter slot count, added to rsp) + // rcx: used as temp to hold mh or receiver + // rax, rdi: garbage temps, blown away + Register rdx_argp = rdx; // argument list ptr, live on error paths + Register rax_temp = rax; + Register rcx_mh = rcx; // MH receiver; dies quickly and is recycled + Register rbx_method = rbx; // eventual target of this invocation + //Register rcx = c_rarg3; + // here's where control starts out: + __ align(CodeEntryAlignment); + address entry_point = __ pc(); + + if (VerifyMethodHandles) { + assert(Method::intrinsic_id_size_in_bytes() == 2, "assuming Method::_intrinsic_id is u2"); + + Label L; + BLOCK_COMMENT("verify_intrinsic_id {"); + __ movw(rscratch3, (int)iid); + __ cmpw(Address(rbx_method, Method::intrinsic_id_offset_in_bytes()), rscratch3); + __ jcc(Assembler::equal, L); + if (iid == vmIntrinsics::_linkToVirtual || + iid == vmIntrinsics::_linkToSpecial) { + // could do this for all kinds, but would explode assembly code size + trace_method_handle(_masm, "bad Method*::intrinsic_id"); + } + __ stop("bad Method*::intrinsic_id"); + __ bind(L); + BLOCK_COMMENT("} verify_intrinsic_id"); + } + + // First task: Find out how big the argument list is. + Address rdx_first_arg_addr; + int ref_kind = signature_polymorphic_intrinsic_ref_kind(iid); + assert(ref_kind != 0 || iid == vmIntrinsics::_invokeBasic, "must be _invokeBasic or a linkTo intrinsic"); + if (ref_kind == 0 || MethodHandles::ref_kind_has_receiver(ref_kind)) { + __ ldptr(rdx_argp, Address(rbx_method, Method::const_offset())); + __ load_sized_value(rdx_argp, + Address(rdx_argp, ConstMethod::size_of_parameters_offset()), + sizeof(u2), /*is_signed*/ false); + // assert(sizeof(u2) == sizeof(Method::_size_of_parameters), ""); + rdx_first_arg_addr = __ argument_address(rdx_argp, -1); + } else { + DEBUG_ONLY(rdx_argp = noreg); + } + + if (!is_signature_polymorphic_static(iid)) { + __ ldptr(rcx_mh, rdx_first_arg_addr); + DEBUG_ONLY(rdx_argp = noreg); + } + + // rdx_first_arg_addr is live! + + trace_method_handle_interpreter_entry(_masm, iid); + + if (iid == vmIntrinsics::_invokeBasic) { + generate_method_handle_dispatch(_masm, iid, rcx_mh, noreg, not_for_compiler_entry); + + } else { + // Adjust argument list by popping the trailing MemberName argument. + Register rcx_recv = noreg; + if (MethodHandles::ref_kind_has_receiver(ref_kind)) { + // Load the receiver (not the MH; the actual MemberName's receiver) up from the interpreter stack. 
+ __ ldptr(rcx_recv = rcx, rdx_first_arg_addr); + } + DEBUG_ONLY(rdx_argp = noreg); + Register rbx_member = rbx_method; // MemberName ptr; incoming method ptr is dead now + //TODO:__ stop("check:generate_method_handle_interpreter_entry jzy"); + //__ pop(rax_temp); // return address + __ pop(rbx_member); // extract last argument + //__ push(rax_temp); // re-push return address + generate_method_handle_dispatch(_masm, iid, rcx_recv, rbx_member, not_for_compiler_entry); + } + + return entry_point; +} + +void MethodHandles::generate_method_handle_dispatch(MacroAssembler* _masm, + vmIntrinsics::ID iid, + Register receiver_reg, + Register member_reg, + bool for_compiler_entry) {SCOPEMARK_NAME(MethodHandles::generate_method_handle_dispatch, _masm) + assert(is_signature_polymorphic(iid), "expected invoke iid"); + Register rbx_method = rbx; // eventual target of this invocation + // temps used in this code are not used in *either* compiled or interpreted calling sequences + + Register temp1 = rscratch1; + Register temp2 = rscratch2; + Register temp3 = rax; + if (for_compiler_entry) { + assert(receiver_reg == (iid == vmIntrinsics::_linkToStatic ? noreg : j_rarg0), "only valid assignment"); + assert_different_registers(temp1, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5); + assert_different_registers(temp2, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5); + assert_different_registers(temp3, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5); + } + else { + assert_different_registers(temp1, temp2, temp3, saved_last_sp_register()); // don't trash lastSP + } + assert_different_registers(temp1, temp2, temp3, receiver_reg); + assert_different_registers(temp1, temp2, temp3, member_reg); + + if (iid == vmIntrinsics::_invokeBasic || iid == vmIntrinsics::_linkToNative) { + if (iid == vmIntrinsics::_linkToNative) { + assert(for_compiler_entry, "only compiler entry is supported"); + } + // indirect through MH.form.vmentry.vmtarget + jump_to_lambda_form(_masm, receiver_reg, rbx_method, temp1, for_compiler_entry); + + } else { + // The method is a member invoker used by direct method handles. + if (VerifyMethodHandles) { + // make sure the trailing argument really is a MemberName (caller responsibility) + verify_klass(_masm, member_reg, VM_CLASS_ID(java_lang_invoke_MemberName), + "MemberName required for invokeVirtual etc."); + } + //TODO:__ stop("generate_method_handle_dispatch check:jzy"); + Address member_clazz( member_reg, NONZERO(java_lang_invoke_MemberName::clazz_offset())); + Address member_vmindex( member_reg, NONZERO(java_lang_invoke_MemberName::vmindex_offset())); + Address member_vmtarget( member_reg, NONZERO(java_lang_invoke_MemberName::method_offset())); + Address vmtarget_method( rbx_method, NONZERO(java_lang_invoke_ResolvedMethodName::vmtarget_offset())); + + Register temp1_recv_klass = temp1; + if (iid != vmIntrinsics::_linkToStatic) { + __ verify_oop(receiver_reg); + if (iid == vmIntrinsics::_linkToSpecial) { + // Don't actually load the klass; just null-check the receiver. + __ null_check(receiver_reg); + } else { + // load receiver klass itself + __ null_check(receiver_reg, oopDesc::klass_offset_in_bytes()); + __ load_klass(temp1_recv_klass, receiver_reg); + __ verify_klass_ptr(temp1_recv_klass); + } + BLOCK_COMMENT("check_receiver {"); + // The receiver for the MemberName must be in receiver_reg. + // Check the receiver against the MemberName.clazz + if (VerifyMethodHandles && iid == vmIntrinsics::_linkToSpecial) { + // Did not load it above... 
+ __ load_klass(temp1_recv_klass, receiver_reg); + __ verify_klass_ptr(temp1_recv_klass); + } + if (VerifyMethodHandles && iid != vmIntrinsics::_linkToInterface) { + Label L_ok; + Register temp2_defc = temp2; + __ load_heap_oop(temp2_defc, member_clazz, temp3); + load_klass_from_Class(_masm, temp2_defc); + __ verify_klass_ptr(temp2_defc); + __ check_klass_subtype(temp1_recv_klass, temp2_defc, temp3, L_ok); + // If we get here, the type check failed! + __ stop("receiver class disagrees with MemberName.clazz"); + __ BIND(L_ok); + } + BLOCK_COMMENT("} check_receiver"); + } + if (iid == vmIntrinsics::_linkToSpecial || + iid == vmIntrinsics::_linkToStatic) { + DEBUG_ONLY(temp1_recv_klass = noreg); // these guys didn't load the recv_klass + } + + // Live registers at this point: + // member_reg - MemberName that was the trailing argument + // temp1_recv_klass - klass of stacked receiver, if needed + // rsi/r13 - interpreter linkage (if interpreted) + // rcx, rdx, rsi, rdi, r8 - compiler arguments (if compiled) + + Label L_incompatible_class_change_error; + switch (iid) { + case vmIntrinsics::_linkToSpecial: + if (VerifyMethodHandles) { + verify_ref_kind(_masm, JVM_REF_invokeSpecial, member_reg, temp3); + } + __ load_heap_oop(rbx_method, member_vmtarget); + __ access_load_at(T_ADDRESS, IN_HEAP, rbx_method, vmtarget_method, noreg, noreg); + break; + + case vmIntrinsics::_linkToStatic: + if (VerifyMethodHandles) { + verify_ref_kind(_masm, JVM_REF_invokeStatic, member_reg, temp3); + } + __ load_heap_oop(rbx_method, member_vmtarget); + __ access_load_at(T_ADDRESS, IN_HEAP, rbx_method, vmtarget_method, noreg, noreg); + break; + + case vmIntrinsics::_linkToVirtual: + { + // same as TemplateTable::invokevirtual, + // minus the CP setup and profiling: + + if (VerifyMethodHandles) { + verify_ref_kind(_masm, JVM_REF_invokeVirtual, member_reg, temp3); + } + + // pick out the vtable index from the MemberName, and then we can discard it: + Register temp2_index = temp2; + __ access_load_at(T_ADDRESS, IN_HEAP, temp2_index, member_vmindex, noreg, noreg); + + if (VerifyMethodHandles) { + Label L_index_ok; + __ cmpw(temp2_index, 0); + __ jcc(Assembler::greaterEqual, L_index_ok); + __ stop("no virtual index"); + __ BIND(L_index_ok); + } + + // Note: The verifier invariants allow us to ignore MemberName.clazz and vmtarget + // at this point. And VerifyMethodHandles has already checked clazz, if needed. 
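+      // lookup_virtual_method below indexes the receiver klass's vtable with
+      // temp2_index and leaves the resolved Method* in rbx_method.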
+ + // get target Method* & entry point + __ lookup_virtual_method(temp1_recv_klass, temp2_index, rbx_method); + break; + } + + case vmIntrinsics::_linkToInterface: + { + // same as TemplateTable::invokeinterface + // (minus the CP setup and profiling, with different argument motion) + if (VerifyMethodHandles) { + verify_ref_kind(_masm, JVM_REF_invokeInterface, member_reg, temp3); + } + + Register temp3_intf = temp3; + __ load_heap_oop(temp3_intf, member_clazz); + load_klass_from_Class(_masm, temp3_intf); + __ verify_klass_ptr(temp3_intf); + + Register rbx_index = rbx_method; + __ access_load_at(T_ADDRESS, IN_HEAP, rbx_index, member_vmindex, noreg, noreg); + if (VerifyMethodHandles) { + Label L; + __ cmpw(rbx_index, 0); + __ jcc(Assembler::greaterEqual, L); + __ stop("invalid vtable index for MH.invokeInterface"); + __ BIND(L); + } + + // given intf, index, and recv klass, dispatch to the implementation method + __ lookup_interface_method(temp1_recv_klass, temp3_intf, + // note: next two args must be the same: + rbx_index, rbx_method, + temp2, + L_incompatible_class_change_error); + break; + } + + default: + fatal("unexpected intrinsic %d: %s", vmIntrinsics::as_int(iid), vmIntrinsics::name_at(iid)); + break; + } + + // Live at this point: + // rbx_method + // rsi/r13 (if interpreted) + + // After figuring out which concrete method to call, jump into it. + // Note that this works in the interpreter with no data motion. + // But the compiled version will require that rcx_recv be shifted out. + __ verify_method_ptr(rbx_method); + jump_from_method_handle(_masm, rbx_method, temp1, for_compiler_entry); + + if (iid == vmIntrinsics::_linkToInterface) { + __ bind(L_incompatible_class_change_error); + __ jump(RuntimeAddress(StubRoutines::throw_IncompatibleClassChangeError_entry())); + } + } +} + +#ifndef PRODUCT +void trace_method_handle_stub(const char* adaptername, + oopDesc* mh, + intptr_t* saved_regs, + intptr_t* entry_sp) { + // called as a leaf from native code: do not block the JVM! + bool has_mh = (strstr(adaptername, "/static") == NULL && + strstr(adaptername, "linkTo") == NULL); // static linkers don't have MH + const char* mh_reg_name = has_mh ? "rcx_mh" : "rcx"; + log_info(methodhandles)("MH %s %s=" PTR_FORMAT " sp=" PTR_FORMAT, adaptername, mh_reg_name, p2i(mh), p2i(entry_sp)); + + LogTarget(Trace, methodhandles) lt; + if (lt.is_enabled()) { + ResourceMark rm; + LogStream ls(lt); + ls.print_cr("Registers:"); + const int saved_regs_count = RegisterImpl::number_of_registers; + for (int i = 0; i < saved_regs_count; i++) { + Register r = as_Register(i); + // The registers are stored in reverse order on the stack (by pusha). + tty->print("%3s=" PTR_FORMAT, r->name(), saved_regs[((saved_regs_count - 1) - i)]); + if ((i + 1) % 4 == 0) { + ls.cr(); + } else { + ls.print(", "); + } + } + ls.cr(); + + // Note: We want to allow trace_method_handle from any call site. + // While trace_method_handle creates a frame, it may be entered + // without a PC on the stack top (e.g. not just after a call). + // Walking that frame could lead to failures due to that invalid PC. 
+ // => carefully detect that frame when doing the stack walking + + { + // dumping last frame with frame::describe + + JavaThread* p = JavaThread::active(); + + // may not be needed by safer and unexpensive here + PreserveExceptionMark pem(Thread::current()); + FrameValues values; + + // Current C frame + frame cur_frame = os::current_frame(); + + if (cur_frame.fp() != 0) { // not walkable + + // Robust search of trace_calling_frame (independent of inlining). + // Assumes saved_regs comes from a pusha in the trace_calling_frame. + // + // We have to start the search from cur_frame, because trace_calling_frame may be it. + // It is guaranteed that trace_calling_frame is different from the top frame. + // But os::current_frame() does NOT return the top frame: it returns the next frame under it (caller's frame). + // (Due to inlining and tail call optimizations, caller's frame doesn't necessarily correspond to the immediate + // caller in the source code.) + assert(cur_frame.sp() < saved_regs, "registers not saved on stack ?"); + frame trace_calling_frame = cur_frame; + while (trace_calling_frame.fp() < saved_regs) { + assert(trace_calling_frame.cb() == NULL, "not a C frame"); + trace_calling_frame = os::get_sender_for_C_frame(&trace_calling_frame); + } + assert(trace_calling_frame.sp() < saved_regs, "wrong frame"); + + // safely create a frame and call frame::describe + intptr_t *dump_sp = trace_calling_frame.sender_sp(); + intptr_t *dump_fp = trace_calling_frame.link(); + + if (has_mh) { + // The previous definition of walkable may have to be refined + // if new call sites cause the next frame constructor to start + // failing. Alternatively, frame constructors could be + // modified to support the current or future non walkable + // frames (but this is more intrusive and is not considered as + // part of this RFE, which will instead use a simpler output). + frame dump_frame = frame(dump_sp, dump_fp); + dump_frame.describe(values, 1); + } else { + // Stack may not be walkable (invalid PC above FP): + // Add descriptions without building a Java frame to avoid issues + values.describe(-1, dump_fp, "fp for #1 "); + values.describe(-1, dump_sp, "sp for #1"); + } + } + values.describe(-1, entry_sp, "raw top of stack"); + + ls.print_cr("Stack layout:"); + values.print_on(p, &ls); + } + if (has_mh && oopDesc::is_oop(mh)) { + mh->print_on(&ls); + if (java_lang_invoke_MethodHandle::is_instance(mh)) { + java_lang_invoke_MethodHandle::form(mh)->print_on(&ls); + } + } + } +} + +// The stub wraps the arguments in a struct on the stack to avoid +// dealing with the different calling conventions for passing 6 +// arguments. +struct MethodHandleStubArguments { + const char* adaptername; + oopDesc* mh; + intptr_t* saved_regs; + intptr_t* entry_sp; +}; + +void trace_method_handle_stub_wrapper(MethodHandleStubArguments* args) { + trace_method_handle_stub(args->adaptername, + args->mh, + args->saved_regs, + args->entry_sp); +} + +void MethodHandles::trace_method_handle(MacroAssembler* _masm, const char* adaptername) {SCOPEMARK_NAME(MethodHandles::trace_method_handle, _masm) + if (!log_is_enabled(Info, methodhandles)) return; + /* + Unimplemented(); + BLOCK_COMMENT(err_msg("trace_method_handle %s {", adaptername)); + Register rbx = R0; //? 
jzy + __ stop("check: trace_method_handle jzy"); + __ enter(); + __ andptr(esp, -16, esp); // align stack if needed for FPU state + __ pushad(); + __ movl(rheapbase, esp); // for retreiving saved_regs + // Note: saved_regs must be in the entered frame for the + // robust stack walking implemented in trace_method_handle_stub. + + // save FP result, valid at some call sites (adapter_opt_return_float, ...) + __ decrement(esp, 2 * wordSize); + __ store_double(FSF, Address(esp, 0)); + + // Incoming state: + // rcx: method handle + // + // To avoid calling convention issues, build a record on the stack + // and pass the pointer to that instead. + Register rbp = rfp; + Register rcx = R0; //? jzy + __ push(rbp); // entry_sp (with extra align space) + __ push(rbx); // pusha saved_regs + __ push(rcx); // mh + __ push(rcx); // slot for adaptername + __ mov_immediate64(rscratch1, (intptr_t) adaptername); + __ stptr(rscratch1, Address(esp, 0)); + __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, trace_method_handle_stub_wrapper), esp); + __ increment(esp, sizeof(MethodHandleStubArguments)); + + __ load_double(FSF, Address(esp, 0)); + __ increment(esp, 2 * wordSize); + + __ popad(); + __ leave(); + */ + BLOCK_COMMENT("} trace_method_handle"); +} +#endif //PRODUCT diff --git a/src/hotspot/cpu/sw64/methodHandles_sw64.hpp b/src/hotspot/cpu/sw64/methodHandles_sw64.hpp new file mode 100644 index 00000000000..7e3acea514c --- /dev/null +++ b/src/hotspot/cpu/sw64/methodHandles_sw64.hpp @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2010, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +// Platform-specific definitions for method handles. +// These definitions are inlined into class MethodHandles. + +// Adapters +enum /* platform_dependent_constants */ { + adapter_code_size = 32000 DEBUG_ONLY(+ 150000) +}; + +public: + + static void load_klass_from_Class(MacroAssembler* _masm, Register klass_reg); + + static void verify_klass(MacroAssembler* _masm, + Register obj, vmClassID klass_id, + const char* error_message = "wrong klass") NOT_DEBUG_RETURN; + + static void verify_method_handle(MacroAssembler* _masm, Register mh_reg) { + verify_klass(_masm, mh_reg, VM_CLASS_ID(MethodHandle_klass), + "reference is a MH"); + } + + static void verify_ref_kind(MacroAssembler* _masm, int ref_kind, Register member_reg, Register temp) NOT_DEBUG_RETURN; + + // Similar to InterpreterMacroAssembler::jump_from_interpreted. + // Takes care of special dispatch from single stepping too. 
+ static void jump_from_method_handle(MacroAssembler* _masm, Register method, Register temp, + bool for_compiler_entry); + + static void jump_to_lambda_form(MacroAssembler* _masm, + Register recv, Register method_temp, + Register temp2, + bool for_compiler_entry); + + static Register saved_last_sp_register() { + // Should be in sharedRuntime, not here. + return i29; + } diff --git a/src/hotspot/cpu/sw64/nativeInst_sw64.cpp b/src/hotspot/cpu/sw64/nativeInst_sw64.cpp new file mode 100644 index 00000000000..173e067fe32 --- /dev/null +++ b/src/hotspot/cpu/sw64/nativeInst_sw64.cpp @@ -0,0 +1,816 @@ +/* + * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "jvm.h" +#include "asm/macroAssembler.hpp" +#include "classfile/javaClasses.inline.hpp" +#include "code/compiledIC.hpp" +#include "interpreter/interpreter.hpp" +#include "interpreter/interpreterRuntime.hpp" +#include "memory/allocation.inline.hpp" +#include "runtime/safepoint.hpp" +#include "memory/resourceArea.hpp" +#include "prims/methodHandles.hpp" +#include "runtime/flags/flagSetting.hpp" +#include "runtime/frame.inline.hpp" +#include "runtime/handles.hpp" +#include "utilities/preserveException.hpp" +#include "runtime/sharedRuntime.hpp" +#include "compiler/disassembler.hpp" + +#include + +#define __ _masm-> + +#ifdef PRODUCT +#define BLOCK_COMMENT(str) /* nothing */ +#define STOP(error) stop(error) +#else +#define BLOCK_COMMENT(str) __ block_comment(str) +#define STOP(error) block_comment(error); __ stop(error) +#endif + +#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") + +int NativeCall::instruction_size = 5 * BytesPerInstWord; +int NativeCall::return_address_offset = 5 * BytesPerInstWord; +int NativeJump::instruction_size = 5 * BytesPerInstWord; +int NativeJump::next_instruction_offset = 5 * BytesPerInstWord; + +void NativeInstruction::imm48_split(long imm48, int16_t &msb_l, int16_t &lsb_h, int16_t &lsb_l) { + int32_t lsb32 = (int32_t) ((intptr_t) imm48); + int32_t msb32 = (int32_t) (((intptr_t) imm48 - lsb32) >> 32); + + msb_l = (int16_t) msb32; + lsb_h = (lsb32 - (int16_t) lsb32) >> 16; + lsb_l = (int16_t) lsb32; + guarantee((msb_l >= 0x0 && msb_l < 0x7fff) || (msb_l == 0x7fff && lsb32 >= 0x0 && lsb32 < 0x7fff8000), "wrong number in li48 "); + if (lsb32 >= 0x7fff8000) + msb_l = msb_l + 1; +} + +//void MethodHandles::load_klass_from_Class(MacroAssembler* _masm, Register klass_reg) { +// if (VerifyMethodHandles) +// verify_klass(_masm, klass_reg, 
SystemDictionary::WK_KLASS_ENUM_NAME(java_lang_Class), +// "MH argument is a Class"); +// __ ldptr(klass_reg, Address(klass_reg, java_lang_Class::klass_offset_in_bytes())); +//} + + +/** + * x86 + * NativeInstruction::set_ptr_at(data_offset, x) + * sw64 + * NativeInstruction::set_address(address dest) + * note + * x86 call/jmp 64bits destination embedded following the opcodes + * sw64 call/jmp 48bits destination split in the disp in the ldi/sll/ldih/ldi sequence + */ +void NativeInstruction::set_address(address dest) { +// Unimplemented(); + if (SafePatch) { + if (is_op(int_at(0), Assembler::op_ldi) && + is_op(int_at(4), Assembler::op_br) && + is_op(int_at(16), Assembler::op_ldl)) { + set_long_at(8, (long) dest); + } else if (is_op(int_at(0), Assembler::op_br) && + is_op(int_at(12), Assembler::op_ldl) && + is_op(int_at(16), Assembler::op_ldi)) { + set_long_at(4, (long) dest); + } else { + tty->print_cr("\nError!\nset_address: 0x%lx", addr_at(0)); + Disassembler::decode(addr_at(0) - 10 * 4, addr_at(0) + 10 * 4, tty); + fatal("not a call "); + } + } else { + OrderAccess::fence(); + int16_t msb_l, lsb_h, lsb_l; + NativeInstruction::imm48_split((long) dest, msb_l, lsb_h, lsb_l); + /* li48 or li64 */ + if (is_op(int_at(0), Assembler::op_ldi) && is_op(int_at(4), Assembler::op_slll_l)) { + int first_word = int_at(0); + set_int_at(0, 0x13FFFFFF); /* .1: br .1 */ + set_int_at(8, (int_at(8) & 0xffff0000) | (lsb_h & 0xffff)); + set_int_at(12, (int_at(12) & 0xffff0000) | (lsb_l & 0xffff)); + set_int_at(0, (first_word & 0xffff0000) | (msb_l & 0xffff)); + + // ICache::invalidate_range(addr_at(0), 16); + } else if (is_op(int_at(0), Assembler::op_ldih) && is_op(int_at(8), Assembler::op_slll_l)) { + Unimplemented(); + } else { + fatal("not a call "); + } + } +} + +void NativeInstruction::set_long_at(int offset, long i) { + address addr = addr_at(offset); + *(long *) addr = i; +} + +void NativeInstruction::wrote(int offset) { + //ICache::invalidate_word(addr_at(offset)); +} + +#ifdef ASSERT +void NativeLoadGot::report_and_fail() const { + tty->print_cr("Addr: " INTPTR_FORMAT, p2i(instruction_address()), + (has_rex ? 
ubyte_at(0) : 0), ubyte_at(rex_size), ubyte_at(rex_size + 1)); + fatal("not a indirect rip mov to rbx"); +} + +void NativeLoadGot::verify() const { + if (has_rex) { + int rex = ubyte_at(0); + if (rex != rex_prefix && rex != rex_b_prefix) { + report_and_fail(); + } + } + + int inst = ubyte_at(rex_size); + if (inst != instruction_code) { + report_and_fail(); + } + int modrm = ubyte_at(rex_size + 1); + if (modrm != modrm_rbx_code && modrm != modrm_rax_code) { + report_and_fail(); + } +} +#endif + +intptr_t NativeLoadGot::data() const { + Unimplemented(); + return *(intptr_t *) got_address(); +} + +address NativePltCall::destination() const { + ShouldNotReachHere(); + NativeGotJump* jump = nativeGotJump_at(plt_jump()); + return jump->destination(); +} + +address NativePltCall::plt_entry() const { + ShouldNotReachHere(); + return return_address() + displacement(); + } + +address NativePltCall::plt_jump() const { + ShouldNotReachHere(); + address entry = plt_entry(); + // Virtual PLT code has move instruction first + if (((NativeGotJump*)entry)->is_GotJump()) { + return entry; + } else { + return nativeLoadGot_at(entry)->next_instruction_address(); + } +} + +address NativePltCall::plt_load_got() const { + ShouldNotReachHere(); + address entry = plt_entry(); + if (!((NativeGotJump*)entry)->is_GotJump()) { + // Virtual PLT code has move instruction first + return entry; + } else { + // Static PLT code has move instruction second (from c2i stub) + return nativeGotJump_at(entry)->next_instruction_address(); + } +} + +address NativePltCall::plt_c2i_stub() const { + ShouldNotReachHere(); + address entry = plt_load_got(); + // This method should be called only for static calls which has C2I stub. + NativeLoadGot* load = nativeLoadGot_at(entry); + return entry; +} + +address NativePltCall::plt_resolve_call() const { + ShouldNotReachHere(); + NativeGotJump* jump = nativeGotJump_at(plt_jump()); + address entry = jump->next_instruction_address(); + if (((NativeGotJump*)entry)->is_GotJump()) { + return entry; + } else { + // c2i stub 2 instructions + entry = nativeLoadGot_at(entry)->next_instruction_address(); + return nativeGotJump_at(entry)->next_instruction_address(); +} +} + +void NativePltCall::reset_to_plt_resolve_call() { + set_destination_mt_safe(plt_resolve_call()); +} + +void NativePltCall::set_destination_mt_safe(address dest) { + ShouldNotReachHere(); + // rewriting the value in the GOT, it should always be aligned + NativeGotJump* jump = nativeGotJump_at(plt_jump()); + address* got = (address *) jump->got_address(); + *got = dest; +} + +void NativePltCall::set_stub_to_clean() { + ShouldNotReachHere(); + NativeLoadGot* method_loader = nativeLoadGot_at(plt_c2i_stub()); + NativeGotJump* jump = nativeGotJump_at(method_loader->next_instruction_address()); + method_loader->set_data(0); + jump->set_jump_destination((address)-1); +} + +void NativePltCall::verify() const { + ShouldNotReachHere(); + // Make sure code pattern is actually a call rip+off32 instruction. + int inst = ubyte_at(0); + if (inst != instruction_code) { + tty->print_cr("Addr: " INTPTR_FORMAT " Code: 0x%x", p2i(instruction_address()), + inst); + fatal("not a call rip+off32"); + } +} + +address NativeGotJump::destination() const { + ShouldNotReachHere(); + address *got_entry = (address *) got_address(); + return *got_entry; +} + +#ifdef ASSERT +void NativeGotJump::report_and_fail() const { + ShouldNotReachHere(); + tty->print_cr("Addr: " INTPTR_FORMAT " Code: %x %x %x", p2i(instruction_address()), + (has_rex() ? 
ubyte_at(0) : 0), ubyte_at(rex_size()), ubyte_at(rex_size() + 1)); + fatal("not a indirect rip jump"); +} + + +void NativeGotJump::verify() const { + ShouldNotReachHere(); + int inst = ubyte_at(0); + if (inst != instruction_code) { + report_and_fail(); + } + int modrm = ubyte_at(rex_size() + 1); + if (modrm != modrm_code) { + report_and_fail(); + } +} +#endif + +void NativeCall::verify() { + NativeMovConstReg* mov = nativeMovConstReg_at(addr_at(0)); + // -4 because not include call instruction + NativeInstruction* call = nativeInstruction_at(addr_at(0) + NativeCall::instruction_size - 4); + + if (mov->is_mov_ptr() && call->is_call_reg()) return; + + fatal("not a call instruction"); +} + +address NativeCall::destination() const { + if (SafePatch) { + if (is_op(int_at(0), Assembler::op_ldi) && + is_op(int_at(4), Assembler::op_br) && + is_op(int_at(16), Assembler::op_ldl)) { + return (address) long_at(8); + } else if (is_op(int_at(0), Assembler::op_br) && + is_op(int_at(12), Assembler::op_ldl) && + is_op(int_at(16), Assembler::op_ldi)) { + return (address) long_at(4); + } else { + tty->print_cr("\nError!\ndestination: 0x%lx", addr_at(0)); + Disassembler::decode(addr_at(0) - 10 * 4, addr_at(0) + 10 * 4, tty); + fatal("not a call "); + } + } else { + NativeMovConstReg *mov = nativeMovConstReg_at(addr_at(0)); + return (address) mov->data(); + } +} + +void NativeCall::print() { + tty->print_cr(PTR_FORMAT ": call " PTR_FORMAT, + p2i(instruction_address()), p2i(destination())); +} + +// Inserts a native call instruction at a given pc +void NativeCall::insert(address code_pos, address entry) { + NativeCall *call = nativeCall_at(code_pos); + CodeBuffer cb(call->addr_at(0), instruction_size); + MacroAssembler masm(&cb); +#define __ masm. + if (SafePatch) { + if (__ offset() % 8 == 0) { + __ nop(); + __ br(T12, 2); + __ emit_int64((long) entry); + __ ldl(T12, 0, T12); + } else { + __ br(T12, 2); + __ emit_int64((long) entry); + __ ldl(T12, 0, T12); + __ nop(); + } + } else { + __ prepare_patch_li48(T12, (long) entry); + } + __ call(T12); +#undef __ + + // ICache::invalidate_range(call->addr_at(0), instruction_size); +} + +// MT-safe patching of a call instruction. +// First patches first word of instruction to two jmp's that jmps to them +// selfs (spinlock). Then patches the last byte, and then atomicly replaces +// the jmp's with the first 4 byte of the new instruction. +void NativeCall::replace_mt_safe(address instr_addr, address code_buffer) { + Unimplemented(); + assert(Patching_lock->is_locked() || + SafepointSynchronize::is_at_safepoint(), "concurrent code patching"); + assert (instr_addr != NULL, "illegal address for code patching"); + + NativeCall* n_call = nativeCall_at (instr_addr); // checking that it is a call + guarantee((intptr_t)instr_addr % BytesPerWord == 0, "must be aligned"); + + // First patch dummy jmp in place + unsigned char patch[4]; + assert(sizeof(patch)==sizeof(jint), "sanity check"); + patch[0] = 0xEB; // jmp rel8 + patch[1] = 0xFE; // jmp to self + patch[2] = 0xEB; + patch[3] = 0xFE; + + // First patch dummy jmp in place + *(jint*)instr_addr = *(jint *)patch; + + // Invalidate. Opteron requires a flush after every write. 
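+  // wrote() is the cache-invalidation hook; on SW64 it is currently a no-op
+  // (see NativeInstruction::wrote), and this x86-style byte patching sequence
+  // is unreachable anyway because the method begins with Unimplemented().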
+ n_call->wrote(0); + + // Patch 4th byte + instr_addr[4] = code_buffer[4]; + + n_call->wrote(4); + + // Patch bytes 0-3 + *(jint*)instr_addr = *(jint *)code_buffer; + + n_call->wrote(0); + +#ifdef ASSERT + // verify patching + for ( int i = 0; i < instruction_size; i++) { + address ptr = (address)((intptr_t)code_buffer + i); + int a_byte = (*ptr) & 0xFF; + assert(*((address)((intptr_t)instr_addr + i)) == a_byte, "mt safe patching failed"); + } +#endif + +} + + +// Similar to replace_mt_safe, but just changes the destination. The +// important thing is that free-running threads are able to execute this +// call instruction at all times. If the displacement field is aligned +// we can simply rely on atomicity of 32-bit writes to make sure other threads +// will see no intermediate states. Otherwise, the first two bytes of the +// call are guaranteed to be aligned, and can be atomically patched to a +// self-loop to guard the instruction while we change the other bytes. + +// We cannot rely on locks here, since the free-running threads must run at +// full speed. +// +// Used in the runtime linkage of calls; see class CompiledIC. +// (Cf. 4506997 and 4479829, where threads witnessed garbage displacements.) +void NativeCall::set_destination_mt_safe(address dest) {//Unimplemented(); + set_destination(dest); +} + + +void NativeMovConstReg::verify() { + if (is_op(int_at(0), Assembler::op_ldih) && + is_op(int_at(4), Assembler::op_ldi) && + is_op(int_at(8), Assembler::op_slll_l) && + is_op(int_at(12), Assembler::op_ldih) && + is_op(int_at(16), Assembler::op_ldi)) { + return; + } + + if (is_op(int_at(0), Assembler::op_ldi) && + is_op(int_at(4), Assembler::op_slll_l) && + is_op(int_at(8), Assembler::op_ldih) && + is_op(int_at(12), Assembler::op_ldi)) { + return; + } + + if (is_op(int_at(0), Assembler::op_ldi) && + is_op(int_at(4), Assembler::op_br) && + is_op(int_at(16), Assembler::op_ldl)) { + return; + } + if (is_op(int_at(0), Assembler::op_br) && + is_op(int_at(12), Assembler::op_ldl) && + is_op(int_at(16), Assembler::op_ldi)) { + return; + } + if (!nativeInstruction_at(addr_at(0))->is_mov_ptr()) { + print(); + fatal("not a mov reg64, ptr"); + } +} + + +void NativeMovConstReg::print() { + tty->print_cr(PTR_FORMAT ": mov reg, " INTPTR_FORMAT, + p2i(instruction_address()), data()); +} + +//------------------------------------------------------------------- + +int NativeMovRegMem::instruction_start() const { + Unimplemented(); + int off = 0; + u_char instr_0 = ubyte_at(off); + return off; +} + +int NativeMovRegMem::patch_offset() const { + int off = data_offset + instruction_start(); + u_char mod_rm = *(u_char*)(instruction_address() + 1); + // nnnn(r12|rsp) isn't coded as simple mod/rm since that is + // the encoding to use an SIB byte. Which will have the nnnn + // field off by one byte + if ((mod_rm & 7) == 0x4) { + off++; + } + return off; +} + +//int NativeMovRegMem::offset() const{ +// int off = data_offset + instruction_start(); +// u_char mod_rm = *(u_char*)(instruction_address() + 1); +// // nnnn(r12|rsp) isn't coded as simple mod/rm since that is +// // the encoding to use an SIB byte. Which will have the nnnn +// // field off by one byte +// if ((mod_rm & 7) == 0x4) { +// off++; +// } +// return int_at(off); +//} +// +//void NativeMovRegMem::set_offset(int x) { +// int off = data_offset + instruction_start(); +// u_char mod_rm = *(u_char*)(instruction_address() + 1); +// // nnnn(r12|rsp) isn't coded as simple mod/rm since that is +// // the encoding to use an SIB byte. 
Which will have the nnnn +// // field off by one byte +// if ((mod_rm & 7) == 0x4) { +// off++; +// } +// set_int_at(off, x); +//} + +void NativeMovRegMem::verify() {Unimplemented(); + // make sure code pattern is actually a mov [reg+offset], reg instruction + u_char test_byte = *(u_char*)instruction_address(); + switch (test_byte) { + case instruction_code_reg2memb: // 0x88 movb a, r + case instruction_code_reg2mem: // 0x89 movl a, r (can be movq in 64bit) + case instruction_code_mem2regb: // 0x8a movb r, a + case instruction_code_mem2reg: // 0x8b movl r, a (can be movq in 64bit) + break; + + case instruction_code_mem2reg_movslq: // 0x63 movsql r, a + case instruction_code_mem2reg_movzxb: // 0xb6 movzbl r, a (movzxb) + case instruction_code_mem2reg_movzxw: // 0xb7 movzwl r, a (movzxw) + case instruction_code_mem2reg_movsxb: // 0xbe movsbl r, a (movsxb) + case instruction_code_mem2reg_movsxw: // 0xbf movswl r, a (movsxw) + break; + + case instruction_code_float_s: // 0xd9 fld_s a + case instruction_code_float_d: // 0xdd fld_d a + case instruction_code_xmm_load: // 0x10 movsd xmm, a + case instruction_code_xmm_store: // 0x11 movsd a, xmm + case instruction_code_xmm_lpd: // 0x12 movlpd xmm, a + break; + + case instruction_code_lea: // 0x8d lea r, a + break; + + default: + fatal ("not a mov [reg+offs], reg instruction"); + } +} + + +void NativeMovRegMem::print() { + tty->print_cr(PTR_FORMAT ": mov reg, [reg + %x]", p2i(instruction_address()), offset()); +} + +//------------------------------------------------------------------- + +void NativeLoadAddress::verify() { + // make sure code pattern is actually a mov [reg+offset], reg instruction + /*u_char test_byte = *(u_char*)instruction_address(); + + if ( (test_byte == instruction_prefix_wide || + test_byte == instruction_prefix_wide_extended) ) { + test_byte = *(u_char*)(instruction_address() + 1); + } + + if ( ! ((test_byte == lea_instruction_code) + LP64_ONLY(|| (test_byte == mov64_instruction_code) ))) { + fatal ("not a lea reg, [reg+offs] instruction"); + }*/ +} + + +void NativeLoadAddress::print() { + tty->print_cr(PTR_FORMAT ": lea [reg + %x], reg", p2i(instruction_address()), offset()); +} + +//-------------------------------------------------------------------------------- + +void NativeJump::verify() { + NativeMovConstReg* mov = nativeMovConstReg_at(addr_at(0)); + // -4 because not include jmp instruction + NativeInstruction* jmp = nativeInstruction_at(addr_at(0) + NativeJump::instruction_size - 4); + + if (mov->is_mov_ptr() && jmp->is_jump_reg()) return; + + fatal("not a jump instruction"); +} + +bool NativeInstruction::is_sigill_zombie_not_entrant() { + return int_at(0) == NativeIllegalInstruction::instruction_code; +} +void NativeJump::insert(address code_pos, address entry) { + Unimplemented(); +// intptr_t disp = (intptr_t)entry - ((intptr_t)code_pos + 1 + 4); +// +// guarantee(disp == (intptr_t)(int32_t)disp, "must be 32-bit offset"); +// +// *code_pos = instruction_code; +// *((int32_t*)(code_pos + 1)) = (int32_t)disp; +// +// ICache::invalidate_range(code_pos, instruction_size); +} + +void NativeJump::check_verified_entry_alignment(address entry, address verified_entry) { + //Unimplemented(); + // Patching to not_entrant can happen while activations of the method are + // in use. The patching in that instance must happen only when certain + // alignment restrictions are true. These guarantees check those + // conditions. 
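+  // Note: on SW64 the second guarantee below covers one 4-byte instruction word,
+  // i.e. verified_entry and verified_entry + 4 must lie in the same 64-byte line
+  // (the "first 5 bytes" comment below reflects the original x86 patch size).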
+ + const int linesize = 64; + + // Must be wordSize aligned + guarantee(((uintptr_t) verified_entry & (wordSize -1)) == 0, + "illegal address for code patching 2"); + // First 5 bytes must be within the same cache line - 4827828 + guarantee((uintptr_t) verified_entry / linesize == + ((uintptr_t) verified_entry + 4) / linesize, + "illegal address for code patching 3"); +} + +// manual implementation of stl +// +// 00000001200009c0 : +// 0: 10 01 11 42 addq a0,a1,a0 +// 4: 00 00 50 ae stq a2,0(a0) +// 8: 01 00 fa 0b ret zero,(ra),0x1 +// c: 5f 07 ff 43 nop(excb) +// +typedef void (* atomic_store64_ptr)(long *addr, int offset, long data64); + +static int *buf; +static atomic_store64_ptr get_atomic_store64_func() { + static atomic_store64_ptr p = NULL; + if (p != NULL) + return p; + + buf = (int *)mmap(NULL, 64, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANONYMOUS, + -1, 0); + buf[0] = 0x42110110; + buf[1] = 0xae500000; /* stq $a2, 0($a0) */ + buf[2] = 0x0bfa0001; + buf[3] = 0x43ff075f; /* nop */ + + p = (atomic_store64_ptr)buf; + return p; +} + +// MT safe inserting of a jump over an unknown instruction sequence (used by nmethod::makeZombie) +// The problem: jmp is a 5-byte instruction. Atomical write can be only with 4 bytes. +// First patches the first word atomically to be a jump to itself. +// Then patches the last byte and then atomically patches the first word (4-bytes), +// thus inserting the desired jump +// This code is mt-safe with the following conditions: entry point is 4 byte aligned, +// entry point is in same cache line as unverified entry point, and the instruction being +// patched is >= 5 byte (size of patch). +// +// In C2 the 5+ byte sized instruction is enforced by code in MachPrologNode::emit. +// In C1 the restriction is enforced by CodeEmitter::method_entry +// In JVMCI, the restriction is enforced by HotSpotFrameContext.enter(...) +// +void NativeJump::patch_verified_entry(address entry, address verified_entry, address dest) { + + // ensure 100% atomicity. + // The destination is fixed and can be cached in JavaThread. + + guarantee(dest == SharedRuntime::get_handle_wrong_method_stub(), "expected fixed destination of patch"); + NativeIllegalInstruction::insert(verified_entry); + +// guarantee(!os::is_MP() || (((long)verified_entry % BytesPerWord) == 0), "destination must be aligned for SD"); +// bool is_aligned = !os::is_MP() || (((long)verified_entry % BytesPerWord) == 0); +// +// if (is_aligned) { +// int code_buffer[4]; +// +// CodeBuffer cb((address)code_buffer, instruction_size); +// MacroAssembler masm(&cb); +//#define __ masm. 
+// __ ldl(T12, Address(rthread, in_bytes(JavaThread::handle_wrong_method_stub_offset()))); +// __ jmp(T12); +// __ nop(); +// __ nop(); +// +// atomic_store64_ptr func = get_atomic_store64_func(); +// (*func)((long *)verified_entry, 0, *(long *)&code_buffer[0]); +// } else { +//// if (Assembler::reachable_from_branch_at(verified_entry, dest)) { //for SW8A +//// ptrdiff_t disp = dest - verified_entry - 4; +//// guarantee(disp < 1 << 27 && disp > - (1 << 27), "branch overflow"); +//// unsigned int insn = (0x1D << 26) | ((disp >> 2) & 0x3ffffff); +////// *(unsigned int*)verified_entry = insn; +//// } else { +// // We use an illegal instruction for marking a method as +// // not_entrant or zombie +// NativeIllegalInstruction::insert(verified_entry); +// } +} + +//address NativeFarJump::jump_destination() const { +// NativeMovConstReg* mov = nativeMovConstReg_at(addr_at(0)); +// return (address)mov->data(); +//} +// +//void NativeFarJump::verify() { +// if (is_far_jump()) { +// NativeMovConstReg* mov = nativeMovConstReg_at(addr_at(0)); +// NativeInstruction* jmp = nativeInstruction_at(mov->next_instruction_address()); +// if (jmp->is_jump_reg()) return; +// } +// fatal("not a jump instruction"); +//} + +void NativePopReg::insert(address code_pos, Register reg) { + Unimplemented(); + assert(reg->encoding() < 8, "no space for REX"); + assert(NativePopReg::instruction_size == sizeof(char), "right address unit for update"); + *code_pos = (u_char)(instruction_code | reg->encoding()); + // ICache::invalidate_range(code_pos, instruction_size); +} + + +void NativeIllegalInstruction::insert(address code_pos) { + // Unimplemented(); + assert(NativeIllegalInstruction::instruction_size == sizeof(int), "right address unit for update"); + *(juint*)code_pos = instruction_code; +// ICache::invalidate_range(code_pos, instruction_size); +} + +void NativeGeneralJump::verify() { + Unimplemented(); +} + + +void NativeGeneralJump::insert_unconditional(address code_pos, address entry) { + Unimplemented(); + intptr_t disp = (intptr_t)entry - ((intptr_t)code_pos + 1 + 4); + + guarantee(disp == (intptr_t)(int32_t)disp, "must be 32-bit offset"); + + *code_pos = unconditional_long_jump; + *((int32_t *)(code_pos+1)) = (int32_t) disp; + //ICache::invalidate_range(code_pos, instruction_size); +} + + +// MT-safe patching of a long jump instruction. +// First patches first word of instruction to two jmp's that jmps to them +// selfs (spinlock). Then patches the last byte, and then atomicly replaces +// the jmp's with the first 4 byte of the new instruction. 
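+// (The patching scheme described above is the x86 one, where a jmp is a 5-byte
+// instruction; the SW64 version below is still Unimplemented().)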
+void NativeGeneralJump::replace_mt_safe(address instr_addr, address code_buffer) { + Unimplemented(); +} + +void NativeGeneralJump::set_jump_destination(address dest) { + Unimplemented(); +} + + +address NativeGeneralJump::jump_destination() const { + Unimplemented(); + return NULL; +} + +intptr_t NativeMovConstReg::data() { + // wait_until_not_spinng(); + if (nativeInstruction_at(addr_at(0)) -> is_mov_ptr()) { + if (is_op(int_at(0), Assembler::op_ldi) && + is_op(int_at(4), Assembler::op_br) && + is_op(int_at(16), Assembler::op_ldl)) { + return (intptr_t) long_at(8); + } + if (is_op(int_at(0), Assembler::op_br) && + is_op(int_at(12), Assembler::op_ldl) && + is_op(int_at(16), Assembler::op_ldi)) { + return (intptr_t) long_at(4); + } + if (is_op(int_at(4), Assembler::op_slll_l)){ + /* li48 */ + int16_t msb_l = int_at(0)&0xffff; + int16_t lsb_h = int_at(8)&0xffff; + int16_t lsb_l = int_at(12)&0xffff; + + // -1 should be 0xffff ffff ffff ffff, so we can not use low 48 bits + return (((intptr_t) (msb_l) << 32) + ((intptr_t) (lsb_h) << 16) + (intptr_t) (lsb_l)); + } + else { + int16_t high = int_at(0)&0xffff; + int16_t low = int_at(4)&0xffff; + + // -1 should be 0xffff ffff ffff ffff, so we can not use low 48 bits + return ( ((intptr_t) (high) << 16) + (intptr_t) (low)); + } + } + + Unimplemented(); + return (intptr_t )NULL; +} + +void NativeMovConstReg::set_data(intptr_t x) { + if (is_mov_ptr()) { + OrderAccess::fence(); + //decide which type of data need be relocated li48 or 32 + if (is_op(int_at(4), Assembler::op_slll_l)) { + int16_t msb_l, lsb_h, lsb_l; + NativeInstruction::imm48_split((long)x, msb_l, lsb_h, lsb_l); + + int first_word = int_at(0); + set_int_at(0, 0x13FFFFFF); /* .1: br .1 */ + set_int_at(8, (int_at(8) & 0xffff0000) | (lsb_h & 0xffff)); + set_int_at(12, (int_at(12) & 0xffff0000) | (lsb_l & 0xffff)); + set_int_at(0, (first_word & 0xffff0000) | (msb_l & 0xffff)); + + // ICache::invalidate_range(addr_at(0), 16); + } + else if (is_op(int_at(8), Assembler::op_zapnot_l)) { + int16_t high = (x - (int16_t)(x))>>16; + int16_t low = (int16_t)(x); + int first_word = int_at(0); + set_int_at(0, 0x13FFFFFF); /* .1: br .1 */ + set_int_at(4, (int_at(4) & 0xffff0000) | (low & 0xffff)); + set_int_at(0, (first_word & 0xffff0000) | (high & 0xffff)); + + // ICache::invalidate_range(addr_at(0), 12); + } + } else { + fatal("not a call "); + } +} + +address NativeJump::jump_destination() { + NativeMovConstReg* mov = nativeMovConstReg_at(addr_at(0)); + address dest = (address) mov->data(); + // We use jump to self as the unresolved address which the inline + // cache code (and relocs) know about + + // return -1 if jump to self + dest = (dest == (address) this) ? (address) -1 : dest; + return dest; +} \ No newline at end of file diff --git a/src/hotspot/cpu/sw64/nativeInst_sw64.hpp b/src/hotspot/cpu/sw64/nativeInst_sw64.hpp new file mode 100644 index 00000000000..f229925aa25 --- /dev/null +++ b/src/hotspot/cpu/sw64/nativeInst_sw64.hpp @@ -0,0 +1,809 @@ +/* + * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_SW64_VM_NATIVEINST_SW64_HPP +#define CPU_SW64_VM_NATIVEINST_SW64_HPP + +#include "asm/assembler.hpp" +#include "runtime/icache.hpp" +#include "runtime/safepointMechanism.hpp" +#include "runtime/os.hpp" + +// We have interfaces for the following instructions: +// - NativeInstruction +// - - NativeCall +// - - NativeMovConstReg +// - - NativeMovConstRegPatching +// - - NativeMovRegMem +// - - NativeMovRegMemPatching +// - - NativeJump +// - - NativeFarJump +// - - NativeIllegalOpCode +// - - NativeGeneralJump +// - - NativeReturn +// - - NativeReturnX (return with argument) +// - - NativePushConst +// - - NativeTstRegMem + +// The base class for different kinds of native instruction abstractions. +// Provides the primitive operations to manipulate code relative to this. + +class NativeInstruction { + friend class Relocation; + friend class MacroAssembler; + + public: + enum { + instruction_size = 4 + }; + enum Sw64_specific_constants { + nop_instruction_code = 0, + nop_instruction_size = BytesPerInstWord + }; + + bool is_nop() { Unimplemented(); return ubyte_at(0) == nop_instruction_code; } + inline bool is_call(); + inline bool is_call_reg(); + inline bool is_illegal(); + inline bool is_return(); + inline bool is_jump(); + inline bool is_jump_reg(); + inline bool is_far_jump(); + inline bool is_cond_jump(); + inline bool is_safepoint_poll(); + inline bool is_mov_ptr(); + void wait_until_not_spinng() { + while (*((volatile int*)this) > 0);// wait until the first inst is not spin any more. spin is 13ffffff(>0), ldi and ldih is fxxxxxxx < 0 + } + + //We use an illegal instruction for marking a method as not_entrant or zombie. 
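+  // (That marker is the 0x0000DEAD word defined by NativeIllegalInstruction below;
+  //  is_sigill_zombie_not_entrant() simply compares the first instruction word.)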
+ bool is_sigill_zombie_not_entrant(); + +protected: + address addr_at(int offset) const { return address(this) + offset; } + + s_char sbyte_at(int offset) const { return *(s_char*) addr_at(offset); } + u_char ubyte_at(int offset) const { return *(u_char*) addr_at(offset); } + + jint int_at(int offset) const { return *(jint*) addr_at(offset); } + + intptr_t ptr_at(int offset) const { return *(intptr_t*) addr_at(offset); } + + oop oop_at (int offset) const { return *(oop*) addr_at(offset); } + + void set_char_at(int offset, char c) { *addr_at(offset) = (u_char)c; wrote(offset); } + void set_int_at(int offset, jint i) { *(jint*)addr_at(offset) = i; wrote(offset); } + void set_ptr_at (int offset, intptr_t ptr) { *(intptr_t*) addr_at(offset) = ptr; wrote(offset); } + void set_oop_at (int offset, oop o) { *(oop*) addr_at(offset) = o; wrote(offset); } + + static void imm48_split(long imm48, int16_t &msb_l, int16_t &lsb_h, int16_t &lsb_l); + void set_address(address dest); + void set_long_at(int offset, long i); + jlong long_at(int offset) const { return *(jlong*)addr_at(offset); } + + static bool is_op (int insn, Assembler::ops_mem op) { return Assembler::sw2_op(insn) == (int)op; } + static bool is_op (int insn, Assembler::ops_opr op) { return Assembler::sw2_arith_op(insn) == (int)op; } + static bool is_op (int insn, Assembler::ops_oprl op) { return Assembler::sw2_arith_op(insn) == (int)op; } + static bool is_op (int insn, Assembler::ops_extra op) { return Assembler::sw2_mfc_op(insn) == (int)op; } + static bool is_op (int insn, Assembler::ops_bra op) { return Assembler::sw2_op(insn) == (int)op; } + static bool is_op (int insn, Assembler::ops_fp op) { return Assembler::sw2_op(insn) == (int)op; } + + // This doesn't really do anything on Intel, but it is the place where + // cache invalidation belongs, generically: + void wrote(int offset); + + public: + + // unit test stuff + static void test() {} // override for testing + + inline friend NativeInstruction* nativeInstruction_at(address address); +}; + +inline NativeInstruction* nativeInstruction_at(address address) { + NativeInstruction* inst = (NativeInstruction*)address; +#ifdef ASSERT + //inst->verify(); +#endif + return inst; +} + +class NativePltCall: public NativeInstruction { +public: + enum Sw64_specific_constants { + instruction_code = 0xE8, + instruction_size = 5, + instruction_offset = 0, + displacement_offset = 1, + return_address_offset = 5 + }; + address instruction_address() const { return addr_at(instruction_offset); } + address next_instruction_address() const { return addr_at(return_address_offset); } + address displacement_address() const { return addr_at(displacement_offset); } + int displacement() const { Unimplemented(); return (jint) int_at(displacement_offset); } + address return_address() const { return addr_at(return_address_offset); } + address destination() const; + address plt_entry() const; + address plt_jump() const; + address plt_load_got() const; + address plt_resolve_call() const; + address plt_c2i_stub() const; + void set_stub_to_clean(); + + void reset_to_plt_resolve_call(); + void set_destination_mt_safe(address dest); + + void verify() const; +}; + +inline NativePltCall* nativePltCall_at(address address) { + NativePltCall* call = (NativePltCall*) address; +#ifdef ASSERT + call->verify(); +#endif + return call; +} + +inline NativePltCall* nativePltCall_before(address addr) { + address at = addr - NativePltCall::instruction_size; + return nativePltCall_at(at); +} + +// An interface for mov ptr to reg: +// ldi 
+// sll +// ldih +// ldi +class NativeMovConstReg: public NativeInstruction { +public: + enum Sw64_specific_constants { + instruction_size = 4 * BytesPerInstWord, + instruction_offset = 0, + next_instruction_offset = instruction_size, + }; + + address instruction_address() const { return addr_at(instruction_offset); } + address next_instruction_address() const { return addr_at(next_instruction_offset); } + intptr_t data(); + void set_data(intptr_t x); + + void verify(); + void print(); + + // unit test stuff + static void test() {} + + // Creation + inline friend NativeMovConstReg* nativeMovConstReg_at(address address); + inline friend NativeMovConstReg* nativeMovConstReg_before(address address); +}; + +inline NativeMovConstReg* nativeMovConstReg_at(address address) { + NativeMovConstReg* test = (NativeMovConstReg*)(address - NativeMovConstReg::instruction_offset); +#ifdef ASSERT + test->verify(); +#endif + return test; +} + +inline NativeMovConstReg* nativeMovConstReg_before(address address) { + NativeMovConstReg* test = (NativeMovConstReg*)(address - NativeMovConstReg::instruction_size - NativeMovConstReg::instruction_offset); +#ifdef ASSERT + test->verify(); +#endif + return test; +} + +class NativeCall; +inline NativeCall* nativeCall_at(address address); + +class NativeCall: public NativeInstruction { + public: + enum Sw64_specific_constants { + // instruction_size = 5 * BytesPerInstWord, + instruction_offset = 0, + // return_address_offset = instruction_size + }; + static int instruction_size; //member variables can be reassigned in the templateTable_sw64.cpp_sw64.cpp when SafePatch is true. + static int return_address_offset; + + enum { cache_line_size = BytesPerWord }; // conservative estimate! + + address instruction_address() const { return addr_at(instruction_offset); } + address next_instruction_address() const { return addr_at(return_address_offset); } + address return_address() const { return addr_at(return_address_offset); } + address destination() const; + void set_destination(address dest) { + /*NativeMovConstReg* mov = nativeMovConstReg_at(addr_at(0)); + mov->set_data((intptr_t)dest);*/ + set_address(dest); + } + void set_destination_mt_safe(address dest); + + void verify_alignment() { } + void verify(); + void print(); + + // Creation + inline friend NativeCall* nativeCall_at(address address); + inline friend NativeCall* nativeCall_before(address return_address); + + static bool is_call_at(address instr) { + return nativeInstruction_at(instr)->is_call(); + } + + static bool is_call_before(address return_address) { + return is_call_at(return_address - NativeCall::return_address_offset); + } + +// static bool is_call_to(address instr, address target) { +// return nativeInstruction_at(instr)->is_call() && +// nativeCall_at(instr)->destination() == target; +// } + + // MT-safe patching of a call instruction. 
+ static void insert(address code_pos, address entry); + + static void replace_mt_safe(address instr_addr, address code_buffer); +}; + +inline NativeCall* nativeCall_at(address address) { + NativeCall* call = (NativeCall*)(address - NativeCall::instruction_offset); +#ifdef ASSERT + call->verify(); +#endif + return call; +} + +inline NativeCall* nativeCall_before(address return_address) { + NativeCall* call = (NativeCall*)(return_address - NativeCall::return_address_offset); +#ifdef ASSERT + call->verify(); +#endif + return call; +} + +//class NativeCallReg: public NativeInstruction { +// public: +// enum Sw64_specific_constants { +// instruction_size = BytesPerInstWord +// }; +// +// int next_instruction_offset() const { +// return instruction_size; +// } +//}; + + +class NativeMovConstRegPatching: public NativeMovConstReg { + private: + friend NativeMovConstRegPatching* nativeMovConstRegPatching_at(address address) { + Unimplemented(); + NativeMovConstRegPatching* test = (NativeMovConstRegPatching*)(address - instruction_offset); + #ifdef ASSERT + test->verify(); + #endif + return test; + } +}; + +// An interface for accessing/manipulating native moves of the form: +// mov[b/w/l/q] [reg + offset], reg (instruction_code_reg2mem) +// mov[b/w/l/q] reg, [reg+offset] (instruction_code_mem2reg +// mov[s/z]x[w/b/q] [reg + offset], reg +// fld_s [reg+offset] +// fld_d [reg+offset] +// fstp_s [reg + offset] +// fstp_d [reg + offset] +// mov_literal64 scratch, ; mov[b/w/l/q] 0(scratch),reg | mov[b/w/l/q] reg,0(scratch) +// +// Warning: These routines must be able to handle any instruction sequences +// that are generated as a result of the load/store byte,word,long +// macros. For example: The load_unsigned_byte instruction generates +// an xor reg,reg inst prior to generating the movb instruction. This +// class must skip the xor instruction. 
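+// (The instruction-code constants and the warning above are carried over from the
+// x86 port; on SW64 the accessors in this class currently call Unimplemented().)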
+ +class NativeMovRegMem: public NativeInstruction { + public: + enum Sw64_specific_constants { + //instruction_prefix_wide_lo = Assembler::REX, + //instruction_prefix_wide_hi = Assembler::REX_WRXB, + instruction_code_xor = 0x33, + instruction_extended_prefix = 0x0F, + instruction_code_mem2reg_movslq = 0x63, + instruction_code_mem2reg_movzxb = 0xB6, + instruction_code_mem2reg_movsxb = 0xBE, + instruction_code_mem2reg_movzxw = 0xB7, + instruction_code_mem2reg_movsxw = 0xBF, + instruction_operandsize_prefix = 0x66, + instruction_code_reg2mem = 0x89, + instruction_code_mem2reg = 0x8b, + instruction_code_reg2memb = 0x88, + instruction_code_mem2regb = 0x8a, + instruction_code_float_s = 0xd9, + instruction_code_float_d = 0xdd, + instruction_code_long_volatile = 0xdf, + instruction_code_xmm_ss_prefix = 0xf3, + instruction_code_xmm_sd_prefix = 0xf2, + instruction_code_xmm_code = 0x0f, + instruction_code_xmm_load = 0x10, + instruction_code_xmm_store = 0x11, + instruction_code_xmm_lpd = 0x12, + + instruction_code_lea = 0x8d, + + //instruction_VEX_prefix_2bytes = Assembler::VEX_2bytes, + //instruction_VEX_prefix_3bytes = Assembler::VEX_3bytes, + //instruction_EVEX_prefix_4bytes = Assembler::EVEX_4bytes, + + instruction_size = 4, + instruction_offset = 0, + data_offset = 2, + next_instruction_offset = 4 + }; + + // helper + int instruction_start() const; + + address instruction_address() const { + Unimplemented(); + return addr_at(instruction_start()); + } + + int num_bytes_to_end_of_patch() const { + Unimplemented(); + return patch_offset() + sizeof(jint); + } + + int offset() const { + Unimplemented(); + return int_at(patch_offset()); + } + + void set_offset(int x) { + Unimplemented(); + set_int_at(patch_offset(), x); + } + + void add_offset_in_bytes(int add_offset) { + Unimplemented(); + int patch_off = patch_offset(); + set_int_at(patch_off, int_at(patch_off) + add_offset); + } + + void verify(); + void print (); + + // unit test stuff + static void test() {} + + private: + int patch_offset() const; + inline friend NativeMovRegMem* nativeMovRegMem_at (address address); +}; + +inline NativeMovRegMem* nativeMovRegMem_at (address address) { + Unimplemented(); + NativeMovRegMem* test = (NativeMovRegMem*)(address - NativeMovRegMem::instruction_offset); +#ifdef ASSERT + test->verify(); +#endif + return test; +} + + +// An interface for accessing/manipulating native leal instruction of form: +// leal reg, [reg + offset] + +class NativeLoadAddress: public NativeMovRegMem { + public: + enum Sw64_specific_constants { + }; + + void verify(); + void print (); + + // unit test stuff + static void test() {} + + private: + friend NativeLoadAddress* nativeLoadAddress_at (address address) { + Unimplemented(); + NativeLoadAddress* test = (NativeLoadAddress*)(address - instruction_offset); + #ifdef ASSERT + test->verify(); + #endif + return test; + } +}; + +// destination is rbx or rax +// mov rbx, [rip + offset] +class NativeLoadGot: public NativeInstruction { + static const bool has_rex = true; + static const int rex_size = 1; + + enum Sw64_specific_constants { + rex_prefix = 0x48, + rex_b_prefix = 0x49, + instruction_code = 0x8b, + modrm_rbx_code = 0x1d, + modrm_rax_code = 0x05, + instruction_length = 6 + rex_size, + offset_offset = 2 + rex_size + }; + + address rip_offset_address() const { return addr_at(offset_offset); } + int rip_offset() const { return int_at(offset_offset); } + address return_address() const { return addr_at(instruction_length); } + address got_address() const { return return_address() + 
rip_offset(); } + +#ifdef ASSERT + void report_and_fail() const; + address instruction_address() const { return addr_at(0); } +#endif + +public: + address next_instruction_address() const { return return_address(); } + intptr_t data() const; + void set_data(intptr_t data) { + Unimplemented(); + intptr_t *addr = (intptr_t *) got_address(); + *addr = data; + } + + DEBUG_ONLY( void verify() const ); +}; + +inline NativeLoadGot* nativeLoadGot_at(address addr) { + Unimplemented(); + NativeLoadGot* load = (NativeLoadGot*) addr; +#ifdef ASSERT + load->verify(); +#endif + return load; +} + +class NativeJump: public NativeInstruction { + public: + enum Sw64_specific_constants { + // instruction_size = 5 * BytesPerInstWord, + instruction_offset = 0, + // next_instruction_offset = instruction_size + }; + static int instruction_size; //member variables can be reassigned in the templateTable_sw64.cpp when SafePatch is true. + static int next_instruction_offset; + address instruction_address() const { return addr_at(instruction_offset); } + address next_instruction_address() const { return addr_at(next_instruction_offset); } + address jump_destination(); + + void set_jump_destination(address dest) { + // NativeMovConstReg* mov = nativeMovConstReg_at(addr_at(0)); + // mov->set_data((intptr_t)dest); + set_address(dest); + } + + // Creation + inline friend NativeJump* nativeJump_at(address address); + + void verify(); + + // Unit testing stuff + static void test() {} + + // Insertion of native jump instruction + static void insert(address code_pos, address entry); + // MT-safe insertion of native jump at verified method entry + static void check_verified_entry_alignment(address entry, address verified_entry); + static void patch_verified_entry(address entry, address verified_entry, address dest); +}; + +inline NativeJump* nativeJump_at(address address) { + NativeJump* jump = (NativeJump*)(address - NativeJump::instruction_offset); +#ifdef ASSERT + jump->verify(); +#endif + return jump; +} + +//// far jump reg +//class NativeFarJump: public NativeInstruction { +// public: +// address jump_destination() const; +// +// // Creation +// inline friend NativeFarJump* nativeFarJump_at(address address); +// +// void verify(); +// +// // Unit testing stuff +// static void test() {} +// +//}; + +//inline NativeFarJump* nativeFarJump_at(address address) { +// NativeFarJump* jump = (NativeFarJump*)(address); +//#ifdef ASSERT +// jump->verify(); +//#endif +// return jump; +//} + +// Handles all kinds of jump on Intel. 
Long/far, conditional/unconditional +class NativeGeneralJump: public NativeInstruction { +public: + enum Sw64_specific_constants { + instruction_offset = 0, + unconditional_long_jump = 0xe9, + unconditional_short_jump = 0xeb, + instruction_size = 5 + }; + + address instruction_address() const { Unimplemented(); return addr_at(0); } + address jump_destination() const; + + void set_jump_destination(address dest); + + // Creation + inline friend NativeGeneralJump* nativeGeneralJump_at(address address); + + // Insertion of native general jump instruction + static void insert_unconditional(address code_pos, address entry); + static void replace_mt_safe(address instr_addr, address code_buffer); + + void verify(); +}; + +inline NativeGeneralJump* nativeGeneralJump_at(address address) { + Unimplemented(); + NativeGeneralJump* jump = (NativeGeneralJump*)(address); + debug_only(jump->verify();) + return jump; +} + +class NativeGotJump: public NativeInstruction { + + enum Sw64_specific_constants { + rex_prefix = 0x41, + instruction_code = 0xff, + modrm_code = 0x25, + instruction_size = 6, + rip_offset = 2 + }; + + bool has_rex() const { return ubyte_at(0) == rex_prefix; } + int rex_size() const { return has_rex() ? 1 : 0; } + + address return_address() const { Unimplemented(); return addr_at(instruction_size + rex_size()); } + int got_offset() const { return (jint) int_at(rip_offset + rex_size()); } + +#ifdef ASSERT + void report_and_fail() const; + address instruction_address() const { Unimplemented(); return addr_at(0); } +#endif + +public: + address got_address() const { return return_address() + got_offset(); } + address next_instruction_address() const { return addr_at(instruction_size); } + bool is_GotJump() const { return ubyte_at(0) == instruction_code; } + + address destination() const; + void set_jump_destination(address dest) { + Unimplemented(); + address *got_entry = (address *) got_address(); + *got_entry = dest; + } + + DEBUG_ONLY( void verify() const; ) +}; + +inline NativeGotJump* nativeGotJump_at(address addr) { + Unimplemented(); + NativeGotJump* jump = (NativeGotJump*)(addr); + debug_only(jump->verify()); + return jump; +} + +class NativePopReg : public NativeInstruction { + public: + enum Sw64_specific_constants { + instruction_code = 0x58, + instruction_size = 1, + instruction_offset = 0, + data_offset = 1, + next_instruction_offset = 1 + }; + + // Insert a pop instruction + static void insert(address code_pos, Register reg); +}; + + +class NativeIllegalInstruction: public NativeInstruction { +public: + enum Sw64_specific_constants { + instruction_code = 0x0000DEAD, // Special instruction + instruction_size = 4, //TODO:not check jzy + instruction_offset = 0, + next_instruction_offset = 4 //TODO:not check jzy + }; + + // Insert illegal opcode as specific address + static void insert(address code_pos); +}; + +// return instruction that does not pop values of the stack +class NativeReturn: public NativeInstruction { + public: + enum Sw64_specific_constants { + instruction_size = BytesPerInstWord + }; +}; + +// Simple test vs memory +class NativeTstRegMem: public NativeInstruction { + public: + enum Sw64_specific_constants { + }; +}; + +//class NativeCondJump; +//inline NativeCondJump* nativeCondJump_at(address address); +//class NativeCondJump: public NativeInstruction { +// public: +// enum Sw64_specific_constants { +// instruction_size = 16, +// instruction_offset = 12, +// next_instruction_offset = 20 +// }; +// +// +// address instruction_address() const { Unimplemented(); 
return addr_at(0); }
+//  address next_instruction_address() const { Unimplemented(); return addr_at(next_instruction_offset); }
+//
+//  // Creation
+//  inline friend NativeCondJump* nativeCondJump_at(address address);
+//
+//  address jump_destination() const {
+//    Unimplemented();
+//    return ::nativeCondJump_at(addr_at(12))->jump_destination();
+//  }
+//
+//  void set_jump_destination(address dest) {
+//    Unimplemented();
+//    ::nativeCondJump_at(addr_at(12))->set_jump_destination(dest);
+//  }
+//
+//};
+//
+//inline NativeCondJump* nativeCondJump_at(address address) {
+//  Unimplemented();
+//  NativeCondJump* jump = (NativeCondJump*)(address);
+//  return jump;
+//}
+
+inline bool NativeInstruction::is_illegal() { Unimplemented(); return (short)int_at(0) == (short)NativeIllegalInstruction::instruction_code; }
+
+inline bool NativeInstruction::is_call() {
+  if (SafePatch) {
+    return is_op(int_at(20), Assembler::op_call) &&
+           ((is_op(int_at(0), Assembler::op_ldi) &&
+             is_op(int_at(4), Assembler::op_br) &&
+             is_op(int_at(16), Assembler::op_ldl)) ||
+            (is_op(int_at(0), Assembler::op_br) &&
+             is_op(int_at(12), Assembler::op_ldl) &&
+             is_op(int_at(16), Assembler::op_ldi)));
+  } else {
+    if (is_op(int_at(0), Assembler::op_ldi) &&
+        is_op(int_at(4), Assembler::op_slll_l) &&
+        is_op(int_at(8), Assembler::op_ldih) &&
+        is_op(int_at(12), Assembler::op_ldi) &&
+        is_op(int_at(16), Assembler::op_call))
+      return true;
+  }
+
+  if (is_op(int_at(0), Assembler::op_ldih) &&
+      is_op(int_at(4), Assembler::op_ldi) &&
+      is_op(int_at(8), Assembler::op_slll_l) &&
+      is_op(int_at(12), Assembler::op_ldih) &&
+      is_op(int_at(16), Assembler::op_ldi) &&
+      is_op(int_at(20), Assembler::op_call)) // the call comes after the 5-instruction mov sequence
+    return true;
+
+  // Unimplemented();
+  return false;
+}
+inline bool NativeInstruction::is_call_reg() {
+  return is_op(int_at(0), Assembler::op_call);
+}
+inline bool NativeInstruction::is_return() {
+  return is_op(int_at(NativeMovConstReg::instruction_size), Assembler::op_ret);
+}
+inline bool NativeInstruction::is_jump() {
+  if (SafePatch) {
+    return is_op(int_at(20), Assembler::op_jmp) &&
+           ((is_op(int_at(0), Assembler::op_ldi) &&
+             is_op(int_at(4), Assembler::op_br) &&
+             is_op(int_at(16), Assembler::op_ldl)) ||
+            (is_op(int_at(0), Assembler::op_br) &&
+             is_op(int_at(12), Assembler::op_ldl) &&
+             is_op(int_at(16), Assembler::op_ldi)));
+  } else {
+    if (is_op(int_at(0), Assembler::op_ldi) &&
+        is_op(int_at(4), Assembler::op_slll_l) &&
+        is_op(int_at(8), Assembler::op_ldih) &&
+        is_op(int_at(12), Assembler::op_ldi))
+      return true;
+  }
+
+  if (is_op(int_at(0), Assembler::op_ldih) &&
+      is_op(int_at(4), Assembler::op_ldi) &&
+      is_op(int_at(8), Assembler::op_slll_l) &&
+      is_op(int_at(12), Assembler::op_ldih) &&
+      is_op(int_at(16), Assembler::op_ldi))
+    return true;
+
+  // Unimplemented();
+  return false;
+}
+inline bool NativeInstruction::is_jump_reg() {
+  return is_op(int_at(0), Assembler::op_jmp);
+}
+inline bool NativeInstruction::is_safepoint_poll() {
+  //Unimplemented();
+  //refer to relocInfo::poll_return_type in sw64.ad
+  int x = int_at(0);
+  int op = Assembler::sw2_op(x);
+  if (op != Assembler::op_ldw) return false;
+
+  Register ra = Assembler::sw2_ra(x);
+  if (ra != rscratch3) return false; //TODO:refactor jzy
+
+  int mdisp = Assembler::sw2_mdisp(x);
+  if (mdisp != 0) return false;
+
+  return true;
+}
+
+inline bool NativeInstruction::is_mov_ptr() {
+  //wait_until_not_spinng();
+  if ((is_op(int_at(0), Assembler::op_ldi) &&
+       is_op(int_at(4), Assembler::op_slll_l) &&
+       is_op(int_at(8), Assembler::op_ldih) &&
+       is_op(int_at(12), 
Assembler::op_ldi)) || + (is_op(int_at(0), Assembler::op_ldih) && + is_op(int_at(4), Assembler::op_ldi) && + is_op(int_at(8), Assembler::op_zapnot_l)) || + (is_op(int_at(0), Assembler::op_ldi) && + is_op(int_at(4), Assembler::op_br) && + is_op(int_at(16), Assembler::op_ldl))|| + (is_op(int_at(0), Assembler::op_br) && + is_op(int_at(12), Assembler::op_ldl) && + is_op(int_at(16), Assembler::op_ldi) )){ + return true; + } + return false; +} + +#endif // CPU_SW64_VM_NATIVEINST_SW64_HPP diff --git a/src/hotspot/cpu/sw64/registerMap_sw64.cpp b/src/hotspot/cpu/sw64/registerMap_sw64.cpp new file mode 100644 index 00000000000..59201d5d84f --- /dev/null +++ b/src/hotspot/cpu/sw64/registerMap_sw64.cpp @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2021, Arm Limited. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +#include "precompiled.hpp" +#include "runtime/registerMap.hpp" +#include "vmreg_sw64.inline.hpp" + +address RegisterMap::pd_location(VMReg base_reg, int slot_idx) const { + if (base_reg->is_FloatRegister()) { + // Not all physical slots of an SVE register have corresponding + // VMRegs. However they are always saved to the stack in a + // contiguous region of memory so we can calculate the address of + // the upper slots by offsetting from the base address. + assert(base_reg->is_concrete(), "must pass base reg"); + int base_reg_enc = (base_reg->value() - ConcreteRegisterImpl::max_gpr) / + FloatRegisterImpl::max_slots_per_register; + intptr_t offset_in_bytes = slot_idx * VMRegImpl::stack_slot_size; + address base_location = location(base_reg); + if (base_location != NULL) { + return base_location + offset_in_bytes; + } else { + return NULL; + } + } else { + return location(base_reg->next(slot_idx)); + } +} diff --git a/src/hotspot/cpu/sw64/registerMap_sw64.hpp b/src/hotspot/cpu/sw64/registerMap_sw64.hpp new file mode 100644 index 00000000000..8bb48f6732a --- /dev/null +++ b/src/hotspot/cpu/sw64/registerMap_sw64.hpp @@ -0,0 +1,47 @@ +/* + * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_SW64_VM_REGISTERMAP_SW64_HPP
+#define CPU_SW64_VM_REGISTERMAP_SW64_HPP
+
+// machine-dependent implementation for register maps
+  friend class frame;
+
+ private:
+  // This is the hook for finding a register in a "well-known" location,
+  // such as a register block of a predetermined format.
+  // Since there is none, we just return NULL.
+  // See registerMap_sparc.hpp for an example of grabbing registers
+  // from register save areas of a standard layout.
+  address pd_location(VMReg reg) const {return NULL;}
+
+  address pd_location(VMReg base_reg, int slot_idx) const;
+
+  // no PD state to clear or copy:
+  void pd_clear() {}
+  void pd_initialize() {}
+  void pd_initialize_from(const RegisterMap* map) {}
+
+#endif // CPU_SW64_VM_REGISTERMAP_SW64_HPP
diff --git a/src/hotspot/cpu/sw64/register_definitions_sw64.cpp b/src/hotspot/cpu/sw64/register_definitions_sw64.cpp
new file mode 100644
index 00000000000..8772d7a3c53
--- /dev/null
+++ b/src/hotspot/cpu/sw64/register_definitions_sw64.cpp
@@ -0,0 +1,224 @@
+/*
+ * Copyright (c) 2002, 2018, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ * + */ + +#include "precompiled.hpp" +#include "asm/assembler.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "asm/register.hpp" +#include "register_sw64.hpp" +# include "interp_masm_sw64.hpp" + +REGISTER_DEFINITION(Register, noreg); + +REGISTER_DEFINITION(Register, i0); +REGISTER_DEFINITION(Register, i1); +REGISTER_DEFINITION(Register, i2); +REGISTER_DEFINITION(Register, i3); +REGISTER_DEFINITION(Register, i4); +REGISTER_DEFINITION(Register, i5); +REGISTER_DEFINITION(Register, i6); +REGISTER_DEFINITION(Register, i7); +REGISTER_DEFINITION(Register, i8); +REGISTER_DEFINITION(Register, i9); +REGISTER_DEFINITION(Register, i10); +REGISTER_DEFINITION(Register, i11); +REGISTER_DEFINITION(Register, i12); +REGISTER_DEFINITION(Register, i13); +REGISTER_DEFINITION(Register, i14); +REGISTER_DEFINITION(Register, i15); +REGISTER_DEFINITION(Register, i16); +REGISTER_DEFINITION(Register, i17); +REGISTER_DEFINITION(Register, i18); +REGISTER_DEFINITION(Register, i19); +REGISTER_DEFINITION(Register, i20); +REGISTER_DEFINITION(Register, i21); +REGISTER_DEFINITION(Register, i22); +REGISTER_DEFINITION(Register, i23); +REGISTER_DEFINITION(Register, i24); +REGISTER_DEFINITION(Register, i25); +REGISTER_DEFINITION(Register, i26); +REGISTER_DEFINITION(Register, i27); +REGISTER_DEFINITION(Register, i28); +REGISTER_DEFINITION(Register, i29); +REGISTER_DEFINITION(Register, i30); +REGISTER_DEFINITION(Register, i31); +REGISTER_DEFINITION(Register, sp); + +REGISTER_DEFINITION(FloatRegister, fnoreg); +REGISTER_DEFINITION(FloatRegister, f0); +REGISTER_DEFINITION(FloatRegister, f1); +REGISTER_DEFINITION(FloatRegister, f2); +REGISTER_DEFINITION(FloatRegister, f3); +REGISTER_DEFINITION(FloatRegister, f4); +REGISTER_DEFINITION(FloatRegister, f5); +REGISTER_DEFINITION(FloatRegister, f6); +REGISTER_DEFINITION(FloatRegister, f7); +REGISTER_DEFINITION(FloatRegister, f8); +REGISTER_DEFINITION(FloatRegister, f9); +REGISTER_DEFINITION(FloatRegister, f10); +REGISTER_DEFINITION(FloatRegister, f11); +REGISTER_DEFINITION(FloatRegister, f12); +REGISTER_DEFINITION(FloatRegister, f13); +REGISTER_DEFINITION(FloatRegister, f14); +REGISTER_DEFINITION(FloatRegister, f15); +REGISTER_DEFINITION(FloatRegister, f16); +REGISTER_DEFINITION(FloatRegister, f17); +REGISTER_DEFINITION(FloatRegister, f18); +REGISTER_DEFINITION(FloatRegister, f19); +REGISTER_DEFINITION(FloatRegister, f20); +REGISTER_DEFINITION(FloatRegister, f21); +REGISTER_DEFINITION(FloatRegister, f22); +REGISTER_DEFINITION(FloatRegister, f23); +REGISTER_DEFINITION(FloatRegister, f24); +REGISTER_DEFINITION(FloatRegister, f25); +REGISTER_DEFINITION(FloatRegister, f26); +REGISTER_DEFINITION(FloatRegister, f27); +REGISTER_DEFINITION(FloatRegister, f28); +REGISTER_DEFINITION(FloatRegister, f29); +REGISTER_DEFINITION(FloatRegister, f30); +REGISTER_DEFINITION(FloatRegister, f31); + +REGISTER_DEFINITION(Register, A0); +REGISTER_DEFINITION(Register, A1); +REGISTER_DEFINITION(Register, A2); +REGISTER_DEFINITION(Register, A3); +REGISTER_DEFINITION(Register, A4); +REGISTER_DEFINITION(Register, A5); + +REGISTER_DEFINITION(FloatRegister, F16); +REGISTER_DEFINITION(FloatRegister, F17); +REGISTER_DEFINITION(FloatRegister, F18); +REGISTER_DEFINITION(FloatRegister, F19); +REGISTER_DEFINITION(FloatRegister, F20); +REGISTER_DEFINITION(FloatRegister, F21); + +REGISTER_DEFINITION(Register, zr); +REGISTER_DEFINITION(Register, c_rarg0); +REGISTER_DEFINITION(Register, c_rarg1); +REGISTER_DEFINITION(Register, c_rarg2); +REGISTER_DEFINITION(Register, c_rarg3); +REGISTER_DEFINITION(Register, c_rarg4); 
+REGISTER_DEFINITION(Register, c_rarg5); + +REGISTER_DEFINITION(FloatRegister, c_farg0); +REGISTER_DEFINITION(FloatRegister, c_farg1); +REGISTER_DEFINITION(FloatRegister, c_farg2); +REGISTER_DEFINITION(FloatRegister, c_farg3); +REGISTER_DEFINITION(FloatRegister, c_farg4); +REGISTER_DEFINITION(FloatRegister, c_farg5); + +REGISTER_DEFINITION(Register, j_rarg0);//A1 +REGISTER_DEFINITION(Register, j_rarg1); +REGISTER_DEFINITION(Register, j_rarg2); +REGISTER_DEFINITION(Register, j_rarg3); +REGISTER_DEFINITION(Register, j_rarg4);//A5 +REGISTER_DEFINITION(Register, j_rarg5);//A0 + +REGISTER_DEFINITION(FloatRegister, j_farg0);//F16 +REGISTER_DEFINITION(FloatRegister, j_farg1); +REGISTER_DEFINITION(FloatRegister, j_farg2); +REGISTER_DEFINITION(FloatRegister, j_farg3); +REGISTER_DEFINITION(FloatRegister, j_farg4); +REGISTER_DEFINITION(FloatRegister, j_farg5);//F21 + +REGISTER_DEFINITION(Register, rscratch1); //t5 +REGISTER_DEFINITION(Register, rscratch2); //t6 + +REGISTER_DEFINITION(Register, rscratch3); //t11 +REGISTER_DEFINITION(Register, rscratch4); //at + +REGISTER_DEFINITION(Register, rscratch1_GP); //GP +REGISTER_DEFINITION(Register, rscratch2_AT); //AT +REGISTER_DEFINITION(Register, rdispatch); //t8 +REGISTER_DEFINITION(Register, rnext); //t10, jdk8 use s1 +REGISTER_DEFINITION(Register, rmonitors); //t11 +REGISTER_DEFINITION(Register, pv); //t12 +//REGISTER_DEFINITION(Register, rcpool); //t12, ok?? + +REGISTER_DEFINITION(Register, rbcp); //s0, consist with jdk8 +REGISTER_DEFINITION(Register, rlocals); //s1, jdk8 use s5 +REGISTER_DEFINITION(Register, rthread); //s2, consist with jdk8 +REGISTER_DEFINITION(Register, rmethod); //s3, consist with jdk8 +REGISTER_DEFINITION(Register, rsender); //s4, consist with jdk8 +REGISTER_DEFINITION(Register, rheapbase); //s5, jdk8 use t5 +REGISTER_DEFINITION(Register, rcc); //gp + +REGISTER_DEFINITION(Register, RA); +REGISTER_DEFINITION(Register, esp); +REGISTER_DEFINITION(Register, lr); +REGISTER_DEFINITION(Register, rfp); + +REGISTER_DEFINITION(Register, FSR); //v0, First Stack Register +REGISTER_DEFINITION(Register, SSR); //t4, Second Stack Register + +REGISTER_DEFINITION(FloatRegister, FSF); //f0, First Stack Float +REGISTER_DEFINITION(FloatRegister, SSF); //f1, Second Stack Float +REGISTER_DEFINITION(FloatRegister, FTF); //f14, Float temp?? 
+REGISTER_DEFINITION(FloatRegister, FcmpRES);//f29, TODO:need delete jzy +REGISTER_DEFINITION(FloatRegister, fcc);//f29 +REGISTER_DEFINITION(FloatRegister, fscratch1);//f28 +REGISTER_DEFINITION(FloatRegister, fzero);//f31 + +REGISTER_DEFINITION(Register, V0); +REGISTER_DEFINITION(Register, T0); +REGISTER_DEFINITION(Register, T1); +REGISTER_DEFINITION(Register, T2); +REGISTER_DEFINITION(Register, T3); +REGISTER_DEFINITION(Register, T4); +REGISTER_DEFINITION(Register, T5); +REGISTER_DEFINITION(Register, T6); +REGISTER_DEFINITION(Register, T7); +REGISTER_DEFINITION(Register, S0); +REGISTER_DEFINITION(Register, S1); +REGISTER_DEFINITION(Register, S2); +REGISTER_DEFINITION(Register, S3); +REGISTER_DEFINITION(Register, S4); +REGISTER_DEFINITION(Register, S5); +REGISTER_DEFINITION(Register, T8); +REGISTER_DEFINITION(Register, T9); +REGISTER_DEFINITION(Register, T10); +REGISTER_DEFINITION(Register, T11); +REGISTER_DEFINITION(Register, T12); +REGISTER_DEFINITION(Register, AT); +REGISTER_DEFINITION(Register, GP); +REGISTER_DEFINITION(Register, R0); + +// x86 GPR simulation +REGISTER_DEFINITION(Register, rax); +REGISTER_DEFINITION(Register, rcx); +REGISTER_DEFINITION(Register, rdx); +REGISTER_DEFINITION(Register, rbx); +REGISTER_DEFINITION(Register, rsi); +REGISTER_DEFINITION(Register, rdi); +REGISTER_DEFINITION(Register, rbp); +REGISTER_DEFINITION(Register, rsp); +REGISTER_DEFINITION(Register, r8); +REGISTER_DEFINITION(Register, r9); +REGISTER_DEFINITION(Register, r10); +REGISTER_DEFINITION(Register, r11); +REGISTER_DEFINITION(Register, r12); +REGISTER_DEFINITION(Register, r13); +REGISTER_DEFINITION(Register, r14); +REGISTER_DEFINITION(Register, r15); \ No newline at end of file diff --git a/src/hotspot/cpu/sw64/register_sw64.cpp b/src/hotspot/cpu/sw64/register_sw64.cpp new file mode 100644 index 00000000000..b3c2870071d --- /dev/null +++ b/src/hotspot/cpu/sw64/register_sw64.cpp @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "register_sw64.hpp" + +const int ConcreteRegisterImpl::max_gpr = RegisterImpl::number_of_registers << 1; + +const int ConcreteRegisterImpl::max_fpr + = ConcreteRegisterImpl::max_gpr + (FloatRegisterImpl::number_of_registers << 1); + +const char* RegisterImpl::name() const { + const char* names[number_of_registers] = { + "V0", "T0", "T1", "T2", "T3", "T4", "T5", "T6", "T7", + "S0", "S1", "S2", "S3", "S4", "S5", + "rfp", "A0", "A1", "A2", "A3", "A4", "A5", + "T8", "T9", "T10", "T11", + "RA", "T12", "AT", "GP", "esp", "Zero" + }; + return is_valid() ? names[encoding()] : "noreg"; +} + +const char* FloatRegisterImpl::name() const { + const char* names[number_of_registers] = { + "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", + "f8", "f9", "f10", "f11", "f12", "f13", "f14", "f15", + "f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23", + "f24", "f25", "f26", "f27", "f28", "f29", "f30", "f31" + }; + return is_valid() ? names[encoding()] : "noreg"; +} diff --git a/src/hotspot/cpu/sw64/register_sw64.hpp b/src/hotspot/cpu/sw64/register_sw64.hpp new file mode 100644 index 00000000000..4894f3fb7f9 --- /dev/null +++ b/src/hotspot/cpu/sw64/register_sw64.hpp @@ -0,0 +1,262 @@ +/* + * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_SW64_VM_REGISTER_SW64_HPP +#define CPU_SW64_VM_REGISTER_SW64_HPP + +#include "asm/register.hpp" + +class VMRegImpl; +typedef VMRegImpl* VMReg; + +// Use Register as shortcut +class RegisterImpl; +typedef RegisterImpl* Register; + +inline Register as_Register(int encoding) { + return (Register)(intptr_t) encoding; +} + +class RegisterImpl: public AbstractRegisterImpl { + public: + enum { + number_of_registers = 32, + number_of_byte_registers = 32, + number_of_registers_for_jvmci = 34 // Including SP and ZR. + }; + + // derived registers, offsets, and addresses + Register successor() const { return as_Register(encoding() + 1); } + + // construction + inline friend Register as_Register(int encoding); + + VMReg as_VMReg(); + + // accessors + int encoding() const { assert(is_valid(), "invalid register"); return (intptr_t)this; } + bool is_valid() const { return 0 <= (intptr_t)this && (intptr_t)this < number_of_registers; } + const char* name() const; + int encoding_nocheck() const { return (intptr_t)this; } + + // Return the bit which represents this register. This is intended + // to be ORed into a bitmask: for usage see class RegSet below. 
+ unsigned long bit(bool should_set = true) const { return should_set ? 1 << encoding() : 0; } +}; + +// The integer registers of the sw64 architecture + +CONSTANT_REGISTER_DECLARATION(Register, noreg, (-1)); + + +CONSTANT_REGISTER_DECLARATION(Register, i0, (0)); +CONSTANT_REGISTER_DECLARATION(Register, i1, (1)); +CONSTANT_REGISTER_DECLARATION(Register, i2, (2)); +CONSTANT_REGISTER_DECLARATION(Register, i3, (3)); +CONSTANT_REGISTER_DECLARATION(Register, i4, (4)); +CONSTANT_REGISTER_DECLARATION(Register, i5, (5)); +CONSTANT_REGISTER_DECLARATION(Register, i6, (6)); +CONSTANT_REGISTER_DECLARATION(Register, i7, (7)); +CONSTANT_REGISTER_DECLARATION(Register, i8, (8)); +CONSTANT_REGISTER_DECLARATION(Register, i9, (9)); +CONSTANT_REGISTER_DECLARATION(Register, i10, (10)); +CONSTANT_REGISTER_DECLARATION(Register, i11, (11)); +CONSTANT_REGISTER_DECLARATION(Register, i12, (12)); +CONSTANT_REGISTER_DECLARATION(Register, i13, (13)); +CONSTANT_REGISTER_DECLARATION(Register, i14, (14)); +CONSTANT_REGISTER_DECLARATION(Register, i15, (15)); +CONSTANT_REGISTER_DECLARATION(Register, i16, (16)); +CONSTANT_REGISTER_DECLARATION(Register, i17, (17)); +CONSTANT_REGISTER_DECLARATION(Register, i18, (18)); +CONSTANT_REGISTER_DECLARATION(Register, i19, (19)); +CONSTANT_REGISTER_DECLARATION(Register, i20, (20)); +CONSTANT_REGISTER_DECLARATION(Register, i21, (21)); +CONSTANT_REGISTER_DECLARATION(Register, i22, (22)); +CONSTANT_REGISTER_DECLARATION(Register, i23, (23)); +CONSTANT_REGISTER_DECLARATION(Register, i24, (24)); +CONSTANT_REGISTER_DECLARATION(Register, i25, (25)); +CONSTANT_REGISTER_DECLARATION(Register, i26, (26)); +CONSTANT_REGISTER_DECLARATION(Register, i27, (27)); +CONSTANT_REGISTER_DECLARATION(Register, i28, (28)); +CONSTANT_REGISTER_DECLARATION(Register, i29, (29)); +CONSTANT_REGISTER_DECLARATION(Register, i30, (30)); +CONSTANT_REGISTER_DECLARATION(Register, i31, (31)); + + +// r31 is not a general purpose register, but represents either the +// stack pointer or the zero/discard register depending on the +// instruction. +//CONSTANT_REGISTER_DECLARATION(Register, r31_sp, (31)); +CONSTANT_REGISTER_DECLARATION(Register, zr, (31)); +CONSTANT_REGISTER_DECLARATION(Register, sp, (30)); + +// Used as a filler in instructions where a register field is unused. 
+const Register dummy_reg = zr; + +// Use FloatRegister as shortcut +class FloatRegisterImpl; +typedef FloatRegisterImpl* FloatRegister; + +inline FloatRegister as_FloatRegister(int encoding) { + return (FloatRegister)(intptr_t) encoding; +} + +// The implementation of floating point registers for the architecture +class FloatRegisterImpl: public AbstractRegisterImpl { + public: + enum { + float_arg_base = 16, + number_of_registers = 32, + max_slots_per_register = 8 + }; + + // construction + inline friend FloatRegister as_FloatRegister(int encoding); + + VMReg as_VMReg(); + + // derived registers, offsets, and addresses + FloatRegister successor() const { return as_FloatRegister(encoding() + 1); } + + // accessors + int encoding() const { assert(is_valid(), "invalid register"); return (intptr_t)this; } + int encoding_nocheck() const { return (intptr_t)this; } + bool is_valid() const { return 0 <= (intptr_t)this && (intptr_t)this < number_of_registers; } + const char* name() const; + +}; + +// The float registers of the SW64 architecture +CONSTANT_REGISTER_DECLARATION(FloatRegister, fnoreg , (-1)); + +CONSTANT_REGISTER_DECLARATION(FloatRegister, f0 , ( 0)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f1 , ( 1)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f2 , ( 2)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f3 , ( 3)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f4 , ( 4)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f5 , ( 5)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f6 , ( 6)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f7 , ( 7)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f8 , ( 8)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f9 , ( 9)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f10 , (10)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f11 , (11)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f12 , (12)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f13 , (13)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f14 , (14)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f15 , (15)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f16 , (16)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f17 , (17)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f18 , (18)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f19 , (19)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f20 , (20)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f21 , (21)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f22 , (22)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f23 , (23)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f24 , (24)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f25 , (25)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f26 , (26)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f27 , (27)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f28 , (28)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f29 , (29)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f30 , (30)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f31 , (31)); + +// Need to know the total number of registers of all sorts for SharedInfo. +// Define a class that exports it. +class ConcreteRegisterImpl : public AbstractRegisterImpl { + public: + enum { + // A big enough number for C2: all the registers plus flags + // This number must be large enough to cover REG_COUNT (defined by c2) registers. + // There is no requirement that any ordering here matches any ordering c2 gives + // it's optoregs. 
+ + number_of_registers = (2 * RegisterImpl::number_of_registers + + 4 * FloatRegisterImpl::number_of_registers + + 1) // flags + }; + + // added to make it compile + static const int max_gpr; + static const int max_fpr; +}; + +// A set of registers +class RegSet { + uint32_t _bitset; + + RegSet(uint32_t bitset) : _bitset(bitset) { } + +public: + + RegSet() : _bitset(0) { } + + RegSet(Register r1) : _bitset(r1->bit()) { } + + RegSet operator+(const RegSet aSet) const { + RegSet result(_bitset | aSet._bitset); + return result; + } + + RegSet operator-(const RegSet aSet) const { + RegSet result(_bitset & ~aSet._bitset); + return result; + } + + RegSet &operator+=(const RegSet aSet) { + *this = *this + aSet; + return *this; + } + + static RegSet of(Register r1) { + return RegSet(r1); + } + + static RegSet of(Register r1, Register r2) { + return of(r1) + r2; + } + + static RegSet of(Register r1, Register r2, Register r3) { + return of(r1, r2) + r3; + } + + static RegSet of(Register r1, Register r2, Register r3, Register r4) { + return of(r1, r2, r3) + r4; + } + + static RegSet range(Register start, Register end) { + uint32_t bits = ~0; + bits <<= start->encoding(); + bits <<= 31 - end->encoding(); + bits >>= 31 - end->encoding(); + + return RegSet(bits); + } + + uint32_t bits() const { return _bitset; } +}; + +#endif // CPU_SW64_VM_REGISTER_SW64_HPP diff --git a/src/hotspot/cpu/sw64/relocInfo_sw64.cpp b/src/hotspot/cpu/sw64/relocInfo_sw64.cpp new file mode 100644 index 00000000000..855e608dc3d --- /dev/null +++ b/src/hotspot/cpu/sw64/relocInfo_sw64.cpp @@ -0,0 +1,133 @@ +/* + * Copyright (c) 1998, 2017, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/macroAssembler.hpp"
+#include "code/relocInfo.hpp"
+#include "memory/universe.hpp"
+#include "compiler/disassembler.hpp"
+#include "nativeInst_sw64.hpp"
+#include "oops/compressedOops.inline.hpp"
+#include "oops/klass.inline.hpp"
+#include "oops/oop.inline.hpp"
+#include "runtime/safepoint.hpp"
+#include "runtime/safepointMechanism.hpp"
+
+
+void Relocation::pd_set_data_value(address x, intptr_t o, bool verify_only) {
+  x += o;
+  typedef Assembler::WhichOperand WhichOperand;
+  WhichOperand which = (WhichOperand) format(); // that is, disp32 or imm, call32, narrow oop
+  assert(which == Assembler::disp32_operand ||
+         which == Assembler::narrow_oop_operand ||
+         which == Assembler::imm_operand, "format unpacks ok");
+  if (which == Assembler::imm_operand) {
+    if (verify_only) {
+      assert(nativeMovConstReg_at(addr())->data() == (long)x, "instructions must match");
+    } else {
+      nativeMovConstReg_at(addr())->set_data((intptr_t)(x));
+    }
+  } else if (which == Assembler::narrow_oop_operand) {
+//    Unimplemented();
+    // both compressed oops and compressed classes look the same
+    if (CompressedOops::is_in((void*)x)) {
+      uint32_t encoded = CompressedOops::narrow_oop_value(cast_to_oop(x));
+      if (verify_only) {
+        assert((int32_t)nativeMovConstReg_at(addr())->data() == (int32_t)encoded, "instructions must match");
+      } else {
+        nativeMovConstReg_at(addr())->set_data(encoded);
+      }
+    } else {
+      if (verify_only) {
+        assert((int32_t)nativeMovConstReg_at(addr())->data() == (int32_t)CompressedKlassPointers::encode((Klass*)x), "instructions must match");
+      } else {
+        nativeMovConstReg_at(addr())->set_data((intptr_t)(CompressedKlassPointers::encode((Klass*)x)));
+      }
+    }
+  } else {
+    // Note: Use runtime_call_type relocations for call32_operand.
+    Unimplemented();
+    assert(0, "call32_operand not supported in SW64");
+  }
+}
+
+
+// NOTE: this relocation is not really needed on SW64, since SW64 uses absolute call targets;
+// maybe call relocation could be dropped entirely.
+address Relocation::pd_call_destination(address orig_addr) {
+  NativeInstruction* ni = nativeInstruction_at(addr());
+  if (ni->is_call()) {
+    return nativeCall_at(addr())->destination();
+  } else if (ni->is_jump()) {
+    return nativeJump_at(addr())->jump_destination();
+  } else {
+    tty->print_cr("\nError!\ncall destination: 0x%lx", (long)addr());
+    Disassembler::decode(addr() - 10 * 4, addr() + 10 * 4, tty);
+    Unimplemented();
+    return NULL;
+  }
+}
+
+
+void Relocation::pd_set_call_destination(address x) {
+  NativeInstruction* ni = nativeInstruction_at(addr());
+  if (ni->is_call()) {
+    nativeCall_at(addr())->set_destination(x);
+  } else if (ni->is_jump()) {
+    NativeJump* nj = nativeJump_at(addr());
+
+    // Unresolved jumps are recognized by a destination of -1
+    // However 64bit can't actually produce such an address
+    // and encodes a jump to self but jump_destination will
+    // return a -1 as the signal. We must not relocate this
+    // jmp or the ic code will not see it as unresolved.
+ + if (nj->jump_destination() == (address) -1) { + x = addr(); // jump to self + } + nj->set_jump_destination(x); + } else { + ShouldNotReachHere(); + } +} + + +address* Relocation::pd_address_in_code() { + Unimplemented(); + return (address*)addr(); +} + + +address Relocation::pd_get_address_from_code() { + NativeMovConstReg* ni = nativeMovConstReg_at(addr()); + return (address)ni->data(); +} + + + +void poll_Relocation::fix_relocation_after_move(const CodeBuffer* src, CodeBuffer* dest) { +} + +void metadata_Relocation::pd_fix_value(address x) { +} diff --git a/src/hotspot/cpu/sw64/relocInfo_sw64.hpp b/src/hotspot/cpu/sw64/relocInfo_sw64.hpp new file mode 100644 index 00000000000..bebf11b307e --- /dev/null +++ b/src/hotspot/cpu/sw64/relocInfo_sw64.hpp @@ -0,0 +1,45 @@ +/* + * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_SW64_VM_RELOCINFO_SW64_HPP +#define CPU_SW64_VM_RELOCINFO_SW64_HPP + + // machine-dependent parts of class relocInfo + private: + enum { + // Since SW64 instructions are whole words, + // the two low-order offset bits can always be discarded. + offset_unit = 4, + + // imm_oop_operand vs. narrow_oop_operand + format_width = 2 + }; + + public: + + // This platform has no oops in the code that are not also + // listed in the oop section. + static bool mustIterateImmediateOopsInCode() { return false; } + +#endif // CPU_SW64_VM_RELOCINFO_SW64_HPP diff --git a/src/hotspot/cpu/sw64/runtime_sw64.cpp b/src/hotspot/cpu/sw64/runtime_sw64.cpp new file mode 100644 index 00000000000..c33432fabb8 --- /dev/null +++ b/src/hotspot/cpu/sw64/runtime_sw64.cpp @@ -0,0 +1,187 @@ +/* + * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#ifdef COMPILER2 +#include "asm/macroAssembler.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "code/vmreg.hpp" +#include "interpreter/interpreter.hpp" +#include "opto/runtime.hpp" +#include "runtime/interfaceSupport.inline.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" +#include "runtime/vframeArray.hpp" +#include "utilities/globalDefinitions.hpp" +#include "vmreg_sw64.inline.hpp" +#endif + +#define __ masm-> + +//-------------- generate_exception_blob ----------- +// creates _exception_blob. +// The exception blob is jumped to from a compiled method. +// (see emit_exception_handler in sparc.ad file) +// +// Given an exception pc at a call we call into the runtime for the +// handler in this method. This handler might merely restore state +// (i.e. callee save registers) unwind the frame and jump to the +// exception handler for the nmethod if there is no Java level handler +// for the nmethod. +// +// This code is entered with a jump, and left with a jump. +// +// Arguments: +// V0: exception oop +// T4: exception pc +// +// Results: +// A0: exception oop +// A1: exception pc in caller or ??? +// jumps to: exception handler of caller +// +// Note: the exception pc MUST be at a call (precise debug information) +// +// [stubGenerator_sw64.cpp] generate_forward_exception() +// |- V0, T4 are created +// |- T12 <= SharedRuntime::exception_handler_for_return_address +// `- jr T12 +// `- the caller's exception_handler +// `- jr OptoRuntime::exception_blob +// `- here +// +//void OptoRuntime::generate_exception_blob() { +// // Capture info about frame layout +// enum layout { +// fp_off, +// return_off, // slot for return address +// framesize +// }; +// +// // allocate space for the code +// ResourceMark rm; +// // setup code generation tools +// CodeBuffer buffer("exception_blob", 5120, 5120); +// MacroAssembler* masm = new MacroAssembler(&buffer); +// +// +// address start = __ pc(); +// +// __ addiu(esp, -1 * framesize * wordSize, esp); // Prolog! +// +// // this frame will be treated as the original caller method. +// // So, the return pc should be filled with the original exception pc. +// // ref: X86's implementation +// __ stl(T4, return_off *wordSize, esp); // return address +// __ stl(rfp, fp_off *wordSize, esp); +// +// // Save callee saved registers. None for UseSSE=0, +// // floats-only for UseSSE=1, and doubles for UseSSE=2. +// +// __ addiu(esp, fp_off * wordSize, rfp); +// +// // Store exception in Thread object. We cannot pass any arguments to the +// // handle_exception call, since we do not want to make any assumption +// // about the size of the frame where the exception happened in. +// Register thread = rthread; +// +// __ std(V0, Address(thread, JavaThread::exception_oop_offset())); +// __ std(T4, Address(thread, JavaThread::exception_pc_offset())); +// +// // This call does all the hard work. It checks if an exception handler +// // exists in the method. +// // If so, it returns the handler address. 
+// // If not, it prepares for stack-unwinding, restoring the callee-save +// // registers of the frame being removed. +// //no matching function for call to 'MacroAssembler::set_last_Java_frame(RegisterImpl*&, RegisterImpl* const&, RegisterImpl* const&, address) +//// __ set_last_Java_frame(thread, noreg, noreg, (address)NULL); +// +// __ mov(AT, -(StackAlignmentInBytes)); +// __ andr(esp, esp, AT); // Fix stack alignment as required by ABI +// +//#ifdef ZHJ20180909 +// __ relocate(relocInfo::internal_pc_type); +// { +// // patchable_set48 (4) + sd (1) + move (1) + patchable_call_setfpec1 +// long save_pc = (long)__ pc() + 24 + NativeCall::return_address_offset; +// __ patchable_set48(AT, save_pc); +// } +//#else +// { +// // addl (1) + sd (1) + move(1) + patchable_call_setfpec1 +// intptr_t patch_off = 3 * BytesPerInstWord + NativeCall::return_address_offset; +// __ br(AT, 0); +// __ addl(AT, patch_off, AT); +// } +//#endif +// __ stl(AT, in_bytes(JavaThread::last_Java_pc_offset()), thread); +// +// __ move(A0, thread); +// __ patchable_call_setfpec1((address)OptoRuntime::handle_exception_C); +// +// // Set an oopmap for the call site +// OopMapSet *oop_maps = new OopMapSet(); +// OopMap* map = new OopMap( framesize, 0 ); +// +// oop_maps->add_gc_map( __ offset() - 4, map); +// +// __ reset_last_Java_frame(thread, true); +// +// // Pop self-frame. +// __ leave(); // Epilog! +// +// // V0: exception handler +// +// // We have a handler in V0, (could be deopt blob) +// __ move(T12, V0); +// +// // Get the exception +// __ ld(A0, Address(thread, JavaThread::exception_oop_offset())); +// // Get the exception pc in case we are deoptimized +// __ ld(A1, Address(thread, JavaThread::exception_pc_offset())); +//#ifdef ASSERT +// __ std(R0, Address(thread, JavaThread::exception_handler_pc_offset())); +// __ std(R0, Address(thread, JavaThread::exception_pc_offset())); +//#endif +// // Clear the exception oop so GC no longer processes it as a root. +// __ std(R0, Address(thread, JavaThread::exception_oop_offset())); +// +// // Fix seg fault when running: +// // Eclipse + Plugin + Debug As +// // This is the only condition where C2 calls SharedRuntime::generate_deopt_blob() +// // +// __ move(V0, A0); +// __ move(T4, A1); +// +// // V0: exception oop +// // T12: exception handler +// // A1: exception pc +// __ jr(T12); +// +// // make sure all code is generated +// masm->flush(); +// +// _exception_blob = ExceptionBlob::create(&buffer, oop_maps, framesize); +//} diff --git a/src/hotspot/cpu/sw64/sharedRuntime_sw64.cpp b/src/hotspot/cpu/sw64/sharedRuntime_sw64.cpp new file mode 100644 index 00000000000..7f1e25d44ca --- /dev/null +++ b/src/hotspot/cpu/sw64/sharedRuntime_sw64.cpp @@ -0,0 +1,4421 @@ +/* + * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "code/debugInfoRec.hpp" +#include "code/icBuffer.hpp" +#include "code/nativeInst.hpp" +#include "code/vtableStubs.hpp" +#include "compiler/oopMap.hpp" +#include "gc/shared/collectedHeap.hpp" +#include "gc/shared/gcLocker.hpp" +#include "gc/shared/barrierSet.hpp" +#include "gc/shared/barrierSetAssembler.hpp" +#include "interpreter/interpreter.hpp" +#include "logging/log.hpp" +#include "memory/resourceArea.hpp" +#include "memory/universe.hpp" +#include "oops/compiledICHolder.hpp" +#include "oops/klass.inline.hpp" +#include "prims/methodHandles.hpp" +#include "runtime/jniHandles.hpp" +#include "runtime/safepointMechanism.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/signature.hpp" +#include "runtime/stubRoutines.hpp" +#include "runtime/vframeArray.hpp" +#include "runtime/vm_version.hpp" +#include "utilities/align.hpp" +#include "utilities/formatBuffer.hpp" +#include "utilities/macros.hpp" +#include "vmreg_sw64.inline.hpp" +#ifdef COMPILER1 +#include "c1/c1_Runtime1.hpp" +#endif +#ifdef COMPILER2 +#include "opto/runtime.hpp" +#endif +#if INCLUDE_JVMCI +#include "jvmci/jvmciJavaClasses.hpp" +#endif +#if INCLUDE_SHENANDOAHGC +#include "gc/shenandoah/shenandoahBarrierSet.hpp" +#include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp" +#endif + +#define __ masm-> + +#ifdef PRODUCT +#define BLOCK_COMMENT(str) /* nothing */ +#else +#define BLOCK_COMMENT(str) { char line[1024];sprintf(line,"%s:%s:%d",str,__FILE__, __LINE__); __ block_comment(line);} +#endif + +#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") + +const int StackAlignmentInSlots = StackAlignmentInBytes / VMRegImpl::stack_slot_size; + +class SimpleRuntimeFrame { + + public: + + // Most of the runtime stubs have this simple frame layout. + // This class exists to make the layout shared in one place. + // Offsets are for compiler stack slots, which are jints. + enum layout { + // The frame sender code expects that rbp will be in the "natural" place and + // will override any oopMap setting for it. We must therefore force the layout + // so that it agrees with the frame sender code. +// rfp_off = frame::arg_reg_save_area_bytes/BytesPerInt,//not understand? 
jzy + rfp_off = 0, + rfp_off2, + return_off, return_off2, + framesize + }; +}; + +class RegisterSaver { +public: + enum { FPU_regs_live = 32 }; + // Capture info about frame layout + enum layout { +#define DEF_LAYOUT_OFFS(regname) regname ## _off, regname ## H_off, + DEF_LAYOUT_OFFS(for_16_bytes_aligned) + DEF_LAYOUT_OFFS(fpr0) + DEF_LAYOUT_OFFS(fpr1) + DEF_LAYOUT_OFFS(fpr2) + DEF_LAYOUT_OFFS(fpr3) + DEF_LAYOUT_OFFS(fpr4) + DEF_LAYOUT_OFFS(fpr5) + DEF_LAYOUT_OFFS(fpr6) + DEF_LAYOUT_OFFS(fpr7) + DEF_LAYOUT_OFFS(fpr8) + DEF_LAYOUT_OFFS(fpr9) + DEF_LAYOUT_OFFS(fpr10) + DEF_LAYOUT_OFFS(fpr11) + DEF_LAYOUT_OFFS(fpr12) + DEF_LAYOUT_OFFS(fpr13) + DEF_LAYOUT_OFFS(fpr14) + DEF_LAYOUT_OFFS(fpr15) + DEF_LAYOUT_OFFS(fpr16) + DEF_LAYOUT_OFFS(fpr17) + DEF_LAYOUT_OFFS(fpr18) + DEF_LAYOUT_OFFS(fpr19) + DEF_LAYOUT_OFFS(fpr20) + DEF_LAYOUT_OFFS(fpr21) + DEF_LAYOUT_OFFS(fpr22) + DEF_LAYOUT_OFFS(fpr23) + DEF_LAYOUT_OFFS(fpr24) + DEF_LAYOUT_OFFS(fpr25) + DEF_LAYOUT_OFFS(fpr26) + DEF_LAYOUT_OFFS(fpr27) + DEF_LAYOUT_OFFS(fpr28) + DEF_LAYOUT_OFFS(fpr29) + DEF_LAYOUT_OFFS(fpr30) + DEF_LAYOUT_OFFS(fpr31) + + DEF_LAYOUT_OFFS(v0) + DEF_LAYOUT_OFFS(t0) + DEF_LAYOUT_OFFS(t1) + DEF_LAYOUT_OFFS(t2) + DEF_LAYOUT_OFFS(t3) + DEF_LAYOUT_OFFS(t4) + DEF_LAYOUT_OFFS(t5) + DEF_LAYOUT_OFFS(t6) + DEF_LAYOUT_OFFS(t7) + DEF_LAYOUT_OFFS(s0) + DEF_LAYOUT_OFFS(s1) + DEF_LAYOUT_OFFS(s2) + DEF_LAYOUT_OFFS(s3) + DEF_LAYOUT_OFFS(s4) + DEF_LAYOUT_OFFS(s5) + // rfp move down + DEF_LAYOUT_OFFS(a0) + DEF_LAYOUT_OFFS(a1) + DEF_LAYOUT_OFFS(a2) + DEF_LAYOUT_OFFS(a3) + DEF_LAYOUT_OFFS(a4) + DEF_LAYOUT_OFFS(a5) + DEF_LAYOUT_OFFS(t8) + DEF_LAYOUT_OFFS(t9) + DEF_LAYOUT_OFFS(t10) + DEF_LAYOUT_OFFS(t11) + // RA move down + DEF_LAYOUT_OFFS(t12) + // no AT + DEF_LAYOUT_OFFS(gp) + // no esp + // no R0 + DEF_LAYOUT_OFFS(fp) + DEF_LAYOUT_OFFS(return) + reg_save_size + }; + + public: + static OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors = false); + static void restore_live_registers(MacroAssembler* masm, bool restore_vectors = false); + + //static int raOffset(void) { return return_off / 2; } + //static int methodOffset(void) { return s3_off / 2; } + //static int v0Offset(void) { return v0_off / 2; } + + //static int fpResultOffset_todelete(void) { ShouldNotReachHere();return fpr0_off / 2; } + static int v0_offset_in_bytes(void) { return BytesPerInt * v0_off; } + static int a2_offset_in_bytes(void) { return a2_off / 2; } + static int rmethod_offset_in_bytes(void) { return BytesPerInt * s3_off; } + static int fsf_offset_in_bytes(void) { return BytesPerInt * fpr0_off; } + static int return_offset_in_bytes(void) { return BytesPerInt * return_off; } + // During deoptimization only the result registers need to be restored, + // all the other values have already been extracted. 
+ static void restore_result_registers(MacroAssembler* masm); +}; + +//put here becauseof RegisterSaver's layout +static void push_CPU_state(MacroAssembler* masm) { + __ subptr(esp, (RegisterSaver::reg_save_size-4) * jintSize, esp); + + __ fstd(f0, RegisterSaver::fpr0_off * jintSize, esp); __ fstd(f1, RegisterSaver::fpr1_off * jintSize, esp); + __ fstd(f2, RegisterSaver::fpr2_off * jintSize, esp); __ fstd(f3, RegisterSaver::fpr3_off * jintSize, esp); + __ fstd(f4, RegisterSaver::fpr4_off * jintSize, esp); __ fstd(f5, RegisterSaver::fpr5_off * jintSize, esp); + __ fstd(f6, RegisterSaver::fpr6_off * jintSize, esp); __ fstd(f7, RegisterSaver::fpr7_off * jintSize, esp); + __ fstd(f8, RegisterSaver::fpr8_off * jintSize, esp); __ fstd(f9, RegisterSaver::fpr9_off * jintSize, esp); + __ fstd(f10, RegisterSaver::fpr10_off * jintSize, esp); __ fstd(f11, RegisterSaver::fpr11_off * jintSize, esp); + __ fstd(f12, RegisterSaver::fpr12_off * jintSize, esp); __ fstd(f13, RegisterSaver::fpr13_off * jintSize, esp); + __ fstd(f14, RegisterSaver::fpr14_off * jintSize, esp); __ fstd(f15, RegisterSaver::fpr15_off * jintSize, esp); + __ fstd(f16, RegisterSaver::fpr16_off * jintSize, esp); __ fstd(f17, RegisterSaver::fpr17_off * jintSize, esp); + __ fstd(f18, RegisterSaver::fpr18_off * jintSize, esp); __ fstd(f19, RegisterSaver::fpr19_off * jintSize, esp); + __ fstd(f20, RegisterSaver::fpr20_off * jintSize, esp); __ fstd(f21, RegisterSaver::fpr21_off * jintSize, esp); + __ fstd(f22, RegisterSaver::fpr22_off * jintSize, esp); __ fstd(f23, RegisterSaver::fpr23_off * jintSize, esp); + __ fstd(f24, RegisterSaver::fpr24_off * jintSize, esp); __ fstd(f25, RegisterSaver::fpr25_off * jintSize, esp); + __ fstd(f26, RegisterSaver::fpr26_off * jintSize, esp); __ fstd(f27, RegisterSaver::fpr27_off * jintSize, esp); + __ fstd(f28, RegisterSaver::fpr28_off * jintSize, esp); __ fstd(f29, RegisterSaver::fpr29_off * jintSize, esp); + __ fstd(f30, RegisterSaver::fpr30_off * jintSize, esp); + + __ stl(V0, Address(esp, RegisterSaver::v0_off * jintSize)); + __ stl(i1, Address(esp, RegisterSaver::t0_off * jintSize)); + __ stl(i2, Address(esp, RegisterSaver::t1_off * jintSize)); + __ stl(i3, Address(esp, RegisterSaver::t2_off * jintSize)); + __ stl(i4, Address(esp, RegisterSaver::t3_off * jintSize)); + __ stl(i5, Address(esp, RegisterSaver::t4_off * jintSize)); + __ stl(i6, Address(esp, RegisterSaver::t5_off * jintSize)); + __ stl(i7, Address(esp, RegisterSaver::t6_off * jintSize)); + __ stl(i8, Address(esp, RegisterSaver::t7_off * jintSize)); + __ stl(i9, Address(esp, RegisterSaver::s0_off * jintSize)); + __ stl(i10, Address(esp, RegisterSaver::s1_off * jintSize)); + __ stl(i11, Address(esp, RegisterSaver::s2_off * jintSize)); + __ stl(i12, Address(esp, RegisterSaver::s3_off * jintSize)); + __ stl(i13, Address(esp, RegisterSaver::s4_off * jintSize)); + __ stl(i14, Address(esp, RegisterSaver::s5_off * jintSize)); + __ stl(i16, Address(esp, RegisterSaver::a0_off * jintSize)); + __ stl(i17, Address(esp, RegisterSaver::a1_off * jintSize)); + __ stl(i18, Address(esp, RegisterSaver::a2_off * jintSize)); + __ stl(i19, Address(esp, RegisterSaver::a3_off * jintSize)); + __ stl(i20, Address(esp, RegisterSaver::a4_off * jintSize)); + __ stl(i21, Address(esp, RegisterSaver::a5_off * jintSize)); + __ stl(i22, Address(esp, RegisterSaver::t8_off * jintSize)); + __ stl(i23, Address(esp, RegisterSaver::t9_off * jintSize)); + __ stl(i24, Address(esp, RegisterSaver::t10_off * jintSize)); + __ stl(i25, Address(esp, RegisterSaver::t11_off * jintSize)); + 
__ stl(i27, Address(esp, RegisterSaver::t12_off * jintSize)); + + __ stl(GP, Address(esp, RegisterSaver::gp_off * jintSize)); + //__ stl(rfp, Address(esp, RegisterSaver::fp_off * jintSize)); + //__ stl(RA, Address(esp, RegisterSaver::return_off * jintSize)); +} + +static void pop_CPU_state(MacroAssembler* masm) { + __ fldd(f0, RegisterSaver::fpr0_off * jintSize, esp); __ fldd(f1, RegisterSaver::fpr1_off * jintSize, esp); + __ fldd(f2, RegisterSaver::fpr2_off * jintSize, esp); __ fldd(f3, RegisterSaver::fpr3_off * jintSize, esp); + __ fldd(f4, RegisterSaver::fpr4_off * jintSize, esp); __ fldd(f5, RegisterSaver::fpr5_off * jintSize, esp); + __ fldd(f6, RegisterSaver::fpr6_off * jintSize, esp); __ fldd(f7, RegisterSaver::fpr7_off * jintSize, esp); + __ fldd(f8, RegisterSaver::fpr8_off * jintSize, esp); __ fldd(f9, RegisterSaver::fpr9_off * jintSize, esp); + __ fldd(f10, RegisterSaver::fpr10_off * jintSize, esp); __ fldd(f11, RegisterSaver::fpr11_off * jintSize, esp); + __ fldd(f12, RegisterSaver::fpr12_off * jintSize, esp); __ fldd(f13, RegisterSaver::fpr13_off * jintSize, esp); + __ fldd(f14, RegisterSaver::fpr14_off * jintSize, esp); __ fldd(f15, RegisterSaver::fpr15_off * jintSize, esp); + __ fldd(f16, RegisterSaver::fpr16_off * jintSize, esp); __ fldd(f17, RegisterSaver::fpr17_off * jintSize, esp); + __ fldd(f18, RegisterSaver::fpr18_off * jintSize, esp); __ fldd(f19, RegisterSaver::fpr19_off * jintSize, esp); + __ fldd(f20, RegisterSaver::fpr20_off * jintSize, esp); __ fldd(f21, RegisterSaver::fpr21_off * jintSize, esp); + __ fldd(f22, RegisterSaver::fpr22_off * jintSize, esp); __ fldd(f23, RegisterSaver::fpr23_off * jintSize, esp); + __ fldd(f24, RegisterSaver::fpr24_off * jintSize, esp); __ fldd(f25, RegisterSaver::fpr25_off * jintSize, esp); + __ fldd(f26, RegisterSaver::fpr26_off * jintSize, esp); __ fldd(f27, RegisterSaver::fpr27_off * jintSize, esp); + __ fldd(f28, RegisterSaver::fpr28_off * jintSize, esp); __ fldd(f29, RegisterSaver::fpr29_off * jintSize, esp); + __ fldd(f30, RegisterSaver::fpr30_off * jintSize, esp); + + __ ldl(V0, Address(esp, RegisterSaver::v0_off * jintSize)); + __ ldl(i1, Address(esp, RegisterSaver::t0_off * jintSize)); + __ ldl(i2, Address(esp, RegisterSaver::t1_off * jintSize)); + __ ldl(i3, Address(esp, RegisterSaver::t2_off * jintSize)); + __ ldl(i4, Address(esp, RegisterSaver::t3_off * jintSize)); + __ ldl(i5, Address(esp, RegisterSaver::t4_off * jintSize)); + __ ldl(i6, Address(esp, RegisterSaver::t5_off * jintSize)); + __ ldl(i7, Address(esp, RegisterSaver::t6_off * jintSize)); + __ ldl(i8, Address(esp, RegisterSaver::t7_off * jintSize)); + __ ldl(i9, Address(esp, RegisterSaver::s0_off * jintSize)); + __ ldl(i10, Address(esp, RegisterSaver::s1_off * jintSize)); + __ ldl(i11, Address(esp, RegisterSaver::s2_off * jintSize)); + __ ldl(i12, Address(esp, RegisterSaver::s3_off * jintSize)); + __ ldl(i13, Address(esp, RegisterSaver::s4_off * jintSize)); + __ ldl(i14, Address(esp, RegisterSaver::s5_off * jintSize)); + __ ldl(i16, Address(esp, RegisterSaver::a0_off * jintSize)); + __ ldl(i17, Address(esp, RegisterSaver::a1_off * jintSize)); + __ ldl(i18, Address(esp, RegisterSaver::a2_off * jintSize)); + __ ldl(i19, Address(esp, RegisterSaver::a3_off * jintSize)); + __ ldl(i20, Address(esp, RegisterSaver::a4_off * jintSize)); + __ ldl(i21, Address(esp, RegisterSaver::a5_off * jintSize)); + __ ldl(i22, Address(esp, RegisterSaver::t8_off * jintSize)); + __ ldl(i23, Address(esp, RegisterSaver::t9_off * jintSize)); + __ ldl(i24, Address(esp, RegisterSaver::t10_off 
* jintSize)); + __ ldl(i25, Address(esp, RegisterSaver::t11_off * jintSize)); + __ ldl(i27, Address(esp, RegisterSaver::t12_off * jintSize)); + + __ ldl(GP, Address(esp, RegisterSaver::gp_off * jintSize)); +// __ ldl(rfp, Address(esp, RegisterSaver::fp_off * jintSize)); +// __ ldl(RA, Address(esp, RegisterSaver::return_off * jintSize)); + + __ addptr(esp, (RegisterSaver::reg_save_size-4) * jintSize, esp); +} + + +OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors) {SCOPEMARK_NAME(save_live_registers, masm);//__ stop("save_live_registers"); +/*#if COMPILER2_OR_JVMCI + if (save_vectors) { + // Save upper half of vector registers + int vect_words = 32 * 8 / wordSize; + additional_frame_words += vect_words; + } +#else + assert(!save_vectors, "vectors are generated only by C2 and JVMCI"); +#endif +*/ + int frame_size_in_bytes = align_up(additional_frame_words*wordSize + + reg_save_size*BytesPerInt, 16); + // OopMap frame size is in compiler stack slots (jint's) not bytes or words + int frame_size_in_slots = frame_size_in_bytes / BytesPerInt; + // The caller will allocate additional_frame_words + int additional_frame_slots = additional_frame_words*wordSize / BytesPerInt; + // CodeBlob frame size is in words. + int frame_size_in_words = frame_size_in_bytes / wordSize; + *total_frame_words = frame_size_in_words; + + // save registers + __ enter(); + push_CPU_state(masm); + /*__ subptr(esp, reg_save_size * jintSize, esp, rscratch1_GP); + + __ fstd(f0, fpr0_off * jintSize, esp); __ fstd(f1, fpr1_off * jintSize, esp); + __ fstd(f2, fpr2_off * jintSize, esp); __ fstd(f3, fpr3_off * jintSize, esp); + __ fstd(f4, fpr4_off * jintSize, esp); __ fstd(f5, fpr5_off * jintSize, esp); + __ fstd(f6, fpr6_off * jintSize, esp); __ fstd(f7, fpr7_off * jintSize, esp); + __ fstd(f8, fpr8_off * jintSize, esp); __ fstd(f9, fpr9_off * jintSize, esp); + __ fstd(f10, fpr10_off * jintSize, esp); __ fstd(f11, fpr11_off * jintSize, esp); + __ fstd(f12, fpr12_off * jintSize, esp); __ fstd(f13, fpr13_off * jintSize, esp); + __ fstd(f14, fpr14_off * jintSize, esp); __ fstd(f15, fpr15_off * jintSize, esp); + __ fstd(f16, fpr16_off * jintSize, esp); __ fstd(f17, fpr17_off * jintSize, esp); + __ fstd(f18, fpr18_off * jintSize, esp); __ fstd(f19, fpr19_off * jintSize, esp); + __ fstd(f20, fpr20_off * jintSize, esp); __ fstd(f21, fpr21_off * jintSize, esp); + __ fstd(f22, fpr22_off * jintSize, esp); __ fstd(f23, fpr23_off * jintSize, esp); + __ fstd(f24, fpr24_off * jintSize, esp); __ fstd(f25, fpr25_off * jintSize, esp); + __ fstd(f26, fpr26_off * jintSize, esp); __ fstd(f27, fpr27_off * jintSize, esp); + __ fstd(f28, fpr28_off * jintSize, esp); __ fstd(f29, fpr29_off * jintSize, esp); + __ fstd(f30, fpr30_off * jintSize, esp); + + __ stl(V0, Address(esp, v0_off * jintSize)); + __ stl(i1, Address(esp, t0_off * jintSize)); + __ stl(i2, Address(esp, t1_off * jintSize)); + __ stl(i3, Address(esp, t2_off * jintSize)); + __ stl(i4, Address(esp, t3_off * jintSize)); + __ stl(i5, Address(esp, t4_off * jintSize)); + __ stl(i6, Address(esp, t5_off * jintSize)); + __ stl(i7, Address(esp, t6_off * jintSize)); + __ stl(i8, Address(esp, t7_off * jintSize)); + __ stl(i9, Address(esp, s0_off * jintSize)); + __ stl(i10, Address(esp, s1_off * jintSize)); + __ stl(i11, Address(esp, s2_off * jintSize)); + __ stl(i12, Address(esp, s3_off * jintSize)); + __ stl(i13, Address(esp, s4_off * jintSize)); + __ stl(i14, Address(esp, s5_off * jintSize)); + __ stl(i16, 
Address(esp, a0_off * jintSize)); + __ stl(i17, Address(esp, a1_off * jintSize)); + __ stl(i18, Address(esp, a2_off * jintSize)); + __ stl(i19, Address(esp, a3_off * jintSize)); + __ stl(i20, Address(esp, a4_off * jintSize)); + __ stl(i21, Address(esp, a5_off * jintSize)); + __ stl(i22, Address(esp, t8_off * jintSize)); + __ stl(i23, Address(esp, t9_off * jintSize)); + __ stl(i24, Address(esp, t10_off * jintSize)); + __ stl(i25, Address(esp, t11_off * jintSize)); + __ stl(i27, Address(esp, t12_off * jintSize)); + + __ stl(GP, Address(esp, gp_off * jintSize)); + __ stl(rfp, Address(esp, fp_off * jintSize)); + __ stl(RA, Address(esp, return_off * jintSize));*/ + //__ addiu(SP, fp_off * jintSize, FP); //TODO:why add this in sw8? jzy + + OopMapSet *oop_maps = new OopMapSet(); + OopMap* map = new OopMap(frame_size_in_slots, 0); + +#define STACK_OFFSET(x) VMRegImpl::stack2reg((x) + additional_frame_slots) + + map->set_callee_saved(STACK_OFFSET( v0_off), V0->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( t0_off), i1->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( t1_off), i2->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( t2_off), i3->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( t3_off), i4->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( t4_off), i5->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( t5_off), i6->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( t6_off), i7->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( t7_off), i8->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( s0_off), i9->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( s1_off), i10->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( s2_off), i11->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( s3_off), i12->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( s4_off), i13->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( s5_off), i14->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( a0_off), A0->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( a1_off), A1->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( a2_off), A2->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( a3_off), A3->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( a4_off), A4->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( a5_off), A5->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( t8_off), i22->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( t9_off), i23->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( t10_off), i24->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( t11_off), i25->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( t12_off), i27->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( gp_off), GP->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( fp_off), rfp->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( return_off), RA->as_VMReg()); + + map->set_callee_saved(STACK_OFFSET( fpr0_off), f0->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( fpr1_off), f1->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( fpr2_off), f2->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( fpr3_off), f3->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( fpr4_off), f4->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( fpr5_off), f5->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( fpr6_off), f6->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( fpr7_off), f7->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( fpr8_off), f8->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( fpr9_off), f9->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( fpr10_off), f10->as_VMReg()); + 
map->set_callee_saved(STACK_OFFSET( fpr11_off), f11->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( fpr12_off), f12->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( fpr13_off), f13->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( fpr14_off), f14->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( fpr15_off), f15->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( fpr16_off), f16->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( fpr17_off), f17->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( fpr18_off), f18->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( fpr19_off), f19->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( fpr20_off), f20->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( fpr21_off), f21->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( fpr22_off), f22->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( fpr23_off), f23->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( fpr24_off), f24->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( fpr25_off), f25->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( fpr26_off), f26->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( fpr27_off), f27->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( fpr28_off), f28->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( fpr29_off), f29->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( fpr30_off), f30->as_VMReg()); + +#undef STACK_OFFSET + return map; +} + +void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_vectors) {SCOPEMARK_NAME(restore_live_registers, masm);//__ stop("restore_live_registers"); + /*__ fldd(f0, fpr0_off * jintSize, esp); __ fldd(f1, fpr1_off * jintSize, esp); + __ fldd(f2, fpr2_off * jintSize, esp); __ fldd(f3, fpr3_off * jintSize, esp); + __ fldd(f4, fpr4_off * jintSize, esp); __ fldd(f5, fpr5_off * jintSize, esp); + __ fldd(f6, fpr6_off * jintSize, esp); __ fldd(f7, fpr7_off * jintSize, esp); + __ fldd(f8, fpr8_off * jintSize, esp); __ fldd(f9, fpr9_off * jintSize, esp); + __ fldd(f10, fpr10_off * jintSize, esp); __ fldd(f11, fpr11_off * jintSize, esp); + __ fldd(f12, fpr12_off * jintSize, esp); __ fldd(f13, fpr13_off * jintSize, esp); + __ fldd(f14, fpr14_off * jintSize, esp); __ fldd(f15, fpr15_off * jintSize, esp); + __ fldd(f16, fpr16_off * jintSize, esp); __ fldd(f17, fpr17_off * jintSize, esp); + __ fldd(f18, fpr18_off * jintSize, esp); __ fldd(f19, fpr19_off * jintSize, esp); + __ fldd(f20, fpr20_off * jintSize, esp); __ fldd(f21, fpr21_off * jintSize, esp); + __ fldd(f22, fpr22_off * jintSize, esp); __ fldd(f23, fpr23_off * jintSize, esp); + __ fldd(f24, fpr24_off * jintSize, esp); __ fldd(f25, fpr25_off * jintSize, esp); + __ fldd(f26, fpr26_off * jintSize, esp); __ fldd(f27, fpr27_off * jintSize, esp); + __ fldd(f28, fpr28_off * jintSize, esp); __ fldd(f29, fpr29_off * jintSize, esp); + __ fldd(f30, fpr30_off * jintSize, esp); + + __ ldl(V0, Address(esp, v0_off * jintSize)); + __ ldl(i1, Address(esp, t0_off * jintSize)); + __ ldl(i2, Address(esp, t1_off * jintSize)); + __ ldl(i3, Address(esp, t2_off * jintSize)); + __ ldl(i4, Address(esp, t3_off * jintSize)); + __ ldl(i5, Address(esp, t4_off * jintSize)); + __ ldl(i6, Address(esp, t5_off * jintSize)); + __ ldl(i7, Address(esp, t6_off * jintSize)); + __ ldl(i8, Address(esp, t7_off * jintSize)); + __ ldl(i9, Address(esp, s0_off * jintSize)); + __ ldl(i10, Address(esp, s1_off * jintSize)); + __ ldl(i11, Address(esp, s2_off * jintSize)); + __ ldl(i12, Address(esp, s3_off * jintSize)); + __ ldl(i13, Address(esp, s4_off * jintSize)); + __ ldl(i14, Address(esp, s5_off 
* jintSize)); + __ ldl(i16, Address(esp, a0_off * jintSize)); + __ ldl(i17, Address(esp, a1_off * jintSize)); + __ ldl(i18, Address(esp, a2_off * jintSize)); + __ ldl(i19, Address(esp, a3_off * jintSize)); + __ ldl(i20, Address(esp, a4_off * jintSize)); + __ ldl(i21, Address(esp, a5_off * jintSize)); + __ ldl(i22, Address(esp, t8_off * jintSize)); + __ ldl(i23, Address(esp, t9_off * jintSize)); + __ ldl(i24, Address(esp, t10_off * jintSize)); + __ ldl(i25, Address(esp, t11_off * jintSize)); + __ ldl(i27, Address(esp, t12_off * jintSize)); + + __ ldl(GP, Address(esp, gp_off * jintSize)); + __ ldl(rfp, Address(esp, fp_off * jintSize)); + __ ldl(RA, Address(esp, return_off * jintSize)); + + __ addptr(esp, reg_save_size * jintSize, esp, rscratch1_GP);*/ + + // Recover CPU state + pop_CPU_state(masm); + // Get the rbp described implicitly by the calling convention (no oopMap) + __ leave(); +} + +void RegisterSaver::restore_result_registers(MacroAssembler* masm) {//__ stop("restore_result_registers"); + + // Just restore result register. Only used by deoptimization. By + // now any callee save register that needs to be restored to a c2 + // caller of the deoptee has been extracted into the vframeArray + // and will be stuffed into the c2i adapter we create for later + // restoration so only result registers need to be restored here. + + // Restore integer result register + __ ldl(V0, v0_offset_in_bytes(), esp); + // Restore fp result register + __ load_double(FSF, Address(esp, fsf_offset_in_bytes())); + + // Pop all of the register save are off the stack + __ addptr(esp, return_offset_in_bytes(), esp); +} + +// Is vector's size (in bytes) bigger than a size saved by default? +// 16 bytes XMM registers are saved by default using fxsave/fxrstor instructions. +bool SharedRuntime::is_wide_vector(int size) { + return size > 16; +} + + +// --------------------------------------------------------------------------- +// Read the array of BasicTypes from a signature, and compute where the +// arguments should go. Values in the VMRegPair regs array refer to 4-byte +// quantities. Values less than VMRegImpl::stack0 are registers, those above +// refer to 4-byte stack slots. All stack slots are based off of the stack pointer +// as framesizes are fixed. +// VMRegImpl::stack0 refers to the first slot 0(sp). +// and VMRegImpl::stack0+1 refers to the memory word 4-byes higher. Register +// up to RegisterImpl::number_of_registers) are the 64-bit +// integer registers. + +// Note: the INPUTS in sig_bt are in units of Java argument words, which are +// either 32-bit or 64-bit depending on the build. The OUTPUTS are in 32-bit +// units regardless of build. Of course for i486 there is no 64 bit build + +// The Java calling convention is a "shifted" version of the C ABI. +// By skipping the first C ABI register we can call non-static jni methods +// with small numbers of arguments without having to shuffle the arguments +// at all. Since we control the java ABI we ought to at least get some +// advantage out of it. + +int SharedRuntime::java_calling_convention(const BasicType *sig_bt, + VMRegPair *regs, + int total_args_passed) { + + // Create the mapping between argument positions and + // registers. 
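Reviewer aside: the register assignment performed by the loop that follows can be approximated outside HotSpot. The sketch below uses plain ints instead of VMReg/VMRegPair, so the enum and struct are illustrative stand-ins only. Note that, as the loop is written, a single `args` index is shared between the integer and FP register arrays, every spilled argument is charged two 32-bit stack slots, and the total is rounded up to an even count.

```c++
#include <cstdio>
#include <vector>

// Illustrative stand-ins, not HotSpot types: B_VOID marks the second half of
// a long/double, exactly as T_VOID does in sig_bt.
enum BT { B_INT, B_LONG, B_FLOAT, B_DOUBLE, B_OBJECT, B_VOID };
struct Where { char kind; int index; };  // 'r' int reg, 'f' fp reg, 's' stack, '-' none

static int model_java_conv(const std::vector<BT>& sig, std::vector<Where>& out) {
  const unsigned n_int_regs = 6, n_fp_regs = 6;   // j_rarg0..5 / j_farg0..5
  unsigned args = 0, stk_args = 0;                // stk_args grows by 2 per spill
  for (BT t : sig) {
    if (t == B_VOID) { out.push_back({'-', -1}); continue; }   // unused half slot
    bool fp = (t == B_FLOAT || t == B_DOUBLE);
    unsigned limit = fp ? n_fp_regs : n_int_regs;
    if (args < limit) {
      out.push_back({fp ? 'f' : 'r', (int)args++});            // shared args index
    } else {
      out.push_back({'s', (int)stk_args});
      stk_args += 2;
    }
  }
  return (int)((stk_args + 1) & ~1u);             // align_up(stk_args, 2)
}

int main() {
  // (long, Object, double, int) spelled the way sig_bt spells it.
  std::vector<BT> sig = {B_LONG, B_VOID, B_OBJECT, B_DOUBLE, B_VOID, B_INT};
  std::vector<Where> w;
  int slots = model_java_conv(sig, w);
  for (size_t i = 0; i < w.size(); i++)
    std::printf("sig[%zu] -> %c%d\n", i, w[i].kind, w[i].index);
  std::printf("stack slots reserved: %d\n", slots);
  return 0;
}
```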
+ static const Register INT_ArgReg[Argument::n_int_register_parameters_j] = { + j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5 + }; + static const FloatRegister FP_ArgReg[Argument::n_float_register_parameters_j] = { + j_farg0, j_farg1, j_farg2, j_farg3, + j_farg4, j_farg5, + }; + + + uint args = 0; + uint stk_args = 0; // inc by 2 each time + + for (int i = 0; i < total_args_passed; i++) { + switch (sig_bt[i]) { + case T_BOOLEAN: + case T_CHAR: + case T_BYTE: + case T_SHORT: + case T_INT: + if (args < Argument::n_int_register_parameters_j) { + regs[i].set1(INT_ArgReg[args++]->as_VMReg()); + } else { + regs[i].set1(VMRegImpl::stack2reg(stk_args)); + stk_args += 2; + } + break; + case T_VOID: + // halves of T_LONG or T_DOUBLE + assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half"); + regs[i].set_bad(); + break; + case T_LONG: + assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half"); + // fall through + case T_OBJECT: + case T_ARRAY: + case T_ADDRESS: + if (args < Argument::n_int_register_parameters_j) { + regs[i].set2(INT_ArgReg[args++]->as_VMReg()); + } else { + regs[i].set2(VMRegImpl::stack2reg(stk_args)); + stk_args += 2; + } + break; + case T_FLOAT: + if (args < Argument::n_float_register_parameters_j) { + regs[i].set1(FP_ArgReg[args++]->as_VMReg()); + } else { + regs[i].set1(VMRegImpl::stack2reg(stk_args)); + stk_args += 2; + } + break; + case T_DOUBLE: + assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half"); + if (args < Argument::n_float_register_parameters_j) { + regs[i].set2(FP_ArgReg[args++]->as_VMReg()); + } else { + regs[i].set2(VMRegImpl::stack2reg(stk_args)); + stk_args += 2; + } + break; + default: + ShouldNotReachHere(); + break; + } + } + + return align_up(stk_args, 2); +} + +// Patch the callers callsite with entry to compiled code if it exists. +static void patch_callers_callsite(MacroAssembler *masm) { + Label L; + //__ stop("patch_callers_callsite"); + __ cmpptr(Address(rbx, in_bytes(Method::code_offset())), (int32_t)NULL_WORD, rscratch1_GP); + __ jcc(Assembler::equal, L, rscratch1_GP); + + __ enter(); + push_CPU_state(masm); + + // VM needs caller's callsite + // VM needs target method + // This needs to be a long call since we will relocate this adapter to + // the codeBuffer and it may not reach + +#ifndef PRODUCT + assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area"); +#endif + + __ movl(c_rarg0, rbx); + __ movl(c_rarg1, RA); + __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite))); + + pop_CPU_state(masm); + __ leave(); + __ bind(L); +} + + +static void gen_c2i_adapter(MacroAssembler *masm, + int total_args_passed, + int comp_args_on_stack, + const BasicType *sig_bt, + const VMRegPair *regs, + Label& skip_fixup) {//__ stop("gen_c2i_adapter"); + // Before we get into the guts of the C2I adapter, see if we should be here + // at all. We've come from compiled code and are attempting to jump to the + // interpreter, which means the caller made a static call to get here + // (vcalls always get a compiled target if there is one). Check for a + // compiled target. If there is one, we need to patch the caller's call. + patch_callers_callsite(masm); + + __ bind(skip_fixup); + + // Since all args are passed on the stack, total_args_passed * + // Interpreter::stackElementSize is the space we need. + // Return address is in RA. 
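A worked instance of this size calculation, assuming Interpreter::stackElementSize is 8 bytes on a 64-bit build. One caveat: the "Say 4 args" table a little further down appears to be carried over from x86, where a return-address slot sits at offset 0; with the extra "- 1" in this port's st_off formula (the return address stays in RA, as noted above) the offsets come out one stack element lower, which is what the sketch prints.

```c++
#include <cstdio>

static int align_up(int x, int a) { return (x + a - 1) & ~(a - 1); }

int main() {
  // Assumed values for a 64-bit build (illustrative only).
  const int stackElementSize = 8;   // Interpreter::stackElementSize
  const int wordSize         = 8;

  // The four-slot case from the comment below: T_LONG, T_VOID, T_OBJECT, T_BOOLEAN.
  const int total_args_passed = 4;

  int extraspace = align_up(total_args_passed * stackElementSize, 2 * wordSize);
  std::printf("extraspace = %d bytes\n", extraspace);            // 32

  for (int i = 0; i < total_args_passed; i++) {
    int st_off   = (total_args_passed - i - 1) * stackElementSize;
    int next_off = st_off - stackElementSize;
    std::printf("arg %d: st_off=%2d next_off=%3d\n", i, st_off, next_off);
  }
  return 0;   // st_off comes out 24, 16, 8, 0 -- no return-address slot on this port
}
```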
+ + int extraspace = (total_args_passed * Interpreter::stackElementSize); + + // stack is aligned, keep it that way + extraspace = align_up(extraspace, 2*wordSize); + + // set senderSP value + __ movl(rsender, esp); + + if (extraspace) + __ subptr(esp, extraspace, esp); + + // Now write the args into the outgoing interpreter space + for (int i = 0; i < total_args_passed; i++) { + if (sig_bt[i] == T_VOID) { + assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half"); + continue; + } + + // offset to start parameters + int st_off = (total_args_passed - i - 1) * Interpreter::stackElementSize; + int next_off = st_off - Interpreter::stackElementSize; + + // Say 4 args: + // i st_off + // 0 32 T_LONG + // 1 24 T_VOID + // 2 16 T_OBJECT + // 3 8 T_BOOL + // - 0 return address + // + // However to make thing extra confusing. Because we can fit a long/double in + // a single slot on a 64 bt vm and it would be silly to break them up, the interpreter + // leaves one slot empty and only stores to a single slot. In this case the + // slot that is occupied is the T_VOID slot. See I said it was confusing. + + VMReg r_1 = regs[i].first(); + VMReg r_2 = regs[i].second(); + if (!r_1->is_valid()) { + assert(!r_2->is_valid(), ""); + continue; + } + if (r_1->is_stack()) { + // memory to memory use rax + int ld_off = r_1->reg2stack() * VMRegImpl::stack_slot_size + extraspace; + if (!r_2->is_valid()) { + // sign extend?? + __ ldws(rax, Address(esp, ld_off)); + __ stptr(rax, Address(esp, st_off)); + + } else { + + __ ldl(rax, Address(esp, ld_off)); + + // Two VMREgs|OptoRegs can be T_OBJECT, T_ADDRESS, T_DOUBLE, T_LONG + // T_DOUBLE and T_LONG use two slots in the interpreter + if ( sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) { + // ld_off == LSW, ld_off+wordSize == MSW + // st_off == MSW, next_off == LSW + __ stl(rax, Address(esp, next_off)); +#ifdef ASSERT + // Overwrite the unused slot with known junk + __ mov_immediate64(rax, CONST64(0xdeadffffdeadaaaa)); + __ stptr(rax, Address(esp, st_off)); +#endif /* ASSERT */ + } else { + __ stl(rax, Address(esp, st_off)); + } + } + } else if (r_1->is_Register()) { + Register r = r_1->as_Register(); + if (!r_2->is_valid()) { + // must be only an int (or less ) so move only 32bits to slot + // why not sign extend?? + __ stw(r, Address(esp, st_off)); + } else { + // Two VMREgs|OptoRegs can be T_OBJECT, T_ADDRESS, T_DOUBLE, T_LONG + // T_DOUBLE and T_LONG use two slots in the interpreter + if ( sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) { + // long/double in gpr +#ifdef ASSERT + // Overwrite the unused slot with known junk + __ mov_immediate64(rax, CONST64(0xdeadffffdeadaaab)); + __ stptr(rax, Address(esp, st_off)); +#endif /* ASSERT */ + __ stl(r, Address(esp, next_off)); + } else { + __ stl(r, Address(esp, st_off)); + } + } + } else { + assert(r_1->is_FloatRegister(), ""); + if (!r_2->is_valid()) { + // only a float use just part of the slot + __ store_float(r_1->as_FloatRegister(), Address(esp, st_off)); + } else { +#ifdef ASSERT + // Overwrite the unused slot with known junk + __ mov_immediate64(rax, CONST64(0xdeadffffdeadaaac)); + __ stptr(rax, Address(esp, st_off)); +#endif /* ASSERT */ + __ store_double(r_1->as_FloatRegister(), Address(esp, next_off)); + } + } + } + + // Schedule the branch target address early. 
+ __ ldptr(rcx, Address(rbx, in_bytes(Method::interpreter_entry_offset()))); + __ jmp(rcx); +} + +static void range_check(MacroAssembler* masm, Register pc_reg, Register temp_reg, + address code_start, address code_end, + Label& L_ok) {SCOPEMARK_NAME(range_check, masm); + Label L_fail; + __ lea(temp_reg, ExternalAddress(code_start)); + __ cmpptr(pc_reg, temp_reg, temp_reg); + __ jcc(Assembler::belowEqual, L_fail, temp_reg); + __ lea(temp_reg, ExternalAddress(code_end)); + __ cmpptr(pc_reg, temp_reg, temp_reg); + __ jcc(Assembler::below, L_ok, temp_reg); + __ bind(L_fail); +} + +void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm, + int total_args_passed, + int comp_args_on_stack, + const BasicType *sig_bt, + const VMRegPair *regs) {__ block_comment("gen_i2c_adapter");//__ debug_stop("gen_i2c_adapter"); + + // Note: r13 contains the senderSP on entry. We must preserve it since + // we may do a i2c -> c2i transition if we lose a race where compiled + // code goes non-entrant while we get args ready. + // In addition we use r13 to locate all the interpreter args as + // we must align the stack to 16 bytes on an i2c entry else we + // lose alignment we expect in all compiled code and register + // save code can segv when fxsave instructions find improperly + // aligned stack pointer. + + // Adapters can be frameless because they do not require the caller + // to perform additional cleanup work, such as correcting the stack pointer. + // An i2c adapter is frameless because the *caller* frame, which is interpreted, + // routinely repairs its own stack pointer (from interpreter_frame_last_sp), + // even if a callee has modified the stack pointer. + // A c2i adapter is frameless because the *callee* frame, which is interpreted, + // routinely repairs its caller's stack pointer (from sender_sp, which is set + // up via the senderSP register). + // In other words, if *either* the caller or callee is interpreted, we can + // get the stack pointer repaired after a call. + // This is why c2i and i2c adapters cannot be indefinitely composed. + // In particular, if a c2i adapter were to somehow call an i2c adapter, + // both caller and callee would be compiled methods, and neither would + // clean up the stack pointer changes performed by the two adapters. + // If this happens, control eventually transfers back to the compiled + // caller, but with an uncorrected stack, causing delayed havoc. + + // Pick up the return address + __ movl(rax, RA); + + if (VerifyAdapterCalls && + (Interpreter::code() != NULL || StubRoutines::code1() != NULL)) { + // So, let's test for cascading c2i/i2c adapters right now. 
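As an aside on the `range_check` helper defined just above and used in this VerifyAdapterCalls block: it compares the return pc against the blob bounds with an unsigned "belowEqual fails, below passes" pair, i.e. the pc must lie strictly between code_start and code_end. A standalone restatement with plain integers (no ExternalAddress/jcc machinery, purely illustrative):

```c++
#include <cstdint>
#include <cstdio>

// Restatement of the interval test range_check() emits: fall into L_fail when
// pc <= start, branch to L_ok when pc < end, so ok means start < pc < end.
static bool pc_in_blob(uintptr_t pc, uintptr_t code_start, uintptr_t code_end) {
  return pc > code_start && pc < code_end;
}

int main() {
  uintptr_t start = 0x1000, end = 0x2000;        // made-up blob bounds
  std::printf("%d %d %d\n",
              pc_in_blob(0x1000, start, end),    // 0: equal to start fails
              pc_in_blob(0x1abc, start, end),    // 1: strictly inside
              pc_in_blob(0x2000, start, end));   // 0: equal to end fails
  return 0;
}
```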
+ // assert(Interpreter::contains($return_addr) || + // StubRoutines::contains($return_addr), + // "i2c adapter must return to an interpreter frame"); + __ block_comment("verify_i2c { "); + Label L_ok; + if (Interpreter::code() != NULL) + range_check(masm, rax, r11, + Interpreter::code()->code_start(), Interpreter::code()->code_end(), + L_ok); + if (StubRoutines::code1() != NULL) + range_check(masm, rax, r11, + StubRoutines::code1()->code_begin(), StubRoutines::code1()->code_end(), + L_ok); + if (StubRoutines::code2() != NULL) + range_check(masm, rax, r11, + StubRoutines::code2()->code_begin(), StubRoutines::code2()->code_end(), + L_ok); + const char* msg = "i2c adapter must return to an interpreter frame"; + __ block_comment(msg); + __ stop(msg); + __ bind(L_ok); + __ block_comment("} verify_i2ce "); + } + + // Must preserve original SP for loading incoming arguments because + // we need to align the outgoing SP for compiled code. + __ movl(r11, rsp); + + // Cut-out for having no stack args. Since up to 2 int/oop args are passed + // in registers, we will occasionally have no stack args. + int comp_words_on_stack = 0; + if (comp_args_on_stack) { + // Sig words on the stack are greater-than VMRegImpl::stack0. Those in + // registers are below. By subtracting stack0, we either get a negative + // number (all values in registers) or the maximum stack slot accessed. + + // Convert 4-byte c2 stack slots to words. + comp_words_on_stack = align_up(comp_args_on_stack*VMRegImpl::stack_slot_size, wordSize)>>LogBytesPerWord; + // Round up to miminum stack alignment, in wordSize + comp_words_on_stack = align_up(comp_words_on_stack, 2); + __ subptr(esp, comp_words_on_stack * wordSize, esp); + } + + // push the return address and misalign the stack that youngest frame always sees + // as far as the placement of the call instruction + //__ push(rax); //TODO:How to resolve this ? jzy + + // Put saved SP in another register + const Register saved_sp = rax; + __ movl(saved_sp, r11); + + // Will jump to the compiled code just as if compiled code was doing it. + // Pre-load the register-jump target early, to schedule it better. + __ ldptr(r11, Address(rmethod, in_bytes(Method::from_compiled_offset()))); //check jzy? + +#if INCLUDE_JVMCI + if (EnableJVMCI) { + // check if this call should be routed towards a specific entry point + __ cmpptr(Address(rthread, in_bytes(JavaThread::jvmci_alternate_call_target_offset())), R0); + Label no_alternative_target; + __ jcc(Assembler::equal, no_alternative_target); + __ ldptr(r11, Address(rthread, in_bytes(JavaThread::jvmci_alternate_call_target_offset()))); + __ stptr(R0, Address(rthread, in_bytes(JavaThread::jvmci_alternate_call_target_offset()))); + __ bind(no_alternative_target); + } +#endif // INCLUDE_JVMCI + + // Now generate the shuffle code. Pick up all register args and move the + // rest through the floating point stack top. + for (int i = 0; i < total_args_passed; i++) { + if (sig_bt[i] == T_VOID) { + // Longs and doubles are passed in native word order, but misaligned + // in the 32-bit build. + assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half"); + continue; + } + + // Pick up 0, 1 or 2 words from SP+offset. + + assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(), + "scrambled load targets?"); + // Load in argument order going down. + int ld_off = (total_args_passed - 1 - i)*Interpreter::stackElementSize; + // Point to interpreter value (vs. 
tag) + int next_off = ld_off - Interpreter::stackElementSize; + // + // + // + VMReg r_1 = regs[i].first(); + VMReg r_2 = regs[i].second(); + if (!r_1->is_valid()) { + assert(!r_2->is_valid(), ""); + continue; + } + if (r_1->is_stack()) { + // Convert stack slot to an SP offset + int st_off = regs[i].first()->reg2stack()*VMRegImpl::stack_slot_size ; + + // We can use r13 as a temp here because compiled code doesn't need r13 as an input + // and if we end up going thru a c2i because of a miss a reasonable value of r13 + // will be generated. + if (!r_2->is_valid()) { + // sign extend??? + __ ldws(r13, Address(saved_sp, ld_off)); + __ stptr(r13, Address(esp, st_off), rscratch2_AT); + } else { + // + // We are using two optoregs. This can be either T_OBJECT, T_ADDRESS, T_LONG, or T_DOUBLE + // the interpreter allocates two slots but only uses one for thr T_LONG or T_DOUBLE case + // So we must adjust where to pick up the data to match the interpreter. + // + // Interpreter local[n] == MSW, local[n+1] == LSW however locals + // are accessed as negative so LSW is at LOW address + + // ld_off is MSW so get LSW + const int offset = (sig_bt[i]==T_LONG||sig_bt[i]==T_DOUBLE)? + next_off : ld_off; + __ ldl(r13, Address(saved_sp, offset)); + // st_off is LSW (i.e. reg.first()) + __ stl(r13, Address(esp, st_off)); + } + } else if (r_1->is_Register()) { // Register argument + Register r = r_1->as_Register(); + assert(r != rax, "must be different"); + if (r_2->is_valid()) { + // + // We are using two VMRegs. This can be either T_OBJECT, T_ADDRESS, T_LONG, or T_DOUBLE + // the interpreter allocates two slots but only uses one for thr T_LONG or T_DOUBLE case + // So we must adjust where to pick up the data to match the interpreter. + + const int offset = (sig_bt[i]==T_LONG||sig_bt[i]==T_DOUBLE)? + next_off : ld_off; + + // this can be a misaligned move + __ ldl(r, Address(saved_sp, offset)); + } else { + // sign extend and use a full word? + __ ldws(r, Address(saved_sp, ld_off)); + } + } else { + if (!r_2->is_valid()) { + __ load_float(r_1->as_FloatRegister(), Address(saved_sp, ld_off)); + } else { + __ load_double(r_1->as_FloatRegister(), Address(saved_sp, next_off)); + } + } + } + + // 6243940 We might end up in handle_wrong_method if + // the callee is deoptimized as we race thru here. If that + // happens we don't want to take a safepoint because the + // caller frame will look interpreted and arguments are now + // "compiled" so it is much better to make this transition + // invisible to the stack walking code. Unfortunately if + // we try and find the callee by normal means a safepoint + // is possible. So we stash the desired callee in the thread + // and the vm will find there should this case occur. + + __ stptr(rbx, Address(rthread, JavaThread::callee_target_offset())); + + // put Method* where a c2i would expect should we end up there + // only needed becaus eof c2 resolve stubs return Method* as a result in + // rax + __ movl(rax, rbx); //TODO:why need this? 
jzy + __ jmp(r11); +} + +// --------------------------------------------------------------- +AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm, + int total_args_passed, + int comp_args_on_stack, + const BasicType *sig_bt, + const VMRegPair *regs, + AdapterFingerPrint* fingerprint) {__ block_comment("generate_i2c2i_adapters");//__ stop("generate_i2c2i_adapters"); + address i2c_entry = __ pc(); + + gen_i2c_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs); + + // ------------------------------------------------------------------------- + // Generate a C2I adapter. On entry we know rbx holds the Method* during calls + // to the interpreter. The args start out packed in the compiled layout. They + // need to be unpacked into the interpreter layout. This will almost always + // require some stack space. We grow the current (compiled) stack, then repack + // the args. We finally end in a jump to the generic interpreter entry point. + // On exit from the interpreter, the interpreter will restore our SP (lest the + // compiled code, which relys solely on SP and not RBP, get sick). + + address c2i_unverified_entry = __ pc(); + Label skip_fixup; + Label ok; + + Register holder = rax; + Register receiver = j_rarg0; + Register temp = rbx; + + { + __ load_klass(temp, receiver); + __ cmpptr(temp, Address(holder, CompiledICHolder::holder_klass_offset()), rscratch1_GP); + __ ldptr(rbx, Address(holder, CompiledICHolder::holder_metadata_offset())); + __ jcc(Assembler::equal, ok, rscratch1_GP); + __ jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub())); + + __ bind(ok); + // Method might have been compiled since the call site was patched to + // interpreted if that is the case treat it as a miss so we can get + // the call site corrected. 
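Before the Method::code() test that follows, it may help to restate the whole unverified-entry decision in plain C++: a mismatching receiver klass goes to the ic_miss stub, a method that has meanwhile acquired compiled code also goes to ic_miss (so the call site gets re-patched), and only the genuinely interpreted case falls into skip_fixup. The types below (FakeMethod, FakeICHolder) are stand-ins, not the HotSpot classes.

```c++
#include <cstdio>

struct FakeMethod   { const void* code; };                    // compiled entry or null
struct FakeICHolder { const void* holder_klass; FakeMethod* holder_metadata; };

enum Outcome { IC_MISS, C2I_SKIP_FIXUP };

static Outcome unverified_entry(const void* receiver_klass, const FakeICHolder& ic) {
  if (receiver_klass != ic.holder_klass) return IC_MISS;      // wrong receiver type
  if (ic.holder_metadata->code != nullptr) return IC_MISS;    // now compiled: repatch site
  return C2I_SKIP_FIXUP;                                      // really interpreted: go on
}

int main() {
  int klassA = 0, klassB = 0;
  FakeMethod m{nullptr};
  FakeICHolder ic{&klassA, &m};
  std::printf("%d %d\n", unverified_entry(&klassA, ic),       // 1: C2I_SKIP_FIXUP
                         unverified_entry(&klassB, ic));      // 0: IC_MISS
  return 0;
}
```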
+ __ cmpptr(Address(rbx, in_bytes(Method::code_offset())), R0, rscratch1_GP); + __ jcc(Assembler::equal, skip_fixup, rscratch1_GP); + __ jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()), rscratch1_GP); + } + + address c2i_entry = __ pc(); + + // Class initialization barrier for static methods + address c2i_no_clinit_check_entry = NULL; + if (VM_Version::supports_fast_class_init_checks()) { + Label L_skip_barrier; + Register method = rbx; + + { // Bypass the barrier for non-static methods + Register flags = rscratch1; + __ ldwu(flags, Address(method, Method::access_flags_offset())); + __ testw(flags, JVM_ACC_STATIC); + __ jcc(Assembler::zero, L_skip_barrier); // non-static + } + + Register klass = rscratch1; + __ load_method_holder(klass, method); + __ clinit_barrier(klass, rthread, &L_skip_barrier /*L_fast_path*/); + + __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); // slow path + + __ bind(L_skip_barrier); + c2i_no_clinit_check_entry = __ pc(); + } + + BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); + bs->c2i_entry_barrier(masm); + + gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup); + + return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry, c2i_no_clinit_check_entry); +} + +int SharedRuntime::vector_calling_convention(VMRegPair *regs, + uint num_bits, + uint total_args_passed) { + Unimplemented(); + return 0; +} + +int SharedRuntime::c_calling_convention(const BasicType *sig_bt, + VMRegPair *regs, + VMRegPair *regs2, + int total_args_passed) {//ShouldNotReachHere(); + assert(regs2 == NULL, "not needed on Sw64"); + + // We return the amount of VMRegImpl stack slots we need to reserve for all + // the arguments NOT counting out_preserve_stack_slots. 
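To make "the amount of VMRegImpl stack slots we need to reserve" concrete before the loop below: up to six integer and six FP argument registers are handed out, every spilled argument is charged two slots, and the raw count is returned (unlike java_calling_convention above, this function does not round it up). A small assumed example with nine pointer/int-sized C arguments:

```c++
#include <cstdio>

// Back-of-the-envelope count for a hypothetical native call with nine
// pointer/int-sized C arguments (e.g. JNIEnv*, jobject receiver, seven jints).
// The 6 comes from the c_rarg0..c_rarg5 array below; the rest is illustrative.
int main() {
  const int n_int_register_parameters_c = 6;
  const int total_c_args = 9;

  int in_regs  = total_c_args < n_int_register_parameters_c ? total_c_args
                                                            : n_int_register_parameters_c;
  int spilled  = total_c_args - in_regs;   // 3 arguments go to the stack
  int stk_args = 2 * spilled;              // two VMRegImpl slots charged apiece

  std::printf("%d in registers, %d spilled, c_calling_convention returns %d\n",
              in_regs, spilled, stk_args);
  return 0;
}
```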
+ static const Register INT_ArgReg[Argument::n_register_parameters] = { + c_rarg0, c_rarg1, c_rarg2, c_rarg3, c_rarg4, c_rarg5 + }; + static const FloatRegister FP_ArgReg[Argument::n_float_register_parameters] = { + c_farg0, c_farg1, c_farg2, c_farg3, + c_farg4, c_farg5 + }; + + + uint args = 0; + uint stk_args = 0; // inc by 2 each time + + for (int i = 0; i < total_args_passed; i++) { + switch (sig_bt[i]) { + case T_BOOLEAN: + case T_CHAR: + case T_BYTE: + case T_SHORT: + case T_INT: + if (args < Argument::n_int_register_parameters_c) { + regs[i].set1(INT_ArgReg[args++]->as_VMReg()); + } else { + regs[i].set1(VMRegImpl::stack2reg(stk_args)); + stk_args += 2; + } + break; + case T_LONG: + assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half"); + // fall through + case T_OBJECT: + case T_ARRAY: + case T_ADDRESS: + case T_METADATA: + if (args < Argument::n_int_register_parameters_c) { + regs[i].set2(INT_ArgReg[args++]->as_VMReg()); + } else { + regs[i].set2(VMRegImpl::stack2reg(stk_args)); + stk_args += 2; + } + break; + case T_FLOAT: + if (args < Argument::n_float_register_parameters_c) { + regs[i].set1(FP_ArgReg[args++]->as_VMReg()); + + } else { + regs[i].set1(VMRegImpl::stack2reg(stk_args)); + stk_args += 2; + } + break; + case T_DOUBLE: + assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half"); + if (args < Argument::n_float_register_parameters_c) { + regs[i].set2(FP_ArgReg[args++]->as_VMReg()); + } else { + regs[i].set2(VMRegImpl::stack2reg(stk_args)); + stk_args += 2; + } + break; + case T_VOID: // Halves of longs and doubles + assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half"); + regs[i].set_bad(); + break; + default: + ShouldNotReachHere(); + break; + } + } + + return stk_args; +} + +void SharedRuntime::save_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) {//__ stop("save_native_result"); + // We always ignore the frame_slots arg and just use the space just below frame pointer + // which by this time is free to use + switch (ret_type) { + case T_FLOAT: + __ store_float(FSF, Address(rfp, -wordSize)); + break; + case T_DOUBLE: + __ store_double(FSF, Address(rfp, -wordSize)); + break; + case T_VOID: break; + default: { + __ stptr(V0, Address(rfp, -wordSize)); + } + } +} + +void SharedRuntime::restore_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) {//__ stop("restore_native_result"); + // We always ignore the frame_slots arg and just use the space just below frame pointer + // which by this time is free to use + switch (ret_type) { + case T_FLOAT: + __ load_float(FSF, Address(rfp, -wordSize)); + break; + case T_DOUBLE: + __ load_double(FSF, Address(rfp, -wordSize)); + break; + case T_VOID: break; + default: { + __ ldptr(V0, Address(rfp, -wordSize)); + } + } +} + +static void save_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) {//__ stop("save_args"); + for ( int i = first_arg ; i < arg_count ; i++ ) { + if (args[i].first()->is_Register()) { + __ push(args[i].first()->as_Register()); + } else if (args[i].first()->is_FloatRegister()) { + __ subptr(esp, 2*wordSize, esp); + __ store_double(args[i].first()->as_FloatRegister(), Address(esp, 0)); + } + } +} + +static void restore_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) {//__ stop("restore_args"); + for ( int i = arg_count - 1 ; i >= first_arg ; i-- ) { + if (args[i].first()->is_Register()) { + __ pop(args[i].first()->as_Register()); + } else 
if (args[i].first()->is_FloatRegister()) { + __ load_double(args[i].first()->as_FloatRegister(), Address(esp, 0)); + __ addptr(esp, 2*wordSize, esp); + } + } +} + + +// Unpack an array argument into a pointer to the body and the length +// if the array is non-null, otherwise pass 0 for both. +static void unpack_array_argument(MacroAssembler* masm, VMRegPair reg, BasicType in_elem_type, VMRegPair body_arg, VMRegPair length_arg) { + Register tmp_reg = rax; + assert(!body_arg.first()->is_Register() || body_arg.first()->as_Register() != tmp_reg, + "possible collision"); + assert(!length_arg.first()->is_Register() || length_arg.first()->as_Register() != tmp_reg, + "possible collision"); + + __ block_comment("unpack_array_argument {"); + + // Pass the length, ptr pair + Label is_null, done; + VMRegPair tmp; + tmp.set_ptr(tmp_reg->as_VMReg()); + if (reg.first()->is_stack()) { + // Load the arg up from the stack + __ move_ptr(reg, tmp); + reg = tmp; + } + __ testptr(reg.first()->as_Register(), reg.first()->as_Register()); + __ jcc(Assembler::equal, is_null); + __ lea(tmp_reg, Address(reg.first()->as_Register(), arrayOopDesc::base_offset_in_bytes(in_elem_type))); + __ move_ptr(tmp, body_arg); + // load the length relative to the body. + __ ldws(tmp_reg, Address(tmp_reg, arrayOopDesc::length_offset_in_bytes() - + arrayOopDesc::base_offset_in_bytes(in_elem_type))); + __ move32_64(tmp, length_arg); + __ jmp(done); + __ bind(is_null); + // Pass zeros + __ movl(tmp_reg, R0); + __ move_ptr(tmp, body_arg); + __ move32_64(tmp, length_arg); + __ bind(done); + + __ block_comment("} unpack_array_argument"); +} + + +// Different signatures may require very different orders for the move +// to avoid clobbering other arguments. There's no simple way to +// order them safely. Compute a safe order for issuing stores and +// break any cycles in those stores. This code is fairly general but +// it's not necessary on the other platforms so we keep it in the +// platform dependent code instead of moving it into a shared file. +// (See bugs 7013347 & 7145024.) +// Note that this code is specific to LP64. +class ComputeMoveOrder: public StackObj { + class MoveOperation: public ResourceObj { + friend class ComputeMoveOrder; + private: + VMRegPair _src; + VMRegPair _dst; + int _src_index; + int _dst_index; + bool _processed; + MoveOperation* _next; + MoveOperation* _prev; + + static int get_id(VMRegPair r) { + return r.first()->value(); + } + + public: + MoveOperation(int src_index, VMRegPair src, int dst_index, VMRegPair dst): + _src(src) + , _dst(dst) + , _src_index(src_index) + , _dst_index(dst_index) + , _processed(false) + , _next(NULL) + , _prev(NULL) { + } + + VMRegPair src() const { return _src; } + int src_id() const { return get_id(src()); } + int src_index() const { return _src_index; } + VMRegPair dst() const { return _dst; } + void set_dst(int i, VMRegPair dst) { _dst_index = i, _dst = dst; } + int dst_index() const { return _dst_index; } + int dst_id() const { return get_id(dst()); } + MoveOperation* next() const { return _next; } + MoveOperation* prev() const { return _prev; } + void set_processed() { _processed = true; } + bool is_processed() const { return _processed; } + + // insert + void break_cycle(VMRegPair temp_register) { + // create a new store following the last store + // to move from the temp_register to the original + MoveOperation* new_store = new MoveOperation(-1, temp_register, dst_index(), dst()); + + // break the cycle of links and insert new_store at the end + // break the reverse link. 
+ MoveOperation* p = prev(); + assert(p->next() == this, "must be"); + _prev = NULL; + p->_next = new_store; + new_store->_prev = p; + + // change the original store to save it's value in the temp. + set_dst(-1, temp_register); + } + + void link(GrowableArray& killer) { + // link this store in front the store that it depends on + MoveOperation* n = killer.at_grow(src_id(), NULL); + if (n != NULL) { + assert(_next == NULL && n->_prev == NULL, "shouldn't have been set yet"); + _next = n; + n->_prev = this; + } + } + }; + + private: + GrowableArray edges; + + public: + ComputeMoveOrder(int total_in_args, const VMRegPair* in_regs, int total_c_args, VMRegPair* out_regs, + const BasicType* in_sig_bt, GrowableArray& arg_order, VMRegPair tmp_vmreg) { + // Move operations where the dest is the stack can all be + // scheduled first since they can't interfere with the other moves. + for (int i = total_in_args - 1, c_arg = total_c_args - 1; i >= 0; i--, c_arg--) { + if (in_sig_bt[i] == T_ARRAY) { + c_arg--; + if (out_regs[c_arg].first()->is_stack() && + out_regs[c_arg + 1].first()->is_stack()) { + arg_order.push(i); + arg_order.push(c_arg); + } else { + if (out_regs[c_arg].first()->is_stack() || + in_regs[i].first() == out_regs[c_arg].first()) { + add_edge(i, in_regs[i].first(), c_arg, out_regs[c_arg + 1]); + } else { + add_edge(i, in_regs[i].first(), c_arg, out_regs[c_arg]); + } + } + } else if (in_sig_bt[i] == T_VOID) { + arg_order.push(i); + arg_order.push(c_arg); + } else { + if (out_regs[c_arg].first()->is_stack() || + in_regs[i].first() == out_regs[c_arg].first()) { + arg_order.push(i); + arg_order.push(c_arg); + } else { + add_edge(i, in_regs[i].first(), c_arg, out_regs[c_arg]); + } + } + } + // Break any cycles in the register moves and emit the in the + // proper order. + GrowableArray* stores = get_store_order(tmp_vmreg); + for (int i = 0; i < stores->length(); i++) { + arg_order.push(stores->at(i)->src_index()); + arg_order.push(stores->at(i)->dst_index()); + } + } + + // Collected all the move operations + void add_edge(int src_index, VMRegPair src, int dst_index, VMRegPair dst) { + if (src.first() == dst.first()) return; + edges.append(new MoveOperation(src_index, src, dst_index, dst)); + } + + // Walk the edges breaking cycles between moves. The result list + // can be walked in order to produce the proper set of loads + GrowableArray* get_store_order(VMRegPair temp_register) { + // Record which moves kill which values + GrowableArray killer; + for (int i = 0; i < edges.length(); i++) { + MoveOperation* s = edges.at(i); + assert(killer.at_grow(s->dst_id(), NULL) == NULL, "only one killer"); + killer.at_put_grow(s->dst_id(), s, NULL); + } + assert(killer.at_grow(MoveOperation::get_id(temp_register), NULL) == NULL, + "make sure temp isn't in the registers that are killed"); + + // create links between loads and stores + for (int i = 0; i < edges.length(); i++) { + edges.at(i)->link(killer); + } + + // at this point, all the move operations are chained together + // in a doubly linked list. Processing it backwards finds + // the beginning of the chain, forwards finds the end. If there's + // a cycle it can be broken at any point, so pick an edge and walk + // backward until the list ends or we end where we started. 
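The cycle-breaking step is easier to see on a toy case than in the linked-list bookkeeping here: when the desired moves form a cycle such as rA->rB and rB->rA, one edge is redirected through the temp register and its restoring store is appended last, so the stores can be issued sequentially. A minimal standalone sketch (plain strings, not VMRegPair or the real MoveOperation class):

```c++
#include <cstdio>
#include <string>
#include <vector>

struct Move { std::string src, dst; };

int main() {
  std::vector<Move> wanted = { {"rA", "rB"}, {"rB", "rA"} };   // a 2-cycle
  const std::string temp = "rTMP";                             // plays the tmp_vmreg role

  // Park the value the other move is about to clobber (rB) in the temp first,
  // then the remaining move can go, and the temp refills the cycle's last dst.
  std::vector<Move> ordered = { {wanted[1].src, temp},             // rB   -> rTMP
                                {wanted[0].src, wanted[0].dst},    // rA   -> rB
                                {temp,          wanted[1].dst} };  // rTMP -> rA

  for (const Move& m : ordered)
    std::printf("move %s -> %s\n", m.src.c_str(), m.dst.c_str());
  return 0;
}
```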
+ GrowableArray* stores = new GrowableArray(); + for (int e = 0; e < edges.length(); e++) { + MoveOperation* s = edges.at(e); + if (!s->is_processed()) { + MoveOperation* start = s; + // search for the beginning of the chain or cycle + while (start->prev() != NULL && start->prev() != s) { + start = start->prev(); + } + if (start->prev() == s) { + start->break_cycle(temp_register); + } + // walk the chain forward inserting to store list + while (start != NULL) { + stores->append(start); + start->set_processed(); + start = start->next(); + } + } + } + return stores; + } +}; + +static void verify_oop_args(MacroAssembler* masm, + const methodHandle& method, + const BasicType* sig_bt, + const VMRegPair* regs) {//__ stop("verify_oop_args"); + Register temp_reg = rmethod; // not part of any compiled calling seq + if (VerifyOops) { + for (int i = 0; i < method->size_of_parameters(); i++) { + if (is_reference_type(sig_bt[i])) { + VMReg r = regs[i].first(); + assert(r->is_valid(), "bad oop arg"); + if (r->is_stack()) { + __ ldptr(temp_reg, Address(esp, r->reg2stack() * VMRegImpl::stack_slot_size)); + __ verify_oop(temp_reg); + } else { + __ verify_oop(r->as_Register()); + } + } + } + } +} + +static void gen_special_dispatch(MacroAssembler* masm, + const methodHandle& method, + const BasicType* sig_bt, + const VMRegPair* regs) {SCOPEMARK_NAME(gen_special_dispatch, masm); + verify_oop_args(masm, method, sig_bt, regs); + vmIntrinsics::ID iid = method->intrinsic_id(); + //__ stop("gen_special_dispatch"); + // Now write the args into the outgoing interpreter space + bool has_receiver = false; + Register receiver_reg = noreg; + int member_arg_pos = -1; + Register member_reg = noreg; + int ref_kind = MethodHandles::signature_polymorphic_intrinsic_ref_kind(iid); + if (ref_kind != 0) { + member_arg_pos = method->size_of_parameters() - 1; // trailing MemberName argument + member_reg = rmethod; // known to be free at this point + has_receiver = MethodHandles::ref_kind_has_receiver(ref_kind); + } else if (iid == vmIntrinsics::_invokeBasic || iid == vmIntrinsics::_linkToNative) { + has_receiver = true; + } else { + fatal("unexpected intrinsic id %d", vmIntrinsics::as_int(iid)); + } + + if (member_reg != noreg) { + // Load the member_arg into register, if necessary. + SharedRuntime::check_member_name_argument_is_last_argument(method, sig_bt, regs); + VMReg r = regs[member_arg_pos].first(); + if (r->is_stack()) { + __ ldptr(member_reg, Address(esp, r->reg2stack() * VMRegImpl::stack_slot_size)); + } else { + // no data motion is needed + member_reg = r->as_Register(); + } + } + + if (has_receiver) { + // Make sure the receiver is loaded into a register. + assert(method->size_of_parameters() > 0, "oob"); + assert(sig_bt[0] == T_OBJECT, "receiver argument must be an object"); + VMReg r = regs[0].first(); + assert(r->is_valid(), "bad receiver arg"); + if (r->is_stack()) { + // Porting note: This assumes that compiled calling conventions always + // pass the receiver oop in a register. If this is not true on some + // platform, pick a temp and load the receiver from stack. 
+ fatal("receiver always in a register"); + receiver_reg = j_rarg0; // known to be free at this point + __ ldptr(receiver_reg, Address(esp, r->reg2stack() * VMRegImpl::stack_slot_size)); + } else { + // no data motion is needed + receiver_reg = r->as_Register(); + } + } + + // Figure out which address we are really jumping to: + MethodHandles::generate_method_handle_dispatch(masm, iid, + receiver_reg, member_reg, /*for_compiler_entry:*/ true); +} + +// --------------------------------------------------------------------------- +// Generate a native wrapper for a given method. The method takes arguments +// in the Java compiled code convention, marshals them to the native +// convention (handlizes oops, etc), transitions to native, makes the call, +// returns to java state (possibly blocking), unhandlizes any result and +// returns. +// +// Critical native functions are a shorthand for the use of +// GetPrimtiveArrayCritical and disallow the use of any other JNI +// functions. The wrapper is expected to unpack the arguments before +// passing them to the callee and perform checks before and after the +// native call to ensure that they GCLocker +// lock_critical/unlock_critical semantics are followed. Some other +// parts of JNI setup are skipped like the tear down of the JNI handle +// block and the check for pending exceptions it's impossible for them +// to be thrown. +// +// They are roughly structured like this: +// if (GCLocker::needs_gc()) +// SharedRuntime::block_for_jni_critical(); +// tranistion to thread_in_native +// unpack arrray arguments and call native entry point +// check for safepoint in progress +// check if any thread suspend flags are set +// call into JVM and possible unlock the JNI critical +// if a GC was suppressed while in the critical native. +// transition back to thread_in_Java +// return to caller +// +nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, + const methodHandle& method, + int compile_id, + BasicType* in_sig_bt, + VMRegPair* in_regs, + BasicType ret_type, + address critical_entry) { //__ stop("generate_native_wrapper"); + if (method->is_method_handle_intrinsic()) { + vmIntrinsics::ID iid = method->intrinsic_id(); + intptr_t start = (intptr_t)__ pc(); + int vep_offset = ((intptr_t)__ pc()) - start; + gen_special_dispatch(masm, + method, + in_sig_bt, + in_regs); + int frame_complete = ((intptr_t)__ pc()) - start; // not complete, period + __ flush(); + int stack_slots = SharedRuntime::out_preserve_stack_slots(); // no out slots at all, actually + return nmethod::new_native_nmethod(method, + compile_id, + masm->code(), + vep_offset, + frame_complete, + stack_slots / VMRegImpl::slots_per_word, + in_ByteSize(-1), + in_ByteSize(-1), + (OopMapSet*)NULL); + } + bool is_critical_native = true; + address native_func = critical_entry; + if (native_func == NULL) { + native_func = method->native_function(); + is_critical_native = false; + } + assert(native_func != NULL, "must have function"); + + // An OopMap for lock (and class if static) + OopMapSet *oop_maps = new OopMapSet(); + intptr_t start = (intptr_t)__ pc(); + + // We have received a description of where all the java arg are located + // on entry to the wrapper. We need to convert these args to where + // the jni function will expect them. 
To figure out where they go + // we convert the java signature to a C signature by inserting + // the hidden arguments as arg[0] and possibly arg[1] (static method) + + const int total_in_args = method->size_of_parameters(); + int total_c_args = total_in_args; + if (!is_critical_native) { + total_c_args += 1; + if (method->is_static()) { + total_c_args++; + } + } else { + for (int i = 0; i < total_in_args; i++) { + if (in_sig_bt[i] == T_ARRAY) { + total_c_args++; + } + } + } + + BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args); + VMRegPair* out_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args); + BasicType* in_elem_bt = NULL; + + int argc = 0; + if (!is_critical_native) { + out_sig_bt[argc++] = T_ADDRESS; + if (method->is_static()) { + out_sig_bt[argc++] = T_OBJECT; + } + + for (int i = 0; i < total_in_args ; i++ ) { + out_sig_bt[argc++] = in_sig_bt[i]; + } + } else { + in_elem_bt = NEW_RESOURCE_ARRAY(BasicType, total_in_args); + SignatureStream ss(method->signature()); + for (int i = 0; i < total_in_args ; i++ ) { + if (in_sig_bt[i] == T_ARRAY) { + // Arrays are passed as int, elem* pair + out_sig_bt[argc++] = T_INT; + out_sig_bt[argc++] = T_ADDRESS; + ss.skip_array_prefix(1); // skip one '[' + assert(ss.is_primitive(), "primitive type expected"); + in_elem_bt[i] = ss.type(); + } else { + out_sig_bt[argc++] = in_sig_bt[i]; + in_elem_bt[i] = T_VOID; + } + if (in_sig_bt[i] != T_VOID) { + assert(in_sig_bt[i] == ss.type() || + in_sig_bt[i] == T_ARRAY, "must match"); + ss.next(); + } + } + } + + // Now figure out where the args must be stored and how much stack space + // they require. + int out_arg_slots; + out_arg_slots = c_calling_convention(out_sig_bt, out_regs, NULL, total_c_args); + + // Compute framesize for the wrapper. We need to handlize all oops in + // incoming registers + + // Calculate the total number of stack slots we will need. + + // First count the abi requirement plus all of the outgoing args + int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots; + + // Now the space for the inbound oop handle area + int total_save_slots = 6 * VMRegImpl::slots_per_word; // 6 arguments passed in registers + if (is_critical_native) { + // Critical natives may have to call out so they need a save area + // for register arguments. 
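To see what the counting loop below produces, assume a hypothetical critical native taking (jint, jlong, jfloat, jdouble, jint[]) with every argument arriving in a register: ints and floats cost one slot each, while longs, doubles and the T_ARRAY pointer cost two on LP64. The arithmetic sketch uses that assumed signature only.

```c++
#include <cstdio>

// Assumed example: critical native (jint, jlong, jfloat, jdouble, jint[]),
// all five arguments in registers.  Mirrors the slot counting in the loop
// that follows (T_ARRAY is charged like a long on LP64).
int main() {
  int single_slots = 0, double_slots = 0;

  single_slots += 1;   // T_INT
  double_slots += 1;   // T_LONG
  single_slots += 1;   // T_FLOAT
  double_slots += 1;   // T_DOUBLE
  double_slots += 1;   // T_ARRAY (pointer-sized)

  int total_save_slots = double_slots * 2 + single_slots;   // 8 slots
  std::printf("single=%d double=%d total_save_slots=%d\n",
              single_slots, double_slots, total_save_slots);
  return 0;   // with double_slots != 0 the running stack_slots also gets 2-aligned
}
```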
+ int double_slots = 0; + int single_slots = 0; + for ( int i = 0; i < total_in_args; i++) { + if (in_regs[i].first()->is_Register()) { + const Register reg = in_regs[i].first()->as_Register(); + switch (in_sig_bt[i]) { + case T_BOOLEAN: + case T_BYTE: + case T_SHORT: + case T_CHAR: + case T_INT: single_slots++; break; + case T_ARRAY: // specific to LP64 (7145024) + case T_LONG: double_slots++; break; + default: ShouldNotReachHere(); + } + } else if (in_regs[i].first()->is_FloatRegister()) { + switch (in_sig_bt[i]) { + case T_FLOAT: single_slots++; break; + case T_DOUBLE: double_slots++; break; + default: ShouldNotReachHere(); + } + } else if (in_regs[i].first()->is_FloatRegister()) { + ShouldNotReachHere(); + } + } + total_save_slots = double_slots * 2 + single_slots; + // align the save area + if (double_slots != 0) { + stack_slots = align_up(stack_slots, 2); + } + } + + int oop_handle_offset = stack_slots; + stack_slots += total_save_slots; + + // Now any space we need for handlizing a klass if static method + + int klass_slot_offset = 0; + int klass_offset = -1; + int lock_slot_offset = 0; + bool is_static = false; + + if (method->is_static()) { + klass_slot_offset = stack_slots; + stack_slots += VMRegImpl::slots_per_word; + klass_offset = klass_slot_offset * VMRegImpl::stack_slot_size; + is_static = true; + } + + // Plus a lock if needed + + if (method->is_synchronized()) { + lock_slot_offset = stack_slots; + stack_slots += VMRegImpl::slots_per_word; + } + + // Now a place (+2) to save return values or temp during shuffling + // + 4 for return address (which we own) and saved rbp + stack_slots += 6;// swjdk8 is 2+6, but i think 6 is enough + + // Ok The space we have allocated will look like: + // + // + // FP-> | | + // |---------------------| + // | 2 slots for moves | + // |---------------------| + // | lock box (if sync) | + // |---------------------| <- lock_slot_offset + // | klass (if static) | + // |---------------------| <- klass_slot_offset + // | oopHandle area | + // |---------------------| <- oop_handle_offset (6 java arg registers) + // | outbound memory | + // | based arguments | + // | | + // |---------------------| + // | | + // SP-> | out_preserved_slots | + // + // + + + // Now compute actual number of stack words we need rounding to make + // stack properly aligned. + stack_slots = align_up(stack_slots, StackAlignmentInSlots); + + int stack_size = stack_slots * VMRegImpl::stack_slot_size; + + // First thing make an ic check to see if we should even be here + + // We are free to use all registers as temps without saving them and + // restoring them except rbp. rbp is the only callee save register + // as far as the interpreter and the compiler(s) are concerned. 
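Putting the frame layout pictured above into numbers: a rough calculation of stack_slots and stack_size for a hypothetical synchronized static native whose C arguments all fit in registers. The constants used (slots_per_word = 2, stack_slot_size = 4, StackAlignmentInSlots = 4) are assumptions made for the example, not values quoted from the port.

```c++
#include <cstdio>

static int align_up(int x, int a) { return (x + a - 1) & ~(a - 1); }

int main() {
  // Assumed constants for the example (not quoted from the port).
  const int slots_per_word        = 2;   // VMRegImpl::slots_per_word on a 64-bit VM
  const int stack_slot_size       = 4;   // bytes per VMRegImpl slot
  const int StackAlignmentInSlots = 4;   // 16-byte stack alignment

  // Hypothetical synchronized static native with no stack-passed C arguments.
  int out_preserve  = 0;                 // out_preserve_stack_slots() for the example
  int out_arg_slots = 0;                 // everything fits in registers
  int stack_slots   = out_preserve + out_arg_slots;

  stack_slots += 6 * slots_per_word;     // oop handle area (6 java arg registers)
  stack_slots += slots_per_word;         // handlized klass (static method)
  stack_slots += slots_per_word;         // lock box (synchronized)
  stack_slots += 6;                      // temp/return-value scratch area

  stack_slots = align_up(stack_slots, StackAlignmentInSlots);
  std::printf("stack_slots=%d  stack_size=%d bytes\n",
              stack_slots, stack_slots * stack_slot_size);
  return 0;   // 22 slots rounded up to 24, i.e. 96 bytes for this made-up case
}
```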
+ + + const Register ic_reg = V0; + const Register receiver = j_rarg0; + + Label hit; + Label exception_pending; + //__ stop("generate_native_wrapper"); + assert_different_registers(ic_reg, receiver, rscratch3); + __ verify_oop(receiver); + __ load_klass(rscratch3, receiver); +// __ cmpl(ic_reg, rscratch3); +// __ jcc(Assembler::equal, hit); + __ beq_c(ic_reg, rscratch3, hit); + __ jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub())); + + // Verified entry point must be aligned + __ align(8); + + __ bind(hit); + + int vep_offset = ((intptr_t)__ pc()) - start; + + if (VM_Version::supports_fast_class_init_checks() && method->needs_clinit_barrier()) { + Label L_skip_barrier; + Register klass = T1; + __ mov_metadata(klass, method->method_holder()); // InstanceKlass* + __ clinit_barrier(klass, rthread, &L_skip_barrier /*L_fast_path*/); + + __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); // slow path + + __ bind(L_skip_barrier); + } + +#ifdef COMPILER1 + // For Object.hashCode, System.identityHashCode try to pull hashCode from object header if available. + if ((InlineObjectHash && method->intrinsic_id() == vmIntrinsics::_hashCode) || (method->intrinsic_id() == vmIntrinsics::_identityHashCode)) { + inline_check_hashcode_from_object_header(masm, method, j_rarg0 /*obj_reg*/, rax /*result*/); + } +#endif // COMPILER1 + + // The instruction at the verified entry point must be 5 bytes or longer + // because it can be patched on the fly by make_non_entrant. The stack bang + // instruction fits that requirement. + + // Generate stack overflow check + __ bang_stack_with_offset((int)StackOverflow::stack_shadow_zone_size()); + + // Generate a new frame for the wrapper. + __ enter(); + // -2 because return address is already present and so is saved rbp + __ subptr(rsp, stack_size - 2*wordSize, rsp); + + BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); + bs->nmethod_entry_barrier(masm); + + // Frame is now completed as far as size and linkage. + int frame_complete = ((intptr_t)__ pc()) - start; + + //if (UseRTMLocking) { + // Abort RTM transaction before calling JNI + // because critical section will be large and will be + // aborted anyway. Also nmethod could be deoptimized. + //__ xabort(0); jzy? + //} +//TODO:sw don't aligned? jzy +/*#ifdef ASSERT + { + Label L; + __ movl(V0, esp); + __ andptr(V0, -16, V0); // must be 16 byte boundary (see amd64 ABI) + __ cmpptr(V0, esp); + __ jcc(Assembler::equal, L); + __ stop("improperly aligned stack"); + __ bind(L); + } +#endif*/ /* ASSERT */ + + + // We use r14 as the oop handle for the receiver/klass + // It is callee save so it survives the call to native + + const Register oop_handle_reg = r14;//TODO:check jzy + + // + // We immediately shuffle the arguments so that any vm call we have to + // make from here on out (sync slow path, jvmti, etc.) we will have + // captured the oops from our caller and have a valid oopMap for + // them. + + // ----------------- + // The Grand Shuffle + + // The Java calling convention is either equal (linux) or denser (win64) than the + // c calling convention. However the because of the jni_env argument the c calling + // convention always has at least one more (and two for static) arguments than Java. + // Therefore if we move the args from java -> c backwards then we will never have + // a register->register conflict and we don't have to build a dependency graph + // and figure out how to break any cycles. 
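The "move the args from java -> c backwards" argument above can be shown with a toy shuffle: if each outgoing C slot is the corresponding Java slot shifted up by one register (making room for JNIEnv*), copying from the last argument downwards never overwrites a source before it has been read. A minimal sketch with a plain array standing in for the registers:

```c++
#include <cstdio>

// Toy version of the ordering argument: java arg i sits in reg[i] and must end
// up in reg[i+1].  Walking from the last argument down to the first never
// clobbers an unread source; a forward walk would smear reg[0] everywhere.
int main() {
  int reg[5] = {10, 11, 12, 13, 0};          // java args in reg[0..3], reg[4] free
  const int total_args = 4;

  for (int i = total_args - 1; i >= 0; i--)  // backwards, as the wrapper does
    reg[i + 1] = reg[i];                     // shift each arg up by one register
  reg[0] = 999;                              // pretend 999 is the JNIEnv* argument

  for (int i = 0; i < 5; i++) std::printf("reg[%d]=%d\n", i, reg[i]);
  return 0;                                  // prints 999 10 11 12 13
}
```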
+ //
+
+ // Record esp-based slot for receiver on stack for non-static methods
+ int receiver_offset = -1;
+
+ // This is a trick. We double the stack slots so we can claim
+ // the oops in the caller's frame. Since we are sure to have
+ // more args than the caller doubling is enough to make
+ // sure we can capture all the incoming oop args from the
+ // caller.
+ //
+ OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
+
+ // Mark location of rbp (someday)
+ // map->set_callee_saved(VMRegImpl::stack2reg( stack_slots - 2), stack_slots * 2, 0, vmreg(rbp));
+
+ // Use eax, ebx as temporaries during any memory-memory moves we have to do
+ // All inbound args are referenced based on rbp and all outbound args via rsp.
+
+
+#ifdef ASSERT
+ bool reg_destroyed[RegisterImpl::number_of_registers];
+ bool freg_destroyed[FloatRegisterImpl::number_of_registers];
+ for ( int r = 0 ; r < RegisterImpl::number_of_registers ; r++ ) {
+ reg_destroyed[r] = false;
+ }
+ for ( int f = 0 ; f < FloatRegisterImpl::number_of_registers ; f++ ) {
+ freg_destroyed[f] = false;
+ }
+
+#endif /* ASSERT */
+
+ // This may iterate in two different directions depending on the
+ // kind of native it is. The reason is that for regular JNI natives
+ // the incoming and outgoing registers are offset upwards and for
+ // critical natives they are offset down.
+ GrowableArray<int> arg_order(2 * total_in_args);
+
+ VMRegPair tmp_vmreg;
+ tmp_vmreg.set2(rbx->as_VMReg());
+
+ if (!is_critical_native) {
+ for (int i = total_in_args - 1, c_arg = total_c_args - 1; i >= 0; i--, c_arg--) {
+ arg_order.push(i);
+ arg_order.push(c_arg);
+ }
+ } else {
+ // Compute a valid move order, using tmp_vmreg to break any cycles
+ ComputeMoveOrder cmo(total_in_args, in_regs, total_c_args, out_regs, in_sig_bt, arg_order, tmp_vmreg);
+ }
+
+ int temploc = -1;
+ for (int ai = 0; ai < arg_order.length(); ai += 2) {
+ int i = arg_order.at(ai);
+ int c_arg = arg_order.at(ai + 1);
+ __ block_comment(err_msg("move %d -> %d", i, c_arg));
+ if (c_arg == -1) {
+ assert(is_critical_native, "should only be required for critical natives");
+ // This arg needs to be moved to a temporary
+ __ movl(tmp_vmreg.first()->as_Register(), in_regs[i].first()->as_Register());
+ in_regs[i] = tmp_vmreg;
+ temploc = i;
+ continue;
+ } else if (i == -1) {
+ assert(is_critical_native, "should only be required for critical natives");
+ // Read from the temporary location
+ assert(temploc != -1, "must be valid");
+ i = temploc;
+ temploc = -1;
+ }
+#ifdef ASSERT
+ if (in_regs[i].first()->is_Register()) {
+ assert(!reg_destroyed[in_regs[i].first()->as_Register()->encoding()], "destroyed reg!");
+ } else if (in_regs[i].first()->is_FloatRegister()) {
+ assert(!freg_destroyed[in_regs[i].first()->as_FloatRegister()->encoding()], "destroyed reg!");
+ }
+ if (out_regs[c_arg].first()->is_Register()) {
+ reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true;
+ } else if (out_regs[c_arg].first()->is_FloatRegister()) {
+ freg_destroyed[out_regs[c_arg].first()->as_FloatRegister()->encoding()] = true;
+ }
+#endif /* ASSERT */
+ switch (in_sig_bt[i]) {
+ case T_ARRAY:
+ if (is_critical_native) {
+ unpack_array_argument(masm, in_regs[i], in_elem_bt[i], out_regs[c_arg + 1], out_regs[c_arg]);
+ c_arg++;
+#ifdef ASSERT
+ if (out_regs[c_arg].first()->is_Register()) {
+ reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true;
+ } else if (out_regs[c_arg].first()->is_FloatRegister()) {
+ freg_destroyed[out_regs[c_arg].first()->as_FloatRegister()->encoding()] =
true; + } +#endif + break; + } + case T_OBJECT: + assert(!is_critical_native, "no oop arguments"); + __ object_move(map, oop_handle_offset, stack_slots, in_regs[i], out_regs[c_arg], + ((i == 0) && (!is_static)), + &receiver_offset); + break; + case T_VOID: + break; + + case T_FLOAT: + __ float_move(in_regs[i], out_regs[c_arg]); + break; + + case T_DOUBLE: + assert( i + 1 < total_in_args && + in_sig_bt[i + 1] == T_VOID && + out_sig_bt[c_arg+1] == T_VOID, "bad arg list"); + __ double_move(in_regs[i], out_regs[c_arg]); + break; + + case T_LONG : + __ long_move(in_regs[i], out_regs[c_arg]); + break; + + case T_ADDRESS: assert(false, "found T_ADDRESS in java args"); + + default: + __ move32_64(in_regs[i], out_regs[c_arg]); + } + } + + int c_arg; + + // Pre-load a static method's oop into r14. Used both by locking code and + // the normal JNI call code. + if (!is_critical_native) { + // point c_arg at the first arg that is already loaded in case we + // need to spill before we call out + c_arg = total_c_args - total_in_args; + + if (method->is_static()) { + + // load oop into a register + int oop_index = __ oop_recorder()->find_index(JNIHandles::make_local((method->method_holder())->java_mirror())); + RelocationHolder rspec = oop_Relocation::spec(oop_index); + __ relocate(rspec); + __ prepare_patch_li48(oop_handle_reg, (long)JNIHandles::make_local((method->method_holder())->java_mirror())); + + // Now handlize the static class mirror it's known not-null. + __ stptr(oop_handle_reg, Address(rsp, klass_offset)); + map->set_oop(VMRegImpl::stack2reg(klass_slot_offset)); + + // Now get the handle + __ lea(oop_handle_reg, Address(rsp, klass_offset)); + // store the klass handle as second argument + __ movl(c_rarg1, oop_handle_reg); + // and protect the arg if we must spill + c_arg--; + } + } else { + // For JNI critical methods we need to save all registers in save_args. + c_arg = 0; + } + + // Change state to native (we save the return address in the thread, since it might not + // be pushed on the stack when we do a a stack traversal). It is enough that the pc() + // points into the right code segment. It does not have to be the correct return pc. + // We use the same pc/oopMap repeatedly when we call out + + //intptr_t the_pc = (intptr_t) __ pc(); + //oop_maps->add_gc_map(the_pc - start, map); + Label native_return; + __ set_last_Java_frame(rsp, noreg, native_return, rscratch3); + //__ set_last_Java_frame(rsp, noreg, (address)the_pc, rscratch3); + // We have all of the arguments setup at this point. We must not touch any register + // argument registers at this point (what if we save/restore them there are no oop? 
+ + { + SkipIfEqual skip(masm, &DTraceMethodProbes, false); + // protect the args we've loaded + save_args(masm, total_c_args, c_arg, out_regs); + __ mov_metadata(c_rarg1, method()); + __ call_VM_leaf( + CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry), + rthread, c_rarg1); + restore_args(masm, total_c_args, c_arg, out_regs); + } + + // RedefineClasses() tracing support for obsolete method entry + if (log_is_enabled(Trace, redefine, class, obsolete)) { + // protect the args we've loaded + save_args(masm, total_c_args, c_arg, out_regs); + __ mov_metadata(c_rarg1, method()); + __ call_VM_leaf( + CAST_FROM_FN_PTR(address, SharedRuntime::rc_trace_method_entry), + rthread, c_rarg1); + restore_args(masm, total_c_args, c_arg, out_regs); + } + + // Lock a synchronized method + + // Register definitions used by locking and unlocking + + const Register swap_reg = V0; // Must use rax for cmpxchg instruction ?jzy + const Register obj_reg = rmethod; // Will contain the oop + const Register lock_reg = rbcp; // Address of compiler lock object (BasicLock) + const Register old_hdr = rbcp; // value of old header at unlock time + + Label slow_path_lock; + Label lock_done; + + if (method->is_synchronized()) { + assert(!is_critical_native, "unhandled"); + + + const int mark_word_offset = BasicLock::displaced_header_offset_in_bytes(); + + // Get the handle (the 2nd argument) + __ movl(oop_handle_reg, c_rarg1); + + // Get address of the box + + __ lea(lock_reg, Address(esp, lock_slot_offset * VMRegImpl::stack_slot_size)); + + // Load the oop from the handle + __ ldptr(obj_reg, Address(oop_handle_reg, 0)); + + if (UseBiasedLocking) { + __ biased_locking_enter(lock_reg, obj_reg, swap_reg, rscratch1, false, lock_done, &slow_path_lock); + } + + // Load immediate 1 into swap_reg %rax + __ movw(swap_reg, 1); + + // Load (object->mark() | 1) into swap_reg %rax + __ ldptr(rscratch3, Address(obj_reg, oopDesc::mark_offset_in_bytes())); + __ orptr(swap_reg, rscratch3, swap_reg); + + // Save (object->mark() | 1) into BasicLock's displaced header + __ stptr(swap_reg, Address(lock_reg, mark_word_offset)); + + __ memb(); + + // Address -> lock_reg if lock_reg == swap_reg else swap_reg = lock_reg + __ cmpxchg(lock_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()), swap_reg); + __ jcc(Assembler::success, lock_done); + + // Hmm should this move to the slow path code area??? + + // Test if the oopMark is an obvious stack pointer, i.e., + // 1) (mark & 3) == 0, and + // 2) rsp <= mark < mark + os::pagesize() + // These 3 tests can be done by evaluating the following + // expression: ((mark - rsp) & (3 - os::vm_page_size())), + // assuming both stack pointer and pagesize have their + // least significant 2 bits clear. + // NOTE: the oopMark is in swap_reg %rax as the result of cmpxchg + //TODO:here is similar to interpreter ? 
jzy + __ subptr(swap_reg, esp, swap_reg); + __ andptr(swap_reg, 3 - os::vm_page_size(), swap_reg); + + // Save the test result, for recursive case, the result is zero + __ stptr(swap_reg, Address(lock_reg, mark_word_offset)); + __ jcc(Assembler::notEqual, slow_path_lock, swap_reg); + + // Slow path will re-enter here + + __ bind(lock_done); + } + + // Finally just about ready to make the JNI call + + // get JNIEnv* which is first argument to native + if (!is_critical_native) { + __ lea(c_rarg0, Address(rthread, in_bytes(JavaThread::jni_environment_offset()))); + + // Now set thread in native + __ stw(_thread_in_native, Address(rthread, JavaThread::thread_state_offset())); + __ memb(); + } + + __ call(RuntimeAddress(native_func), &native_return); + // Verify or restore cpu control state after JNI call + //__ restore_cpu_control_state_after_jni(); //sw need this? jzy + + //intptr_t return_pc = (intptr_t) __ pc(); + oop_maps->add_gc_map( __ offset(native_return, (address)start), map); + + // Unpack native results. + switch (ret_type) { + case T_BOOLEAN: __ c2bool(V0); break; + case T_CHAR : __ zapnot(V0, 0x3, V0); break; + case T_BYTE : __ sign_extend_byte (V0); break; + case T_SHORT : __ sign_extend_short(V0); break; + case T_INT : /* nothing to do */ break; + case T_DOUBLE : + case T_FLOAT : + // Result is in xmm0 we'll save as needed + break; + case T_ARRAY: // Really a handle + case T_OBJECT: // Really a handle + break; // can't de-handlize until after safepoint check + case T_VOID: break; + case T_LONG: break; + default : ShouldNotReachHere(); + } + + + Label after_transition; + // If this is a critical native, check for a safepoint or suspend request after the call. + // If a safepoint is needed, transition to native, then to native_trans to handle + // safepoints like the native methods that are not critical natives. + if (is_critical_native) { + Label needs_safepoint; + __ safepoint_poll(needs_safepoint, rthread, rscratch3, false /* at_return */, true /* acquire */, false /* in_nmethod */); + __ cmpw(Address(rthread, JavaThread::suspend_flags_offset()), 0); + __ jcc(Assembler::equal, after_transition); + __ bind(needs_safepoint); + } + + // Switch thread to "native transition" state before reading the synchronization state. + // This additional state is necessary because reading and testing the synchronization + // state is not atomic w.r.t. GC, as this scenario demonstrates: + // Java thread A, in _thread_in_native state, loads _not_synchronized and is preempted. + // VM thread changes sync state to synchronizing and suspends threads for GC. + // Thread A is resumed to finish this native method, but doesn't block here since it + // didn't see any synchronization is progress, and escapes. + __ stw(_thread_in_native_trans, Address(rthread, JavaThread::thread_state_offset())); + + // Force this write out before the read below + __ memb(); + + // check for safepoint operation in progress and/or pending suspend requests + { + Label Continue; + Label slow_path; + + __ safepoint_poll(slow_path, rthread, rscratch3, true /* at_return */, true /* acquire */, false /* in_nmethod */); + + __ cmpw(Address(rthread, JavaThread::suspend_flags_offset()), R0); + __ jcc(Assembler::equal, Continue); + __ bind(slow_path); + + // Don't use call_VM as it will see a possible pending exception and forward it + // and never return here preventing us from clearing _last_native_pc down below. 
+ // Also can't use call_VM_leaf either as it will check to see if rsi & rdi are + // preserved and correspond to the bcp/locals pointers. So we do a runtime call + // by hand. + // + //__ vzeroupper(); + Register r12 = rheapbase; + + save_native_result(masm, ret_type, stack_slots); + __ movl(c_rarg0, rthread); + __ movl(r12, esp); // remember sp + __ subptr(esp, frame::arg_reg_save_area_bytes, esp); // windows + __ andptr(esp, -16, esp); // align stack as required by ABI + __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans))); + __ movl(esp, r12); // restore sp + __ reinit_heapbase(); + // Restore any method result value + restore_native_result(masm, ret_type, stack_slots); + + __ bind(Continue); + } + + // change thread state + __ memb(); + __ stw(_thread_in_Java, Address(rthread, JavaThread::thread_state_offset())); + __ bind(after_transition); + + Label reguard; + Label reguard_done; + __ cmpw(Address(rthread, JavaThread::stack_guard_state_offset()), StackOverflow::stack_guard_yellow_reserved_disabled); + __ jcc(Assembler::equal, reguard); + __ bind(reguard_done); + + // native result if any is live + Register rax = V0; + Register rcx = rscratch3; + Register r12 = rheapbase; + + // Unlock + Label unlock_done; + Label slow_path_unlock; + if (method->is_synchronized()) { + + // Get locked oop from the handle we passed to jni + __ ldptr(obj_reg, Address(oop_handle_reg, 0)); + + Label done; + + if (UseBiasedLocking) { + __ biased_locking_exit(obj_reg, old_hdr, done); + } + + // Simple recursive lock? + + __ cmpptr(Address(esp, lock_slot_offset * VMRegImpl::stack_slot_size), R0); + __ jcc(Assembler::equal, done); + + // Must save rax if if it is live now because cmpxchg must use it + if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) { + save_native_result(masm, ret_type, stack_slots); + } + + + // get address of the stack lock + __ lea(rax, Address(esp, lock_slot_offset * VMRegImpl::stack_slot_size)); + // get old displaced header + __ ldptr(old_hdr, Address(rax, 0)); + + // Atomic swap old header if oop still contains the stack lock + __ lock(); + + __ cmpxchg(old_hdr, Address(obj_reg, oopDesc::mark_offset_in_bytes()), rax); + __ jcc(Assembler::failed, slow_path_unlock); + + // slow path re-enters here + __ bind(unlock_done); + if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) { + restore_native_result(masm, ret_type, stack_slots); + } + + __ bind(done); + + } + { + SkipIfEqual skip(masm, &DTraceMethodProbes, false); + save_native_result(masm, ret_type, stack_slots); + __ mov_metadata(c_rarg1, method()); + __ call_VM_leaf( + CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit), + rthread, c_rarg1); + restore_native_result(masm, ret_type, stack_slots); + } + + __ reset_last_Java_frame(false); + + // Unbox oop result, e.g. JNIHandles::resolve value. + if (is_reference_type(ret_type)) { + __ resolve_jobject(rax /* value */, + rthread /* thread */, + rcx /* tmp */); + } + + if (CheckJNICalls) { + // clear_pending_jni_exception_check + __ stptr(R0, Address(rthread, JavaThread::pending_jni_exception_check_fn_offset())); + } + + if (!is_critical_native) { + // reset handle block + __ ldptr(rcx, Address(rthread, JavaThread::active_handles_offset())); + __ stw(R0, Address(rcx, JNIHandleBlock::top_offset_in_bytes())); + } + + // pop our frame + + __ leave(); + + if (!is_critical_native) { + // Any exception pending? 
+ __ cmpptr(Address(rthread, in_bytes(Thread::pending_exception_offset())), R0); + __ jcc(Assembler::notEqual, exception_pending); + } + + // Return + + __ ret(); + + // Unexpected paths are out of line and go here + + if (!is_critical_native) { + // forward the exception + __ bind(exception_pending); + + // and forward the exception + __ jump(RuntimeAddress(StubRoutines::forward_exception_entry())); + } + + // Slow path locking & unlocking + if (method->is_synchronized()) { + + // BEGIN Slow path lock + __ bind(slow_path_lock); + + // has last_Java_frame setup. No exceptions so do vanilla call not call_VM + // args are (oop obj, BasicLock* lock, JavaThread* thread) + + // protect the args we've loaded + save_args(masm, total_c_args, c_arg, out_regs); + + __ movl(c_rarg0, obj_reg); + __ movl(c_rarg1, lock_reg); + __ movl(c_rarg2, rthread); + + // Not a leaf but we have last_Java_frame setup as we want + __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_locking_C), 3); + restore_args(masm, total_c_args, c_arg, out_regs); + +#ifdef ASSERT + { Label L; + __ cmpptr(Address(rthread, in_bytes(Thread::pending_exception_offset())), R0); + __ jcc(Assembler::equal, L); + __ stop("no pending exception allowed on exit from monitorenter"); + __ bind(L); + } +#endif + __ jmp(lock_done); + + // END Slow path lock + + // BEGIN Slow path unlock + __ bind(slow_path_unlock); + + // If we haven't already saved the native result we must save it now as xmm registers + // are still exposed. + //__ vzeroupper(); + if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) { + save_native_result(masm, ret_type, stack_slots); + } + + __ lea(c_rarg1, Address(esp, lock_slot_offset * VMRegImpl::stack_slot_size)); + Register r12 = rheapbase; + + __ movl(c_rarg0, obj_reg); + __ movl(c_rarg2, rthread); + __ movl(r12, esp); // remember sp + __ subptr(esp, frame::arg_reg_save_area_bytes, esp); // windows + __ andptr(esp, -16, esp); // align stack as required by ABI + + // Save pending exception around call to VM (which contains an EXCEPTION_MARK) + // NOTE that obj_reg == rbx currently + __ ldptr(rbx, Address(rthread, in_bytes(Thread::pending_exception_offset()))); + __ stptr(R0, Address(rthread, in_bytes(Thread::pending_exception_offset()))); + + // args are (oop obj, BasicLock* lock, JavaThread* thread) + __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C))); + __ movl(esp, r12); // restore sp + __ reinit_heapbase(); +#ifdef ASSERT + { + Label L; + __ cmpptr(Address(rthread, in_bytes(Thread::pending_exception_offset())), R0); + __ jcc(Assembler::equal, L); + __ stop("no pending exception allowed on exit complete_monitor_unlocking_C"); + __ bind(L); + } +#endif /* ASSERT */ + + __ stptr(rbx, Address(rthread, in_bytes(Thread::pending_exception_offset()))); + + if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) { + restore_native_result(masm, ret_type, stack_slots); + } + __ jmp(unlock_done); + + // END Slow path unlock + + } // synchronized + + // SLOW PATH Reguard the stack if needed + + __ bind(reguard); + //__ vzeroupper(); + save_native_result(masm, ret_type, stack_slots); + __ movl(r12, esp); // remember sp + __ subptr(esp, frame::arg_reg_save_area_bytes, esp); // windows + __ andptr(esp, -16, esp); // align stack as required by ABI + __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages))); + __ movl(esp, r12); // restore sp + __ reinit_heapbase(); + restore_native_result(masm, ret_type, stack_slots); + // and continue + __ 
jmp(reguard_done); + + __ flush(); + + nmethod *nm = nmethod::new_native_nmethod(method, + compile_id, + masm->code(), + vep_offset, + frame_complete, + stack_slots / VMRegImpl::slots_per_word, + (is_static ? in_ByteSize(klass_offset) : in_ByteSize(receiver_offset)), + in_ByteSize(lock_slot_offset*VMRegImpl::stack_slot_size), + oop_maps); + + return nm; +} + +// this function returns the adjust size (in number of words) to a c2i adapter +// activation for use during deoptimization +int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals ) { + return (callee_locals - callee_parameters) * Interpreter::stackElementWords; +} + + +uint SharedRuntime::out_preserve_stack_slots() { + return 0; +} + + +// Number of stack slots between incoming argument block and the start of +// a new frame. The PROLOG must add this many slots to the stack. The +// EPILOG must remove this many slots. +uint SharedRuntime::in_preserve_stack_slots() { + return 4; +} + + +//------------------------------generate_deopt_blob---------------------------- +void SharedRuntime::generate_deopt_blob() { + // Allocate space for the code + ResourceMark rm; + // Setup code generation tools + int pad = 0; +#if INCLUDE_JVMCI + if (EnableJVMCI) { + pad += 512; // Increase the buffer size when compiling for JVMCI + } +#endif + CodeBuffer buffer("deopt_blob", 2048*2+pad, 1024); + MacroAssembler* masm = new MacroAssembler(&buffer); + int frame_size_in_words; + OopMap* map = NULL; + OopMapSet *oop_maps = new OopMapSet(); + + // ------------- + // This code enters when returning to a de-optimized nmethod. A return + // address has been pushed on the the stack, and return values are in + // registers. + // If we are doing a normal deopt then we were called from the patched + // nmethod from the point we returned to the nmethod. So the return + // address on the stack is wrong by NativeCall::instruction_size + // We will adjust the value so it looks like we have the original return + // address on the stack (like when we eagerly deoptimized). + // In the case of an exception pending when deoptimizing, we enter + // with a return address on the stack that points after the call we patched + // into the exception handler. We have the following register state from, + // e.g., the forward exception stub (see stubGenerator_x86_64.cpp). + // rax: exception oop + // rbx: exception handler + // rdx: throwing pc + // So in this case we simply jam rdx into the useless return address and + // the stack looks just like we want. + // + // At this point we need to de-opt. We save the argument return + // registers. We call the first C routine, fetch_unroll_info(). This + // routine captures the return values and returns a structure which + // describes the current frame size and the sizes of all replacement frames. + // The current frame is compiled code and may contain many inlined + // functions, each with their own JVM state. We pop the current frame, then + // push all the new frames. Then we call the C routine unpack_frames() to + // populate these frames. Finally unpack_frames() returns us the new target + // address. Notice that callee-save registers are BLOWN here; they have + // already been captured in the vframeArray at the time the return PC was + // patched. + address start = __ pc(); + Label cont; + //__ stop("check generate_deopt_blob @jzy"); + // Prolog for non exception case! + //__ subptr(RA, NativeCall::return_address_offset, RA); //TODO:need this? jzy + + // Save everything in sight. 
+ map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words); + Register r14 = rlocals; //should be callee saved jzy + //Register rax = V0; + //Register rdi = A0; + //Register rsi = A1; + //Register rdx = A2;//?is OK? jzy + //Register rcx = A3; + //Register rbx = rmethod; + //__ stop("generate_deopt_blob"); + // Normal deoptimization. Save exec mode for unpack_frames. + __ mov_immediate32(r14, Deoptimization::Unpack_deopt); // callee-saved why r14? jzy + __ jmp(cont); + + int reexecute_offset = __ pc() - start; +#if INCLUDE_JVMCI && !defined(COMPILER1) + if (EnableJVMCI && UseJVMCICompiler) { + // JVMCI does not use this kind of deoptimization + __ should_not_reach_here(); + } +#endif + + // Reexecute case + // return address is the pc describes what bci to do re-execute at + + // No need to update map as each call to save_live_registers will produce identical oopmap + (void) RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words); + + __ mov_immediate32(r14, Deoptimization::Unpack_reexecute); // callee-saved + __ jmp(cont); + +#if INCLUDE_JVMCI + Label after_fetch_unroll_info_call; + int implicit_exception_uncommon_trap_offset = 0; + int uncommon_trap_offset = 0; + + if (EnableJVMCI) { + implicit_exception_uncommon_trap_offset = __ pc() - start; + + __ pushptr(Address(rthread, in_bytes(JavaThread::jvmci_implicit_exception_pc_offset()))); + __ stptr(R0, Address(rthread, in_bytes(JavaThread::jvmci_implicit_exception_pc_offset()))); + + uncommon_trap_offset = __ pc() - start; + + // Save everything in sight. + RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words); + // fetch_unroll_info needs to call last_java_frame() + label retaddr; + __ set_last_Java_frame(esp, noreg, retaddr, rscratch3, rscratch2_AT); + + __ ldws(c_rarg1, Address(rthread, in_bytes(JavaThread::pending_deoptimization_offset()))); + __ mov_immediate32(rscratch3, -1); + __ stw(rscratch3, Address(rthread, in_bytes(JavaThread::pending_deoptimization_offset()))); + + __ mov_immediate32(r14, (int32_t)Deoptimization::Unpack_reexecute); + __ movl(c_rarg0, rthread); + __ movl(c_rarg2, r14); // exec mode + __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::uncommon_trap)), &retaddr); + oop_maps->add_gc_map( __ offset(retaddr, start), map->deep_copy()); + + __ reset_last_Java_frame(false); + + __ jmp(after_fetch_unroll_info_call); + } // EnableJVMCI +#endif // INCLUDE_JVMCI + + int exception_offset = __ pc() - start; + + // Prolog for exception case + + // all registers are dead at this entry point, except for rax, and + // rdx which contain the exception oop and exception pc + // respectively. Set them in TLS and fall thru to the + // unpack_with_exception_in_tls entry point. + //__ stop("here should check:which is rax & rdx in sw?"); + __ stptr(rdx, Address(rthread, JavaThread::exception_pc_offset())); + __ stptr(rax, Address(rthread, JavaThread::exception_oop_offset())); + + int exception_in_tls_offset = __ pc() - start; + + // new implementation because exception oop is now passed in JavaThread + + // Prolog for exception case + // All registers must be preserved because they might be used by LinearScan + // Exceptiop oop and throwing PC are passed in JavaThread + // tos: stack at point of call to method that threw the exception (i.e. only + // args are on the stack, no return address) + + // make room on stack for the return address + // It will be patched later with the throwing pc. 
The correct value is not + // available now because loading it from memory would destroy registers. + //__ push(0); //TODO:check return address? jzy + //__ stop("generate_deopt_blob:exception here need check: jzy"); + // Save everything in sight. + map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words); + + // Now it is safe to overwrite any register + + // Deopt during an exception. Save exec mode for unpack_frames. + __ mov_immediate32u(r14, Deoptimization::Unpack_exception); // callee-saved + + // load throwing pc from JavaThread and patch it as the return address + // of the current frame. Then clear the field in JavaThread + + __ ldptr(rdx, Address(rthread, JavaThread::exception_pc_offset())); + __ stptr(rdx, Address(rfp, wordSize)); + __ stptr(R0, Address(rthread, JavaThread::exception_pc_offset())); + +#ifdef ASSERT + // verify that there is really an exception oop in JavaThread + __ ldptr(rax, Address(rthread, JavaThread::exception_oop_offset())); + __ verify_oop(rax); + + // verify that there is no pending exception + Label no_pending_exception; + __ ldptr(rax, Address(rthread, Thread::pending_exception_offset())); + __ jcc(Assembler::zero, no_pending_exception, rax); + __ stop("must not have pending exception here"); + __ bind(no_pending_exception); +#endif + + __ bind(cont); + + // Call C code. Need thread and this frame, but NOT official VM entry + // crud. We cannot block on this call, no GC can happen. + // + // UnrollBlock* fetch_unroll_info(JavaThread* thread) + + // fetch_unroll_info needs to call last_java_frame(). + //__ stop("TODO:check how set pc? jzy"); + Label retaddr; + __ set_last_Java_frame(esp, noreg, retaddr, rscratch3, rscratch2_AT); +#ifdef ASSERT + { Label L; + __ cmpptr(Address(rthread, + JavaThread::last_Java_fp_offset()), + R0); + __ jcc(Assembler::equal, L); + __ stop("SharedRuntime::generate_deopt_blob: last_Java_fp not cleared"); + __ bind(L); + } +#endif // ASSERT + __ movl(c_rarg0, rthread); + __ movl(c_rarg1, r14); // exec_mode + __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info)), &retaddr); + + // Need to have an oopmap that tells fetch_unroll_info where to + // find any register it might need. + oop_maps->add_gc_map(__ offset(retaddr, start), map); + + __ reset_last_Java_frame(false); + +#if INCLUDE_JVMCI + if (EnableJVMCI) { + __ bind(after_fetch_unroll_info_call); + } +#endif + + // Load UnrollBlock* into rdi + __ movl(rdi, rax); + + __ ldws(r14, Address(rdi, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes())); + Label noException; + __ cmpw(r14, Deoptimization::Unpack_exception); // Was exception pending? + __ jcc(Assembler::notEqual, noException); + __ ldptr(rax, Address(rthread, JavaThread::exception_oop_offset())); + // QQQ this is useless it was NULL above + __ ldptr(rdx, Address(rthread, JavaThread::exception_pc_offset())); + __ stptr(R0, Address(rthread, JavaThread::exception_oop_offset())); + __ stptr(R0, Address(rthread, JavaThread::exception_pc_offset())); + + __ verify_oop(rax); + + // Overwrite the result registers with the exception results. + __ stptr(rax, Address(esp, RegisterSaver::v0_offset_in_bytes())); + // I think this is useless + assert(rdx == A2, "rdx not a2 register"); + __ stptr(rdx, Address(esp, RegisterSaver::a2_offset_in_bytes())); + + __ bind(noException); + + // Only register save data is on the stack. + // Now restore the result registers. Everything else is either dead + // or captured in the vframeArray. 
+ RegisterSaver::restore_result_registers(masm); + + // All of the register save area has been popped of the stack. Only the + // return address remains. + + // Pop all the frames we must move/replace. + // + // Frame picture (youngest to oldest) + // 1: self-frame (no frame link) + // 2: deopting frame (no frame link) + // 3: caller of deopting frame (could be compiled/interpreted). + // + // Note: by leaving the return address of self-frame on the stack + // and using the size of frame 2 to adjust the stack + // when we are done the return to frame 3 will still be on the stack. + + // Pop deoptimized frame + __ ldws(rcx, Address(rdi, Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset_in_bytes())); + __ addptr(esp, rcx, esp); + //__ ldl(RA, - 1 * wordSize, esp); + //__ ldl(rfp, - 2 * wordSize, esp); + + // rsp should be pointing at the return address to the caller (3) + + // Pick up the initial fp we should save + // restore rbp before stack bang because if stack overflow is thrown it needs to be pushed (and preserved) + __ ldptr(rfp, Address(rdi, Deoptimization::UnrollBlock::initial_info_offset_in_bytes())); + +/*#ifdef ASSERT + { + Label L; + __ movl(rscratch3, rfp); + __ ldptr(rfp, Address(rdi, Deoptimization::UnrollBlock::initial_info_offset_in_bytes())); + __ cmpl(rscratch3, rfp); + __ jcc(Assembler::zero, L); + __ stop("fp not equal @jzy"); + __ bind(L); + } +#endif*/ + +#ifdef ASSERT + // Compilers generate code that bang the stack by as much as the + // interpreter would need. So this stack banging should never + // trigger a fault. Verify that it does not on non product builds. + __ ldws(rbx, Address(rdi, Deoptimization::UnrollBlock::total_frame_sizes_offset_in_bytes())); + __ bang_stack_size(rbx, rcx); +#endif + + // Load address of array of frame pcs into rcx + __ ldptr(rcx, Address(rdi, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes())); + + // Trash the old pc + __ addptr(esp, wordSize, esp); + + // Load address of array of frame sizes into rsi + __ ldptr(rsi, Address(rdi, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes())); + + // Load counter into rdx + __ ldws(rdx, Address(rdi, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes())); + + // Now adjust the caller's stack to make up for the extra locals + // but record the original sp so that we can save it in the skeletal interpreter + // frame and the stack walking of interpreter_sender will get the unextended sp + // value and not the "real" sp value. + + const Register sender_sp = rscratch3;//? 
jzy + //__ stop("which register can we use?"); + __ movl(sender_sp, esp); + __ ldws(rbx, Address(rdi, + Deoptimization::UnrollBlock:: + caller_adjustment_offset_in_bytes())); + __ subptr(esp, rbx, esp); + + // Push interpreter frames in a loop + Label loop; + __ bind(loop); + __ ldptr(rbx, Address(rsi, 0)); // Load frame size + __ subptr(rbx, 2*wordSize, rbx); // We'll push pc and ebp by hand + __ ldptr(RA, Address(rcx, 0)); // Save return address + __ enter(); // Save old & set new ebp + __ subptr(esp, rbx, esp); // Prolog + // This value is corrected by layout_activation_impl + __ stptr(R0, Address(rfp, frame::interpreter_frame_last_sp_offset * wordSize)); + __ stptr(sender_sp, Address(rfp, frame::interpreter_frame_sender_sp_offset * wordSize)); // Make it walkable + __ movl(sender_sp, esp); // Pass sender_sp to next frame + __ addptr(rsi, wordSize, rsi); // Bump array pointer (sizes) + __ addptr(rcx, wordSize, rcx); // Bump array pointer (pcs) + __ decrementl(rdx); // Decrement counter + __ jcc(Assembler::notZero, loop, rdx); + __ ldptr(RA, Address(rcx, 0)); // Save final return address + + // Re-push self-frame + __ enter(); // Save old & set new ebp + + // Allocate a full sized register save area. + // Return address and rbp are in place, so we allocate two less words. + __ subptr(esp, (frame_size_in_words - 2) * wordSize, esp); + + // Restore frame locals after moving the frame + __ store_double(FSF, Address(esp, RegisterSaver::fsf_offset_in_bytes())); + __ stptr(rax, Address(esp, RegisterSaver::v0_offset_in_bytes())); + + // Call C code. Need thread but NOT official VM entry + // crud. We cannot block on this call, no GC can happen. Call should + // restore return values to their stack-slots with the new SP. + // + // void Deoptimization::unpack_frames(JavaThread* thread, int exec_mode) + + // Use rbp because the frames look interpreted now + // Save "the_pc" since it cannot easily be retrieved using the last_java_SP after we aligned SP. + // Don't need the precise return PC here, just precise enough to point into this code blob. + address the_pc = __ pc(); + __ set_last_Java_frame(esp, rfp, the_pc, rscratch3); + + //__ andptr(esp, -(StackAlignmentInBytes), esp); // Fix stack alignment as required by ABI + __ movl(c_rarg0, rthread); + __ movl(c_rarg1, r14); // second arg: exec_mode r14 should be callee saved in sw jzy + __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames))); + // Revert SP alignment after call since we're going to do some SP relative addressing below + __ ldptr(esp, Address(rthread, JavaThread::last_Java_sp_offset())); + + // Set an oopmap for the call site + // Use the same PC we used for the last java frame TODO:need modify add_gc_map's offset? + oop_maps->add_gc_map(the_pc - start, + new OopMap( frame_size_in_words, 0 )); + + // Clear fp AND pc + __ reset_last_Java_frame(true); + + // Collect return values + __ load_double(FSF, Address(esp, RegisterSaver::fsf_offset_in_bytes())); + __ ldptr(rax, Address(esp, RegisterSaver::v0_offset_in_bytes())); + // I think this is useless (throwing pc?) + __ ldptr(rdx, Address(esp, RegisterSaver::a2_offset_in_bytes())); + + // Pop self-frame. 
+ __ leave(); // Epilog + + // Jump to interpreter + __ ret(); + + // Make sure all code is generated + masm->flush(); + //__ stop("DeoptimizationBlob::create(unimplement): jzy"); + _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset, reexecute_offset, frame_size_in_words); + _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset); +#if INCLUDE_JVMCI + if (EnableJVMCI) { + _deopt_blob->set_uncommon_trap_offset(uncommon_trap_offset); + _deopt_blob->set_implicit_exception_uncommon_trap_offset(implicit_exception_uncommon_trap_offset); + } +#endif +} + +#ifdef COMPILER2 +//------------------------------generate_uncommon_trap_blob-------------------- +void SharedRuntime::generate_uncommon_trap_blob() { + // Allocate space for the code + ResourceMark rm; + // Setup code generation tools + CodeBuffer buffer("uncommon_trap_blob", 2048, 1024); + MacroAssembler* masm = new MacroAssembler(&buffer); + + assert(SimpleRuntimeFrame::framesize % 4 == 0, "sp not 16-byte aligned"); + + address start = __ pc(); + //Register rax = V0; + //Register rbx = rmethod; + //Register rdi = c_rarg0; + //Register rsi = c_rarg1; + //Register rcx = c_rarg3; + //Register rdx = rscratch2_AT; + //Register rbp = rfp;//lsp?? + //__ stop("generate_uncommon_trap_blob"); + + // Push self-frame. We get here with a return address in RA + __ enter(); + // we don't expect an arg reg save area +#ifndef PRODUCT + assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area"); +#endif + + // compiler left unloaded_class_index in j_rarg0 move to where the + // runtime expects it. + __ movws(c_rarg1, j_rarg0); + + Label retaddr; + __ set_last_Java_frame(esp, noreg, retaddr, rscratch3); + + // Call C code. Need thread but NOT official VM entry + // crud. We cannot block on this call, no GC can happen. Call should + // capture callee-saved registers as well as return values. + // Thread is in rdi already. + // + // UnrollBlock* uncommon_trap(JavaThread* thread, jint unloaded_class_index); + + __ movl(c_rarg0, rthread); + __ mov_immediate32s(c_rarg2, Deoptimization::Unpack_uncommon_trap); + __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::uncommon_trap)), &retaddr); + + // Set an oopmap for the call site + OopMapSet* oop_maps = new OopMapSet(); + OopMap* map = new OopMap(SimpleRuntimeFrame::framesize, 0); + + // location of rbp is known implicitly by the frame sender code + + oop_maps->add_gc_map(__ offset(retaddr, start), map);//TODO:check jzy + + __ reset_last_Java_frame(false); + + // Load UnrollBlock* into rdi + __ movl(rdi, rax); + +#ifdef ASSERT + { Label L; + __ mov_immediate32(rscratch3, (int32_t)Deoptimization::Unpack_uncommon_trap); + __ cmpptr(Address(rdi, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes()), rscratch3); + __ jcc(Assembler::equal, L); + __ stop("SharedRuntime::generate_deopt_blob: expected Unpack_uncommon_trap"); + __ bind(L); + } +#endif + + // Pop all the frames we must move/replace. + // + // Frame picture (youngest to oldest) + // 1: self-frame (no frame link) + // 2: deopting frame (no frame link) + // 3: caller of deopting frame (could be compiled/interpreted). + + // Pop self-frame. We have no frame, and must rely only on rax and rsp. + __ addptr(esp, (SimpleRuntimeFrame::framesize - 2) << LogBytesPerInt, esp); // Epilog! 
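What follows here (and in generate_deopt_blob above) is the frame-replacement walk driven by the UnrollBlock: pop the deoptimized frame, grow the caller's frame by caller_adjustment, then push one skeletal interpreter frame per entry in frame_sizes/frame_pcs and let unpack_frames() fill them in. A compact sketch of the stack-pointer bookkeeping, illustrative only; the frame sizes and the adjustment are made-up numbers standing in for the UnrollBlock contents:

```cpp
// Illustrative sketch only: the stack-pointer bookkeeping of the
// skeletal-frame loop, with made-up UnrollBlock contents.
#include <cstdint>
#include <cstdio>

int main() {
  const intptr_t frame_sizes[]     = {96, 128}; // one entry per skeletal frame
  const int      number_of_frames  = 2;
  const intptr_t caller_adjustment = 16;        // extra locals for the caller
  const intptr_t word_size         = 8;

  intptr_t sp = 0x10000;      // SP after the deoptimized frame has been popped
  sp -= caller_adjustment;    // grow the caller's frame for the extra locals

  for (int i = 0; i < number_of_frames; i++) {
    sp -= 2 * word_size;                     // enter(): save RA and old FP
    sp -= frame_sizes[i] - 2 * word_size;    // prolog: the rest of the frame
    std::printf("skeletal frame %d: sp = %#lx\n", i, (unsigned long)sp);
  }
  // Deoptimization::unpack_frames() then fills each frame with the
  // interpreter state described by the vframeArray.
  return 0;
}
```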
+ + // Pop deoptimized frame (int) + __ ldwu(rcx, Address(rdi, + Deoptimization::UnrollBlock:: + size_of_deoptimized_frame_offset_in_bytes())); + __ addptr(esp, rcx, esp); + + // rsp should be pointing at the return address to the caller (3) + + // Pick up the initial fp we should save + // restore rbp before stack bang because if stack overflow is thrown it needs to be pushed (and preserved) + __ ldptr(rfp, Address(rdi, Deoptimization::UnrollBlock::initial_info_offset_in_bytes())); + +#ifdef ASSERT + // Compilers generate code that bang the stack by as much as the + // interpreter would need. So this stack banging should never + // trigger a fault. Verify that it does not on non product builds. + __ ldws(rbx, Address(rdi ,Deoptimization::UnrollBlock::total_frame_sizes_offset_in_bytes())); + __ bang_stack_size(rbx, rcx); +#endif + + // Load address of array of frame pcs into rcx (address*) + __ ldptr(rcx, Address(rdi, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes())); + + // Trash the return pc + __ addptr(esp, wordSize, esp); + + // Load address of array of frame sizes into rsi (intptr_t*) + __ ldptr(rsi, Address(rdi, Deoptimization::UnrollBlock:: frame_sizes_offset_in_bytes())); + + // Counter + __ ldws(rdx, Address(rdi, Deoptimization::UnrollBlock:: number_of_frames_offset_in_bytes())); // (int) + + // Now adjust the caller's stack to make up for the extra locals but + // record the original sp so that we can save it in the skeletal + // interpreter frame and the stack walking of interpreter_sender + // will get the unextended sp value and not the "real" sp value. + + const Register sender_sp = rsender; + __ movl(sender_sp, esp); + __ ldws(rbx, Address(rdi, Deoptimization::UnrollBlock:: caller_adjustment_offset_in_bytes())); // (int) + __ subptr(esp, rbx, esp); + + // Push interpreter frames in a loop + Label loop; + __ bind(loop); + __ ldptr(rbx, Address(rsi, 0)); // Load frame size + __ subptr(rbx, 2 * wordSize, rbx); // We'll push pc and rbp by hand + __ ldptr(RA, Address(rcx, 0)); // Save return address + __ enter(); // Save old & set new rbp + __ subptr(esp, rbx, esp); // Prolog + __ stptr(sender_sp, + Address(rbp, frame::interpreter_frame_sender_sp_offset * wordSize)); // Make it walkable + // This value is corrected by layout_activation_impl + __ stptr(R0, Address(rfp, frame::interpreter_frame_last_sp_offset * wordSize)); + __ movl(sender_sp, esp); // Pass sender_sp to next frame + __ addptr(rsi, wordSize, rsi); // Bump array pointer (sizes) + __ addptr(rcx, wordSize, rcx); // Bump array pointer (pcs) + __ decrementl(rdx); // Decrement counter + __ jcc(Assembler::notZero, loop, rdx); + __ ldptr(RA, Address(rcx, 0)); // Save final return address + + // Re-push self-frame + __ enter(); // Save old & set new rbp + __ subptr(esp, (SimpleRuntimeFrame::framesize - 4) << LogBytesPerInt, esp, rscratch3); + // Prolog + + // Use rbp because the frames look interpreted now + // Save "the_pc" since it cannot easily be retrieved using the last_java_SP after we aligned SP. + // Don't need the precise return PC here, just precise enough to point into this code blob. + address the_pc = __ pc(); + __ set_last_Java_frame(esp, rfp, the_pc, rscratch3); + + // Call C code. Need thread but NOT official VM entry + // crud. We cannot block on this call, no GC can happen. Call should + // restore return values to their stack-slots with the new SP. + // Thread is in rdi already. 
+ // + // BasicType unpack_frames(JavaThread* thread, int exec_mode); + + //__ andptr(esp, -(StackAlignmentInBytes), esp); // Align SP as required by ABI + __ movl(c_rarg0, rthread); + __ mov_immediate64(c_rarg1, Deoptimization::Unpack_uncommon_trap); + __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames)));//TODO:here need to modify offset? swjdk8 modifies this offset jzy + + // Set an oopmap for the call site + // Use the same PC we used for the last java frame + oop_maps->add_gc_map(the_pc - start, new OopMap(SimpleRuntimeFrame::framesize, 0)); + + // Clear fp AND pc + __ reset_last_Java_frame(true); + + // Pop self-frame. + __ leave(); // Epilog + + // Jump to interpreter + __ ret(); + + // Make sure all code is generated + masm->flush(); + + _uncommon_trap_blob = UncommonTrapBlob::create(&buffer, oop_maps, + SimpleRuntimeFrame::framesize >> 1); +} +#endif // COMPILER2 + + +//------------------------------generate_handler_blob------ +// +// Generate a special Compile2Runtime blob that saves all registers, +// and setup oopmap. +// +SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_type) { + assert(StubRoutines::forward_exception_entry() != NULL, + "must be generated before"); + + ResourceMark rm; + OopMapSet *oop_maps = new OopMapSet(); + OopMap* map; + + // Allocate space for the code. Setup code generation tools. + CodeBuffer buffer("handler_blob", 2048, 1024); + MacroAssembler* masm = new MacroAssembler(&buffer); + + address start = __ pc(); + address call_pc = NULL; + int frame_size_in_words; + bool cause_return = (poll_type == POLL_AT_RETURN); + bool save_vectors = (poll_type == POLL_AT_VECTOR_LOOP); + Register rbx = rmethod; + Register rax = V0; + +// if (UseRTMLocking) { +// // Abort RTM transaction before calling runtime +// // because critical section will be large and will be +// // aborted anyway. Also nmethod could be deoptimized. +// __ xabort(0); +// } +//__ stop("generate_handler_blob"); + // Make room for return address (or push it again) + //if (!cause_return) { + //__ push(rbx); + //__ ldptr(RA, Address(rthread, JavaThread::saved_exception_pc_offset()));//TODO:need this? jzy + //} + + // Save registers, fpu state, and flags + map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words, save_vectors); + + // The following is basically a call_VM. However, we need the precise + // address of the call in order to generate an oopmap. Hence, we do all the + // work outselves. + Label retaddr; + __ set_last_Java_frame(esp, noreg, retaddr, rscratch3); + + // The return address must always be correct so that frame constructor never + // sees an invalid pc. + + if (!cause_return) { + // Get the return pc saved by the signal handler and stash it in its appropriate place on the stack. + // Additionally, rbx is a callee saved register and we can look at it later to determine + // if someone changed the return address for us! + __ ldptr(rbx, Address(rthread, JavaThread::saved_exception_pc_offset())); + __ stptr(rbx, Address(rfp, wordSize)); + } + + // Do the call + __ movl(c_rarg0, rthread); + __ call(RuntimeAddress(call_ptr), &retaddr); + + // Set an oopmap for the call site. This oopmap will map all + // oop-registers and debug-info registers as callee-saved. This + // will allow deoptimization at this safepoint to find all possible + // debug-info recordings, as well as let GC find all oops. 
+ + oop_maps->add_gc_map( __ offset(retaddr, start), map); + + Label noException; + + __ reset_last_Java_frame(false); + + __ memb(); + __ ldptr(rscratch1, Address(rthread, Thread::pending_exception_offset())); + __ jcc(Assembler::zero, noException, rscratch1); + + // Exception pending + + RegisterSaver::restore_live_registers(masm, save_vectors); + + __ jump(RuntimeAddress(StubRoutines::forward_exception_entry())); + + // No exception case + __ bind(noException); + + Label no_adjust; +#ifdef ASSERT + Label bail; +#endif + if (!cause_return) { + Label no_prefix, not_special; + // If our stashed return pc was modified by the runtime we avoid touching it + __ cmpptr(rbx, Address(rfp, wordSize)); + __ jcc(Assembler::notEqual, no_adjust); +#ifdef ASSERT + // Verify the correct encoding of the poll we're about to skip. + __ ldwu(rscratch3, Address(rbx, 0)); + __ srll(rscratch3, 26, rscratch2_AT);//get op + __ cmpl(rscratch2_AT, 0x22); //op_ldw + __ jcc(Assembler::notEqual, bail); + + __ srll(rscratch3, 21, rscratch2_AT);//get ra + __ andw(rscratch2_AT, 0x1F, rscratch2_AT); + __ cmpl(rscratch2_AT, 25);//rscratch3 t11 + __ jcc(Assembler::notEqual, bail); + + __ andw(rscratch3, 0xFFFF, rscratch2_AT); +// __ cmpl(rscratch2_AT, R0);//disp t11 +// __ jcc(Assembler::notEqual, bail); + __ bne_l(rscratch2_AT, bail); + +#endif + // Adjust return pc forward to step over the safepoint poll instruction + __ addptr(rbx, NativeInstruction::instruction_size, rbx); + __ stptr(rbx, Address(rfp, wordSize)); + } + + __ bind(no_adjust); + // Normal exit, restore registers and exit. + RegisterSaver::restore_live_registers(masm, save_vectors); + __ ret(); + +#ifdef ASSERT + __ bind(bail); + __ stop("Attempting to adjust pc to skip safepoint poll but the return point is not what we expected"); +#endif + + // Make sure all code is generated + masm->flush(); + + // Fill-out other meta info + return SafepointBlob::create(&buffer, oop_maps, frame_size_in_words); +} + +// +// generate_resolve_blob - call resolution (static/virtual/opt-virtual/ic-miss +// +// Generate a stub that calls into vm to find out the proper destination +// of a java call. All the argument registers are live at this point +// but since this is generic code we don't know what they are and the caller +// must do any gc of the args. +// +RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const char* name) { + assert (StubRoutines::forward_exception_entry() != NULL, "must be generated before"); + + // allocate space for the code + ResourceMark rm; + + CodeBuffer buffer(name, 2000, 512); + MacroAssembler* masm = new MacroAssembler(&buffer); + + int frame_size_in_words; + + OopMapSet *oop_maps = new OopMapSet(); + OopMap* map = NULL; + + //int start = __ offset(); + address start_pc = __ pc(); + Register rbx = rmethod; + Register rax = V0; + + map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words); + + int frame_complete = __ offset(); + + Label retaddr; + //__ debug_stop("TODO:how set pc? jzy generate_resolve_blob"); + __ set_last_Java_frame(esp, noreg, retaddr, rscratch3, rscratch2_AT); + + __ movl(c_rarg0, rthread); + + __ call(RuntimeAddress(destination), &retaddr); + + // Set an oopmap for the call site. + // We need this not only for callee-saved registers, but also for volatile + // registers that the compiler might be keeping live across a safepoint. + // sw need setfpec1, so we should -4. 
+ oop_maps->add_gc_map( __ offset(retaddr, start_pc), map); + + // rax contains the address we are going to jump to assuming no exception got installed + + // clear last_Java_sp + __ reset_last_Java_frame(false); + // check for pending exceptions + Label pending; + __ cmpptr(Address(rthread, Thread::pending_exception_offset()), (int32_t)NULL_WORD); + __ jcc(Assembler::notEqual, pending); + + // get the returned Method* + __ get_vm_result_2(rbx, rthread); + __ stptr(rbx, Address(esp, RegisterSaver::rmethod_offset_in_bytes())); + + __ stptr(rax, Address(esp, RegisterSaver::v0_offset_in_bytes())); + + RegisterSaver::restore_live_registers(masm); + + // We are back the the original state on entry and ready to go. + + __ jmp(rax); + + // Pending exception after the safepoint + + __ bind(pending); + + RegisterSaver::restore_live_registers(masm); + + // exception pending => remove activation and forward to exception handler + + __ stptr(R0, Address(rthread, JavaThread::vm_result_offset())); + + __ ldptr(rax, Address(rthread, Thread::pending_exception_offset())); + __ jump(RuntimeAddress(StubRoutines::forward_exception_entry())); + + // ------------- + // make sure all code is generated + masm->flush(); + + // return the blob + // frame_size_words or bytes?? + return RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_in_words, oop_maps, true); +} + + +static const int native_invoker_code_size = MethodHandles::adapter_code_size; + +class NativeInvokerGenerator : public StubCodeGenerator { + address _call_target; + int _shadow_space_bytes; + + const GrowableArray& _input_registers; + const GrowableArray& _output_registers; + + int _frame_complete; + int _framesize; + OopMapSet* _oop_maps; +public: + NativeInvokerGenerator(CodeBuffer* buffer, + address call_target, + int shadow_space_bytes, + const GrowableArray& input_registers, + const GrowableArray& output_registers) + : StubCodeGenerator(buffer, PrintMethodHandleStubs), + _call_target(call_target), + _shadow_space_bytes(shadow_space_bytes), + _input_registers(input_registers), + _output_registers(output_registers), + _frame_complete(0), + _framesize(0), + _oop_maps(NULL) { + assert(_output_registers.length() <= 1 + || (_output_registers.length() == 2 && !_output_registers.at(1)->is_valid()), "no multi-reg returns"); + + } + void generate(); + + int spill_size_in_bytes() const { + if (_output_registers.length() == 0) { + return 0; + } + VMReg reg = _output_registers.at(0); + assert(reg->is_reg(), "must be a register"); + if (reg->is_Register()) { + return 8; + } else if (reg->is_FloatRegister()) { + return 16; + } else { + ShouldNotReachHere(); + } + return 0; + } + + void spill_out_registers() { + if (_output_registers.length() == 0) { + return; + } + VMReg reg = _output_registers.at(0); + assert(reg->is_reg(), "must be a register"); + MacroAssembler* masm = _masm; + if (reg->is_Register()) { + __ push(reg->as_Register()); + } else { + ShouldNotReachHere(); + } + } + + void fill_out_registers() { + if (_output_registers.length() == 0) { + return; + } + VMReg reg = _output_registers.at(0); + assert(reg->is_reg(), "must be a register"); + MacroAssembler* masm = _masm; + if (reg->is_Register()) { + __ pop(reg->as_Register()); + } else { + ShouldNotReachHere(); + } + } + + int frame_complete() const { + return _frame_complete; + } + + int framesize() const { + return (_framesize >> (LogBytesPerWord - LogBytesPerInt)); + } + + OopMapSet* oop_maps() const { + return _oop_maps; + } + + +private: +#ifdef ASSERT + bool 
target_uses_register(VMReg reg) { + return _input_registers.contains(reg) || _output_registers.contains(reg); +} +#endif +}; + +RuntimeStub* SharedRuntime::make_native_invoker(address call_target, + int shadow_space_bytes, + const GrowableArray& input_registers, + const GrowableArray& output_registers) { + int locs_size = 64; + CodeBuffer code("nep_invoker_blob", native_invoker_code_size, locs_size); + NativeInvokerGenerator g(&code, call_target, shadow_space_bytes, input_registers, output_registers); + g.generate(); + code.log_section_sizes("nep_invoker_blob"); + + RuntimeStub* stub = + RuntimeStub::new_runtime_stub("nep_invoker_blob", + &code, + g.frame_complete(), + g.framesize(), + g.oop_maps(), false); + return stub; +} + +void NativeInvokerGenerator::generate() { + + assert(!(target_uses_register(rthread->as_VMReg()) || target_uses_register(rscratch1->as_VMReg())), "Register conflict"); + + enum layout { + rbp_off, + rbp_off2, + return_off, + return_off2, + framesize // inclusive of return address + }; + + _framesize = align_up(framesize + ((_shadow_space_bytes + spill_size_in_bytes()) >> LogBytesPerInt), 4); + assert(is_even(_framesize/2), "sp not 16-byte aligned"); + + _oop_maps = new OopMapSet(); + MacroAssembler* masm = _masm; + + address start = __ pc(); + + __ enter(); + + // return address and rbp are already in place + __ subptr(rsp, (_framesize-4) << LogBytesPerInt, rsp); // prolog + + _frame_complete = __ pc() - start; + + address the_pc = __ pc(); + + __ set_last_Java_frame(rsp, rbp, (address)the_pc, rscratch1); + OopMap* map = new OopMap(_framesize, 0); + _oop_maps->add_gc_map(the_pc - start, map); + + // State transition + __ stw(_thread_in_native, Address(rthread, JavaThread::thread_state_offset())); + + __ call(RuntimeAddress(_call_target)); + + // __ restore_cpu_control_state_after_jni(); + + __ stw(_thread_in_native_trans, Address(rthread, JavaThread::thread_state_offset())); + + // Force this write out before the read below + __ memb(); + + Label L_after_safepoint_poll; + Label L_safepoint_poll_slow_path; + + __ safepoint_poll(L_safepoint_poll_slow_path, rthread, rscratch3, true, true, false); + __ cmpw(Address(rthread, JavaThread::suspend_flags_offset()), 0); + __ jcc(Assembler::notEqual, L_safepoint_poll_slow_path); + + __ bind(L_after_safepoint_poll); + + // change thread state + __ stw(_thread_in_Java, Address(rthread, JavaThread::thread_state_offset())); + + __ block_comment("reguard stack check"); + Label L_reguard; + Label L_after_reguard; + __ cmpw(Address(rthread, JavaThread::stack_guard_state_offset()), StackOverflow::stack_guard_yellow_reserved_disabled); + __ jcc(Assembler::equal, L_reguard); + __ bind(L_after_reguard); + + __ reset_last_Java_frame(rthread, true); + + __ leave(); // required for proper stackwalking of RuntimeStub frame + __ ret(); + + ////////////////////////////////////////////////////////////////////////////// + + __ block_comment("{ L_safepoint_poll_slow_path"); + __ bind(L_safepoint_poll_slow_path); + //__ vzeroupper(); + + spill_out_registers(); + + __ movl(c_rarg0, rthread); + __ movl(r12, rsp); // remember sp + __ subptr(rsp, frame::arg_reg_save_area_bytes, rsp); // windows + __ andptr(rsp, -16, rsp); // align stack as required by ABI + __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans))); + __ movl(rsp, r12); // restore sp + __ reinit_heapbase(); + + fill_out_registers(); + + __ jmp(L_after_safepoint_poll); + __ block_comment("} L_safepoint_poll_slow_path"); + + 
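The stub above follows the same thread-state protocol as the native wrapper: publish _thread_in_native before the call, publish _thread_in_native_trans plus a full barrier afterwards so the state is visible before the safepoint poll is read, and only return to _thread_in_Java once no safepoint or suspend request is pending. A rough standalone sketch of that ordering follows; the atomics are stand-ins for the thread-state word, the poll page and the suspend flags, and the state names only mirror the VM's:

```cpp
// Illustrative sketch only: the state/fence ordering around a native call.
// This is not VM code; the atomics below are stand-ins.
#include <atomic>
#include <cstdio>

enum ThreadState { in_Java, in_native, in_native_trans };

std::atomic<int>  state{in_Java};
std::atomic<bool> safepoint_or_suspend{false};

void call_native(void (*fn)()) {
  state.store(in_native, std::memory_order_release);
  fn();                                             // the native call itself
  state.store(in_native_trans, std::memory_order_relaxed);
  std::atomic_thread_fence(std::memory_order_seq_cst);  // the memb() above
  // The fence publishes the _trans state before the poll is read; otherwise
  // the VM thread could arm a safepoint without ever seeing this thread.
  if (safepoint_or_suspend.load(std::memory_order_acquire)) {
    std::printf("slow path: block for the safepoint / suspend request\n");
  }
  state.store(in_Java, std::memory_order_release);
}

int main() {
  call_native([] { std::printf("native code runs\n"); });
  return 0;
}
```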
////////////////////////////////////////////////////////////////////////////// + + __ block_comment("{ L_reguard"); + __ bind(L_reguard); + // __ vzeroupper(); + + spill_out_registers(); + + __ movl(r12, rsp); // remember sp + __ subptr(rsp, frame::arg_reg_save_area_bytes, rsp); // windows + __ andptr(rsp, -16, rsp); // align stack as required by ABI + __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages))); + __ movl(rsp, r12); // restore sp + __ reinit_heapbase(); + + fill_out_registers(); + + __ jmp(L_after_reguard); + + __ block_comment("} L_reguard"); + + ////////////////////////////////////////////////////////////////////////////// + + __ flush(); + +} + +/* ================================= CRC32 ================================= */ +/* ========================================================================= */ +juint crc_table[8][256] = + { + { + 0x00000000UL, 0x77073096UL, 0xee0e612cUL, 0x990951baUL, 0x076dc419UL, + 0x706af48fUL, 0xe963a535UL, 0x9e6495a3UL, 0x0edb8832UL, 0x79dcb8a4UL, + 0xe0d5e91eUL, 0x97d2d988UL, 0x09b64c2bUL, 0x7eb17cbdUL, 0xe7b82d07UL, + 0x90bf1d91UL, 0x1db71064UL, 0x6ab020f2UL, 0xf3b97148UL, 0x84be41deUL, + 0x1adad47dUL, 0x6ddde4ebUL, 0xf4d4b551UL, 0x83d385c7UL, 0x136c9856UL, + 0x646ba8c0UL, 0xfd62f97aUL, 0x8a65c9ecUL, 0x14015c4fUL, 0x63066cd9UL, + 0xfa0f3d63UL, 0x8d080df5UL, 0x3b6e20c8UL, 0x4c69105eUL, 0xd56041e4UL, + 0xa2677172UL, 0x3c03e4d1UL, 0x4b04d447UL, 0xd20d85fdUL, 0xa50ab56bUL, + 0x35b5a8faUL, 0x42b2986cUL, 0xdbbbc9d6UL, 0xacbcf940UL, 0x32d86ce3UL, + 0x45df5c75UL, 0xdcd60dcfUL, 0xabd13d59UL, 0x26d930acUL, 0x51de003aUL, + 0xc8d75180UL, 0xbfd06116UL, 0x21b4f4b5UL, 0x56b3c423UL, 0xcfba9599UL, + 0xb8bda50fUL, 0x2802b89eUL, 0x5f058808UL, 0xc60cd9b2UL, 0xb10be924UL, + 0x2f6f7c87UL, 0x58684c11UL, 0xc1611dabUL, 0xb6662d3dUL, 0x76dc4190UL, + 0x01db7106UL, 0x98d220bcUL, 0xefd5102aUL, 0x71b18589UL, 0x06b6b51fUL, + 0x9fbfe4a5UL, 0xe8b8d433UL, 0x7807c9a2UL, 0x0f00f934UL, 0x9609a88eUL, + 0xe10e9818UL, 0x7f6a0dbbUL, 0x086d3d2dUL, 0x91646c97UL, 0xe6635c01UL, + 0x6b6b51f4UL, 0x1c6c6162UL, 0x856530d8UL, 0xf262004eUL, 0x6c0695edUL, + 0x1b01a57bUL, 0x8208f4c1UL, 0xf50fc457UL, 0x65b0d9c6UL, 0x12b7e950UL, + 0x8bbeb8eaUL, 0xfcb9887cUL, 0x62dd1ddfUL, 0x15da2d49UL, 0x8cd37cf3UL, + 0xfbd44c65UL, 0x4db26158UL, 0x3ab551ceUL, 0xa3bc0074UL, 0xd4bb30e2UL, + 0x4adfa541UL, 0x3dd895d7UL, 0xa4d1c46dUL, 0xd3d6f4fbUL, 0x4369e96aUL, + 0x346ed9fcUL, 0xad678846UL, 0xda60b8d0UL, 0x44042d73UL, 0x33031de5UL, + 0xaa0a4c5fUL, 0xdd0d7cc9UL, 0x5005713cUL, 0x270241aaUL, 0xbe0b1010UL, + 0xc90c2086UL, 0x5768b525UL, 0x206f85b3UL, 0xb966d409UL, 0xce61e49fUL, + 0x5edef90eUL, 0x29d9c998UL, 0xb0d09822UL, 0xc7d7a8b4UL, 0x59b33d17UL, + 0x2eb40d81UL, 0xb7bd5c3bUL, 0xc0ba6cadUL, 0xedb88320UL, 0x9abfb3b6UL, + 0x03b6e20cUL, 0x74b1d29aUL, 0xead54739UL, 0x9dd277afUL, 0x04db2615UL, + 0x73dc1683UL, 0xe3630b12UL, 0x94643b84UL, 0x0d6d6a3eUL, 0x7a6a5aa8UL, + 0xe40ecf0bUL, 0x9309ff9dUL, 0x0a00ae27UL, 0x7d079eb1UL, 0xf00f9344UL, + 0x8708a3d2UL, 0x1e01f268UL, 0x6906c2feUL, 0xf762575dUL, 0x806567cbUL, + 0x196c3671UL, 0x6e6b06e7UL, 0xfed41b76UL, 0x89d32be0UL, 0x10da7a5aUL, + 0x67dd4accUL, 0xf9b9df6fUL, 0x8ebeeff9UL, 0x17b7be43UL, 0x60b08ed5UL, + 0xd6d6a3e8UL, 0xa1d1937eUL, 0x38d8c2c4UL, 0x4fdff252UL, 0xd1bb67f1UL, + 0xa6bc5767UL, 0x3fb506ddUL, 0x48b2364bUL, 0xd80d2bdaUL, 0xaf0a1b4cUL, + 0x36034af6UL, 0x41047a60UL, 0xdf60efc3UL, 0xa867df55UL, 0x316e8eefUL, + 0x4669be79UL, 0xcb61b38cUL, 0xbc66831aUL, 0x256fd2a0UL, 0x5268e236UL, + 0xcc0c7795UL, 0xbb0b4703UL, 0x220216b9UL, 0x5505262fUL, 
0xc5ba3bbeUL, + 0xb2bd0b28UL, 0x2bb45a92UL, 0x5cb36a04UL, 0xc2d7ffa7UL, 0xb5d0cf31UL, + 0x2cd99e8bUL, 0x5bdeae1dUL, 0x9b64c2b0UL, 0xec63f226UL, 0x756aa39cUL, + 0x026d930aUL, 0x9c0906a9UL, 0xeb0e363fUL, 0x72076785UL, 0x05005713UL, + 0x95bf4a82UL, 0xe2b87a14UL, 0x7bb12baeUL, 0x0cb61b38UL, 0x92d28e9bUL, + 0xe5d5be0dUL, 0x7cdcefb7UL, 0x0bdbdf21UL, 0x86d3d2d4UL, 0xf1d4e242UL, + 0x68ddb3f8UL, 0x1fda836eUL, 0x81be16cdUL, 0xf6b9265bUL, 0x6fb077e1UL, + 0x18b74777UL, 0x88085ae6UL, 0xff0f6a70UL, 0x66063bcaUL, 0x11010b5cUL, + 0x8f659effUL, 0xf862ae69UL, 0x616bffd3UL, 0x166ccf45UL, 0xa00ae278UL, + 0xd70dd2eeUL, 0x4e048354UL, 0x3903b3c2UL, 0xa7672661UL, 0xd06016f7UL, + 0x4969474dUL, 0x3e6e77dbUL, 0xaed16a4aUL, 0xd9d65adcUL, 0x40df0b66UL, + 0x37d83bf0UL, 0xa9bcae53UL, 0xdebb9ec5UL, 0x47b2cf7fUL, 0x30b5ffe9UL, + 0xbdbdf21cUL, 0xcabac28aUL, 0x53b39330UL, 0x24b4a3a6UL, 0xbad03605UL, + 0xcdd70693UL, 0x54de5729UL, 0x23d967bfUL, 0xb3667a2eUL, 0xc4614ab8UL, + 0x5d681b02UL, 0x2a6f2b94UL, 0xb40bbe37UL, 0xc30c8ea1UL, 0x5a05df1bUL, + 0x2d02ef8dUL + }, + { + 0x00000000UL, 0x191b3141UL, 0x32366282UL, 0x2b2d53c3UL, 0x646cc504UL, + 0x7d77f445UL, 0x565aa786UL, 0x4f4196c7UL, 0xc8d98a08UL, 0xd1c2bb49UL, + 0xfaefe88aUL, 0xe3f4d9cbUL, 0xacb54f0cUL, 0xb5ae7e4dUL, 0x9e832d8eUL, + 0x87981ccfUL, 0x4ac21251UL, 0x53d92310UL, 0x78f470d3UL, 0x61ef4192UL, + 0x2eaed755UL, 0x37b5e614UL, 0x1c98b5d7UL, 0x05838496UL, 0x821b9859UL, + 0x9b00a918UL, 0xb02dfadbUL, 0xa936cb9aUL, 0xe6775d5dUL, 0xff6c6c1cUL, + 0xd4413fdfUL, 0xcd5a0e9eUL, 0x958424a2UL, 0x8c9f15e3UL, 0xa7b24620UL, + 0xbea97761UL, 0xf1e8e1a6UL, 0xe8f3d0e7UL, 0xc3de8324UL, 0xdac5b265UL, + 0x5d5daeaaUL, 0x44469febUL, 0x6f6bcc28UL, 0x7670fd69UL, 0x39316baeUL, + 0x202a5aefUL, 0x0b07092cUL, 0x121c386dUL, 0xdf4636f3UL, 0xc65d07b2UL, + 0xed705471UL, 0xf46b6530UL, 0xbb2af3f7UL, 0xa231c2b6UL, 0x891c9175UL, + 0x9007a034UL, 0x179fbcfbUL, 0x0e848dbaUL, 0x25a9de79UL, 0x3cb2ef38UL, + 0x73f379ffUL, 0x6ae848beUL, 0x41c51b7dUL, 0x58de2a3cUL, 0xf0794f05UL, + 0xe9627e44UL, 0xc24f2d87UL, 0xdb541cc6UL, 0x94158a01UL, 0x8d0ebb40UL, + 0xa623e883UL, 0xbf38d9c2UL, 0x38a0c50dUL, 0x21bbf44cUL, 0x0a96a78fUL, + 0x138d96ceUL, 0x5ccc0009UL, 0x45d73148UL, 0x6efa628bUL, 0x77e153caUL, + 0xbabb5d54UL, 0xa3a06c15UL, 0x888d3fd6UL, 0x91960e97UL, 0xded79850UL, + 0xc7cca911UL, 0xece1fad2UL, 0xf5facb93UL, 0x7262d75cUL, 0x6b79e61dUL, + 0x4054b5deUL, 0x594f849fUL, 0x160e1258UL, 0x0f152319UL, 0x243870daUL, + 0x3d23419bUL, 0x65fd6ba7UL, 0x7ce65ae6UL, 0x57cb0925UL, 0x4ed03864UL, + 0x0191aea3UL, 0x188a9fe2UL, 0x33a7cc21UL, 0x2abcfd60UL, 0xad24e1afUL, + 0xb43fd0eeUL, 0x9f12832dUL, 0x8609b26cUL, 0xc94824abUL, 0xd05315eaUL, + 0xfb7e4629UL, 0xe2657768UL, 0x2f3f79f6UL, 0x362448b7UL, 0x1d091b74UL, + 0x04122a35UL, 0x4b53bcf2UL, 0x52488db3UL, 0x7965de70UL, 0x607eef31UL, + 0xe7e6f3feUL, 0xfefdc2bfUL, 0xd5d0917cUL, 0xcccba03dUL, 0x838a36faUL, + 0x9a9107bbUL, 0xb1bc5478UL, 0xa8a76539UL, 0x3b83984bUL, 0x2298a90aUL, + 0x09b5fac9UL, 0x10aecb88UL, 0x5fef5d4fUL, 0x46f46c0eUL, 0x6dd93fcdUL, + 0x74c20e8cUL, 0xf35a1243UL, 0xea412302UL, 0xc16c70c1UL, 0xd8774180UL, + 0x9736d747UL, 0x8e2de606UL, 0xa500b5c5UL, 0xbc1b8484UL, 0x71418a1aUL, + 0x685abb5bUL, 0x4377e898UL, 0x5a6cd9d9UL, 0x152d4f1eUL, 0x0c367e5fUL, + 0x271b2d9cUL, 0x3e001cddUL, 0xb9980012UL, 0xa0833153UL, 0x8bae6290UL, + 0x92b553d1UL, 0xddf4c516UL, 0xc4eff457UL, 0xefc2a794UL, 0xf6d996d5UL, + 0xae07bce9UL, 0xb71c8da8UL, 0x9c31de6bUL, 0x852aef2aUL, 0xca6b79edUL, + 0xd37048acUL, 0xf85d1b6fUL, 0xe1462a2eUL, 0x66de36e1UL, 0x7fc507a0UL, + 0x54e85463UL, 0x4df36522UL, 0x02b2f3e5UL, 0x1ba9c2a4UL, 
0x30849167UL, + 0x299fa026UL, 0xe4c5aeb8UL, 0xfdde9ff9UL, 0xd6f3cc3aUL, 0xcfe8fd7bUL, + 0x80a96bbcUL, 0x99b25afdUL, 0xb29f093eUL, 0xab84387fUL, 0x2c1c24b0UL, + 0x350715f1UL, 0x1e2a4632UL, 0x07317773UL, 0x4870e1b4UL, 0x516bd0f5UL, + 0x7a468336UL, 0x635db277UL, 0xcbfad74eUL, 0xd2e1e60fUL, 0xf9ccb5ccUL, + 0xe0d7848dUL, 0xaf96124aUL, 0xb68d230bUL, 0x9da070c8UL, 0x84bb4189UL, + 0x03235d46UL, 0x1a386c07UL, 0x31153fc4UL, 0x280e0e85UL, 0x674f9842UL, + 0x7e54a903UL, 0x5579fac0UL, 0x4c62cb81UL, 0x8138c51fUL, 0x9823f45eUL, + 0xb30ea79dUL, 0xaa1596dcUL, 0xe554001bUL, 0xfc4f315aUL, 0xd7626299UL, + 0xce7953d8UL, 0x49e14f17UL, 0x50fa7e56UL, 0x7bd72d95UL, 0x62cc1cd4UL, + 0x2d8d8a13UL, 0x3496bb52UL, 0x1fbbe891UL, 0x06a0d9d0UL, 0x5e7ef3ecUL, + 0x4765c2adUL, 0x6c48916eUL, 0x7553a02fUL, 0x3a1236e8UL, 0x230907a9UL, + 0x0824546aUL, 0x113f652bUL, 0x96a779e4UL, 0x8fbc48a5UL, 0xa4911b66UL, + 0xbd8a2a27UL, 0xf2cbbce0UL, 0xebd08da1UL, 0xc0fdde62UL, 0xd9e6ef23UL, + 0x14bce1bdUL, 0x0da7d0fcUL, 0x268a833fUL, 0x3f91b27eUL, 0x70d024b9UL, + 0x69cb15f8UL, 0x42e6463bUL, 0x5bfd777aUL, 0xdc656bb5UL, 0xc57e5af4UL, + 0xee530937UL, 0xf7483876UL, 0xb809aeb1UL, 0xa1129ff0UL, 0x8a3fcc33UL, + 0x9324fd72UL + }, + { + 0x00000000UL, 0x01c26a37UL, 0x0384d46eUL, 0x0246be59UL, 0x0709a8dcUL, + 0x06cbc2ebUL, 0x048d7cb2UL, 0x054f1685UL, 0x0e1351b8UL, 0x0fd13b8fUL, + 0x0d9785d6UL, 0x0c55efe1UL, 0x091af964UL, 0x08d89353UL, 0x0a9e2d0aUL, + 0x0b5c473dUL, 0x1c26a370UL, 0x1de4c947UL, 0x1fa2771eUL, 0x1e601d29UL, + 0x1b2f0bacUL, 0x1aed619bUL, 0x18abdfc2UL, 0x1969b5f5UL, 0x1235f2c8UL, + 0x13f798ffUL, 0x11b126a6UL, 0x10734c91UL, 0x153c5a14UL, 0x14fe3023UL, + 0x16b88e7aUL, 0x177ae44dUL, 0x384d46e0UL, 0x398f2cd7UL, 0x3bc9928eUL, + 0x3a0bf8b9UL, 0x3f44ee3cUL, 0x3e86840bUL, 0x3cc03a52UL, 0x3d025065UL, + 0x365e1758UL, 0x379c7d6fUL, 0x35dac336UL, 0x3418a901UL, 0x3157bf84UL, + 0x3095d5b3UL, 0x32d36beaUL, 0x331101ddUL, 0x246be590UL, 0x25a98fa7UL, + 0x27ef31feUL, 0x262d5bc9UL, 0x23624d4cUL, 0x22a0277bUL, 0x20e69922UL, + 0x2124f315UL, 0x2a78b428UL, 0x2bbade1fUL, 0x29fc6046UL, 0x283e0a71UL, + 0x2d711cf4UL, 0x2cb376c3UL, 0x2ef5c89aUL, 0x2f37a2adUL, 0x709a8dc0UL, + 0x7158e7f7UL, 0x731e59aeUL, 0x72dc3399UL, 0x7793251cUL, 0x76514f2bUL, + 0x7417f172UL, 0x75d59b45UL, 0x7e89dc78UL, 0x7f4bb64fUL, 0x7d0d0816UL, + 0x7ccf6221UL, 0x798074a4UL, 0x78421e93UL, 0x7a04a0caUL, 0x7bc6cafdUL, + 0x6cbc2eb0UL, 0x6d7e4487UL, 0x6f38fadeUL, 0x6efa90e9UL, 0x6bb5866cUL, + 0x6a77ec5bUL, 0x68315202UL, 0x69f33835UL, 0x62af7f08UL, 0x636d153fUL, + 0x612bab66UL, 0x60e9c151UL, 0x65a6d7d4UL, 0x6464bde3UL, 0x662203baUL, + 0x67e0698dUL, 0x48d7cb20UL, 0x4915a117UL, 0x4b531f4eUL, 0x4a917579UL, + 0x4fde63fcUL, 0x4e1c09cbUL, 0x4c5ab792UL, 0x4d98dda5UL, 0x46c49a98UL, + 0x4706f0afUL, 0x45404ef6UL, 0x448224c1UL, 0x41cd3244UL, 0x400f5873UL, + 0x4249e62aUL, 0x438b8c1dUL, 0x54f16850UL, 0x55330267UL, 0x5775bc3eUL, + 0x56b7d609UL, 0x53f8c08cUL, 0x523aaabbUL, 0x507c14e2UL, 0x51be7ed5UL, + 0x5ae239e8UL, 0x5b2053dfUL, 0x5966ed86UL, 0x58a487b1UL, 0x5deb9134UL, + 0x5c29fb03UL, 0x5e6f455aUL, 0x5fad2f6dUL, 0xe1351b80UL, 0xe0f771b7UL, + 0xe2b1cfeeUL, 0xe373a5d9UL, 0xe63cb35cUL, 0xe7fed96bUL, 0xe5b86732UL, + 0xe47a0d05UL, 0xef264a38UL, 0xeee4200fUL, 0xeca29e56UL, 0xed60f461UL, + 0xe82fe2e4UL, 0xe9ed88d3UL, 0xebab368aUL, 0xea695cbdUL, 0xfd13b8f0UL, + 0xfcd1d2c7UL, 0xfe976c9eUL, 0xff5506a9UL, 0xfa1a102cUL, 0xfbd87a1bUL, + 0xf99ec442UL, 0xf85cae75UL, 0xf300e948UL, 0xf2c2837fUL, 0xf0843d26UL, + 0xf1465711UL, 0xf4094194UL, 0xf5cb2ba3UL, 0xf78d95faUL, 0xf64fffcdUL, + 0xd9785d60UL, 0xd8ba3757UL, 0xdafc890eUL, 0xdb3ee339UL, 
0xde71f5bcUL, + 0xdfb39f8bUL, 0xddf521d2UL, 0xdc374be5UL, 0xd76b0cd8UL, 0xd6a966efUL, + 0xd4efd8b6UL, 0xd52db281UL, 0xd062a404UL, 0xd1a0ce33UL, 0xd3e6706aUL, + 0xd2241a5dUL, 0xc55efe10UL, 0xc49c9427UL, 0xc6da2a7eUL, 0xc7184049UL, + 0xc25756ccUL, 0xc3953cfbUL, 0xc1d382a2UL, 0xc011e895UL, 0xcb4dafa8UL, + 0xca8fc59fUL, 0xc8c97bc6UL, 0xc90b11f1UL, 0xcc440774UL, 0xcd866d43UL, + 0xcfc0d31aUL, 0xce02b92dUL, 0x91af9640UL, 0x906dfc77UL, 0x922b422eUL, + 0x93e92819UL, 0x96a63e9cUL, 0x976454abUL, 0x9522eaf2UL, 0x94e080c5UL, + 0x9fbcc7f8UL, 0x9e7eadcfUL, 0x9c381396UL, 0x9dfa79a1UL, 0x98b56f24UL, + 0x99770513UL, 0x9b31bb4aUL, 0x9af3d17dUL, 0x8d893530UL, 0x8c4b5f07UL, + 0x8e0de15eUL, 0x8fcf8b69UL, 0x8a809decUL, 0x8b42f7dbUL, 0x89044982UL, + 0x88c623b5UL, 0x839a6488UL, 0x82580ebfUL, 0x801eb0e6UL, 0x81dcdad1UL, + 0x8493cc54UL, 0x8551a663UL, 0x8717183aUL, 0x86d5720dUL, 0xa9e2d0a0UL, + 0xa820ba97UL, 0xaa6604ceUL, 0xaba46ef9UL, 0xaeeb787cUL, 0xaf29124bUL, + 0xad6fac12UL, 0xacadc625UL, 0xa7f18118UL, 0xa633eb2fUL, 0xa4755576UL, + 0xa5b73f41UL, 0xa0f829c4UL, 0xa13a43f3UL, 0xa37cfdaaUL, 0xa2be979dUL, + 0xb5c473d0UL, 0xb40619e7UL, 0xb640a7beUL, 0xb782cd89UL, 0xb2cddb0cUL, + 0xb30fb13bUL, 0xb1490f62UL, 0xb08b6555UL, 0xbbd72268UL, 0xba15485fUL, + 0xb853f606UL, 0xb9919c31UL, 0xbcde8ab4UL, 0xbd1ce083UL, 0xbf5a5edaUL, + 0xbe9834edUL + }, + { + 0x00000000UL, 0xb8bc6765UL, 0xaa09c88bUL, 0x12b5afeeUL, 0x8f629757UL, + 0x37def032UL, 0x256b5fdcUL, 0x9dd738b9UL, 0xc5b428efUL, 0x7d084f8aUL, + 0x6fbde064UL, 0xd7018701UL, 0x4ad6bfb8UL, 0xf26ad8ddUL, 0xe0df7733UL, + 0x58631056UL, 0x5019579fUL, 0xe8a530faUL, 0xfa109f14UL, 0x42acf871UL, + 0xdf7bc0c8UL, 0x67c7a7adUL, 0x75720843UL, 0xcdce6f26UL, 0x95ad7f70UL, + 0x2d111815UL, 0x3fa4b7fbUL, 0x8718d09eUL, 0x1acfe827UL, 0xa2738f42UL, + 0xb0c620acUL, 0x087a47c9UL, 0xa032af3eUL, 0x188ec85bUL, 0x0a3b67b5UL, + 0xb28700d0UL, 0x2f503869UL, 0x97ec5f0cUL, 0x8559f0e2UL, 0x3de59787UL, + 0x658687d1UL, 0xdd3ae0b4UL, 0xcf8f4f5aUL, 0x7733283fUL, 0xeae41086UL, + 0x525877e3UL, 0x40edd80dUL, 0xf851bf68UL, 0xf02bf8a1UL, 0x48979fc4UL, + 0x5a22302aUL, 0xe29e574fUL, 0x7f496ff6UL, 0xc7f50893UL, 0xd540a77dUL, + 0x6dfcc018UL, 0x359fd04eUL, 0x8d23b72bUL, 0x9f9618c5UL, 0x272a7fa0UL, + 0xbafd4719UL, 0x0241207cUL, 0x10f48f92UL, 0xa848e8f7UL, 0x9b14583dUL, + 0x23a83f58UL, 0x311d90b6UL, 0x89a1f7d3UL, 0x1476cf6aUL, 0xaccaa80fUL, + 0xbe7f07e1UL, 0x06c36084UL, 0x5ea070d2UL, 0xe61c17b7UL, 0xf4a9b859UL, + 0x4c15df3cUL, 0xd1c2e785UL, 0x697e80e0UL, 0x7bcb2f0eUL, 0xc377486bUL, + 0xcb0d0fa2UL, 0x73b168c7UL, 0x6104c729UL, 0xd9b8a04cUL, 0x446f98f5UL, + 0xfcd3ff90UL, 0xee66507eUL, 0x56da371bUL, 0x0eb9274dUL, 0xb6054028UL, + 0xa4b0efc6UL, 0x1c0c88a3UL, 0x81dbb01aUL, 0x3967d77fUL, 0x2bd27891UL, + 0x936e1ff4UL, 0x3b26f703UL, 0x839a9066UL, 0x912f3f88UL, 0x299358edUL, + 0xb4446054UL, 0x0cf80731UL, 0x1e4da8dfUL, 0xa6f1cfbaUL, 0xfe92dfecUL, + 0x462eb889UL, 0x549b1767UL, 0xec277002UL, 0x71f048bbUL, 0xc94c2fdeUL, + 0xdbf98030UL, 0x6345e755UL, 0x6b3fa09cUL, 0xd383c7f9UL, 0xc1366817UL, + 0x798a0f72UL, 0xe45d37cbUL, 0x5ce150aeUL, 0x4e54ff40UL, 0xf6e89825UL, + 0xae8b8873UL, 0x1637ef16UL, 0x048240f8UL, 0xbc3e279dUL, 0x21e91f24UL, + 0x99557841UL, 0x8be0d7afUL, 0x335cb0caUL, 0xed59b63bUL, 0x55e5d15eUL, + 0x47507eb0UL, 0xffec19d5UL, 0x623b216cUL, 0xda874609UL, 0xc832e9e7UL, + 0x708e8e82UL, 0x28ed9ed4UL, 0x9051f9b1UL, 0x82e4565fUL, 0x3a58313aUL, + 0xa78f0983UL, 0x1f336ee6UL, 0x0d86c108UL, 0xb53aa66dUL, 0xbd40e1a4UL, + 0x05fc86c1UL, 0x1749292fUL, 0xaff54e4aUL, 0x322276f3UL, 0x8a9e1196UL, + 0x982bbe78UL, 0x2097d91dUL, 0x78f4c94bUL, 0xc048ae2eUL, 
0xd2fd01c0UL, + 0x6a4166a5UL, 0xf7965e1cUL, 0x4f2a3979UL, 0x5d9f9697UL, 0xe523f1f2UL, + 0x4d6b1905UL, 0xf5d77e60UL, 0xe762d18eUL, 0x5fdeb6ebUL, 0xc2098e52UL, + 0x7ab5e937UL, 0x680046d9UL, 0xd0bc21bcUL, 0x88df31eaUL, 0x3063568fUL, + 0x22d6f961UL, 0x9a6a9e04UL, 0x07bda6bdUL, 0xbf01c1d8UL, 0xadb46e36UL, + 0x15080953UL, 0x1d724e9aUL, 0xa5ce29ffUL, 0xb77b8611UL, 0x0fc7e174UL, + 0x9210d9cdUL, 0x2aacbea8UL, 0x38191146UL, 0x80a57623UL, 0xd8c66675UL, + 0x607a0110UL, 0x72cfaefeUL, 0xca73c99bUL, 0x57a4f122UL, 0xef189647UL, + 0xfdad39a9UL, 0x45115eccUL, 0x764dee06UL, 0xcef18963UL, 0xdc44268dUL, + 0x64f841e8UL, 0xf92f7951UL, 0x41931e34UL, 0x5326b1daUL, 0xeb9ad6bfUL, + 0xb3f9c6e9UL, 0x0b45a18cUL, 0x19f00e62UL, 0xa14c6907UL, 0x3c9b51beUL, + 0x842736dbUL, 0x96929935UL, 0x2e2efe50UL, 0x2654b999UL, 0x9ee8defcUL, + 0x8c5d7112UL, 0x34e11677UL, 0xa9362eceUL, 0x118a49abUL, 0x033fe645UL, + 0xbb838120UL, 0xe3e09176UL, 0x5b5cf613UL, 0x49e959fdUL, 0xf1553e98UL, + 0x6c820621UL, 0xd43e6144UL, 0xc68bceaaUL, 0x7e37a9cfUL, 0xd67f4138UL, + 0x6ec3265dUL, 0x7c7689b3UL, 0xc4caeed6UL, 0x591dd66fUL, 0xe1a1b10aUL, + 0xf3141ee4UL, 0x4ba87981UL, 0x13cb69d7UL, 0xab770eb2UL, 0xb9c2a15cUL, + 0x017ec639UL, 0x9ca9fe80UL, 0x241599e5UL, 0x36a0360bUL, 0x8e1c516eUL, + 0x866616a7UL, 0x3eda71c2UL, 0x2c6fde2cUL, 0x94d3b949UL, 0x090481f0UL, + 0xb1b8e695UL, 0xa30d497bUL, 0x1bb12e1eUL, 0x43d23e48UL, 0xfb6e592dUL, + 0xe9dbf6c3UL, 0x516791a6UL, 0xccb0a91fUL, 0x740cce7aUL, 0x66b96194UL, + 0xde0506f1UL + }, + { + 0x00000000UL, 0x96300777UL, 0x2c610eeeUL, 0xba510999UL, 0x19c46d07UL, + 0x8ff46a70UL, 0x35a563e9UL, 0xa395649eUL, 0x3288db0eUL, 0xa4b8dc79UL, + 0x1ee9d5e0UL, 0x88d9d297UL, 0x2b4cb609UL, 0xbd7cb17eUL, 0x072db8e7UL, + 0x911dbf90UL, 0x6410b71dUL, 0xf220b06aUL, 0x4871b9f3UL, 0xde41be84UL, + 0x7dd4da1aUL, 0xebe4dd6dUL, 0x51b5d4f4UL, 0xc785d383UL, 0x56986c13UL, + 0xc0a86b64UL, 0x7af962fdUL, 0xecc9658aUL, 0x4f5c0114UL, 0xd96c0663UL, + 0x633d0ffaUL, 0xf50d088dUL, 0xc8206e3bUL, 0x5e10694cUL, 0xe44160d5UL, + 0x727167a2UL, 0xd1e4033cUL, 0x47d4044bUL, 0xfd850dd2UL, 0x6bb50aa5UL, + 0xfaa8b535UL, 0x6c98b242UL, 0xd6c9bbdbUL, 0x40f9bcacUL, 0xe36cd832UL, + 0x755cdf45UL, 0xcf0dd6dcUL, 0x593dd1abUL, 0xac30d926UL, 0x3a00de51UL, + 0x8051d7c8UL, 0x1661d0bfUL, 0xb5f4b421UL, 0x23c4b356UL, 0x9995bacfUL, + 0x0fa5bdb8UL, 0x9eb80228UL, 0x0888055fUL, 0xb2d90cc6UL, 0x24e90bb1UL, + 0x877c6f2fUL, 0x114c6858UL, 0xab1d61c1UL, 0x3d2d66b6UL, 0x9041dc76UL, + 0x0671db01UL, 0xbc20d298UL, 0x2a10d5efUL, 0x8985b171UL, 0x1fb5b606UL, + 0xa5e4bf9fUL, 0x33d4b8e8UL, 0xa2c90778UL, 0x34f9000fUL, 0x8ea80996UL, + 0x18980ee1UL, 0xbb0d6a7fUL, 0x2d3d6d08UL, 0x976c6491UL, 0x015c63e6UL, + 0xf4516b6bUL, 0x62616c1cUL, 0xd8306585UL, 0x4e0062f2UL, 0xed95066cUL, + 0x7ba5011bUL, 0xc1f40882UL, 0x57c40ff5UL, 0xc6d9b065UL, 0x50e9b712UL, + 0xeab8be8bUL, 0x7c88b9fcUL, 0xdf1ddd62UL, 0x492dda15UL, 0xf37cd38cUL, + 0x654cd4fbUL, 0x5861b24dUL, 0xce51b53aUL, 0x7400bca3UL, 0xe230bbd4UL, + 0x41a5df4aUL, 0xd795d83dUL, 0x6dc4d1a4UL, 0xfbf4d6d3UL, 0x6ae96943UL, + 0xfcd96e34UL, 0x468867adUL, 0xd0b860daUL, 0x732d0444UL, 0xe51d0333UL, + 0x5f4c0aaaUL, 0xc97c0dddUL, 0x3c710550UL, 0xaa410227UL, 0x10100bbeUL, + 0x86200cc9UL, 0x25b56857UL, 0xb3856f20UL, 0x09d466b9UL, 0x9fe461ceUL, + 0x0ef9de5eUL, 0x98c9d929UL, 0x2298d0b0UL, 0xb4a8d7c7UL, 0x173db359UL, + 0x810db42eUL, 0x3b5cbdb7UL, 0xad6cbac0UL, 0x2083b8edUL, 0xb6b3bf9aUL, + 0x0ce2b603UL, 0x9ad2b174UL, 0x3947d5eaUL, 0xaf77d29dUL, 0x1526db04UL, + 0x8316dc73UL, 0x120b63e3UL, 0x843b6494UL, 0x3e6a6d0dUL, 0xa85a6a7aUL, + 0x0bcf0ee4UL, 0x9dff0993UL, 0x27ae000aUL, 0xb19e077dUL, 
0x44930ff0UL, + 0xd2a30887UL, 0x68f2011eUL, 0xfec20669UL, 0x5d5762f7UL, 0xcb676580UL, + 0x71366c19UL, 0xe7066b6eUL, 0x761bd4feUL, 0xe02bd389UL, 0x5a7ada10UL, + 0xcc4add67UL, 0x6fdfb9f9UL, 0xf9efbe8eUL, 0x43beb717UL, 0xd58eb060UL, + 0xe8a3d6d6UL, 0x7e93d1a1UL, 0xc4c2d838UL, 0x52f2df4fUL, 0xf167bbd1UL, + 0x6757bca6UL, 0xdd06b53fUL, 0x4b36b248UL, 0xda2b0dd8UL, 0x4c1b0aafUL, + 0xf64a0336UL, 0x607a0441UL, 0xc3ef60dfUL, 0x55df67a8UL, 0xef8e6e31UL, + 0x79be6946UL, 0x8cb361cbUL, 0x1a8366bcUL, 0xa0d26f25UL, 0x36e26852UL, + 0x95770cccUL, 0x03470bbbUL, 0xb9160222UL, 0x2f260555UL, 0xbe3bbac5UL, + 0x280bbdb2UL, 0x925ab42bUL, 0x046ab35cUL, 0xa7ffd7c2UL, 0x31cfd0b5UL, + 0x8b9ed92cUL, 0x1daede5bUL, 0xb0c2649bUL, 0x26f263ecUL, 0x9ca36a75UL, + 0x0a936d02UL, 0xa906099cUL, 0x3f360eebUL, 0x85670772UL, 0x13570005UL, + 0x824abf95UL, 0x147ab8e2UL, 0xae2bb17bUL, 0x381bb60cUL, 0x9b8ed292UL, + 0x0dbed5e5UL, 0xb7efdc7cUL, 0x21dfdb0bUL, 0xd4d2d386UL, 0x42e2d4f1UL, + 0xf8b3dd68UL, 0x6e83da1fUL, 0xcd16be81UL, 0x5b26b9f6UL, 0xe177b06fUL, + 0x7747b718UL, 0xe65a0888UL, 0x706a0fffUL, 0xca3b0666UL, 0x5c0b0111UL, + 0xff9e658fUL, 0x69ae62f8UL, 0xd3ff6b61UL, 0x45cf6c16UL, 0x78e20aa0UL, + 0xeed20dd7UL, 0x5483044eUL, 0xc2b30339UL, 0x612667a7UL, 0xf71660d0UL, + 0x4d476949UL, 0xdb776e3eUL, 0x4a6ad1aeUL, 0xdc5ad6d9UL, 0x660bdf40UL, + 0xf03bd837UL, 0x53aebca9UL, 0xc59ebbdeUL, 0x7fcfb247UL, 0xe9ffb530UL, + 0x1cf2bdbdUL, 0x8ac2bacaUL, 0x3093b353UL, 0xa6a3b424UL, 0x0536d0baUL, + 0x9306d7cdUL, 0x2957de54UL, 0xbf67d923UL, 0x2e7a66b3UL, 0xb84a61c4UL, + 0x021b685dUL, 0x942b6f2aUL, 0x37be0bb4UL, 0xa18e0cc3UL, 0x1bdf055aUL, + 0x8def022dUL + }, + { + 0x00000000UL, 0x41311b19UL, 0x82623632UL, 0xc3532d2bUL, 0x04c56c64UL, + 0x45f4777dUL, 0x86a75a56UL, 0xc796414fUL, 0x088ad9c8UL, 0x49bbc2d1UL, + 0x8ae8effaUL, 0xcbd9f4e3UL, 0x0c4fb5acUL, 0x4d7eaeb5UL, 0x8e2d839eUL, + 0xcf1c9887UL, 0x5112c24aUL, 0x1023d953UL, 0xd370f478UL, 0x9241ef61UL, + 0x55d7ae2eUL, 0x14e6b537UL, 0xd7b5981cUL, 0x96848305UL, 0x59981b82UL, + 0x18a9009bUL, 0xdbfa2db0UL, 0x9acb36a9UL, 0x5d5d77e6UL, 0x1c6c6cffUL, + 0xdf3f41d4UL, 0x9e0e5acdUL, 0xa2248495UL, 0xe3159f8cUL, 0x2046b2a7UL, + 0x6177a9beUL, 0xa6e1e8f1UL, 0xe7d0f3e8UL, 0x2483dec3UL, 0x65b2c5daUL, + 0xaaae5d5dUL, 0xeb9f4644UL, 0x28cc6b6fUL, 0x69fd7076UL, 0xae6b3139UL, + 0xef5a2a20UL, 0x2c09070bUL, 0x6d381c12UL, 0xf33646dfUL, 0xb2075dc6UL, + 0x715470edUL, 0x30656bf4UL, 0xf7f32abbUL, 0xb6c231a2UL, 0x75911c89UL, + 0x34a00790UL, 0xfbbc9f17UL, 0xba8d840eUL, 0x79dea925UL, 0x38efb23cUL, + 0xff79f373UL, 0xbe48e86aUL, 0x7d1bc541UL, 0x3c2ade58UL, 0x054f79f0UL, + 0x447e62e9UL, 0x872d4fc2UL, 0xc61c54dbUL, 0x018a1594UL, 0x40bb0e8dUL, + 0x83e823a6UL, 0xc2d938bfUL, 0x0dc5a038UL, 0x4cf4bb21UL, 0x8fa7960aUL, + 0xce968d13UL, 0x0900cc5cUL, 0x4831d745UL, 0x8b62fa6eUL, 0xca53e177UL, + 0x545dbbbaUL, 0x156ca0a3UL, 0xd63f8d88UL, 0x970e9691UL, 0x5098d7deUL, + 0x11a9ccc7UL, 0xd2fae1ecUL, 0x93cbfaf5UL, 0x5cd76272UL, 0x1de6796bUL, + 0xdeb55440UL, 0x9f844f59UL, 0x58120e16UL, 0x1923150fUL, 0xda703824UL, + 0x9b41233dUL, 0xa76bfd65UL, 0xe65ae67cUL, 0x2509cb57UL, 0x6438d04eUL, + 0xa3ae9101UL, 0xe29f8a18UL, 0x21cca733UL, 0x60fdbc2aUL, 0xafe124adUL, + 0xeed03fb4UL, 0x2d83129fUL, 0x6cb20986UL, 0xab2448c9UL, 0xea1553d0UL, + 0x29467efbUL, 0x687765e2UL, 0xf6793f2fUL, 0xb7482436UL, 0x741b091dUL, + 0x352a1204UL, 0xf2bc534bUL, 0xb38d4852UL, 0x70de6579UL, 0x31ef7e60UL, + 0xfef3e6e7UL, 0xbfc2fdfeUL, 0x7c91d0d5UL, 0x3da0cbccUL, 0xfa368a83UL, + 0xbb07919aUL, 0x7854bcb1UL, 0x3965a7a8UL, 0x4b98833bUL, 0x0aa99822UL, + 0xc9fab509UL, 0x88cbae10UL, 0x4f5def5fUL, 0x0e6cf446UL, 
0xcd3fd96dUL, + 0x8c0ec274UL, 0x43125af3UL, 0x022341eaUL, 0xc1706cc1UL, 0x804177d8UL, + 0x47d73697UL, 0x06e62d8eUL, 0xc5b500a5UL, 0x84841bbcUL, 0x1a8a4171UL, + 0x5bbb5a68UL, 0x98e87743UL, 0xd9d96c5aUL, 0x1e4f2d15UL, 0x5f7e360cUL, + 0x9c2d1b27UL, 0xdd1c003eUL, 0x120098b9UL, 0x533183a0UL, 0x9062ae8bUL, + 0xd153b592UL, 0x16c5f4ddUL, 0x57f4efc4UL, 0x94a7c2efUL, 0xd596d9f6UL, + 0xe9bc07aeUL, 0xa88d1cb7UL, 0x6bde319cUL, 0x2aef2a85UL, 0xed796bcaUL, + 0xac4870d3UL, 0x6f1b5df8UL, 0x2e2a46e1UL, 0xe136de66UL, 0xa007c57fUL, + 0x6354e854UL, 0x2265f34dUL, 0xe5f3b202UL, 0xa4c2a91bUL, 0x67918430UL, + 0x26a09f29UL, 0xb8aec5e4UL, 0xf99fdefdUL, 0x3accf3d6UL, 0x7bfde8cfUL, + 0xbc6ba980UL, 0xfd5ab299UL, 0x3e099fb2UL, 0x7f3884abUL, 0xb0241c2cUL, + 0xf1150735UL, 0x32462a1eUL, 0x73773107UL, 0xb4e17048UL, 0xf5d06b51UL, + 0x3683467aUL, 0x77b25d63UL, 0x4ed7facbUL, 0x0fe6e1d2UL, 0xccb5ccf9UL, + 0x8d84d7e0UL, 0x4a1296afUL, 0x0b238db6UL, 0xc870a09dUL, 0x8941bb84UL, + 0x465d2303UL, 0x076c381aUL, 0xc43f1531UL, 0x850e0e28UL, 0x42984f67UL, + 0x03a9547eUL, 0xc0fa7955UL, 0x81cb624cUL, 0x1fc53881UL, 0x5ef42398UL, + 0x9da70eb3UL, 0xdc9615aaUL, 0x1b0054e5UL, 0x5a314ffcUL, 0x996262d7UL, + 0xd85379ceUL, 0x174fe149UL, 0x567efa50UL, 0x952dd77bUL, 0xd41ccc62UL, + 0x138a8d2dUL, 0x52bb9634UL, 0x91e8bb1fUL, 0xd0d9a006UL, 0xecf37e5eUL, + 0xadc26547UL, 0x6e91486cUL, 0x2fa05375UL, 0xe836123aUL, 0xa9070923UL, + 0x6a542408UL, 0x2b653f11UL, 0xe479a796UL, 0xa548bc8fUL, 0x661b91a4UL, + 0x272a8abdUL, 0xe0bccbf2UL, 0xa18dd0ebUL, 0x62defdc0UL, 0x23efe6d9UL, + 0xbde1bc14UL, 0xfcd0a70dUL, 0x3f838a26UL, 0x7eb2913fUL, 0xb924d070UL, + 0xf815cb69UL, 0x3b46e642UL, 0x7a77fd5bUL, 0xb56b65dcUL, 0xf45a7ec5UL, + 0x370953eeUL, 0x763848f7UL, 0xb1ae09b8UL, 0xf09f12a1UL, 0x33cc3f8aUL, + 0x72fd2493UL + }, + { + 0x00000000UL, 0x376ac201UL, 0x6ed48403UL, 0x59be4602UL, 0xdca80907UL, + 0xebc2cb06UL, 0xb27c8d04UL, 0x85164f05UL, 0xb851130eUL, 0x8f3bd10fUL, + 0xd685970dUL, 0xe1ef550cUL, 0x64f91a09UL, 0x5393d808UL, 0x0a2d9e0aUL, + 0x3d475c0bUL, 0x70a3261cUL, 0x47c9e41dUL, 0x1e77a21fUL, 0x291d601eUL, + 0xac0b2f1bUL, 0x9b61ed1aUL, 0xc2dfab18UL, 0xf5b56919UL, 0xc8f23512UL, + 0xff98f713UL, 0xa626b111UL, 0x914c7310UL, 0x145a3c15UL, 0x2330fe14UL, + 0x7a8eb816UL, 0x4de47a17UL, 0xe0464d38UL, 0xd72c8f39UL, 0x8e92c93bUL, + 0xb9f80b3aUL, 0x3cee443fUL, 0x0b84863eUL, 0x523ac03cUL, 0x6550023dUL, + 0x58175e36UL, 0x6f7d9c37UL, 0x36c3da35UL, 0x01a91834UL, 0x84bf5731UL, + 0xb3d59530UL, 0xea6bd332UL, 0xdd011133UL, 0x90e56b24UL, 0xa78fa925UL, + 0xfe31ef27UL, 0xc95b2d26UL, 0x4c4d6223UL, 0x7b27a022UL, 0x2299e620UL, + 0x15f32421UL, 0x28b4782aUL, 0x1fdeba2bUL, 0x4660fc29UL, 0x710a3e28UL, + 0xf41c712dUL, 0xc376b32cUL, 0x9ac8f52eUL, 0xada2372fUL, 0xc08d9a70UL, + 0xf7e75871UL, 0xae591e73UL, 0x9933dc72UL, 0x1c259377UL, 0x2b4f5176UL, + 0x72f11774UL, 0x459bd575UL, 0x78dc897eUL, 0x4fb64b7fUL, 0x16080d7dUL, + 0x2162cf7cUL, 0xa4748079UL, 0x931e4278UL, 0xcaa0047aUL, 0xfdcac67bUL, + 0xb02ebc6cUL, 0x87447e6dUL, 0xdefa386fUL, 0xe990fa6eUL, 0x6c86b56bUL, + 0x5bec776aUL, 0x02523168UL, 0x3538f369UL, 0x087faf62UL, 0x3f156d63UL, + 0x66ab2b61UL, 0x51c1e960UL, 0xd4d7a665UL, 0xe3bd6464UL, 0xba032266UL, + 0x8d69e067UL, 0x20cbd748UL, 0x17a11549UL, 0x4e1f534bUL, 0x7975914aUL, + 0xfc63de4fUL, 0xcb091c4eUL, 0x92b75a4cUL, 0xa5dd984dUL, 0x989ac446UL, + 0xaff00647UL, 0xf64e4045UL, 0xc1248244UL, 0x4432cd41UL, 0x73580f40UL, + 0x2ae64942UL, 0x1d8c8b43UL, 0x5068f154UL, 0x67023355UL, 0x3ebc7557UL, + 0x09d6b756UL, 0x8cc0f853UL, 0xbbaa3a52UL, 0xe2147c50UL, 0xd57ebe51UL, + 0xe839e25aUL, 0xdf53205bUL, 0x86ed6659UL, 0xb187a458UL, 
0x3491eb5dUL, + 0x03fb295cUL, 0x5a456f5eUL, 0x6d2fad5fUL, 0x801b35e1UL, 0xb771f7e0UL, + 0xeecfb1e2UL, 0xd9a573e3UL, 0x5cb33ce6UL, 0x6bd9fee7UL, 0x3267b8e5UL, + 0x050d7ae4UL, 0x384a26efUL, 0x0f20e4eeUL, 0x569ea2ecUL, 0x61f460edUL, + 0xe4e22fe8UL, 0xd388ede9UL, 0x8a36abebUL, 0xbd5c69eaUL, 0xf0b813fdUL, + 0xc7d2d1fcUL, 0x9e6c97feUL, 0xa90655ffUL, 0x2c101afaUL, 0x1b7ad8fbUL, + 0x42c49ef9UL, 0x75ae5cf8UL, 0x48e900f3UL, 0x7f83c2f2UL, 0x263d84f0UL, + 0x115746f1UL, 0x944109f4UL, 0xa32bcbf5UL, 0xfa958df7UL, 0xcdff4ff6UL, + 0x605d78d9UL, 0x5737bad8UL, 0x0e89fcdaUL, 0x39e33edbUL, 0xbcf571deUL, + 0x8b9fb3dfUL, 0xd221f5ddUL, 0xe54b37dcUL, 0xd80c6bd7UL, 0xef66a9d6UL, + 0xb6d8efd4UL, 0x81b22dd5UL, 0x04a462d0UL, 0x33cea0d1UL, 0x6a70e6d3UL, + 0x5d1a24d2UL, 0x10fe5ec5UL, 0x27949cc4UL, 0x7e2adac6UL, 0x494018c7UL, + 0xcc5657c2UL, 0xfb3c95c3UL, 0xa282d3c1UL, 0x95e811c0UL, 0xa8af4dcbUL, + 0x9fc58fcaUL, 0xc67bc9c8UL, 0xf1110bc9UL, 0x740744ccUL, 0x436d86cdUL, + 0x1ad3c0cfUL, 0x2db902ceUL, 0x4096af91UL, 0x77fc6d90UL, 0x2e422b92UL, + 0x1928e993UL, 0x9c3ea696UL, 0xab546497UL, 0xf2ea2295UL, 0xc580e094UL, + 0xf8c7bc9fUL, 0xcfad7e9eUL, 0x9613389cUL, 0xa179fa9dUL, 0x246fb598UL, + 0x13057799UL, 0x4abb319bUL, 0x7dd1f39aUL, 0x3035898dUL, 0x075f4b8cUL, + 0x5ee10d8eUL, 0x698bcf8fUL, 0xec9d808aUL, 0xdbf7428bUL, 0x82490489UL, + 0xb523c688UL, 0x88649a83UL, 0xbf0e5882UL, 0xe6b01e80UL, 0xd1dadc81UL, + 0x54cc9384UL, 0x63a65185UL, 0x3a181787UL, 0x0d72d586UL, 0xa0d0e2a9UL, + 0x97ba20a8UL, 0xce0466aaUL, 0xf96ea4abUL, 0x7c78ebaeUL, 0x4b1229afUL, + 0x12ac6fadUL, 0x25c6adacUL, 0x1881f1a7UL, 0x2feb33a6UL, 0x765575a4UL, + 0x413fb7a5UL, 0xc429f8a0UL, 0xf3433aa1UL, 0xaafd7ca3UL, 0x9d97bea2UL, + 0xd073c4b5UL, 0xe71906b4UL, 0xbea740b6UL, 0x89cd82b7UL, 0x0cdbcdb2UL, + 0x3bb10fb3UL, 0x620f49b1UL, 0x55658bb0UL, 0x6822d7bbUL, 0x5f4815baUL, + 0x06f653b8UL, 0x319c91b9UL, 0xb48adebcUL, 0x83e01cbdUL, 0xda5e5abfUL, + 0xed3498beUL + }, + { + 0x00000000UL, 0x6567bcb8UL, 0x8bc809aaUL, 0xeeafb512UL, 0x5797628fUL, + 0x32f0de37UL, 0xdc5f6b25UL, 0xb938d79dUL, 0xef28b4c5UL, 0x8a4f087dUL, + 0x64e0bd6fUL, 0x018701d7UL, 0xb8bfd64aUL, 0xddd86af2UL, 0x3377dfe0UL, + 0x56106358UL, 0x9f571950UL, 0xfa30a5e8UL, 0x149f10faUL, 0x71f8ac42UL, + 0xc8c07bdfUL, 0xada7c767UL, 0x43087275UL, 0x266fcecdUL, 0x707fad95UL, + 0x1518112dUL, 0xfbb7a43fUL, 0x9ed01887UL, 0x27e8cf1aUL, 0x428f73a2UL, + 0xac20c6b0UL, 0xc9477a08UL, 0x3eaf32a0UL, 0x5bc88e18UL, 0xb5673b0aUL, + 0xd00087b2UL, 0x6938502fUL, 0x0c5fec97UL, 0xe2f05985UL, 0x8797e53dUL, + 0xd1878665UL, 0xb4e03addUL, 0x5a4f8fcfUL, 0x3f283377UL, 0x8610e4eaUL, + 0xe3775852UL, 0x0dd8ed40UL, 0x68bf51f8UL, 0xa1f82bf0UL, 0xc49f9748UL, + 0x2a30225aUL, 0x4f579ee2UL, 0xf66f497fUL, 0x9308f5c7UL, 0x7da740d5UL, + 0x18c0fc6dUL, 0x4ed09f35UL, 0x2bb7238dUL, 0xc518969fUL, 0xa07f2a27UL, + 0x1947fdbaUL, 0x7c204102UL, 0x928ff410UL, 0xf7e848a8UL, 0x3d58149bUL, + 0x583fa823UL, 0xb6901d31UL, 0xd3f7a189UL, 0x6acf7614UL, 0x0fa8caacUL, + 0xe1077fbeUL, 0x8460c306UL, 0xd270a05eUL, 0xb7171ce6UL, 0x59b8a9f4UL, + 0x3cdf154cUL, 0x85e7c2d1UL, 0xe0807e69UL, 0x0e2fcb7bUL, 0x6b4877c3UL, + 0xa20f0dcbUL, 0xc768b173UL, 0x29c70461UL, 0x4ca0b8d9UL, 0xf5986f44UL, + 0x90ffd3fcUL, 0x7e5066eeUL, 0x1b37da56UL, 0x4d27b90eUL, 0x284005b6UL, + 0xc6efb0a4UL, 0xa3880c1cUL, 0x1ab0db81UL, 0x7fd76739UL, 0x9178d22bUL, + 0xf41f6e93UL, 0x03f7263bUL, 0x66909a83UL, 0x883f2f91UL, 0xed589329UL, + 0x546044b4UL, 0x3107f80cUL, 0xdfa84d1eUL, 0xbacff1a6UL, 0xecdf92feUL, + 0x89b82e46UL, 0x67179b54UL, 0x027027ecUL, 0xbb48f071UL, 0xde2f4cc9UL, + 0x3080f9dbUL, 0x55e74563UL, 0x9ca03f6bUL, 0xf9c783d3UL, 
0x176836c1UL, + 0x720f8a79UL, 0xcb375de4UL, 0xae50e15cUL, 0x40ff544eUL, 0x2598e8f6UL, + 0x73888baeUL, 0x16ef3716UL, 0xf8408204UL, 0x9d273ebcUL, 0x241fe921UL, + 0x41785599UL, 0xafd7e08bUL, 0xcab05c33UL, 0x3bb659edUL, 0x5ed1e555UL, + 0xb07e5047UL, 0xd519ecffUL, 0x6c213b62UL, 0x094687daUL, 0xe7e932c8UL, + 0x828e8e70UL, 0xd49eed28UL, 0xb1f95190UL, 0x5f56e482UL, 0x3a31583aUL, + 0x83098fa7UL, 0xe66e331fUL, 0x08c1860dUL, 0x6da63ab5UL, 0xa4e140bdUL, + 0xc186fc05UL, 0x2f294917UL, 0x4a4ef5afUL, 0xf3762232UL, 0x96119e8aUL, + 0x78be2b98UL, 0x1dd99720UL, 0x4bc9f478UL, 0x2eae48c0UL, 0xc001fdd2UL, + 0xa566416aUL, 0x1c5e96f7UL, 0x79392a4fUL, 0x97969f5dUL, 0xf2f123e5UL, + 0x05196b4dUL, 0x607ed7f5UL, 0x8ed162e7UL, 0xebb6de5fUL, 0x528e09c2UL, + 0x37e9b57aUL, 0xd9460068UL, 0xbc21bcd0UL, 0xea31df88UL, 0x8f566330UL, + 0x61f9d622UL, 0x049e6a9aUL, 0xbda6bd07UL, 0xd8c101bfUL, 0x366eb4adUL, + 0x53090815UL, 0x9a4e721dUL, 0xff29cea5UL, 0x11867bb7UL, 0x74e1c70fUL, + 0xcdd91092UL, 0xa8beac2aUL, 0x46111938UL, 0x2376a580UL, 0x7566c6d8UL, + 0x10017a60UL, 0xfeaecf72UL, 0x9bc973caUL, 0x22f1a457UL, 0x479618efUL, + 0xa939adfdUL, 0xcc5e1145UL, 0x06ee4d76UL, 0x6389f1ceUL, 0x8d2644dcUL, + 0xe841f864UL, 0x51792ff9UL, 0x341e9341UL, 0xdab12653UL, 0xbfd69aebUL, + 0xe9c6f9b3UL, 0x8ca1450bUL, 0x620ef019UL, 0x07694ca1UL, 0xbe519b3cUL, + 0xdb362784UL, 0x35999296UL, 0x50fe2e2eUL, 0x99b95426UL, 0xfcdee89eUL, + 0x12715d8cUL, 0x7716e134UL, 0xce2e36a9UL, 0xab498a11UL, 0x45e63f03UL, + 0x208183bbUL, 0x7691e0e3UL, 0x13f65c5bUL, 0xfd59e949UL, 0x983e55f1UL, + 0x2106826cUL, 0x44613ed4UL, 0xaace8bc6UL, 0xcfa9377eUL, 0x38417fd6UL, + 0x5d26c36eUL, 0xb389767cUL, 0xd6eecac4UL, 0x6fd61d59UL, 0x0ab1a1e1UL, + 0xe41e14f3UL, 0x8179a84bUL, 0xd769cb13UL, 0xb20e77abUL, 0x5ca1c2b9UL, + 0x39c67e01UL, 0x80fea99cUL, 0xe5991524UL, 0x0b36a036UL, 0x6e511c8eUL, + 0xa7166686UL, 0xc271da3eUL, 0x2cde6f2cUL, 0x49b9d394UL, 0xf0810409UL, + 0x95e6b8b1UL, 0x7b490da3UL, 0x1e2eb11bUL, 0x483ed243UL, 0x2d596efbUL, + 0xc3f6dbe9UL, 0xa6916751UL, 0x1fa9b0ccUL, 0x7ace0c74UL, 0x9461b966UL, + 0xf10605deUL + } + }; +/* ========================================================================= */ +#define DOLIT4 c ^= *buf4++; \ + c = crc_table[3][c & 0xff] ^ crc_table[2][(c >> 8) & 0xff] ^ \ + crc_table[1][(c >> 16) & 0xff] ^ crc_table[0][c >> 24] +#define DOLIT32 DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4 + +unsigned int SharedRuntime::updateBytesCRC32(unsigned long crc, const unsigned char *buf, unsigned int len) { + if (buf == 0) return 0UL; + + register unsigned int c; + register const unsigned int *buf4; + c = (unsigned int)crc; + c = ~c; + while (len && ((ptrdiff_t)buf & 3)) { + c = crc_table[0][(c ^ *buf++) & 0xff] ^ (c >> 8); + len--; + } + + buf4 = (const unsigned int *) (const void *)buf; + while (len >= 32) { + DOLIT32; + len -= 32; + } + while (len >= 4) { + DOLIT4; + len -= 4; + } + buf = (const unsigned char *)buf4; + + if (len) do { + c = crc_table[0][(c ^ *buf++) & 0xff] ^ (c >> 8); + } while (--len); + c = ~c; + return (unsigned long)c; +} + + +//------------------------------Montgomery multiplication------------------------ +// + +#ifndef _WINDOWS + +#define ASM_SUBTRACT +#undef ASM_SUBTRACT //by jzy + +#ifdef ASM_SUBTRACT +// Subtract 0:b from carry:a. Return carry. 
+static unsigned long +sub(unsigned long a[], unsigned long b[], unsigned long carry, long len) { + long i = 0, cnt = len; + unsigned long tmp; + asm volatile("clc; " + "0: ; " + "mov (%[b], %[i], 8), %[tmp]; " + "sbb %[tmp], (%[a], %[i], 8); " + "inc %[i]; dec %[cnt]; " + "jne 0b; " + "mov %[carry], %[tmp]; sbb $0, %[tmp]; " + : [i]"+r"(i), [cnt]"+r"(cnt), [tmp]"=&r"(tmp) + : [a]"r"(a), [b]"r"(b), [carry]"r"(carry) + : "memory"); + return tmp; +} +#else // ASM_SUBTRACT +typedef int __attribute__((mode(TI))) int128; + +// Subtract 0:b from carry:a. Return carry. +//static unsigned long +//sub(unsigned long a[], unsigned long b[], unsigned long carry, int len) { +// int128 tmp = 0; +// int i; +// for (i = 0; i < len; i++) { +// tmp += a[i]; +// tmp -= b[i]; +// a[i] = tmp; +// tmp >>= 64; +// assert(-1 <= tmp && tmp <= 0, "invariant"); +// } +// return tmp + carry; +//} +static julong +sub(julong a[], julong b[], julong carry, long len) { + long borrow = 0, t = 0; + julong tmp0, tmp1; + __asm__ __volatile__ ( + "0: \n" + "ldl %[tmp0], 0(%[a]) \n" + "ldl %[tmp1], 0(%[b]) \n" + "cmpult %[tmp0], %[borrow], %[t] \n" + "subl %[tmp0], %[borrow], %[tmp0] \n" + "cmpult %[tmp0], %[tmp1], %[borrow] \n" + "bis %[borrow], %[t], %[borrow] \n" + "subl %[tmp0], %[tmp1], %[tmp0] \n" + "stl %[tmp0], 0(%[a]) \n" + "addl %[a], 8, %[a] \n" + "addl %[b], 8, %[b] \n" + "subl %[len], 1, %[len] \n" + "bgt %[len], 0b \n" + "subl %[carry], %[borrow], %[tmp0] \n" + : [len]"+r"(len), [tmp0]"=&r"(tmp0), [tmp1]"=&r"(tmp1), [borrow]"+r"(borrow), [a]"+r"(a), [b]"+r"(b), [t]"+r"(t) + : [carry]"r"(carry) + : "memory" + ); + return tmp0; +} +#endif // ! ASM_SUBTRACT + +// Multiply (unsigned) Long A by Long B, accumulating the double- +// length result into the accumulator formed of T0, T1, and T2. +//#define MACC(A, B, T0, T1, T2) \ +//do { \ +// ShouldNotReachHere(); \ +//} while(0) +inline void MACC(unsigned long A, unsigned long B, unsigned long &t0, unsigned long &t1, unsigned long &t2) { + unsigned long hi, lo, carry = 0, t = 0; + __asm__ __volatile__( + "mull %[A], %[B] , %[lo] \n" + "umulh %[A], %[B] , %[hi] \n" + "addl %[lo], %[t0], %[t0] \n" + "cmpult %[t0], %[lo], %[carry] \n" + "addl %[t1], %[carry], %[t1] \n" + "cmpult %[t1], %[carry], %[t] \n" + "addl %[t1], %[hi], %[t1] \n" + "cmpult %[t1], %[hi], %[carry] \n" + "bis %[carry], %[t] , %[carry] \n" + "addl %[t2], %[carry], %[t2] \n" + : [hi]"=&r"(hi), [lo]"=&r"(lo), [t0]"+r"(t0), [t1]"+r"(t1), [t2]"+r"(t2), [carry]"+r"(carry), [t]"+r"(t) + : [A]"r"(A), [B]"r"(B) + : + ); +} + +// As above, but add twice the double-length result into the +// accumulator. 
+//#define MACC2(A, B, T0, T1, T2) \ +//do { \ +//ShouldNotReachHere(); \ +// } while(0) +inline void MACC2(unsigned long A, unsigned long B, unsigned long &t0, unsigned long &t1, unsigned long &t2) { + unsigned long hi, lo, carry = 0, t = 0; + __asm__ __volatile__( + "mull %[A], %[B] , %[lo] \n" + "umulh %[A], %[B] , %[hi] \n" + "addl %[t0], %[lo], %[t0] \n" + "cmpult %[t0], %[lo], %[carry] \n" + "addl %[t1], %[carry], %[t1] \n" + "cmpult %[t1], %[carry], %[t] \n" + "addl %[t1], %[hi], %[t1] \n" + "cmpult %[t1], %[hi], %[carry] \n" + "bis %[carry], %[t], %[carry] \n" + "addl %[t2], %[carry], %[t2] \n" + "addl %[t0], %[lo], %[t0] \n" + "cmpult %[t0], %[lo], %[carry] \n" + "addl %[t1], %[carry], %[t1] \n" + "cmpult %[t1], %[carry], %[t] \n" + "addl %[t1], %[hi], %[t1] \n" + "cmpult %[t1], %[hi], %[carry] \n" + "bis %[carry], %[t], %[carry] \n" + "addl %[t2], %[carry], %[t2] \n" + : [hi]"=&r"(hi), [lo]"=&r"(lo), [t0]"+r"(t0), [t1]"+r"(t1), [t2]"+r"(t2), [carry]"+r"(carry), [t]"+r"(t) + : [A]"r"(A), [B]"r"(B) + : + ); +} + +// Fast Montgomery multiplication. The derivation of the algorithm is +// in A Cryptographic Library for the Motorola DSP56000, +// Dusse and Kaliski, Proc. EUROCRYPT 90, pp. 230-237. + +static void __attribute__((noinline)) +montgomery_multiply(unsigned long a[], unsigned long b[], unsigned long n[], + unsigned long m[], unsigned long inv, int len) { + unsigned long t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator + int i; + + assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply"); + //ShouldNotReachHere();//by jzy + for (i = 0; i < len; i++) { + int j; + for (j = 0; j < i; j++) { + MACC(a[j], b[i-j], t0, t1, t2); + MACC(m[j], n[i-j], t0, t1, t2); + } + MACC(a[i], b[0], t0, t1, t2); + m[i] = t0 * inv; + MACC(m[i], n[0], t0, t1, t2); + + assert(t0 == 0, "broken Montgomery multiply"); + + t0 = t1; t1 = t2; t2 = 0; + } + + for (i = len; i < 2*len; i++) { + int j; + for (j = i-len+1; j < len; j++) { + MACC(a[j], b[i-j], t0, t1, t2); + MACC(m[j], n[i-j], t0, t1, t2); + } + m[i-len] = t0; + t0 = t1; t1 = t2; t2 = 0; + } + + while (t0) + t0 = sub(m, n, t0, len); +} + +// Fast Montgomery squaring. This uses asymptotically 25% fewer +// multiplies so it should be up to 25% faster than Montgomery +// multiplication. However, its loop control is more complex and it +// may actually run slower on some machines. 
+ +static void __attribute__((noinline)) +montgomery_square(unsigned long a[], unsigned long n[], + unsigned long m[], unsigned long inv, int len) { + unsigned long t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator + int i; + + assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply"); + + //ShouldNotReachHere();//by jzy + for (i = 0; i < len; i++) { + int j; + int end = (i+1)/2; + for (j = 0; j < end; j++) { + MACC2(a[j], a[i-j], t0, t1, t2); + MACC(m[j], n[i-j], t0, t1, t2); + } + if ((i & 1) == 0) { + MACC(a[j], a[j], t0, t1, t2); + } + for (; j < i; j++) { + MACC(m[j], n[i-j], t0, t1, t2); + } + m[i] = t0 * inv; + MACC(m[i], n[0], t0, t1, t2); + + assert(t0 == 0, "broken Montgomery square"); + + t0 = t1; t1 = t2; t2 = 0; + } + + for (i = len; i < 2*len; i++) { + int start = i-len+1; + int end = start + (len - start)/2; + int j; + for (j = start; j < end; j++) { + MACC2(a[j], a[i-j], t0, t1, t2); + MACC(m[j], n[i-j], t0, t1, t2); + } + if ((i & 1) == 0) { + MACC(a[j], a[j], t0, t1, t2); + } + for (; j < len; j++) { + MACC(m[j], n[i-j], t0, t1, t2); + } + m[i-len] = t0; + t0 = t1; t1 = t2; t2 = 0; + } + + while (t0) + t0 = sub(m, n, t0, len); +} + +// Swap words in a longword. +static unsigned long swap(unsigned long x) { + return (x << 32) | (x >> 32); +} + +// Copy len longwords from s to d, word-swapping as we go. The +// destination array is reversed. +static void reverse_words(unsigned long *s, unsigned long *d, int len) { + d += len; + while(len-- > 0) { + d--; + *d = swap(*s); + s++; + } +} + +// The threshold at which squaring is advantageous was determined +// experimentally on an i7-3930K (Ivy Bridge) CPU @ 3.5GHz. +#define MONTGOMERY_SQUARING_THRESHOLD 64 + +void SharedRuntime::montgomery_multiply(jint *a_ints, jint *b_ints, jint *n_ints, + jint len, jlong inv, + jint *m_ints) { + assert(len % 2 == 0, "array length in montgomery_multiply must be even"); + int longwords = len/2; + + // Make very sure we don't use so much space that the stack might + // overflow. 512 jints corresponds to an 16384-bit integer and + // will use here a total of 8k bytes of stack space. + int total_allocation = longwords * sizeof (unsigned long) * 4; + guarantee(total_allocation <= 8192, "must be"); + unsigned long *scratch = (unsigned long *)alloca(total_allocation); + + // Local scratch arrays + unsigned long + *a = scratch + 0 * longwords, + *b = scratch + 1 * longwords, + *n = scratch + 2 * longwords, + *m = scratch + 3 * longwords; + + reverse_words((unsigned long *)a_ints, a, longwords); + reverse_words((unsigned long *)b_ints, b, longwords); + reverse_words((unsigned long *)n_ints, n, longwords); + + ::montgomery_multiply(a, b, n, m, (unsigned long)inv, longwords); + + reverse_words(m, (unsigned long *)m_ints, longwords); +} + +void SharedRuntime::montgomery_square(jint *a_ints, jint *n_ints, + jint len, jlong inv, + jint *m_ints) { + assert(len % 2 == 0, "array length in montgomery_square must be even"); + int longwords = len/2; + + // Make very sure we don't use so much space that the stack might + // overflow. 512 jints corresponds to an 16384-bit integer and + // will use here a total of 6k bytes of stack space. 
+ int total_allocation = longwords * sizeof (julong) * 3; + guarantee(total_allocation <= 8192, "must be"); + unsigned long *scratch = (unsigned long *)alloca(total_allocation); + + // Local scratch arrays + unsigned long + *a = scratch + 0 * longwords, + *n = scratch + 1 * longwords, + *m = scratch + 2 * longwords; + + reverse_words((unsigned long *)a_ints, a, longwords); + reverse_words((unsigned long *)n_ints, n, longwords); + + if (len >= MONTGOMERY_SQUARING_THRESHOLD) { + ::montgomery_square(a, n, m, (unsigned long)inv, longwords); + } else { + ::montgomery_multiply(a, a, n, m, (unsigned long)inv, longwords); + } + + reverse_words(m, (julong *)m_ints, longwords); +} + +#endif // WINDOWS + +#ifdef COMPILER2 +// This is here instead of runtime_x86_64.cpp because it uses SimpleRuntimeFrame +// +//------------------------------generate_exception_blob--------------------------- +// creates exception blob at the end +// Using exception blob, this code is jumped from a compiled method. +// (see emit_exception_handler in x86_64.ad file) +// +// Given an exception pc at a call we call into the runtime for the +// handler in this method. This handler might merely restore state +// (i.e. callee save registers) unwind the frame and jump to the +// exception handler for the nmethod if there is no Java level handler +// for the nmethod. +// +// This code is entered with a jmp. +// +// Arguments: +// rax: exception oop +// rdx: exception pc +// +// Results: +// rax: exception oop +// rdx: exception pc in caller or ??? +// destination: exception handler of caller +// +// Note: the exception pc MUST be at a call (precise debug information) +// Registers rax, rdx, rcx, rsi, rdi, r8-r11 are not callee saved. +// + +void OptoRuntime::generate_exception_blob() { + assert(!OptoRuntime::is_callee_saved_register(A2_num), ""); + assert(!OptoRuntime::is_callee_saved_register(A3_num), ""); + assert(!OptoRuntime::is_callee_saved_register(V0_num), ""); + + assert(SimpleRuntimeFrame::framesize % 4 == 0, "sp not 16-byte aligned"); + + // Allocate space for the code + ResourceMark rm; + // Setup code generation tools + CodeBuffer buffer("exception_blob", 2048, 1024); + MacroAssembler* masm = new MacroAssembler(&buffer); + + + address start = __ pc(); + + //__ stop("generate_exception_blob"); + + // Exception pc is 'return address' for stack walker + __ push(rdx); + __ subptr(esp, SimpleRuntimeFrame::return_off << LogBytesPerInt, esp); // Prolog + + // Save callee-saved registers. See x86_64.ad. + + // rbp is an implicitly saved callee saved register (i.e., the calling + // convention will save/restore it in the prolog/epilog). Other than that + // there are no callee save registers now that adapter frames are gone. + + __ stptr(rfp, Address(esp, SimpleRuntimeFrame::rfp_off << LogBytesPerInt)); + + // Store exception in Thread object. We cannot pass any arguments to the + // handle_exception call, since we do not want to make any assumption + // about the size of the frame where the exception happened in. + // c_rarg0 is either rdi (Linux) or rcx (Windows). + __ stptr(rax, Address(rthread, JavaThread::exception_oop_offset())); + __ stptr(rdx, Address(rthread, JavaThread::exception_pc_offset())); + + // This call does all the hard work. It checks if an exception handler + // exists in the method. + // If so, it returns the handler address. + // If not, it prepares for stack-unwinding, restoring the callee-save + // registers of the frame being removed. 
+ // + // address OptoRuntime::handle_exception_C(JavaThread* thread) + + // At a method handle call, the stack may not be properly aligned + // when returning with an exception. + address the_pc = __ pc(); + __ set_last_Java_frame(esp, noreg, the_pc, rscratch3); + __ movl(c_rarg0, rthread); + //__ andptr(esp, -(StackAlignmentInBytes), esp); // Align stack + __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, OptoRuntime::handle_exception_C))); + + // Set an oopmap for the call site. This oopmap will only be used if we + // are unwinding the stack. Hence, all locations will be dead. + // Callee-saved registers will be the same as the frame above (i.e., + // handle_exception_stub), since they were restored when we got the + // exception. + + OopMapSet* oop_maps = new OopMapSet(); + + oop_maps->add_gc_map(the_pc - start, new OopMap(SimpleRuntimeFrame::framesize, 0));//TODO:here need to modify offset? jzy + + __ reset_last_Java_frame(false); + + // Restore callee-saved registers + + // rbp is an implicitly saved callee-saved register (i.e., the calling + // convention will save restore it in prolog/epilog) Other than that + // there are no callee save registers now that adapter frames are gone. + + __ ldptr(rfp, Address(esp, SimpleRuntimeFrame::rfp_off << LogBytesPerInt)); + + __ addptr(esp, SimpleRuntimeFrame::return_off << LogBytesPerInt, esp); // Epilog + __ pop(rdx); // No need for exception pc anymore + + // rax: exception handler + + // We have a handler in rax (could be deopt blob). + __ movl(rscratch3, rax); + + // Get the exception oop + __ ldptr(rax, Address(rthread, JavaThread::exception_oop_offset())); + // Get the exception pc in case we are deoptimized + __ ldptr(rdx, Address(rthread, JavaThread::exception_pc_offset())); +#ifdef ASSERT + __ stptr(R0, Address(rthread, JavaThread::exception_handler_pc_offset())); + __ stptr(R0, Address(rthread, JavaThread::exception_pc_offset())); +#endif + // Clear the exception oop so GC no longer processes it as a root. + __ stptr(R0, Address(rthread, JavaThread::exception_oop_offset())); + + // rax: exception oop + // rscratch3: exception handler + // rdx: exception pc + // Jump to handler + + __ jmp(rscratch3); + + // Make sure all code is generated + masm->flush(); + + // Set exception blob + _exception_blob = ExceptionBlob::create(&buffer, oop_maps, SimpleRuntimeFrame::framesize >> 1); +} +#endif // COMPILER2 + +void SharedRuntime::compute_move_order(const BasicType* in_sig_bt, + int total_in_args, const VMRegPair* in_regs, + int total_out_args, VMRegPair* out_regs, + GrowableArray& arg_order, + VMRegPair tmp_vmreg) { + ComputeMoveOrder order(total_in_args, in_regs, + total_out_args, out_regs, + in_sig_bt, arg_order, tmp_vmreg); +} diff --git a/src/hotspot/cpu/sw64/stubGenerator_sw64.cpp b/src/hotspot/cpu/sw64/stubGenerator_sw64.cpp new file mode 100644 index 00000000000..82f48a02de8 --- /dev/null +++ b/src/hotspot/cpu/sw64/stubGenerator_sw64.cpp @@ -0,0 +1,6021 @@ +/* + * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, 2019, Red Hat Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "compiler/oopMap.hpp" +#include "gc/shared/barrierSet.hpp" +#include "gc/shared/barrierSetAssembler.hpp" +#include "gc/shared/barrierSetNMethod.hpp" +#include "gc/shared/gc_globals.hpp" +#include "interpreter/interpreter.hpp" +#include "memory/universe.hpp" +#include "nativeInst_sw64.hpp" +#include "oops/instanceOop.hpp" +#include "oops/method.hpp" +#include "oops/objArrayKlass.hpp" +#include "oops/oop.inline.hpp" +#include "oops/klass.inline.hpp" +#include "prims/methodHandles.hpp" +#include "runtime/arguments.hpp" +#include "runtime/frame.inline.hpp" +#include "runtime/handles.inline.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubCodeGenerator.hpp" +#include "runtime/stubRoutines.hpp" +#include "runtime/thread.inline.hpp" +#include "runtime/vm_version.hpp" +#include "utilities/align.hpp" +#ifdef COMPILER2 +#include "opto/runtime.hpp" +#endif +#if INCLUDE_JVMCI +#include "jvmci/jvmci_globals.hpp" +#endif + +// Declaration and definition of StubGenerator (no .hpp file). +// For a more detailed description of the stub routine structure +// see the comment in stubRoutines.hpp + +#undef __ +#define __ _masm-> +#define TIMES_OOP (UseCompressedOops ? Address::times_4 : Address::times_8) +//#define a__ ((Assembler*)_masm)-> + +#ifdef PRODUCT +#define BLOCK_COMMENT(str) /* nothing */ +#else +#define BLOCK_COMMENT(str) { char line[1024]; sprintf(line,"%s:%s:%d",str,__FILE__, __LINE__); __ block_comment(line);} +#endif + +#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") +const int MXCSR_MASK = 0xFFC0; // Mask out any pending exceptions + +// Stub Code definitions + +class StubGenerator: public StubCodeGenerator { + private: + +#ifdef PRODUCT +#define inc_counter_np(counter) ((void)0) +#else + void inc_counter_np_(int& counter) { + // This can destroy rscratch1 if counter is far from the code cache + __ incrementw(ExternalAddress((address)&counter)); + } +#define inc_counter_np(counter) \ + BLOCK_COMMENT("inc_counter " #counter); \ + inc_counter_np_(counter); +#endif + + // Call stubs are used to call Java from C + // + // Linux Arguments: + // c_rarg0: call wrapper address address + // c_rarg1: result address + // c_rarg2: result type BasicType + // c_rarg3: method Method* + // c_rarg4: (interpreter) entry point address + // c_rarg5: parameters intptr_t* + // 16(rfp): parameter size (in words) int + // 24(rfp): thread Thread* + // + // [ return_from_Java ] <--- rsp + // [ argument word n ] + // ... 
+ // -13 [ argument word 1 ] + // -12 [ saved S5 ] <--- rsp_after_call + // -11 [ saved S4 ] + // -10 [ saved S3 ] + // -9 [ saved S2 ] + // -8 [ saved S1 ] + // -7 [ saved S0 ] + // -6 [ call wrapper ] + // -5 [ result ] + // -4 [ result type ] + // -3 [ method ] + // -2 [ entry point ] + // -1 [ parameters ] + // 0 [ saved rfp ] <--- rfp + // 1 [ return address ] + // 2 [ parameter size ] + // 3 [ thread ] + // + + // Call stub stack layout word offsets from rfp + enum call_stub_layout { + rsp_after_call_off = -20, + F9_off = rsp_after_call_off, + F8_off = -19, + F7_off = -18, + F6_off = -17, + F5_off = -16, + F4_off = -15, + F3_off = -14, + F2_off = -13, + S5_off = -12, + S4_off = -11, + S3_off = -10, + S2_off = -9, + S1_off = -8, + S0_off = -7, + call_wrapper_off = -6, + result_off = -5, + result_type_off = -4, + method_off = -3, + entry_point_off = -2, + parameters_off = -1, + rfp_off = 0, + retaddr_off = 1, + parameter_size_off = 2, + thread_off = 3 + }; + + address generate_call_stub(address& return_address) { + StubCodeMark mark(this, "StubRoutines", "call_stub"); + address start = __ pc(); + Register rax = V0; + + // same as in generate_catch_exception()! + const Address rsp_after_call(rfp, rsp_after_call_off * wordSize); + + const Address call_wrapper (rfp, call_wrapper_off * wordSize); + const Address result (rfp, result_off * wordSize); + const Address result_type (rfp, result_type_off * wordSize); + const Address method (rfp, method_off * wordSize); + const Address entry_point (rfp, entry_point_off * wordSize); + const Address parameters (rfp, parameters_off * wordSize); + const Address parameter_size(rfp, parameter_size_off * wordSize); + + // same as in generate_catch_exception()! + const Address thread (rfp, thread_off * wordSize); + + // call Java method from C function, by LIX20170503 + __ setfpec1(); + const Address S5_save(rfp, S5_off * wordSize); + const Address S4_save(rfp, S4_off * wordSize); + const Address S3_save(rfp, S3_off * wordSize); + const Address S2_save(rfp, S2_off * wordSize); + const Address S1_save(rfp, S1_off * wordSize); + const Address S0_save(rfp, S0_off * wordSize); + + // stub code + __ enter(); + __ subptr(esp, -rsp_after_call_off * wordSize, esp); + + __ stptr(c_rarg5, parameters); + __ stptr(c_rarg4, entry_point); + __ stptr(c_rarg3, method); + __ stptr(c_rarg2, result_type); + __ stptr(c_rarg1, result); + __ stptr(c_rarg0, call_wrapper); + + // save regs belonging to calling function + __ stptr(S5, S5_save); + __ stptr(S4, S4_save); + __ stptr(S3, S3_save); + __ stptr(S2, S2_save); + __ stptr(S1, S1_save); + __ stptr(S0, S0_save); + + __ fstd(f9, F9_off * wordSize, rfp); + __ fstd(f8, F8_off * wordSize, rfp); + __ fstd(f7, F7_off * wordSize, rfp); + __ fstd(f6, F6_off * wordSize, rfp); + __ fstd(f5, F5_off * wordSize, rfp); + __ fstd(f4, F4_off * wordSize, rfp); + __ fstd(f3, F3_off * wordSize, rfp); + __ fstd(f2, F2_off * wordSize, rfp); + __ ldptr(rthread, thread); + + __ reinit_heapbase(); + +#ifdef ASSERT + // make sure we have no pending exceptions + { + Label L; + __ cmpptr(Address(rthread, Thread::pending_exception_offset()), R0); + __ jcc(Assembler::equal, L); + __ stop("StubRoutines::call_stub: entered with pending exception"); + __ bind(L); + } +#endif + + // pass parameters if any + BLOCK_COMMENT("pass parameters if any"); + Label parameters_done; + __ ldws(c_rarg3, parameter_size); + __ testw(c_rarg3, c_rarg3); + __ jcc(Assembler::zero, parameters_done); + + Label loop; + __ ldptr(c_rarg2, parameters); // parameter pointer + __ 
movw(c_rarg1, c_rarg3); // parameter counter is in c_rarg1 + __ BIND(loop); + __ ldptr(rax, Address(c_rarg2, 0));// get parameter + __ addptr(c_rarg2, wordSize, c_rarg2); // advance to next parameter + __ decrementw(c_rarg1); // decrement counter + __ push(rax); // pass parameter + __ jcc(Assembler::notZero, loop, c_rarg1); + + // call Java function + __ BIND(parameters_done); + __ ldptr(rmethod, method); // get Method* + __ ldptr(c_rarg1, entry_point); // get entry_point + __ movl(rsender, esp); //set sender sp + BLOCK_COMMENT("call Java function"); + __ call(c_rarg1, return_address); //c_rarg4 is (interpreter) entry point + + // store result depending on type (everything that is not + // T_OBJECT, T_LONG, T_FLOAT or T_DOUBLE is treated as T_INT) + __ ldptr(c_rarg0, result); + Label is_long, is_float, is_double, exit; + __ ldws(c_rarg1, result_type); + __ cmpw(c_rarg1, T_OBJECT); + __ jcc(Assembler::equal, is_long); + __ cmpw(c_rarg1, T_LONG); + __ jcc(Assembler::equal, is_long); + __ cmpw(c_rarg1, T_FLOAT); + __ jcc(Assembler::equal, is_float); + __ cmpw(c_rarg1, T_DOUBLE); + __ jcc(Assembler::equal, is_double); + + // handle T_INT case + __ stw(rax, Address(c_rarg0, 0)); + + __ BIND(exit); + + // pop parameters + __ lea(esp, rsp_after_call); + +#ifdef ASSERT + // verify that threads correspond + { + Label L1, L2, L3; + Register rbx = c_rarg1; + __ cmpptr(rthread, thread); + __ jcc(Assembler::equal, L1); + __ stop("StubRoutines::call_stub: rthread is corrupted"); + __ bind(L1); + __ get_thread(rbx); + __ cmpptr(rthread, thread); + __ jcc(Assembler::equal, L2); + __ stop("StubRoutines::call_stub: rthread is modified by call"); + __ bind(L2); + __ cmpptr(rthread, rbx); + __ jcc(Assembler::equal, L3); + __ stop("StubRoutines::call_stub: threads must correspond"); + __ bind(L3); + } +#endif + + // restore regs belonging to calling function + __ ldptr(S5, S5_save); + __ ldptr(S4, S4_save); + __ ldptr(S3, S3_save); + __ ldptr(S2, S2_save); + __ ldptr(S1, S1_save); + __ ldptr(S0, S0_save); + + __ fldd(f9, F9_off * wordSize, rfp); + __ fldd(f8, F8_off * wordSize, rfp); + __ fldd(f7, F7_off * wordSize, rfp); + __ fldd(f6, F6_off * wordSize, rfp); + __ fldd(f5, F5_off * wordSize, rfp); + __ fldd(f4, F4_off * wordSize, rfp); + __ fldd(f3, F3_off * wordSize, rfp); + __ fldd(f2, F2_off * wordSize, rfp); + + // restore rsp + __ addptr(esp, -rsp_after_call_off * wordSize, esp); + + __ leave(); + // return + __ ret(); + + // handle return types different from T_INT + __ BIND(is_long); + __ stl(rax, Address(c_rarg0, 0)); + __ jmp(exit); + + __ BIND(is_float); + __ fsts(f0, Address(c_rarg0, 0)); + __ jmp(exit); + + __ BIND(is_double); + __ fstd(f0, Address(c_rarg0, 0)); + __ jmp(exit); + + return start; + } + + // Return point for a Java call if there's an exception thrown in + // Java code. The exception is caught and transformed into a + // pending exception stored in JavaThread that can be tested from + // within the VM. + // + // Note: Usually the parameters are removed by the callee. In case + // of an exception crossing an activation frame boundary, that is + // not the case if the callee is compiled code => need to setup the + // rsp. 
+ // + // rax: exception oop + + address generate_catch_exception() { + StubCodeMark mark(this, "StubRoutines", "catch_exception"); + address start = __ pc(); + //Register rbx = c_rarg2; + //Register rax = V0; + + // same as in generate_call_stub(): + const Address rsp_after_call(rfp, rsp_after_call_off * wordSize); + const Address thread (rfp, thread_off * wordSize); + +#ifdef ASSERT + // verify that threads correspond + { + Label L1, L2, L3; + __ cmpptr(rthread, thread); + __ jcc(Assembler::equal, L1); + __ stop("StubRoutines::catch_exception: rthread is corrupted"); + __ bind(L1); + __ get_thread(rbx); + __ cmpptr(rthread, thread); + __ jcc(Assembler::equal, L2); + __ stop("StubRoutines::catch_exception: rthread is modified by call"); + __ bind(L2); + __ cmpptr(rthread, rbx); + __ jcc(Assembler::equal, L3); + __ stop("StubRoutines::catch_exception: threads must correspond"); + __ bind(L3); + } +#endif + + // set pending exception + __ verify_oop(rax); + + __ stptr(rax, Address(rthread, Thread::pending_exception_offset())); + __ lea(rscratch3, ExternalAddress((address)__FILE__)); + __ stptr(rscratch3, Address(rthread, Thread::exception_file_offset())); + __ stw((int) __LINE__, Address(rthread, Thread::exception_line_offset())); + + // complete return to VM + assert(StubRoutines::_call_stub_return_address != NULL, + "_call_stub_return_address must have been generated before"); + __ jump(RuntimeAddress(StubRoutines::_call_stub_return_address)); + + return start; + } + + // Continuation point for runtime calls returning with a pending + // exception. The pending exception check happened in the runtime + // or native call stub. The pending exception in Thread is + // converted into a Java-level exception. + // + // Contract with Java-level exception handlers: + // rax: exception + // rdx: throwing pc + // + // NOTE: At entry of this stub, exception-pc must be on stack !! + + address generate_forward_exception() { + StubCodeMark mark(this, "StubRoutines", "forward exception"); + address start = __ pc(); + //__ stop("not check:jzy"); + Register rax = V0; + Register rbx = rmethod; + Register rdx = c_rarg2; + // Upon entry, LR points to the return address returning into + // Java (interpreted or compiled) code; i.e., the return address + // becomes the throwing pc. + // + // Arguments pushed before the runtime call are still on the stack + // but the exception handler will reset the stack pointer -> + // ignore them. A potential result in registers can be ignored as + // well. + +#ifdef ASSERT + // make sure this code is only executed if there is a pending exception + { + Label L; + __ cmpptr(Address(rthread, Thread::pending_exception_offset()), R0); + __ jcc(Assembler::notEqual, L); + __ stop("StubRoutines::forward exception: no pending exception (1)"); + __ bind(L); + } +#endif + + // compute exception handler into rbx + + // call the VM to find the handler address associated with the + // caller address. pass thread in a0 and caller pc (ret address) + // in a1. n.b. the caller pc is in RA, unlike x86 where it is on + // the stack. + __ movl(rheapbase, RA); + __ movl(c_rarg1, RA); + __ block_comment("call exception_handler_for_return_address"); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, + SharedRuntime::exception_handler_for_return_address), + rthread, c_rarg1); + // we should not really care that RA is no longer the callee + // address. we saved the value the handler needs in rheapbase so we can + // just copy it to rbx. 
however, the C2 handler will push its own + // frame and then calls into the VM and the VM code asserts that + // the PC for the frame above the handler belongs to a compiled + // Java method. So, we restore lr here to satisfy that assert. + __ movl(RA, rheapbase); + __ reinit_heapbase(); //reset rheapbase + + // setup rax & rdx, remove return address & clear pending exception + __ movl(rbx, rax); + __ movl(rdx, RA); + __ ldptr(rax, Address(rthread, Thread::pending_exception_offset())); + __ stptr(R0, Address(rthread, Thread::pending_exception_offset())); + +#ifdef ASSERT + // make sure exception is set + { + Label L; + __ jcc(Assembler::notEqual, L, rax); + __ stop("StubRoutines::forward exception: no pending exception (2)"); + __ bind(L); + } +#endif + + // continue at exception handler (return address removed) + // rax: exception + // rbx: exception handler + // rdx: throwing pc + __ verify_oop(rax); + __ jmp(rbx); + + return start; + } + + // Support for intptr_t OrderAccess::fence() + // + // Arguments : + // + // Result: + address generate_orderaccess_fence() { + StubCodeMark mark(this, "StubRoutines", "orderaccess_fence"); + address start = __ pc();__ stop("unimplement generate_orderaccess_fence"); + __ memb(); + __ ret_sw(); + + return start; + } + + // Support for intptr_t get_previous_sp() + // + // This routine is used to find the previous stack pointer for the + // caller. + address generate_get_previous_sp() { + StubCodeMark mark(this, "StubRoutines", "get_previous_sp"); + address start = __ pc();__ stop("unimplement generate_get_previous_sp"); + + __ movl(V0, RA); + //__ addptr(V0, 8, V0); // return address is at the top of the stack. + __ ret_sw(); + + return start; + } + + address generate_f2i_fixup() { + StubCodeMark mark(this, "StubRoutines", "f2i_fixup"); + Address inout(esp, 5 * wordSize); // return address + 4 saves + + address start = __ pc(); + + Label L; + ShouldNotReachHere(); +// __ push(rax); +// __ push(c_rarg3); +// __ push(c_rarg2); +// __ push(c_rarg1); +// +// __ movl(rax, 0x7f800000); +// __ xorl(c_rarg3, c_rarg3); +// __ movl(c_rarg2, inout); +// __ movl(c_rarg1, c_rarg2); +// __ andl(c_rarg1, 0x7fffffff); +// __ cmpl(rax, c_rarg1); // NaN? -> 0 +// __ jcc(Assembler::negative, L); +// __ testl(c_rarg2, c_rarg2); // signed ? min_jint : max_jint +// __ movl(c_rarg3, 0x80000000); +// __ movl(rax, 0x7fffffff); +// __ cmovl(Assembler::positive, c_rarg3, rax); +// +// __ bind(L); +// __ movptr(inout, c_rarg3); +// +// __ pop(c_rarg1); +// __ pop(c_rarg2); +// __ pop(c_rarg3); +// __ pop(rax); +// +// __ ret(0); + + return start; + } + + address generate_f2l_fixup() { + StubCodeMark mark(this, "StubRoutines", "f2l_fixup"); + Address inout(esp, 5 * wordSize); // return address + 4 saves + address start = __ pc(); + ShouldNotReachHere(); + Label L; + +// __ push(rax); +// __ push(c_rarg3); +// __ push(c_rarg2); +// __ push(c_rarg1); +// +// __ movl(rax, 0x7f800000); +// __ xorl(c_rarg3, c_rarg3); +// __ movl(c_rarg2, inout); +// __ movl(c_rarg1, c_rarg2); +// __ andl(c_rarg1, 0x7fffffff); +// __ cmpl(rax, c_rarg1); // NaN? -> 0 +// __ jcc(Assembler::negative, L); +// __ testl(c_rarg2, c_rarg2); // signed ? 
min_jlong : max_jlong +// __ mov64(c_rarg3, 0x8000000000000000); +// __ mov64(rax, 0x7fffffffffffffff); +// __ cmov(Assembler::positive, c_rarg3, rax); +// +// __ bind(L); +// __ movptr(inout, c_rarg3); +// +// __ pop(c_rarg1); +// __ pop(c_rarg2); +// __ pop(c_rarg3); +// __ pop(rax); +// +// __ ret(0); + + return start; + } + + address generate_d2i_fixup() { + StubCodeMark mark(this, "StubRoutines", "d2i_fixup"); + Address inout(esp, 6 * wordSize); // return address + 5 saves + + address start = __ pc(); + ShouldNotReachHere(); +// Label L; +// +// __ push(rax); +// __ push(c_rarg3); +// __ push(c_rarg2); +// __ push(c_rarg1); +// __ push(c_rarg0); +// +// __ movl(rax, 0x7ff00000); +// __ movq(c_rarg2, inout); +// __ movl(c_rarg3, c_rarg2); +// __ mov(c_rarg1, c_rarg2); +// __ mov(c_rarg0, c_rarg2); +// __ negl(c_rarg3); +// __ shrptr(c_rarg1, 0x20); +// __ orl(c_rarg3, c_rarg2); +// __ andl(c_rarg1, 0x7fffffff); +// __ xorl(c_rarg2, c_rarg2); +// __ shrl(c_rarg3, 0x1f); +// __ orl(c_rarg1, c_rarg3); +// __ cmpl(rax, c_rarg1); +// __ jcc(Assembler::negative, L); // NaN -> 0 +// __ testptr(c_rarg0, c_rarg0); // signed ? min_jint : max_jint +// __ movl(c_rarg2, 0x80000000); +// __ movl(rax, 0x7fffffff); +// __ cmov(Assembler::positive, c_rarg2, rax); +// +// __ bind(L); +// __ movptr(inout, c_rarg2); +// +// __ pop(c_rarg0); +// __ pop(c_rarg1); +// __ pop(c_rarg2); +// __ pop(c_rarg3); +// __ pop(rax); +// +// __ ret(0); + + return start; + } + + address generate_d2l_fixup() { + StubCodeMark mark(this, "StubRoutines", "d2l_fixup"); + Address inout(esp, 6 * wordSize); // return address + 5 saves + + address start = __ pc(); + ShouldNotReachHere(); + Label L; + +// __ push(rax); +// __ push(c_rarg3); +// __ push(c_rarg2); +// __ push(c_rarg1); +// __ push(c_rarg0); +// +// __ movl(rax, 0x7ff00000); +// __ movq(c_rarg2, inout); +// __ movl(c_rarg3, c_rarg2); +// __ mov(c_rarg1, c_rarg2); +// __ mov(c_rarg0, c_rarg2); +// __ negl(c_rarg3); +// __ shrptr(c_rarg1, 0x20); +// __ orl(c_rarg3, c_rarg2); +// __ andl(c_rarg1, 0x7fffffff); +// __ xorl(c_rarg2, c_rarg2); +// __ shrl(c_rarg3, 0x1f); +// __ orl(c_rarg1, c_rarg3); +// __ cmpl(rax, c_rarg1); +// __ jcc(Assembler::negative, L); // NaN -> 0 +// __ testq(c_rarg0, c_rarg0); // signed ? min_jlong : max_jlong +// __ mov64(c_rarg2, 0x8000000000000000); +// __ mov64(rax, 0x7fffffffffffffff); +// __ cmovq(Assembler::positive, c_rarg2, rax); +// +// __ bind(L); +// __ movq(inout, c_rarg2); +// +// __ pop(c_rarg0); +// __ pop(c_rarg1); +// __ pop(c_rarg2); +// __ pop(c_rarg3); +// __ pop(rax); +// +// __ ret(0); + + return start; + } + + address generate_fp_mask(const char *stub_name, int64_t mask) { +// __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", stub_name); + address start = __ pc(); + ShouldNotReachHere(); + +// __ emit_data64( mask, relocInfo::none ); +// __ emit_data64( mask, relocInfo::none ); + + return start; + } + + // Non-destructive plausibility checks for oops + // + // Arguments: + // all args on stack! 
+ // + // Stack after saving c_rarg3: + // [tos + 0]: saved c_rarg3 + // [tos + 1]: saved c_rarg2 + // [tos + 2]: saved rheapbase (several TemplateTable methods use it) + // [tos + 3]: rscratch1 + // [tos + 4]: last RA + // * [tos + 5]: error message (char*) + // * [tos + 6]: object to verify (oop) + // * [tos + 7]: saved rax - saved by caller and bashed + // * = popped on exit + address generate_verify_oop() { + StubCodeMark mark(this, "StubRoutines", "verify_oop"); + address start = __ pc(); + Register rax = V0; + + Label exit, error; + __ push(rscratch1); + __ push(rheapbase); + + // save c_rarg2 and c_rarg3 + __ push(c_rarg2); + __ push(c_rarg3); + + __ incrementw(ExternalAddress((address) StubRoutines::verify_oop_count_addr()), 1, c_rarg2, c_rarg3); + + enum { + // After previous pushes. + oop_to_verify = 6 * wordSize, + saved_rax = 7 * wordSize, + + // Before the call to MacroAssembler::debug(), see below. + error_msg = 25 * wordSize + }; + + // get object + __ ldptr(rax, Address(esp, oop_to_verify)); + + // make sure object is 'reasonable' + __ jcc(Assembler::zero, exit, rax); // if obj is NULL it is OK + +#if INCLUDE_ZGC + if (UseZGC) { + // Check if metadata bits indicate a bad oop + __ lea(rscratch3, Address(rthread, ZThreadLocalData::address_bad_mask_offset())); + __ testptr(rax, rscratch3); + __ jcc(Assembler::notZero, error); + } +#endif + + // Check if the oop is in the right area of memory + __ movl(c_rarg2, rax); + __ mov_immediate64(c_rarg3, (intptr_t) Universe::verify_oop_mask()); + __ andptr(c_rarg2, c_rarg3, c_rarg2); + __ mov_immediate64(c_rarg3, (intptr_t) Universe::verify_oop_bits()); + __ cmpptr(c_rarg2, c_rarg3); + __ jcc(Assembler::notZero, error); + + // make sure klass is 'reasonable', which is not zero. + __ load_klass(rax, rax); // get klass + __ jcc(Assembler::zero, error, rax); // if klass is NULL it is broken + + // return if everything seems ok + __ bind(exit); + __ pop(c_rarg3); // restore c_rarg3 + __ pop(c_rarg2); // restore c_rarg2 + __ pop(rheapbase); // restore rheapbase + __ pop(rscratch1); // restore rscratch1 + + __ ret(); + + // handle errors + __ bind(error); + __ ldptr(rax, Address(esp, saved_rax)); // get saved rax back + __ pop(c_rarg3); // get saved c_rarg3 back + __ pop(c_rarg2); // get saved c_rarg2 back + __ pop(rheapbase); // get saved r12 back + __ pop(rscratch1); // restore rscratch1 + // will be ignored + __ push(RA); // sw need to save RA which need by ret after calling of debug64 + __ pushad(); // push registers + // (rip is already + // already pushed) + // debug(char* msg, int64_t pc, int64_t regs[]) + // We've popped the registers we'd saved (c_rarg3, c_rarg2 and flags), and + // pushed all the registers, so now the stack looks like: + // [tos + 0] 24 saved registers + // [tos + 24] current_RA + // [tos + 25] last RA + // * [tos + 26] error message (char*) + // * [tos + 27] object to verify (oop) + // * [tos + 28] saved rax - saved by caller and bashed + // * = popped on exit + + __ ldptr(c_rarg0, Address(esp, error_msg)); // pass address of error message + __ movl(c_rarg1, RA); // pass return address + __ movl(c_rarg2, esp); // pass address of regs on stack + __ movl(rheapbase, esp); // remember rsp + //__ subptr(esp, frame::arg_reg_save_area_bytes, esp); // windows + __ andptr(esp, -16, esp); // align stack as required by ABI + BLOCK_COMMENT("call MacroAssembler::debug"); + __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::debug64))); + __ movl(esp, rheapbase); // restore rsp + __ popad(); // pop registers 
(includes rheapbase) + __ pop(RA); + __ ret(); + + return start; + } + + // + // Verify that a register contains clean 32-bits positive value + // (high 32-bits are 0) so it could be used in 64-bits shifts. + // + // Input: + // Rint - 32-bits value + // Rtmp - scratch + // + void assert_clean_int(Register Rint, Register Rtmp) { +#ifdef ASSERT + Label L; + assert_different_registers(Rtmp, Rint); + __ movws(Rtmp, Rint); + __ cmpl_raw(Rtmp, Rint); + __ jcc(Assembler::equal, L); + __ stop("high 32-bits of int value are not 0"); + __ bind(L); +#endif + } + + // Generate overlap test for array copy stubs + // + // Input: + // c_rarg0 - from + // c_rarg1 - to + // c_rarg2 - element count + // + // Output: + // rax - &from[element count - 1] + // + void array_overlap_test(address no_overlap_target, Address::ScaleFactor sf) {SCOPEMARK_NAME(array_overlap_test, _masm) + assert(no_overlap_target != NULL, "must be generated"); + array_overlap_test(no_overlap_target, NULL, sf); + } + void array_overlap_test(Label& L_no_overlap, Address::ScaleFactor sf) { + array_overlap_test(NULL, &L_no_overlap, sf); + } + void array_overlap_test(address no_overlap_target, Label* NOLp, Address::ScaleFactor sf) { + const Register from = c_rarg0; + const Register to = c_rarg1; + const Register count = c_rarg2; + const Register end_from = V0; + + __ cmpptr(to, from); + __ lea(end_from, Address(from, count, sf, 0)); + if (NOLp == NULL) { + ExternalAddress no_overlap(no_overlap_target); + __ jump_cc(Assembler::belowEqual, no_overlap); + __ cmpptr(to, end_from); + __ jump_cc(Assembler::aboveEqual, no_overlap); + } else { + __ jcc(Assembler::belowEqual, (*NOLp)); + __ cmpptr(to, end_from); + __ jcc(Assembler::aboveEqual, (*NOLp)); + } + } + + void array_overlap_test(address no_overlap_target, int log2_elem_size) {ShouldNotReachHere(); + int elem_size = 1 << log2_elem_size; + Address::ScaleFactor sf = Address::times_1; + + switch (log2_elem_size) { + case 0: sf = Address::times_1; break; + case 1: sf = Address::times_2; break; + case 2: sf = Address::times_4; break; + case 3: sf = Address::times_8; break; + } + + ExternalAddress no_overlap(no_overlap_target); + + __ slll(A2, sf, AT); + __ addl(AT, A0, AT); + __ addptr(AT, -elem_size, T12); + __ cmpl_raw(A1, A0); + __ jump_cc(Assembler::lessEqual, no_overlap); + __ cmpl_raw(A1, T12); + __ jump_cc(Assembler::greater, no_overlap); + + // If A0 = 0xf... and A1 = 0x0..., than goto no_overlap_target + Label L; + __ jcc(Assembler::greaterEqual, L, A0); + __ jump_cc(Assembler::greater, no_overlap, A1); + __ bind(L); + + } + + // Shuffle first three arg regs on Windows into Linux/Solaris locations. + // + // Outputs: + // rdi - rcx + // rsi - rdx + // rdx - r8 + // rcx - r9 + // + // Registers r9 and r10 are used to save rdi and rsi on Windows, which latter + // are non-volatile. r9 and r10 should not be used by the caller. + // + DEBUG_ONLY(bool regs_in_thread;) + + void setup_arg_regs(int nargs = 3) { + assert(nargs == 3 || nargs == 4, "else fix"); + assert(c_rarg0 == A0 && c_rarg1 == A1 && c_rarg2 == A2 && c_rarg3 == A3, + "unexpected argument registers"); + DEBUG_ONLY(regs_in_thread = false;) + } + + void restore_arg_regs() { + } + + // This is used in places where r10 is a scratch register, and can + // be adapted if r9 is needed also. 
+ void setup_arg_regs_using_thread() { + const Register saved_r15 = r9; + assert(c_rarg0 == rdi && c_rarg1 == rsi && c_rarg2 == rdx && c_rarg3 == rcx, + "unexpected argument registers"); + DEBUG_ONLY(regs_in_thread = true;) + } + + void restore_arg_regs_using_thread() { + assert(regs_in_thread, "wrong call to restore_arg_regs"); + const Register saved_r15 = r9; + } + + // Copy big chunks forward + // + // Inputs: + // end_from - source arrays end address + // end_to - destination array end address + // qword_count - 64-bits element count, negative + // to - scratch + // L_copy_bytes - entry label + // L_copy_8_bytes - exit label + // + void copy_bytes_forward(Register end_from, Register end_to, + Register qword_count, Register to, + Label& L_copy_bytes, Label& L_copy_8_bytes) { + ShouldNotReachHere(); +// DEBUG_ONLY(__ stop("enter at entry label, not here")); +// Label L_loop; +// __ align(OptoLoopAlignment); +// if (UseUnalignedLoadStores) { +// Label L_end; +// // Copy 64-bytes per iteration +// if (UseAVX > 2) { +// Label L_loop_avx512, L_loop_avx2, L_32_byte_head, L_above_threshold, L_below_threshold; +// +// __ BIND(L_copy_bytes); +// __ cmpptr(qword_count, (-1 * AVX3Threshold / 8)); +// __ jccb(Assembler::less, L_above_threshold); +// __ jmpb(L_below_threshold); +// +// __ bind(L_loop_avx512); +// __ evmovdqul(xmm0, Address(end_from, qword_count, Address::times_8, -56), Assembler::AVX_512bit); +// __ evmovdqul(Address(end_to, qword_count, Address::times_8, -56), xmm0, Assembler::AVX_512bit); +// __ bind(L_above_threshold); +// __ addptr(qword_count, 8); +// __ jcc(Assembler::lessEqual, L_loop_avx512); +// __ jmpb(L_32_byte_head); +// +// __ bind(L_loop_avx2); +// __ vmovdqu(xmm0, Address(end_from, qword_count, Address::times_8, -56)); +// __ vmovdqu(Address(end_to, qword_count, Address::times_8, -56), xmm0); +// __ vmovdqu(xmm1, Address(end_from, qword_count, Address::times_8, -24)); +// __ vmovdqu(Address(end_to, qword_count, Address::times_8, -24), xmm1); +// __ bind(L_below_threshold); +// __ addptr(qword_count, 8); +// __ jcc(Assembler::lessEqual, L_loop_avx2); +// +// __ bind(L_32_byte_head); +// __ subptr(qword_count, 4); // sub(8) and add(4) +// __ jccb(Assembler::greater, L_end); +// } else { +// __ BIND(L_loop); +// if (UseAVX == 2) { +// __ vmovdqu(xmm0, Address(end_from, qword_count, Address::times_8, -56)); +// __ vmovdqu(Address(end_to, qword_count, Address::times_8, -56), xmm0); +// __ vmovdqu(xmm1, Address(end_from, qword_count, Address::times_8, -24)); +// __ vmovdqu(Address(end_to, qword_count, Address::times_8, -24), xmm1); +// } else { +// __ movdqu(xmm0, Address(end_from, qword_count, Address::times_8, -56)); +// __ movdqu(Address(end_to, qword_count, Address::times_8, -56), xmm0); +// __ movdqu(xmm1, Address(end_from, qword_count, Address::times_8, -40)); +// __ movdqu(Address(end_to, qword_count, Address::times_8, -40), xmm1); +// __ movdqu(xmm2, Address(end_from, qword_count, Address::times_8, -24)); +// __ movdqu(Address(end_to, qword_count, Address::times_8, -24), xmm2); +// __ movdqu(xmm3, Address(end_from, qword_count, Address::times_8, - 8)); +// __ movdqu(Address(end_to, qword_count, Address::times_8, - 8), xmm3); +// } +// +// __ BIND(L_copy_bytes); +// __ addptr(qword_count, 8); +// __ jcc(Assembler::lessEqual, L_loop); +// __ subptr(qword_count, 4); // sub(8) and add(4) +// __ jccb(Assembler::greater, L_end); +// } +// // Copy trailing 32 bytes +// if (UseAVX >= 2) { +// __ vmovdqu(xmm0, Address(end_from, qword_count, Address::times_8, -24)); +// __ 
vmovdqu(Address(end_to, qword_count, Address::times_8, -24), xmm0); +// } else { +// __ movdqu(xmm0, Address(end_from, qword_count, Address::times_8, -24)); +// __ movdqu(Address(end_to, qword_count, Address::times_8, -24), xmm0); +// __ movdqu(xmm1, Address(end_from, qword_count, Address::times_8, - 8)); +// __ movdqu(Address(end_to, qword_count, Address::times_8, - 8), xmm1); +// } +// __ addptr(qword_count, 4); +// __ BIND(L_end); +// if (UseAVX >= 2) { +// // clean upper bits of YMM registers +// __ vpxor(xmm0, xmm0); +// __ vpxor(xmm1, xmm1); +// } +// } else { +// // Copy 32-bytes per iteration +// __ BIND(L_loop); +// __ movq(to, Address(end_from, qword_count, Address::times_8, -24)); +// __ movq(Address(end_to, qword_count, Address::times_8, -24), to); +// __ movq(to, Address(end_from, qword_count, Address::times_8, -16)); +// __ movq(Address(end_to, qword_count, Address::times_8, -16), to); +// __ movq(to, Address(end_from, qword_count, Address::times_8, - 8)); +// __ movq(Address(end_to, qword_count, Address::times_8, - 8), to); +// __ movq(to, Address(end_from, qword_count, Address::times_8, - 0)); +// __ movq(Address(end_to, qword_count, Address::times_8, - 0), to); +// +// __ BIND(L_copy_bytes); +// __ addptr(qword_count, 4); +// __ jcc(Assembler::lessEqual, L_loop); +// } +// __ subptr(qword_count, 4); +// __ jcc(Assembler::less, L_copy_8_bytes); // Copy trailing qwords + } + + // Copy big chunks backward + // + // Inputs: + // from - source arrays address + // dest - destination array address + // qword_count - 64-bits element count + // to - scratch + // L_copy_bytes - entry label + // L_copy_8_bytes - exit label + // + void copy_bytes_backward(Register from, Register dest, + Register qword_count, Register to, + Label& L_copy_bytes, Label& L_copy_8_bytes) { + ShouldNotReachHere(); + /*DEBUG_ONLY(__ stop("enter at entry label, not here")); + Label L_loop; + __ align(OptoLoopAlignment); + if (UseUnalignedLoadStores) { + Label L_end; + // Copy 64-bytes per iteration + if (UseAVX > 2) { + Label L_loop_avx512, L_loop_avx2, L_32_byte_head, L_above_threshold, L_below_threshold; + + __ BIND(L_copy_bytes); + __ cmpptr(qword_count, (AVX3Threshold / 8)); + __ jccb(Assembler::greater, L_above_threshold); + __ jmpb(L_below_threshold); + + __ BIND(L_loop_avx512); + __ evmovdqul(xmm0, Address(from, qword_count, Address::times_8, 0), Assembler::AVX_512bit); + __ evmovdqul(Address(dest, qword_count, Address::times_8, 0), xmm0, Assembler::AVX_512bit); + __ bind(L_above_threshold); + __ subptr(qword_count, 8); + __ jcc(Assembler::greaterEqual, L_loop_avx512); + __ jmpb(L_32_byte_head); + + __ bind(L_loop_avx2); + __ vmovdqu(xmm0, Address(from, qword_count, Address::times_8, 32)); + __ vmovdqu(Address(dest, qword_count, Address::times_8, 32), xmm0); + __ vmovdqu(xmm1, Address(from, qword_count, Address::times_8, 0)); + __ vmovdqu(Address(dest, qword_count, Address::times_8, 0), xmm1); + __ bind(L_below_threshold); + __ subptr(qword_count, 8); + __ jcc(Assembler::greaterEqual, L_loop_avx2); + + __ bind(L_32_byte_head); + __ addptr(qword_count, 4); // add(8) and sub(4) + __ jccb(Assembler::less, L_end); + } else { + __ BIND(L_loop); + if (UseAVX == 2) { + __ vmovdqu(xmm0, Address(from, qword_count, Address::times_8, 32)); + __ vmovdqu(Address(dest, qword_count, Address::times_8, 32), xmm0); + __ vmovdqu(xmm1, Address(from, qword_count, Address::times_8, 0)); + __ vmovdqu(Address(dest, qword_count, Address::times_8, 0), xmm1); + } else { + __ movdqu(xmm0, Address(from, qword_count, 
Address::times_8, 48)); + __ movdqu(Address(dest, qword_count, Address::times_8, 48), xmm0); + __ movdqu(xmm1, Address(from, qword_count, Address::times_8, 32)); + __ movdqu(Address(dest, qword_count, Address::times_8, 32), xmm1); + __ movdqu(xmm2, Address(from, qword_count, Address::times_8, 16)); + __ movdqu(Address(dest, qword_count, Address::times_8, 16), xmm2); + __ movdqu(xmm3, Address(from, qword_count, Address::times_8, 0)); + __ movdqu(Address(dest, qword_count, Address::times_8, 0), xmm3); + } + + __ BIND(L_copy_bytes); + __ subptr(qword_count, 8); + __ jcc(Assembler::greaterEqual, L_loop); + + __ addptr(qword_count, 4); // add(8) and sub(4) + __ jccb(Assembler::less, L_end); + } + // Copy trailing 32 bytes + if (UseAVX >= 2) { + __ vmovdqu(xmm0, Address(from, qword_count, Address::times_8, 0)); + __ vmovdqu(Address(dest, qword_count, Address::times_8, 0), xmm0); + } else { + __ movdqu(xmm0, Address(from, qword_count, Address::times_8, 16)); + __ movdqu(Address(dest, qword_count, Address::times_8, 16), xmm0); + __ movdqu(xmm1, Address(from, qword_count, Address::times_8, 0)); + __ movdqu(Address(dest, qword_count, Address::times_8, 0), xmm1); + } + __ subptr(qword_count, 4); + __ BIND(L_end); + if (UseAVX >= 2) { + // clean upper bits of YMM registers + __ vpxor(xmm0, xmm0); + __ vpxor(xmm1, xmm1); + } + } else { + // Copy 32-bytes per iteration + __ BIND(L_loop); + __ movq(to, Address(from, qword_count, Address::times_8, 24)); + __ movq(Address(dest, qword_count, Address::times_8, 24), to); + __ movq(to, Address(from, qword_count, Address::times_8, 16)); + __ movq(Address(dest, qword_count, Address::times_8, 16), to); + __ movq(to, Address(from, qword_count, Address::times_8, 8)); + __ movq(Address(dest, qword_count, Address::times_8, 8), to); + __ movq(to, Address(from, qword_count, Address::times_8, 0)); + __ movq(Address(dest, qword_count, Address::times_8, 0), to); + + __ BIND(L_copy_bytes); + __ subptr(qword_count, 4); + __ jcc(Assembler::greaterEqual, L_loop); + } + __ addptr(qword_count, 4); + __ jcc(Assembler::greater, L_copy_8_bytes); // Copy trailing qwords + * */ + } + + + // Arguments: + // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary + // ignored + // name - stub name string + // + // Inputs: + // c_rarg0 - source array address + // c_rarg1 - destination array address + // c_rarg2 - element count, treated as ssize_t, can be zero + // + // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries, + // we let the hardware handle it. The one to eight bytes within words, + // dwords or qwords that span cache line boundaries will still be loaded + // and stored atomically. + // + // Side Effects: + // disjoint_byte_copy_entry is set to the no-overlap entry point + // used by generate_conjoint_byte_copy(). + // + address generate_disjoint_byte_copy(bool aligned, address* entry, const char * name) {SCOPEMARK_NAME(generate_disjoint_byte_copy, _masm) + StubCodeMark mark(this, "StubRoutines", name); + __ align(CodeEntryAlignment); + + Register src = c_rarg0; + Register dst = c_rarg1; + Register count = c_rarg2; + Register tmp1 = rscratch1; + Register tmp2 = rscratch2; + + address start = __ pc(); + + __ enter(); // required for proper stackwalking of RuntimeStub frame + assert_clean_int(c_rarg2, rscratch3); // Make sure 'count' is clean int. 
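+    // The SIMD path below follows an align-then-bulk-copy pattern. A rough C sketch of the intended behaviour (illustrative only; copy_core_forward is assumed to consume whole 32-byte groups and leave any remainder in count, and memcpy stands in for its vector stores):
+    //
+    //   if (count > 63) {                        // small copies go straight to the tail loop
+    //     while (((uintptr_t)dst & 31) != 0) {   // copy single bytes until dst is 0 mod 32
+    //       *dst++ = *src++; count--;
+    //     }
+    //     while (count >= 32) {                  // bulk 32-byte chunks (copy_core_forward)
+    //       memcpy(dst, src, 32); dst += 32; src += 32; count -= 32;
+    //     }
+    //   }
+    //   while (count-- > 0) *dst++ = *src++;     // tail bytes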
+ + if (entry != NULL) { + *entry = __ pc(); + // caller can pass a 64-bit byte count here (from Unsafe.copyMemory) + __ block_comment("Entry:"); + } + + //__ movl(src, A0); + //__ movl(dst, A1); + //__ movl(count, A2); + + Label l_align_dst, l_align_src, l_tail_bytes, l_end, l_tail; + + if(UseSimdForward){ + __ beq_l(count, l_end); + + __ cmple(count, 63, tmp1); + __ bne_l(tmp1, l_tail_bytes); //when count <= 63, don't use simd + + __ BIND(l_align_dst); + __ and_ins(dst, 31, tmp1); //is dst 0mod32? + __ beq_l(tmp1, l_align_src); + + __ ldbu(src, 0, tmp1); //grab 1 byte at a time, until dst is 0mod32 + __ stb(tmp1, 0, dst); + __ subl(count, 1, count); + __ addl(dst, 1, dst); + __ addl(src, 1, src); + __ beq_l(R0, l_align_dst); + + __ BIND(l_align_src); + copy_core_forward(32, src, dst, count, tmp1, tmp2); + + __ BIND(l_tail); + __ ble_l(count, l_end); + + //copy tail bytes. + __ BIND(l_tail_bytes); + __ ldbu(src, 0, tmp1); + __ stb(tmp1, 0, dst); + __ addl(src, 1, src); + __ addl(dst, 1, dst); + __ subl(count, 1, count); + __ bne_l(count, l_tail_bytes); + + __ BIND(l_end); + + }else{ + generate_disjoint_copy(0, src, dst, count); + } + + inc_counter_np(SharedRuntime::_jbyte_array_copy_ctr); // Update counter + __ movl(V0, R0); // return 0 + __ leave(); + __ ret(); + + return start; + } + + // Arguments: + // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary + // ignored + // name - stub name string + // + // Inputs: + // c_rarg0 - source array address + // c_rarg1 - destination array address + // c_rarg2 - element count, treated as ssize_t, can be zero + // + // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries, + // we let the hardware handle it. The one to eight bytes within words, + // dwords or qwords that span cache line boundaries will still be loaded + // and stored atomically. + // + address generate_conjoint_byte_copy(bool aligned, address nooverlap_target, + address* entry, const char *name) {SCOPEMARK_NAME(generate_conjoint_byte_copy, _masm); + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", name); + address start = __ pc(); +//__ stop("generate_conjoint_byte_copy"); + Label l_exit; + Label l_copy_byte; + Label l_align_dst, l_align_src, l_tail_bytes, l_end, l_tail; + + /*address nooverlap_target = aligned ? + StubRoutines::arrayof_jbyte_disjoint_arraycopy() : + StubRoutines::jbyte_disjoint_arraycopy();*/ + + Register src = c_rarg0; // source array address + Register dst = c_rarg1; // destination array address + Register count = c_rarg2; // elements count + Register end_src = src; // source array end address + Register end_dst = dst; // destination array end address + Register end_count = count; // destination array end address + Register tmp1 = rscratch1; + Register tmp2 = rscratch2; + + __ enter(); // required for proper stackwalking of RuntimeStub frame + assert_clean_int(c_rarg2, rscratch3); // Make sure 'count' is clean int. + + if (entry != NULL) { + *entry = __ pc(); + // caller can pass a 64-bit byte count here (from Unsafe.copyMemory) + __ block_comment("Entry:"); + } + + array_overlap_test(nooverlap_target, Address::times_1); + // copy from high to low + //__ movl(end_count, count); + __ addl(src, end_count, end_src); + __ addl(dst, end_count, end_dst); + + if (UseSimdBackward) { + + __ beq_l(count, l_end); + + __ cmple(count, 63, tmp1); + __ bne_l(tmp1, l_tail_bytes); //when count <= 63, don't use simd + + __ BIND(l_align_dst); + __ and_ins(end_dst, 31, tmp1); //is dst 0mod32? 
+ __ beq_l(tmp1, l_align_src); + + __ ldbu(end_src, -1, tmp2); //grab 1 bytes at a time, until dst is 0mod32 + __ stb(tmp2, -1, end_dst); + __ subl(count, 1, count); + __ subl(end_dst, 1, end_dst); + __ subl(end_src, 1, end_src); + __ beq_l(R0, l_align_dst); + + __ BIND(l_align_src); + copy_core_backward(32, end_src, end_dst, count, tmp1, tmp2); + + __ BIND(l_tail); + __ ble_l(count, l_end); + + __ BIND(l_tail_bytes); + __ ldbu(end_src, -1, tmp1); + __ stb(tmp1, -1, end_dst); + __ subl(end_src, 1, end_src); + __ subl(end_dst, 1, end_dst); + __ subl(count, 1, count); + __ bne_l(count, l_tail_bytes); + + __ BIND(l_end); + + } else { + generate_conjoint_copy(0, end_src, end_dst, end_count); + } + + inc_counter_np(SharedRuntime::_jbyte_array_copy_ctr); // Update counter + __ movl(V0, R0); // return 0 + __ leave(); + __ ret(); + return start; + } + // Arguments: + // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary + // ignored + // name - stub name string + // + // Inputs: + // c_rarg0 - source array address + // c_rarg1 - destination array address + // c_rarg2 - element count, treated as ssize_t, can be zero + // + // If 'from' and/or 'to' are aligned on 4- or 2-byte boundaries, we + // let the hardware handle it. The two or four words within dwords + // or qwords that span cache line boundaries will still be loaded + // and stored atomically. + // + // Side Effects: + // disjoint_short_copy_entry is set to the no-overlap entry point + // used by generate_conjoint_short_copy(). + // + address generate_disjoint_short_copy(bool aligned, address *entry, const char * name) {SCOPEMARK_NAME(generate_disjoint_short_copy, _masm) + StubCodeMark mark(this, "StubRoutines", name); + __ align(CodeEntryAlignment); + + Register src = T0; + Register dst = T1; + Register count = T3; + Register tmp1 = rscratch1; + Register tmp2 = rscratch2; + + Register tmp4 = T11; + Register tmp5 = T12; + Register tmp6 = T2; + + address start = __ pc(); + __ enter(); // required for proper stackwalking of RuntimeStub frame + assert_clean_int(c_rarg2, V0); // Make sure 'count' is clean int. 
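+    // 'count' here is a number of jshorts. The SIMD path works on elements directly (the leading argument of copy_core_forward -- 32 for bytes, 16 for shorts, 8 for ints, 4 for longs -- appears to be the element count per 32-byte vector), whereas the fallback rescales to a byte count first, e.g.:
+    //
+    //   count <<= 1;                              // jshorts -> bytes
+    //   generate_disjoint_copy(1 /* log2(element size) */, src, dst, count);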
+ + if (entry != NULL) { + *entry = __ pc(); + // caller can pass a 64-bit byte count here (from Unsafe.copyMemory) + __ block_comment("Entry:"); + } + + __ movl(src, A0); + __ movl(dst, A1); + __ movl(count, A2); + + Label l_1, l_2, l_3, l_4, l_5, l_6, l_7, l_8, l_9, l_10, l_11, l_12, l_13, l_14; + Label l_debug; + Label l_align_dst, l_align_src, l_tail_2_bytes, l_end, l_tail; + // don't try anything fancy if arrays don't have many elements + + if(UseSimdForward){ + __ cmple(count, 31, tmp1); //if count < 32(bytes < 64), then copy 2 bytes at a time + __ bne_l(tmp1, l_tail); + + __ BIND(l_align_dst); + __ and_ins(dst, 31, tmp1); + __ beq_l(tmp1, l_align_src); + + __ ldhu(src, 0, tmp2); + __ subl(count, 1, count); + __ sth(tmp2, 0, dst); + __ addl(src, 2, src); + __ addl(dst, 2, dst); + __ beq_l(R0, l_align_dst); + + __ BIND(l_align_src); + copy_core_forward(16, src, dst, count, tmp1, tmp2); + + __ BIND(l_tail); + __ ble_l(count, l_end); + + __ BIND(l_tail_2_bytes); + __ ldhu(src, 0, tmp1); + __ sth(tmp1, 0, dst); + __ addl(src, 2, src); + __ addl(dst, 2, dst); + __ subl(count, 1, count); + __ bne_l(count, l_tail_2_bytes); + + + __ BIND(l_end); + + } else { + __ slll(count, 1, count); + generate_disjoint_copy(1, src, dst, count); + } + + inc_counter_np(SharedRuntime::_jshort_array_copy_ctr); // Update counter + __ movl(V0, R0); // return 0 + __ leave(); + __ ret(); + + __ bind(l_debug); + __ stop("generate_disjoint_short_copy should not reach here"); + return start; + } + + + address generate_fill(BasicType t, bool aligned, const char *name) {SCOPEMARK_NAME(generate_fill, _masm) + //__ stop("generate_fill"); + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", name); + address start = __ pc(); + + __ block_comment("Entry:"); + + const Register to = c_rarg0; // source array address + const Register value = c_rarg1; // value + const Register count = c_rarg2; // elements count + + __ enter(); // required for proper stackwalking of RuntimeStub frame + + __ generate_fill(t, aligned, to, value, count, FSR); + + __ leave(); // required for proper stackwalking of RuntimeStub frame + __ ret(); + return start; + } + + // Arguments: + // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary + // ignored + // name - stub name string + // + // Inputs: + // c_rarg0 - source array address + // c_rarg1 - destination array address + // c_rarg2 - element count, treated as ssize_t, can be zero + // + // If 'from' and/or 'to' are aligned on 4- or 2-byte boundaries, we + // let the hardware handle it. The two or four words within dwords + // or qwords that span cache line boundaries will still be loaded + // and stored atomically. + // + address generate_conjoint_short_copy(bool aligned, address nooverlap_target, + address* entry, const char *name) {SCOPEMARK_NAME(generate_conjoint_short_copy, _masm) + Label l_tail_2_bytes, l_align_dst, l_align_src, l_tail, l_end, l_exit, l_copy_2_bytes; + StubCodeMark mark(this, "StubRoutines", name); + __ align(CodeEntryAlignment); + address start = __ pc(); + + Register end_src = T3; + Register end_dst = T0; + Register count = T1; + Register tmp1 = rscratch1; + Register tmp2 = rscratch2; + + __ enter(); // required for proper stackwalking of RuntimeStub frame + assert_clean_int(c_rarg2, V0); // Make sure 'count' is clean int. 
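+    // Conjoint ("may overlap") copies run from the highest address down, and array_overlap_test below first routes provably disjoint cases to the forward stub. In C terms the dispatch is roughly (sketch, 2 bytes per jshort):
+    //
+    //   jshort* end_src = src + count;                 // one past the last source element
+    //   if (dst <= src || dst >= end_src) {
+    //     goto nooverlap_target;                       // regions are disjoint: forward copy is safe
+    //   }
+    //   while (count-- > 0) dst[count] = src[count];   // overlap: copy highest element first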
+ if (entry != NULL) { + *entry = __ pc(); + // caller can pass a 64-bit byte count here (from Unsafe.copyMemory) + __ block_comment("Entry:"); + } + + array_overlap_test(nooverlap_target, Address::times_2); + __ movl(end_src, A0); + __ movl(end_dst, A1); + __ movl(count, A2); + + if(UseSimdBackward){ + __ beq_l(count, l_end); + + __ sll(T1, Address::times_2, tmp1); + __ addl(T3, tmp1, end_src); + __ addl(T0, tmp1, end_dst); + + __ cmple(count, 31, tmp1); + __ bne_l(tmp1, l_tail_2_bytes); //when count <= 31, don't use simd + + __ BIND(l_align_dst); + __ and_ins(end_dst, 31, tmp1); //is dst 0mod32? + __ beq_l(tmp1, l_align_src); + + __ ldhu(end_src, -2, tmp2); //grab 2 bytes at a time, until dst is 0mod32 + __ sth(tmp2, -2, end_dst); + __ subl(count, 1, count); + __ subl(end_dst, 2, end_dst); + __ subl(end_src, 2, end_src); + __ beq_l(R0, l_align_dst); + + __ BIND(l_align_src); + copy_core_backward(16, end_src, end_dst, count, tmp1, tmp2); + + __ BIND(l_tail); + __ ble_l(count, l_end); + + __ BIND(l_tail_2_bytes); + __ ldhu(end_src, -2, tmp1); + __ sth(tmp1, -2, end_dst); + __ subl(end_src, 2, end_src); + __ subl(end_dst, 2, end_dst); + __ subl(count, 1, count); + __ bne_l(count, l_tail_2_bytes); + + __ BIND(l_end); + + }else{ + __ slll(count, 1, count); + __ addl(T3, count, end_src); + __ addl(T0, count, end_dst); + generate_conjoint_copy(1, end_src, end_dst, count); + } + + inc_counter_np(SharedRuntime::_jshort_array_copy_ctr); // Update counter + __ movl(V0, R0); // return 0 + __ leave(); + __ ret(); + return start; + } + + // Arguments: + // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary + // ignored + // is_oop - true => oop array, so generate store check code + // name - stub name string + // + // Inputs: + // c_rarg0 - source array address + // c_rarg1 - destination array address + // c_rarg2 - element count, treated as ssize_t, can be zero + // + // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let + // the hardware handle it. The two dwords within qwords that span + // cache line boundaries will still be loaded and stored atomicly. + // + // Side Effects: + // disjoint_int_copy_entry is set to the no-overlap entry point + // used by generate_conjoint_int_oop_copy(). + // + address generate_disjoint_int_oop_copy(bool aligned, bool is_oop, address* entry, const char *name, bool dest_uninitialized = false) {SCOPEMARK_NAME(generate_disjoint_int_oop_copy, _masm) + Label l_tail_4_bytes, l_align_dst, l_align_src, l_align_simd, l_misalign, l_misalign_simd, l_tail, l_before_tail, l_end; + StubCodeMark mark(this, "StubRoutines", name); + + Register src = T3; + Register dst = T0; + Register count = T1; + Register dword_count = T4; + Register tmp1 = rscratch1; + Register tmp2 = rscratch2; + __ align(CodeEntryAlignment); + address start = __ pc(); + __ enter(); // required for proper stackwalking of RuntimeStub frame + assert_clean_int(c_rarg2, V0); // Make sure 'count' is clean int. + + if (entry != NULL) { + *entry = __ pc(); + // caller can pass a 64-bit byte count here (from Unsafe.copyMemory) + __ block_comment("Entry:"); + } + + __ movl(src, A0); + __ movl(dst, A1); + __ movl(count, A2); + + DecoratorSet decorators = IN_HEAP | IS_ARRAY | ARRAYCOPY_DISJOINT; + if (dest_uninitialized) { + decorators |= IS_DEST_UNINITIALIZED; + } + if (aligned) { + decorators |= ARRAYCOPY_ALIGNED; + } + + BasicType type = is_oop ? 
T_OBJECT : T_INT; + BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); + bs->arraycopy_prologue(_masm, decorators, type, src, dst, count); + + if (is_oop) { + // save regs before copy_memory + __ push(src); + __ push(dst); + __ push(count); + } + + __ movl(dword_count, count); + + if(UseSimdForward){ + __ cmple(count, 15, tmp1); + __ bne_l(tmp1, l_tail); + + __ BIND(l_align_dst); + __ and_ins(dst, 31, tmp1); + __ beq_l(tmp1, l_align_src); + + __ ldw(src, 0, tmp1); + __ subl(count, 1, count); + __ stw(tmp1, 0, dst); + __ addl(src, 4, src); + __ addl(dst, 4, dst); + __ beq_l(R0, l_align_dst); + + __ BIND(l_align_src); + copy_core_forward(8, src, dst, count, tmp1, tmp2); + + __ BIND(l_tail); + __ ble_l(count, l_end); + + __ BIND(l_tail_4_bytes); + __ ldw(src, 0, tmp2); + __ stw(tmp2, 0, dst); + __ addl(src, 4, src); + __ addl(dst, 4, dst); + __ subl(count, 1, count); + __ bne_l(count, l_tail_4_bytes); + + + __ BIND(l_end); + + } else { + __ slll(count, 2, count); + generate_disjoint_copy(2, src, dst, count); + } + + if (is_oop) { + __ pop(count); + __ pop(dst); + __ pop(src); + } + + bs->arraycopy_epilogue(_masm, decorators, type, src, dst, dword_count); + inc_counter_np(SharedRuntime::_jint_array_copy_ctr); // Update counter + __ movl(V0, R0); + __ leave(); + __ ret(); + return start; + } + + // Arguments: + // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary + // ignored + // is_oop - true => oop array, so generate store check code + // name - stub name string + // + // Inputs: + // c_rarg0 - source array address + // c_rarg1 - destination array address + // c_rarg2 - element count, treated as ssize_t, can be zero + // + // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let + // the hardware handle it. The two dwords within qwords that span + // cache line boundaries will still be loaded and stored atomicly. + // + address generate_conjoint_int_oop_copy(bool aligned, bool is_oop, address nooverlap_target, + address* entry, const char *name, bool dest_uninitialized = false) { + Label l_2, l_4; + Label l_tail_4_bytes, l_align_dst, l_align_src, l_tail, l_end; + StubCodeMark mark(this, "StubRoutines", name); + __ align(CodeEntryAlignment); + address start = __ pc(); + Register from = c_rarg0; + Register to = c_rarg1; + Register end_src = T3; + Register end_dst = T0; + Register count = T1; + Register tmp1 = rscratch1; + Register tmp2 = rscratch2; + Register dword_count = T4; + + __ enter(); // required for proper stackwalking of RuntimeStub frame + + if (entry != NULL) { + *entry = __ pc(); + // caller can pass a 64-bit byte count here (from Unsafe.copyMemory) + __ block_comment("Entry:"); + } + + array_overlap_test(nooverlap_target, Address::times_4); + + __ movl(count, A2); + __ movl(end_src, A0); + __ movl(end_dst, A1); + + DecoratorSet decorators = IN_HEAP | IS_ARRAY; + if (dest_uninitialized) { + decorators |= IS_DEST_UNINITIALIZED; + } + if (aligned) { + decorators |= ARRAYCOPY_ALIGNED; + } + + BasicType type = is_oop ? T_OBJECT : T_INT; + BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); + // no registers are destroyed by this call + bs->arraycopy_prologue(_masm, decorators, type, from, to, count); + + if (is_oop) { + // save regs before copy_memory + __ push(from); + __ push(to); + __ push(count); + } + + assert_clean_int(count, V0); // Make sure 'count' is clean int. 
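+    // For oop arrays the element copy is bracketed by the barrier-set hooks, and the argument registers are saved around the copy because it clobbers them. The overall shape is roughly (sketch; save/restore stands for the push/pop above and below, and the concrete barrier work depends on the collector in use):
+    //
+    //   bs->arraycopy_prologue(decorators, type, from, to, count);          // e.g. pre-barrier work
+    //   if (is_oop) { save(from); save(to); save(count); }
+    //   ... copy the elements backwards ...
+    //   if (is_oop) { restore(count); restore(to); restore(from); }
+    //   bs->arraycopy_epilogue(decorators, type, from, to, dword_count);    // e.g. card marking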
+ __ movl(dword_count, count); + + // T3: source array address + // T0: destination array address + // T1: element count + + if(UseSimdBackward){ + __ beq_l(count, l_end); + + __ sll(T1, Address::times_4, tmp1); + __ addl(T3, tmp1, end_src); + __ addl(T0, tmp1, end_dst); + + __ cmple(count, 15, tmp1); + __ bne_l(tmp1, l_tail_4_bytes); //when count <= 15, don't use simd + + __ BIND(l_align_dst); + __ and_ins(end_dst, 31, tmp1); //is dst 0mod32? + __ beq_l(tmp1, l_align_src); + + __ ldw(end_src, -4, tmp1); //grab 4 bytes at a time, until dst is 0mod32 + __ stw(tmp1, -4, end_dst); + __ subl(count, 1, count); + __ subl(end_dst, 4, end_dst); + __ subl(end_src, 4, end_src); + __ beq_l(R0, l_align_dst); // todo zl check? + + __ BIND(l_align_src); + copy_core_backward(8, end_src, end_dst, count, tmp1, tmp2); + + __ BIND(l_tail); + __ ble_l(count, l_end); + + __ BIND(l_tail_4_bytes); + __ ldw(end_src, -4, tmp1); + __ stw(tmp1, -4, end_dst); + __ subl(end_src, 4, end_src); + __ subl(end_dst, 4, end_dst); + __ subl(count, 1, count); + __ bne_l(count, l_tail_4_bytes); + + __ BIND(l_end); + + }else{ + __ slll(count, 2, count); + __ addl(end_src, count, end_src); + __ addl(end_dst, count, end_dst); + generate_conjoint_copy(2, end_src, end_dst, count); + } + + if (is_oop) { + __ pop(count); + __ pop(to); + __ pop(from); + } + + bs->arraycopy_epilogue(_masm, decorators, type, from, to, dword_count); + inc_counter_np(SharedRuntime::_jint_array_copy_ctr); // Update counter after rscratch1 is free + __ movl(V0, R0); // return 0 + + __ leave(); + __ ret(); + return start; + } + + + // Arguments: + // aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes + // ignored + // is_oop - true => oop array, so generate store check code + // name - stub name string + // + // Inputs: + // c_rarg0 - source array address + // c_rarg1 - destination array address + // c_rarg2 - element count, treated as ssize_t, can be zero + // + // Side Effects: + // disjoint_oop_copy_entry or disjoint_long_copy_entry is set to the + // no-overlap entry point used by generate_conjoint_long_oop_copy(). + // + address generate_disjoint_long_oop_copy(bool aligned, bool is_oop, address* entry, const char *name, bool dest_uninitialized = false) {SCOPEMARK_NAME(generate_disjoint_long_oop_copy, _masm) + Label l_3, l_4; + Label l_tail_8_bytes, l_align_dst, l_align_src, l_tail, l_end; + + Register src = c_rarg0; + Register dst = c_rarg1; + Register count = c_rarg2; + Register tmp1 = rscratch1; + Register tmp2 = rscratch2; + //Register saved_count = T11;//special, relate to arraycopy_prologue TODO:refactor, maybe put saved_count as parameter? jzy + + StubCodeMark mark(this, "StubRoutines", name); + __ align(CodeEntryAlignment); + address start = __ pc(); +//__ stop("generate_disjoint_long_oop_copy"); + //__ movl(src, A0); + //__ movl(dst, A1); + //__ movl(count, A2); + + __ enter(); // required for proper stackwalking of RuntimeStub frame + // Save no-overlap entry point for generate_conjoint_long_oop_copy() + assert_clean_int(c_rarg2, rscratch3); // Make sure 'count' is clean int. + + if (entry != NULL) { + *entry = __ pc(); + // caller can pass a 64-bit byte count here (from Unsafe.copyMemory) + __ block_comment("Entry:"); + } + + DecoratorSet decorators = IN_HEAP | IS_ARRAY | ARRAYCOPY_DISJOINT; + if (dest_uninitialized) { + decorators |= IS_DEST_UNINITIALIZED; + } + if (aligned) { + decorators |= ARRAYCOPY_ALIGNED; + } + + BasicType type = is_oop ? 
T_OBJECT : T_LONG; + BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); + bs->arraycopy_prologue(_masm, decorators, type, src, dst, count); + + if (is_oop) { + // save regs before copy_memory + __ push(src); + __ push(dst); + __ push(count); + } + + // T3: source array address + // T0: destination array address + // T1: element count + //TODO:refact jzy saved_count(T11) should not changed before arraycopy_epilogue, because count is saved in arraycopy_prologue + if(UseSimdForward){ + __ align(16); + __ beq_l(count, l_end); + + __ cmple(count, 7, tmp1); + __ bne_l(tmp1, l_tail_8_bytes); //when count <= 7, don't use simd + + __ BIND(l_align_dst); + __ and_ins(dst, 31, tmp1); //is dst 0mod32? + __ beq_l(tmp1, l_align_src); + + __ ldl(src, 0, tmp1); //grab 8 bytes at a time, until dst is 0mod32 + __ stl(tmp1, 0, dst); + __ subl(count, 1, count); + __ addl(dst, 8, dst); + __ addl(src, 8, src); + __ beq_l(R0, l_align_dst); //todo zl check? + + __ BIND(l_align_src); + copy_core_forward(4, src, dst, count, tmp1, tmp2); + + __ BIND(l_tail); + __ ble_l(count, l_end); + + __ BIND(l_tail_8_bytes); + __ ldl(src, 0, tmp1); + __ stl(tmp1, 0, dst); + __ addl(src, 8, src); + __ addl(dst, 8, dst); + __ subl(count, 1, count); + __ bne_l(count, l_tail_8_bytes); + + __ BIND(l_end); + + }else{ + __ slll(count, 3, count); + generate_disjoint_copy(3, src, dst, count); + } + + if (is_oop) { + __ pop(count); + __ pop(dst); + __ pop(src); + } + + bs->arraycopy_epilogue(_masm, decorators, type, src, dst, count); + if (is_oop) { + inc_counter_np(SharedRuntime::_oop_array_copy_ctr); // Update counter after rscratch1 is free + } else { + inc_counter_np(SharedRuntime::_jlong_array_copy_ctr); // Update counter after rscratch1 is free + } + + __ movl(V0, R0); //return 0 + __ leave(); + __ ret(); + + return start; + } + + // Arguments: + // aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes + // ignored + // is_oop - true => oop array, so generate store check code + // name - stub name string + // + // Inputs: + // c_rarg0 - source array address + // c_rarg1 - destination array address + // c_rarg2 - element count, treated as ssize_t, can be zero + // + // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let + // the hardware handle it. The two dwords within qwords that span + // cache line boundaries will still be loaded and stored atomicly. + // + address generate_conjoint_long_oop_copy(bool aligned, bool is_oop, + address nooverlap_target, address *entry, + const char *name, bool dest_uninitialized = false) {SCOPEMARK_NAME(generate_conjoint_long_oop_copy, _masm) + + Label l_1, l_2; + Label l_tail_8_bytes, l_align_dst, l_align_src, l_tail, l_end; + + StubCodeMark mark(this, "StubRoutines", name); + __ align(CodeEntryAlignment); + address start = __ pc(); + Register end_src = c_rarg0; + Register end_dst = c_rarg1; + Register count = c_rarg2; + Register tmp1 = rscratch1; + Register tmp2 = rscratch2; + + + __ enter(); // required for proper stackwalking of RuntimeStub frame + assert_clean_int(c_rarg2, rscratch3); // Make sure 'count' is clean int. 
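+    // Backward 8-byte copy in outline: end pointers start one past the last element and are decremented, so a destination overlapping the tail of the source is still copied correctly (illustrative sketch):
+    //
+    //   jlong* s_end = src + count;   // one past the last source element
+    //   jlong* d_end = dst + count;
+    //   while (count-- > 0) *--d_end = *--s_end;
+    //
+    // The SIMD variant additionally copies single 8-byte words until d_end is 32-byte aligned before switching to copy_core_backward.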
+ + if (entry != NULL) { + *entry = __ pc(); + // caller can pass a 64-bit byte count here (from Unsafe.copyMemory) + __ block_comment("Entry:"); + } + + array_overlap_test(nooverlap_target, Address::times_8); + //__ movl(end_src, A0); + //__ movl(end_dst, A1); + //__ movl(count, A2); + + DecoratorSet decorators = IN_HEAP | IS_ARRAY; + if (dest_uninitialized) { + decorators |= IS_DEST_UNINITIALIZED; + } + if (aligned) { + decorators |= ARRAYCOPY_ALIGNED; + } + + BasicType type = is_oop ? T_OBJECT : T_LONG; + BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); + bs->arraycopy_prologue(_masm, decorators, type, end_src, end_dst, count); + + if (is_oop) { + // save regs before copy_memory + __ push(end_src); + __ push(end_dst); + __ push(count); + } + + if(UseSimdBackward){ + __ align(16); + __ beq_l(count, l_end); + + __ sll(T1, Address::times_8, tmp1); + __ addl(T3, tmp1, end_src); + __ addl(T0, tmp1, end_dst); + + __ cmple(count, 7, tmp1); + __ bne_l(tmp1, l_tail_8_bytes); //when count <= 7, don't use simd + + __ BIND(l_align_dst); + __ and_ins(end_dst, 31, tmp1); //is dst 0mod32? + __ beq_l(tmp1, l_align_src); + + __ ldl(end_src, -8, tmp1); //grab 8 bytes at a time, until dst is 0mod32 + __ stl(tmp1, -8, end_dst); + __ subl(count, 1, count); + __ subl(end_dst, 8, end_dst); + __ subl(end_src, 8, end_src); + __ ble_l(count, l_end); + __ beq_l(R0, l_align_dst); + + __ BIND(l_align_src); + copy_core_backward(4, end_src, end_dst, count, tmp1, tmp2); + + __ BIND(l_tail); + __ ble_l(count, l_end); + + __ BIND(l_tail_8_bytes); + __ ldl(end_src, -8, tmp1); + __ stl(tmp1, -8, end_dst); + __ subl(end_src, 8, end_src); + __ subl(end_dst, 8, end_dst); + __ subl(count, 1, count); + __ bne_l(count,l_tail_8_bytes); + + __ BIND(l_end); + + }else{ + __ slll(count, Address::times_8, count); + __ addl(end_src, count, end_src); + __ addl(end_dst, count, end_dst); + generate_conjoint_copy(3, end_src, end_dst, count); + } + + if (is_oop) { + __ pop(count); + __ pop(end_dst); + __ pop(end_src); + } + + bs->arraycopy_epilogue(_masm, decorators, type, end_src, end_dst, count); + if (is_oop) { + inc_counter_np(SharedRuntime::_oop_array_copy_ctr); // Update counter after rscratch1 is free + } else { + inc_counter_np(SharedRuntime::_jlong_array_copy_ctr); // Update counter after rscratch1 is free + } + + __ movl(V0, R0); //return 0 + __ leave(); + __ ret(); + return start; + } + + + // Helper for generating a dynamic type check. + // Smashes no registers. + void generate_type_check(Register sub_klass, + Register super_check_offset, + Register super_klass, + Label& L_success, Register temp_reg = noreg, Register temp2_reg = noreg) { + assert_different_registers(sub_klass, super_check_offset, super_klass); + + __ block_comment("type_check:"); + Label L_miss; + + __ check_klass_subtype_fast_path(sub_klass, super_klass, temp_reg, &L_success, &L_miss, NULL, + super_check_offset); + __ check_klass_subtype_slow_path(sub_klass, super_klass, temp_reg, temp2_reg, &L_success, NULL); + + // Fall through on failure! 
+ __ bind(L_miss); + } + + // + // Generate checkcasting array copy stub + // + // Input: + // c_rarg0 - source array address + // c_rarg1 - destination array address + // c_rarg2 - element count, treated as ssize_t, can be zero + // c_rarg3 - size_t ckoff (super_check_offset) + // not Win64 + // c_rarg4 - oop ckval (super_klass) + // Win64 + // rsp+40 - oop ckval (super_klass) + // + // Output: + // rax == 0 - success + // rax == -1^K - failure, where K is partial transfer count + // + address generate_checkcast_copy(const char *name, address *entry, + bool dest_uninitialized = false) {SCOPEMARK_NAME(generate_checkcast_copy, _masm) + + Label L_load_element, L_store_element, L_do_card_marks, L_done; + + // Input registers (after setup_arg_regs) + const Register from = c_rarg0; // source array address + const Register to = c_rarg1; // destination array address + const Register length = c_rarg2; // elements count + const Register ckoff = c_rarg3; // super_check_offset + const Register ckval = c_rarg4; // super_klass + + // Registers used as temps (r13, r14 are save-on-entry) + const Register end_from = from; // source array end address + const Register end_to = r13; // destination array end address + const Register count = c_rarg2; // -(count_remaining) + const Register r14_length = r14; // saved copy of length + // End pointers are inclusive, and if length is not zero they point + // to the last unit copied: end_to[0] := end_from[0] + +// const Register rax = V0; +// const Register r13 = end_to; +// const Register r14 = r14_length; + //const Register rcx = ckoff; + const Register rax_oop = V0; // actual oop copied + const Register r11_klass = T4; // oop._klass + + //--------------------------------------------------------------- + // Assembler stub will be used for this call to arraycopy + // if the two arrays are subtypes of Object[] but the + // destination array type is not equal to or a supertype + // of the source type. Each element must be separately + // checked. + + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", name); + address start = __ pc(); + + __ enter(); // required for proper stackwalking of RuntimeStub frame + +#ifdef ASSERT + // caller guarantees that the arrays really are different + // otherwise, we would have to make conjoint checks + { Label L; + array_overlap_test(L, TIMES_OOP); + __ stop("checkcast_copy within a single array"); + __ bind(L); + } +#endif //ASSERT + + setup_arg_regs(4); // from => rdi, to => rsi, length => rdx + // ckoff => rcx, ckval => r8 + // r9 and r10 may be used to save non-volatile registers + + // Caller of this entry point must set up the argument registers. + if (entry != NULL) { + *entry = __ pc(); + __ block_comment("Entry:"); + } + + // allocate spill slots for r13, r14 + enum { + saved_r13_offset, + saved_r14_offset, + saved_rbp_offset + }; + __ subptr(rsp, saved_rbp_offset * wordSize,rsp); + __ stl(r13, Address(rsp, saved_r13_offset * wordSize)); + __ stl(r14, Address(rsp, saved_r14_offset * wordSize)); + + // check that int operands are properly extended to size_t + assert_clean_int(length, rax); + assert_clean_int(ckoff, rax); + +#ifdef ASSERT + __ block_comment("assert consistent ckoff/ckval"); + // The ckoff and ckval must be mutually consistent, + // even though caller generates both. 
+ { Label L; + int sco_offset = in_bytes(Klass::super_check_offset_offset()); + __ cmpw(ckoff, Address(ckval, sco_offset)); + __ jcc(Assembler::equal, L); + __ stop("super_check_offset inconsistent"); + __ bind(L); + } +#endif //ASSERT + + // Loop-invariant addresses. They are exclusive end pointers. + Address end_from_addr(from, length, TIMES_OOP, 0); + Address end_to_addr(to, length, TIMES_OOP, 0); + // Loop-variant addresses. They assume post-incremented count < 0. + Address from_element_addr(end_from, count, TIMES_OOP, 0); + Address to_element_addr(end_to, count, TIMES_OOP, 0); + + DecoratorSet decorators = IN_HEAP | IS_ARRAY | ARRAYCOPY_CHECKCAST | ARRAYCOPY_DISJOINT; + if (dest_uninitialized) { + decorators |= IS_DEST_UNINITIALIZED; + } + + BasicType type = T_OBJECT; + BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); + bs->arraycopy_prologue(_masm, decorators, type, from, to, count); + + // Copy from low to high addresses, indexed from the end of each array. + __ lea(end_from, end_from_addr); + __ lea(end_to, end_to_addr); + __ movl(r14_length, length); // save a copy of the length + assert(length == count, ""); // else fix next line: + __ subptr(R0, count, count); // negate and test the length + __ jcc(Assembler::notZero, L_load_element, count); + + // Empty array: Nothing to do. + __ movl(rax, R0); // return 0 on (trivial) success + __ jmp(L_done); + + // ======== begin loop ======== + // (Loop is rotated; its entry is L_load_element.) + // Loop control: + // for (count = -count; count != 0; count++) + // Base pointers src, dst are biased by 8*(count-1),to last element. + __ align(OptoLoopAlignment); + + __ BIND(L_store_element); + __ store_heap_oop(to_element_addr, rax_oop, noreg, noreg, AS_RAW); // store the oop + __ increment(count); // increment the count toward zero + __ jcc(Assembler::zero, L_do_card_marks, count); + + // ======== loop entry is here ======== + __ BIND(L_load_element); + __ load_heap_oop(rax_oop, from_element_addr, noreg, noreg, AS_RAW); // load the oop + __ testptr(rax_oop, rax_oop); + __ jcc(Assembler::zero, L_store_element); + + __ load_klass(r11_klass, rax_oop);// query the object klass + //will kill rscratch1 rscratch2 + generate_type_check(r11_klass, ckoff, ckval, L_store_element, rscratch1, rscratch2); + // ======== end loop ======== + + // It was a real error; we must depend on the caller to finish the job. + // Register rdx = -1 * number of *remaining* oops, r14 = *total* oops. + // Emit GC store barriers for the oops we have copied (r14 + rdx), + // and report their number to the caller. + assert_different_registers(rax, r14_length, count, to, end_to, rscratch1); + Label L_post_barrier; + __ addptr(r14_length, count, r14_length); // K = (original - remaining) oops + __ movl(rax, r14_length); // save the value + __ notptr(rax); // report (-1^K) to caller (does not affect flags) + __ jcc(Assembler::notZero, L_post_barrier); + __ jmp(L_done); // K == 0, nothing was copied, skip post barrier + + // Come here on success only. + __ BIND(L_do_card_marks); + __ movl(rax, R0); // return 0 on success + + __ BIND(L_post_barrier); + bs->arraycopy_epilogue(_masm, decorators, type, from, to, r14_length); + + // Common exit point (success or failure). 
+ __ BIND(L_done); + __ ldptr(r13, Address(rsp, saved_r13_offset * wordSize)); + __ ldptr(r14, Address(rsp, saved_r14_offset * wordSize)); + restore_arg_regs(); + inc_counter_np(SharedRuntime::_checkcast_array_copy_ctr); // Update counter after rscratch1 is free + __ leave(); // required for proper stackwalking of RuntimeStub frame + __ ret_sw(); + + return start; + } + + // + // Generate 'unsafe' array copy stub + // Though just as safe as the other stubs, it takes an unscaled + // size_t argument instead of an element count. + // + // Input: + // c_rarg0 - source array address + // c_rarg1 - destination array address + // c_rarg2 - byte count, treated as ssize_t, can be zero + // + // Examines the alignment of the operands and dispatches + // to a long, int, short, or byte copy loop. + // + address generate_unsafe_copy(const char *name, + address byte_copy_entry, address short_copy_entry, + address int_copy_entry, address long_copy_entry) { + SCOPEMARK_NAME(generate_unsafe_copy, _masm) + + Label L_long_aligned, L_int_aligned, L_short_aligned; + + // Input registers (before setup_arg_regs) + const Register from = c_rarg0; // source array address + const Register to = c_rarg1; // destination array address + const Register size = c_rarg2; // byte count (size_t) + + // Register used as a temp + const Register bits = V0; // test copy of low bits + + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", name); + address start = __ pc(); + + __ enter(); // required for proper stackwalking of RuntimeStub frame + + // bump this on entry, not on exit: + inc_counter_np(SharedRuntime::_unsafe_array_copy_ctr); + + __ movl(bits, from); +// __ xorptr(bits, to, bits); +// __ xorptr(bits, size, bits); + __ orptr(bits, to, bits); + __ orptr(bits, size, bits); + + __ testb(bits, BytesPerLong-1); + __ jcc(Assembler::zero, L_long_aligned); + + __ testb(bits, BytesPerInt-1); + __ jcc(Assembler::zero, L_int_aligned); + + __ testb(bits, BytesPerShort-1); + __ jump_cc(Assembler::notZero, RuntimeAddress(byte_copy_entry)); + + __ BIND(L_short_aligned); + __ srll(size, LogBytesPerShort, size); // size => short_count + __ jump(RuntimeAddress(short_copy_entry)); + + __ BIND(L_int_aligned); + __ srll(size, LogBytesPerInt, size); // size => int_count + __ jump(RuntimeAddress(int_copy_entry)); + + __ BIND(L_long_aligned); + __ srll(size, LogBytesPerLong, size); // size => qword_count + __ jump(RuntimeAddress(long_copy_entry)); + + return start; + } + + // Perform range checks on the proposed arraycopy. + // Kills temp, but nothing else. + // Also, clean the sign bits of src_pos and dst_pos. + void arraycopy_range_checks(Register src, // source array oop (c_rarg0) + Register src_pos, // source position (c_rarg1) + Register dst, // destination array oo (c_rarg2) + Register dst_pos, // destination position (c_rarg3) + Register length, + Register temp, + Label& L_failed) { + __ block_comment("arraycopy_range_checks:"); + + // if (src_pos + length > arrayOop(src)->length()) FAIL; + __ movw(temp, length); + __ addw(temp, src_pos, temp); // src_pos + length + __ cmpw(temp, Address(src, arrayOopDesc::length_offset_in_bytes())); + __ jcc(Assembler::above, L_failed); + + // if (dst_pos + length > arrayOop(dst)->length()) FAIL; + __ movw(temp, length); + __ addw(temp, dst_pos, temp); // dst_pos + length + __ cmpw(temp, Address(dst, arrayOopDesc::length_offset_in_bytes())); + __ jcc(Assembler::above, L_failed); + + // Have to clean up high 32-bits of 'src_pos' and 'dst_pos'. 
+ // Move with sign extension can be used since they are positive. + __ movws(src_pos, src_pos); + __ movws(dst_pos, dst_pos); + + __ block_comment("arraycopy_range_checks done"); + } + + // + // Generate generic array copy stubs + // + // Input: + // c_rarg0 - src oop + // c_rarg1 - src_pos (32-bits) + // c_rarg2 - dst oop + // c_rarg3 - dst_pos (32-bits) + // not Win64 + // c_rarg4 - element count (32-bits) + // Win64 + // rsp+40 - element count (32-bits) + // + // Output: + // rax == 0 - success + // rax == -1^K - failure, where K is partial transfer count + // + address generate_generic_copy(const char *name, + address byte_copy_entry, address short_copy_entry, + address int_copy_entry, address oop_copy_entry, + address long_copy_entry, address checkcast_copy_entry) { + SCOPEMARK_NAME(generate_generic_copy, _masm) + + Label L_failed, L_failed_0, L_objArray; + Label L_copy_bytes, L_copy_shorts, L_copy_ints, L_copy_longs; + + // Input registers + const Register src = c_rarg0; // source array oop + const Register src_pos = c_rarg1; // source position + const Register dst = c_rarg2; // destination array oop + const Register dst_pos = c_rarg3; // destination position + const Register length = c_rarg4; + + StubCodeMark mark(this, "StubRoutines", name); + + // Short-hop target to L_failed. Makes for denser prologue code. + __ BIND(L_failed_0); + __ jmp(L_failed); + + __ align(CodeEntryAlignment); + address start = __ pc(); + + __ enter(); // required for proper stackwalking of RuntimeStub frame + + // bump this on entry, not on exit: + inc_counter_np(SharedRuntime::_generic_array_copy_ctr); + + //----------------------------------------------------------------------- + // Assembler stub will be used for this call to arraycopy + // if the following conditions are met: + // + // (1) src and dst must not be null. + // (2) src_pos must not be negative. + // (3) dst_pos must not be negative. + // (4) length must not be negative. + // (5) src klass and dst klass should be the same and not NULL. + // (6) src and dst should be arrays. + // (7) src_pos + length must not exceed length of src. + // (8) dst_pos + length must not exceed length of dst. + // + + // if (src == NULL) return -1; + __ testptr(src, src); // src oop + size_t j1off = __ offset(); + __ jcc(Assembler::zero, L_failed_0, src); + + // if (src_pos < 0) return -1; + __ addw(src_pos, R0, rcc); // src_pos (32-bits) + __ jcc(Assembler::negative, L_failed_0, rcc); + + // if (dst == NULL) return -1; + __ testptr(dst, dst); // dst oop + __ jcc(Assembler::zero, L_failed_0); + + // if (dst_pos < 0) return -1; + __ addw(dst_pos, R0, rcc); // dst_pos (32-bits) + size_t j4off = __ offset(); + __ jcc(Assembler::negative, L_failed_0, rcc); + + // The first four tests are very dense code, + // but not quite dense enough to put four + // jumps in a 16-byte instruction fetch buffer. + // That's good, because some branch predicters + // do not like jumps so close together. + // Make sure of this. + guarantee(((j1off ^ j4off) & ~15) != 0, "I$ line of 1st & 4th jumps"); //should sw need this? 
+
+    // registers used as temp
+    const Register r11_length    = T0;  // elements count to copy
+    const Register r10_src_klass = T1;  // array klass
+    const Register r10           = r10_src_klass;
+    const Register r11           = r11_length;
+    const Register rscratch      = rscratch1;
+
+    //  if (length < 0) return -1;
+    __ movl(r11_length, length); // length (elements count, 32-bits value)
+    __ jcc(Assembler::negative, L_failed_0, r11_length);
+
+    __ load_klass(r10_src_klass, src);
+#ifdef ASSERT
+    //  assert(src->klass() != NULL);
+    {
+      BLOCK_COMMENT("assert klasses not null {");
+      Label L1, L2;
+      __ testptr(r10_src_klass, r10_src_klass);
+      __ jcc(Assembler::notZero, L2); // it is broken if klass is NULL
+      __ bind(L1);
+      __ stop("broken null klass");
+      __ bind(L2);
+      __ load_klass(rax, dst);
+      __ cmpl(rax, 0);
+      __ jcc(Assembler::equal, L1); // this would be broken also
+      BLOCK_COMMENT("} assert klasses not null done");
+    }
+#endif
+
+    // Load layout helper (32-bits)
+    //
+    //  |array_tag|     | header_size | element_type |     |log2_element_size|
+    // 32        30    24            16              8     2                 0
+    //
+    //   array_tag: typeArray = 0x3, objArray = 0x2, non-array = 0x0
+    //
+
+    const int lh_offset = in_bytes(Klass::layout_helper_offset());
+
+    // Handle objArrays completely differently...
+    const jint objArray_lh = Klass::array_layout_helper(T_OBJECT);
+//    __ mov_immediate32(rscratch, objArray_lh);
+    __ cmpw(Address(r10_src_klass, lh_offset), objArray_lh);
+    __ jcc(Assembler::equal, L_objArray);
+
+    //  if (src->klass() != dst->klass()) return -1;
+    __ load_klass(rax, dst);
+//    __ cmpl(r10_src_klass, rax);
+//    __ jcc(Assembler::notEqual, L_failed);
+    __ cmpq(rax, r10_src_klass);
+    __ jcc(Assembler::notEqual, L_failed);
+//    __ bne_c(rax, r10_src_klass, L_failed); // TODO(zl): check
+    const Register rax_lh = rax; // layout helper
+    __ ldws(rax_lh, Address(r10_src_klass, lh_offset));
+
+    //  if (!src->is_Array()) return -1;
+    __ cmpw(rax_lh, Klass::_lh_neutral_value);
+    __ jcc(Assembler::greaterEqual, L_failed);
+
+    // At this point, it is known to be a typeArray (array_tag 0x3).
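+    // Editor's note: a worked example of the layout-helper decode performed below,
+    // assuming the shared Klass encoding sketched above. For an int[] array:
+    //   array_tag         = 0x3 (typeArray)
+    //   header_size       = arrayOopDesc::base_offset_in_bytes(T_INT)
+    //   log2_element_size = 2
+    // so, after
+    //   hsize   = (lh >> Klass::_lh_header_size_shift) & Klass::_lh_header_size_mask;
+    //   l2esize =  lh &  Klass::_lh_log2_element_size_mask;
+    // the dispatch below ends up taking the L_copy_ints path into the jint copy stub.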
+#ifdef ASSERT + { + BLOCK_COMMENT("assert primitive array {"); + Label L; +// __ mov_immediate32(rscratch ,(Klass::_lh_array_tag_type_value << Klass::_lh_array_tag_shift)); + __ cmpw(rax_lh, (Klass::_lh_array_tag_type_value << Klass::_lh_array_tag_shift)); + __ jcc(Assembler::greaterEqual, L); + __ stop("must be a primitive array"); + __ bind(L); + BLOCK_COMMENT("} assert primitive array done"); + } +#endif + + arraycopy_range_checks(src, src_pos, dst, dst_pos, r11_length, + r10, L_failed); + + // TypeArrayKlass + // + // src_addr = (src + array_header_in_bytes()) + (src_pos << log2elemsize); + // dst_addr = (dst + array_header_in_bytes()) + (dst_pos << log2elemsize); + // + + const Register r10_offset = r10; // array offset + const Register rax_elsize = rax_lh; // element size + + __ movw(r10_offset, rax_lh); + __ srll(r10_offset, Klass::_lh_header_size_shift, r10_offset); + __ andptr(r10_offset, Klass::_lh_header_size_mask, r10_offset); // array_offset + __ addptr(src, r10_offset, src); // src array offset + __ addptr(dst, r10_offset, dst); // dst array offset + BLOCK_COMMENT("choose copy loop based on element size"); + __ andw(rax_lh, Klass::_lh_log2_element_size_mask, rax_lh); // rax_lh -> rax_elsize + + // next registers should be set before the jump to corresponding stub + const Register from = c_rarg0; // source array address + const Register to = c_rarg1; // destination array address + const Register count = c_rarg2; // elements count + + // 'from', 'to', 'count' registers should be set in such order + // since they are the same as 'src', 'src_pos', 'dst'. + + __ BIND(L_copy_bytes); + __ cmpw(rax_elsize, 0); + __ jcc(Assembler::notEqual, L_copy_shorts); + __ lea(from, Address(src, src_pos, Address::times_1, 0));// src_addr + __ lea(to, Address(dst, dst_pos, Address::times_1, 0));// dst_addr + __ movws(count, r11_length); // length + __ jump(RuntimeAddress(byte_copy_entry)); + + __ BIND(L_copy_shorts); + __ cmpw(rax_elsize, LogBytesPerShort); + __ jcc(Assembler::notEqual, L_copy_ints); + __ lea(from, Address(src, src_pos, Address::times_2, 0));// src_addr + __ lea(to, Address(dst, dst_pos, Address::times_2, 0));// dst_addr + __ movws(count, r11_length); // length + __ jump(RuntimeAddress(short_copy_entry)); + + __ BIND(L_copy_ints); + __ cmpw(rax_elsize, LogBytesPerInt); + __ jcc(Assembler::notEqual, L_copy_longs); + __ lea(from, Address(src, src_pos, Address::times_4, 0));// src_addr + __ lea(to, Address(dst, dst_pos, Address::times_4, 0));// dst_addr + __ movws(count, r11_length); // length + __ jump(RuntimeAddress(int_copy_entry)); + + __ BIND(L_copy_longs); +#ifdef ASSERT + { + BLOCK_COMMENT("assert long copy {"); + Label L; + __ cmpw(rax_elsize, LogBytesPerLong); + __ jcc(Assembler::equal, L); + __ stop("must be long copy, but elsize is wrong"); + __ bind(L); + BLOCK_COMMENT("} assert long copy done"); + } +#endif + __ lea(from, Address(src, src_pos, Address::times_8, 0));// src_addr + __ lea(to, Address(dst, dst_pos, Address::times_8, 0));// dst_addr + __ movws(count, r11_length); // length + __ jump(RuntimeAddress(long_copy_entry)); + + // ObjArrayKlass + __ BIND(L_objArray); + // live at this point: r10_src_klass, r11_length, src[_pos], dst[_pos] + + Label L_plain_copy, L_checkcast_copy; + // test array classes for subtyping + __ load_klass(rax, dst); +// __ cmpl(r10_src_klass, rax); // usual case is exact equality +// __ jcc(Assembler::notEqual, L_checkcast_copy); +// __ bne_c(r10_src_klass, rax, L_checkcast_copy); + __ cmpq(rax, r10_src_klass); + __ 
jcc(Assembler::notEqual, L_checkcast_copy); + + // Identically typed arrays can be copied without element-wise checks. + arraycopy_range_checks(src, src_pos, dst, dst_pos, r11_length, + r10, L_failed); + + __ lea(from, Address(src, src_pos, TIMES_OOP, + arrayOopDesc::base_offset_in_bytes(T_OBJECT))); // src_addr + __ lea(to, Address(dst, dst_pos, TIMES_OOP, + arrayOopDesc::base_offset_in_bytes(T_OBJECT))); // dst_addr + __ movws(count, r11_length); // length + __ BIND(L_plain_copy); + __ jump(RuntimeAddress(oop_copy_entry)); + + __ BIND(L_checkcast_copy); + // live at this point: r10_src_klass, r11_length, rax (dst_klass) + { + // Before looking at dst.length, make sure dst is also an objArray. + __ mov_immediate32(rscratch, objArray_lh); + __ cmpw(Address(rax, lh_offset), rscratch); + __ jcc(Assembler::notEqual, L_failed); + + // It is safe to examine both src.length and dst.length. + arraycopy_range_checks(src, src_pos, dst, dst_pos, r11_length, + rax, L_failed); + + const Register r11_dst_klass = r11; + __ load_klass(r11_dst_klass, dst); // reload + + // Marshal the base address arguments now, freeing registers. + __ lea(from, Address(src, src_pos, TIMES_OOP, + arrayOopDesc::base_offset_in_bytes(T_OBJECT))); + __ lea(to, Address(dst, dst_pos, TIMES_OOP, + arrayOopDesc::base_offset_in_bytes(T_OBJECT))); + __ movw(count, length); // length (reloaded) + Register sco_temp = c_rarg3; // this register is free now + assert_different_registers(from, to, count, sco_temp, + r11_dst_klass, r10_src_klass); + assert_clean_int(count, sco_temp); + + // Generate the type check. + const int sco_offset = in_bytes(Klass::super_check_offset_offset()); + __ ldws(sco_temp, Address(r11_dst_klass, sco_offset)); + assert_clean_int(sco_temp, rax); + //will kill rscratch1 rscratch2 + generate_type_check(r10_src_klass, sco_temp, r11_dst_klass, L_plain_copy, rscratch1, rscratch2); + + // Fetch destination element klass from the ObjArrayKlass header. + int ek_offset = in_bytes(ObjArrayKlass::element_klass_offset()); + __ ldptr(r11_dst_klass, Address(r11_dst_klass, ek_offset)); + __ ldws( sco_temp, Address(r11_dst_klass, sco_offset)); + assert_clean_int(sco_temp, rax); + + // the checkcast_copy loop needs two extra arguments: + assert(c_rarg3 == sco_temp, "#3 already in place"); + // Set up arguments for checkcast_copy_entry. 
+ setup_arg_regs(4); + __ movl(c_rarg4, r11_dst_klass); // dst.klass.element_klass, r8 is c_rarg4 on Linux/Solaris + __ jump(RuntimeAddress(checkcast_copy_entry)); + } + + __ BIND(L_failed); + __ ldi(rax, -1, R0);// return -1 + __ leave(); // required for proper stackwalking of RuntimeStub frame + __ ret_sw(); + + return start; + } + + void generate_arraycopy_stubs() { + address entry; + address entry_jbyte_arraycopy; + address entry_jshort_arraycopy; + address entry_jint_arraycopy; + address entry_oop_arraycopy; + address entry_jlong_arraycopy; + address entry_checkcast_arraycopy; +//TODO:jzy fast path to arraycopy + StubRoutines::_jbyte_disjoint_arraycopy = generate_disjoint_byte_copy(false, &entry, + "jbyte_disjoint_arraycopy"); + StubRoutines::_jbyte_arraycopy = generate_conjoint_byte_copy(false, entry, &entry_jbyte_arraycopy, + "jbyte_arraycopy"); + + StubRoutines::_jshort_disjoint_arraycopy = generate_disjoint_short_copy(false, &entry, + "jshort_disjoint_arraycopy"); + StubRoutines::_jshort_arraycopy = generate_conjoint_short_copy(false, entry, &entry_jshort_arraycopy, + "jshort_arraycopy"); + + StubRoutines::_jint_disjoint_arraycopy = generate_disjoint_int_oop_copy(false, false, &entry, + "jint_disjoint_arraycopy"); + StubRoutines::_jint_arraycopy = generate_conjoint_int_oop_copy(false, false, entry, + &entry_jint_arraycopy, "jint_arraycopy"); + + StubRoutines::_jlong_disjoint_arraycopy = generate_disjoint_long_oop_copy(false, false, &entry, + "jlong_disjoint_arraycopy"); + StubRoutines::_jlong_arraycopy = generate_conjoint_long_oop_copy(false, false, entry, + &entry_jlong_arraycopy, "jlong_arraycopy"); + + + if (UseCompressedOops) { + StubRoutines::_oop_disjoint_arraycopy = generate_disjoint_int_oop_copy(false, true, &entry, + "oop_disjoint_arraycopy"); + StubRoutines::_oop_arraycopy = generate_conjoint_int_oop_copy(false, true, entry, + &entry_oop_arraycopy, "oop_arraycopy"); + StubRoutines::_oop_disjoint_arraycopy_uninit = generate_disjoint_int_oop_copy(false, true, &entry, + "oop_disjoint_arraycopy_uninit", + /*dest_uninitialized*/true); + StubRoutines::_oop_arraycopy_uninit = generate_conjoint_int_oop_copy(false, true, entry, + NULL, "oop_arraycopy_uninit", + /*dest_uninitialized*/true); + } else { + StubRoutines::_oop_disjoint_arraycopy = generate_disjoint_long_oop_copy(false, true, &entry, + "oop_disjoint_arraycopy"); + StubRoutines::_oop_arraycopy = generate_conjoint_long_oop_copy(false, true, entry, + &entry_oop_arraycopy, "oop_arraycopy"); + StubRoutines::_oop_disjoint_arraycopy_uninit = generate_disjoint_long_oop_copy(false, true, &entry, + "oop_disjoint_arraycopy_uninit", + /*dest_uninitialized*/true); + StubRoutines::_oop_arraycopy_uninit = generate_conjoint_long_oop_copy(false, true, entry, + NULL, "oop_arraycopy_uninit", + /*dest_uninitialized*/true); + } + //TODO:jzy fast path to checkcast + StubRoutines::_checkcast_arraycopy = generate_checkcast_copy("checkcast_arraycopy", &entry_checkcast_arraycopy); + StubRoutines::_checkcast_arraycopy_uninit = generate_checkcast_copy("checkcast_arraycopy_uninit", NULL, + /*dest_uninitialized*/true); + + StubRoutines::_unsafe_arraycopy = generate_unsafe_copy("unsafe_arraycopy", + entry_jbyte_arraycopy, + entry_jshort_arraycopy, + entry_jint_arraycopy, + entry_jlong_arraycopy); + StubRoutines::_generic_arraycopy = generate_generic_copy("generic_arraycopy", + entry_jbyte_arraycopy, + entry_jshort_arraycopy, + entry_jint_arraycopy, + entry_oop_arraycopy, + entry_jlong_arraycopy, + entry_checkcast_arraycopy); + //TODO:fast path jzy + 
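+    // Editor's note: the fill stubs registered below back the VM's array-fill fast
+    // path (e.g. C2's OptimizeFill for Arrays.fill-style loops); the TODO above
+    // appears to track adding SW64-tuned fast paths for these copy/fill routines.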
StubRoutines::_jbyte_fill = generate_fill(T_BYTE, false, "jbyte_fill"); + StubRoutines::_jshort_fill = generate_fill(T_SHORT, false, "jshort_fill"); + StubRoutines::_jint_fill = generate_fill(T_INT, false, "jint_fill"); + StubRoutines::_arrayof_jbyte_fill = generate_fill(T_BYTE, true, "arrayof_jbyte_fill"); + StubRoutines::_arrayof_jshort_fill = generate_fill(T_SHORT, true, "arrayof_jshort_fill"); + StubRoutines::_arrayof_jint_fill = generate_fill(T_INT, true, "arrayof_jint_fill"); + + // We don't generate specialized code for HeapWord-aligned source + // arrays, so just use the code we've already generated + StubRoutines::_arrayof_jbyte_disjoint_arraycopy = StubRoutines::_jbyte_disjoint_arraycopy; + StubRoutines::_arrayof_jbyte_arraycopy = StubRoutines::_jbyte_arraycopy; + + StubRoutines::_arrayof_jshort_disjoint_arraycopy = StubRoutines::_jshort_disjoint_arraycopy; + StubRoutines::_arrayof_jshort_arraycopy = StubRoutines::_jshort_arraycopy; + + StubRoutines::_arrayof_jint_disjoint_arraycopy = StubRoutines::_jint_disjoint_arraycopy; + StubRoutines::_arrayof_jint_arraycopy = StubRoutines::_jint_arraycopy; + + StubRoutines::_arrayof_jlong_disjoint_arraycopy = StubRoutines::_jlong_disjoint_arraycopy; + StubRoutines::_arrayof_jlong_arraycopy = StubRoutines::_jlong_arraycopy; + + StubRoutines::_arrayof_oop_disjoint_arraycopy = StubRoutines::_oop_disjoint_arraycopy; + StubRoutines::_arrayof_oop_arraycopy = StubRoutines::_oop_arraycopy; + + StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit = StubRoutines::_oop_disjoint_arraycopy_uninit; + StubRoutines::_arrayof_oop_arraycopy_uninit = StubRoutines::_oop_arraycopy_uninit; + } + + // AES intrinsic stubs + enum {AESBlockSize = 16}; + + address generate_key_shuffle_mask() { + __ align(16); + StubCodeMark mark(this, "StubRoutines", "key_shuffle_mask"); + address start = __ pc();ShouldNotReachHere(); +// __ emit_data64( 0x0405060700010203, relocInfo::none ); +// __ emit_data64( 0x0c0d0e0f08090a0b, relocInfo::none ); + return start; + } + + address generate_counter_shuffle_mask() { + __ align(16); + StubCodeMark mark(this, "StubRoutines", "counter_shuffle_mask"); + address start = __ pc();ShouldNotReachHere(); +// __ emit_data64(0x08090a0b0c0d0e0f, relocInfo::none); +// __ emit_data64(0x0001020304050607, relocInfo::none); + return start; + } + + // Utility routine for loading a 128-bit key word in little endian format + // can optionally specify that the shuffle mask is already in an xmmregister + /*void load_key(XMMRegister xmmdst, Register key, int offset, XMMRegister xmm_shuf_mask=NULL) { + __ movdqu(xmmdst, Address(key, offset)); + if (xmm_shuf_mask != NULL) { + __ pshufb(xmmdst, xmm_shuf_mask); + } else { + __ pshufb(xmmdst, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr())); + } + }*/ + + // Utility routine for increase 128bit counter (iv in CTR mode) + /*void inc_counter(Register reg, XMMRegister xmmdst, int inc_delta, Label& next_block) { + __ pextrq(reg, xmmdst, 0x0); + __ addq(reg, inc_delta); + __ pinsrq(xmmdst, reg, 0x0); + __ jcc(Assembler::carryClear, next_block); // jump if no carry + __ pextrq(reg, xmmdst, 0x01); // Carry + __ addq(reg, 0x01); + __ pinsrq(xmmdst, reg, 0x01); //Carry end + __ BIND(next_block); // next instruction + }*/ + + // Arguments: + // + // Inputs: + // c_rarg0 - source byte array address + // c_rarg1 - destination byte array address + // c_rarg2 - K (key) in little endian int array + // + address generate_aescrypt_encryptBlock() { + assert(UseAES, "need AES instructions and misaligned SSE support"); + 
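+    // Editor's note: this stub (and the AES/SHA stubs that follow) is currently a
+    // placeholder on SW64: it emits ShouldNotReachHere() and keeps the x86_64
+    // reference body in a block comment for future porting. Until it is implemented,
+    // the corresponding UseAES/UseAESIntrinsics (and UseSHA*) flags are presumably
+    // left disabled in the platform VM_Version setup so this code is never reached
+    // at runtime.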
__ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", "aescrypt_encryptBlock"); + Label L_doLast; + address start = __ pc();ShouldNotReachHere(); +/* + const Register from = c_rarg0; // source array address + const Register to = c_rarg1; // destination array address + const Register key = c_rarg2; // key array address + const Register keylen = rax; + + const XMMRegister xmm_result = xmm0; + const XMMRegister xmm_key_shuf_mask = xmm1; + // On win64 xmm6-xmm15 must be preserved so don't use them. + const XMMRegister xmm_temp1 = xmm2; + const XMMRegister xmm_temp2 = xmm3; + const XMMRegister xmm_temp3 = xmm4; + const XMMRegister xmm_temp4 = xmm5; + + __ enter(); // required for proper stackwalking of RuntimeStub frame + + // keylen could be only {11, 13, 15} * 4 = {44, 52, 60} + __ movl(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT))); + + __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr())); + __ movdqu(xmm_result, Address(from, 0)); // get 16 bytes of input + + // For encryption, the java expanded key ordering is just what we need + // we don't know if the key is aligned, hence not using load-execute form + + load_key(xmm_temp1, key, 0x00, xmm_key_shuf_mask); + __ pxor(xmm_result, xmm_temp1); + + load_key(xmm_temp1, key, 0x10, xmm_key_shuf_mask); + load_key(xmm_temp2, key, 0x20, xmm_key_shuf_mask); + load_key(xmm_temp3, key, 0x30, xmm_key_shuf_mask); + load_key(xmm_temp4, key, 0x40, xmm_key_shuf_mask); + + __ aesenc(xmm_result, xmm_temp1); + __ aesenc(xmm_result, xmm_temp2); + __ aesenc(xmm_result, xmm_temp3); + __ aesenc(xmm_result, xmm_temp4); + + load_key(xmm_temp1, key, 0x50, xmm_key_shuf_mask); + load_key(xmm_temp2, key, 0x60, xmm_key_shuf_mask); + load_key(xmm_temp3, key, 0x70, xmm_key_shuf_mask); + load_key(xmm_temp4, key, 0x80, xmm_key_shuf_mask); + + __ aesenc(xmm_result, xmm_temp1); + __ aesenc(xmm_result, xmm_temp2); + __ aesenc(xmm_result, xmm_temp3); + __ aesenc(xmm_result, xmm_temp4); + + load_key(xmm_temp1, key, 0x90, xmm_key_shuf_mask); + load_key(xmm_temp2, key, 0xa0, xmm_key_shuf_mask); + + __ cmpl(keylen, 44); + __ jccb(Assembler::equal, L_doLast); + + __ aesenc(xmm_result, xmm_temp1); + __ aesenc(xmm_result, xmm_temp2); + + load_key(xmm_temp1, key, 0xb0, xmm_key_shuf_mask); + load_key(xmm_temp2, key, 0xc0, xmm_key_shuf_mask); + + __ cmpl(keylen, 52); + __ jccb(Assembler::equal, L_doLast); + + __ aesenc(xmm_result, xmm_temp1); + __ aesenc(xmm_result, xmm_temp2); + + load_key(xmm_temp1, key, 0xd0, xmm_key_shuf_mask); + load_key(xmm_temp2, key, 0xe0, xmm_key_shuf_mask); + + __ BIND(L_doLast); + __ aesenc(xmm_result, xmm_temp1); + __ aesenclast(xmm_result, xmm_temp2); + __ movdqu(Address(to, 0), xmm_result); // store the result + __ xorptr(rax, rax); // return 0 + __ leave(); // required for proper stackwalking of RuntimeStub frame + __ ret(0); +*/ + return start; + } + + + // Arguments: + // + // Inputs: + // c_rarg0 - source byte array address + // c_rarg1 - destination byte array address + // c_rarg2 - K (key) in little endian int array + // + address generate_aescrypt_decryptBlock() { + assert(UseAES, "need AES instructions and misaligned SSE support"); + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", "aescrypt_decryptBlock"); + Label L_doLast; + address start = __ pc();ShouldNotReachHere(); +/* + const Register from = c_rarg0; // source array address + const Register to = c_rarg1; // destination array address + const Register key = 
c_rarg2; // key array address + const Register keylen = rax; + + const XMMRegister xmm_result = xmm0; + const XMMRegister xmm_key_shuf_mask = xmm1; + // On win64 xmm6-xmm15 must be preserved so don't use them. + const XMMRegister xmm_temp1 = xmm2; + const XMMRegister xmm_temp2 = xmm3; + const XMMRegister xmm_temp3 = xmm4; + const XMMRegister xmm_temp4 = xmm5; + + __ enter(); // required for proper stackwalking of RuntimeStub frame + + // keylen could be only {11, 13, 15} * 4 = {44, 52, 60} + __ movl(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT))); + + __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr())); + __ movdqu(xmm_result, Address(from, 0)); + + // for decryption java expanded key ordering is rotated one position from what we want + // so we start from 0x10 here and hit 0x00 last + // we don't know if the key is aligned, hence not using load-execute form + load_key(xmm_temp1, key, 0x10, xmm_key_shuf_mask); + load_key(xmm_temp2, key, 0x20, xmm_key_shuf_mask); + load_key(xmm_temp3, key, 0x30, xmm_key_shuf_mask); + load_key(xmm_temp4, key, 0x40, xmm_key_shuf_mask); + + __ pxor (xmm_result, xmm_temp1); + __ aesdec(xmm_result, xmm_temp2); + __ aesdec(xmm_result, xmm_temp3); + __ aesdec(xmm_result, xmm_temp4); + + load_key(xmm_temp1, key, 0x50, xmm_key_shuf_mask); + load_key(xmm_temp2, key, 0x60, xmm_key_shuf_mask); + load_key(xmm_temp3, key, 0x70, xmm_key_shuf_mask); + load_key(xmm_temp4, key, 0x80, xmm_key_shuf_mask); + + __ aesdec(xmm_result, xmm_temp1); + __ aesdec(xmm_result, xmm_temp2); + __ aesdec(xmm_result, xmm_temp3); + __ aesdec(xmm_result, xmm_temp4); + + load_key(xmm_temp1, key, 0x90, xmm_key_shuf_mask); + load_key(xmm_temp2, key, 0xa0, xmm_key_shuf_mask); + load_key(xmm_temp3, key, 0x00, xmm_key_shuf_mask); + + __ cmpl(keylen, 44); + __ jccb(Assembler::equal, L_doLast); + + __ aesdec(xmm_result, xmm_temp1); + __ aesdec(xmm_result, xmm_temp2); + + load_key(xmm_temp1, key, 0xb0, xmm_key_shuf_mask); + load_key(xmm_temp2, key, 0xc0, xmm_key_shuf_mask); + + __ cmpl(keylen, 52); + __ jccb(Assembler::equal, L_doLast); + + __ aesdec(xmm_result, xmm_temp1); + __ aesdec(xmm_result, xmm_temp2); + + load_key(xmm_temp1, key, 0xd0, xmm_key_shuf_mask); + load_key(xmm_temp2, key, 0xe0, xmm_key_shuf_mask); + + __ BIND(L_doLast); + __ aesdec(xmm_result, xmm_temp1); + __ aesdec(xmm_result, xmm_temp2); + + // for decryption the aesdeclast operation is always on key+0x00 + __ aesdeclast(xmm_result, xmm_temp3); + __ movdqu(Address(to, 0), xmm_result); // store the result + __ xorptr(rax, rax); // return 0 + __ leave(); // required for proper stackwalking of RuntimeStub frame + __ ret(0); +*/ + return start; + } + + + // Arguments: + // + // Inputs: + // c_rarg0 - source byte array address + // c_rarg1 - destination byte array address + // c_rarg2 - K (key) in little endian int array + // c_rarg3 - r vector byte array address + // c_rarg4 - input length + // + // Output: + // rax - input length + // + address generate_cipherBlockChaining_encryptAESCrypt() { + assert(UseAES, "need AES instructions and misaligned SSE support"); + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_encryptAESCrypt"); + address start = __ pc();ShouldNotReachHere(); +/* + Label L_exit, L_key_192_256, L_key_256, L_loopTop_128, L_loopTop_192, L_loopTop_256; + const Register from = c_rarg0; // source array address + const Register to = c_rarg1; // destination array address + const Register 
key = c_rarg2; // key array address + const Register rvec = c_rarg3; // r byte array initialized from initvector array address + // and left with the results of the last encryption block +#ifndef _WIN64 + const Register len_reg = c_rarg4; // src len (must be multiple of blocksize 16) +#else + const Address len_mem(rbp, 6 * wordSize); // length is on stack on Win64 + const Register len_reg = r11; // pick the volatile windows register +#endif + const Register pos = rax; + + // xmm register assignments for the loops below + const XMMRegister xmm_result = xmm0; + const XMMRegister xmm_temp = xmm1; + // keys 0-10 preloaded into xmm2-xmm12 + const int XMM_REG_NUM_KEY_FIRST = 2; + const int XMM_REG_NUM_KEY_LAST = 15; + const XMMRegister xmm_key0 = as_XMMRegister(XMM_REG_NUM_KEY_FIRST); + const XMMRegister xmm_key10 = as_XMMRegister(XMM_REG_NUM_KEY_FIRST+10); + const XMMRegister xmm_key11 = as_XMMRegister(XMM_REG_NUM_KEY_FIRST+11); + const XMMRegister xmm_key12 = as_XMMRegister(XMM_REG_NUM_KEY_FIRST+12); + const XMMRegister xmm_key13 = as_XMMRegister(XMM_REG_NUM_KEY_FIRST+13); + + __ enter(); // required for proper stackwalking of RuntimeStub frame + +#ifdef _WIN64 + // on win64, fill len_reg from stack position + __ movl(len_reg, len_mem); +#else + __ push(len_reg); // Save +#endif + + const XMMRegister xmm_key_shuf_mask = xmm_temp; // used temporarily to swap key bytes up front + __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr())); + // load up xmm regs xmm2 thru xmm12 with key 0x00 - 0xa0 + for (int rnum = XMM_REG_NUM_KEY_FIRST, offset = 0x00; rnum <= XMM_REG_NUM_KEY_FIRST+10; rnum++) { + load_key(as_XMMRegister(rnum), key, offset, xmm_key_shuf_mask); + offset += 0x10; + } + __ movdqu(xmm_result, Address(rvec, 0x00)); // initialize xmm_result with r vec + + // now split to different paths depending on the keylen (len in ints of AESCrypt.KLE array (52=192, or 60=256)) + __ movl(rax, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT))); + __ cmpl(rax, 44); + __ jcc(Assembler::notEqual, L_key_192_256); + + // 128 bit code follows here + __ movptr(pos, 0); + __ align(OptoLoopAlignment); + + __ BIND(L_loopTop_128); + __ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of input + __ pxor (xmm_result, xmm_temp); // xor with the current r vector + __ pxor (xmm_result, xmm_key0); // do the aes rounds + for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_FIRST + 9; rnum++) { + __ aesenc(xmm_result, as_XMMRegister(rnum)); + } + __ aesenclast(xmm_result, xmm_key10); + __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output + // no need to store r to memory until we exit + __ addptr(pos, AESBlockSize); + __ subptr(len_reg, AESBlockSize); + __ jcc(Assembler::notEqual, L_loopTop_128); + + __ BIND(L_exit); + __ movdqu(Address(rvec, 0), xmm_result); // final value of r stored in rvec of CipherBlockChaining object + +#ifdef _WIN64 + __ movl(rax, len_mem); +#else + __ pop(rax); // return length +#endif + __ leave(); // required for proper stackwalking of RuntimeStub frame + __ ret(0); + + __ BIND(L_key_192_256); + // here rax = len in ints of AESCrypt.KLE array (52=192, or 60=256) + load_key(xmm_key11, key, 0xb0, xmm_key_shuf_mask); + load_key(xmm_key12, key, 0xc0, xmm_key_shuf_mask); + __ cmpl(rax, 52); + __ jcc(Assembler::notEqual, L_key_256); + + // 192-bit code follows here (could be changed to use more xmm registers) + __ movptr(pos, 
0); + __ align(OptoLoopAlignment); + + __ BIND(L_loopTop_192); + __ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of input + __ pxor (xmm_result, xmm_temp); // xor with the current r vector + __ pxor (xmm_result, xmm_key0); // do the aes rounds + for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_FIRST + 11; rnum++) { + __ aesenc(xmm_result, as_XMMRegister(rnum)); + } + __ aesenclast(xmm_result, xmm_key12); + __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output + // no need to store r to memory until we exit + __ addptr(pos, AESBlockSize); + __ subptr(len_reg, AESBlockSize); + __ jcc(Assembler::notEqual, L_loopTop_192); + __ jmp(L_exit); + + __ BIND(L_key_256); + // 256-bit code follows here (could be changed to use more xmm registers) + load_key(xmm_key13, key, 0xd0, xmm_key_shuf_mask); + __ movptr(pos, 0); + __ align(OptoLoopAlignment); + + __ BIND(L_loopTop_256); + __ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of input + __ pxor (xmm_result, xmm_temp); // xor with the current r vector + __ pxor (xmm_result, xmm_key0); // do the aes rounds + for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_FIRST + 13; rnum++) { + __ aesenc(xmm_result, as_XMMRegister(rnum)); + } + load_key(xmm_temp, key, 0xe0); + __ aesenclast(xmm_result, xmm_temp); + __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output + // no need to store r to memory until we exit + __ addptr(pos, AESBlockSize); + __ subptr(len_reg, AESBlockSize); + __ jcc(Assembler::notEqual, L_loopTop_256); + __ jmp(L_exit); +*/ + return start; + } + + + // This is a version of CBC/AES Decrypt which does 4 blocks in a loop at a time + // to hide instruction latency + // + // Arguments: + // + // Inputs: + // c_rarg0 - source byte array address + // c_rarg1 - destination byte array address + // c_rarg2 - K (key) in little endian int array + // c_rarg3 - r vector byte array address + // c_rarg4 - input length + // + // Output: + // rax - input length + // + address generate_cipherBlockChaining_decryptAESCrypt_Parallel() { + assert(UseAES, "need AES instructions and misaligned SSE support"); + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_decryptAESCrypt"); + address start = __ pc();ShouldNotReachHere(); +/* + const Register from = c_rarg0; // source array address + const Register to = c_rarg1; // destination array address + const Register key = c_rarg2; // key array address + const Register rvec = c_rarg3; // r byte array initialized from initvector array address + // and left with the results of the last encryption block +#ifndef _WIN64 + const Register len_reg = c_rarg4; // src len (must be multiple of blocksize 16) +#else + const Address len_mem(rbp, 6 * wordSize); // length is on stack on Win64 + const Register len_reg = r11; // pick the volatile windows register +#endif + const Register pos = rax; + + const int PARALLEL_FACTOR = 4; + const int ROUNDS[3] = { 10, 12, 14 }; // aes rounds for key128, key192, key256 + + Label L_exit; + Label L_singleBlock_loopTopHead[3]; // 128, 192, 256 + Label L_singleBlock_loopTopHead2[3]; // 128, 192, 256 + Label L_singleBlock_loopTop[3]; // 128, 192, 256 + Label L_multiBlock_loopTopHead[3]; // 128, 192, 256 + Label L_multiBlock_loopTop[3]; // 128, 192, 256 + + // keys 0-10 preloaded into xmm5-xmm15 + const int XMM_REG_NUM_KEY_FIRST = 5; + const int 
XMM_REG_NUM_KEY_LAST = 15; + const XMMRegister xmm_key_first = as_XMMRegister(XMM_REG_NUM_KEY_FIRST); + const XMMRegister xmm_key_last = as_XMMRegister(XMM_REG_NUM_KEY_LAST); + + __ enter(); // required for proper stackwalking of RuntimeStub frame + +#ifdef _WIN64 + // on win64, fill len_reg from stack position + __ movl(len_reg, len_mem); +#else + __ push(len_reg); // Save +#endif + __ push(rbx); + // the java expanded key ordering is rotated one position from what we want + // so we start from 0x10 here and hit 0x00 last + const XMMRegister xmm_key_shuf_mask = xmm1; // used temporarily to swap key bytes up front + __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr())); + // load up xmm regs 5 thru 15 with key 0x10 - 0xa0 - 0x00 + for (int rnum = XMM_REG_NUM_KEY_FIRST, offset = 0x10; rnum < XMM_REG_NUM_KEY_LAST; rnum++) { + load_key(as_XMMRegister(rnum), key, offset, xmm_key_shuf_mask); + offset += 0x10; + } + load_key(xmm_key_last, key, 0x00, xmm_key_shuf_mask); + + const XMMRegister xmm_prev_block_cipher = xmm1; // holds cipher of previous block + + // registers holding the four results in the parallelized loop + const XMMRegister xmm_result0 = xmm0; + const XMMRegister xmm_result1 = xmm2; + const XMMRegister xmm_result2 = xmm3; + const XMMRegister xmm_result3 = xmm4; + + __ movdqu(xmm_prev_block_cipher, Address(rvec, 0x00)); // initialize with initial rvec + + __ xorptr(pos, pos); + + // now split to different paths depending on the keylen (len in ints of AESCrypt.KLE array (52=192, or 60=256)) + __ movl(rbx, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT))); + __ cmpl(rbx, 52); + __ jcc(Assembler::equal, L_multiBlock_loopTopHead[1]); + __ cmpl(rbx, 60); + __ jcc(Assembler::equal, L_multiBlock_loopTopHead[2]); + +#define DoFour(opc, src_reg) \ + __ opc(xmm_result0, src_reg); \ + __ opc(xmm_result1, src_reg); \ + __ opc(xmm_result2, src_reg); \ + __ opc(xmm_result3, src_reg); \ + + for (int k = 0; k < 3; ++k) { + __ BIND(L_multiBlock_loopTopHead[k]); + if (k != 0) { + __ cmpptr(len_reg, PARALLEL_FACTOR * AESBlockSize); // see if at least 4 blocks left + __ jcc(Assembler::less, L_singleBlock_loopTopHead2[k]); + } + if (k == 1) { + __ subptr(rsp, 6 * wordSize); + __ movdqu(Address(rsp, 0), xmm15); //save last_key from xmm15 + load_key(xmm15, key, 0xb0); // 0xb0; 192-bit key goes up to 0xc0 + __ movdqu(Address(rsp, 2 * wordSize), xmm15); + load_key(xmm1, key, 0xc0); // 0xc0; + __ movdqu(Address(rsp, 4 * wordSize), xmm1); + } else if (k == 2) { + __ subptr(rsp, 10 * wordSize); + __ movdqu(Address(rsp, 0), xmm15); //save last_key from xmm15 + load_key(xmm15, key, 0xd0); // 0xd0; 256-bit key goes upto 0xe0 + __ movdqu(Address(rsp, 6 * wordSize), xmm15); + load_key(xmm1, key, 0xe0); // 0xe0; + __ movdqu(Address(rsp, 8 * wordSize), xmm1); + load_key(xmm15, key, 0xb0); // 0xb0; + __ movdqu(Address(rsp, 2 * wordSize), xmm15); + load_key(xmm1, key, 0xc0); // 0xc0; + __ movdqu(Address(rsp, 4 * wordSize), xmm1); + } + __ align(OptoLoopAlignment); + __ BIND(L_multiBlock_loopTop[k]); + __ cmpptr(len_reg, PARALLEL_FACTOR * AESBlockSize); // see if at least 4 blocks left + __ jcc(Assembler::less, L_singleBlock_loopTopHead[k]); + + if (k != 0) { + __ movdqu(xmm15, Address(rsp, 2 * wordSize)); + __ movdqu(xmm1, Address(rsp, 4 * wordSize)); + } + + __ movdqu(xmm_result0, Address(from, pos, Address::times_1, 0 * AESBlockSize)); // get next 4 blocks into xmmresult registers + __ movdqu(xmm_result1, Address(from, pos, 
Address::times_1, 1 * AESBlockSize)); + __ movdqu(xmm_result2, Address(from, pos, Address::times_1, 2 * AESBlockSize)); + __ movdqu(xmm_result3, Address(from, pos, Address::times_1, 3 * AESBlockSize)); + + DoFour(pxor, xmm_key_first); + if (k == 0) { + for (int rnum = 1; rnum < ROUNDS[k]; rnum++) { + DoFour(aesdec, as_XMMRegister(rnum + XMM_REG_NUM_KEY_FIRST)); + } + DoFour(aesdeclast, xmm_key_last); + } else if (k == 1) { + for (int rnum = 1; rnum <= ROUNDS[k]-2; rnum++) { + DoFour(aesdec, as_XMMRegister(rnum + XMM_REG_NUM_KEY_FIRST)); + } + __ movdqu(xmm_key_last, Address(rsp, 0)); // xmm15 needs to be loaded again. + DoFour(aesdec, xmm1); // key : 0xc0 + __ movdqu(xmm_prev_block_cipher, Address(rvec, 0x00)); // xmm1 needs to be loaded again + DoFour(aesdeclast, xmm_key_last); + } else if (k == 2) { + for (int rnum = 1; rnum <= ROUNDS[k] - 4; rnum++) { + DoFour(aesdec, as_XMMRegister(rnum + XMM_REG_NUM_KEY_FIRST)); + } + DoFour(aesdec, xmm1); // key : 0xc0 + __ movdqu(xmm15, Address(rsp, 6 * wordSize)); + __ movdqu(xmm1, Address(rsp, 8 * wordSize)); + DoFour(aesdec, xmm15); // key : 0xd0 + __ movdqu(xmm_key_last, Address(rsp, 0)); // xmm15 needs to be loaded again. + DoFour(aesdec, xmm1); // key : 0xe0 + __ movdqu(xmm_prev_block_cipher, Address(rvec, 0x00)); // xmm1 needs to be loaded again + DoFour(aesdeclast, xmm_key_last); + } + + // for each result, xor with the r vector of previous cipher block + __ pxor(xmm_result0, xmm_prev_block_cipher); + __ movdqu(xmm_prev_block_cipher, Address(from, pos, Address::times_1, 0 * AESBlockSize)); + __ pxor(xmm_result1, xmm_prev_block_cipher); + __ movdqu(xmm_prev_block_cipher, Address(from, pos, Address::times_1, 1 * AESBlockSize)); + __ pxor(xmm_result2, xmm_prev_block_cipher); + __ movdqu(xmm_prev_block_cipher, Address(from, pos, Address::times_1, 2 * AESBlockSize)); + __ pxor(xmm_result3, xmm_prev_block_cipher); + __ movdqu(xmm_prev_block_cipher, Address(from, pos, Address::times_1, 3 * AESBlockSize)); // this will carry over to next set of blocks + if (k != 0) { + __ movdqu(Address(rvec, 0x00), xmm_prev_block_cipher); + } + + __ movdqu(Address(to, pos, Address::times_1, 0 * AESBlockSize), xmm_result0); // store 4 results into the next 64 bytes of output + __ movdqu(Address(to, pos, Address::times_1, 1 * AESBlockSize), xmm_result1); + __ movdqu(Address(to, pos, Address::times_1, 2 * AESBlockSize), xmm_result2); + __ movdqu(Address(to, pos, Address::times_1, 3 * AESBlockSize), xmm_result3); + + __ addptr(pos, PARALLEL_FACTOR * AESBlockSize); + __ subptr(len_reg, PARALLEL_FACTOR * AESBlockSize); + __ jmp(L_multiBlock_loopTop[k]); + + // registers used in the non-parallelized loops + // xmm register assignments for the loops below + const XMMRegister xmm_result = xmm0; + const XMMRegister xmm_prev_block_cipher_save = xmm2; + const XMMRegister xmm_key11 = xmm3; + const XMMRegister xmm_key12 = xmm4; + const XMMRegister key_tmp = xmm4; + + __ BIND(L_singleBlock_loopTopHead[k]); + if (k == 1) { + __ addptr(rsp, 6 * wordSize); + } else if (k == 2) { + __ addptr(rsp, 10 * wordSize); + } + __ cmpptr(len_reg, 0); // any blocks left?? 
+ __ jcc(Assembler::equal, L_exit); + __ BIND(L_singleBlock_loopTopHead2[k]); + if (k == 1) { + load_key(xmm_key11, key, 0xb0); // 0xb0; 192-bit key goes upto 0xc0 + load_key(xmm_key12, key, 0xc0); // 0xc0; 192-bit key goes upto 0xc0 + } + if (k == 2) { + load_key(xmm_key11, key, 0xb0); // 0xb0; 256-bit key goes upto 0xe0 + } + __ align(OptoLoopAlignment); + __ BIND(L_singleBlock_loopTop[k]); + __ movdqu(xmm_result, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of cipher input + __ movdqa(xmm_prev_block_cipher_save, xmm_result); // save for next r vector + __ pxor(xmm_result, xmm_key_first); // do the aes dec rounds + for (int rnum = 1; rnum <= 9 ; rnum++) { + __ aesdec(xmm_result, as_XMMRegister(rnum + XMM_REG_NUM_KEY_FIRST)); + } + if (k == 1) { + __ aesdec(xmm_result, xmm_key11); + __ aesdec(xmm_result, xmm_key12); + } + if (k == 2) { + __ aesdec(xmm_result, xmm_key11); + load_key(key_tmp, key, 0xc0); + __ aesdec(xmm_result, key_tmp); + load_key(key_tmp, key, 0xd0); + __ aesdec(xmm_result, key_tmp); + load_key(key_tmp, key, 0xe0); + __ aesdec(xmm_result, key_tmp); + } + + __ aesdeclast(xmm_result, xmm_key_last); // xmm15 always came from key+0 + __ pxor(xmm_result, xmm_prev_block_cipher); // xor with the current r vector + __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output + // no need to store r to memory until we exit + __ movdqa(xmm_prev_block_cipher, xmm_prev_block_cipher_save); // set up next r vector with cipher input from this block + __ addptr(pos, AESBlockSize); + __ subptr(len_reg, AESBlockSize); + __ jcc(Assembler::notEqual, L_singleBlock_loopTop[k]); + if (k != 2) { + __ jmp(L_exit); + } + } //for 128/192/256 + + __ BIND(L_exit); + __ movdqu(Address(rvec, 0), xmm_prev_block_cipher); // final value of r stored in rvec of CipherBlockChaining object + __ pop(rbx); +#ifdef _WIN64 + __ movl(rax, len_mem); +#else + __ pop(rax); // return length +#endif + __ leave(); // required for proper stackwalking of RuntimeStub frame + __ ret(0); + return start; +} + + // ofs and limit are use for multi-block byte array. 
+ // int com.sun.security.provider.MD5.implCompress(byte[] b, int ofs) + address generate_md5_implCompress(bool multi_block, const char *name) { + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", name); + address start = __ pc(); + + const Register buf_param = r15; + const Address state_param(rsp, 0 * wordSize); + const Address ofs_param (rsp, 1 * wordSize ); + const Address limit_param(rsp, 1 * wordSize + 4); + + __ enter(); + __ push(rbx); + __ push(rdi); + __ push(rsi); + __ push(r15); + __ subptr(rsp, 2 * wordSize); + + __ movptr(buf_param, c_rarg0); + __ movptr(state_param, c_rarg1); + if (multi_block) { + __ movl(ofs_param, c_rarg2); + __ movl(limit_param, c_rarg3); + } + __ fast_md5(buf_param, state_param, ofs_param, limit_param, multi_block); + + __ addptr(rsp, 2 * wordSize); + __ pop(r15); + __ pop(rsi); + __ pop(rdi); + __ pop(rbx); + __ leave(); + __ ret(0); + return start; + } + + address generate_upper_word_mask() { + __ align(64); + StubCodeMark mark(this, "StubRoutines", "upper_word_mask"); + address start = __ pc(); + __ emit_data64(0x0000000000000000, relocInfo::none); + __ emit_data64(0xFFFFFFFF00000000, relocInfo::none); + return start; + } + + address generate_shuffle_byte_flip_mask() { + __ align(64); + StubCodeMark mark(this, "StubRoutines", "shuffle_byte_flip_mask"); + address start = __ pc(); + __ emit_data64(0x08090a0b0c0d0e0f, relocInfo::none); + __ emit_data64(0x0001020304050607, relocInfo::none); + return start; + } + + // ofs and limit are use for multi-block byte array. + // int com.sun.security.provider.DigestBase.implCompressMultiBlock(byte[] b, int ofs, int limit) + address generate_sha1_implCompress(bool multi_block, const char *name) { + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", name); + address start = __ pc(); + + Register buf = c_rarg0; + Register state = c_rarg1; + Register ofs = c_rarg2; + Register limit = c_rarg3; + + const XMMRegister abcd = xmm0; + const XMMRegister e0 = xmm1; + const XMMRegister e1 = xmm2; + const XMMRegister msg0 = xmm3; + + const XMMRegister msg1 = xmm4; + const XMMRegister msg2 = xmm5; + const XMMRegister msg3 = xmm6; + const XMMRegister shuf_mask = xmm7; + + __ enter(); + + __ subptr(rsp, 4 * wordSize); + + __ fast_sha1(abcd, e0, e1, msg0, msg1, msg2, msg3, shuf_mask, + buf, state, ofs, limit, rsp, multi_block); + + __ addptr(rsp, 4 * wordSize); + + __ leave(); + __ ret(0);*/ + return start; + } + + address generate_pshuffle_byte_flip_mask() { + __ align(64); + StubCodeMark mark(this, "StubRoutines", "pshuffle_byte_flip_mask");ShouldNotReachHere(); + address start = __ pc();/* + __ emit_data64(0x0405060700010203, relocInfo::none); + __ emit_data64(0x0c0d0e0f08090a0b, relocInfo::none); + + if (VM_Version::supports_avx2()) { + __ emit_data64(0x0405060700010203, relocInfo::none); // second copy + __ emit_data64(0x0c0d0e0f08090a0b, relocInfo::none); + // _SHUF_00BA + __ emit_data64(0x0b0a090803020100, relocInfo::none); + __ emit_data64(0xFFFFFFFFFFFFFFFF, relocInfo::none); + __ emit_data64(0x0b0a090803020100, relocInfo::none); + __ emit_data64(0xFFFFFFFFFFFFFFFF, relocInfo::none); + // _SHUF_DC00 + __ emit_data64(0xFFFFFFFFFFFFFFFF, relocInfo::none); + __ emit_data64(0x0b0a090803020100, relocInfo::none); + __ emit_data64(0xFFFFFFFFFFFFFFFF, relocInfo::none); + __ emit_data64(0x0b0a090803020100, relocInfo::none); + } +*/ + return start; + } + + //Mask for byte-swapping a couple of qwords in an XMM register using (v)pshufb. 
+ address generate_pshuffle_byte_flip_mask_sha512() { + __ align(32); + StubCodeMark mark(this, "StubRoutines", "pshuffle_byte_flip_mask_sha512");ShouldNotReachHere(); + address start = __ pc();/* + if (VM_Version::supports_avx2()) { + __ emit_data64(0x0001020304050607, relocInfo::none); // PSHUFFLE_BYTE_FLIP_MASK + __ emit_data64(0x08090a0b0c0d0e0f, relocInfo::none); + __ emit_data64(0x1011121314151617, relocInfo::none); + __ emit_data64(0x18191a1b1c1d1e1f, relocInfo::none); + __ emit_data64(0x0000000000000000, relocInfo::none); //MASK_YMM_LO + __ emit_data64(0x0000000000000000, relocInfo::none); + __ emit_data64(0xFFFFFFFFFFFFFFFF, relocInfo::none); + __ emit_data64(0xFFFFFFFFFFFFFFFF, relocInfo::none); + } +*/ + return start; + } + +// ofs and limit are use for multi-block byte array. +// int com.sun.security.provider.DigestBase.implCompressMultiBlock(byte[] b, int ofs, int limit) + address generate_sha256_implCompress(bool multi_block, const char *name) { + //assert(VM_Version::supports_sha() || VM_Version::supports_avx2(), ""); + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", name);ShouldNotReachHere(); + address start = __ pc();/* + + Register buf = c_rarg0; + Register state = c_rarg1; + Register ofs = c_rarg2; + Register limit = c_rarg3; + + const XMMRegister msg = xmm0; + const XMMRegister state0 = xmm1; + const XMMRegister state1 = xmm2; + const XMMRegister msgtmp0 = xmm3; + + const XMMRegister msgtmp1 = xmm4; + const XMMRegister msgtmp2 = xmm5; + const XMMRegister msgtmp3 = xmm6; + const XMMRegister msgtmp4 = xmm7; + + const XMMRegister shuf_mask = xmm8; + + __ enter(); + + __ subptr(rsp, 4 * wordSize); + + if (VM_Version::supports_sha()) { + __ fast_sha256(msg, state0, state1, msgtmp0, msgtmp1, msgtmp2, msgtmp3, msgtmp4, + buf, state, ofs, limit, rsp, multi_block, shuf_mask); + } else if (VM_Version::supports_avx2()) { + __ sha256_AVX2(msg, state0, state1, msgtmp0, msgtmp1, msgtmp2, msgtmp3, msgtmp4, + buf, state, ofs, limit, rsp, multi_block, shuf_mask); + } + __ addptr(rsp, 4 * wordSize); + __ vzeroupper(); + __ leave(); + __ ret(0);*/ + return start; + } + + address generate_sha512_implCompress(bool multi_block, const char *name) { + //assert(VM_Version::supports_avx2(), ""); + //assert(VM_Version::supports_bmi2(), ""); + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", name);ShouldNotReachHere(); + address start = __ pc();/* + + Register buf = c_rarg0; + Register state = c_rarg1; + Register ofs = c_rarg2; + Register limit = c_rarg3; + + const XMMRegister msg = xmm0; + const XMMRegister state0 = xmm1; + const XMMRegister state1 = xmm2; + const XMMRegister msgtmp0 = xmm3; + const XMMRegister msgtmp1 = xmm4; + const XMMRegister msgtmp2 = xmm5; + const XMMRegister msgtmp3 = xmm6; + const XMMRegister msgtmp4 = xmm7; + + const XMMRegister shuf_mask = xmm8; + + __ enter(); + + __ sha512_AVX2(msg, state0, state1, msgtmp0, msgtmp1, msgtmp2, msgtmp3, msgtmp4, + buf, state, ofs, limit, rsp, multi_block, shuf_mask); + + __ vzeroupper(); + __ leave(); + __ ret(0);*/ + return start; + } + + // This is a version of CTR/AES crypt which does 6 blocks in a loop at a time + // to hide instruction latency + // + // Arguments: + // + // Inputs: + // c_rarg0 - source byte array address + // c_rarg1 - destination byte array address + // c_rarg2 - K (key) in little endian int array + // c_rarg3 - counter vector byte array address + // Linux + // c_rarg4 - input length + // c_rarg5 - saved encryptedCounter start + // rbp + 6 * wordSize - saved used 
length + // Windows + // rbp + 6 * wordSize - input length + // rbp + 7 * wordSize - saved encryptedCounter start + // rbp + 8 * wordSize - saved used length + // + // Output: + // rax - input length + // + address generate_counterMode_AESCrypt_Parallel() { + assert(UseAES, "need AES instructions and misaligned SSE support"); + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", "counterMode_AESCrypt");ShouldNotReachHere(); + address start = __ pc();/* + const Register from = c_rarg0; // source array address + const Register to = c_rarg1; // destination array address + const Register key = c_rarg2; // key array address + const Register counter = c_rarg3; // counter byte array initialized from counter array address + // and updated with the incremented counter in the end +#ifndef _WIN64 + const Register len_reg = c_rarg4; + const Register saved_encCounter_start = c_rarg5; + const Register used_addr = r10; + const Address used_mem(rbp, 2 * wordSize); + const Register used = r11; +#else + const Address len_mem(rbp, 6 * wordSize); // length is on stack on Win64 + const Address saved_encCounter_mem(rbp, 7 * wordSize); // length is on stack on Win64 + const Address used_mem(rbp, 8 * wordSize); // length is on stack on Win64 + const Register len_reg = r10; // pick the first volatile windows register + const Register saved_encCounter_start = r11; + const Register used_addr = r13; + const Register used = r14; +#endif + const Register pos = rax; + + const int PARALLEL_FACTOR = 6; + const XMMRegister xmm_counter_shuf_mask = xmm0; + const XMMRegister xmm_key_shuf_mask = xmm1; // used temporarily to swap key bytes up front + const XMMRegister xmm_curr_counter = xmm2; + + const XMMRegister xmm_key_tmp0 = xmm3; + const XMMRegister xmm_key_tmp1 = xmm4; + + // registers holding the four results in the parallelized loop + const XMMRegister xmm_result0 = xmm5; + const XMMRegister xmm_result1 = xmm6; + const XMMRegister xmm_result2 = xmm7; + const XMMRegister xmm_result3 = xmm8; + const XMMRegister xmm_result4 = xmm9; + const XMMRegister xmm_result5 = xmm10; + + const XMMRegister xmm_from0 = xmm11; + const XMMRegister xmm_from1 = xmm12; + const XMMRegister xmm_from2 = xmm13; + const XMMRegister xmm_from3 = xmm14; //the last one is xmm14. we have to preserve it on WIN64. + const XMMRegister xmm_from4 = xmm3; //reuse xmm3~4. 
Because xmm_key_tmp0~1 are useless when loading input text + const XMMRegister xmm_from5 = xmm4; + + //for key_128, key_192, key_256 + const int rounds[3] = {10, 12, 14}; + Label L_exit_preLoop, L_preLoop_start; + Label L_multiBlock_loopTop[3]; + Label L_singleBlockLoopTop[3]; + Label L__incCounter[3][6]; //for 6 blocks + Label L__incCounter_single[3]; //for single block, key128, key192, key256 + Label L_processTail_insr[3], L_processTail_4_insr[3], L_processTail_2_insr[3], L_processTail_1_insr[3], L_processTail_exit_insr[3]; + Label L_processTail_extr[3], L_processTail_4_extr[3], L_processTail_2_extr[3], L_processTail_1_extr[3], L_processTail_exit_extr[3]; + + Label L_exit; + + __ enter(); // required for proper stackwalking of RuntimeStub frame + +#ifdef _WIN64 + // allocate spill slots for r13, r14 + enum { + saved_r13_offset, + saved_r14_offset + }; + __ subptr(rsp, 2 * wordSize); + __ movptr(Address(rsp, saved_r13_offset * wordSize), r13); + __ movptr(Address(rsp, saved_r14_offset * wordSize), r14); + + // on win64, fill len_reg from stack position + __ movl(len_reg, len_mem); + __ movptr(saved_encCounter_start, saved_encCounter_mem); + __ movptr(used_addr, used_mem); + __ movl(used, Address(used_addr, 0)); +#else + __ push(len_reg); // Save + __ movptr(used_addr, used_mem); + __ movl(used, Address(used_addr, 0)); +#endif + + __ push(rbx); // Save RBX + __ movdqu(xmm_curr_counter, Address(counter, 0x00)); // initialize counter with initial counter + __ movdqu(xmm_counter_shuf_mask, ExternalAddress(StubRoutines::x86::counter_shuffle_mask_addr()), pos); // pos as scratch + __ pshufb(xmm_curr_counter, xmm_counter_shuf_mask); //counter is shuffled + __ movptr(pos, 0); + + // Use the partially used encrpyted counter from last invocation + __ BIND(L_preLoop_start); + __ cmpptr(used, 16); + __ jcc(Assembler::aboveEqual, L_exit_preLoop); + __ cmpptr(len_reg, 0); + __ jcc(Assembler::lessEqual, L_exit_preLoop); + __ movb(rbx, Address(saved_encCounter_start, used)); + __ xorb(rbx, Address(from, pos)); + __ movb(Address(to, pos), rbx); + __ addptr(pos, 1); + __ addptr(used, 1); + __ subptr(len_reg, 1); + + __ jmp(L_preLoop_start); + + __ BIND(L_exit_preLoop); + __ movl(Address(used_addr, 0), used); + + // key length could be only {11, 13, 15} * 4 = {44, 52, 60} + __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()), rbx); // rbx as scratch + __ movl(rbx, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT))); + __ cmpl(rbx, 52); + __ jcc(Assembler::equal, L_multiBlock_loopTop[1]); + __ cmpl(rbx, 60); + __ jcc(Assembler::equal, L_multiBlock_loopTop[2]); + +#define CTR_DoSix(opc, src_reg) \ + __ opc(xmm_result0, src_reg); \ + __ opc(xmm_result1, src_reg); \ + __ opc(xmm_result2, src_reg); \ + __ opc(xmm_result3, src_reg); \ + __ opc(xmm_result4, src_reg); \ + __ opc(xmm_result5, src_reg); + + // k == 0 : generate code for key_128 + // k == 1 : generate code for key_192 + // k == 2 : generate code for key_256 + for (int k = 0; k < 3; ++k) { + //multi blocks starts here + __ align(OptoLoopAlignment); + __ BIND(L_multiBlock_loopTop[k]); + __ cmpptr(len_reg, PARALLEL_FACTOR * AESBlockSize); // see if at least PARALLEL_FACTOR blocks left + __ jcc(Assembler::less, L_singleBlockLoopTop[k]); + load_key(xmm_key_tmp0, key, 0x00, xmm_key_shuf_mask); + + //load, then increase counters + CTR_DoSix(movdqa, xmm_curr_counter); + inc_counter(rbx, xmm_result1, 0x01, L__incCounter[k][0]); + inc_counter(rbx, xmm_result2, 0x02, 
L__incCounter[k][1]); + inc_counter(rbx, xmm_result3, 0x03, L__incCounter[k][2]); + inc_counter(rbx, xmm_result4, 0x04, L__incCounter[k][3]); + inc_counter(rbx, xmm_result5, 0x05, L__incCounter[k][4]); + inc_counter(rbx, xmm_curr_counter, 0x06, L__incCounter[k][5]); + CTR_DoSix(pshufb, xmm_counter_shuf_mask); // after increased, shuffled counters back for PXOR + CTR_DoSix(pxor, xmm_key_tmp0); //PXOR with Round 0 key + + //load two ROUND_KEYs at a time + for (int i = 1; i < rounds[k]; ) { + load_key(xmm_key_tmp1, key, (0x10 * i), xmm_key_shuf_mask); + load_key(xmm_key_tmp0, key, (0x10 * (i+1)), xmm_key_shuf_mask); + CTR_DoSix(aesenc, xmm_key_tmp1); + i++; + if (i != rounds[k]) { + CTR_DoSix(aesenc, xmm_key_tmp0); + } else { + CTR_DoSix(aesenclast, xmm_key_tmp0); + } + i++; + } + + // get next PARALLEL_FACTOR blocks into xmm_result registers + __ movdqu(xmm_from0, Address(from, pos, Address::times_1, 0 * AESBlockSize)); + __ movdqu(xmm_from1, Address(from, pos, Address::times_1, 1 * AESBlockSize)); + __ movdqu(xmm_from2, Address(from, pos, Address::times_1, 2 * AESBlockSize)); + __ movdqu(xmm_from3, Address(from, pos, Address::times_1, 3 * AESBlockSize)); + __ movdqu(xmm_from4, Address(from, pos, Address::times_1, 4 * AESBlockSize)); + __ movdqu(xmm_from5, Address(from, pos, Address::times_1, 5 * AESBlockSize)); + + __ pxor(xmm_result0, xmm_from0); + __ pxor(xmm_result1, xmm_from1); + __ pxor(xmm_result2, xmm_from2); + __ pxor(xmm_result3, xmm_from3); + __ pxor(xmm_result4, xmm_from4); + __ pxor(xmm_result5, xmm_from5); + + // store 6 results into the next 64 bytes of output + __ movdqu(Address(to, pos, Address::times_1, 0 * AESBlockSize), xmm_result0); + __ movdqu(Address(to, pos, Address::times_1, 1 * AESBlockSize), xmm_result1); + __ movdqu(Address(to, pos, Address::times_1, 2 * AESBlockSize), xmm_result2); + __ movdqu(Address(to, pos, Address::times_1, 3 * AESBlockSize), xmm_result3); + __ movdqu(Address(to, pos, Address::times_1, 4 * AESBlockSize), xmm_result4); + __ movdqu(Address(to, pos, Address::times_1, 5 * AESBlockSize), xmm_result5); + + __ addptr(pos, PARALLEL_FACTOR * AESBlockSize); // increase the length of crypt text + __ subptr(len_reg, PARALLEL_FACTOR * AESBlockSize); // decrease the remaining length + __ jmp(L_multiBlock_loopTop[k]); + + // singleBlock starts here + __ align(OptoLoopAlignment); + __ BIND(L_singleBlockLoopTop[k]); + __ cmpptr(len_reg, 0); + __ jcc(Assembler::lessEqual, L_exit); + load_key(xmm_key_tmp0, key, 0x00, xmm_key_shuf_mask); + __ movdqa(xmm_result0, xmm_curr_counter); + inc_counter(rbx, xmm_curr_counter, 0x01, L__incCounter_single[k]); + __ pshufb(xmm_result0, xmm_counter_shuf_mask); + __ pxor(xmm_result0, xmm_key_tmp0); + for (int i = 1; i < rounds[k]; i++) { + load_key(xmm_key_tmp0, key, (0x10 * i), xmm_key_shuf_mask); + __ aesenc(xmm_result0, xmm_key_tmp0); + } + load_key(xmm_key_tmp0, key, (rounds[k] * 0x10), xmm_key_shuf_mask); + __ aesenclast(xmm_result0, xmm_key_tmp0); + __ cmpptr(len_reg, AESBlockSize); + __ jcc(Assembler::less, L_processTail_insr[k]); + __ movdqu(xmm_from0, Address(from, pos, Address::times_1, 0 * AESBlockSize)); + __ pxor(xmm_result0, xmm_from0); + __ movdqu(Address(to, pos, Address::times_1, 0 * AESBlockSize), xmm_result0); + __ addptr(pos, AESBlockSize); + __ subptr(len_reg, AESBlockSize); + __ jmp(L_singleBlockLoopTop[k]); + __ BIND(L_processTail_insr[k]); // Process the tail part of the input array + __ addptr(pos, len_reg); // 1. 
Insert bytes from src array into xmm_from0 register + __ testptr(len_reg, 8); + __ jcc(Assembler::zero, L_processTail_4_insr[k]); + __ subptr(pos,8); + __ pinsrq(xmm_from0, Address(from, pos), 0); + __ BIND(L_processTail_4_insr[k]); + __ testptr(len_reg, 4); + __ jcc(Assembler::zero, L_processTail_2_insr[k]); + __ subptr(pos,4); + __ pslldq(xmm_from0, 4); + __ pinsrd(xmm_from0, Address(from, pos), 0); + __ BIND(L_processTail_2_insr[k]); + __ testptr(len_reg, 2); + __ jcc(Assembler::zero, L_processTail_1_insr[k]); + __ subptr(pos, 2); + __ pslldq(xmm_from0, 2); + __ pinsrw(xmm_from0, Address(from, pos), 0); + __ BIND(L_processTail_1_insr[k]); + __ testptr(len_reg, 1); + __ jcc(Assembler::zero, L_processTail_exit_insr[k]); + __ subptr(pos, 1); + __ pslldq(xmm_from0, 1); + __ pinsrb(xmm_from0, Address(from, pos), 0); + __ BIND(L_processTail_exit_insr[k]); + + __ movdqu(Address(saved_encCounter_start, 0), xmm_result0); // 2. Perform pxor of the encrypted counter and plaintext Bytes. + __ pxor(xmm_result0, xmm_from0); // Also the encrypted counter is saved for next invocation. + + __ testptr(len_reg, 8); + __ jcc(Assembler::zero, L_processTail_4_extr[k]); // 3. Extract bytes from xmm_result0 into the dest. array + __ pextrq(Address(to, pos), xmm_result0, 0); + __ psrldq(xmm_result0, 8); + __ addptr(pos, 8); + __ BIND(L_processTail_4_extr[k]); + __ testptr(len_reg, 4); + __ jcc(Assembler::zero, L_processTail_2_extr[k]); + __ pextrd(Address(to, pos), xmm_result0, 0); + __ psrldq(xmm_result0, 4); + __ addptr(pos, 4); + __ BIND(L_processTail_2_extr[k]); + __ testptr(len_reg, 2); + __ jcc(Assembler::zero, L_processTail_1_extr[k]); + __ pextrw(Address(to, pos), xmm_result0, 0); + __ psrldq(xmm_result0, 2); + __ addptr(pos, 2); + __ BIND(L_processTail_1_extr[k]); + __ testptr(len_reg, 1); + __ jcc(Assembler::zero, L_processTail_exit_extr[k]); + __ pextrb(Address(to, pos), xmm_result0, 0); + + __ BIND(L_processTail_exit_extr[k]); + __ movl(Address(used_addr, 0), len_reg); + __ jmp(L_exit); + + } + + __ BIND(L_exit); + __ pshufb(xmm_curr_counter, xmm_counter_shuf_mask); //counter is shuffled back. + __ movdqu(Address(counter, 0), xmm_curr_counter); //save counter back + __ pop(rbx); // pop the saved RBX. 
+#ifdef _WIN64 + __ movl(rax, len_mem); + __ movptr(r13, Address(rsp, saved_r13_offset * wordSize)); + __ movptr(r14, Address(rsp, saved_r14_offset * wordSize)); + __ addptr(rsp, 2 * wordSize); +#else + __ pop(rax); // return 'len' +#endif + __ leave(); // required for proper stackwalking of RuntimeStub frame + __ ret(0);*/ + return start; + } + +/*void roundDec(XMMRegister xmm_reg) { + __ vaesdec(xmm1, xmm1, xmm_reg, Assembler::AVX_512bit); + __ vaesdec(xmm2, xmm2, xmm_reg, Assembler::AVX_512bit); + __ vaesdec(xmm3, xmm3, xmm_reg, Assembler::AVX_512bit); + __ vaesdec(xmm4, xmm4, xmm_reg, Assembler::AVX_512bit); + __ vaesdec(xmm5, xmm5, xmm_reg, Assembler::AVX_512bit); + __ vaesdec(xmm6, xmm6, xmm_reg, Assembler::AVX_512bit); + __ vaesdec(xmm7, xmm7, xmm_reg, Assembler::AVX_512bit); + __ vaesdec(xmm8, xmm8, xmm_reg, Assembler::AVX_512bit); +}*/ + +/*void roundDeclast(XMMRegister xmm_reg) { + __ vaesdeclast(xmm1, xmm1, xmm_reg, Assembler::AVX_512bit); + __ vaesdeclast(xmm2, xmm2, xmm_reg, Assembler::AVX_512bit); + __ vaesdeclast(xmm3, xmm3, xmm_reg, Assembler::AVX_512bit); + __ vaesdeclast(xmm4, xmm4, xmm_reg, Assembler::AVX_512bit); + __ vaesdeclast(xmm5, xmm5, xmm_reg, Assembler::AVX_512bit); + __ vaesdeclast(xmm6, xmm6, xmm_reg, Assembler::AVX_512bit); + __ vaesdeclast(xmm7, xmm7, xmm_reg, Assembler::AVX_512bit); + __ vaesdeclast(xmm8, xmm8, xmm_reg, Assembler::AVX_512bit); +}*/ + +/*void ev_load_key(XMMRegister xmmdst, Register key, int offset, XMMRegister xmm_shuf_mask = NULL) { + __ movdqu(xmmdst, Address(key, offset)); + if (xmm_shuf_mask != NULL) { + __ pshufb(xmmdst, xmm_shuf_mask); + } else { + __ pshufb(xmmdst, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr())); + } + __ evshufi64x2(xmmdst, xmmdst, xmmdst, 0x0, Assembler::AVX_512bit); + + }*/ + +address generate_cipherBlockChaining_decryptVectorAESCrypt() { + //assert(VM_Version::supports_vaes(), "need AES instructions and misaligned SSE support"); + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_decryptAESCrypt");ShouldNotReachHere(); + address start = __ pc();/* + + const Register from = c_rarg0; // source array address + const Register to = c_rarg1; // destination array address + const Register key = c_rarg2; // key array address + const Register rvec = c_rarg3; // r byte array initialized from initvector array address + // and left with the results of the last encryption block +#ifndef _WIN64 + const Register len_reg = c_rarg4; // src len (must be multiple of blocksize 16) +#else + const Address len_mem(rbp, 6 * wordSize); // length is on stack on Win64 + const Register len_reg = r11; // pick the volatile windows register +#endif + + Label Loop, Loop1, L_128, L_256, L_192, KEY_192, KEY_256, Loop2, Lcbc_dec_rem_loop, + Lcbc_dec_rem_last, Lcbc_dec_ret, Lcbc_dec_rem, Lcbc_exit; + + __ enter(); + +#ifdef _WIN64 + // on win64, fill len_reg from stack position + __ movl(len_reg, len_mem); +#else + __ push(len_reg); // Save +#endif + __ push(rbx); + __ vzeroupper(); + + // Temporary variable declaration for swapping key bytes + const XMMRegister xmm_key_shuf_mask = xmm1; + __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr())); + + // Calculate number of rounds from key size: 44 for 10-rounds, 52 for 12-rounds, 60 for 14-rounds + const Register rounds = rbx; + __ movl(rounds, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT))); + + const XMMRegister IV = xmm0; + // Load IV and broadcast value to 
512-bits + __ evbroadcasti64x2(IV, Address(rvec, 0), Assembler::AVX_512bit); + + // Temporary variables for storing round keys + const XMMRegister RK0 = xmm30; + const XMMRegister RK1 = xmm9; + const XMMRegister RK2 = xmm18; + const XMMRegister RK3 = xmm19; + const XMMRegister RK4 = xmm20; + const XMMRegister RK5 = xmm21; + const XMMRegister RK6 = xmm22; + const XMMRegister RK7 = xmm23; + const XMMRegister RK8 = xmm24; + const XMMRegister RK9 = xmm25; + const XMMRegister RK10 = xmm26; + + // Load and shuffle key + // the java expanded key ordering is rotated one position from what we want + // so we start from 1*16 here and hit 0*16 last + ev_load_key(RK1, key, 1 * 16, xmm_key_shuf_mask); + ev_load_key(RK2, key, 2 * 16, xmm_key_shuf_mask); + ev_load_key(RK3, key, 3 * 16, xmm_key_shuf_mask); + ev_load_key(RK4, key, 4 * 16, xmm_key_shuf_mask); + ev_load_key(RK5, key, 5 * 16, xmm_key_shuf_mask); + ev_load_key(RK6, key, 6 * 16, xmm_key_shuf_mask); + ev_load_key(RK7, key, 7 * 16, xmm_key_shuf_mask); + ev_load_key(RK8, key, 8 * 16, xmm_key_shuf_mask); + ev_load_key(RK9, key, 9 * 16, xmm_key_shuf_mask); + ev_load_key(RK10, key, 10 * 16, xmm_key_shuf_mask); + ev_load_key(RK0, key, 0*16, xmm_key_shuf_mask); + + // Variables for storing source cipher text + const XMMRegister S0 = xmm10; + const XMMRegister S1 = xmm11; + const XMMRegister S2 = xmm12; + const XMMRegister S3 = xmm13; + const XMMRegister S4 = xmm14; + const XMMRegister S5 = xmm15; + const XMMRegister S6 = xmm16; + const XMMRegister S7 = xmm17; + + // Variables for storing decrypted text + const XMMRegister B0 = xmm1; + const XMMRegister B1 = xmm2; + const XMMRegister B2 = xmm3; + const XMMRegister B3 = xmm4; + const XMMRegister B4 = xmm5; + const XMMRegister B5 = xmm6; + const XMMRegister B6 = xmm7; + const XMMRegister B7 = xmm8; + + __ cmpl(rounds, 44); + __ jcc(Assembler::greater, KEY_192); + __ jmp(Loop); + + __ BIND(KEY_192); + const XMMRegister RK11 = xmm27; + const XMMRegister RK12 = xmm28; + ev_load_key(RK11, key, 11*16, xmm_key_shuf_mask); + ev_load_key(RK12, key, 12*16, xmm_key_shuf_mask); + + __ cmpl(rounds, 52); + __ jcc(Assembler::greater, KEY_256); + __ jmp(Loop); + + __ BIND(KEY_256); + const XMMRegister RK13 = xmm29; + const XMMRegister RK14 = xmm31; + ev_load_key(RK13, key, 13*16, xmm_key_shuf_mask); + ev_load_key(RK14, key, 14*16, xmm_key_shuf_mask); + + __ BIND(Loop); + __ cmpl(len_reg, 512); + __ jcc(Assembler::below, Lcbc_dec_rem); + __ BIND(Loop1); + __ subl(len_reg, 512); + __ evmovdquq(S0, Address(from, 0 * 64), Assembler::AVX_512bit); + __ evmovdquq(S1, Address(from, 1 * 64), Assembler::AVX_512bit); + __ evmovdquq(S2, Address(from, 2 * 64), Assembler::AVX_512bit); + __ evmovdquq(S3, Address(from, 3 * 64), Assembler::AVX_512bit); + __ evmovdquq(S4, Address(from, 4 * 64), Assembler::AVX_512bit); + __ evmovdquq(S5, Address(from, 5 * 64), Assembler::AVX_512bit); + __ evmovdquq(S6, Address(from, 6 * 64), Assembler::AVX_512bit); + __ evmovdquq(S7, Address(from, 7 * 64), Assembler::AVX_512bit); + __ leaq(from, Address(from, 8 * 64)); + + __ evpxorq(B0, S0, RK1, Assembler::AVX_512bit); + __ evpxorq(B1, S1, RK1, Assembler::AVX_512bit); + __ evpxorq(B2, S2, RK1, Assembler::AVX_512bit); + __ evpxorq(B3, S3, RK1, Assembler::AVX_512bit); + __ evpxorq(B4, S4, RK1, Assembler::AVX_512bit); + __ evpxorq(B5, S5, RK1, Assembler::AVX_512bit); + __ evpxorq(B6, S6, RK1, Assembler::AVX_512bit); + __ evpxorq(B7, S7, RK1, Assembler::AVX_512bit); + + __ evalignq(IV, S0, IV, 0x06); + __ evalignq(S0, S1, S0, 0x06); + __ evalignq(S1, S2, S1, 
0x06); + __ evalignq(S2, S3, S2, 0x06); + __ evalignq(S3, S4, S3, 0x06); + __ evalignq(S4, S5, S4, 0x06); + __ evalignq(S5, S6, S5, 0x06); + __ evalignq(S6, S7, S6, 0x06); + + roundDec(RK2); + roundDec(RK3); + roundDec(RK4); + roundDec(RK5); + roundDec(RK6); + roundDec(RK7); + roundDec(RK8); + roundDec(RK9); + roundDec(RK10); + + __ cmpl(rounds, 44); + __ jcc(Assembler::belowEqual, L_128); + roundDec(RK11); + roundDec(RK12); + + __ cmpl(rounds, 52); + __ jcc(Assembler::belowEqual, L_192); + roundDec(RK13); + roundDec(RK14); + + __ BIND(L_256); + roundDeclast(RK0); + __ jmp(Loop2); + + __ BIND(L_128); + roundDeclast(RK0); + __ jmp(Loop2); + + __ BIND(L_192); + roundDeclast(RK0); + + __ BIND(Loop2); + __ evpxorq(B0, B0, IV, Assembler::AVX_512bit); + __ evpxorq(B1, B1, S0, Assembler::AVX_512bit); + __ evpxorq(B2, B2, S1, Assembler::AVX_512bit); + __ evpxorq(B3, B3, S2, Assembler::AVX_512bit); + __ evpxorq(B4, B4, S3, Assembler::AVX_512bit); + __ evpxorq(B5, B5, S4, Assembler::AVX_512bit); + __ evpxorq(B6, B6, S5, Assembler::AVX_512bit); + __ evpxorq(B7, B7, S6, Assembler::AVX_512bit); + __ evmovdquq(IV, S7, Assembler::AVX_512bit); + + __ evmovdquq(Address(to, 0 * 64), B0, Assembler::AVX_512bit); + __ evmovdquq(Address(to, 1 * 64), B1, Assembler::AVX_512bit); + __ evmovdquq(Address(to, 2 * 64), B2, Assembler::AVX_512bit); + __ evmovdquq(Address(to, 3 * 64), B3, Assembler::AVX_512bit); + __ evmovdquq(Address(to, 4 * 64), B4, Assembler::AVX_512bit); + __ evmovdquq(Address(to, 5 * 64), B5, Assembler::AVX_512bit); + __ evmovdquq(Address(to, 6 * 64), B6, Assembler::AVX_512bit); + __ evmovdquq(Address(to, 7 * 64), B7, Assembler::AVX_512bit); + __ leaq(to, Address(to, 8 * 64)); + __ jmp(Loop); + + __ BIND(Lcbc_dec_rem); + __ evshufi64x2(IV, IV, IV, 0x03, Assembler::AVX_512bit); + + __ BIND(Lcbc_dec_rem_loop); + __ subl(len_reg, 16); + __ jcc(Assembler::carrySet, Lcbc_dec_ret); + + __ movdqu(S0, Address(from, 0)); + __ evpxorq(B0, S0, RK1, Assembler::AVX_512bit); + __ vaesdec(B0, B0, RK2, Assembler::AVX_512bit); + __ vaesdec(B0, B0, RK3, Assembler::AVX_512bit); + __ vaesdec(B0, B0, RK4, Assembler::AVX_512bit); + __ vaesdec(B0, B0, RK5, Assembler::AVX_512bit); + __ vaesdec(B0, B0, RK6, Assembler::AVX_512bit); + __ vaesdec(B0, B0, RK7, Assembler::AVX_512bit); + __ vaesdec(B0, B0, RK8, Assembler::AVX_512bit); + __ vaesdec(B0, B0, RK9, Assembler::AVX_512bit); + __ vaesdec(B0, B0, RK10, Assembler::AVX_512bit); + __ cmpl(rounds, 44); + __ jcc(Assembler::belowEqual, Lcbc_dec_rem_last); + + __ vaesdec(B0, B0, RK11, Assembler::AVX_512bit); + __ vaesdec(B0, B0, RK12, Assembler::AVX_512bit); + __ cmpl(rounds, 52); + __ jcc(Assembler::belowEqual, Lcbc_dec_rem_last); + + __ vaesdec(B0, B0, RK13, Assembler::AVX_512bit); + __ vaesdec(B0, B0, RK14, Assembler::AVX_512bit); + + __ BIND(Lcbc_dec_rem_last); + __ vaesdeclast(B0, B0, RK0, Assembler::AVX_512bit); + + __ evpxorq(B0, B0, IV, Assembler::AVX_512bit); + __ evmovdquq(IV, S0, Assembler::AVX_512bit); + __ movdqu(Address(to, 0), B0); + __ leaq(from, Address(from, 16)); + __ leaq(to, Address(to, 16)); + __ jmp(Lcbc_dec_rem_loop); + + __ BIND(Lcbc_dec_ret); + __ movdqu(Address(rvec, 0), IV); + + // Zero out the round keys + __ evpxorq(RK0, RK0, RK0, Assembler::AVX_512bit); + __ evpxorq(RK1, RK1, RK1, Assembler::AVX_512bit); + __ evpxorq(RK2, RK2, RK2, Assembler::AVX_512bit); + __ evpxorq(RK3, RK3, RK3, Assembler::AVX_512bit); + __ evpxorq(RK4, RK4, RK4, Assembler::AVX_512bit); + __ evpxorq(RK5, RK5, RK5, Assembler::AVX_512bit); + __ evpxorq(RK6, RK6, RK6, 
Assembler::AVX_512bit); + __ evpxorq(RK7, RK7, RK7, Assembler::AVX_512bit); + __ evpxorq(RK8, RK8, RK8, Assembler::AVX_512bit); + __ evpxorq(RK9, RK9, RK9, Assembler::AVX_512bit); + __ evpxorq(RK10, RK10, RK10, Assembler::AVX_512bit); + __ cmpl(rounds, 44); + __ jcc(Assembler::belowEqual, Lcbc_exit); + __ evpxorq(RK11, RK11, RK11, Assembler::AVX_512bit); + __ evpxorq(RK12, RK12, RK12, Assembler::AVX_512bit); + __ cmpl(rounds, 52); + __ jcc(Assembler::belowEqual, Lcbc_exit); + __ evpxorq(RK13, RK13, RK13, Assembler::AVX_512bit); + __ evpxorq(RK14, RK14, RK14, Assembler::AVX_512bit); + + __ BIND(Lcbc_exit); + __ pop(rbx); +#ifdef _WIN64 + __ movl(rax, len_mem); +#else + __ pop(rax); // return length +#endif + __ leave(); // required for proper stackwalking of RuntimeStub frame + __ ret(0);*/ + return start; +} + + // byte swap x86 long + address generate_ghash_long_swap_mask() { + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", "ghash_long_swap_mask"); + address start = __ pc();ShouldNotReachHere(); +// __ emit_data64(0x0f0e0d0c0b0a0908, relocInfo::none ); +// __ emit_data64(0x0706050403020100, relocInfo::none ); + return start; + } + + // byte swap x86 byte array + address generate_ghash_byte_swap_mask() { + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", "ghash_byte_swap_mask"); + address start = __ pc();ShouldNotReachHere(); +// __ emit_data64(0x08090a0b0c0d0e0f, relocInfo::none ); +// __ emit_data64(0x0001020304050607, relocInfo::none ); + return start; + } + + /* Single and multi-block ghash operations */ + address generate_ghash_processBlocks() { + __ align(CodeEntryAlignment); + Label L_ghash_loop, L_exit; + StubCodeMark mark(this, "StubRoutines", "ghash_processBlocks");ShouldNotReachHere(); + address start = __ pc();/* + + const Register state = c_rarg0; + const Register subkeyH = c_rarg1; + const Register data = c_rarg2; + const Register blocks = c_rarg3; + + const XMMRegister xmm_temp0 = xmm0; + const XMMRegister xmm_temp1 = xmm1; + const XMMRegister xmm_temp2 = xmm2; + const XMMRegister xmm_temp3 = xmm3; + const XMMRegister xmm_temp4 = xmm4; + const XMMRegister xmm_temp5 = xmm5; + const XMMRegister xmm_temp6 = xmm6; + const XMMRegister xmm_temp7 = xmm7; + const XMMRegister xmm_temp8 = xmm8; + const XMMRegister xmm_temp9 = xmm9; + const XMMRegister xmm_temp10 = xmm10; + + __ enter(); + + __ movdqu(xmm_temp10, ExternalAddress(StubRoutines::x86::ghash_long_swap_mask_addr())); + + __ movdqu(xmm_temp0, Address(state, 0)); + __ pshufb(xmm_temp0, xmm_temp10); + + + __ BIND(L_ghash_loop); + __ movdqu(xmm_temp2, Address(data, 0)); + __ pshufb(xmm_temp2, ExternalAddress(StubRoutines::x86::ghash_byte_swap_mask_addr())); + + __ movdqu(xmm_temp1, Address(subkeyH, 0)); + __ pshufb(xmm_temp1, xmm_temp10); + + __ pxor(xmm_temp0, xmm_temp2); + + // + // Multiply with the hash key + // + __ movdqu(xmm_temp3, xmm_temp0); + __ pclmulqdq(xmm_temp3, xmm_temp1, 0); // xmm3 holds a0*b0 + __ movdqu(xmm_temp4, xmm_temp0); + __ pclmulqdq(xmm_temp4, xmm_temp1, 16); // xmm4 holds a0*b1 + + __ movdqu(xmm_temp5, xmm_temp0); + __ pclmulqdq(xmm_temp5, xmm_temp1, 1); // xmm5 holds a1*b0 + __ movdqu(xmm_temp6, xmm_temp0); + __ pclmulqdq(xmm_temp6, xmm_temp1, 17); // xmm6 holds a1*b1 + + __ pxor(xmm_temp4, xmm_temp5); // xmm4 holds a0*b1 + a1*b0 + + __ movdqu(xmm_temp5, xmm_temp4); // move the contents of xmm4 to xmm5 + __ psrldq(xmm_temp4, 8); // shift by xmm4 64 bits to the right + __ pslldq(xmm_temp5, 8); // shift by xmm5 64 bits to the left + __ pxor(xmm_temp3, 
xmm_temp5); + __ pxor(xmm_temp6, xmm_temp4); // Register pair holds the result + // of the carry-less multiplication of + // xmm0 by xmm1. + + // We shift the result of the multiplication by one bit position + // to the left to cope for the fact that the bits are reversed. + __ movdqu(xmm_temp7, xmm_temp3); + __ movdqu(xmm_temp8, xmm_temp6); + __ pslld(xmm_temp3, 1); + __ pslld(xmm_temp6, 1); + __ psrld(xmm_temp7, 31); + __ psrld(xmm_temp8, 31); + __ movdqu(xmm_temp9, xmm_temp7); + __ pslldq(xmm_temp8, 4); + __ pslldq(xmm_temp7, 4); + __ psrldq(xmm_temp9, 12); + __ por(xmm_temp3, xmm_temp7); + __ por(xmm_temp6, xmm_temp8); + __ por(xmm_temp6, xmm_temp9); + + // + // First phase of the reduction + // + // Move xmm3 into xmm7, xmm8, xmm9 in order to perform the shifts + // independently. + __ movdqu(xmm_temp7, xmm_temp3); + __ movdqu(xmm_temp8, xmm_temp3); + __ movdqu(xmm_temp9, xmm_temp3); + __ pslld(xmm_temp7, 31); // packed right shift shifting << 31 + __ pslld(xmm_temp8, 30); // packed right shift shifting << 30 + __ pslld(xmm_temp9, 25); // packed right shift shifting << 25 + __ pxor(xmm_temp7, xmm_temp8); // xor the shifted versions + __ pxor(xmm_temp7, xmm_temp9); + __ movdqu(xmm_temp8, xmm_temp7); + __ pslldq(xmm_temp7, 12); + __ psrldq(xmm_temp8, 4); + __ pxor(xmm_temp3, xmm_temp7); // first phase of the reduction complete + + // + // Second phase of the reduction + // + // Make 3 copies of xmm3 in xmm2, xmm4, xmm5 for doing these + // shift operations. + __ movdqu(xmm_temp2, xmm_temp3); + __ movdqu(xmm_temp4, xmm_temp3); + __ movdqu(xmm_temp5, xmm_temp3); + __ psrld(xmm_temp2, 1); // packed left shifting >> 1 + __ psrld(xmm_temp4, 2); // packed left shifting >> 2 + __ psrld(xmm_temp5, 7); // packed left shifting >> 7 + __ pxor(xmm_temp2, xmm_temp4); // xor the shifted versions + __ pxor(xmm_temp2, xmm_temp5); + __ pxor(xmm_temp2, xmm_temp8); + __ pxor(xmm_temp3, xmm_temp2); + __ pxor(xmm_temp6, xmm_temp3); // the result is in xmm6 + + __ decrement(blocks); + __ jcc(Assembler::zero, L_exit); + __ movdqu(xmm_temp0, xmm_temp6); + __ addptr(data, 16); + __ jmp(L_ghash_loop); + + __ BIND(L_exit); + __ pshufb(xmm_temp6, xmm_temp10); // Byte swap 16-byte result + __ movdqu(Address(state, 0), xmm_temp6); // store the result + __ leave(); + __ ret(0);*/ + return start; + } + + //base64 character set + address base64_charset_addr() { + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", "base64_charset");ShouldNotReachHere(); + address start = __ pc();/* + __ emit_data64(0x0000004200000041, relocInfo::none); + __ emit_data64(0x0000004400000043, relocInfo::none); + __ emit_data64(0x0000004600000045, relocInfo::none); + __ emit_data64(0x0000004800000047, relocInfo::none); + __ emit_data64(0x0000004a00000049, relocInfo::none); + __ emit_data64(0x0000004c0000004b, relocInfo::none); + __ emit_data64(0x0000004e0000004d, relocInfo::none); + __ emit_data64(0x000000500000004f, relocInfo::none); + __ emit_data64(0x0000005200000051, relocInfo::none); + __ emit_data64(0x0000005400000053, relocInfo::none); + __ emit_data64(0x0000005600000055, relocInfo::none); + __ emit_data64(0x0000005800000057, relocInfo::none); + __ emit_data64(0x0000005a00000059, relocInfo::none); + __ emit_data64(0x0000006200000061, relocInfo::none); + __ emit_data64(0x0000006400000063, relocInfo::none); + __ emit_data64(0x0000006600000065, relocInfo::none); + __ emit_data64(0x0000006800000067, relocInfo::none); + __ emit_data64(0x0000006a00000069, relocInfo::none); + __ emit_data64(0x0000006c0000006b, 
relocInfo::none); + __ emit_data64(0x0000006e0000006d, relocInfo::none); + __ emit_data64(0x000000700000006f, relocInfo::none); + __ emit_data64(0x0000007200000071, relocInfo::none); + __ emit_data64(0x0000007400000073, relocInfo::none); + __ emit_data64(0x0000007600000075, relocInfo::none); + __ emit_data64(0x0000007800000077, relocInfo::none); + __ emit_data64(0x0000007a00000079, relocInfo::none); + __ emit_data64(0x0000003100000030, relocInfo::none); + __ emit_data64(0x0000003300000032, relocInfo::none); + __ emit_data64(0x0000003500000034, relocInfo::none); + __ emit_data64(0x0000003700000036, relocInfo::none); + __ emit_data64(0x0000003900000038, relocInfo::none); + __ emit_data64(0x0000002f0000002b, relocInfo::none);*/ + return start; + } + + //base64 url character set + address base64url_charset_addr() { + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", "base64url_charset");ShouldNotReachHere(); + address start = __ pc();/* + __ emit_data64(0x0000004200000041, relocInfo::none); + __ emit_data64(0x0000004400000043, relocInfo::none); + __ emit_data64(0x0000004600000045, relocInfo::none); + __ emit_data64(0x0000004800000047, relocInfo::none); + __ emit_data64(0x0000004a00000049, relocInfo::none); + __ emit_data64(0x0000004c0000004b, relocInfo::none); + __ emit_data64(0x0000004e0000004d, relocInfo::none); + __ emit_data64(0x000000500000004f, relocInfo::none); + __ emit_data64(0x0000005200000051, relocInfo::none); + __ emit_data64(0x0000005400000053, relocInfo::none); + __ emit_data64(0x0000005600000055, relocInfo::none); + __ emit_data64(0x0000005800000057, relocInfo::none); + __ emit_data64(0x0000005a00000059, relocInfo::none); + __ emit_data64(0x0000006200000061, relocInfo::none); + __ emit_data64(0x0000006400000063, relocInfo::none); + __ emit_data64(0x0000006600000065, relocInfo::none); + __ emit_data64(0x0000006800000067, relocInfo::none); + __ emit_data64(0x0000006a00000069, relocInfo::none); + __ emit_data64(0x0000006c0000006b, relocInfo::none); + __ emit_data64(0x0000006e0000006d, relocInfo::none); + __ emit_data64(0x000000700000006f, relocInfo::none); + __ emit_data64(0x0000007200000071, relocInfo::none); + __ emit_data64(0x0000007400000073, relocInfo::none); + __ emit_data64(0x0000007600000075, relocInfo::none); + __ emit_data64(0x0000007800000077, relocInfo::none); + __ emit_data64(0x0000007a00000079, relocInfo::none); + __ emit_data64(0x0000003100000030, relocInfo::none); + __ emit_data64(0x0000003300000032, relocInfo::none); + __ emit_data64(0x0000003500000034, relocInfo::none); + __ emit_data64(0x0000003700000036, relocInfo::none); + __ emit_data64(0x0000003900000038, relocInfo::none); + __ emit_data64(0x0000005f0000002d, relocInfo::none); +*/ + return start; + } + + address base64_bswap_mask_addr() { + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", "bswap_mask_base64");ShouldNotReachHere(); + address start = __ pc();/* + __ emit_data64(0x0504038002010080, relocInfo::none); + __ emit_data64(0x0b0a098008070680, relocInfo::none); + __ emit_data64(0x0908078006050480, relocInfo::none); + __ emit_data64(0x0f0e0d800c0b0a80, relocInfo::none); + __ emit_data64(0x0605048003020180, relocInfo::none); + __ emit_data64(0x0c0b0a8009080780, relocInfo::none); + __ emit_data64(0x0504038002010080, relocInfo::none); + __ emit_data64(0x0b0a098008070680, relocInfo::none); +*/ + return start; + } + + address base64_right_shift_mask_addr() { + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", 
"right_shift_mask");ShouldNotReachHere(); + address start = __ pc();/* + __ emit_data64(0x0006000400020000, relocInfo::none); + __ emit_data64(0x0006000400020000, relocInfo::none); + __ emit_data64(0x0006000400020000, relocInfo::none); + __ emit_data64(0x0006000400020000, relocInfo::none); + __ emit_data64(0x0006000400020000, relocInfo::none); + __ emit_data64(0x0006000400020000, relocInfo::none); + __ emit_data64(0x0006000400020000, relocInfo::none); + __ emit_data64(0x0006000400020000, relocInfo::none); +*/ + return start; + } + + address base64_left_shift_mask_addr() { + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", "left_shift_mask");ShouldNotReachHere(); + address start = __ pc();/* + __ emit_data64(0x0000000200040000, relocInfo::none); + __ emit_data64(0x0000000200040000, relocInfo::none); + __ emit_data64(0x0000000200040000, relocInfo::none); + __ emit_data64(0x0000000200040000, relocInfo::none); + __ emit_data64(0x0000000200040000, relocInfo::none); + __ emit_data64(0x0000000200040000, relocInfo::none); + __ emit_data64(0x0000000200040000, relocInfo::none); + __ emit_data64(0x0000000200040000, relocInfo::none); +*/ + return start; + } + + address base64_and_mask_addr() { + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", "and_mask");ShouldNotReachHere(); + address start = __ pc();/* + __ emit_data64(0x3f003f003f000000, relocInfo::none); + __ emit_data64(0x3f003f003f000000, relocInfo::none); + __ emit_data64(0x3f003f003f000000, relocInfo::none); + __ emit_data64(0x3f003f003f000000, relocInfo::none); + __ emit_data64(0x3f003f003f000000, relocInfo::none); + __ emit_data64(0x3f003f003f000000, relocInfo::none); + __ emit_data64(0x3f003f003f000000, relocInfo::none); + __ emit_data64(0x3f003f003f000000, relocInfo::none);*/ + return start; + } + + address base64_gather_mask_addr() { + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", "gather_mask");ShouldNotReachHere(); + address start = __ pc(); +// __ emit_data64(0xffffffffffffffff, relocInfo::none); + return start; + } + +// Code for generating Base64 encoding. 
+// Intrinsic function prototype in Base64.java: +// private void encodeBlock(byte[] src, int sp, int sl, byte[] dst, int dp, boolean isURL) { + address generate_base64_encodeBlock() { + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", "implEncode");ShouldNotReachHere(); + address start = __ pc();/* + __ enter(); + + // Save callee-saved registers before using them + __ push(r12); + __ push(r13); + __ push(r14); + __ push(r15); + + // arguments + const Register source = c_rarg0; // Source Array + const Register start_offset = c_rarg1; // start offset + const Register end_offset = c_rarg2; // end offset + const Register dest = c_rarg3; // destination array + +#ifndef _WIN64 + const Register dp = c_rarg4; // Position for writing to dest array + const Register isURL = c_rarg5;// Base64 or URL character set +#else + const Address dp_mem(rbp, 6 * wordSize); // length is on stack on Win64 + const Address isURL_mem(rbp, 7 * wordSize); + const Register isURL = r10; // pick the volatile windows register + const Register dp = r12; + __ movl(dp, dp_mem); + __ movl(isURL, isURL_mem); +#endif + + const Register length = r14; + Label L_process80, L_process32, L_process3, L_exit, L_processdata; + + // calculate length from offsets + __ movl(length, end_offset); + __ subl(length, start_offset); + __ cmpl(length, 0); + __ jcc(Assembler::lessEqual, L_exit); + + __ lea(r11, ExternalAddress(StubRoutines::x86::base64_charset_addr())); + // check if base64 charset(isURL=0) or base64 url charset(isURL=1) needs to be loaded + __ cmpl(isURL, 0); + __ jcc(Assembler::equal, L_processdata); + __ lea(r11, ExternalAddress(StubRoutines::x86::base64url_charset_addr())); + + // load masks required for encoding data + __ BIND(L_processdata); + __ movdqu(xmm16, ExternalAddress(StubRoutines::x86::base64_gather_mask_addr())); + // Set 64 bits of K register. + __ evpcmpeqb(k3, xmm16, xmm16, Assembler::AVX_512bit); + __ evmovdquq(xmm12, ExternalAddress(StubRoutines::x86::base64_bswap_mask_addr()), Assembler::AVX_256bit, r13); + __ evmovdquq(xmm13, ExternalAddress(StubRoutines::x86::base64_right_shift_mask_addr()), Assembler::AVX_512bit, r13); + __ evmovdquq(xmm14, ExternalAddress(StubRoutines::x86::base64_left_shift_mask_addr()), Assembler::AVX_512bit, r13); + __ evmovdquq(xmm15, ExternalAddress(StubRoutines::x86::base64_and_mask_addr()), Assembler::AVX_512bit, r13); + + // Vector Base64 implementation, producing 96 bytes of encoded data + __ BIND(L_process80); + __ cmpl(length, 80); + __ jcc(Assembler::below, L_process32); + __ evmovdquq(xmm0, Address(source, start_offset, Address::times_1, 0), Assembler::AVX_256bit); + __ evmovdquq(xmm1, Address(source, start_offset, Address::times_1, 24), Assembler::AVX_256bit); + __ evmovdquq(xmm2, Address(source, start_offset, Address::times_1, 48), Assembler::AVX_256bit); + + //permute the input data in such a manner that we have continuity of the source + __ vpermq(xmm3, xmm0, 148, Assembler::AVX_256bit); + __ vpermq(xmm4, xmm1, 148, Assembler::AVX_256bit); + __ vpermq(xmm5, xmm2, 148, Assembler::AVX_256bit); + + //shuffle input and group 3 bytes of data and to it add 0 as the 4th byte. + //we can deal with 12 bytes at a time in a 128 bit register + __ vpshufb(xmm3, xmm3, xmm12, Assembler::AVX_256bit); + __ vpshufb(xmm4, xmm4, xmm12, Assembler::AVX_256bit); + __ vpshufb(xmm5, xmm5, xmm12, Assembler::AVX_256bit); + + //convert byte to word. 
Each 128 bit register will have 6 bytes for processing + __ vpmovzxbw(xmm3, xmm3, Assembler::AVX_512bit); + __ vpmovzxbw(xmm4, xmm4, Assembler::AVX_512bit); + __ vpmovzxbw(xmm5, xmm5, Assembler::AVX_512bit); + + // Extract bits in the following pattern 6, 4+2, 2+4, 6 to convert 3, 8 bit numbers to 4, 6 bit numbers + __ evpsrlvw(xmm0, xmm3, xmm13, Assembler::AVX_512bit); + __ evpsrlvw(xmm1, xmm4, xmm13, Assembler::AVX_512bit); + __ evpsrlvw(xmm2, xmm5, xmm13, Assembler::AVX_512bit); + + __ evpsllvw(xmm3, xmm3, xmm14, Assembler::AVX_512bit); + __ evpsllvw(xmm4, xmm4, xmm14, Assembler::AVX_512bit); + __ evpsllvw(xmm5, xmm5, xmm14, Assembler::AVX_512bit); + + __ vpsrlq(xmm0, xmm0, 8, Assembler::AVX_512bit); + __ vpsrlq(xmm1, xmm1, 8, Assembler::AVX_512bit); + __ vpsrlq(xmm2, xmm2, 8, Assembler::AVX_512bit); + + __ vpsllq(xmm3, xmm3, 8, Assembler::AVX_512bit); + __ vpsllq(xmm4, xmm4, 8, Assembler::AVX_512bit); + __ vpsllq(xmm5, xmm5, 8, Assembler::AVX_512bit); + + __ vpandq(xmm3, xmm3, xmm15, Assembler::AVX_512bit); + __ vpandq(xmm4, xmm4, xmm15, Assembler::AVX_512bit); + __ vpandq(xmm5, xmm5, xmm15, Assembler::AVX_512bit); + + // Get the final 4*6 bits base64 encoding + __ vporq(xmm3, xmm3, xmm0, Assembler::AVX_512bit); + __ vporq(xmm4, xmm4, xmm1, Assembler::AVX_512bit); + __ vporq(xmm5, xmm5, xmm2, Assembler::AVX_512bit); + + // Shift + __ vpsrlq(xmm3, xmm3, 8, Assembler::AVX_512bit); + __ vpsrlq(xmm4, xmm4, 8, Assembler::AVX_512bit); + __ vpsrlq(xmm5, xmm5, 8, Assembler::AVX_512bit); + + // look up 6 bits in the base64 character set to fetch the encoding + // we are converting word to dword as gather instructions need dword indices for looking up encoding + __ vextracti64x4(xmm6, xmm3, 0); + __ vpmovzxwd(xmm0, xmm6, Assembler::AVX_512bit); + __ vextracti64x4(xmm6, xmm3, 1); + __ vpmovzxwd(xmm1, xmm6, Assembler::AVX_512bit); + + __ vextracti64x4(xmm6, xmm4, 0); + __ vpmovzxwd(xmm2, xmm6, Assembler::AVX_512bit); + __ vextracti64x4(xmm6, xmm4, 1); + __ vpmovzxwd(xmm3, xmm6, Assembler::AVX_512bit); + + __ vextracti64x4(xmm4, xmm5, 0); + __ vpmovzxwd(xmm6, xmm4, Assembler::AVX_512bit); + + __ vextracti64x4(xmm4, xmm5, 1); + __ vpmovzxwd(xmm7, xmm4, Assembler::AVX_512bit); + + __ kmovql(k2, k3); + __ evpgatherdd(xmm4, k2, Address(r11, xmm0, Address::times_4, 0), Assembler::AVX_512bit); + __ kmovql(k2, k3); + __ evpgatherdd(xmm5, k2, Address(r11, xmm1, Address::times_4, 0), Assembler::AVX_512bit); + __ kmovql(k2, k3); + __ evpgatherdd(xmm8, k2, Address(r11, xmm2, Address::times_4, 0), Assembler::AVX_512bit); + __ kmovql(k2, k3); + __ evpgatherdd(xmm9, k2, Address(r11, xmm3, Address::times_4, 0), Assembler::AVX_512bit); + __ kmovql(k2, k3); + __ evpgatherdd(xmm10, k2, Address(r11, xmm6, Address::times_4, 0), Assembler::AVX_512bit); + __ kmovql(k2, k3); + __ evpgatherdd(xmm11, k2, Address(r11, xmm7, Address::times_4, 0), Assembler::AVX_512bit); + + //Down convert dword to byte. 
Final output is 16*6 = 96 bytes long + __ evpmovdb(Address(dest, dp, Address::times_1, 0), xmm4, Assembler::AVX_512bit); + __ evpmovdb(Address(dest, dp, Address::times_1, 16), xmm5, Assembler::AVX_512bit); + __ evpmovdb(Address(dest, dp, Address::times_1, 32), xmm8, Assembler::AVX_512bit); + __ evpmovdb(Address(dest, dp, Address::times_1, 48), xmm9, Assembler::AVX_512bit); + __ evpmovdb(Address(dest, dp, Address::times_1, 64), xmm10, Assembler::AVX_512bit); + __ evpmovdb(Address(dest, dp, Address::times_1, 80), xmm11, Assembler::AVX_512bit); + + __ addq(dest, 96); + __ addq(source, 72); + __ subq(length, 72); + __ jmp(L_process80); + + // Vector Base64 implementation generating 32 bytes of encoded data + __ BIND(L_process32); + __ cmpl(length, 32); + __ jcc(Assembler::below, L_process3); + __ evmovdquq(xmm0, Address(source, start_offset), Assembler::AVX_256bit); + __ vpermq(xmm0, xmm0, 148, Assembler::AVX_256bit); + __ vpshufb(xmm6, xmm0, xmm12, Assembler::AVX_256bit); + __ vpmovzxbw(xmm6, xmm6, Assembler::AVX_512bit); + __ evpsrlvw(xmm2, xmm6, xmm13, Assembler::AVX_512bit); + __ evpsllvw(xmm3, xmm6, xmm14, Assembler::AVX_512bit); + + __ vpsrlq(xmm2, xmm2, 8, Assembler::AVX_512bit); + __ vpsllq(xmm3, xmm3, 8, Assembler::AVX_512bit); + __ vpandq(xmm3, xmm3, xmm15, Assembler::AVX_512bit); + __ vporq(xmm1, xmm2, xmm3, Assembler::AVX_512bit); + __ vpsrlq(xmm1, xmm1, 8, Assembler::AVX_512bit); + __ vextracti64x4(xmm9, xmm1, 0); + __ vpmovzxwd(xmm6, xmm9, Assembler::AVX_512bit); + __ vextracti64x4(xmm9, xmm1, 1); + __ vpmovzxwd(xmm5, xmm9, Assembler::AVX_512bit); + __ kmovql(k2, k3); + __ evpgatherdd(xmm8, k2, Address(r11, xmm6, Address::times_4, 0), Assembler::AVX_512bit); + __ kmovql(k2, k3); + __ evpgatherdd(xmm10, k2, Address(r11, xmm5, Address::times_4, 0), Assembler::AVX_512bit); + __ evpmovdb(Address(dest, dp, Address::times_1, 0), xmm8, Assembler::AVX_512bit); + __ evpmovdb(Address(dest, dp, Address::times_1, 16), xmm10, Assembler::AVX_512bit); + __ subq(length, 24); + __ addq(dest, 32); + __ addq(source, 24); + __ jmp(L_process32); +*/ + // Scalar data processing takes 3 bytes at a time and produces 4 bytes of encoded data + /* This code corresponds to the scalar version of the following snippet in Base64.java + ** int bits = (src[sp0++] & 0xff) << 16 |(src[sp0++] & 0xff) << 8 |(src[sp0++] & 0xff); + ** dst[dp0++] = (byte)base64[(bits >> > 18) & 0x3f]; + ** dst[dp0++] = (byte)base64[(bits >> > 12) & 0x3f]; + ** dst[dp0++] = (byte)base64[(bits >> > 6) & 0x3f]; + ** dst[dp0++] = (byte)base64[bits & 0x3f];*//* + __ BIND(L_process3); + __ cmpl(length, 3); + __ jcc(Assembler::below, L_exit); + // Read 1 byte at a time + __ movzbl(rax, Address(source, start_offset)); + __ shll(rax, 0x10); + __ movl(r15, rax); + __ movzbl(rax, Address(source, start_offset, Address::times_1, 1)); + __ shll(rax, 0x8); + __ movzwl(rax, rax); + __ orl(r15, rax); + __ movzbl(rax, Address(source, start_offset, Address::times_1, 2)); + __ orl(rax, r15); + // Save 3 bytes read in r15 + __ movl(r15, rax); + __ shrl(rax, 0x12); + __ andl(rax, 0x3f); + // rax contains the index, r11 contains base64 lookup table + __ movb(rax, Address(r11, rax, Address::times_4)); + // Write the encoded byte to destination + __ movb(Address(dest, dp, Address::times_1, 0), rax); + __ movl(rax, r15); + __ shrl(rax, 0xc); + __ andl(rax, 0x3f); + __ movb(rax, Address(r11, rax, Address::times_4)); + __ movb(Address(dest, dp, Address::times_1, 1), rax); + __ movl(rax, r15); + __ shrl(rax, 0x6); + __ andl(rax, 0x3f); + __ movb(rax, Address(r11, 
rax, Address::times_4)); + __ movb(Address(dest, dp, Address::times_1, 2), rax); + __ movl(rax, r15); + __ andl(rax, 0x3f); + __ movb(rax, Address(r11, rax, Address::times_4)); + __ movb(Address(dest, dp, Address::times_1, 3), rax); + __ subl(length, 3); + __ addq(dest, 4); + __ addq(source, 3); + __ jmp(L_process3); + __ BIND(L_exit); + __ pop(r15); + __ pop(r14); + __ pop(r13); + __ pop(r12); + __ leave(); + __ ret(0);*/ + return start; + } + + /** + * Arguments: + * + * Inputs: + * c_rarg0 - int crc + * c_rarg1 - byte* buf + * c_rarg2 - int length + * + * Ouput: + * rax - int crc result + */ + address generate_updateBytesCRC32() { + assert(UseCRC32Intrinsics, "need AVX and CLMUL instructions"); + + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", "updateBytesCRC32");ShouldNotReachHere(); + + address start = __ pc(); + __ stop("generate_updateBytesCRC32"); + /* + // Win64: rcx, rdx, r8, r9 (c_rarg0, c_rarg1, ...) + // Unix: rdi, rsi, rdx, rcx, r8, r9 (c_rarg0, c_rarg1, ...) + // rscratch1: r10 + const Register crc = c_rarg0; // crc + const Register buf = c_rarg1; // source java byte array address + const Register len = c_rarg2; // length + const Register table = c_rarg3; // crc_table address (reuse register) + const Register tmp = r11; + assert_different_registers(crc, buf, len, table, tmp, rax); + + BLOCK_COMMENT("Entry:"); + __ enter(); // required for proper stackwalking of RuntimeStub frame + + __ kernel_crc32(crc, buf, len, table, tmp); + + __ movl(rax, crc); + __ vzeroupper(); + __ leave(); // required for proper stackwalking of RuntimeStub frame + __ ret(0); +*/ + return start; + } + + /** + * Arguments: + * + * Inputs: + * c_rarg0 - int crc + * c_rarg1 - byte* buf + * c_rarg2 - long length + * c_rarg3 - table_start - optional (present only when doing a library_call, + * not used by x86 algorithm) + * + * Ouput: + * rax - int crc result + */ + address generate_updateBytesCRC32C(bool is_pclmulqdq_supported) { + assert(UseCRC32CIntrinsics, "need SSE4_2"); + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", "updateBytesCRC32C");ShouldNotReachHere(); + address start = __ pc(); + __ stop("generate_updateBytesCRC32C"); + /* + //reg.arg int#0 int#1 int#2 int#3 int#4 int#5 float regs + //Windows RCX RDX R8 R9 none none XMM0..XMM3 + //Lin / Sol RDI RSI RDX RCX R8 R9 XMM0..XMM7 + const Register crc = c_rarg0; // crc + const Register buf = c_rarg1; // source java byte array address + const Register len = c_rarg2; // length + const Register a = rax; + const Register j = r9; + const Register k = r10; + const Register l = r11; +#ifdef _WIN64 + const Register y = rdi; + const Register z = rsi; +#else + const Register y = rcx; + const Register z = r8; +#endif + assert_different_registers(crc, buf, len, a, j, k, l, y, z); + + BLOCK_COMMENT("Entry:"); + __ enter(); // required for proper stackwalking of RuntimeStub frame +#ifdef _WIN64 + __ push(y); + __ push(z); +#endif + __ crc32c_ipl_alg2_alt2(crc, buf, len, + a, j, k, + l, y, z, + c_farg0, c_farg1, c_farg2, + is_pclmulqdq_supported); + __ movl(rax, crc); +#ifdef _WIN64 + __ pop(z); + __ pop(y); +#endif + __ vzeroupper(); + __ leave(); // required for proper stackwalking of RuntimeStub frame + __ ret(0); +*/ + return start; + } + + /** + * Arguments: + * + * Input: + * c_rarg0 - x address + * c_rarg1 - x length + * c_rarg2 - y address + * c_rarg3 - y length + * not Win64 + * c_rarg4 - z address + * c_rarg5 - z length + * Win64 + * rsp+40 - z address + * rsp+48 - z length + */ + address 
generate_multiplyToLen() { + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", "multiplyToLen");ShouldNotReachHere(); + + address start = __ pc(); + __ stop("generate_multiplyToLen"); + /* + // Win64: rcx, rdx, r8, r9 (c_rarg0, c_rarg1, ...) + // Unix: rdi, rsi, rdx, rcx, r8, r9 (c_rarg0, c_rarg1, ...) + const Register x = rdi; + const Register xlen = rax; + const Register y = rsi; + const Register ylen = rcx; + const Register z = r8; + const Register zlen = r11; + + // Next registers will be saved on stack in multiply_to_len(). + const Register tmp1 = r12; + const Register tmp2 = r13; + const Register tmp3 = r14; + const Register tmp4 = r15; + const Register tmp5 = rbx; + + BLOCK_COMMENT("Entry:"); + __ enter(); // required for proper stackwalking of RuntimeStub frame + +#ifndef _WIN64 + __ movptr(zlen, r9); // Save r9 in r11 - zlen +#endif + setup_arg_regs(4); // x => rdi, xlen => rsi, y => rdx + // ylen => rcx, z => r8, zlen => r11 + // r9 and r10 may be used to save non-volatile registers +#ifdef _WIN64 + // last 2 arguments (#4, #5) are on stack on Win64 + __ movptr(z, Address(rsp, 6 * wordSize)); + __ movptr(zlen, Address(rsp, 7 * wordSize)); +#endif + + __ movptr(xlen, rsi); + __ movptr(y, rdx); + __ multiply_to_len(x, xlen, y, ylen, z, zlen, tmp1, tmp2, tmp3, tmp4, tmp5); + + restore_arg_regs(); + + __ leave(); // required for proper stackwalking of RuntimeStub frame + __ ret(0); +*/ + return start; + } + + /** + * Arguments: + * + * Input: + * c_rarg0 - obja address + * c_rarg1 - objb address + * c_rarg3 - length length + * c_rarg4 - scale log2_array_indxscale + * + * Output: + * rax - int >= mismatched index, < 0 bitwise complement of tail + */ + address generate_vectorizedMismatch() { + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", "vectorizedMismatch");ShouldNotReachHere(); + address start = __ pc();/* + + BLOCK_COMMENT("Entry:"); + __ enter(); + +#ifdef _WIN64 // Win64: rcx, rdx, r8, r9 (c_rarg0, c_rarg1, ...) + const Register scale = c_rarg0; //rcx, will exchange with r9 + const Register objb = c_rarg1; //rdx + const Register length = c_rarg2; //r8 + const Register obja = c_rarg3; //r9 + __ xchgq(obja, scale); //now obja and scale contains the correct contents + + const Register tmp1 = r10; + const Register tmp2 = r11; +#endif +#ifndef _WIN64 // Unix: rdi, rsi, rdx, rcx, r8, r9 (c_rarg0, c_rarg1, ...) + const Register obja = c_rarg0; //U:rdi + const Register objb = c_rarg1; //U:rsi + const Register length = c_rarg2; //U:rdx + const Register scale = c_rarg3; //U:rcx + const Register tmp1 = r8; + const Register tmp2 = r9; +#endif + const Register result = rax; //return value + const XMMRegister vec0 = xmm0; + const XMMRegister vec1 = xmm1; + const XMMRegister vec2 = xmm2; + + __ vectorized_mismatch(obja, objb, length, scale, result, tmp1, tmp2, vec0, vec1, vec2); + + __ vzeroupper(); + __ leave(); + __ ret(0); +*/ + return start; + } + +/** + * Arguments: + * + // Input: + // c_rarg0 - x address + // c_rarg1 - x length + // c_rarg2 - z address + // c_rarg3 - z lenth + * + */ + address generate_squareToLen() { + + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", "squareToLen");ShouldNotReachHere(); + + address start = __ pc();/* + // Win64: rcx, rdx, r8, r9 (c_rarg0, c_rarg1, ...) + // Unix: rdi, rsi, rdx, rcx (c_rarg0, c_rarg1, ...) 
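+    // Conceptually this stub computes z = x * x for a BigInteger magnitude
+    // array (the intrinsic behind BigInteger.implSquareToLen). The x86
+    // register assignments below are kept for reference only; the stub is
+    // currently disabled on sw64 via ShouldNotReachHere() above.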
+ const Register x = rdi; + const Register len = rsi; + const Register z = r8; + const Register zlen = rcx; + + const Register tmp1 = r12; + const Register tmp2 = r13; + const Register tmp3 = r14; + const Register tmp4 = r15; + const Register tmp5 = rbx; + + BLOCK_COMMENT("Entry:"); + __ enter(); // required for proper stackwalking of RuntimeStub frame + + setup_arg_regs(4); // x => rdi, len => rsi, z => rdx + // zlen => rcx + // r9 and r10 may be used to save non-volatile registers + __ movptr(r8, rdx); + __ square_to_len(x, len, z, zlen, tmp1, tmp2, tmp3, tmp4, tmp5, rdx, rax); + + restore_arg_regs(); + + __ leave(); // required for proper stackwalking of RuntimeStub frame + __ ret(0); +*/ + return start; + } + + /** + * Arguments: + * + * Input: + * c_rarg0 - out address + * c_rarg1 - in address + * c_rarg2 - offset + * c_rarg3 - len + * not Win64 + * c_rarg4 - k + * Win64 + * rsp+40 - k + */ + address generate_mulAdd() { + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", "mulAdd");ShouldNotReachHere(); + + address start = __ pc();/* + // Win64: rcx, rdx, r8, r9 (c_rarg0, c_rarg1, ...) + // Unix: rdi, rsi, rdx, rcx, r8, r9 (c_rarg0, c_rarg1, ...) + const Register out = rdi; + const Register in = rsi; + const Register offset = r11; + const Register len = rcx; + const Register k = r8; + + // Next registers will be saved on stack in mul_add(). + const Register tmp1 = r12; + const Register tmp2 = r13; + const Register tmp3 = r14; + const Register tmp4 = r15; + const Register tmp5 = rbx; + + BLOCK_COMMENT("Entry:"); + __ enter(); // required for proper stackwalking of RuntimeStub frame + + setup_arg_regs(4); // out => rdi, in => rsi, offset => rdx + // len => rcx, k => r8 + // r9 and r10 may be used to save non-volatile registers +#ifdef _WIN64 + // last argument is on stack on Win64 + __ movl(k, Address(rsp, 6 * wordSize)); +#endif + __ movptr(r11, rdx); // move offset in rdx to offset(r11) + __ mul_add(out, in, offset, len, k, tmp1, tmp2, tmp3, tmp4, tmp5, rdx, rax); + + restore_arg_regs(); + + __ leave(); // required for proper stackwalking of RuntimeStub frame + __ ret(0); +*/ + return start; + } + + address generate_bigIntegerRightShift() { + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", "bigIntegerRightShiftWorker"); + ShouldNotReachHere(); + + address start = __ pc();/* + Label Shift512Loop, ShiftTwo, ShiftTwoLoop, ShiftOne, Exit; + // For Unix, the arguments are as follows: rdi, rsi, rdx, rcx, r8. + const Register newArr = rdi; + const Register oldArr = rsi; + const Register newIdx = rdx; + const Register shiftCount = rcx; // It was intentional to have shiftCount in rcx since it is used implicitly for shift. + const Register totalNumIter = r8; + + // For windows, we use r9 and r10 as temps to save rdi and rsi. Thus we cannot allocate them for our temps. + // For everything else, we prefer using r9 and r10 since we do not have to save them before use. + const Register tmp1 = r11; // Caller save. + const Register tmp2 = rax; // Caller save. + const Register tmp3 = WINDOWS_ONLY(r12) NOT_WINDOWS(r9); // Windows: Callee save. Linux: Caller save. + const Register tmp4 = WINDOWS_ONLY(r13) NOT_WINDOWS(r10); // Windows: Callee save. Linux: Caller save. + const Register tmp5 = r14; // Callee save. 
+ const Register tmp6 = r15; + + const XMMRegister x0 = xmm0; + const XMMRegister x1 = xmm1; + const XMMRegister x2 = xmm2; + + BLOCK_COMMENT("Entry:"); + __ enter(); // required for proper stackwalking of RuntimeStub frame + + __ push(tmp5); + + // Rename temps used throughout the code. + const Register idx = tmp1; + const Register nIdx = tmp2; + + __ xorl(idx, idx); + + // Start right shift from end of the array. + // For example, if #iteration = 4 and newIdx = 1 + // then dest[4] = src[4] >> shiftCount | src[3] <<< (shiftCount - 32) + // if #iteration = 4 and newIdx = 0 + // then dest[3] = src[4] >> shiftCount | src[3] <<< (shiftCount - 32) + __ movl(idx, totalNumIter); + __ movl(nIdx, idx); + __ addl(nIdx, newIdx); + + // If vectorization is enabled, check if the number of iterations is at least 64 + // If not, then go to ShifTwo processing 2 iterations + if (VM_Version::supports_avx512_vbmi2()) { + __ cmpptr(totalNumIter, (AVX3Threshold/64)); + __ jcc(Assembler::less, ShiftTwo); + + if (AVX3Threshold < 16 * 64) { + __ cmpl(totalNumIter, 16); + __ jcc(Assembler::less, ShiftTwo); + } + __ evpbroadcastd(x0, shiftCount, Assembler::AVX_512bit); + __ subl(idx, 16); + __ subl(nIdx, 16); + __ BIND(Shift512Loop); + __ evmovdqul(x2, Address(oldArr, idx, Address::times_4, 4), Assembler::AVX_512bit); + __ evmovdqul(x1, Address(oldArr, idx, Address::times_4), Assembler::AVX_512bit); + __ vpshrdvd(x2, x1, x0, Assembler::AVX_512bit); + __ evmovdqul(Address(newArr, nIdx, Address::times_4), x2, Assembler::AVX_512bit); + __ subl(nIdx, 16); + __ subl(idx, 16); + __ jcc(Assembler::greaterEqual, Shift512Loop); + __ addl(idx, 16); + __ addl(nIdx, 16); + } + __ BIND(ShiftTwo); + __ cmpl(idx, 2); + __ jcc(Assembler::less, ShiftOne); + __ subl(idx, 2); + __ subl(nIdx, 2); + __ BIND(ShiftTwoLoop); + __ movl(tmp5, Address(oldArr, idx, Address::times_4, 8)); + __ movl(tmp4, Address(oldArr, idx, Address::times_4, 4)); + __ movl(tmp3, Address(oldArr, idx, Address::times_4)); + __ shrdl(tmp5, tmp4); + __ shrdl(tmp4, tmp3); + __ movl(Address(newArr, nIdx, Address::times_4, 4), tmp5); + __ movl(Address(newArr, nIdx, Address::times_4), tmp4); + __ subl(nIdx, 2); + __ subl(idx, 2); + __ jcc(Assembler::greaterEqual, ShiftTwoLoop); + __ addl(idx, 2); + __ addl(nIdx, 2); + + // Do the last iteration + __ BIND(ShiftOne); + __ cmpl(idx, 1); + __ jcc(Assembler::less, Exit); + __ subl(idx, 1); + __ subl(nIdx, 1); + __ movl(tmp4, Address(oldArr, idx, Address::times_4, 4)); + __ movl(tmp3, Address(oldArr, idx, Address::times_4)); + __ shrdl(tmp4, tmp3); + __ movl(Address(newArr, nIdx, Address::times_4), tmp4); + __ BIND(Exit); + // Restore callee save registers. + __ pop(tmp5); + + __ leave(); // required for proper stackwalking of RuntimeStub frame + __ ret(0);*/ + return start; + } + + /** + * Arguments: + * + * Input: + * c_rarg0 - newArr address + * c_rarg1 - oldArr address + * c_rarg2 - newIdx + * c_rarg3 - shiftCount + * not Win64 + * c_rarg4 - numIter + * Win64 + * rsp40 - numIter + */ + address generate_bigIntegerLeftShift() { + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", "bigIntegerLeftShiftWorker"); + ShouldNotReachHere(); + + address start = __ pc();/* + Label Shift512Loop, ShiftTwo, ShiftTwoLoop, ShiftOne, Exit; + // For Unix, the arguments are as follows: rdi, rsi, rdx, rcx, r8. 
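+    // Roughly, bigIntegerLeftShiftWorker shifts 'totalNumIter' ints of oldArr
+    // left by 'shiftCount' bits, writing the result into newArr starting at
+    // newIdx (see the argument comment above). The 16-element vector path
+    // below needs AVX512_VBMI2 (vpshldvd) and is x86-only; on sw64 it remains
+    // commented-out reference code behind ShouldNotReachHere().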
+ const Register newArr = rdi; + const Register oldArr = rsi; + const Register newIdx = rdx; + const Register shiftCount = rcx; // It was intentional to have shiftCount in rcx since it is used implicitly for shift. + const Register totalNumIter = r8; + // For windows, we use r9 and r10 as temps to save rdi and rsi. Thus we cannot allocate them for our temps. + // For everything else, we prefer using r9 and r10 since we do not have to save them before use. + const Register tmp1 = r11; // Caller save. + const Register tmp2 = rax; // Caller save. + const Register tmp3 = WINDOWS_ONLY(r12) NOT_WINDOWS(r9); // Windows: Callee save. Linux: Caller save. + const Register tmp4 = WINDOWS_ONLY(r13) NOT_WINDOWS(r10); // Windows: Callee save. Linux: Caller save. + const Register tmp5 = r14; // Callee save. + + const XMMRegister x0 = xmm0; + const XMMRegister x1 = xmm1; + const XMMRegister x2 = xmm2; + BLOCK_COMMENT("Entry:"); + __ enter(); // required for proper stackwalking of RuntimeStub frame + + __ push(tmp5); + + // Rename temps used throughout the code + const Register idx = tmp1; + const Register numIterTmp = tmp2; + + // Start idx from zero. + __ xorl(idx, idx); + // Compute interior pointer for new array. We do this so that we can use same index for both old and new arrays. + __ lea(newArr, Address(newArr, newIdx, Address::times_4)); + __ movl(numIterTmp, totalNumIter); + + // If vectorization is enabled, check if the number of iterations is at least 64 + // If not, then go to ShiftTwo shifting two numbers at a time + if (VM_Version::supports_avx512_vbmi2()) { + __ cmpl(totalNumIter, (AVX3Threshold/64)); + __ jcc(Assembler::less, ShiftTwo); + + if (AVX3Threshold < 16 * 64) { + __ cmpl(totalNumIter, 16); + __ jcc(Assembler::less, ShiftTwo); + } + __ evpbroadcastd(x0, shiftCount, Assembler::AVX_512bit); + __ subl(numIterTmp, 16); + __ BIND(Shift512Loop); + __ evmovdqul(x1, Address(oldArr, idx, Address::times_4), Assembler::AVX_512bit); + __ evmovdqul(x2, Address(oldArr, idx, Address::times_4, 0x4), Assembler::AVX_512bit); + __ vpshldvd(x1, x2, x0, Assembler::AVX_512bit); + __ evmovdqul(Address(newArr, idx, Address::times_4), x1, Assembler::AVX_512bit); + __ addl(idx, 16); + __ subl(numIterTmp, 16); + __ jcc(Assembler::greaterEqual, Shift512Loop); + __ addl(numIterTmp, 16); + } + __ BIND(ShiftTwo); + __ cmpl(totalNumIter, 1); + __ jcc(Assembler::less, Exit); + __ movl(tmp3, Address(oldArr, idx, Address::times_4)); + __ subl(numIterTmp, 2); + __ jcc(Assembler::less, ShiftOne); + + __ BIND(ShiftTwoLoop); + __ movl(tmp4, Address(oldArr, idx, Address::times_4, 0x4)); + __ movl(tmp5, Address(oldArr, idx, Address::times_4, 0x8)); + __ shldl(tmp3, tmp4); + __ shldl(tmp4, tmp5); + __ movl(Address(newArr, idx, Address::times_4), tmp3); + __ movl(Address(newArr, idx, Address::times_4, 0x4), tmp4); + __ movl(tmp3, tmp5); + __ addl(idx, 2); + __ subl(numIterTmp, 2); + __ jcc(Assembler::greaterEqual, ShiftTwoLoop); + + // Do the last iteration + __ BIND(ShiftOne); + __ addl(numIterTmp, 2); + __ cmpl(numIterTmp, 1); + __ jcc(Assembler::less, Exit); + __ movl(tmp4, Address(oldArr, idx, Address::times_4, 0x4)); + __ shldl(tmp3, tmp4); + __ movl(Address(newArr, idx, Address::times_4), tmp3); + + __ BIND(Exit); + // Restore callee save registers. 
+ __ pop(tmp5); + + __ leave(); // required for proper stackwalking of RuntimeStub frame + __ ret(0);*/ + + return start; + } + + address generate_libmExp() { + StubCodeMark mark(this, "StubRoutines", "libmExp"); + ShouldNotReachHere(); + + address start = __ pc();/* + + const XMMRegister x0 = xmm0; + const XMMRegister x1 = xmm1; + const XMMRegister x2 = xmm2; + const XMMRegister x3 = xmm3; + + const XMMRegister x4 = xmm4; + const XMMRegister x5 = xmm5; + const XMMRegister x6 = xmm6; + const XMMRegister x7 = xmm7; + + const Register tmp = r11; + + BLOCK_COMMENT("Entry:"); + __ enter(); // required for proper stackwalking of RuntimeStub frame + + __ fast_exp(x0, x1, x2, x3, x4, x5, x6, x7, rax, rcx, rdx, tmp); + + __ leave(); // required for proper stackwalking of RuntimeStub frame + __ ret(0); +*/ + return start; + + } + + address generate_libmLog() { + StubCodeMark mark(this, "StubRoutines", "libmLog");ShouldNotReachHere(); + + address start = __ pc();/* + + const XMMRegister x0 = xmm0; + const XMMRegister x1 = xmm1; + const XMMRegister x2 = xmm2; + const XMMRegister x3 = xmm3; + + const XMMRegister x4 = xmm4; + const XMMRegister x5 = xmm5; + const XMMRegister x6 = xmm6; + const XMMRegister x7 = xmm7; + + const Register tmp1 = r11; + const Register tmp2 = r8; + + BLOCK_COMMENT("Entry:"); + __ enter(); // required for proper stackwalking of RuntimeStub frame + + __ fast_log(x0, x1, x2, x3, x4, x5, x6, x7, rax, rcx, rdx, tmp1, tmp2); + + __ leave(); // required for proper stackwalking of RuntimeStub frame + __ ret(0); +*/ + return start; + + } + + address generate_libmLog10() { + StubCodeMark mark(this, "StubRoutines", "libmLog10");ShouldNotReachHere(); + + address start = __ pc();/* + + const XMMRegister x0 = xmm0; + const XMMRegister x1 = xmm1; + const XMMRegister x2 = xmm2; + const XMMRegister x3 = xmm3; + + const XMMRegister x4 = xmm4; + const XMMRegister x5 = xmm5; + const XMMRegister x6 = xmm6; + const XMMRegister x7 = xmm7; + + const Register tmp = r11; + + BLOCK_COMMENT("Entry:"); + __ enter(); // required for proper stackwalking of RuntimeStub frame + + __ fast_log10(x0, x1, x2, x3, x4, x5, x6, x7, rax, rcx, rdx, tmp); + + __ leave(); // required for proper stackwalking of RuntimeStub frame + __ ret(0); +*/ + return start; + + } + + address generate_libmPow() { + StubCodeMark mark(this, "StubRoutines", "libmPow");ShouldNotReachHere(); + + address start = __ pc();/* + + const XMMRegister x0 = xmm0; + const XMMRegister x1 = xmm1; + const XMMRegister x2 = xmm2; + const XMMRegister x3 = xmm3; + + const XMMRegister x4 = xmm4; + const XMMRegister x5 = xmm5; + const XMMRegister x6 = xmm6; + const XMMRegister x7 = xmm7; + + const Register tmp1 = r8; + const Register tmp2 = r9; + const Register tmp3 = r10; + const Register tmp4 = r11; + + BLOCK_COMMENT("Entry:"); + __ enter(); // required for proper stackwalking of RuntimeStub frame + + __ fast_pow(x0, x1, x2, x3, x4, x5, x6, x7, rax, rcx, rdx, tmp1, tmp2, tmp3, tmp4); + + __ leave(); // required for proper stackwalking of RuntimeStub frame + __ ret(0); +*/ + return start; + + } + + address generate_libmSin() { + StubCodeMark mark(this, "StubRoutines", "libmSin");ShouldNotReachHere(); + + address start = __ pc();/* + + const XMMRegister x0 = xmm0; + const XMMRegister x1 = xmm1; + const XMMRegister x2 = xmm2; + const XMMRegister x3 = xmm3; + + const XMMRegister x4 = xmm4; + const XMMRegister x5 = xmm5; + const XMMRegister x6 = xmm6; + const XMMRegister x7 = xmm7; + + const Register tmp1 = r8; + const Register tmp2 = r9; + const Register tmp3 
= r10; + const Register tmp4 = r11; + + BLOCK_COMMENT("Entry:"); + __ enter(); // required for proper stackwalking of RuntimeStub frame + +#ifdef _WIN64 + __ push(rsi); + __ push(rdi); +#endif + __ fast_sin(x0, x1, x2, x3, x4, x5, x6, x7, rax, rbx, rcx, rdx, tmp1, tmp2, tmp3, tmp4); + +#ifdef _WIN64 + __ pop(rdi); + __ pop(rsi); +#endif + + __ leave(); // required for proper stackwalking of RuntimeStub frame + __ ret(0); +*/ + return start; + + } + + address generate_libmCos() { + StubCodeMark mark(this, "StubRoutines", "libmCos");ShouldNotReachHere(); + + address start = __ pc();/* + + const XMMRegister x0 = xmm0; + const XMMRegister x1 = xmm1; + const XMMRegister x2 = xmm2; + const XMMRegister x3 = xmm3; + + const XMMRegister x4 = xmm4; + const XMMRegister x5 = xmm5; + const XMMRegister x6 = xmm6; + const XMMRegister x7 = xmm7; + + const Register tmp1 = r8; + const Register tmp2 = r9; + const Register tmp3 = r10; + const Register tmp4 = r11; + + BLOCK_COMMENT("Entry:"); + __ enter(); // required for proper stackwalking of RuntimeStub frame + +#ifdef _WIN64 + __ push(rsi); + __ push(rdi); +#endif + __ fast_cos(x0, x1, x2, x3, x4, x5, x6, x7, rax, rcx, rdx, tmp1, tmp2, tmp3, tmp4); + +#ifdef _WIN64 + __ pop(rdi); + __ pop(rsi); +#endif + + __ leave(); // required for proper stackwalking of RuntimeStub frame + __ ret(0); +*/ + return start; + + } + + address generate_libmTan() { + StubCodeMark mark(this, "StubRoutines", "libmTan");ShouldNotReachHere(); + + address start = __ pc();/* + + const XMMRegister x0 = xmm0; + const XMMRegister x1 = xmm1; + const XMMRegister x2 = xmm2; + const XMMRegister x3 = xmm3; + + const XMMRegister x4 = xmm4; + const XMMRegister x5 = xmm5; + const XMMRegister x6 = xmm6; + const XMMRegister x7 = xmm7; + + const Register tmp1 = r8; + const Register tmp2 = r9; + const Register tmp3 = r10; + const Register tmp4 = r11; + + BLOCK_COMMENT("Entry:"); + __ enter(); // required for proper stackwalking of RuntimeStub frame + +#ifdef _WIN64 + __ push(rsi); + __ push(rdi); +#endif + __ fast_tan(x0, x1, x2, x3, x4, x5, x6, x7, rax, rcx, rdx, tmp1, tmp2, tmp3, tmp4); + +#ifdef _WIN64 + __ pop(rdi); + __ pop(rsi); +#endif + + __ leave(); // required for proper stackwalking of RuntimeStub frame + __ ret(0); +*/ + return start; + + } + + + + void copy_core_forward(int limit, Register src, Register dst, Register count, Register tmp1, Register tmp2){ShouldNotReachHere(); + Label l_misalign, l_misalign_simd, l_align_simd, l_before_tail, l_exit; + + __ and_ins(src, 31, tmp1); + __ beq_l(tmp1, l_align_simd); + + __ BIND(l_misalign); + __ and_ins(src, 31, tmp1); //from low-5-bit = src mod 32 + __ slll(tmp1, 3, tmp1); + __ ifmovs(tmp1, f15); + __ ldi(tmp2, 256, R0); + __ subl(tmp2, tmp1, tmp1); + __ ifmovs(tmp1, F17); + __ andnot(src, 31, tmp1); + __ vldd(f10, 0, tmp1); //load 32 bytes from src + + __ BIND(l_misalign_simd); + __ srlow(f10, f15, f12);//get high feild bytes of 32 bytes + __ vldd(f10, 32, tmp1); //load next 32 bytes from src+32 + __ sllow(f10, F17, f13);//get low field bytes of 32 bytes + __ vlog(0xfc, f12, f13, f31, f12); //merge f12, f13, into f12 + __ vstd(f12, 0, dst); + + __ addl(tmp1, 32, tmp1); + __ addl(dst, 32, dst); + __ subl(count, limit, count); + + __ cmple(count, limit-1, tmp2); //At least one more trip? 
+ __ beq_l(tmp2, l_misalign_simd); + __ beq_l(R0, l_before_tail); + + __ BIND(l_align_simd); + __ vldd(f10, 0, src); + __ vstd(f10, 0, dst); + __ subl(count, limit, count); + __ addl(src, 32, src); + __ addl(dst, 32, dst); + __ cmple(count, limit-1, tmp1); //while count >=32, do simd + __ beq_l(tmp1, l_align_simd); + __ beq_l(R0, l_exit); + + __ BIND(l_before_tail); + __ and_ins(src, 31, src); + __ addl(tmp1, src, src); + + __ BIND(l_exit); + } + + void copy_core_backward(int limit, Register end_src, Register end_dst, Register count, Register tmp1, Register tmp2){ + Label l_misalign, l_misalign_simd, l_align_simd, l_before_tail, l_exit; + + __ and_ins(end_src, 31, tmp1); + __ beq_l(tmp1, l_align_simd); + + __ BIND(l_misalign); + __ and_ins(end_src, 31, tmp1); //from low-5-bit = src mod 32 + __ slll(tmp1, 3, tmp1); + __ ifmovs(tmp1, f15); + __ ldi(tmp2, 256, R0); + __ subl(tmp2, tmp1, tmp1); + __ ifmovs(tmp1, F17); + __ andnot(end_src, 31, tmp1); + __ vldd(f10, 0, tmp1); //load 32 bytes from src + + __ BIND(l_misalign_simd); + __ sllow(f10, F17, f13);//get low field bytes of 32 bytes + __ vldd(f10, -32, tmp1); //load next 32 bytes from src+32 + __ srlow(f10, f15, f12);//get high feild bytes of 32 bytes + __ vlog(0xfc, f12, f13, f31, f12); //merge f12, f13, into f12 + __ vstd(f12, -32, end_dst); + + __ subl(tmp1, 32, tmp1); + __ subl(end_dst, 32, end_dst); + __ subl(count, limit, count); + + __ cmple(count, limit-1, tmp2); //At least one more trip? + __ beq_l(tmp2, l_misalign_simd); + __ beq_l(R0, l_before_tail); + + __ BIND(l_align_simd); + __ vldd(f10, -32, end_src); + __ vstd(f10, -32, end_dst); + __ subl(count, limit, count); + __ subl(end_src, 32, end_src); + __ subl(end_dst, 32, end_dst); + __ cmple(count, limit-1, tmp1); //while count >=32, do simd + __ beq_l(tmp1, l_align_simd); + __ beq_l(R0, l_exit); + + __ BIND(l_before_tail); + __ and_ins(end_src, 31, end_src); + __ addl(tmp1, end_src, end_src); + + __ BIND(l_exit); + } + + // Continuation point for throwing of implicit exceptions that are + // not handled in the current activation. Fabricates an exception + // oop and initiates normal exception dispatching in this + // frame. Since we need to preserve callee-saved values (currently + // only for C2, but done for C1 as well) we need a callee-saved oop + // map and therefore have to make these stubs into RuntimeStubs + // rather than BufferBlobs. If the compiler needs all registers to + // be preserved between the fault point and the exception handler + // then it must assume responsibility for that in + // AbstractCompiler::continuation_for_implicit_null_exception or + // continuation_for_implicit_division_by_zero_exception. All other + // implicit exceptions (e.g., NullPointerException or + // AbstractMethodError on entry) are either at call sites or + // otherwise assume that stack unwinding will be initiated, so + // caller saved registers were assumed volatile in the compiler. + + + // Arguments: + // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary + // ignored + // name - stub name string + // + // Inputs: + // c_rarg0 - source array address + // c_rarg1 - destination array address + // c_rarg2 - element count, treated as ssize_t, can be zero + // + // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries, + // we let the hardware handle it. The one to eight bytes within words, + // dwords or qwords that span cache line boundaries will still be loaded + // and stored atomically. 
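+  // On sw64 the actual copying is done by the generate_disjoint_copy() /
+  // generate_conjoint_copy() helpers below. Roughly: they first copy single
+  // 1-, 2- and 4-byte elements until 'src' and 'dst' share the widest
+  // alignment they can both reach, then copy in the largest co-aligned unit
+  // (8 bytes downwards), finishing the tail with progressively smaller widths.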
+ // + // Side Effects: + // disjoint_byte_copy_entry is set to the no-overlap entry point + // used by generate_conjoint_byte_copy(). + // + void generate_disjoint_copy(int widthInByte, Register src, Register dst, Register count) { + // Label lblMissAlignInByte, lblMissAlignInShort, lblMissAlignInWord, lblMissAlignInLong; + Label lblMissAlign[4]; + // Label lblSkipByte, lblSkipInShort, lblSkipInWord, lblSkipInLong; + Label lblSkip[4]; + // Label lblCopyByte, lblCopyShort, lblCopyWord, lblCopyLong; + Label lblCopy[4]; + char buf[50]; + if (widthInByte == 0) {__ subl(count, 9, AT); __ ble_l(AT, lblMissAlign[1]);} + if (widthInByte == 1) {__ subl(count, 9, AT); __ ble_l(AT, lblMissAlign[2]);} + + Label done; + __ jcc(Assembler::equal, done, count); + for (int i = widthInByte; i < 3; i++) { + __ xorptr(src, dst, AT); + __ andptr(AT, 1 << i, AT); // if the backward ith bit of src and and dst is the same + __ jcc(Assembler::notEqual, lblMissAlign[i+1], AT); // if arrays don't have the same alignment, ... + + __ andptr(src, 1 << i, AT); + __ jcc(Assembler::equal, lblSkip[i], AT); // have same alignment but extra byte/short/int + + __ load(i, AT, 0, src); + __ store(i, AT, 0, dst); + __ addl(src, 1 << i, src); + __ addl(dst, 1 << i, dst); + __ subl(count, 1 << i, count); + + __ bind(lblSkip[i]); + sprintf(buf, "lblSkip[%d]", i); + __ block_comment(buf); + } + + for (int i = 3; i >= widthInByte; i--) { // FasterArrayCopy + if (i == widthInByte) { + __ jcc(Assembler::equal, lblMissAlign[i], count); + } else { + __ cmplt(count, 1 << i, AT); + __ jcc(Assembler::notEqual, lblMissAlign[i], AT); + } + __ bind(lblCopy[i]); + sprintf(buf, "lblCopy[%d]", i); + __ block_comment(buf); + + __ load(i, AT, 0, src); + __ store(i, AT, 0, dst); + __ addl(src, 1 << i, src); + __ addl(dst, 1 << i, dst); + __ subl(count, 1 << i, count); + if(i == widthInByte){ + __ jcc(Assembler::notEqual, lblCopy[i], count); + }else{ + __ subl(count, 1 << i, AT); + __ jcc(Assembler::greaterEqual, lblCopy[i], AT); + } + __ bind(lblMissAlign[i]); + sprintf(buf, "lblMissAlign[%d]", i); + __ block_comment(buf); + } + __ bind(done); + } + + void generate_conjoint_copy(int widthInByte,Register src, Register dst, Register count) {SCOPEMARK_NAME(generate_conjoint_copy, _masm) + // Label lblMissAlignInByte, lblMissAlignInShort, lblMissAlignInWord, lblMissAlignInLong; + Label lblMissAlign[4]; + // Label lblSkipByte, lblSkipInShort, lblSkipInWord, lblSkipInLong; + Label lblSkip[4]; + // Label lblCopyByte, lblCopyShort, lblCopyWord, lblCopyLong; + Label lblCopy[4]; + char buf[50]; + + assert_different_registers(src, dst, AT); + //__ stop("TODO:generate_conjoint_copy jzy"); + if (widthInByte == 0) {__ subl(count, 9, AT); __ ble_l(AT, lblMissAlign[1]);} + if (widthInByte == 1) {__ subl(count, 9, AT); __ ble_l(AT, lblMissAlign[2]);} + + + Label done; + __ jcc(Assembler::equal, done, count); + for (int i = widthInByte; i < 3; i++) { + __ xorptr(src, dst, AT); + __ andptr(AT, 1 << i, AT); // if the backward ith bit of src and and dst is the same + __ jcc(Assembler::notEqual, lblMissAlign[i+1], AT); // if arrays don't have the same alignment, ... + + __ andptr(src, 1 << i, AT); + __ jcc(Assembler::equal, lblSkip[i], AT); // have same alignment but extra byte/short/int + + __ subl(src, 1 << i, src); + __ subl(dst, 1 << i, dst); + __ load(i, AT, 0, src); //TODO:refactor? jzy + __ store(i, AT, 0, dst);//TODO:refactor? 
jzy + __ subl(count, 1 << i, count); + + __ bind(lblSkip[i]); + sprintf(buf, "lblSkip[%d]", i); + __ block_comment(buf); + } + + for (int i = 3; i >= widthInByte; i--) { // FasterArrayCopy + if(i == widthInByte){ + __ jcc(Assembler::equal, lblMissAlign[i], count); + }else{ + __ cmpl(count, 1 << i); + __ jcc(Assembler::less, lblMissAlign[i]); + } + + __ bind(lblCopy[i]); + sprintf(buf, "lblCopy[%d]", i); + __ block_comment(buf); + + __ subl(src, 1 << i, src); + __ subl(dst, 1 << i, dst); + __ load(i, AT, 0, src); + __ store(i, AT, 0, dst); + __ subl(count, 1 << i, count); + if (i == widthInByte) { + __ jcc(Assembler::notEqual, lblCopy[i], count); + } else { + __ cmpl(count, 1 << i); + __ jcc(Assembler::greaterEqual, lblCopy[i]); + } + __ bind(lblMissAlign[i]); + sprintf(buf, "lblMissAlign[%d]", i); + __ block_comment(buf); + } + __ bind(done); + } + +#undef __ +#define __ masm-> + + address generate_throw_exception(const char* name, + address runtime_entry, + Register arg1 = noreg, + Register arg2 = noreg) { + // Information about frame layout at time of blocking runtime call. + // Note that we only have to preserve callee-saved registers since + // the compilers are responsible for supplying a continuation point + // if they expect all registers to be preserved. + // n.b. sw64 asserts that frame::arg_reg_save_area_bytes == 0 + enum layout { + rfp_off = frame::arg_reg_save_area_bytes/BytesPerInt, + rfp_off2, + return_off, + return_off2, + framesize // inclusive of return address + }; + + int insts_size = 2048; + int locs_size = 32; + + CodeBuffer code(name, insts_size, locs_size); + OopMapSet* oop_maps = new OopMapSet(); + MacroAssembler* masm = new MacroAssembler(&code); + + address start = __ pc(); + + // This is an inlined and slightly modified version of call_VM + // which has the ability to fetch the return PC out of + // thread-local storage and also sets up last_Java_sp slightly + // differently than the real call_VM + Register java_thread = rthread; + + //Label frame_return; + //__ stop("no check:jzy"); + __ enter(); // Save FP and LR before call + + __ mov_immediate64(rscratch3, (framesize-4) << LogBytesPerWord); + __ subptr(esp, rscratch3, esp); // prolog + + int frame_complete = __ pc() - start; + + // Set up last_Java_sp and last_Java_fp + address the_pc = __ pc(); + __ set_last_Java_frame(esp, rfp, the_pc, rscratch3); + + // Call runtime + if (arg1 != noreg) { + assert(arg2 != c_rarg1, "clobbered"); + __ movl(c_rarg1, arg1); + } + if (arg2 != noreg) { + __ movl(c_rarg2, arg2); + } + __ movl(c_rarg0, rthread); + + // Call runtime + __ call(RuntimeAddress(runtime_entry)); + + // Generate oop map + OopMap* map = new OopMap(framesize, 0); + oop_maps->add_gc_map(the_pc - start, map); + + __ reset_last_Java_frame(true); + + // discard arguments + __ leave(); + // check for pending exceptions +#ifdef ASSERT + Label L; + __ cmpptr(Address(java_thread, Thread::pending_exception_offset()), + (int32_t) NULL_WORD); + __ jcc(Assembler::notEqual, L); + __ should_not_reach_here("Thread::pending_exception_offset"); + __ bind(L); +#endif //ASSERT +// __ push(RA); + __ jump(RuntimeAddress(StubRoutines::forward_exception_entry())); + + + // codeBlob framesize is in words (not VMRegImpl::slot_size) + RuntimeStub* stub = + RuntimeStub::new_runtime_stub(name, + &code, + frame_complete, + (framesize >> (LogBytesPerWord - LogBytesPerInt)), + oop_maps, false); + return stub->entry_point(); + } + + // Initialization + void generate_initial() { + if (SafePatch) { + NativeCall::instruction_size = 6 * 
BytesPerInstWord; + NativeCall::return_address_offset = 6 * BytesPerInstWord; + NativeJump::instruction_size = 6 * BytesPerInstWord; + NativeJump::instruction_size = 6 * BytesPerInstWord; +// NativeMovConstReg::instruction_size = 5 * BytesPerInstWord; +// NativeMovConstReg::next_instruction_offset = 5 * BytesPerInstWord; + } + // Generate initial stubs and initializes the entry points + + // entry points that exist in all platforms Note: This is code + // that could be shared among different platforms - however the + // benefit seems to be smaller than the disadvantage of having a + // much more complicated generator structure. See also comment in + // stubRoutines.hpp. + + StubRoutines::_forward_exception_entry = generate_forward_exception(); + + StubRoutines::_call_stub_entry = generate_call_stub(StubRoutines::_call_stub_return_address); + + // is referenced by megamorphic call + StubRoutines::_catch_exception_entry = generate_catch_exception(); + + // atomic calls + StubRoutines::_fence_entry = generate_orderaccess_fence(); + +//// StubRoutines::_handler_for_unsafe_access_entry = generate_handler_for_unsafe_access(); + + StubRoutines::_throw_StackOverflowError_entry = + generate_throw_exception("StackOverflowError throw_exception", + CAST_FROM_FN_PTR(address, + SharedRuntime:: + throw_StackOverflowError)); + StubRoutines::_throw_delayed_StackOverflowError_entry = + generate_throw_exception("delayed StackOverflowError throw_exception", + CAST_FROM_FN_PTR(address, + SharedRuntime::throw_delayed_StackOverflowError)); + // platform dependent + StubRoutines::sw64::_get_previous_sp_entry = generate_get_previous_sp(); + if (UseCRC32Intrinsics) { + // set table address before stub generation which use it + StubRoutines::_crc_table_adr = (address)StubRoutines::sw64::_crc_table; + //ShouldNotReachHere(); + StubRoutines::_updateBytesCRC32 = CAST_FROM_FN_PTR(address, SharedRuntime::updateBytesCRC32); + } + } + + void generate_all() { + // Generates all stubs and initializes the entry points + + // These entry points require SharedInfo::stack0 to be set up in + // non-core builds and need to be relocatable, so they each + // fabricate a RuntimeStub internally. 
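+    // Each of these is built by generate_throw_exception() above as a small
+    // RuntimeStub: enter() saves FP/RA, (framesize - 4) << LogBytesPerWord extra
+    // bytes are reserved (0 with the 4-word layout used there), last_Java_sp/fp
+    // are recorded, the SharedRuntime::throw_* routine is called with the current
+    // thread in c_rarg0, and the stub ends by jumping to
+    // StubRoutines::forward_exception_entry().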
+ StubRoutines::_throw_AbstractMethodError_entry = + generate_throw_exception("AbstractMethodError throw_exception", + CAST_FROM_FN_PTR(address, + SharedRuntime:: + throw_AbstractMethodError)); + + StubRoutines::_throw_IncompatibleClassChangeError_entry = + generate_throw_exception("IncompatibleClassChangeError throw_exception", + CAST_FROM_FN_PTR(address, + SharedRuntime:: + throw_IncompatibleClassChangeError)); + + StubRoutines::_throw_NullPointerException_at_call_entry = + generate_throw_exception("NullPointerException at call throw_exception", + CAST_FROM_FN_PTR(address, + SharedRuntime:: + throw_NullPointerException_at_call)); + + // support for verify_oop (must happen after universe_init) + if (VerifyOops) { + StubRoutines::_verify_oop_subroutine_entry = generate_verify_oop(); + } + + // arraycopy stubs used by compilers + generate_arraycopy_stubs(); + +#ifdef COMPILER2 + //TODO:jzy + if (UseMontgomeryMultiplyIntrinsic) { + StubRoutines::_montgomeryMultiply + = CAST_FROM_FN_PTR(address, SharedRuntime::montgomery_multiply); + } + if (UseMontgomerySquareIntrinsic) { + StubRoutines::_montgomerySquare + = CAST_FROM_FN_PTR(address, SharedRuntime::montgomery_square); + } +#endif + + } + + public: + StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) { + if (all) { + generate_all(); + } else { + generate_initial(); + } + } +}; // end class declaration + +void StubGenerator_generate(CodeBuffer* code, bool all) { + StubGenerator g(code, all); +} diff --git a/src/hotspot/cpu/sw64/stubRoutines_sw64.cpp b/src/hotspot/cpu/sw64/stubRoutines_sw64.cpp new file mode 100644 index 00000000000..ebf2638325e --- /dev/null +++ b/src/hotspot/cpu/sw64/stubRoutines_sw64.cpp @@ -0,0 +1,95 @@ +/* + * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "runtime/deoptimization.hpp" +#include "runtime/frame.inline.hpp" +#include "runtime/stubRoutines.hpp" +#include "runtime/thread.inline.hpp" +#include "utilities/globalDefinitions.hpp" + +// Implementation of the platform-specific part of StubRoutines - for +// a description of how to extend it, see the stubRoutines.hpp file. 
+ +//find the last fp value +address StubRoutines::sw64::_get_previous_sp_entry = NULL; +address StubRoutines::sw64::_call_stub_compiled_return = NULL; +address StubRoutines::sw64::_method_entry_barrier = NULL; + +juint StubRoutines::sw64::_crc_table[] = +{ + 0x00000000UL, 0x77073096UL, 0xee0e612cUL, 0x990951baUL, 0x076dc419UL, + 0x706af48fUL, 0xe963a535UL, 0x9e6495a3UL, 0x0edb8832UL, 0x79dcb8a4UL, + 0xe0d5e91eUL, 0x97d2d988UL, 0x09b64c2bUL, 0x7eb17cbdUL, 0xe7b82d07UL, + 0x90bf1d91UL, 0x1db71064UL, 0x6ab020f2UL, 0xf3b97148UL, 0x84be41deUL, + 0x1adad47dUL, 0x6ddde4ebUL, 0xf4d4b551UL, 0x83d385c7UL, 0x136c9856UL, + 0x646ba8c0UL, 0xfd62f97aUL, 0x8a65c9ecUL, 0x14015c4fUL, 0x63066cd9UL, + 0xfa0f3d63UL, 0x8d080df5UL, 0x3b6e20c8UL, 0x4c69105eUL, 0xd56041e4UL, + 0xa2677172UL, 0x3c03e4d1UL, 0x4b04d447UL, 0xd20d85fdUL, 0xa50ab56bUL, + 0x35b5a8faUL, 0x42b2986cUL, 0xdbbbc9d6UL, 0xacbcf940UL, 0x32d86ce3UL, + 0x45df5c75UL, 0xdcd60dcfUL, 0xabd13d59UL, 0x26d930acUL, 0x51de003aUL, + 0xc8d75180UL, 0xbfd06116UL, 0x21b4f4b5UL, 0x56b3c423UL, 0xcfba9599UL, + 0xb8bda50fUL, 0x2802b89eUL, 0x5f058808UL, 0xc60cd9b2UL, 0xb10be924UL, + 0x2f6f7c87UL, 0x58684c11UL, 0xc1611dabUL, 0xb6662d3dUL, 0x76dc4190UL, + 0x01db7106UL, 0x98d220bcUL, 0xefd5102aUL, 0x71b18589UL, 0x06b6b51fUL, + 0x9fbfe4a5UL, 0xe8b8d433UL, 0x7807c9a2UL, 0x0f00f934UL, 0x9609a88eUL, + 0xe10e9818UL, 0x7f6a0dbbUL, 0x086d3d2dUL, 0x91646c97UL, 0xe6635c01UL, + 0x6b6b51f4UL, 0x1c6c6162UL, 0x856530d8UL, 0xf262004eUL, 0x6c0695edUL, + 0x1b01a57bUL, 0x8208f4c1UL, 0xf50fc457UL, 0x65b0d9c6UL, 0x12b7e950UL, + 0x8bbeb8eaUL, 0xfcb9887cUL, 0x62dd1ddfUL, 0x15da2d49UL, 0x8cd37cf3UL, + 0xfbd44c65UL, 0x4db26158UL, 0x3ab551ceUL, 0xa3bc0074UL, 0xd4bb30e2UL, + 0x4adfa541UL, 0x3dd895d7UL, 0xa4d1c46dUL, 0xd3d6f4fbUL, 0x4369e96aUL, + 0x346ed9fcUL, 0xad678846UL, 0xda60b8d0UL, 0x44042d73UL, 0x33031de5UL, + 0xaa0a4c5fUL, 0xdd0d7cc9UL, 0x5005713cUL, 0x270241aaUL, 0xbe0b1010UL, + 0xc90c2086UL, 0x5768b525UL, 0x206f85b3UL, 0xb966d409UL, 0xce61e49fUL, + 0x5edef90eUL, 0x29d9c998UL, 0xb0d09822UL, 0xc7d7a8b4UL, 0x59b33d17UL, + 0x2eb40d81UL, 0xb7bd5c3bUL, 0xc0ba6cadUL, 0xedb88320UL, 0x9abfb3b6UL, + 0x03b6e20cUL, 0x74b1d29aUL, 0xead54739UL, 0x9dd277afUL, 0x04db2615UL, + 0x73dc1683UL, 0xe3630b12UL, 0x94643b84UL, 0x0d6d6a3eUL, 0x7a6a5aa8UL, + 0xe40ecf0bUL, 0x9309ff9dUL, 0x0a00ae27UL, 0x7d079eb1UL, 0xf00f9344UL, + 0x8708a3d2UL, 0x1e01f268UL, 0x6906c2feUL, 0xf762575dUL, 0x806567cbUL, + 0x196c3671UL, 0x6e6b06e7UL, 0xfed41b76UL, 0x89d32be0UL, 0x10da7a5aUL, + 0x67dd4accUL, 0xf9b9df6fUL, 0x8ebeeff9UL, 0x17b7be43UL, 0x60b08ed5UL, + 0xd6d6a3e8UL, 0xa1d1937eUL, 0x38d8c2c4UL, 0x4fdff252UL, 0xd1bb67f1UL, + 0xa6bc5767UL, 0x3fb506ddUL, 0x48b2364bUL, 0xd80d2bdaUL, 0xaf0a1b4cUL, + 0x36034af6UL, 0x41047a60UL, 0xdf60efc3UL, 0xa867df55UL, 0x316e8eefUL, + 0x4669be79UL, 0xcb61b38cUL, 0xbc66831aUL, 0x256fd2a0UL, 0x5268e236UL, + 0xcc0c7795UL, 0xbb0b4703UL, 0x220216b9UL, 0x5505262fUL, 0xc5ba3bbeUL, + 0xb2bd0b28UL, 0x2bb45a92UL, 0x5cb36a04UL, 0xc2d7ffa7UL, 0xb5d0cf31UL, + 0x2cd99e8bUL, 0x5bdeae1dUL, 0x9b64c2b0UL, 0xec63f226UL, 0x756aa39cUL, + 0x026d930aUL, 0x9c0906a9UL, 0xeb0e363fUL, 0x72076785UL, 0x05005713UL, + 0x95bf4a82UL, 0xe2b87a14UL, 0x7bb12baeUL, 0x0cb61b38UL, 0x92d28e9bUL, + 0xe5d5be0dUL, 0x7cdcefb7UL, 0x0bdbdf21UL, 0x86d3d2d4UL, 0xf1d4e242UL, + 0x68ddb3f8UL, 0x1fda836eUL, 0x81be16cdUL, 0xf6b9265bUL, 0x6fb077e1UL, + 0x18b74777UL, 0x88085ae6UL, 0xff0f6a70UL, 0x66063bcaUL, 0x11010b5cUL, + 0x8f659effUL, 0xf862ae69UL, 0x616bffd3UL, 0x166ccf45UL, 0xa00ae278UL, + 0xd70dd2eeUL, 0x4e048354UL, 0x3903b3c2UL, 
0xa7672661UL, 0xd06016f7UL, + 0x4969474dUL, 0x3e6e77dbUL, 0xaed16a4aUL, 0xd9d65adcUL, 0x40df0b66UL, + 0x37d83bf0UL, 0xa9bcae53UL, 0xdebb9ec5UL, 0x47b2cf7fUL, 0x30b5ffe9UL, + 0xbdbdf21cUL, 0xcabac28aUL, 0x53b39330UL, 0x24b4a3a6UL, 0xbad03605UL, + 0xcdd70693UL, 0x54de5729UL, 0x23d967bfUL, 0xb3667a2eUL, 0xc4614ab8UL, + 0x5d681b02UL, 0x2a6f2b94UL, 0xb40bbe37UL, 0xc30c8ea1UL, 0x5a05df1bUL, + 0x2d02ef8dUL +}; diff --git a/src/hotspot/cpu/sw64/stubRoutines_sw64.hpp b/src/hotspot/cpu/sw64/stubRoutines_sw64.hpp new file mode 100644 index 00000000000..bae939a2fa0 --- /dev/null +++ b/src/hotspot/cpu/sw64/stubRoutines_sw64.hpp @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2003, 2011, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_SW64_VM_STUBROUTINES_SW64_HPP +#define CPU_SW64_VM_STUBROUTINES_SW64_HPP + +// This file holds the platform specific parts of the StubRoutines +// definition. See stubRoutines.hpp for a description on how to +// extend it. + +static bool returns_to_call_stub(address return_pc) { + return return_pc == _call_stub_return_address || return_pc == sw64::get_call_stub_compiled_return(); +} + +enum platform_dependent_constants { + code_size1 = 20000 LP64_ONLY(+12000), // simply increase if too small (assembler will crash if too small) + code_size2 = 42000 LP64_ONLY(+25000) // simply increase if too small (assembler will crash if too small) +}; + +class sw64 { + friend class StubGenerator; + friend class VMStructs; + private: + // If we call compiled code directly from the call stub we will + // need to adjust the return back to the call stub to a specialized + // piece of code that can handle compiled results and cleaning the fpu + // stack. The variable holds that location. 
+ static address _call_stub_compiled_return; + static address _get_previous_sp_entry; + static address _verify_mxcsr_entry; + // shuffle mask for fixing up 128-bit words consisting of big-endian 32-bit integers + static address _key_shuffle_mask_addr; + static address _method_entry_barrier; + // masks and table for CRC32 + static uint64_t _crc_by128_masks[]; + static juint _crc_table[]; + + static address _counter_mask_addr; +public: + // Call back points for traps in compiled code + static address get_call_stub_compiled_return() { return _call_stub_compiled_return; } + + static void set_call_stub_compiled_return(address ret) { _call_stub_compiled_return = ret; } + + static address method_entry_barrier() { return _method_entry_barrier; } + + static address counter_mask_addr() { return _counter_mask_addr; } +}; + +#endif // CPU_SW64_VM_STUBROUTINES_SW64_HPP diff --git a/src/hotspot/cpu/sw64/sw64.ad b/src/hotspot/cpu/sw64/sw64.ad new file mode 100644 index 00000000000..352fd19642a --- /dev/null +++ b/src/hotspot/cpu/sw64/sw64.ad @@ -0,0 +1,15785 @@ +// +// Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. +// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +// +// This code is free software; you can redistribute it and/or modify it +// under the terms of the GNU General Public License version 2 only, as +// published by the Free Software Foundation. +// +// This code is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// version 2 for more details (a copy is included in the LICENSE file that +// accompanied this code). +// +// You should have received a copy of the GNU General Public License version +// 2 along with this work; if not, write to the Free Software Foundation, +// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +// +// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +// or visit www.oracle.com if you need additional information or have any +// questions. +// +// + +// Sw64 Architecture Description File + +//----------REGISTER DEFINITION BLOCK------------------------------------------ +// This information is used by the matcher and the register allocator to +// describe individual registers and classes of registers within the target +// archtecture. + +// format: +// reg_def name (call convention, c-call convention, ideal type, encoding); +// call convention : +// NS = No-Save +// SOC = Save-On-Call +// SOE = Save-On-Entry +// AS = Always-Save +// ideal type : +// see opto/opcodes.hpp for more info +// reg_class name (reg, ...); +// alloc_class name (reg, ...); + +register %{ +// We must define the 64 bit int registers in two 32 bit halves, the +// real lower register and a virtual upper half register. upper halves +// are used by the register allocator but are not actually supplied as +// operands to memory ops. +// +// follow the C1 compiler in making registers +// +// r0-r7,r10-r26 volatile (caller save) +// r27-r32 system (no save, no allocate) +// r8-r9 invisible to the allocator (so we can use them as scratch regs) +// +// as regards Java usage. 
we don't use any callee save registers +// because this makes it difficult to de-optimise a frame (see comment +// in x86 implementation of Deoptimization::unwind_callee_save_values) +// + +// General Registers +// Integer Registers +reg_def V0 (SOC, SOC, Op_RegI, 0, i0->as_VMReg()); +reg_def V0_H (SOC, SOC, Op_RegI, 0, i0->as_VMReg()->next()); + +reg_def T0 (SOC, SOC, Op_RegI, 1, i1->as_VMReg()); +reg_def T0_H (SOC, SOC, Op_RegI, 1, i1->as_VMReg()->next()); +reg_def T1 (SOC, SOC, Op_RegI, 2, i2->as_VMReg()); +reg_def T1_H (SOC, SOC, Op_RegI, 2, i2->as_VMReg()->next()); +reg_def T2 (SOC, SOC, Op_RegI, 3, i3->as_VMReg()); +reg_def T2_H (SOC, SOC, Op_RegI, 3, i3->as_VMReg()->next()); +reg_def T3 (SOC, SOC, Op_RegI, 4, i4->as_VMReg()); +reg_def T3_H (SOC, SOC, Op_RegI, 4, i4->as_VMReg()->next()); +reg_def T4 (SOC, SOC, Op_RegI, 5, i5->as_VMReg()); +reg_def T4_H (SOC, SOC, Op_RegI, 5, i5->as_VMReg()->next()); +reg_def T5 (SOC, SOC, Op_RegI, 6, i6->as_VMReg()); +reg_def T5_H (SOC, SOC, Op_RegI, 6, i6->as_VMReg()->next()); +reg_def T6 (SOC, SOC, Op_RegI, 7, i7->as_VMReg()); +reg_def T6_H (SOC, SOC, Op_RegI, 7, i7->as_VMReg()->next()); +reg_def T7 (SOC, SOC, Op_RegI, 8, i8->as_VMReg()); +reg_def T7_H (SOC, SOC, Op_RegI, 8, i8->as_VMReg()->next()); + +reg_def S0 (SOC, SOE, Op_RegI, 9, i9->as_VMReg()); +reg_def S0_H (SOC, SOE, Op_RegI, 9, i9->as_VMReg()->next()); +reg_def S1 (SOC, SOE, Op_RegI, 10, i10->as_VMReg()); +reg_def S1_H (SOC, SOE, Op_RegI, 10, i10->as_VMReg()->next()); +reg_def S2 (SOC, SOE, Op_RegI, 11, i11->as_VMReg()); +reg_def S2_H (SOC, SOE, Op_RegI, 11, i11->as_VMReg()->next()); +reg_def S3 (SOC, SOE, Op_RegI, 12, i12->as_VMReg()); +reg_def S3_H (SOC, SOE, Op_RegI, 12, i12->as_VMReg()->next()); +reg_def S4 (SOC, SOE, Op_RegI, 13, i13->as_VMReg()); +reg_def S4_H (SOC, SOE, Op_RegI, 13, i13->as_VMReg()->next()); +reg_def S5 (SOC, SOE, Op_RegI, 14, i14->as_VMReg()); +reg_def S5_H (SOC, SOE, Op_RegI, 14, i14->as_VMReg()->next()); +reg_def FP ( NS, SOE, Op_RegI, 15, i15->as_VMReg()); +reg_def FP_H ( NS, SOE, Op_RegI, 15, i15->as_VMReg()->next()); + +reg_def A0 (SOC, SOC, Op_RegI, 16, i16->as_VMReg()); +reg_def A0_H (SOC, SOC, Op_RegI, 16, i16->as_VMReg()->next()); +reg_def A1 (SOC, SOC, Op_RegI, 17, i17->as_VMReg()); +reg_def A1_H (SOC, SOC, Op_RegI, 17, i17->as_VMReg()->next()); +reg_def A2 (SOC, SOC, Op_RegI, 18, i18->as_VMReg()); +reg_def A2_H (SOC, SOC, Op_RegI, 18, i18->as_VMReg()->next()); +reg_def A3 (SOC, SOC, Op_RegI, 19, i19->as_VMReg()); +reg_def A3_H (SOC, SOC, Op_RegI, 19, i19->as_VMReg()->next()); +reg_def A4 (SOC, SOC, Op_RegI, 20, i20->as_VMReg()); +reg_def A4_H (SOC, SOC, Op_RegI, 20, i20->as_VMReg()->next()); +reg_def A5 (SOC, SOC, Op_RegI, 21, i21->as_VMReg()); +reg_def A5_H (SOC, SOC, Op_RegI, 21, i21->as_VMReg()->next()); + +reg_def T8 (SOC, SOC, Op_RegI, 22, i22->as_VMReg()); +reg_def T8_H (SOC, SOC, Op_RegI, 22, i22->as_VMReg()->next()); +reg_def T9 (SOC, SOC, Op_RegI, 23, i23->as_VMReg()); +reg_def T9_H (SOC, SOC, Op_RegI, 23, i23->as_VMReg()->next()); +reg_def T10 (SOC, SOC, Op_RegI, 24, i24->as_VMReg()); +reg_def T10_H (SOC, SOC, Op_RegI, 24, i24->as_VMReg()->next()); +reg_def T11 (SOC, SOC, Op_RegI, 25, i25->as_VMReg()); +reg_def T11_H (SOC, SOC, Op_RegI, 25, i25->as_VMReg()->next()); +reg_def RA ( NS, NS, Op_RegI, 26, i26->as_VMReg()); +reg_def RA_H ( NS, NS, Op_RegI, 26, i26->as_VMReg()->next()); +reg_def T12 (SOC, SOC, Op_RegI, 27, i27->as_VMReg()); +reg_def T12_H (SOC, SOC, Op_RegI, 27, i27->as_VMReg()->next()); +reg_def AT ( NS, NS, Op_RegI, 28, 
i28->as_VMReg()); +reg_def AT_H ( NS, NS, Op_RegI, 28, i28->as_VMReg()->next()); +reg_def GP ( NS, NS, Op_RegI, 29, i29->as_VMReg()); +reg_def GP_H ( NS, NS, Op_RegI, 29, i29->as_VMReg()->next()); +reg_def SP ( NS, NS, Op_RegI, 30, i30->as_VMReg()); +reg_def SP_H ( NS, NS, Op_RegI, 30, i30->as_VMReg()->next()); +reg_def R0 ( NS, NS, Op_RegI, 31, VMRegImpl::Bad()); + +// Floating registers. +reg_def F0 ( SOC, SOC, Op_RegF, 0, f0->as_VMReg()); +reg_def F0_H ( SOC, SOC, Op_RegF, 0, f0->as_VMReg()->next()); +reg_def F1 ( SOC, SOC, Op_RegF, 1, f1->as_VMReg()); +reg_def F1_H ( SOC, SOC, Op_RegF, 1, f1->as_VMReg()->next()); +reg_def F2 ( SOC, SOC, Op_RegF, 2, f2->as_VMReg()); +reg_def F2_H ( SOC, SOC, Op_RegF, 2, f2->as_VMReg()->next()); +reg_def F3 ( SOC, SOC, Op_RegF, 3, f3->as_VMReg()); +reg_def F3_H ( SOC, SOC, Op_RegF, 3, f3->as_VMReg()->next()); +reg_def F4 ( SOC, SOC, Op_RegF, 4, f4->as_VMReg()); +reg_def F4_H ( SOC, SOC, Op_RegF, 4, f4->as_VMReg()->next()); +reg_def F5 ( SOC, SOC, Op_RegF, 5, f5->as_VMReg()); +reg_def F5_H ( SOC, SOC, Op_RegF, 5, f5->as_VMReg()->next()); +reg_def F6 ( SOC, SOC, Op_RegF, 6, f6->as_VMReg()); +reg_def F6_H ( SOC, SOC, Op_RegF, 6, f6->as_VMReg()->next()); +reg_def F7 ( SOC, SOC, Op_RegF, 7, f7->as_VMReg()); +reg_def F7_H ( SOC, SOC, Op_RegF, 7, f7->as_VMReg()->next()); +reg_def F8 ( SOC, SOC, Op_RegF, 8, f8->as_VMReg()); +reg_def F8_H ( SOC, SOC, Op_RegF, 8, f8->as_VMReg()->next()); +reg_def F9 ( SOC, SOC, Op_RegF, 9, f9->as_VMReg()); +reg_def F9_H ( SOC, SOC, Op_RegF, 9, f9->as_VMReg()->next()); +reg_def F10 ( SOC, SOC, Op_RegF, 10, f10->as_VMReg()); +reg_def F10_H ( SOC, SOC, Op_RegF, 10, f10->as_VMReg()->next()); +reg_def F11 ( SOC, SOC, Op_RegF, 11, f11->as_VMReg()); +reg_def F11_H ( SOC, SOC, Op_RegF, 11, f11->as_VMReg()->next()); +reg_def F12 ( SOC, SOC, Op_RegF, 12, f12->as_VMReg()); +reg_def F12_H ( SOC, SOC, Op_RegF, 12, f12->as_VMReg()->next()); +reg_def F13 ( SOC, SOC, Op_RegF, 13, f13->as_VMReg()); +reg_def F13_H ( SOC, SOC, Op_RegF, 13, f13->as_VMReg()->next()); +reg_def F14 ( SOC, SOC, Op_RegF, 14, f14->as_VMReg()); +reg_def F14_H ( SOC, SOC, Op_RegF, 14, f14->as_VMReg()->next()); +reg_def F15 ( SOC, SOC, Op_RegF, 15, f15->as_VMReg()); +reg_def F15_H ( SOC, SOC, Op_RegF, 15, f15->as_VMReg()->next()); +reg_def F16 ( SOC, SOC, Op_RegF, 16, f16->as_VMReg()); +reg_def F16_H ( SOC, SOC, Op_RegF, 16, f16->as_VMReg()->next()); +reg_def F17 ( SOC, SOC, Op_RegF, 17, f17->as_VMReg()); +reg_def F17_H ( SOC, SOC, Op_RegF, 17, f17->as_VMReg()->next()); +reg_def F18 ( SOC, SOC, Op_RegF, 18, f18->as_VMReg()); +reg_def F18_H ( SOC, SOC, Op_RegF, 18, f18->as_VMReg()->next()); +reg_def F19 ( SOC, SOC, Op_RegF, 19, f19->as_VMReg()); +reg_def F19_H ( SOC, SOC, Op_RegF, 19, f19->as_VMReg()->next()); +reg_def F20 ( SOC, SOC, Op_RegF, 20, f20->as_VMReg()); +reg_def F20_H ( SOC, SOC, Op_RegF, 20, f20->as_VMReg()->next()); +reg_def F21 ( SOC, SOC, Op_RegF, 21, f21->as_VMReg()); +reg_def F21_H ( SOC, SOC, Op_RegF, 21, f21->as_VMReg()->next()); +reg_def F22 ( SOC, SOC, Op_RegF, 22, f22->as_VMReg()); +reg_def F22_H ( SOC, SOC, Op_RegF, 22, f22->as_VMReg()->next()); +reg_def F23 ( SOC, SOC, Op_RegF, 23, f23->as_VMReg()); +reg_def F23_H ( SOC, SOC, Op_RegF, 23, f23->as_VMReg()->next()); +reg_def F24 ( SOC, SOC, Op_RegF, 24, f24->as_VMReg()); +reg_def F24_H ( SOC, SOC, Op_RegF, 24, f24->as_VMReg()->next()); +reg_def F25 ( SOC, SOC, Op_RegF, 25, f25->as_VMReg()); +reg_def F25_H ( SOC, SOC, Op_RegF, 25, f25->as_VMReg()->next()); +reg_def F26 ( SOC, SOC, Op_RegF, 26, 
f26->as_VMReg()); +reg_def F26_H ( SOC, SOC, Op_RegF, 26, f26->as_VMReg()->next()); +reg_def F27 ( SOC, SOC, Op_RegF, 27, f27->as_VMReg()); +reg_def F27_H ( SOC, SOC, Op_RegF, 27, f27->as_VMReg()->next()); +reg_def F28 ( SOC, SOC, Op_RegF, 28, f28->as_VMReg()); +reg_def F28_H ( SOC, SOC, Op_RegF, 28, f28->as_VMReg()->next()); +reg_def F29 ( SOC, SOC, Op_RegF, 29, f29->as_VMReg()); +reg_def F29_H ( SOC, SOC, Op_RegF, 29, f29->as_VMReg()->next()); +reg_def F30 ( SOC, SOC, Op_RegF, 30, f30->as_VMReg()); +reg_def F30_H ( SOC, SOC, Op_RegF, 30, f30->as_VMReg()->next()); +reg_def F31 ( SOC, SOC, Op_RegF, 31, f31->as_VMReg()); +reg_def F31_H ( SOC, SOC, Op_RegF, 31, f31->as_VMReg()->next()); + + +// ---------------------------- +// Special Registers +// Condition Codes Flag Registers +// swjdk11 flag reg is GP +reg_def SW64_FLAG (SOC, SOC, Op_RegFlags, 29, as_Register(29)->as_VMReg()); + +//S2 is used for get_thread(S2) +//S5 is uesd for heapbase of compressed oop +alloc_class chunk0( + S0, S0_H, + S1, S1_H, + S3, S3_H, + S4, S4_H, + S5, S5_H, + S2, S2_H, + T2, T2_H, + T3, T3_H, + //T11, T11_H, jzy use rscratch3 + T12, T12_H, + T1, T1_H, // inline_cache_reg + A5, A5_H, + A4, A4_H, + V0, V0_H, + A3, A3_H, + A2, A2_H, + A1, A1_H, + A0, A0_H, + T0, T0_H, + T4, T4_H, + T5, T5_H, + T6, T6_H, + T7, T7_H, + T8, T8_H, + T9, T9_H, + T10, T10_H, + GP, GP_H, + RA, RA_H, + AT, AT_H, + SP, SP_H, // stack_pointer + FP, FP_H // frame_pointer + ); + +alloc_class chunk1( F0, F0_H, + F1, F1_H, + F2, F2_H, + F3, F3_H, + F4, F4_H, + F5, F5_H, + F6, F6_H, + F7, F7_H, + F8, F8_H, + F9, F9_H, + F10, F10_H, + F11, F11_H, + F20, F20_H, + F21, F21_H, + F22, F22_H, + F23, F23_H, + F24, F24_H, + F25, F25_H, + F26, F26_H, + F27, F27_H, + F28, F28_H, + F19, F19_H, + F18, F18_H, + F17, F17_H, + F16, F16_H, + F15, F15_H, + F14, F14_H, + F13, F13_H, + F12, F12_H, + F29, F29_H, + F30, F30_H, + F31, F31_H); + +alloc_class chunk2(SW64_FLAG); + +reg_class s_reg( S0, S1, S2, S3, S4, S5 ); +reg_class s0_reg( S0 ); +reg_class s1_reg( S1 ); +reg_class s2_reg( S2 ); +reg_class s3_reg( S3 ); +reg_class s4_reg( S4 ); +reg_class s5_reg( S5 ); + +//reg_class t_reg( T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12 ); //jzy +reg_class t_reg( T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T12 ); +reg_class t0_reg( T0 ); +reg_class t1_reg( T1 ); +reg_class t2_reg( T2 ); +reg_class t3_reg( T3 ); +reg_class t4_reg( T4 ); +reg_class t5_reg( T5 ); +reg_class t6_reg( T6 ); +reg_class t7_reg( T7 ); +reg_class t8_reg( T8 ); +reg_class t9_reg( T9 ); +reg_class t10_reg( T10 ); +//reg_class t11_reg( T11 ); +reg_class t12_reg( T12 ); + +reg_class a_reg( A0, A1, A2, A3, A4, A5 ); +reg_class a0_reg( A0 ); +reg_class a1_reg( A1 ); +reg_class a2_reg( A2 ); +reg_class a3_reg( A3 ); +reg_class a4_reg( A4 ); +reg_class a5_reg( A5 ); + +reg_class v0_reg( V0 ); + +reg_class sp_reg( SP, SP_H ); +reg_class fp_reg( FP, FP_H ); + +reg_class sw64_flags(SW64_FLAG); + +reg_class v0_long_reg( V0, V0_H ); + +reg_class t0_long_reg( T0, T0_H ); +reg_class t1_long_reg( T1, T1_H ); +reg_class t2_long_reg( T2, T2_H ); +reg_class t3_long_reg( T3, T3_H ); +reg_class t4_long_reg( T4, T4_H ); +reg_class t5_long_reg( T5, T5_H ); +reg_class t6_long_reg( T6, T6_H ); +reg_class t7_long_reg( T7, T7_H ); +reg_class t8_long_reg( T8, T8_H ); +reg_class t9_long_reg( T9, T9_H ); +reg_class t10_long_reg( T10, T10_H ); +//reg_class t11_long_reg( T11, T11_H ); jzy +reg_class t12_long_reg( T12, T12_H ); + +reg_class a0_long_reg( A0, A0_H ); +reg_class a1_long_reg( A1, A1_H ); +reg_class 
a2_long_reg( A2, A2_H ); +reg_class a3_long_reg( A3, A3_H ); +reg_class a4_long_reg( A4, A4_H ); +reg_class a5_long_reg( A5, A5_H ); + +reg_class s0_long_reg( S0, S0_H ); +reg_class s1_long_reg( S1, S1_H ); +reg_class s2_long_reg( S2, S2_H ); +reg_class s3_long_reg( S3, S3_H ); +reg_class s4_long_reg( S4, S4_H ); +reg_class s5_long_reg( S5, S5_H ); + +//TODO:order is OK? jzy +//TODO:no S2 & S5 jzy +//NO T12? +//reg_class int_reg( S1, S0, S4, S3, T11, T2, T3, T1, A5, A4, V0, A3, A2, A1, A0, T0, T4, T5, T6, T7, T8, T9, T10 ); jzy +reg_class int_reg( S1, S0, S4, S3, T2, T3, T1, A5, A4, V0, A3, A2, A1, A0, T0, T4, T5, T6, T7, T8, T9, T10 ); +//TODO:no S2 & S5 jzy +//NO T12? +//reg_class no_Ax_int_reg( S1, S0, S4, S3, T11, T2, T3, T1, V0, T0, T4, T5, T6, T7, T8, T9, T10 ); jzy +reg_class no_Ax_int_reg( S1, S0, S4, S3, T2, T3, T1, V0, T0, T4, T5, T6, T7, T8, T9, T10 ); +//TODO: no S2 & S5 +reg_class any_reg( // without FP + S1, S1_H, + S0, S0_H, + S4, S4_H, + S3, S3_H, + T11, T11_H, + T2, T2_H, + T3, T3_H, + T1, T1_H, + A5, A5_H, + A4, A4_H, + A3, A3_H, + A2, A2_H, + A1, A1_H, + A0, A0_H, + T0, T0_H, + T4, T4_H, + T5, T5_H, + T6, T6_H, + T7, T7_H, + T8, T8_H, + T9, T9_H, + T10, T10_H, + S2, S2_H, // TLS thread + S5, S5_H, // heapbase + V0, V0_H, + RA, RA_H, + T12, T12_H, + AT, AT_H, + GP, GP_H, + SP, SP_H, +); + +reg_class ptr_reg( + S1, S1_H, + S0, S0_H, + S4, S4_H, + S3, S3_H, + //T11, T11_H, jzy + T2, T2_H, + T3, T3_H, + T1, T1_H, + A5, A5_H, + A4, A4_H, + A3, A3_H, + A2, A2_H, + A1, A1_H, + A0, A0_H, + T0, T0_H, + T4, T4_H, + T5, T5_H, + T6, T6_H, + T7, T7_H, + T8, T8_H, + T9, T9_H, + T10, T10_H, + V0, V0_H + ); +//TODO:who no T11? what perpose of T11? jzy +reg_class no_T11_p_reg( + S1, S1_H, + S0, S0_H, + S4, S4_H, + S3, S3_H, + T2, T2_H, + T3, T3_H, + T1, T1_H, + A5, A5_H, + A4, A4_H, + A3, A3_H, + A2, A2_H, + A1, A1_H, + A0, A0_H, + T0, T0_H, + T4, T4_H, + T5, T5_H, + T6, T6_H, + T7, T7_H, + T8, T8_H, + T9, T9_H, + T10, T10_H, + V0, V0_H + ); + +reg_class long_reg( + S1, S1_H, + S0, S0_H, + S4, S4_H, + S3, S3_H, + //T11, T11_H, jzy + T2, T2_H, + T3, T3_H, + T1, T1_H, + A5, A5_H, + A4, A4_H, + A3, A3_H, + A2, A2_H, + A1, A1_H, + A0, A0_H, + T0, T0_H, + T4, T4_H, + T5, T5_H, + T6, T6_H, + T7, T7_H, + T8, T8_H, + T9, T9_H, + T10, T10_H, + V0, V0_H + ); + + +// Floating point registers. 
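+// (Only F0-F26 are exposed to the register allocator through flt_reg/dbl_reg
+//  below; F27-F30 back the dbl_tmp_* scratch classes and F31 is left out of
+//  flt_reg/dbl_reg as well.)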
+//2017/9/6 zyh: F28&F29 are used as temporary registers in float cmp instructs +reg_class flt_reg( F0, F1, F2, F3, F4, F5, F6, F7, F8, F9, F10, F11, F12, F13, F14, F15, F16, F17, F18, F19, F20, F21, F22, F23, F24, F25, F26); +reg_class dbl_reg( F0, F0_H, + F1, F1_H, + F2, F2_H, + F3, F3_H, + F4, F4_H, + F5, F5_H, + F6, F6_H, + F7, F7_H, + F8, F8_H, + F9, F9_H, + F10, F10_H, + F11, F11_H, + F12, F12_H, + F13, F13_H, + F14, F14_H, + F15, F15_H, + F16, F16_H, + F17, F17_H, + F18, F18_H, + F19, F19_H, + F20, F20_H, + F21, F21_H, + F22, F22_H, + F23, F23_H, + F24, F24_H, + F25, F25_H, + F26, F26_H, +// F27, F27_H, +// F28, F28_H, +// F29, F29_H + ); + +reg_class flt_arg0( F16 ); +reg_class dbl_arg0( F16, F16_H ); +reg_class dbl_arg1( F17, F17_H ); +reg_class dbl_tmp_f27( F27, F27_H ); +reg_class dbl_tmp_f28( F28, F28_H ); +reg_class dbl_tmp_f29( F29, F29_H ); +reg_class dbl_tmp_f30( F30, F30_H ); + +%} + +//----------DEFINITION BLOCK--------------------------------------------------- +// Define name --> value mappings to inform the ADLC of an integer valued name +// Current support includes integer values in the range [0, 0x7FFFFFFF] +// Format: +// int_def ( , ); +// Generated Code in ad_.hpp +// #define () +// // value == +// Generated code in ad_.cpp adlc_verification() +// assert( == , "Expect () to equal "); +// +definitions %{ + //int_def DEFAULT_COST ( 100, 100); + int_def HUGE_COST (1000000, 1000000); + int_def INSN_COST ( 100, 100); + + // Memory refs are twice as expensive as run-of-the-mill. + int_def MEMORY_REF_COST ( 200, INSN_COST * 2); + // Branches are even more expensive. + int_def BRANCH_COST ( 300, INSN_COST * 3); + // we use jr instruction to construct call, so more expensive + int_def CALL_COST ( 500, INSN_COST * 5); + int_def VOLATILE_REF_COST ( 1000, INSN_COST * 10); //not in 8?? CHECK djx +%} + + +//----------SOURCE BLOCK------------------------------------------------------- +// This is a block of C++ code which provides values, functions, and +// definitions necessary in the rest of the architecture description + +source_hpp %{ +// Header information of the source block. +// Method declarations/definitions which are used outside +// the ad-scope can conveniently be defined here. +// +// To keep related declarations/definitions/uses close together, +// we switch between source %{ }% and source_hpp %{ }% freely as needed. + +#if INCLUDE_ZGC +#include "gc/z/zBarrierSetAssembler.hpp" +#endif +#include "opto/machnode.hpp" +#include "asm/macroAssembler.hpp" +#include "gc/shared/cardTable.hpp" +#include "gc/shared/cardTableBarrierSet.hpp" +#include "gc/shared/collectedHeap.hpp" +#include "opto/addnode.hpp" +#include "runtime/vm_version.hpp" +#include "utilities/powerOfTwo.hpp" + +class NativeJump; + +class CallStubImpl { + + //-------------------------------------------------------------- + //---< Used for optimization in Compile::shorten_branches >--- + //-------------------------------------------------------------- + + public: + // Size of call trampoline stub. + static uint size_call_trampoline() { + return 0; // no call trampolines on this platform + } + + // number of relocations needed by a call trampoline stub + static uint reloc_call_trampoline() { + return 0; // no call trampolines on this platform + } +}; + +class HandlerImpl { + + public: + + static int emit_exception_handler(CodeBuffer &cbuf); + static int emit_deopt_handler(CodeBuffer& cbuf); + + static uint size_exception_handler() { + // NativeCall instruction size is the same as NativeJump. 
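+    // (On this port that is 6 instructions, i.e. 24 bytes, once SafePatch has
+    // bumped the sizes in generate_initial(), and presumably 5 instructions,
+    // 20 bytes, otherwise; either way align_up(size, 16) below reserves 32 bytes.)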
+ // exception handler starts out as jump and can be patched to + // a call be deoptimization. (4932387) + // Note that this value is also credited (in output.cpp) to + // the size of the code section. + int size = NativeJump::instruction_size; +// int size = NativeCall::instruction_size; + return align_up(size, 16);//need align_up? jzy + } + + static uint size_deopt_handler() { + int size = NativeCall::instruction_size; // BytesPerInstWord; //li48(4) + call(1) + return align_up(size, 16);//need align_up? jzy + } + +}; + +class Node::PD { + public: + enum NodeFlags { + _last_flag = Node::_last_flag + }; +}; + bool is_CAS(int opcode); + bool unnecessary_release(const Node *barrier); + +%} // end source_hpp + +source %{ + + // Derived RegMask with conditionally allocatable registers + + void PhaseOutput::pd_perform_mach_node_analysis() { + } + + int MachNode::pd_alignment_required() const { + return 1; + } + + int MachNode::compute_padding(int current_offset) const { + return 0; + } + + +#define __ _masm. + +#ifdef PRODUCT +#define BLOCK_COMMENT(str) /* nothing */ +#else +#define BLOCK_COMMENT(str) { char line[1024];sprintf(line,"%s:%s:%d",str,__FILE__, __LINE__); __ block_comment(line);} +#endif + +#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") + + +// Emit exception handler code. +// Stuff framesize into a register and call a VM stub routine. + int HandlerImpl::emit_exception_handler(CodeBuffer & cbuf) { + // Note that the code buffer's insts_mark is always relative to insts. + // That's why we must use the macroassembler to generate a handler. + C2_MacroAssembler _masm(&cbuf); + address base = __ start_a_stub(size_exception_handler()); + if (base == NULL) { + ciEnv::current()->record_failure("CodeCache is full"); + return 0; // CodeBuffer::expand failed + } + + int offset = __ offset(); + + __ block_comment("; emit_exception_handler"); + + //cbuf.set_insts_mark(); + //__ relocate(relocInfo::runtime_call_type); + //__ patchable_jump((address)(OptoRuntime::exception_blob()->entry_point())); + __ jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point())); + __ align(16); + assert(__ offset() - offset <= (int) size_exception_handler(), "overflow"); + __ end_a_stub(); + return offset; + // return 0; + } + +// Emit deopt handler code. +int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) { + + // Note that the code buffer's insts_mark is always relative to insts. + // That's why we must use the macroassembler to generate a handler. + C2_MacroAssembler _masm(&cbuf); + address base = __ start_a_stub(size_deopt_handler()); + if (base == NULL) { + ciEnv::current()->record_failure("CodeCache is full"); + return 0; // CodeBuffer::expand failed + } + //__ stop("TODO:not check jzy(emit_deopt_handler)"); + int offset = __ offset(); + address the_pc = (address) __ pc(); + __ block_comment("; emit_deopt_handler"); + __ br(RA, 0); + __ subptr(RA, BytesPerInstWord, RA);//point to the begin of deopt + __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack())); + __ align(16); + assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow %d", (__ offset() - offset)); + __ end_a_stub(); + return offset; +} + + +//============================================================================= + +/* + // Float masks come from different places depending on platform. 
+#ifdef _LP64 + static address float_signmask() { return StubRoutines::x86::float_sign_mask(); } + static address float_signflip() { return StubRoutines::x86::float_sign_flip(); } + static address double_signmask() { return StubRoutines::x86::double_sign_mask(); } + static address double_signflip() { return StubRoutines::x86::double_sign_flip(); } +#else + static address float_signmask() { return (address)float_signmask_pool; } + static address float_signflip() { return (address)float_signflip_pool; } + static address double_signmask() { return (address)double_signmask_pool; } + static address double_signflip() { return (address)double_signflip_pool; } +#endif +*/ + + +const bool Matcher::match_rule_supported(int opcode) { + if (!has_match_rule(opcode)) + return false; + + switch (opcode) { + //Op_CountLeadingZerosI Op_CountLeadingZerosL can be deleted, all MIPS CPUs support clz & dclz. + case Op_CountLeadingZerosI: + case Op_CountLeadingZerosL: + if (!UseCountLeadingZerosInstruction) + return false; + break; + case Op_CountTrailingZerosI: + case Op_CountTrailingZerosL: + if (!UseCountTrailingZerosInstruction) + return false; + break; + } + + return true; // Per default match rules are supported. +} + + const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) { + // identify extra cases that we might want to provide match rules for + // e.g. Op_ vector nodes and other intrinsics while guarding with vlen + if (!match_rule_supported(opcode)) { + return false; + } + bool ret_value = match_rule_supported(opcode); + /*if (ret_value) { + switch (opcode) { + case Op_AddVB: + case Op_SubVB: + if ((vlen == 64) && (VM_Version::supports_avx512bw() == false)) + ret_value = false; + break; + case Op_URShiftVS: + case Op_RShiftVS: + case Op_LShiftVS: + case Op_MulVS: + case Op_AddVS: + case Op_SubVS: + if ((vlen == 32) && (VM_Version::supports_avx512bw() == false)) + ret_value = false; + break; + case Op_CMoveVF: + if (vlen != 8) + ret_value = false; + break; + case Op_CMoveVD: + if (vlen != 4) + ret_value = false; + break; + } + }*/ + + return ret_value; // Per default match rules are supported. +} + +const RegMask* Matcher::predicate_reg_mask(void) { + return NULL; +} + +const TypeVect* Matcher::predicate_reg_type(const Type* elemTy, int length) { + return NULL; +} + +MachOper* Matcher::pd_specialize_generic_vector_operand(MachOper* original_opnd, uint ideal_reg, bool is_temp) { + ShouldNotReachHere(); // generic vector operands not supported + return NULL; +} + +bool Matcher::is_generic_reg2reg_move(MachNode* m) { + ShouldNotReachHere(); // generic vector operands not supported + return false; +} + +bool Matcher::is_generic_vector(MachOper* opnd) { + ShouldNotReachHere(); // generic vector operands not supported + return false; +} + +OptoRegPair Matcher::vector_return_value(uint ideal_reg) { + ShouldNotReachHere(); + return OptoRegPair(0, 0); +} + +const int Matcher::float_pressure(int default_pressure_threshold) { + int float_pressure_threshold = default_pressure_threshold; + /* +#ifdef _LP64 + if (UseAVX > 2) { + // Increase pressure threshold on machines with AVX3 which have + // 2x more XMM registers. + float_pressure_threshold = default_pressure_threshold * 2; + } +#endif + */ + return float_pressure_threshold; +} + +// Max vector size in bytes. 0 if not supported. +const int Matcher::vector_width_in_bytes(BasicType bt) { + /*assert(is_java_primitive(bt), "only primitive type vectors"); + if (UseSSE < 2) return 0; + // SSE2 supports 128bit vectors for all types. 
+ // AVX2 supports 256bit vectors for all types. + // AVX2/EVEX supports 512bit vectors for all types. + int size = (UseAVX > 1) ? (1 << UseAVX) * 8 : 16; + // AVX1 supports 256bit vectors only for FLOAT and DOUBLE. + if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE)) + size = (UseAVX > 2) ? 64 : 32; + if (UseAVX > 2 && (bt == T_BYTE || bt == T_SHORT || bt == T_CHAR)) + size = (VM_Version::supports_avx512bw()) ? 64 : 32; + // Use flag to limit vector size. + size = MIN2(size,(int)MaxVectorSize); + // Minimum 2 values in vector (or 4 for bytes). + switch (bt) { + case T_DOUBLE: + case T_LONG: + if (size < 16) return 0; + break; + case T_FLOAT: + case T_INT: + if (size < 8) return 0; + break; + case T_BOOLEAN: + if (size < 4) return 0; + break; + case T_CHAR: + if (size < 4) return 0; + break; + case T_BYTE: + if (size < 4) return 0; + break; + case T_SHORT: + if (size < 4) return 0; + break; + default: + ShouldNotReachHere(); + } + return size;*/ + return 0; +} + +// Limits on vector size (number of elements) loaded into vector. +const int Matcher::max_vector_size(const BasicType bt) { + assert(is_java_primitive(bt), "only primitive type vectors"); + return vector_width_in_bytes(bt)/type2aelembytes(bt); +} + +const int Matcher::min_vector_size(const BasicType bt) { + return max_vector_size(bt); // Same as max. +} + + +const int Matcher::scalable_vector_reg_size(const BasicType bt) { + return 0; +} + +// Vector ideal reg +const uint Matcher::vector_ideal_reg(int size) { + assert(MaxVectorSize == 8, ""); + switch(size) { + case 8: return Op_VecD; + } + ShouldNotReachHere(); + return 0; +} + + +// Should the Matcher clone shifts on addressing modes, expecting them +// to be subsumed into complex addressing expressions or compute them +// into registers? +bool Matcher::pd_clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) { + return clone_base_plus_offset_address(m, mstack, address_visited); +} + + +// Should the matcher clone input 'm' of node 'n'? +bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) { + + if (is_vshift_con_pattern(n, m)) { // ShiftV src (ShiftCntV con) + mstack.push(m, Visit); // m = ShiftCntV + return true; + } + return false; +} + + + +/* +static inline Assembler::ComparisonPredicate booltest_pred_to_comparison_pred(int bt) { + switch (bt) { + switch (bt) { + case BoolTest::eq: + return Assembler::eq; + case BoolTest::ne: + return Assembler::neq; + case BoolTest::le: + case BoolTest::ule: + return Assembler::le; + case BoolTest::ge: + case BoolTest::uge: + return Assembler::nlt; + case BoolTest::lt: + case BoolTest::ult: + return Assembler::lt; + case BoolTest::gt: + case BoolTest::ugt: + return Assembler::nle; + default : ShouldNotReachHere(); return Assembler::_false; + } +} + +static inline Assembler::ComparisonPredicateFP booltest_pred_to_comparison_pred_fp(int bt) { + switch (bt) { + case BoolTest::eq: return Assembler::EQ_OQ; // ordered non-signaling + // As per JLS 15.21.1, != of NaNs is true. Thus use unordered compare. 
+ case BoolTest::ne: return Assembler::NEQ_UQ; // unordered non-signaling + case BoolTest::le: return Assembler::LE_OQ; // ordered non-signaling + case BoolTest::ge: return Assembler::GE_OQ; // ordered non-signaling + case BoolTest::lt: return Assembler::LT_OQ; // ordered non-signaling + case BoolTest::gt: return Assembler::GT_OQ; // ordered non-signaling + default: ShouldNotReachHere(); return Assembler::FALSE_OS; + } +} +*/ + +/* +// Helper methods for MachSpillCopyNode::implementation(). +static void vec_mov_helper(CodeBuffer *cbuf, int src_lo, int dst_lo, + int src_hi, int dst_hi, uint ireg, outputStream* st) { + // In 64-bit VM size calculation is very complex. Emitting instructions + // into scratch buffer is used to get size in 64-bit VM. + /* + + assert(ireg == Op_VecS || // 32bit vector + (src_lo & 1) == 0 && (src_lo + 1) == src_hi && + (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi, + "no non-adjacent vector moves" ); + if (cbuf) { + C2_MacroAssembler _masm(cbuf); + switch (ireg) { + case Op_VecS: // copy whole register + case Op_VecD: + case Op_VecX: +#ifndef _LP64 + __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); +#else + if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { + __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); + } else { + __ vextractf32x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0); + } +#endif + break; + case Op_VecY: +#ifndef _LP64 + __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); +#else + if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { + __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); + } else { + __ vextractf64x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0); + } +#endif + break; + case Op_VecZ: + __ evmovdquq(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2); + break; + default: + ShouldNotReachHere(); + } +#ifndef PRODUCT + } else { + switch (ireg) { + case Op_VecS: + case Op_VecD: + case Op_VecX: + st->print("movdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]); + break; + case Op_VecY: + case Op_VecZ: + st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]); + break; + default: + ShouldNotReachHere(); + } +#endif + } + +} + +void int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load, + int stack_offset, int reg, uint ireg, outputStream* st) { + // In 64-bit VM size calculation is very complex. Emitting instructions + // into scratch buffer is used to get size in 64-bit VM. 
+ /* + LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); ) + if (cbuf) { + C2_MacroAssembler _masm(cbuf); + int offset = __ offset(); + if (is_load) { + switch (ireg) { + case Op_VecS: + __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); + break; + case Op_VecD: + __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); + break; + case Op_VecX: +#ifndef _LP64 + __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); +#else + if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { + __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); + } else { + __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2); + __ vinsertf32x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0); + } +#endif + break; + case Op_VecY: +#ifndef _LP64 + __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); +#else + if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { + __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); + } else { + __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2); + __ vinsertf64x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0); + } +#endif + break; + case Op_VecZ: + __ evmovdquq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2); + break; + default: + ShouldNotReachHere(); + } + } else { // store + switch (ireg) { + case Op_VecS: + __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); + break; + case Op_VecD: + __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); + break; + case Op_VecX: +#ifndef _LP64 + __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); +#else + if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { + __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); + } + else { + __ vextractf32x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0); + } +#endif + break; + case Op_VecY: +#ifndef _LP64 + __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); +#else + if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { + __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); + } + else { + __ vextractf64x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0); + } +#endif + break; + case Op_VecZ: + __ evmovdquq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2); + break; + default: + ShouldNotReachHere(); + } + } + int size = __ offset() - offset; +#ifdef ASSERT + int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : (UseAVX > 2) ? 6 : 4); + // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix. 
+ assert(!do_size || size == (5+offset_size), "incorrect size calculattion"); +#endif + return size; +#ifndef PRODUCT + } else if (!do_size) { + if (is_load) { + switch (ireg) { + case Op_VecS: + st->print("movd %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); + break; + case Op_VecD: + st->print("movq %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); + break; + case Op_VecX: + st->print("movdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); + break; + case Op_VecY: + case Op_VecZ: + st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); + break; + default: + ShouldNotReachHere(); + } + } else { // store + switch (ireg) { + case Op_VecS: + st->print("movd [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); + break; + case Op_VecD: + st->print("movq [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); + break; + case Op_VecX: + st->print("movdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); + break; + case Op_VecY: + case Op_VecZ: + st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); + break; + default: + ShouldNotReachHere(); + } + } +#endif + } + bool is_single_byte = false; + int vec_len = 0; + if ((UseAVX > 2) && (stack_offset != 0)) { + int tuple_type = Assembler::EVEX_FVM; + int input_size = Assembler::EVEX_32bit; + switch (ireg) { + case Op_VecS: + tuple_type = Assembler::EVEX_T1S; + break; + case Op_VecD: + tuple_type = Assembler::EVEX_T1S; + input_size = Assembler::EVEX_64bit; + break; + case Op_VecX: + break; + case Op_VecY: + vec_len = 1; + break; + case Op_VecZ: + vec_len = 2; + break; + } + is_single_byte = Assembler::query_compressed_disp_byte(stack_offset, true, vec_len, tuple_type, input_size, 0); + } + int offset_size = 0; + int size = 5; + if (UseAVX > 2 ) { + if (VM_Version::supports_avx512novl() && (vec_len == 2)) { + offset_size = (stack_offset == 0) ? 0 : ((is_single_byte) ? 1 : 4); + size += 2; // Need an additional two bytes for EVEX encoding + } else if (VM_Version::supports_avx512novl() && (vec_len < 2)) { + offset_size = (stack_offset == 0) ? 0 : ((stack_offset <= 127) ? 1 : 4); + } else { + offset_size = (stack_offset == 0) ? 0 : ((is_single_byte) ? 1 : 4); + size += 2; // Need an additional two bytes for EVEX encodding + } + } else { + offset_size = (stack_offset == 0) ? 0 : ((stack_offset <= 127) ? 1 : 4); + } + // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix. + return size+offset_size; + return 0; +} +*/ +static inline jint replicate4_imm(int con, int width) { + // Load a constant of "width" (in bytes) and replicate it to fill 32bit. + assert(width == 1 || width == 2, "only byte or short types here"); + int bit_width = width * 8; + jint val = con; + val &= (1 << bit_width) - 1; // mask off sign bits + while(bit_width < 32) { + val |= (val << bit_width); + bit_width <<= 1; + } + return val; +} + +static inline jlong replicate8_imm(int con, int width) { + // Load a constant of "width" (in bytes) and replicate it to fill 64bit. 
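+  // e.g. replicate8_imm(0x7f, 1) == 0x7f7f7f7f7f7f7f7f and
+  //      replicate8_imm(0x1234, 2) == 0x1234123412341234
+  //      (replicate4_imm above gives 0x7f7f7f7f for (0x7f, 1)).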
+ assert(width == 1 || width == 2 || width == 4, "only byte, short or int types here"); + int bit_width = width * 8; + jlong val = con; + val &= (((jlong) 1) << bit_width) - 1; // mask off sign bits + while(bit_width < 64) { + val |= (val << bit_width); + bit_width <<= 1; + } + return val; +} +//============================================================================= +#ifndef PRODUCT +void MachNopNode::format( PhaseRegAlloc *, outputStream* st ) const { + st->print("NOP \t# %d bytes pad for loops and calls", 4 * _count); +} +#endif + +void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc * ) const { + C2_MacroAssembler _masm(&cbuf); + int i = 0; + for(i = 0; i < _count; i++) + __ nop(); +} + +uint MachNopNode::size(PhaseRegAlloc *) const { + return 4 * _count; +} + +const Pipeline* MachNopNode::pipeline() const { + return MachNode::pipeline_class(); +} + +#ifndef PRODUCT + void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const { + st->print("# breakpoint"); + } +#endif + + void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc* ra_) const { + C2_MacroAssembler _masm(&cbuf); + //__ stop("breakpoint! ");// stop is ok ?? lsp + __ block_comment("execute breakpoint"); + __ int3(); + } + + uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const { + return MachNode::size(ra_); + } + +#define RELOC_IMM64 Assembler::imm_operand +#define RELOC_DISP32 Assembler::disp32_operand +/* +#define __ _masm. + +static bool generate_vzeroupper(Compile* C) { + return (VM_Version::supports_vzeroupper() && (C->max_vector_size() > 16 || C->clear_upper_avx() == true)) ? true: false; // Generate vzeroupper +} + +static int clear_avx_size() { + return generate_vzeroupper(Compile::current()) ? 3: 0; // vzeroupper +}*/ + +// !!!!! Special hack to get all types of calls to specify the byte offset +// from the start of the call to the point where the return address +// will point. +int MachCallStaticJavaNode::ret_addr_offset() +{ +// warning("TODO:MachCallStaticJavaNode::ret_addr_offset(), check lsp"); + if (SafePatch) { + assert(NativeCall::instruction_size == 24, "in MachCallStaticJavaNode::ret_addr_offset"); + } else { + assert(NativeCall::instruction_size == 20, "in MachCallStaticJavaNode::ret_addr_offset"); // don't consider setfpec1 + } + return NativeCall::instruction_size; +} + +int MachCallDynamicJavaNode::ret_addr_offset() +{ + //TODO:warning("TODO:MachCallDynamicJavaNode::ret_addr_offset(), check lsp"); + //ldi IC_Klass, + //sll IC_Klass, + //ldih IC_Klass + //ldi IC_Klass // refer to MacroAssembler::ic_call(address entry) + + //ldi T12 + //sll T12 + //ldih T12 + //ldi T12 + //call T12 + //nop + if (SafePatch) { + assert(NativeCall::instruction_size == 24, "in MachCallStaticJavaNode::ret_addr_offset"); + } else { + assert(NativeCall::instruction_size == 20, "in MachCallStaticJavaNode::ret_addr_offset"); // don't consider setfpec1 + } + return 4 * BytesPerInstWord + NativeCall::instruction_size; +} + +int MachCallRuntimeNode::ret_addr_offset() { + if (SafePatch) { + assert(NativeCall::instruction_size == 24, "in MachCallRuntimeNode::ret_addr_offset()"); + } else { +// warning("TODO:MachCallRuntimeNode::ret_addr_offset(), check lsp");// need adjust for enc_class Java_To_Runtime ? 
lsp + assert(NativeCall::instruction_size == 20, "in MachCallRuntimeNode::ret_addr_offset()"); + } + return 4 * BytesPerInstWord + NativeCall::instruction_size; // don't consider setfpec1 +} + +int MachCallNativeNode::ret_addr_offset() { + ShouldNotReachHere(); + return -1; +} + +// +// Compute padding required for nodes which need alignment +// + +// no use in sw8!! CHECK djx +// The address of the call instruction needs to be 4-byte aligned to +// ensure that it does not span a cache line so that it can be patched. +int CallStaticJavaDirectNode::compute_padding(int current_offset) const +{ +// warning("TODO:CallStaticJavaDirectNode::compute_padding, check lsp"); + return align_up(current_offset, alignment_required()) - current_offset; +// return 0; +} + +// The address of the call instruction needs to be 4-byte aligned to +// ensure that it does not span a cache line so that it can be patched. +int CallDynamicJavaDirectNode::compute_padding(int current_offset) const +{ +// warning("TODO:CallDynamicJavaDirectNode::compute_padding, check lsp"); + current_offset += 4 * BytesPerInstWord; //skip li48 + return align_up(current_offset, alignment_required()) - current_offset; +// return 0; +} + +//swjdk8 has it use for CallLeafNoFPDirect ins_alignment(16) lsp +//int CallLeafNoFPDirectNode::compute_padding(int current_offset) const { +// return round_to(current_offset, alignment_required()) - current_offset; +//} +// +//use for CallRuntimeDirect ins_alignment(16) +//int CallLeafDirectNode::compute_padding(int current_offset) const { +// return round_to(current_offset, alignment_required()) - current_offset; +//} +// +// use for CallRuntimeDirect ins_alignment(16) +//int CallRuntimeDirectNode::compute_padding(int current_offset) const { +// return round_to(current_offset, alignment_required()) - current_offset; +//} + +//============================================================================= +const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty; + +int ConstantTable::calculate_table_base_offset() const { + return 0; // absolute addressing, no offset +} + +bool MachConstantBaseNode::requires_postalloc_expand() const { return false; } +void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) { + ShouldNotReachHere(); +} + +void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const { + // Empty encoding + } + +uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const { + return 0; +} + +#ifndef PRODUCT +void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const { + st->print("# MachConstantBaseNode (empty encoding)"); +} +#endif + + +//============================================================================= +#ifndef PRODUCT +void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const { + Compile* C = ra_->C; + + int framesize = C->output()->frame_size_in_bytes(); + int bangsize = C->output()->bang_size_in_bytes(); + assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); + + // Calls to C2R adapters often do not accept exceptional returns. + // We require that their callers must bang for them. But be careful, because + // some VM calls (such as call site linkage) can use several kilobytes of + // stack. But the stack safety zone should account for that.
+ // See bugs 4446381, 4468289, 4497237. + if (C->output()->need_stack_bang(bangsize)) { + st->print_cr("# stack bang %d", bangsize); st->print("\t"); + } + st->print("\tstl ra, %d(esp) @ MachPrologNode\n\t", -wordSize); + st->print("\tstl rfp, %d(esp) \n\t", -wordSize*2); + + if (PreserveFramePointer) { + st->print("\tsubptr esp, %d, rfp \n\t", wordSize*2); + } + st->print("\tsubptr esp, %d, esp",framesize); +} +#endif + +void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { + Compile* C = ra_->C; + C2_MacroAssembler _masm(&cbuf); + + int framesize = C->output()->frame_size_in_bytes(); + int bangsize = C->output()->bang_size_in_bytes(); + + if (C->clinit_barrier_on_entry()) { + assert(VM_Version::supports_fast_class_init_checks(), "sanity"); + assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started"); + + Label L_skip_barrier; + Register klass = rscratch1; + + __ mov_metadata(klass, C->method()->holder()->constant_encoding()); + __ clinit_barrier(klass, rthread, &L_skip_barrier /*L_fast_path*/); + + __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); // slow path + + __ bind(L_skip_barrier); + } + + assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); + + if (C->output()->need_stack_bang(bangsize)) { + __ generate_stack_overflow_check(bangsize); + + } + + if (PreserveFramePointer) + __ subptr(esp, wordSize*2, rfp); + + __ subptr(esp, framesize, esp); + __ stl(RA, framesize - wordSize, esp); + __ stl(rfp, framesize - wordSize*2, esp); + __ nop(); //Make enough room for patch_verified_entry() + __ nop(); + + C->output()->set_frame_complete(cbuf.insts_size()); + if (C->has_mach_constant_base_node()) { + // NOTE: We set the table base offset here because users might be + // emitted before MachConstantBaseNode.
+ ConstantTable& constant_table = C->output()->constant_table(); + constant_table.set_table_base_offset(constant_table.calculate_table_base_offset()); + } +} + +uint MachPrologNode::size(PhaseRegAlloc* ra_) const +{ + return MachNode::size(ra_); // too many variables; just compute it + // the hard way +} + +int MachPrologNode::reloc() const +{ + return 0; // a large enough number +} + +//============================================================================= +#ifndef PRODUCT +void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const +{ + Compile* C = ra_->C; +// if (generate_vzeroupper(C)) { +// st->print("vzeroupper"); +// st->cr(); st->print("\t"); +// } + int framesize = C->output()->frame_size_in_bytes(); + assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); +// // Remove word for return adr already pushed +// // and RBP +// framesize -= 2*wordSize; + + st->print("\tldl RA, %d, esp # Restore RA ", framesize - wordSize); + st->cr(); st->print("\t"); + st->print("\tldl rfp, %d, esp # Restore rfp ", framesize - wordSize*2); + st->cr(); st->print("\t"); + st->print("addptr esp, %d, esp # Rlease stack @ MachEpilogNode",framesize); + st->cr(); st->print("\t"); + + if (do_polling() && C->is_method_compilation()) { + st->print("# test polling word\n\t"); + st->print("ldr rscratch1, [rthread],#%d\n\t", in_bytes(JavaThread::polling_word_offset())); + st->print("cmp sp, rscratch1\n\t"); + st->print("bgt #slow_path"); + } +} +#endif + +void MachEpilogNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const +{ + Compile* C = ra_->C; + C2_MacroAssembler _masm(&cbuf); + + int framesize = C->output()->frame_size_in_bytes(); + assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); + + // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here + + __ ldl(RA, framesize - wordSize, esp); + __ ldl(rfp, framesize - wordSize * 2, esp); + __ addptr(esp, framesize, esp); + + if (StackReservedPages > 0 && C->has_reserved_stack_access()) { + __ reserved_stack_check(); + } + + if (do_polling() && C->is_method_compilation()) { + Label dummy_label; + Label* code_stub = &dummy_label; + if (!C->output()->in_scratch_emit_size()) { + code_stub = &C->output()->safepoint_poll_table()->add_safepoint(__ offset()); + } + __ relocate(relocInfo::poll_return_type); + __ safepoint_poll(*code_stub, rthread, rscratch3, true, false, true); + } +} + +uint MachEpilogNode::size(PhaseRegAlloc* ra_) const +{ + return MachNode::size(ra_); // too many variables; just compute it + // the hard way +} + +int MachEpilogNode::reloc() const +{ +// tty->print_cr(">>>>MachEpilog"); while(1); + return 2; // a large enough number +} + +const Pipeline* MachEpilogNode::pipeline() const +{ + return MachNode::pipeline_class(); +} + +//============================================================================= + +enum RC { + rc_bad, + rc_int, + rc_float, + rc_stack +}; + +static enum RC rc_class(OptoReg::Name reg) +{ + if( !OptoReg::is_valid(reg) ) return rc_bad; + + if (OptoReg::is_stack(reg)) return rc_stack; + + VMReg r = OptoReg::as_VMReg(reg); + + if (r->is_Register()) return rc_int; + + assert(r->is_FloatRegister(), "must be"); + return rc_float; +} + +// Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad. 
+/* +static void vec_mov_helper(CodeBuffer *cbuf, int src_lo, int dst_lo, + int src_hi, int dst_hi, uint ireg, outputStream* st); + + void vec_spill_helper(CodeBuffer *cbuf, bool is_load, + int stack_offset, int reg, uint ireg, outputStream* st); + +static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load, + int stack_offset, int reg, uint ireg, outputStream* st);*/ + +static void vec_stack_to_stack_helper(CodeBuffer *cbuf, int src_offset, + int dst_offset, uint ireg, outputStream* st) { + /*if (cbuf) { + C2_MacroAssembler _masm(cbuf); + switch (ireg) { + case Op_VecS: + __ movq(Address(rsp, -8), rax); + __ movl(rax, Address(rsp, src_offset)); + __ movl(Address(rsp, dst_offset), rax); + __ movq(rax, Address(rsp, -8)); + break; + case Op_VecD: + __ pushq(Address(rsp, src_offset)); + __ popq (Address(rsp, dst_offset)); + break; + case Op_VecX: + __ pushq(Address(rsp, src_offset)); + __ popq (Address(rsp, dst_offset)); + __ pushq(Address(rsp, src_offset+8)); + __ popq (Address(rsp, dst_offset+8)); + break; + case Op_VecY: + __ vmovdqu(Address(rsp, -32), xmm0); + __ vmovdqu(xmm0, Address(rsp, src_offset)); + __ vmovdqu(Address(rsp, dst_offset), xmm0); + __ vmovdqu(xmm0, Address(rsp, -32)); + break; + case Op_VecZ: + __ evmovdquq(Address(rsp, -64), xmm0, 2); + __ evmovdquq(xmm0, Address(rsp, src_offset), 2); + __ evmovdquq(Address(rsp, dst_offset), xmm0, 2); + __ evmovdquq(xmm0, Address(rsp, -64), 2); + break; + default: + ShouldNotReachHere(); + } +#ifndef PRODUCT + } else { + switch (ireg) { + case Op_VecS: + st->print("movq [rsp - #8], rax\t# 32-bit mem-mem spill\n\t" + "movl rax, [rsp + #%d]\n\t" + "movl [rsp + #%d], rax\n\t" + "movq rax, [rsp - #8]", + src_offset, dst_offset); + break; + case Op_VecD: + st->print("pushq [rsp + #%d]\t# 64-bit mem-mem spill\n\t" + "popq [rsp + #%d]", + src_offset, dst_offset); + break; + case Op_VecX: + st->print("pushq [rsp + #%d]\t# 128-bit mem-mem spill\n\t" + "popq [rsp + #%d]\n\t" + "pushq [rsp + #%d]\n\t" + "popq [rsp + #%d]", + src_offset, dst_offset, src_offset+8, dst_offset+8); + break; + case Op_VecY: + st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t" + "vmovdqu xmm0, [rsp + #%d]\n\t" + "vmovdqu [rsp + #%d], xmm0\n\t" + "vmovdqu xmm0, [rsp - #32]", + src_offset, dst_offset); + break; + case Op_VecZ: + st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t" + "vmovdqu xmm0, [rsp + #%d]\n\t" + "vmovdqu [rsp + #%d], xmm0\n\t" + "vmovdqu xmm0, [rsp - #64]", + src_offset, dst_offset); + break; + default: + ShouldNotReachHere(); + } +#endif + }*/ +} + +uint MachSpillCopyNode::implementation(CodeBuffer* cbuf, + PhaseRegAlloc* ra_, + bool do_size, + outputStream* st) const { + assert(cbuf != NULL || st != NULL, "sanity"); + // Get registers to move + OptoReg::Name src_second = ra_->get_reg_second(in(1)); + OptoReg::Name src_first = ra_->get_reg_first(in(1)); + OptoReg::Name dst_second = ra_->get_reg_second(this); + OptoReg::Name dst_first = ra_->get_reg_first(this); + + enum RC src_second_rc = rc_class(src_second); + enum RC src_first_rc = rc_class(src_first); + enum RC dst_second_rc = rc_class(dst_second); + enum RC dst_first_rc = rc_class(dst_first); + + assert(OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), + "must move at least 1 register" ); + // Generate spill code! 
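+  // The cases below dispatch on the (source, destination) register classes
+  // returned by rc_class(): stack-to-stack copies bounce through rscratch3,
+  // stack<->register copies use the load/store matching the operand width
+  // (ldl/stl, ldw/stw, fldd/flds, fstd/fsts), and register-to-register copies
+  // pick the integer, float or cross-file move (movl/movws, mov_d/mov_s,
+  // ifmovd/ifmovs, fimovd/fimovs).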
+ int size = 0; + if (src_first == dst_first && src_second == dst_second) { + // Self copy, no move + return 0; + } + if (bottom_type()->isa_vect() != NULL) { + uint ireg = ideal_reg(); + assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity"); + assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity"); + if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) { + // mem -> mem + int src_offset = ra_->reg2offset(src_first); + int dst_offset = ra_->reg2offset(dst_first); + vec_stack_to_stack_helper(cbuf, src_offset, dst_offset, ireg, st); + } else if (src_first_rc == rc_float && dst_first_rc == rc_float ) { + //vec_mov_helper(cbuf, false, src_first, dst_first, src_second, dst_second, ireg, st); + } else if (src_first_rc == rc_float && dst_first_rc == rc_stack ) { + int stack_offset = ra_->reg2offset(dst_first); + //vec_spill_helper(cbuf, false, false, stack_offset, src_first, ireg, st); + } else if (src_first_rc == rc_stack && dst_first_rc == rc_float ) { + int stack_offset = ra_->reg2offset(src_first); + //vec_spill_helper(cbuf, false, true, stack_offset, dst_first, ireg, st); + } else { + ShouldNotReachHere(); + } + return 0; + } + if (src_first_rc == rc_stack) { + // mem -> + if (dst_first_rc == rc_stack) { + // mem -> mem + assert(src_second != dst_first, "overlap"); + if ((src_first & 1) == 0 && src_first + 1 == src_second && + (dst_first & 1) == 0 && dst_first + 1 == dst_second) { + // 64-bit + int src_offset = ra_->reg2offset(src_first); + int dst_offset = ra_->reg2offset(dst_first); + if (cbuf) { + C2_MacroAssembler _masm(cbuf); + __ ldl(rscratch3, Address(esp, src_offset)); + __ stl(rscratch3, Address(esp, dst_offset)); +#ifndef PRODUCT + } else { + if(!do_size){ + if (size != 0) st->print("\n\t"); + st->print("ldl rscratch3, [esp + #%d]\t# 64-bit mem-mem spill 1\n\t" + "stl rscratch3, [esp + #%d]", + src_offset, dst_offset); + } +#endif + } + size += 8; + } else { + // 32-bit + assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); + assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); + // No pushl/popl, so: + int src_offset = ra_->reg2offset(src_first); + int dst_offset = ra_->reg2offset(dst_first); + if (cbuf) { + C2_MacroAssembler _masm(cbuf); + __ ldw(rscratch3, Address(esp, src_offset)); + __ stw(rscratch3, Address(esp, dst_offset)); +#ifndef PRODUCT + } else { + if(!do_size){ + if (size != 0) st->print("\n\t"); + st->print("ldw rscratch3, [esp + #%d] spill 2\n\t" + "stw rscratch3, [esp + #%d]\n\t", + src_offset, dst_offset); + } +#endif + } + size += 8; + } + return size; + } else if (dst_first_rc == rc_int) { + // mem -> gpr + if ((src_first & 1) == 0 && src_first + 1 == src_second && + (dst_first & 1) == 0 && dst_first + 1 == dst_second) { + // 64-bit + int offset = ra_->reg2offset(src_first); + if (cbuf) { + C2_MacroAssembler _masm(cbuf); + __ ldl(as_Register(Matcher::_regEncode[dst_first]), Address(esp, offset)); +#ifndef PRODUCT + } else { + if(!do_size){ + if (size != 0) st->print("\n\t"); + st->print("ldl %s, [esp + #%d]\t# spill 3", + Matcher::regName[dst_first], + offset); + } +#endif + } + size += 4; + } else { + // 32-bit + assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); + assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); + int offset = ra_->reg2offset(src_first); + if (cbuf) { + C2_MacroAssembler _masm(cbuf); + if (this->ideal_reg() == Op_RegI) + __ 
ldw(as_Register(Matcher::_regEncode[dst_first]), Address(esp, offset)); + else + __ ldwu(as_Register(Matcher::_regEncode[dst_first]), Address(esp, offset)); +#ifndef PRODUCT + } else { + if(!do_size){ + if (size != 0) st->print("\n\t"); + if (this->ideal_reg() == Op_RegI) + st->print("ldw %s, [esp + #%d]\t# spill 4", + Matcher::regName[dst_first], + offset); + else + st->print("ldwu %s, [esp + #%d]\t# spill 5", + Matcher::regName[dst_first], + offset); + } +#endif + } + if (this->ideal_reg() == Op_RegI) { + size += 4; + } else { + size += 8; + } + } + return size; + } else if (dst_first_rc == rc_float) { + // mem-> xmm + if ((src_first & 1) == 0 && src_first + 1 == src_second && + (dst_first & 1) == 0 && dst_first + 1 == dst_second) { + // 64-bit + int offset = ra_->reg2offset(src_first); + if (cbuf) { + C2_MacroAssembler _masm(cbuf); + __ fldd( as_FloatRegister(Matcher::_regEncode[dst_first]), Address(esp, offset)); +#ifndef PRODUCT + } else { + if(!do_size){ + if (size != 0) st->print("\n\t"); + st->print("fldd %s, [esp + #%d]\t# spill 6", + Matcher::regName[dst_first], + offset); + } +#endif + } + size += 4; + } else { + // 32-bit + assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); + assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); + int offset = ra_->reg2offset(src_first); + if (cbuf) { + C2_MacroAssembler _masm(cbuf); + __ flds( as_FloatRegister(Matcher::_regEncode[dst_first]), Address(esp, offset)); +#ifndef PRODUCT + } else { + if(!do_size){ + if (size != 0) st->print("\n\t"); + st->print("flds %s, [esp + #%d]\t# spill 7", + Matcher::regName[dst_first], + offset); + } +#endif + } + size += 4; + } + return size; + } + } else if (src_first_rc == rc_int) { + // gpr -> + if (dst_first_rc == rc_stack) { + // gpr -> mem + if ((src_first & 1) == 0 && src_first + 1 == src_second && + (dst_first & 1) == 0 && dst_first + 1 == dst_second) { + // 64-bit + int offset = ra_->reg2offset(dst_first); + if (cbuf) { + C2_MacroAssembler _masm(cbuf); + __ stl(as_Register(Matcher::_regEncode[src_first]), Address(esp, offset)); +#ifndef PRODUCT + } else { + if(!do_size){ + if (size != 0) st->print("\n\t"); + st->print("stl %s, [esp + #%d] # spill 8", + Matcher::regName[src_first], + offset); + } +#endif + } + size += 4; + } else { + // 32-bit + assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); + assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); + int offset = ra_->reg2offset(dst_first); + if (cbuf) { + C2_MacroAssembler _masm(cbuf); + __ stw(as_Register(Matcher::_regEncode[src_first]), Address(esp, offset)); +#ifndef PRODUCT + } else { + if (!do_size) { + if (size != 0) st->print("\n\t"); + st->print("stw %s, [esp + #%d]\t# spill 9", + Matcher::regName[src_first], offset); + } +#endif + } + size += 4; + } + return size; + } else if (dst_first_rc == rc_int) { + // gpr -> gpr + if ((src_first & 1) == 0 && src_first + 1 == src_second && + (dst_first & 1) == 0 && dst_first + 1 == dst_second) { + // 64-bit + if (cbuf) { + C2_MacroAssembler _masm(cbuf); + __ movl(as_Register(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first])); +#ifndef PRODUCT + } else { + if(!do_size){ + if (size != 0) st->print("\n\t"); + st->print("movl %s <-- %s\t# spill 10", + Matcher::regName[dst_first], + Matcher::regName[src_first]); + } +#endif + } + size += 4; + return size; + } else { + // 32-bit + assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); + 
assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); + if (cbuf) { + C2_MacroAssembler _masm(cbuf); + if (this->ideal_reg() == Op_RegI) + __ movws(as_Register(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first])); + else + __ movl(as_Register(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first])); +#ifndef PRODUCT + } else { + if (!do_size) { + if (size != 0) st->print("\n\t"); + st->print("move(32-bit) %s <-- %s\t# spill 11", + Matcher::regName[dst_first], + Matcher::regName[src_first]); + } +#endif + } + size += 4; + return size; + } + } else if (dst_first_rc == rc_float) { + // gpr -> xmm + if ((src_first & 1) == 0 && src_first + 1 == src_second && + (dst_first & 1) == 0 && dst_first + 1 == dst_second) { + // 64-bit + if (cbuf) { + C2_MacroAssembler _masm(cbuf); + __ ifmovd(as_Register(Matcher::_regEncode[src_first]), as_FloatRegister(Matcher::_regEncode[dst_first])); +#ifndef PRODUCT + } else { + if(!do_size){ + if (size != 0) st->print("\n\t"); + st->print("ifmovd %s, %s\t# spill 12", + Matcher::regName[src_first], + Matcher::regName[dst_first]); + } +#endif + } + size += 4; + } else { + // 32-bit + assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); + assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); + if (cbuf) { + C2_MacroAssembler _masm(cbuf); + __ ifmovs( as_Register(Matcher::_regEncode[src_first]), as_FloatRegister(Matcher::_regEncode[dst_first]) ); +#ifndef PRODUCT + } else { + if(!do_size){ + if (size != 0) st->print("\n\t"); + st->print("ifmovs %s, %s\t# spill 13", + Matcher::regName[src_first], + Matcher::regName[dst_first]); + } +#endif + } + size += 4; + } + return size; + } + } else if (src_first_rc == rc_float) { + // xmm -> + if (dst_first_rc == rc_stack) { + // xmm -> mem + if ((src_first & 1) == 0 && src_first + 1 == src_second && + (dst_first & 1) == 0 && dst_first + 1 == dst_second) { + // 64-bit + int offset = ra_->reg2offset(dst_first); + if (cbuf) { + C2_MacroAssembler _masm(cbuf); + __ fstd( as_FloatRegister(Matcher::_regEncode[src_first]), Address(esp, offset) ); +#ifndef PRODUCT + } else { + if(!do_size){ + if (size != 0) st->print("\n\t"); + st->print("fstd %s, [esp + #%d]\t# spill 14", + Matcher::regName[src_first], + offset); + } +#endif + } + size += 4; + } else { + // 32-bit + assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); + assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); + int offset = ra_->reg2offset(dst_first); + if (cbuf) { + C2_MacroAssembler _masm(cbuf); + __ fsts(as_FloatRegister(Matcher::_regEncode[src_first]), Address(esp, offset)); +#ifndef PRODUCT + } else { + if(!do_size){ + if (size != 0) st->print("\n\t"); + st->print("fsts %s, [esp + #%d]\t# spill 15", + Matcher::regName[src_first], + offset); + } +#endif + } + size += 4; + } + return size; + } else if (dst_first_rc == rc_int) { + // xmm -> gpr + if ((src_first & 1) == 0 && src_first + 1 == src_second && + (dst_first & 1) == 0 && dst_first + 1 == dst_second) { + // 64-bit + if (cbuf) { + C2_MacroAssembler _masm(cbuf); + __ fimovd( as_FloatRegister(Matcher::_regEncode[src_first]), as_Register(Matcher::_regEncode[dst_first])); +#ifndef PRODUCT + } else { + if(!do_size){ + if (size != 0) st->print("\n\t"); + st->print("fimovd %s, %s\t# spill 16", + Matcher::regName[src_first], + Matcher::regName[dst_first]); + } +#endif + } + size += 4; + } else { + // 32-bit + assert(!((src_first & 1) == 0 && 
src_first + 1 == src_second), "no transform"); + assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); + if (cbuf) { + C2_MacroAssembler _masm(cbuf); + __ fimovs( as_FloatRegister(Matcher::_regEncode[src_first]), as_Register(Matcher::_regEncode[dst_first])); +#ifndef PRODUCT + } else { + if(!do_size){ + if (size != 0) st->print("\n\t"); + st->print("fimovs %s, %s\t# spill 17", + Matcher::regName[src_first], + Matcher::regName[dst_first]); + } +#endif + } + size += 4; + } + return size; + } else if (dst_first_rc == rc_float) { + // xmm -> xmm + if ((src_first & 1) == 0 && src_first + 1 == src_second && + (dst_first & 1) == 0 && dst_first + 1 == dst_second) { + // 64-bit + if (cbuf) { + C2_MacroAssembler _masm(cbuf); + __ mov_d( as_FloatRegister(Matcher::_regEncode[dst_first]), as_FloatRegister(Matcher::_regEncode[src_first])); +#ifndef PRODUCT + } else { + if(!do_size){ + if (size != 0) st->print("\n\t"); + st->print("mov_d %s <-- %s\t# spill 18", + Matcher::regName[dst_first], + Matcher::regName[src_first]); + } +#endif + } + size += 4; + } else { + // 32-bit + assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); + assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); + if (cbuf) { + C2_MacroAssembler _masm(cbuf); + __ mov_s( as_FloatRegister(Matcher::_regEncode[dst_first]), as_FloatRegister(Matcher::_regEncode[src_first])); +#ifndef PRODUCT + } else { + if(!do_size){ + if (size != 0) st->print("\n\t"); + st->print("mov_s %s <-- %s\t# spill 19", + Matcher::regName[dst_first], + Matcher::regName[src_first]); + } +#endif + } + size += 4; + } + return size; + } + } + + assert(0," foo "); + Unimplemented(); + return size; +} + +#ifndef PRODUCT +void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const { + implementation(NULL, ra_, false, st); +} +#endif + +void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { + implementation(&cbuf, ra_, false, NULL); +} + +uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const { + return MachNode::size(ra_); +} + +//============================================================================= +#ifndef PRODUCT +void BoxLockNode::format(PhaseRegAlloc* ra_, outputStream* st) const +{ + int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); + int reg = ra_->get_reg_first(this); + st->print("addl esp, %d, %s \t# box lock@BoxLockNode", offset, Matcher::regName[reg]); +} +#endif + +void BoxLockNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const +{ + C2_MacroAssembler _masm(&cbuf); + int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); + int reg = ra_->get_encode(this); + /*if (Assembler::operand_valid_for_simple_type_instruction_immediate(offset)) { + __ addl(esp, offset, as_Register(reg)); + } else { + __ addptr(esp, offset, as_Register(reg)); + }*/ + __ addptr(esp, offset, as_Register(reg)); +} + +uint BoxLockNode::size(PhaseRegAlloc *ra_) const +{ + //int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); + //return Assembler::operand_valid_for_simple_type_instruction_immediate(offset) ? 
4 : 8; + return 8; +} + +//============================================================================= +#ifndef PRODUCT +void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const +{ + if (UseCompressedClassPointers) { + st->print_cr("movl rscratch3, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass"); + st->print_cr("\tdecode_klass_not_null rscratch4, rscratch4"); + st->print_cr("\tcmpeq iCache_v0, rscratch4\t # Inline cache check"); + } else { + st->print_cr("\tcmpeq iCache_v0, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t" + "# Inline cache check"); + } + st->print_cr("\tjne SharedRuntime::_ic_miss_stub"); + st->print_cr("\tnop\t# nops to align entry point"); +} +#endif + +void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc * ra_) const { + C2_MacroAssembler _masm(&cbuf); + int ic_reg = Matcher::inline_cache_reg_encode();//sw64 inline_cache_reg(V0); + Register receiver = j_rarg0; + Register iCache = as_Register(ic_reg); + + Label skip; + __ load_klass(rscratch4, receiver); + __ cmpptr(rscratch4, iCache); + __ jcc(Assembler::equal, skip); + __ relocate(relocInfo::runtime_call_type); + __ patchable_jump((address)SharedRuntime::get_ic_miss_stub()); + __ align(CodeEntryAlignment); + __ bind(skip); +} + +uint MachUEPNode::size(PhaseRegAlloc* ra_) const +{ + return MachNode::size(ra_); // too many variables; just compute it + // the hard way +} + + +//============================================================================= + +const bool Matcher::supports_vector_calling_convention(void) { + if (EnableVectorSupport && UseVectorStubs) { + return true; + } + return false; +} + +// Is this branch offset short enough that a short branch can be used? +// +// NOTE: If the platform does not provide any short branch variants, then +// this method should return false for offset 0. +bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) { + // The passed offset is relative to address of the branch. + // On 86 a branch displacement is calculated relative to address + // of a next instruction. +// offset -= br_size; +// +// // the short version of jmpConUCF2 contains multiple branches, +// // making the reach slightly less +// if (rule == jmpConUCF2_rule) +// return (-126 <= offset && offset <= 125); +// return (-128 <= offset && offset <= 127); + Unimplemented(); + return false; +} + + + // is_CAS(int opcode) + // + // return true if opcode is one of the possible CompareAndSwapX + // values otherwise false. 
+ +bool is_CAS(int opcode) +{ + switch(opcode) { + // We handle these + case Op_CompareAndSwapI: + case Op_CompareAndSwapL: + case Op_CompareAndSwapP: + case Op_CompareAndSwapN: + case Op_GetAndSetI: + case Op_GetAndSetL: + case Op_GetAndSetP: + case Op_GetAndSetN: + case Op_GetAndAddI: + case Op_GetAndAddL: + return true; + default: + return false; + } +} + +bool unnecessary_release(const Node *n) +{ + assert((n->is_MemBar() && + n->Opcode() == Op_MemBarRelease), + "expecting a release membar"); + + MemBarNode *barrier = n->as_MemBar(); + + if (!barrier->leading()) { + return false; + } else { + Node* trailing = barrier->trailing_membar(); + MemBarNode* trailing_mb = trailing->as_MemBar(); + assert(trailing_mb->trailing(), "Not a trailing membar?"); + assert(trailing_mb->leading_membar() == n, "inconsistent leading/trailing membars"); + + Node* mem = trailing_mb->in(MemBarNode::Precedent); + if (!mem->is_Store()) { + assert(mem->is_LoadStore(), ""); + assert(trailing_mb->Opcode() == Op_MemBarAcquire, ""); + return is_CAS(mem->Opcode()); + } + } + + return false; +} + +// Return whether or not this register is ever used as an argument. +// This function is used on startup to build the trampoline stubs in +// generateOptoStub. Registers not mentioned will be killed by the VM +// call in the trampoline, and arguments in those registers not be +// available to the callee. +bool Matcher::can_be_java_arg( int reg ) { + /* Refer to: [sharedRuntime_sw64.cpp] SharedRuntime::java_calling_convention() */ + if ( /* reg == T0_num || reg == T0_H_num + || */ reg == A0_num || reg == A0_H_num + || reg == A1_num || reg == A1_H_num + || reg == A2_num || reg == A2_H_num + || reg == A3_num || reg == A3_H_num + || reg == A4_num || reg == A4_H_num + || reg == A5_num || reg == A5_H_num ) + return true; + + if ( reg == F16_num || reg == F16_H_num + || reg == F17_num || reg == F17_H_num + || reg == F18_num || reg == F18_H_num + || reg == F19_num || reg == F19_H_num + || reg == F20_num || reg == F20_H_num + || reg == F21_num || reg == F21_H_num ) + return true; + + return false; + } + +bool Matcher::is_spillable_arg(int reg) + { + return can_be_java_arg(reg); + } + +bool Matcher::use_asm_for_ldiv_by_con(jlong divisor) { + // In 64 bit mode a code which use multiply when + // devisor is constant is faster than hardware + // DIV instruction (it uses MulHiL). + return false; + } + + // Register for DIVI projection of divmodI +RegMask Matcher::divI_proj_mask() { + // return INT_RAX_REG_mask(); + Unimplemented(); + return 0; + } + + // Register for MODI projection of divmodI +RegMask Matcher::modI_proj_mask() { + // return INT_RDX_REG_mask(); + Unimplemented(); + return 0; + } + + // Register for DIVL projection of divmodL +RegMask Matcher::divL_proj_mask() { + // return LONG_RAX_REG_mask(); + Unimplemented(); + return 0; + } + + // Register for MODL projection of divmodL +RegMask Matcher::modL_proj_mask() { + // return LONG_RDX_REG_mask(); + Unimplemented(); + return 0; + } + +// Register for saving SP into on method handle invokes. Not used on x86_64. +const RegMask Matcher::method_handle_invoke_SP_save_mask() { +// return NO_REG_mask(); + //warning("TODO:Matcher::method_handle_invoke_SP_save_mask(), check lsp"); + return FP_REG_mask(); +} + +%} + +//----------ENCODING BLOCK----------------------------------------------------- +// This block specifies the encoding classes used by the compiler to output +// byte streams. 
Encoding classes generate functions which are called by +// Machine Instruction Nodes in order to generate the bit encoding of the +// instruction. Operands specify their base encoding interface with the +// interface keyword. There are currently supported four interfaces, +// REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER. REG_INTER causes an +// operand to generate a function which returns its register number when +// queried. CONST_INTER causes an operand to generate a function which +// returns the value of the constant when queried. MEMORY_INTER causes an +// operand to generate four functions which return the Base Register, the +// Index Register, the Scale Value, and the Offset Value of the operand when +// queried. COND_INTER causes an operand to generate six functions which +// return the encoding code (ie - encoding bits for the instruction) +// associated with each basic boolean condition for a conditional instruction. +// Instructions specify two basic values for encoding. They use the +// ins_encode keyword to specify their encoding class (which must be one of +// the class names specified in the encoding block), and they use the +// opcode keyword to specify, in order, their primary, secondary, and +// tertiary opcode. Only the opcode sections which a particular instruction +// needs for encoding need to be specified. +encode %{ + + enc_class load_N_enc (rRegN dst, memory mem) %{ + C2_MacroAssembler _masm(&cbuf); + int dst = $dst$$reg; + + relocInfo::relocType disp_reloc = $mem->disp_reloc(); + assert(disp_reloc == relocInfo::none, "cannot have disp"); + __ ldwu($dst$$Register, $mem$$Address); + %} + +enc_class load_P_enc(rRegP dst, memory mem) %{ + C2_MacroAssembler _masm(&cbuf); + int dst = $dst$$reg; + + relocInfo::relocType disp_reloc = $mem->disp_reloc(); + assert(disp_reloc == relocInfo::none, "cannot have disp"); + __ ldptr($dst$$Register, $mem$$Address); +%} + + enc_class sw64_Java_To_Runtime (method meth) %{ // CALL Java_To_Runtime, Java_To_Runtime_Leaf + C2_MacroAssembler _masm(&cbuf); + //__ stop("TODO:not check lsp(Java_To_Runtime)"); +// // This is the instruction starting address for relocation info. + +// // the return address is store@(-1)SP by convention on mips, +// // but we don't have this convention. +// // so we have to store the pc into last_java_frame by ourself before calling into runtime + address addr = (address)$meth$$method; + Label retaddr; + int offset = __ offset(); + //assert( rscratch3 != T12, "rscratch3 can not is T12!" ); + __ block_comment(";;execute sw64_Java_To_Runtime"); + __ set_last_Java_frame(esp, noreg, retaddr, rscratch3, rscratch2_AT); + //lsp: same to swjdk8, different form aarch64, need to store retaddr in stack?? + __ call(AddressLiteral(addr, relocInfo::runtime_call_type),&retaddr);// need to check lsp!! + //assert(__ offset() - offset <= (int) ret_addr_offset(), "overflow"); + + //__ mov_immediate64(pv, (intptr_t)addr); + //__ push_RA_call(pv); + %} +// + +enc_class Java_Static_Call(method meth) %{ + // JAVA STATIC CALL + // CALL to fixup routine. Fixup routine uses ScopeDesc info to + // determine who we intended to call. + C2_MacroAssembler _masm(&cbuf); + + + //__ stop("TODO:not check lsp(Java_Static_Call)"); + cbuf.set_insts_mark();//TODO:relate to relocate? jzy + + address addr = (address)$meth$$method; + address call; + if (!_method) { + // A call to a runtime wrapper, e.g. new, new_typeArray_Java, uncommon_trap. 
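+    // With no resolved ciMethod this is a direct call into the VM runtime, so
+    // only a runtime_call relocation is emitted and no static-call stub is needed.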
+ //__ call(AddressLiteral(addr, relocInfo::runtime_call_type)); + __ relocate(relocInfo::runtime_call_type); + __ patchable_call((address)($meth$$method)); + } else { + int method_index = resolved_method_index(cbuf); + RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index) + : static_call_Relocation::spec(method_index); + __ relocate(rspec); + __ patchable_call((address)($meth$$method)); + + // Emit stubs for static call. + address mark = cbuf.insts_mark(); + address stub = CompiledStaticCall::emit_to_interp_stub(cbuf, mark); + if (stub == NULL) { + ciEnv::current()->record_failure("CodeCache is full"); + return; + } + } + %} + + +enc_class call_epilog() %{ + C2_MacroAssembler _masm(&cbuf); + if (VerifyStackAtCalls) { + Unimplemented(); +// Label L; +// C2_MacroAssembler _masm(&cbuf); +// int framesize = ra_->C->frame_size_in_bytes(); +// __ addl(esp, framesize, rscratch2_AT); +// __ cmpptr(rscratch2_AT, rfp); +// __ jcc(Assembler::equal, L); +// __ stop("VerifyStackAtCalls failed"); +// __ BIND(L); + } + %} + // + // [Ref: LIR_Assembler::ic_call() ] + // +enc_class Java_Dynamic_Call (method meth) %{ + C2_MacroAssembler _masm(&cbuf); + __ block_comment("Java_Dynamic_Call"); + __ ic_call((address)$meth$$method, resolved_method_index(cbuf)); +%} + + +enc_class Set_Flags_After_Fast_Lock_Unlock(FlagsReg cr) %{ +// Register flags = $cr$$Register; +// Label L; +// +// C2_MacroAssembler _masm(&cbuf); +// +// __ addu(flags, R0, R0); +// __ beq(AT, L); +// __ move(flags, 0xFFFFFFFF); +// __ BIND(L); +%} + + enc_class enc_PartialSubtypeCheck(rRegP result, rRegP sub, rRegP super) %{ + Register result = $result$$Register; + Register sub = $sub$$Register; + Register super = $super$$Register; + Register length = rscratch3; + Label miss; +// +// // result may be the same as sub +// // 47c B40: # B21 B41 <- B20 Freq: 0.155379 +// // 47c partialSubtypeCheck result=S1, sub=S1, super=S3, length=S0 +// // 4bc mov S2, NULL #@loadConP +// // 4c0 beq S1, S2, B21 #@branchConP P=0.999999 C=-1.000000 +// // + C2_MacroAssembler _masm(&cbuf); + Label done; + __ check_klass_subtype_slow_path(sub, super, length, noreg, + NULL, &miss, + /*set_cond_codes:*/ true); + // Refer to X86_64's RDI + __ movl(result, R0); + __ beq_l(R0, done); + + __ BIND(miss); + __ movl(result, 1); + __ BIND(done); + %} + +%} + + +//---------SW64 FRAME-------------------------------------------------------------- +// Definition of frame structure and management information. 
+// +// S T A C K L A Y O U T Allocators stack-slot number +// | (to get allocators register number +// G Owned by | | v add SharedInfo::stack0) +// r CALLER | | +// o | +--------+ pad to even-align allocators stack-slot +// w V | pad0 | numbers; owned by CALLER +// t -----------+--------+----> Matcher::_in_arg_limit, unaligned +// h ^ | in | 5 +// | | args | 4 Holes in incoming args owned by SELF +// | | old | | 3 +// | | SP-+--------+----> Matcher::_old_SP, even aligned +// v | | ret | 3 return address +// Owned by +--------+ +// Self | pad2 | 2 pad to align old SP +// | +--------+ 1 +// | | locks | 0 +// | +--------+----> SharedInfo::stack0, even aligned +// | | pad1 | 11 pad to align new SP +// | +--------+ +// | | | 10 +// | | spills | 9 spills +// V | | 8 (pad0 slot for callee) +// -----------+--------+----> Matcher::_out_arg_limit, unaligned +// ^ | out | 7 +// | | args | 6 Holes in outgoing args owned by CALLEE +// Owned by new | | +// Callee SP-+--------+----> Matcher::_new_SP, even aligned +// | | +// +// Note 1: Only region 8-11 is determined by the allocator. Region 0-5 is +// known from SELF's arguments and the Java calling convention. +// Region 6-7 is determined per call site. +// Note 2: If the calling convention leaves holes in the incoming argument +// area, those holes are owned by SELF. Holes in the outgoing area +// are owned by the CALLEE. Holes should not be nessecary in the +// incoming area, as the Java calling convention is completely under +// the control of the AD file. Doubles can be sorted and packed to +// avoid holes. Holes in the outgoing arguments may be nessecary for +// varargs C calling conventions. +// Note 3: Region 0-3 is even aligned, with pad2 as needed. Region 3-5 is +// even aligned with pad0 as needed. +// Region 6 is even aligned. Region 6-7 is NOT even aligned; +// region 6-11 is even aligned; it may be padded out more so that +// the region from SP to FP meets the minimum stack alignment. +// Note 4: For I2C adapters, the incoming FP may not meet the minimum stack +// alignment. Region 11, pad1, may be dynamically extended so that +// SP meets the minimum alignment. + + +frame +%{ + // These three registers define part of the calling convention + // between compiled code and the interpreter. + inline_cache_reg(V0); // Inline Cache Register x86 is rax, sw64 is v0 check lsp? + + // Optional: name the operand used by cisc-spilling to access + // [stack_pointer + offset] + cisc_spilling_operand_name(indOffset32); + + // Number of stack slots consumed by locking an object + sync_stack_slots(2); + + // Compiled code's Frame Pointer + frame_pointer(SP); + + // Interpreter stores its frame pointer in a register which is + // stored to the stack by I2CAdaptors. + // I2CAdaptors convert from interpreted java to compiled java. + interpreter_frame_pointer(FP); + + // Stack alignment requirement + stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes) TODO:check jzy + + // Number of outgoing stack slots killed above the out_preserve_stack_slots + // for calls to C. Supports the var-args backing area for register parms. + varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes/BytesPerInt); + + // The after-PROLOG location of the return address. Location of + // return address specifies a type (REG or STACK) and a number + // representing the register number (i.e. - use a register name) or + // stack slot. + // Ret Addr is on stack in slot 0 if no locks or verification or alignment. 
+ // Otherwise, it is above the locks and verification slot and alignment word + + return_addr(STACK - 2 + + align_up((Compile::current()->in_preserve_stack_slots() + + Compile::current()->fixed_slots()), + stack_alignment_in_slots())); + + // Location of C & interpreter return values + // register(s) contain(s) return value for Op_StartI2C and Op_StartOSR. + // SEE Matcher::match. + // Location of compiled Java return values. Same as C for now. + return_value %{ + assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" ); + + static const int lo[Op_RegL + 1] = { // enum name + 0, // Op_Node + 0, // Op_Set + V0_num, // Op_RegN + V0_num, // Op_RegI + V0_num, // Op_RegP + F0_num, // Op_RegF + F0_num, // Op_RegD + V0_num // Op_RegL + }; + + static const int hi[Op_RegL + 1] = { // enum name + 0, // Op_Node + 0, // Op_Set + OptoReg::Bad, // Op_RegN + OptoReg::Bad, // Op_RegI + V0_H_num, // Op_RegP + OptoReg::Bad, // Op_RegF + F0_H_num, // Op_RegD + V0_H_num // Op_RegL + }; + + return OptoRegPair(hi[ideal_reg], lo[ideal_reg]); + %} + +%} + +//----------ATTRIBUTES--------------------------------------------------------- +//----------Operand Attributes------------------------------------------------- +op_attrib op_cost(0); // Required cost attribute + +//----------Instruction Attributes--------------------------------------------- +ins_attrib ins_cost(100); // Required cost attribute +ins_attrib ins_size(32); // Required size attribute (in bits) +ins_attrib ins_pc_relative(0); // Required PC Relative flag +ins_attrib ins_short_branch(0); // Required flag: is this instruction a + // non-matching short branch variant of some + // long branch? +ins_attrib ins_alignment(4); // Required alignment attribute (must be a power of 2) + // specifies the alignment that some part of the instruction (not + // necessarily the start) requires. If > 1, a compute_padding() + // function must be provided for the instruction + +//----------OPERANDS----------------------------------------------------------- +// Operand definitions must precede instruction definitions for correct parsing +// in the ADLC because operands constitute user defined types which are used in +// instruction definitions. 
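+// Purely as an illustration of how these operand classes get consumed (a
+// hypothetical sketch, not an instruct defined in this block), an instruct
+// definition names its operands by these types, e.g.
+//   instruct exampleAddI(rRegI dst, rRegI src, immI16 imm) %{
+//     match(Set dst (AddI src imm));
+//     ins_encode %{ /* emit the sw64 add here */ %}
+//   %}
+// where rRegI and immI16 are among the operand classes declared below.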
+ +// Vectors +operand vecD() %{ + constraint(ALLOC_IN_RC(dbl_reg)); + match(VecD); + + format %{ %} + interface(REG_INTER); +%} + +// Flags register, used as output of compare instructions +operand rFlagsReg() %{ + constraint(ALLOC_IN_RC(sw64_flags)); + match(RegFlags); + + format %{ "RFLAGS" %} + interface(REG_INTER); +%} + +// Flags register, used as output of compare instructions +operand rFlagsRegU() %{ + constraint(ALLOC_IN_RC(sw64_flags)); + match(RegFlags); + + format %{ "RFLAGS_U" %} + interface(REG_INTER); +%} + +operand immI_MaxI() %{ + predicate(n->get_int() == 2147483647); + match(ConI); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +operand immI16_sub() %{ + predicate((-32767 <= n->get_int()) && (n->get_int() <= 32768)); + match(ConI); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +operand immU8() %{ + predicate( n->get_int() >= 0 && n->get_int() <= 255 ); + match(ConI); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +operand immI_le_4() +%{ + predicate(n->get_int() <= 4); + match(ConI); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + + +// Pointer for polling page +operand immP_poll() %{ + predicate(n->get_ptr() != 0 && n->get_ptr() == (intptr_t)SafepointMechanism::get_polling_page()); + match(ConP); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +operand immL16() %{ + predicate((-32768 <= n->get_long()) && (n->get_long() <= 32767)); + match(ConL); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +operand immL16_sub() %{ + predicate((-32767 <= n->get_long()) && (n->get_long() <= 32768)); + match(ConL); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +//----------Simple Operands---------------------------------------------------- +// Immediate Operands +// Integer Immediate +operand immI() +%{ + match(ConI); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +// Constant for test vs zero +operand immI0() +%{ + predicate(n->get_int() == 0); + match(ConI); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +// Constant for increment +operand immI1() +%{ + predicate(n->get_int() == 1); + match(ConI); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +operand immI_2() +%{ + predicate(n->get_int() == 2); + match(ConI); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +operand immI_3() +%{ + predicate(n->get_int() == 3); + match(ConI); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +// Constant for decrement +operand immI_M1() +%{ + predicate(n->get_int() == -1); + match(ConI); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +// Valid scale values for addressing modes +operand immI2() +%{ + predicate(0 <= n->get_int() && (n->get_int() <= 3)); + match(ConI); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +operand immI8() +%{ + predicate((-0x80 <= n->get_int()) && (n->get_int() < 0x80)); + match(ConI); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +operand immI16() +%{ + predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767)); + match(ConI); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +// Int Immediate non-negative +operand immU31() +%{ + predicate(n->get_int() >= 0); + match(ConI); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +// Pointer Immediate +operand immP() +%{ + match(ConP); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +// NULL Pointer Immediate +operand immP0() +%{ + predicate(n->get_ptr() == 
0); + match(ConP); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +// Pointer Immediate +operand immN() %{ + match(ConN); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +operand immNKlass() %{ + match(ConNKlass); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +// NULL Pointer Immediate +operand immN0() %{ + predicate(n->get_narrowcon() == 0); + match(ConN); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +operand immP31() +%{ + predicate(n->as_Type()->type()->reloc() == relocInfo::none + && (n->get_ptr() >> 31) == 0); + match(ConP); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + + +// Long Immediate +operand immL() +%{ + match(ConL); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +operand immUL8() %{ + predicate( n->get_long() >= 0 && n->get_long() <= 255 ); + match(ConL); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +// Long Immediate 32-bit unsigned +operand immUL32() +%{ + predicate(n->get_long() == (unsigned int) (n->get_long())); + match(ConL); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +// Long Immediate 32-bit signed +operand immL32() +%{ + predicate(n->get_long() == (int) (n->get_long())); + match(ConL); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +operand immL_Pow2() +%{ + predicate(is_power_of_2((julong)n->get_long())); + match(ConL); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +operand immL_NotPow2() +%{ + predicate(is_power_of_2((julong)~n->get_long())); + match(ConL); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +// Long Immediate zero +operand immL0() +%{ + predicate(n->get_long() == 0L); + match(ConL); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +// Constant for increment +operand immL1() +%{ + predicate(n->get_long() == 1); + match(ConL); + + format %{ %} + interface(CONST_INTER); +%} + +// Constant for decrement +operand immL_M1() +%{ + predicate(n->get_long() == -1); + match(ConL); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +// Long Immediate: the value 10 +operand immL10() +%{ + predicate(n->get_long() == 10); + match(ConL); + + format %{ %} + interface(CONST_INTER); +%} + +// Long Immediate: low 32-bit mask +operand immL_32bits() +%{ + predicate(n->get_long() == 0xFFFFFFFFL); + match(ConL); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +// Float Immediate zero +operand immF0() +%{ + predicate(jint_cast(n->getf()) == 0); + match(ConF); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +// Float Immediate +operand immF() +%{ + match(ConF); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +// Double Immediate zero +operand immD0() +%{ + predicate(jlong_cast(n->getd()) == 0); + match(ConD); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +// Double Immediate +operand immD() +%{ + match(ConD); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +// Immediates for special shifts (sign extend) + +// Constants for increment +operand immI_16() +%{ + predicate(n->get_int() == 16); + match(ConI); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +operand immI_24() +%{ + predicate(n->get_int() == 24); + match(ConI); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +// Constant for byte-wide masking +operand immI_255() +%{ + predicate(n->get_int() == 255); + match(ConI); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +// Constant for short-wide 
masking +operand immI_65535() +%{ + predicate(n->get_int() == 65535); + match(ConI); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +// Constant for byte-wide masking +operand immL_255() +%{ + predicate(n->get_long() == 255); + match(ConL); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +// Constant for short-wide masking +operand immL_65535() +%{ + predicate(n->get_long() == 65535); + match(ConL); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +// Register Operands +// Integer Register +operand rRegI() +%{ + constraint(ALLOC_IN_RC(int_reg)); + match(RegI); + + format %{ %} + interface(REG_INTER); +%} + +operand t10_RegI() %{ + constraint(ALLOC_IN_RC(t10_reg)); + match(RegI); + match(rRegI); + + format %{ "T10" %} + interface(REG_INTER); +%} + +//operand t11_RegI() %{ +// constraint(ALLOC_IN_RC(t11_reg)); +// match(RegI); +// match(rRegI); +// +// format %{ "T11" %} +// interface(REG_INTER); +// %} + +operand a0_RegI() %{ + constraint(ALLOC_IN_RC(a0_reg)); + match(RegI); + match(rRegI); + + format %{ "A0" %} + interface(REG_INTER); +%} + +operand a1_RegI() %{ + constraint(ALLOC_IN_RC(a1_reg)); + match(RegI); + match(rRegI); + + format %{ "A1" %} + interface(REG_INTER); +%} + +operand a2_RegI() %{ + constraint(ALLOC_IN_RC(a2_reg)); + match(RegI); + match(rRegI); + + format %{ "A2" %} + interface(REG_INTER); +%} + +operand a3_RegI() %{ + constraint(ALLOC_IN_RC(a3_reg)); + match(RegI); + match(rRegI); + + format %{ "A3" %} + interface(REG_INTER); +%} + +operand a4_RegI() %{ + constraint(ALLOC_IN_RC(a4_reg)); + match(RegI); + match(rRegI); + + format %{ "A4" %} + interface(REG_INTER); +%} + +operand v0_RegI() +%{ + constraint(ALLOC_IN_RC(v0_reg)); + match(RegI); + match(rRegI); + + format %{ "V0" %} + interface(REG_INTER); +%} + +operand rRegN() %{ + constraint(ALLOC_IN_RC(int_reg)); + match(RegN); + + format %{ %} + interface(REG_INTER); +%} + +operand v0_RegN() %{ + constraint(ALLOC_IN_RC(v0_reg)); + match(RegN); + match(rRegN); + + format %{ %} + interface(REG_INTER); +%} + +operand t10_RegN() %{ + constraint(ALLOC_IN_RC(t10_reg)); + match(RegN); + match(rRegN); + + format %{ %} + interface(REG_INTER); +%} + +// Pointer Register +operand any_RegP() %{ + constraint(ALLOC_IN_RC(any_reg)); + match(RegP); + match(a0_RegP); + match(s2_RegP); + match(rRegP); + + format %{ %} + interface(REG_INTER); +%} + +// Pointer Register +operand rRegP() %{ + constraint(ALLOC_IN_RC(ptr_reg)); + match(RegP); + match(a0_RegP); + match(s2_RegP); + + format %{ %} + interface(REG_INTER); +%} + +//TODO:why no T11 jzy +operand no_T11_rRegP() %{ + constraint(ALLOC_IN_RC(no_T11_p_reg)); + match(RegP); + match(rRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand s2_RegP() +%{ + constraint(ALLOC_IN_RC(s2_long_reg)); + match(RegP); + match(rRegP); + match(no_T11_rRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand s3_RegP() +%{ + constraint(ALLOC_IN_RC(s3_long_reg)); + match(RegP); + match(rRegP); + match(no_T11_rRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand t8_RegP() +%{ + constraint(ALLOC_IN_RC(t8_long_reg)); + match(RegP); + match(rRegP); + match(no_T11_rRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand t9_RegP() +%{ + constraint(ALLOC_IN_RC(t9_long_reg)); + match(RegP); + match(rRegP); + match(no_T11_rRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand t10_RegP() +%{ + constraint(ALLOC_IN_RC(t10_long_reg)); + match(RegP); + match(rRegP); + match(no_T11_rRegP); + + format %{ %} + interface(REG_INTER); +%} + 
+operand a0_RegP() +%{ + constraint(ALLOC_IN_RC(a0_long_reg)); + match(RegP); + match(rRegP); + match(no_T11_rRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand a1_RegP() +%{ + constraint(ALLOC_IN_RC(a1_long_reg)); + match(RegP); + match(rRegP); + match(no_T11_rRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand a2_RegP() +%{ + constraint(ALLOC_IN_RC(a2_long_reg)); + match(RegP); + match(rRegP); + match(no_T11_rRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand a3_RegP() +%{ + constraint(ALLOC_IN_RC(a3_long_reg)); + match(RegP); + match(rRegP); + match(no_T11_rRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand a4_RegP() +%{ + constraint(ALLOC_IN_RC(a4_long_reg)); + match(RegP); + match(rRegP); + match(no_T11_rRegP); + + format %{ %} + interface(REG_INTER); +%} + + +operand a5_RegP() +%{ + constraint(ALLOC_IN_RC(a5_long_reg)); + match(RegP); + match(rRegP); + match(no_T11_rRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand v0_RegP() +%{ + constraint(ALLOC_IN_RC(v0_long_reg)); + match(RegP); + match(rRegP); + match(no_T11_rRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand rRegL() %{ + constraint(ALLOC_IN_RC(long_reg)); + match(RegL); + + format %{ %} + interface(REG_INTER); +%} + +operand v0_RegL() %{ + constraint(ALLOC_IN_RC(v0_long_reg)); + match(RegL); + match(rRegL); + + format %{ %} + interface(REG_INTER); +%} + +operand t9_RegL() %{ + constraint(ALLOC_IN_RC(t9_long_reg)); + match(RegL); + match(rRegL); + + format %{ %} + interface(REG_INTER); +%} + +// Float register operands +operand regF() %{ + constraint(ALLOC_IN_RC(flt_reg)); + match(RegF); + + format %{ %} + interface(REG_INTER); +%} + +// Double register operands +operand regD() %{ + constraint(ALLOC_IN_RC(dbl_reg)); + match(RegD); + + format %{ %} + interface(REG_INTER); +%} + +operand f27_RegD() +%{ + constraint(ALLOC_IN_RC(dbl_tmp_f27)); + match(RegD); + op_cost(0); + format %{ %} + interface(REG_INTER); +%} +operand f28_RegD() +%{ + constraint(ALLOC_IN_RC(dbl_tmp_f28)); + match(RegD); + op_cost(0); + format %{ %} + interface(REG_INTER); +%} +operand f29_RegD() +%{ + constraint(ALLOC_IN_RC(dbl_tmp_f29)); + match(RegD); + op_cost(0); + format %{ %} + interface(REG_INTER); +%} +operand f30_RegD() +%{ + constraint(ALLOC_IN_RC(dbl_tmp_f30)); + match(RegD); + op_cost(0); + format %{ %} + interface(REG_INTER); +%} + + +//----------Memory Operands---------------------------------------------------- +// Direct Memory Operand +// operand direct(immP addr) +// %{ +// match(addr); + +// format %{ "[$addr]" %} +// interface(MEMORY_INTER) %{ +// base(0xFFFFFFFF); +// index(0x4); +// scale(0x0); +// disp($addr); +// %} +// %} + +// Indirect Memory Operand +operand indirect(any_RegP reg) +%{ + constraint(ALLOC_IN_RC(ptr_reg)); + match(reg); + + format %{ "[$reg] @ indirect" %} + interface(MEMORY_INTER) %{ + base($reg); + index(0x1e); + scale(0x0); + disp(0x0); + %} +%} + +// Indirect Memory Plus Short Offset Operand +operand indOffset16(any_RegP reg, immL16 off) +%{ + constraint(ALLOC_IN_RC(ptr_reg)); + match(AddP reg off); + + format %{ "[$reg + $off (16-bit)] @ indOffset16" %} + interface(MEMORY_INTER) %{ + base($reg); + index(0x1e); + scale(0x0); + disp($off); + %} +%} + +// Indirect Memory Plus Long Offset Operand +//operand indOffset32(rRegP reg, immL32 off) +//%{ +// constraint(ALLOC_IN_RC(ptr_reg)); +// match(AddP reg off); +// +// format %{ "[$reg + $off (32-bit)]" %} +// interface(MEMORY_INTER) %{ +// base($reg); +// index(0x1e); +// scale(0x0); +// disp($off); 
+// %} +//%} + +// Indirect Memory Plus Index Register Plus Offset Operand +operand indIndexOffset(any_RegP reg, rRegL lreg, immL16 off) +%{ + constraint(ALLOC_IN_RC(ptr_reg)); + match(AddP (AddP reg lreg) off); + + op_cost(10); + format %{"[$reg + $off + $lreg]" %} + interface(MEMORY_INTER) %{ + base($reg); + index($lreg); + scale(0x0); + disp($off); + %} +%} + +// Indirect Memory Plus Index Register Plus Offset Operand +operand indIndex(any_RegP reg, rRegL lreg) +%{ + constraint(ALLOC_IN_RC(ptr_reg)); + match(AddP reg lreg); + + op_cost(10); + format %{"[$reg + $lreg]" %} + interface(MEMORY_INTER) %{ + base($reg); + index($lreg); + scale(0x0); + disp(0x0); + %} +%} + +// Indirect Memory Times Scale Plus Index Register +operand indIndexScale(any_RegP reg, rRegL lreg, immI2 scale) +%{ + constraint(ALLOC_IN_RC(ptr_reg)); + match(AddP reg (LShiftL lreg scale)); + + op_cost(10); + format %{"[$reg + $lreg << $scale]" %} + interface(MEMORY_INTER) %{ + base($reg); + index($lreg); + scale($scale); + disp(0x0); + %} +%} + +operand indPosIndexScale(any_RegP reg, rRegI idx, immI2 scale) +%{ + constraint(ALLOC_IN_RC(ptr_reg)); + predicate(n->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0); + match(AddP reg (LShiftL (ConvI2L idx) scale)); + + op_cost(10); + format %{"[$reg + pos $idx << $scale]" %} + interface(MEMORY_INTER) %{ + base($reg); + index($idx); + scale($scale); + disp(0x0); + %} +%} + +// Indirect Memory Times Scale Plus Index Register Plus Offset Operand +operand indIndexScaleOffset(any_RegP reg, immL16 off, rRegL lreg, immI2 scale) +%{ + constraint(ALLOC_IN_RC(ptr_reg)); + match(AddP (AddP reg (LShiftL lreg scale)) off); + + op_cost(10); + format %{"[$reg + $off + $lreg << $scale]" %} + interface(MEMORY_INTER) %{ + base($reg); + index($lreg); + scale($scale); + disp($off); + %} +%} + +// Indirect Memory Plus Positive Index Register Plus Offset Operand +operand indPosIndexOffset(any_RegP reg, immL16 off, rRegI idx) +%{ + constraint(ALLOC_IN_RC(ptr_reg)); + predicate(n->in(2)->in(3)->as_Type()->type()->is_long()->_lo >= 0); + match(AddP (AddP reg (ConvI2L idx)) off); + + op_cost(10); + format %{"[$reg + $off + $idx]" %} + interface(MEMORY_INTER) %{ + base($reg); + index($idx); + scale(0x0); + disp($off); + %} +%} + +// Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand +operand indPosIndexScaleOffset(any_RegP reg, immL16 off, rRegI idx, immI2 scale) +%{ + constraint(ALLOC_IN_RC(ptr_reg)); + predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0); + match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off); + + op_cost(10); + format %{"[$reg + $off + $idx << $scale]" %} + interface(MEMORY_INTER) %{ + base($reg); + index($idx); + scale($scale); + disp($off); + %} +%} + +// Indirect Narrow Oop Plus Offset Operand +// Note: x86 architecture doesn't support "scale * index + offset" without a base +// we can't free r12 even with CompressedOops::base() == NULL. TODO:why r12? jzy +//lsp todo check sw is s5?? 
+operand indCompressedOopOffset(rRegN reg, immL16 off) %{ + predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8)); + constraint(ALLOC_IN_RC(ptr_reg)); + match(AddP (DecodeN reg) off); + + op_cost(10); + format %{"[S5 + $reg << 3 + $off] (compressed oop addressing)" %} + interface(MEMORY_INTER) %{ + base(0xe); // S5 fo SW64 + index($reg); + scale(0x3); + disp($off); + %} +%} + +// Indirect Memory Operand +operand indirectNarrow(rRegN reg) +%{ + predicate(CompressedOops::shift() == 0); + constraint(ALLOC_IN_RC(ptr_reg)); + match(DecodeN reg); + + format %{ "[$reg]" %} + interface(MEMORY_INTER) %{ + base($reg); + index(0x1e); + scale(0x0); + disp(0x0); + %} +%} + +// Indirect Memory Plus Short Offset Operand +operand indOffset16Narrow(rRegN reg, immL16 off) +%{ + predicate(CompressedOops::shift() == 0); + constraint(ALLOC_IN_RC(ptr_reg)); + match(AddP (DecodeN reg) off); + + format %{ "[$reg + $off (16-bit)]" %} + interface(MEMORY_INTER) %{ + base($reg); + index(0x1e); + scale(0x0); + disp($off); + %} +%} + +// Indirect Memory Plus Long Offset Operand +//operand indOffset32Narrow(rRegN reg, immL32 off) +//%{ +// predicate(CompressedOops::shift() == 0); +// constraint(ALLOC_IN_RC(ptr_reg)); +// match(AddP (DecodeN reg) off); +// +// format %{ "[$reg + $off (32-bit)]" %} +// interface(MEMORY_INTER) %{ +// base($reg); +// index(0x1e); +// scale(0x0); +// disp($off); +// %} +//%} + +// Indirect Memory Plus Index Register Plus Offset Operand +operand indIndexOffsetNarrow(rRegN reg, rRegL lreg, immL16 off) +%{ + predicate(CompressedOops::shift() == 0); + constraint(ALLOC_IN_RC(ptr_reg)); + match(AddP (AddP (DecodeN reg) lreg) off); + + op_cost(10); + format %{"[$reg + $off + $lreg]" %} + interface(MEMORY_INTER) %{ + base($reg); + index($lreg); + scale(0x0); + disp($off); + %} +%} + +// Indirect Memory Plus Index Register Plus Offset Operand +operand indIndexNarrow(rRegN reg, rRegL lreg) +%{ + predicate(CompressedOops::shift() == 0); + constraint(ALLOC_IN_RC(ptr_reg)); + match(AddP (DecodeN reg) lreg); + + op_cost(10); + format %{"[$reg + $lreg]" %} + interface(MEMORY_INTER) %{ + base($reg); + index($lreg); + scale(0x0); + disp(0x0); + %} +%} + +// Indirect Memory Times Scale Plus Index Register +operand indIndexScaleNarrow(rRegN reg, rRegL lreg, immI2 scale) +%{ + predicate(CompressedOops::shift() == 0); + constraint(ALLOC_IN_RC(ptr_reg)); + match(AddP (DecodeN reg) (LShiftL lreg scale)); + + op_cost(10); + format %{"[$reg + $lreg << $scale]" %} + interface(MEMORY_INTER) %{ + base($reg); + index($lreg); + scale($scale); + disp(0x0); + %} +%} + +// Indirect Memory Times Scale Plus Index Register Plus Offset Operand +operand indIndexScaleOffsetNarrow(rRegN reg, immL16 off, rRegL lreg, immI2 scale) +%{ + predicate(CompressedOops::shift() == 0); + constraint(ALLOC_IN_RC(ptr_reg)); + match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off); + + op_cost(10); + format %{"[$reg + $off + $lreg << $scale]" %} + interface(MEMORY_INTER) %{ + base($reg); + index($lreg); + scale($scale); + disp($off); + %} +%} + +// Indirect Memory Times Plus Positive Index Register Plus Offset Operand +operand indPosIndexOffsetNarrow(rRegN reg, immL16 off, rRegI idx) +%{ + constraint(ALLOC_IN_RC(ptr_reg)); + predicate(CompressedOops::shift() == 0 && n->in(2)->in(3)->as_Type()->type()->is_long()->_lo >= 0); + match(AddP (AddP (DecodeN reg) (ConvI2L idx)) off); + + op_cost(10); + format %{"[$reg + $off + $idx]" %} + interface(MEMORY_INTER) %{ + base($reg); + index($idx); + scale(0x0); + disp($off); + %} 
+%} + +// Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand +operand indPosIndexScaleOffsetNarrow(rRegN reg, immL16 off, rRegI idx, immI2 scale) +%{ + constraint(ALLOC_IN_RC(ptr_reg)); + predicate(CompressedOops::shift() == 0 && n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0); + match(AddP (AddP (DecodeN reg) (LShiftL (ConvI2L idx) scale)) off); + + op_cost(10); + format %{"[$reg + $off + $idx << $scale]" %} + interface(MEMORY_INTER) %{ + base($reg); + index($idx); + scale($scale); + disp($off); + %} +%} + +//----------Special Memory Operands-------------------------------------------- +// Stack Slot Operand - This operand is used for loading and storing temporary +// values on the stack where a match requires a value to +// flow through memory. +operand stackSlotP(sRegP reg) +%{ + constraint(ALLOC_IN_RC(stack_slots)); + // No match rule because this operand is only generated in matching + + format %{ "[$reg]" %} + interface(MEMORY_INTER) %{ + base(0x1e); // SP + index(0x1e); // No Index + scale(0x0); // No Scale + disp($reg); // Stack Offset + %} +%} + +operand stackSlotI(sRegI reg) +%{ + constraint(ALLOC_IN_RC(stack_slots)); + // No match rule because this operand is only generated in matching + + format %{ "[$reg]" %} + interface(MEMORY_INTER) %{ + base(0x1e); // SP + index(0x1e); // No Index + scale(0x0); // No Scale + disp($reg); // Stack Offset + %} +%} + +operand stackSlotF(sRegF reg) +%{ + constraint(ALLOC_IN_RC(stack_slots)); + // No match rule because this operand is only generated in matching + + format %{ "[$reg]" %} + interface(MEMORY_INTER) %{ + base(0x1e); // SP + index(0x1e); // No Index + scale(0x0); // No Scale + disp($reg); // Stack Offset + %} +%} + +operand stackSlotD(sRegD reg) +%{ + constraint(ALLOC_IN_RC(stack_slots)); + // No match rule because this operand is only generated in matching + + format %{ "[$reg]" %} + interface(MEMORY_INTER) %{ + base(0x1e); // SP + index(0x1e); // No Index + scale(0x0); // No Scale + disp($reg); // Stack Offset + %} +%} + +operand stackSlotL(sRegL reg) +%{ + constraint(ALLOC_IN_RC(stack_slots)); + // No match rule because this operand is only generated in matching + + format %{ "[$reg]" %} + interface(MEMORY_INTER) %{ + base(0x1e); // SP + index(0x1e); // No Index + scale(0x0); // No Scale + disp($reg); // Stack Offset + %} +%} + +//----------Conditional Branch Operands---------------------------------------- +// Comparison Op - This is the operation of the comparison, and is limited to +// the following set of codes: +// L (<), LE (<=), G (>), GE (>=), E (==), NE (!=) +// +// Other attributes of the comparison, such as unsignedness, are specified +// by the comparison instruction that sets a condition code flags register. +// That result is represented by a flags operand whose subtype is appropriate +// to the unsignedness (etc.) of the comparison. +// +// Later, the instruction which matches both the Comparison Op (a Bool) and +// the flags (produced by the Cmp) specifies the coding of the comparison op +// by matching a specific subtype of Bool operand below, such as cmpOpU. + +// Comparision Code +operand cmpOp() %{ + match(Bool); + + format %{ "" %} + interface(COND_INTER) %{ + equal(0x4, "e"); + not_equal(0x5, "ne"); + less(0xC, "l"); + greater_equal(0xD, "ge"); + less_equal(0xE, "le"); + greater(0xF, "g"); + overflow(0x0, "o"); + no_overflow(0x1, "no"); + %} +%} + +// Comparison Code, unsigned compare. Used by FP also, with +// C2 (unordered) turned into GT or LT already. 
The other bits +// C0 and C3 are turned into Carry & Zero flags. +operand cmpOpU() %{ + match(Bool); + + format %{ "" %} + interface(COND_INTER) %{ + equal(0x4, "e"); + not_equal(0x5, "ne"); + less(0x2, "b"); + greater_equal(0x3, "nb"); + less_equal(0x6, "be"); + greater(0x7, "nbe"); + overflow(0x0, "o"); + no_overflow(0x1, "no"); + %} +%} + + +//----------OPERAND CLASSES---------------------------------------------------- +// Operand Classes are groups of operands that are used as to simplify +// instruction definitions by not requiring the AD writer to specify separate +// instructions for every form of operand when the instruction accepts +// multiple operand types with the same basic encoding and format. The classic +// case of this is memory operands. + +opclass memory(indirect, indOffset16, indIndexOffset, indIndex, indIndexScale, indIndexScaleOffset, + indPosIndexScale, indPosIndexOffset, indPosIndexScaleOffset, + indCompressedOopOffset, + indirectNarrow, indOffset16Narrow, indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow, indIndexScaleOffsetNarrow, + indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow); + +//----------PIPELINE----------------------------------------------------------- +// Rules which define the behavior of the target architectures pipeline. +pipeline %{ + +//----------ATTRIBUTES--------------------------------------------------------- +attributes %{ + fixed_size_instructions; // Fixed size instructions + branch_has_delay_slot; // branch have delay slot in gs2 + max_instructions_per_bundle = 1; // 1 instruction per bundle + max_bundles_per_cycle = 4; // Up to 4 bundles per cycle + bundle_unit_size=4; + instruction_unit_size = 4; // An instruction is 4 bytes long + instruction_fetch_unit_size = 16; // The processor fetches one line + instruction_fetch_units = 1; // of 16 bytes + + // List of nop instructions + nops( MachNop ); + %} + + //----------RESOURCES---------------------------------------------------------- + // Resources are the functional units available to the machine + + resources(D1, D2, D3, D4, DECODE = D1 | D2 | D3| D4, ALU1, ALU2, ALU = ALU1 | ALU2, FPU1, FPU2, FPU = FPU1 | FPU2, MEM, BR); + + //----------PIPELINE DESCRIPTION----------------------------------------------- + // Pipeline Description specifies the stages in the machine's pipeline + + // IF: fetch + // ID: decode + // RD: read + // CA: caculate + // WB: write back + // CM: commit + + pipe_desc(IF, ID, RD, CA, WB, CM); + + + //----------PIPELINE CLASSES--------------------------------------------------- + // Pipeline Classes describe the stages in which input and output are + // referenced by the hardware pipeline. 
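The pipe_class entries that follow encode, for each instruction shape, which stage reads the sources (`RD(read)`), which stage plus extra cycles produces the result (e.g. `WB(write)+5`), and which functional units are occupied (ALU, FPU, MEM, BR). One way to read those annotations is sketched below; this is a toy model only, with assumed stage numbers taken from the pipe_desc above, not anything ADLC actually generates.

```cpp
// Toy reading of the latency annotations (illustration only, not ADLC output).
// Assumed stage numbering follows pipe_desc(IF, ID, RD, CA, WB, CM) above:
// IF=0, ID=1, RD=2, CA=3, WB=4, CM=5.
#include <algorithm>
#include <cstdio>

const int RD_STAGE = 2;
const int WB_STAGE = 4;

// A consumer reads its sources in RD; its producer delivers the result at
// WB plus the extra cycles given in its pipe_class (e.g. "WB(write)+5").
int earliest_dependent_issue(int producer_issue_cycle, int extra_wb_cycles) {
  int result_ready = producer_issue_cycle + WB_STAGE + extra_wb_cycles;
  return std::max(producer_issue_cycle + 1, result_ready - RD_STAGE);
}

int main() {
  std::printf("add  -> use : issue at cycle %d\n", earliest_dependent_issue(0, 1));  // ialu_regI_regI
  std::printf("mulw -> use : issue at cycle %d\n", earliest_dependent_issue(0, 5));  // ialu_mult
  std::printf("mull -> use : issue at cycle %d\n", earliest_dependent_issue(0, 10)); // mulL_reg_reg
}
```

In this reading, a use after an integer multiply is modelled as stalling several cycles longer than a use after a plain ALU op, which is exactly what the larger `+5`/`+10` write-back offsets in the multiply and divide classes below express.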
+ + //No.1 Integer ALU reg-reg operation : dst <-- reg1 op reg2 + pipe_class ialu_regI_regI(rRegI dst, rRegI src1, rRegI src2) %{ + single_instruction; + src1 : RD(read); + src2 : RD(read); + dst : WB(write)+1; + DECODE : ID; + ALU : CA; + %} + + //No.19 Integer mult operation : dst <-- reg1 mult reg2 + pipe_class ialu_mult(rRegI dst, rRegI src1, rRegI src2) %{ + src1 : RD(read); + src2 : RD(read); + dst : WB(write)+5; + DECODE : ID; + ALU2 : CA; + %} + + pipe_class mulL_reg_reg(rRegL dst, rRegL src1, rRegL src2) %{ + src1 : RD(read); + src2 : RD(read); + dst : WB(write)+10; + DECODE : ID; + ALU2 : CA; + %} + + pipe_class ialu_mult_imm(rRegI dst, rRegI src1, immU8 src2) %{ + src1 : RD(read); + dst : WB(write)+5; + DECODE : ID; + ALU2 : CA; + %} + + pipe_class mulL_reg_imm(rRegL dst, rRegL src1, immUL8 src2) %{ + src1 : RD(read); + dst : WB(write)+10; + DECODE : ID; + ALU2 : CA; + %} + + //No.19 Integer div operation : dst <-- reg1 div reg2 + pipe_class ialu_div(rRegI dst, rRegI src1, rRegI src2) %{ + src1 : RD(read); + src2 : RD(read); + dst : WB(write)+10; + DECODE : ID; + ALU2 : CA; + %} + + //No.19 Integer mod operation : dst <-- reg1 mod reg2 + pipe_class ialu_mod(rRegI dst, rRegI src1, rRegI src2) %{ + instruction_count(2); + src1 : RD(read); + src2 : RD(read); + dst : WB(write)+10; + DECODE : ID; + ALU2 : CA; + %} + + //No.15 Long ALU reg-reg operation : dst <-- reg1 op reg2 + pipe_class ialu_regL_regL(rRegL dst, rRegL src1, rRegL src2) %{ + instruction_count(2); + src1 : RD(read); + src2 : RD(read); + dst : WB(write); + DECODE : ID; + ALU : CA; + %} + + //No.18 Long ALU reg-imm operation : dst <-- reg1 op immUL8 + pipe_class ialu_regL_imm(rRegL dst, rRegL src) %{ + instruction_count(2); + src : RD(read); + dst : WB(write); + DECODE : ID; + ALU : CA; + %} + + //No.18 Long ALU reg-imm16 operation : dst <-- reg1 op imm16 + pipe_class ialu_regL_imm16(rRegL dst, rRegL src) %{ + instruction_count(2); + src : RD(read); + dst : WB(write); + DECODE : ID; + ALU : CA; + %} + + //no.16 load Long from memory : + pipe_class ialu_loadL(rRegL dst, memory mem) %{ + instruction_count(2); + mem : RD(read); + dst : WB(write)+5; + DECODE : ID; + MEM : RD; + %} + + //No.17 Store Long to Memory : + pipe_class ialu_storeL(rRegL src, memory mem) %{ + instruction_count(2); + mem : RD(read); + src : RD(read); + DECODE : ID; + MEM : RD; + %} + + //No.2 Integer ALU reg-imm16 operation : dst <-- reg1 op imm16 + pipe_class ialu_regI_imm16(rRegI dst, rRegI src) %{ + single_instruction; + src : RD(read); + dst : WB(write); + DECODE : ID; + ALU : CA; + %} + + //No.3 Integer move operation : dst <-- reg + pipe_class ialu_regI_mov(rRegI dst, rRegI src) %{ + src : RD(read); + dst : WB(write); + DECODE : ID; + ALU : CA; + %} + + //No.4 No instructions : do nothing + pipe_class empty( ) %{ + instruction_count(0); + %} + + //No.5 UnConditional branch + pipe_class pipe_jmp( label labl ) %{ + multiple_bundles; + DECODE : ID; + BR : RD; + %} + + //No.6 ALU Conditional branch : + pipe_class pipe_alu_branch(rRegI src1, rRegI src2, label labl ) %{ + multiple_bundles; + src1 : RD(read); + src2 : RD(read); + DECODE : ID; + BR : RD; + %} + + //no.7 load integer from memory : + pipe_class ialu_reg_mem(rRegI dst, memory mem) %{ + mem : RD(read); + dst : WB(write)+3; + DECODE : ID; + MEM : RD; + %} + + //No.8 Store Integer to Memory : + pipe_class ialu_storeI(rRegI src, memory mem) %{ + mem : RD(read); + src : RD(read); + DECODE : ID; + MEM : RD; + %} + + + //No.10 Floating FPU reg-reg operation : dst <-- reg1 op reg2 + pipe_class 
fpu_regF_regF(regF dst, regF src1, regF src2) %{ + src1 : RD(read); + src2 : RD(read); + dst : WB(write); + DECODE : ID; + FPU : CA; + %} + + //No.22 Floating div operation : dst <-- reg1 div reg2 + pipe_class fpu_div(regF dst, regF src1, regF src2) %{ + src1 : RD(read); + src2 : RD(read); + dst : WB(write); + DECODE : ID; + FPU2 : CA; + %} + + pipe_class fcvt_I2D(regD dst, rRegI src) %{ + src : RD(read); + dst : WB(write); + DECODE : ID; + FPU1 : CA; + %} + + pipe_class fcvt_D2I(rRegI dst, regD src) %{ + src : RD(read); + dst : WB(write); + DECODE : ID; + FPU1 : CA; + %} + + pipe_class pipe_mfc1(rRegI dst, regD src) %{ + src : RD(read); + dst : WB(write); + DECODE : ID; + MEM : RD; + %} + + pipe_class pipe_mtc1(regD dst, rRegI src) %{ + src : RD(read); + dst : WB(write); + DECODE : ID; + MEM : RD(5); + %} + + //No.23 Floating sqrt operation : dst <-- reg1 sqrt reg2 + pipe_class fpu_sqrt(regF dst, regF src1, regF src2) %{ + multiple_bundles; + src1 : RD(read); + src2 : RD(read); + dst : WB(write); + DECODE : ID; + FPU2 : CA; + %} + + //No.11 Load Floating from Memory : + pipe_class fpu_loadF(regF dst, memory mem) %{ + instruction_count(1); + mem : RD(read); + dst : WB(write)+3; + DECODE : ID; + MEM : RD; + %} + + //No.12 Store Floating to Memory : + pipe_class fpu_storeF(regF src, memory mem) %{ + instruction_count(1); + mem : RD(read); + src : RD(read); + DECODE : ID; + MEM : RD; + %} + + //No.13 FPU Conditional branch : + pipe_class pipe_fpu_branch(regF src1, regF src2, label labl ) %{ + multiple_bundles; + src1 : RD(read); + src2 : RD(read); + DECODE : ID; + BR : RD; + %} + +//No.14 Floating FPU reg operation : dst <-- op reg + pipe_class fpu1_regF(regF dst, regF src) %{ + src : RD(read); + dst : WB(write); + DECODE : ID; + FPU : CA; + %} + + pipe_class long_memory_op() %{ + instruction_count(10); multiple_bundles; force_serialization; + fixed_latency(30); + %} + + pipe_class simple_call() %{ + instruction_count(10); multiple_bundles; force_serialization; + fixed_latency(200); + BR : RD; + %} + + pipe_class call() %{ + instruction_count(10); multiple_bundles; force_serialization; + fixed_latency(200); + %} + + //FIXME: + //No.9 Piple slow : for multi-instructions + pipe_class pipe_slow( ) %{ + instruction_count(20); + force_serialization; + multiple_bundles; + fixed_latency(50); + %} + +%} + + +//----------INSTRUCTIONS------------------------------------------------------- +// +// match -- States which machine-independent subtree may be replaced +// by this instruction. +// ins_cost -- The estimated cost of this instruction is used by instruction +// selection to identify a minimum cost tree of machine +// instructions that matches a tree of machine-independent +// instructions. +// format -- A string providing the disassembly for this instruction. +// The value of an instruction's operand may be inserted +// by referring to it with a '$' prefix. +// opcode -- Three instruction opcodes may be provided. These are referred +// to within an encode class as $primary, $secondary, and $tertiary +// rrspectively. The primary opcode is commonly used to +// indicate the type of machine instruction, while secondary +// and tertiary are often used for prefix options or addressing +// modes. +// ins_encode -- A list of encode classes with parameters. The encode class +// name must have been defined in an 'enc_class' specification +// in the encode section of the architecture description. 
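Every `ins_encode %{ ... %}` body in the instructions that follow is C++ executed at code-emission time; the `__` shorthand abbreviates calls on the active (C2_)MacroAssembler, so `__ ldbu(...)` emits one SW64 instruction through the port's assembler. The mock below shows that pattern in a self-contained form; the types are stand-ins and the exact ADLC-generated glue differs, so treat it as an illustration, not HotSpot source.

```cpp
// Illustrative mock of the '__' emission pattern used in ins_encode bodies.
// Register/Address/MacroAssembler here are stand-ins, not HotSpot's types.
#include <cstdio>

struct Register { const char* name; };
struct Address  { Register base; int disp; };

struct MacroAssembler {
  void ldbu (Register d, Address a) { std::printf("ldbu  %s, %d(%s)\n", d.name, a.disp, a.base.name); }
  void sextb(Register d, Register s){ std::printf("sextb %s, %s\n", d.name, s.name); }
};

#define __ _masm.
// Emits the same two-instruction sequence as the loadB rule further down:
// an unsigned (zero-extending) byte load followed by an explicit sign extension.
void emit_loadB(MacroAssembler& _masm, Register dst, Address src) {
  __ ldbu(dst, src);
  __ sextb(dst, dst);
}
#undef __

int main() {
  MacroAssembler m;
  Register t0{"t0"}, sp{"sp"};
  emit_loadB(m, t0, Address{sp, 16});
}
```

The same idiom (unsigned load, then explicit sign extension) is what the loadB/loadS/loadI2B rules below emit, since the SW64 byte and halfword loads used here are zero-extending.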
+ +//-------- only swjdk8-------- +instruct s4AddLp(rRegP dst, rRegI index, immI_2 dis, rRegP base) %{ + match(Set dst (AddP base (LShiftL (ConvI2L index) dis))); + ins_cost(10); + format %{ " s4addl $index,$base,$dst @ s4AddLp " %} + ins_encode %{ + Register dst = $dst$$Register; + Register op1 = $index$$Register; + Register op2 = $base$$Register; + __ s4addl(op1, op2, dst); + %} + ins_pipe( ialu_regL_regL ); +%} + +instruct s8AddLp(rRegP dst, rRegI index, immI_3 scale, rRegP base) %{ + match(Set dst (AddP base (LShiftL (ConvI2L index) scale))); + ins_cost(10); + format %{ " s8addl $index,$base,$dst @ s8AddLp " %} + ins_encode %{ + Register dst = $dst$$Register; + Register op1 = $index$$Register; + Register op2 = $base$$Register; + __ s8addl(op1, op2, dst); + %} + ins_pipe( ialu_regL_regL ); +%} + +instruct s4AddWp(rRegI dst, rRegI index, immI_2 scale, rRegI base) %{ + match(Set dst (AddI base (LShiftI index scale))); + ins_cost(10); + format %{ " s4addw $index,$base,$dst @ s4AddWp " %} + ins_encode %{ + Register dst = $dst$$Register; + Register op1 = $index$$Register; + Register op2 = $base$$Register; + __ s4addw(op1, op2, dst); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct s8AddWp(rRegI dst, rRegI index, immI_3 scale, rRegI base) %{ + match(Set dst (AddI base (LShiftI index scale))); + ins_cost(10); + format %{ " s8addw $index,$base,$dst @ s8AddWp " %} + ins_encode %{ + Register dst = $dst$$Register; + Register op1 = $index$$Register; + Register op2 = $base$$Register; + __ s8addw(op1, op2, dst); + %} + ins_pipe( ialu_regI_regI ); +%} +//---------------------- + +//----------Load/Store/Move Instructions--------------------------------------- +//----------Load Instructions-------------------------------------------------- + +// Load Byte (8 bit signed) +instruct loadB(rRegI dst, memory mem) +%{ + match(Set dst (LoadB mem)); + + ins_cost(125); + format %{ "ldbu $dst, $mem\t# byte\t@loadB\n\t" + "\tsextb $dst, $dst" + %} + + ins_encode %{ + __ ldbu ($dst$$Register, $mem$$Address); + __ sextb($dst$$Register, $dst$$Register); + %} + + ins_pipe(ialu_reg_mem); +%} + +// Load Byte (8 bit signed) into Long Register +instruct loadB2L(rRegL dst, memory mem) +%{ + match(Set dst (ConvI2L (LoadB mem))); + + ins_cost(125); + format %{ "ldbu $dst, $mem\t# byte -> long\t@loadB2L\n\t" + "\tsextb $dst, $dst" %} + + ins_encode %{ + __ ldbu ($dst$$Register, $mem$$Address); + __ sextb($dst$$Register, $dst$$Register); + %} + + ins_pipe(ialu_reg_mem); +%} + +// Load Unsigned Byte (8 bit UNsigned) +instruct loadUB(rRegI dst, memory mem) +%{ + match(Set dst (LoadUB mem)); + + ins_cost(125); + format %{ "ldbu $dst, $mem\t# ubyte\t@loadUB" %} + + ins_encode %{ + __ ldbu($dst$$Register, $mem$$Address); + %} + + ins_pipe(ialu_reg_mem); +%} + +// Load Unsigned Byte (8 bit UNsigned) into Long Register +instruct loadUB2L(rRegL dst, memory mem) +%{ + match(Set dst (ConvI2L (LoadUB mem))); + + ins_cost(125); + format %{ "ldbu $dst, $mem\t# ubyte -> long\t@loadUB2L" %} + + ins_encode %{ + __ ldbu($dst$$Register, $mem$$Address); + %} + + ins_pipe(ialu_reg_mem); +%} + +// Load Unsigned Byte (8 bit UNsigned) with 32-bit mask into Long Register +instruct loadUB2L_immI(rRegL dst, memory mem, immI mask) %{ + match(Set dst (ConvI2L (AndI (LoadUB mem) mask))); + //effect(KILL cr); + + format %{ "ldbu $dst, $mem\t# ubyte & 32-bit mask -> long\t@loadUB2L_immI\n\t" + "andw $dst, right_n_bits($mask, 8)" %} + ins_encode %{ + Register Rdst = $dst$$Register; + __ ldbu(Rdst, $mem$$Address); + __ andw(Rdst, $mask$$constant & 
right_n_bits(8), Rdst); + %} + ins_pipe(ialu_reg_mem); +%} + +// Load Short (16 bit signed) +instruct loadS(rRegI dst, memory mem) +%{ + match(Set dst (LoadS mem)); + + ins_cost(125); + format %{ "ldhu $dst, $mem\t# short\t@loadS\n\t" + "sexth $dst, $dst" %} + + ins_encode %{ + __ ldhu ($dst$$Register, $mem$$Address); + __ sexth($dst$$Register, $dst$$Register); + %} + + ins_pipe(ialu_reg_mem); +%} + +// Load Short (16 bit signed) to Byte (8 bit signed) +instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{ + match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour)); + + ins_cost(125); + format %{ "ldbu $dst, $mem\t# short -> byte\t@loadS2B\n\t" + "sextb $dst, $dst" %} + ins_encode %{ + __ ldbu ($dst$$Register, $mem$$Address); + __ sextb($dst$$Register, $dst$$Register); + %} + ins_pipe(ialu_reg_mem); +%} + +// Load Short (16 bit signed) into Long Register +instruct loadS2L(rRegL dst, memory mem) +%{ + match(Set dst (ConvI2L (LoadS mem))); + + ins_cost(125); + format %{ "ldhu $dst, $mem\t# short\t@loadS2L\n\t" + "sexth $dst, $dst" %} + + ins_encode %{ + __ ldhu ($dst$$Register, $mem$$Address); + __ sexth($dst$$Register, $dst$$Register); + %} + + ins_pipe(ialu_reg_mem); +%} + +// Load Unsigned Short/Char (16 bit UNsigned) +instruct loadUS(rRegI dst, memory mem) +%{ + match(Set dst (LoadUS mem)); + + ins_cost(125); + format %{ "ldhu $dst, $mem\t# ushort/char\t@loadUS" %} + + ins_encode %{ + __ ldhu($dst$$Register, $mem$$Address); + %} + + ins_pipe(ialu_reg_mem); +%} + +// Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed) +instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{ + match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour)); + + ins_cost(125); + format %{ "ldbu $dst, $mem\t# ushort -> byte\t@loadUS2B\n\t" + "sextb $dst, $mem" %} + ins_encode %{ + __ ldbu ($dst$$Register, $mem$$Address); + __ sextb($dst$$Register, $dst$$Register); + %} + ins_pipe(ialu_reg_mem); +%} + +// Load Unsigned Short/Char (16 bit UNsigned) into Long Register +instruct loadUS2L(rRegL dst, memory mem) +%{ + match(Set dst (ConvI2L (LoadUS mem))); + + ins_cost(125); + format %{ "ldhu $dst, $mem\t# ushort/char -> long\t@loadUS2L" %} + + ins_encode %{ + __ ldhu($dst$$Register, $mem$$Address); + %} + + ins_pipe(ialu_reg_mem); +%} + +// Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register +instruct loadUS2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{ + match(Set dst (ConvI2L (AndI (LoadUS mem) mask))); + + format %{ "ldbu $dst, $mem\t# ushort/char & 0xFF -> long\t@loadUS2L_immI_255" %} + ins_encode %{ + __ ldbu($dst$$Register, $mem$$Address); + %} + ins_pipe(ialu_reg_mem); +%} + +// Load Unsigned Short/Char (16 bit UNsigned) with 32-bit mask into Long Register +instruct loadUS2L_immI(rRegL dst, memory mem, immI mask) %{ + match(Set dst (ConvI2L (AndI (LoadUS mem) mask))); + //effect(KILL cr); + + format %{ "ldhu $dst, $mem\t# ushort/char & 32-bit mask -> long\t@loadUS2L_immI\n\t" + "andw $dst, right_n_bits($mask, 16), $dst" %} + ins_encode %{ + Register Rdst = $dst$$Register; + __ ldhu(Rdst, $mem$$Address); + __ andw(Rdst, $mask$$constant & right_n_bits(16), Rdst); + %} + ins_pipe(ialu_reg_mem); +%} + +// Load Integer +instruct loadI(rRegI dst, memory mem) +%{ + match(Set dst (LoadI mem)); + + ins_cost(125); + format %{ "ldws $dst, $mem\t# int\t@loadI" %} + + ins_encode %{ + __ ldws($dst$$Register, $mem$$Address); + %} + + ins_pipe(ialu_reg_mem); +%} + +// Load Integer (32 bit signed) to Byte (8 bit signed) +instruct loadI2B(rRegI 
dst, memory mem, immI_24 twentyfour) %{ + match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour)); + + ins_cost(125); + format %{ "ldbu $dst, $mem\t# int -> byte\t@loadI2B\n\t" + "sextb $dst, $dst" %} + ins_encode %{ + __ ldbu($dst$$Register, $mem$$Address); + __ sextb($dst$$Register, $dst$$Register); + %} + ins_pipe(ialu_reg_mem); +%} + +// Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned) +instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{ + match(Set dst (AndI (LoadI mem) mask)); + + ins_cost(125); + format %{ "ldbu $dst, $mem\t# int -> ubyte\t@loadI2UB" %} + ins_encode %{ + __ ldbu($dst$$Register, $mem$$Address); + %} + ins_pipe(ialu_reg_mem); +%} + +// Load Integer (32 bit signed) to Short (16 bit signed) +instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{ + match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen)); + + ins_cost(125); + format %{ "ldhu $dst, $mem\t# int -> short\t@loadI2S\n\t" + "sexth $dst, $dst" %} + ins_encode %{ + __ ldhu ($dst$$Register, $mem$$Address); + __ sexth($dst$$Register, $dst$$Register); + %} + ins_pipe(ialu_reg_mem); +%} + +// Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned) +instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{ + match(Set dst (AndI (LoadI mem) mask)); + + ins_cost(125); + format %{ "ldhu $dst, $mem\t# int -> ushort/char\t@loadI2US" %} + ins_encode %{ + __ ldhu($dst$$Register, $mem$$Address); + %} + ins_pipe(ialu_reg_mem); +%} + +// Load Integer into Long Register +instruct loadI2L(rRegL dst, memory mem) +%{ + match(Set dst (ConvI2L (LoadI mem))); + + ins_cost(100); + format %{ "ldws $dst, $mem\t# int -> long\t@loadI2L" %} + + ins_encode %{ + __ ldws($dst$$Register, $mem$$Address); + %} + + ins_pipe(ialu_reg_mem); +%} + +// Load Integer with mask 0xFF into Long Register +instruct loadI2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{ + match(Set dst (ConvI2L (AndI (LoadI mem) mask))); + + format %{ "ldbu $dst, $mem\t# int & 0xFF -> long\t@loadI2L_immI_255" %} + ins_encode %{ + __ ldbu($dst$$Register, $mem$$Address); + %} + ins_pipe(ialu_reg_mem); +%} + +// Load Integer with mask 0xFFFF into Long Register +instruct loadI2L_immI_65535(rRegL dst, memory mem, immI_65535 mask) %{ + match(Set dst (ConvI2L (AndI (LoadI mem) mask))); + + format %{ "ldhu $dst, $mem\t# int & 0xFFFF -> long\t@loadI2L_immI_65535" %} + ins_encode %{ + __ ldhu($dst$$Register, $mem$$Address); + %} + ins_pipe(ialu_reg_mem); +%} + +// Load Integer with a 31-bit mask into Long Register TODO:jzy mask length s is OK? 
andw's immediate length -si 8-bit +instruct loadI2L_immU31(rRegL dst, memory mem, immU31 mask) %{ + match(Set dst (ConvI2L (AndI (LoadI mem) mask))); + //effect(KILL cr); + + format %{ "ldwu $dst, $mem\t# int & 31-bit mask -> long\t@loadI2L_immU31\n\t" + "andw $dst, $mask, $dst" %} + ins_encode %{ + Register Rdst = $dst$$Register; + __ ldw(Rdst, $mem$$Address); + __ andw(Rdst, $mask$$constant, Rdst); + %} + ins_pipe(ialu_reg_mem); +%} + +// Load Unsigned Integer into Long Register +instruct loadUI2L(rRegL dst, memory mem, immL_32bits mask) +%{ + match(Set dst (AndL (ConvI2L (LoadI mem)) mask)); + + ins_cost(125); + format %{ "ldwu $dst, $mem\t# uint -> long\t@loadUI2L" %} + + ins_encode %{ + __ ldwu($dst$$Register, $mem$$Address); + %} + + ins_pipe(ialu_reg_mem); +%} + +// Load Long +//TODO implicit null check LSP +instruct loadL(rRegL dst, memory mem) +%{ + match(Set dst (LoadL mem)); + + ins_cost(125); + format %{ "ldl $dst, $mem\t# long\t@loadL" %} + + ins_encode %{ + __ ldl($dst$$Register, $mem$$Address); + %} + + ins_pipe(ialu_reg_mem); // XXX +%} + +// Load Long - UNaligned +instruct loadL_unaligned(rRegL dst, memory mem) +%{ + match(Set dst (LoadL_unaligned mem)); + + // FIXME: Need more effective ldl/ldr + ins_cost(450); + format %{ "loadL_unaligned $dst, $mem #@loadL_unaligned" %} + ins_encode %{ + __ ldl($dst$$Register, $mem$$Address); + %} + ins_pipe( ialu_loadL ); +%} + +// Load Range +//TODO CHECK LSP +instruct loadRange(rRegI dst, memory mem) +%{ + match(Set dst (LoadRange mem)); + + ins_cost(125); // XXX + format %{ "ldws $dst, $mem\t# range\t@loadRange" %} + ins_encode %{ + __ ldws($dst$$Register, $mem$$Address); + %} + ins_pipe(ialu_reg_mem); +%} + +// Load Pointer +instruct loadP(rRegP dst, memory mem) +%{ + match(Set dst (LoadP mem)); + predicate(n->as_Load()->barrier_data() == 0); + + ins_cost(125); // XXX + format %{ "ldptr $dst, $mem\t# ptr\t@loadP" %} + ins_encode (load_P_enc(dst, mem)); + ins_pipe(ialu_reg_mem); // XXX +%} + +// Load Compressed Pointer +instruct loadN(rRegN dst, memory mem) +%{ + match(Set dst (LoadN mem)); + + ins_cost(125); // XXX + format %{ "ldwu $dst, $mem\t# compressed ptr\t@loadN" %} + ins_encode (load_N_enc(dst, mem)); + ins_pipe(ialu_reg_mem); // XXX +%} + + +// Load Klass Pointer +instruct loadKlass(rRegP dst, memory mem) +%{ + match(Set dst (LoadKlass mem)); + + ins_cost(125); // XXX + format %{ "ldptr $dst, $mem\t# class\t@loadKlass" %} + ins_encode (load_P_enc(dst, mem)); + ins_pipe(ialu_reg_mem); // XXX +%} + +// Load narrow Klass Pointer +instruct loadNKlass(rRegN dst, memory mem) +%{ + match(Set dst (LoadNKlass mem)); + + ins_cost(125); // XXX + format %{ "ldwu $dst, $mem\t# compressed klass ptr\t@loadNKlass" %} + ins_encode (load_N_enc(dst, mem)); + ins_pipe(ialu_reg_mem); // XXX +%} + +// Load Float +instruct loadF(regF dst, memory mem) +%{ + match(Set dst (LoadF mem)); + //effect(KILL rscratch1_GP); + + ins_cost(145); // XXX + format %{ "load_float $dst, $mem\t# float\t@loadF" %} + ins_encode %{ + __ load_float($dst$$FloatRegister, $mem$$Address); + %} + ins_pipe(ialu_reg_mem); // XXX +%} + +// Load Float TODO:jzy 4 bytes? +instruct MoveF2VL(regF dst, regF src) %{ + match(Set dst src); + format %{ "fcpys $src, $src, $dst\t! load float (4 bytes)\t@MoveF2VL" %} + ins_encode %{ + __ fcpys($src$$FloatRegister, $src$$FloatRegister, $dst$$FloatRegister); + %} + ins_pipe( fpu_regF_regF ); +%} + +// Load Float TODO: jzy need this? +instruct MoveVL2F(regF dst, regF src) %{ + match(Set dst src); + format %{ "fcpys $src, $src, $dst\t! 
load float (4 bytes)\t@MoveVL2F" %} + ins_encode %{ + __ fcpys($src$$FloatRegister, $src$$FloatRegister, $dst$$FloatRegister); + %} + ins_pipe( fpu_regF_regF); +%} + +// Load Double +/*instruct loadD_partial(regD dst, memory mem) +%{ +// predicate(!UseXmmLoadAndClearUpper); + match(Set dst (LoadD mem)); + + ins_cost(145); // XXX + format %{ "movlpd $dst, $mem\t# double" %} + ins_encode %{ +// __ movdbl($dst$$XMMRegister, $mem$$Address); + %} + ins_pipe(ialu_reg_mem); // XXX +%}*/ + +instruct loadD(regD dst, memory mem) +%{ +// predicate(UseXmmLoadAndClearUpper); + match(Set dst (LoadD mem)); + + ins_cost(145); // XXX + format %{ "load_double $dst, $mem\t# double\t@loadD" %} + ins_encode %{ + __ load_double($dst$$FloatRegister, $mem$$Address); + %} + ins_pipe(ialu_reg_mem); // XXX +%} + +instruct loadD_unaligned(regD dst, memory mem ) %{ + match(Set dst (LoadD_unaligned mem)); + ins_cost(250); + // FIXME: Need more effective ldl/ldr + ins_encode %{ + __ load_double($dst$$FloatRegister, $mem$$Address); + %} + ins_pipe( ialu_reg_mem ); +%} + +// Load Double +// TODO CHECK LSP +instruct MoveD2VL(regD dst, regD src) %{ + match(Set dst src); + format %{ "fcpys $src, $src, $dst\t! load double (8 bytes)\t@MoveD2VL" %} + ins_encode %{ + __ fcpys($src$$FloatRegister, $src$$FloatRegister, $dst$$FloatRegister); + %} + ins_pipe( fpu_regF_regF ); +%} + +// Load Double +// TODO CHECK LSP +instruct MoveVL2D(regD dst, regD src) %{ + match(Set dst src); + format %{ "fcpys $src, $src, $dst\t! load double (8 bytes)\t@MoveVL2D" %} + ins_encode %{ + __ fcpys($src$$FloatRegister, $src$$FloatRegister, $dst$$FloatRegister); + %} + ins_pipe( fpu_regF_regF ); +%} + +// Load Effective Address +instruct leaP16(rRegP dst, indOffset16 mem) +%{ + match(Set dst mem); + + ins_cost(110); // XXX + format %{ "lea $dst, $mem\t# ptr 16\t@leaP16" %} + ins_encode %{ + __ lea($dst$$Register, $mem$$Address); + %} + ins_pipe(ialu_regI_mov); //TODO check +%} + +//instruct leaP32(rRegP dst, indOffset32 mem) +//%{ +// match(Set dst mem); +// +// ins_cost(110); +// format %{ "lea $dst, $mem\t# ptr 32\t@leaP32" %} +// ins_encode %{ +// __ lea($dst$$Register, $mem$$Address); +// %} +// ins_pipe(ialu_regI_mov);//TODO check +//%} + +instruct leaPIdxOff(rRegP dst, indIndexOffset mem) +%{ + match(Set dst mem); + + ins_cost(110); + format %{ "lea $dst, $mem\t# ptr idxoff\t@leaPIdxOff" %} + ins_encode %{ + __ lea($dst$$Register, $mem$$Address); + %} + ins_pipe(ialu_regI_mov); +%} + +instruct leaPIdxScale(rRegP dst, indIndexScale mem) +%{ + match(Set dst mem); + + ins_cost(110); + format %{ "lea $dst, $mem\t# ptr idxscale\t@leaPIdxScale" %} + ins_encode %{ + __ lea($dst$$Register, $mem$$Address); + %} + ins_pipe(ialu_regI_mov); +%} + +instruct leaPPosIdxScale(rRegP dst, indPosIndexScale mem) +%{ + match(Set dst mem); + + ins_cost(110); + format %{ "lea $dst, $mem\t# ptr idxscale\t@leaPPosIdxScale" %} + ins_encode %{ + __ lea($dst$$Register, $mem$$Address); + %} + ins_pipe(ialu_regI_mov); +%} + +instruct leaPIdxScaleOff(rRegP dst, indIndexScaleOffset mem) +%{ + match(Set dst mem); + + ins_cost(110); + format %{ "lea $dst, $mem\t# ptr idxscaleoff\t@leaPIdxScaleOff" %} + ins_encode %{ + __ lea($dst$$Register, $mem$$Address); + %} + ins_pipe(ialu_regI_mov); +%} + +instruct leaPPosIdxOff(rRegP dst, indPosIndexOffset mem) +%{ + match(Set dst mem); + + ins_cost(110); + format %{ "lea $dst, $mem\t# ptr posidxoff\t@leaPPosIdxOff" %} + ins_encode %{ + __ lea($dst$$Register, $mem$$Address); + %} + ins_pipe(ialu_regI_mov); +%} + +instruct 
leaPPosIdxScaleOff(rRegP dst, indPosIndexScaleOffset mem) +%{ + match(Set dst mem); + + ins_cost(110); + format %{ "lea $dst, $mem\t# ptr posidxscaleoff\t@leaPPosIdxScaleOff" %} + ins_encode %{ + __ lea($dst$$Register, $mem$$Address); + %} + ins_pipe(ialu_regI_mov); +%} + +// Load Effective Address which uses Narrow (32-bits) oop +instruct leaPCompressedOopOffset(rRegP dst, indCompressedOopOffset mem) +%{ + predicate(UseCompressedOops && (CompressedOops::shift() != 0)); + match(Set dst mem); + + ins_cost(110); + format %{ "lea $dst, $mem\t# ptr compressedoopoff32\t@leaPCompressedOopOffset" %} + ins_encode %{ + __ lea($dst$$Register, $mem$$Address); + %} + ins_pipe(ialu_regI_mov); +%} + +instruct leaP16Narrow(rRegP dst, indOffset16Narrow mem) +%{ + predicate(CompressedOops::shift() == 0); + match(Set dst mem); + + ins_cost(110); // XXX + format %{ "lea $dst, $mem\t# ptr off8narrow\t@leaP8Narrow" %} + ins_encode %{ + __ lea($dst$$Register, $mem$$Address); + %} + ins_pipe(ialu_regI_mov); +%} + +//instruct leaP32Narrow(rRegP dst, indOffset32Narrow mem) +//%{ +// predicate(CompressedOops::shift() == 0); +// match(Set dst mem); +// +// ins_cost(110); +// format %{ "lea $dst, $mem\t# ptr off32narrow\t@leaP32Narrow" %} +// ins_encode %{ +// __ lea($dst$$Register, $mem$$Address); +// %} +// ins_pipe(ialu_regI_mov); +//%} + +instruct leaPIdxOffNarrow(rRegP dst, indIndexOffsetNarrow mem) +%{ + predicate(CompressedOops::shift() == 0); + match(Set dst mem); + + ins_cost(110); + format %{ "lea $dst, $mem\t# ptr idxoffnarrow\t@leaPIdxOffNarrow" %} + ins_encode %{ + __ lea($dst$$Register, $mem$$Address); + %} + ins_pipe(ialu_regI_mov); +%} + +instruct leaPIdxScaleNarrow(rRegP dst, indIndexScaleNarrow mem) +%{ + predicate(CompressedOops::shift() == 0); + match(Set dst mem); + + ins_cost(110); + format %{ "lea $dst, $mem\t# ptr idxscalenarrow\t@leaPIdxScaleNarrow" %} + ins_encode %{ + __ lea($dst$$Register, $mem$$Address); + %} + ins_pipe(ialu_regI_mov); +%} + +instruct leaPIdxScaleOffNarrow(rRegP dst, indIndexScaleOffsetNarrow mem) +%{ + predicate(CompressedOops::shift() == 0); + match(Set dst mem); + + ins_cost(110); + format %{ "lea $dst, $mem\t# ptr idxscaleoffnarrow\t@leaPIdxScaleOffNarrow" %} + ins_encode %{ + __ lea($dst$$Register, $mem$$Address); + %} + ins_pipe(ialu_regI_mov); +%} + +instruct leaPPosIdxOffNarrow(rRegP dst, indPosIndexOffsetNarrow mem) +%{ + predicate(CompressedOops::shift() == 0); + match(Set dst mem); + + ins_cost(110); + format %{ "lea $dst, $mem\t# ptr posidxoffnarrow\t@leaPPosIdxOffNarrow" %} + ins_encode %{ + __ lea($dst$$Register, $mem$$Address); + %} + ins_pipe(ialu_regI_mov); +%} + +instruct leaPPosIdxScaleOffNarrow(rRegP dst, indPosIndexScaleOffsetNarrow mem) +%{ + predicate(CompressedOops::shift() == 0); + match(Set dst mem); + + ins_cost(110); + format %{ "lea $dst, $mem\t# ptr posidxscaleoffnarrow\t@leaPPosIdxScaleOffNarrow" %} + ins_encode %{ + __ lea($dst$$Register, $mem$$Address); + %} + ins_pipe(ialu_regI_mov); +%} + +instruct loadConI(rRegI dst, immI src) +%{ + match(Set dst src); + + format %{ "mov_immediate32s $dst, $src\t# int\t@loadConI" %} + ins_encode %{ + __ mov_immediate32s($dst$$Register, $src$$constant); + %} + ins_pipe(ialu_regI_regI); +%} + +instruct loadConI0(rRegI dst, immI0 src) +%{ + match(Set dst src); + + ins_cost(50); + format %{ "movl $dst, R0\t# int\t@loadConI0" %} + ins_encode %{ + __ movl($dst$$Register, R0); + %} + ins_pipe(ialu_regI_regI); +%} + +instruct loadConL(rRegL dst, immL src) +%{ + match(Set dst src); + + ins_cost(150); + format %{ 
"mov_immediate64 $dst, $src\t# long\t@loadConL" %} + ins_encode %{ + __ mov_immediate64($dst$$Register, $src$$constant); + %} + ins_pipe(ialu_regL_regL); +%} + +instruct loadConL0(rRegL dst, immL0 src) +%{ + match(Set dst src); + + ins_cost(50); + format %{ "movl $dst, R0\t# int\t@loadConL0" %} + ins_encode %{ + __ movl($dst$$Register, R0); + %} + ins_pipe(ialu_regL_regL); +%} + +//instruct loadConUL32(rRegL dst, immUL32 src) +//%{ +// match(Set dst src); +// +// ins_cost(60); +// format %{ "mov_immediate32u $dst, $src\t# long (unsigned 32-bit)\t@loadConUL32" %} +// ins_encode %{ +// __ mov_immediate32u($dst$$Register, $src$$constant); +// %} +// ins_pipe(ialu_regL_regL); +//%} + + +instruct loadConL32(rRegL dst, immL32 src) +%{ + match(Set dst src); + + ins_cost(70); + format %{ "mov_immediate32s $dst, $src\t# long (32-bit)\t@loadConL32" %} + ins_encode %{ + __ mov_immediate32s($dst$$Register, (int)$src$$constant); + %} + ins_pipe(ialu_regL_regL); +%} + +//use in swjdk8 need to check lsp? +instruct loadConL16(rRegL dst, immL16 src) +%{ + match(Set dst src); + + ins_cost(70); + format %{ "ldi $dst, $src, R0\t# long (16-bit)\t@loadConL16" %} + ins_encode %{ + __ ldi($dst$$Register, (int)$src$$constant, R0); + %} + ins_pipe(ialu_regL_regL); +%} + +instruct loadConP(rRegP dst, immP con) %{ + match(Set dst con); + + format %{ "mov_immediate64 $dst, $con\t# ptr\t@loadConP" %} + ins_encode %{ + Register dst = $dst$$Register; + long* value = (long*)$con$$constant; + + if($con->constant_reloc() == relocInfo::metadata_type){ + int klass_index = __ oop_recorder()->find_index((Klass*)value); + RelocationHolder rspec = metadata_Relocation::spec(klass_index); + + __ relocate(rspec); + __ prepare_patch_li48(dst, (long)value); + }else if($con->constant_reloc() == relocInfo::oop_type){ + int oop_index = __ oop_recorder()->find_index((jobject)value); + RelocationHolder rspec = oop_Relocation::spec(oop_index); + + __ relocate(rspec); + __ prepare_patch_li48(dst, (long)value); + } else if ($con->constant_reloc() == relocInfo::none) { + __ mov_immediate64(dst, (long)value); + } + %} + ins_pipe(ialu_regL_regL); // XXX +%} + +instruct loadConP0(rRegP dst, immP0 src) +%{ + match(Set dst src); + + ins_cost(50); + format %{ "movl $dst, R0\t# ptr\t@loadConP0" %} + ins_encode %{ + __ movl($dst$$Register, R0); + %} + ins_pipe(ialu_regL_regL); +%} + +instruct loadConP31(rRegP dst, immP31 src) +%{ + match(Set dst src); + + ins_cost(60); + format %{ "mov_immediate32u $dst, $src\t# ptr (positive 32-bit)\t@loadConP31" %} + ins_encode %{ + __ mov_immediate32u($dst$$Register, $src$$constant); + %} + ins_pipe(ialu_regL_regL); +%} + +instruct loadConP_poll(rRegP dst, immP_poll src) %{ + match(Set dst src); + + ins_cost(50); + format %{ "mov_immediate64 $dst, $src #@loadConP_poll" %} + + ins_encode %{ + Register dst = $dst$$Register; + intptr_t value = (intptr_t)$src$$constant; + + __ mov_immediate64(dst, (long)value); + %} + + ins_pipe( ialu_regI_regI ); +%} + +instruct loadConF(regF dst, immF con) %{ + match(Set dst con); + ins_cost(125); + format %{ "load_float $dst, [$constantaddress]\t# load from constant table: float=$con\t@loadConF" %} + ins_encode %{ + __ load_float($dst$$FloatRegister, $constantaddress($con)); + %} + ins_pipe(pipe_slow); +%} +//TODO:jzy which is immN0? +instruct loadConN0(rRegN dst, immN0 src) %{ + match(Set dst src); + + format %{ "movl $dst, $src\t# compressed NULL ptr\t@loadConN0" %} + ins_encode %{ + __ movl($dst$$Register, R0); + %} + ins_pipe(ialu_regI_regI); +%} +//TODO:jzy compressed ptr? 
+instruct loadConN(rRegN dst, immN src) %{ + match(Set dst src); + + ins_cost(125); + format %{ "set_narrow_oop $dst, $src\t# compressed ptr\t@loadConN" %} + ins_encode %{ + address con = (address)$src$$constant; + if (con == NULL) { + ShouldNotReachHere(); + } else { + __ set_narrow_oop($dst$$Register, (jobject)$src$$constant); + } + %} + ins_pipe(ialu_regI_regI); // XXX +%} + +instruct loadConNKlass(rRegN dst, immNKlass src) %{ + match(Set dst src); + + ins_cost(125); + format %{ "set_narrow_klass $dst, $src\t# compressed klass ptr\t@loadConNKlass" %} + ins_encode %{ + address con = (address)$src$$constant; + if (con == NULL) { + ShouldNotReachHere(); + } else { + __ set_narrow_klass($dst$$Register, (Klass*)$src$$constant); + } + %} + ins_pipe(ialu_regI_regI); // XXX +%} + +instruct loadConF0(regF dst, immF0 src) +%{ + match(Set dst src); + ins_cost(100); + + format %{ "fcpys f31, f31, $dst\t# float 0.0\t@loadConF0" %} + ins_encode %{ + __ fcpys(f31, f31, $dst$$FloatRegister); + %} + ins_pipe(pipe_slow); +%} + +// Use the same format since predicate() can not be used here. +instruct loadConD(regD dst, immD con) %{ + match(Set dst con); + ins_cost(125); + format %{ "load_double $dst, [$constantaddress]\t# load from constant table: double=$con\t@loadConD" %} + ins_encode %{ + __ load_double($dst$$FloatRegister, $constantaddress($con)); + %} + ins_pipe(fpu_loadF); +%} + +instruct loadConD0(regD dst, immD0 src) +%{ + match(Set dst src); + ins_cost(100); + + format %{ "fcpys f31, f31, $dst\t# double 0.0\t@loadConD0" %} + ins_encode %{ + __ fcpys(f31, f31, $dst$$FloatRegister); + %} + ins_pipe(fpu_loadF); +%} + +instruct loadSSI(rRegI dst, stackSlotI src) +%{ + match(Set dst src); + + ins_cost(125); + format %{ "ldws $dst, $src\t# int stk\t@loadSSI" %} + ins_encode %{ + __ ldws($dst$$Register, Address(esp, $src$$disp)); + %} + ins_pipe(ialu_reg_mem); +%} + +instruct loadSSL(rRegL dst, stackSlotL src) +%{ + match(Set dst src); + + ins_cost(125); + format %{ "ldl $dst, $src\t# long stk\t@loadSSL" %} + ins_encode %{ + __ ldl($dst$$Register, Address(esp, $src$$disp)); + %} + ins_pipe(ialu_reg_mem); +%} + +instruct loadSSP(rRegP dst, stackSlotP src) +%{ + match(Set dst src); + + ins_cost(125); + format %{ "ldl $dst, $src\t# ptr stk\t@loadSSP" %} + ins_encode %{ + __ ldl($dst$$Register, Address(esp, $src$$disp)); + %} + ins_pipe(ialu_reg_mem); +%} + +instruct loadSSF(regF dst, stackSlotF src) +%{ + match(Set dst src); + + ins_cost(125); + format %{ "load_float $dst, $src\t# float stk\t@loadSSF" %} + ins_encode %{ + __ load_float($dst$$FloatRegister, Address(esp, $src$$disp)); + %} + ins_pipe(pipe_slow); // XXX +%} + +// Use the same format since predicate() can not be used here. +instruct loadSSD(regD dst, stackSlotD src) +%{ + match(Set dst src); + + ins_cost(125); + format %{ "load_double $dst, $src\t# double stk\t@loadSSD" %} + ins_encode %{ + __ load_double($dst$$FloatRegister, Address(esp, $src$$disp)); + %} + ins_pipe(pipe_slow); // XXX +%} + +// Prefetch instructions for allocation. +// Must be safe to execute with invalid address (cannot fault). 
+ +instruct prefetchAlloc( memory mem ) %{ + predicate(AllocatePrefetchInstr==3); + match(PrefetchAllocation mem); + ins_cost(125); + + format %{ "PREFETCHW $mem\t# Prefetch allocation into level 1 cache and mark modified" %} + ins_encode %{ +// __ prefetchw($mem$$Address); + %} + ins_pipe(pipe_slow); +%} + +instruct prefetchAllocNTA( memory mem ) %{ + predicate(AllocatePrefetchInstr==0); + match(PrefetchAllocation mem); + ins_cost(125); + + format %{ "PREFETCHNTA $mem\t# Prefetch allocation to non-temporal cache for write" %} + ins_encode %{ + Register dst = R0; + __ load_float(f31, $mem$$Address); // fillde + %} + ins_pipe(pipe_slow); +%} + +instruct prefetchAllocT0( memory mem ) %{ + predicate(AllocatePrefetchInstr==1); + match(PrefetchAllocation mem); + ins_cost(125); + + format %{ "PREFETCHT0 $mem\t# Prefetch allocation to level 1 and 2 caches for write" %} + ins_encode %{ +// __ prefetcht0($mem$$Address); + %} + ins_pipe(pipe_slow); +%} + +instruct prefetchAllocT2( memory mem ) %{ + predicate(AllocatePrefetchInstr==2); + match(PrefetchAllocation mem); + ins_cost(125); + + format %{ "PREFETCHT2 $mem\t# Prefetch allocation to level 2 cache for write" %} + ins_encode %{ +// __ prefetcht2($mem$$Address); + %} + ins_pipe(pipe_slow); +%} + +//----------Store Instructions------------------------------------------------- + +// Store Byte +instruct storeB(memory mem, rRegI src) +%{ + match(Set mem (StoreB mem src)); + + ins_cost(125); // XXX + format %{ "stb $src, $mem\t# byte\t@storeB" %} + ins_encode %{ + __ stb($src$$Register, $mem$$Address); + %} + ins_pipe(ialu_storeI); +%} + +// Store Char/Short +instruct storeC(memory mem, rRegI src) +%{ + match(Set mem (StoreC mem src)); + + ins_cost(125); // XXX + format %{ "sth $src, $mem\t# char/short\t@storeC" %} + ins_encode %{ + __ sth($src$$Register, $mem$$Address); + %} + ins_pipe(ialu_storeI); +%} + +// Store Integer +instruct storeI(memory mem, rRegI src) +%{ + match(Set mem (StoreI mem src)); + + ins_cost(125); // XXX + format %{ "stw $src, $mem\t# int\t@storeI" %} + + ins_encode %{ + __ stw($src$$Register, $mem$$Address); + %} + ins_pipe(ialu_storeI); +%} + +// Store Long +//TODO implicit null check LSP +instruct storeL(memory mem, rRegL src) +%{ + match(Set mem (StoreL mem src)); + + ins_cost(125); // XXX + format %{ "stl $src, $mem\t# long\t@storeL" %} + + ins_encode %{ + __ stl($src$$Register, $mem$$Address); + %} + ins_pipe(ialu_storeI); // XXX +%} + +// Store Pointer +instruct storeP(memory mem, any_RegP src) +%{ + match(Set mem (StoreP mem src)); + + ins_cost(125); // XXX + format %{ "stl $src, $mem\t# ptr\t@storeP" %} + ins_encode %{ + __ stl($src$$Register, $mem$$Address); + %} + ins_pipe(ialu_storeI); +%} + +instruct storeImmP0(memory mem, immP0 zero) +%{ + predicate(UseCompressedOops && (CompressedOops::base() == NULL)); + match(Set mem (StoreP mem zero)); + + ins_cost(125); // XXX + format %{ "stl S5, $mem\t# ptr (rheapbase==0)\t@storeImmP0" %} + + ins_encode %{ +//TODO:where set rheapbase? jzy + __ stl(rheapbase, $mem$$Address); + %} + + ins_pipe(ialu_storeI); +%} + +/*no immdiate operand in swjdk8 +// Store NULL Pointer, mark word, or other simple pointer constant. 
TODO:jzy immP31 is NULL +instruct storeImmP(memory mem, immP31 src) +%{ + match(Set mem (StoreP mem src)); + + ins_cost(150); // XXX + format %{"movwu rscratch3, $src\t# ptr\t@storeImmP" + "stl rscratch3, $mem" %} + + ins_encode %{ + __ mov_immediate32(rscratch3, $src$$constant); + __ stl(rscratch3, $mem$$Address); + %} + ins_pipe(ialu_storeI); +%}*/ + +// Store Compressed Pointer +instruct storeN(memory mem, rRegN src) +%{ + match(Set mem (StoreN mem src)); + + ins_cost(125); // XXX + format %{ "stw $src, $mem\t# compressed ptr\t@storeN" %} + ins_encode %{ + __ stw($src$$Register, $mem$$Address); + %} + ins_pipe(ialu_storeI); +%} + +instruct storeNKlass(memory mem, rRegN src) +%{ + match(Set mem (StoreNKlass mem src)); + + ins_cost(125); // XXX + format %{ "stw $src, $mem\t# compressed klass ptr\t@storeNKlass" %} + ins_encode %{ + __ stw($src$$Register, $mem$$Address); + %} + ins_pipe(ialu_storeI); +%} + +instruct storeImmN0(memory mem, immN0 zero) +%{ + predicate(CompressedOops::base() == NULL); + match(Set mem (StoreN mem zero)); + + ins_cost(125); // XXX + format %{ "stw $mem, rheapbase\t# compressed ptr (R12_heapbase==0)\t@storeImmN0" %} + ins_encode %{ + __ stw(rheapbase, $mem$$Address); + %} + ins_pipe(ialu_storeI); +%} + +/*no immdiate operand in swjdk8 +instruct storeImmN(memory mem, immN src) +%{ + match(Set mem (StoreN mem src)); + + ins_cost(150); // XXX + format %{ "set_narrow_oop $src, $mem\t# compressed ptr\t@storeImmN" %} + ins_encode %{ + address con = (address)$src$$constant; + if (con == NULL) { + __ stw(R0, $mem$$Address); + } else { + __ set_narrow_oop($mem$$Address, (jobject)$src$$constant); + } + %} + ins_pipe(ialu_storeI); +%} + +instruct storeImmNKlass(memory mem, immNKlass src) +%{ + match(Set mem (StoreNKlass mem src)); + + ins_cost(150); // XXX + format %{ "set_narrow_klass $src, $mem\t# compressed klass ptr\t@storeImmNKlass" %} + ins_encode %{ + __ set_narrow_klass($mem$$Address, (Klass*)$src$$constant); + %} + ins_pipe(ialu_storeI); +%}*/ + +// Store Integer Immediate +instruct storeImmI0(memory mem, immI0 zero) +%{ + predicate(UseCompressedOops && (CompressedOops::base() == NULL)); + match(Set mem (StoreI mem zero)); + + ins_cost(125); // XXX + format %{ "stw rheapbase, $mem\t# int (rheapbase==0)\t@storeImmI0" %} + ins_encode %{ + __ stw(rheapbase, $mem$$Address); + %} + ins_pipe(ialu_storeI); +%} + + + +// Store Long Immediate +//TODO implicit null check LSP +instruct storeImmL0(memory mem, immL0 zero) +%{ + predicate(UseCompressedOops && (CompressedOops::base() == NULL)); + match(Set mem (StoreL mem zero)); + + ins_cost(125); // XXX + format %{ "stl rheapbase, $mem\t# long (rheapbase==0)\t@storeImmL0" %} + ins_encode %{ + __ stl(rheapbase, $mem$$Address); + %} + ins_pipe(ialu_storeI); +%} +/*no immdiate operand in swjdk8 +instruct storeImmL(memory mem, immL32 src) +%{ + match(Set mem (StoreL mem src)); + + ins_cost(150); + format %{ "movws rscratch3, $src\t# long\t@storeImmL\n\t" + "stl rscratch3, $mem" %} + ins_encode %{ + __ movws(rscratch3, (u_int32_t)$src$$constant); + __ stl(rscratch3, $mem$$Address); + %} + ins_pipe(ialu_storeI); +%}*/ + +// Store Short/Char Immediate +instruct storeImmC0(memory mem, immI0 zero) +%{ + predicate(UseCompressedOops && (CompressedOops::base() == NULL)); + match(Set mem (StoreC mem zero)); + + ins_cost(125); // XXX + format %{ "sth rheapbase, $mem\t# short/char (rheapbase==0)\t@storeImmC0" %} + ins_encode %{ + __ sth(rheapbase, $mem$$Address); + %} + ins_pipe(ialu_storeI); +%} +/*no immdiate operand in swjdk8 +instruct 
storeImmI16(memory mem, immI16 src) +%{ +// predicate(UseStoreImmI16); + match(Set mem (StoreC mem src)); + + ins_cost(150); + format %{ "ldi rscratch3, $src\t# short/char\t@storeImmI16\n\t" + "sth rscratch3, $mem" %} + ins_encode %{ + __ ldi(rscratch3, $src$$constant, R0); + __ sth(rscratch3, $mem$$Address); + %} + ins_pipe(ialu_storeI); +%}*/ + +// Store Byte Immediate +instruct storeImmB0(memory mem, immI0 zero) +%{ + predicate(UseCompressedOops && (CompressedOops::base() == NULL)); + match(Set mem (StoreB mem zero)); + + ins_cost(125); // XXX + format %{ "stb rheapbase, $mem\t# short/char (rheapbase==0)\t@storeImmB0" %} + ins_encode %{ + __ stb(rheapbase, $mem$$Address); + %} + ins_pipe(ialu_storeI); +%} + +/*no immdiate operand in swjdk8 +instruct storeImmB(memory mem, immI8 src) +%{ + match(Set mem (StoreB mem src)); + + ins_cost(150); // XXX + format %{ "ldi rscratch3, $src\t# byte\t@storeImmB\n\t" + "stb rscratch3, $mem" %} + ins_encode %{ + __ ldi(rscratch3, $src$$constant, R0); + __ stb(rscratch3, $mem$$Address); + %} + ins_pipe(ialu_storeI); +%}*/ + +// Store CMS card-mark Immediate +instruct storeImmCM0_reg(memory mem, immI0 zero) +%{ + predicate(UseCompressedOops && (CompressedOops::base() == NULL)); + match(Set mem (StoreCM mem zero)); + + ins_cost(125); // XXX + format %{ "stb rheapbase, $mem\t# CMS card-mark byte 0 (rheapbase==0)\t@storeImmCM0_reg" %} + + ins_encode %{ + __ stb(rheapbase, $mem$$Address); + %} + ins_pipe(ialu_storeI); +%} + +instruct storeImmCM0(memory mem, immI0 src) +%{ + match(Set mem (StoreCM mem src)); + + ins_cost(150); // XXX + format %{ "stb R0, $mem\t# CMS card-mark byte 0\t@storeImmCM0" %} + + ins_encode %{ + __ stb(R0, $mem$$Address); + %} + ins_pipe(ialu_storeI); +%} + +// Store Float +instruct storeF(memory mem, regF src) +%{ + match(Set mem (StoreF mem src)); + + ins_cost(95); // XXX + format %{ "store_float $src, $mem\t# float\t@storeF" %} + ins_encode %{ + __ store_float($src$$FloatRegister, $mem$$Address); + %} + ins_pipe(pipe_slow); // XXX +%} + +// Store immediate Float value (it is faster than store from XMM register) +instruct storeF0(memory mem, immF0 zero) +%{ + predicate(UseCompressedOops && (CompressedOops::base() == NULL)); + match(Set mem (StoreF mem zero)); + + ins_cost(25); // XXX + format %{ "store_float f31, $mem\t# float 0. (rheapbase==0)\t@storeF0" %} + + ins_encode %{ + __ store_float(f31, $mem$$Address); + %} + ins_pipe(ialu_storeI); +%} + +/*no immdiate operand in swjdk8 +//TODO:it's ok todo this ? jzy +instruct storeF_imm(memory mem, immF src) +%{ + match(Set mem (StoreF mem src)); + + ins_cost(50); + format %{ "mov_immdiate32 rscratch3, $src\t# float\t@storeF_imm\n\t" + "stw rscratch3, $mem\t# float" %} + + ins_encode %{ + __ mov_immediate32(rscratch3, $src$$constant); + __ stw(rscratch3, $mem$$Address); + %} + ins_pipe(ialu_storeI); +%} +*/ +// Store Double +instruct storeD(memory mem, regD src) +%{ + match(Set mem (StoreD mem src)); + + ins_cost(95); // XXX + format %{ "store_double $src, $mem\t# double\t@storeD" %} + ins_encode %{ + __ store_double($src$$FloatRegister, $mem$$Address); + %} + ins_pipe(pipe_slow); // XXX +%} + +// Store immediate double 0.0 (it is faster than store from XMM register) TODO:is zero? 
jzy +instruct storeD0_imm(memory mem, immD0 src) +%{ + predicate(!UseCompressedOops || (CompressedOops::base() != NULL));// lsp todo check + match(Set mem (StoreD mem src)); + + ins_cost(50); + format %{ "store_double f31, $mem\t# double 0.\t@storeD0_imm" %} + + ins_encode %{ + __ store_double(f31, $mem$$Address); + %} + ins_pipe(ialu_storeI); +%} + +instruct storeD0(memory mem, immD0 zero) +%{ + predicate(UseCompressedOops && (CompressedOops::base() == NULL)); + match(Set mem (StoreD mem zero)); + + ins_cost(25); // XXX + format %{ "store_double f31, $mem\t# double 0. \t@storeD0" %} + + ins_encode %{ + __ store_double(f31, $mem$$Address); + %} + ins_pipe(ialu_storeI); +%} + +instruct storeSSI(stackSlotI dst, rRegI src) +%{ + match(Set dst src); + + ins_cost(100); + format %{ "stw $src, $dst\t# int stk\t@storeSSI" %} + + ins_encode %{ + __ stw($src$$Register, $dst$$Address); + %} + ins_pipe( ialu_storeI ); +%} + +instruct storeSSL(stackSlotL dst, rRegL src) +%{ + match(Set dst src); + + ins_cost(100); + format %{ "stl $src, $dst\t# long stk\t@storeSSL" %} + + ins_encode %{ + __ stl($src$$Register, $dst$$Address); + %} + ins_pipe(ialu_storeI); +%} + +instruct storeSSP(stackSlotP dst, rRegP src) +%{ + match(Set dst src); + + ins_cost(100); + format %{ "stl $src, $dst\t# ptr stk\t@storeSSP" %} + + ins_encode %{ + __ stl($src$$Register, $dst$$Address); + %} + ins_pipe(ialu_storeI); +%} + +instruct storeSSF(stackSlotF dst, regF src) +%{ + match(Set dst src); + + ins_cost(95); // XXX + format %{ "store_float $src, $dst\t# float stk\t@storeSSF" %} + ins_encode %{ + __ store_float($src$$FloatRegister, Address(esp, $dst$$disp)); + %} + ins_pipe(pipe_slow); // XXX +%} + +instruct storeSSD(stackSlotD dst, regD src) +%{ + match(Set dst src); + + ins_cost(95); // XXX + format %{ "store_double $src, $dst\t# double stk\t@storeSSD" %} + ins_encode %{ + __ store_double($src$$FloatRegister, Address(esp, $dst$$disp)); + %} + ins_pipe(pipe_slow); // XXX +%} + +//----------BSWAP Instructions------------------------------------------------- +instruct bytes_reverse_int(rRegI dst) %{ + match(Set dst (ReverseBytesI dst)); + + format %{ "bswapw $dst @bytes_reverse_int" %} + //opcode(0x0F, 0xC8); /*Opcode 0F /C8 */ + ins_encode %{ + Register dst = $dst$$Register; + __ swap(dst); + %} + ins_pipe( pipe_slow ); +%} + +instruct bytes_reverse_long(rRegL dst) %{ + match(Set dst (ReverseBytesL dst)); + + format %{ "bswapl $dst @bytes_reverse_long" %} + + ins_encode %{ + Register dst = $dst$$Register; + int zap1 = 0x1; + int zap2 = 0x80; + int count = 0x38; + int zap3 = 0x7E; + //__ stop("bytes_reverse_long"); + assert(dst != rscratch3 && dst != rscratch2_AT, "dst should not equal to AT and rscratch3"); + __ slll(dst, count, rscratch3); + __ srll(dst, count, rscratch2_AT); + __ bis(rscratch3, rscratch2_AT, rscratch2_AT); + __ zapnot(dst, zap3, dst);//set the highest and lowest bit to zero + __ bis(dst, rscratch2_AT, dst); + + for(int i=1; i<4; i++){ + zap1 = zap1<<1; + zap2 = zap2>>1; + count = count - 16; + zap3 = 0xff - zap1 -zap2; + __ zapnot(dst, zap1, rscratch3); + __ slll(rscratch3, count, rscratch3); + __ zapnot(dst, zap2, rscratch2_AT); + __ srll(rscratch2_AT, count, rscratch2_AT); + __ bis(rscratch3, rscratch2_AT, rscratch2_AT); + __ zapnot(dst, zap3, dst); + __ bis(dst, rscratch2_AT, dst); + } + + %} + ins_pipe( pipe_slow); +%} + +instruct bytes_reverse_unsigned_short(rRegI dst) %{ + match(Set dst (ReverseBytesUS dst)); + + format %{ "zapnot $dst, #0x3, $dst $dst @bytes_reverse_unsigned_short\n\t" + "huswap $dst" 
%} + ins_encode %{ + Register dst = $dst$$Register; + __ zapnot(dst, 0x3, dst); + __ huswap(dst); + %} + ins_pipe( pipe_slow ); +%} + +instruct bytes_reverse_short(rRegI dst) %{ + match(Set dst (ReverseBytesS dst)); + + format %{ "zapnot $dst, #0x3, $dst $dst @bytes_reverse_unsigned_short\n\t" + "hswap $dst" %} + ins_encode %{ + Register dst = $dst$$Register; + __ zapnot(dst, 0x3, dst); + __ hswap(dst); + %} + ins_pipe( pipe_slow ); +%} + + +//---------- Zeros Count Instructions ------------------------------------------ +// CountLeadingZerosINode CountTrailingZerosINode +instruct countLeadingZerosI(rRegI dst, rRegI src) %{ + predicate(UseCountLeadingZerosInstruction); + match(Set dst (CountLeadingZerosI src)); + + format %{ "CTLZ $dst, $dst #@countLeadingZerosI" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src$$Register; + + __ zapnot(src, 0xf, dst); + __ ctlz(dst, dst); + __ subw(dst, 32, dst); + %} + ins_pipe( ialu_regL_regL ); +%} + +instruct countLeadingZerosL(rRegI dst, rRegL src) %{ + predicate(UseCountLeadingZerosInstruction); + match(Set dst (CountLeadingZerosL src)); + + format %{ "CTLZ $src,$dst #@countLeadingZerosL" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src$$Register; + + __ ctlz(src, dst); + %} + ins_pipe(ialu_regL_regL); +%} + +instruct countTrailingZerosI(rRegI dst, rRegI src) %{ + predicate(UseCountTrailingZerosInstruction); + match(Set dst (CountTrailingZerosI src)); + + format %{ "CTTZ $src, $dst\n\t #@countTrailingZerosI"%} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src$$Register; + + __ slll(src, 32, dst); + __ cttz(dst, dst); + __ subw(dst, 32, dst); + %} + ins_pipe( ialu_regL_regL ); +%} + +instruct countTrailingZerosL(rRegI dst, rRegL src) %{ + predicate(UseCountTrailingZerosInstruction); + match(Set dst (CountTrailingZerosL src)); + + format %{ "CTTZ $src,$dst #@countTrailingZerosL" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src$$Register; + + __ cttz(src, dst); + %} + ins_pipe( ialu_regL_regL ); +%} + + +//---------- Population Count Instructions ------------------------------------- + +instruct popCountI(rRegI dst, rRegI src) %{ + predicate(UsePopCountInstruction); + match(Set dst (PopCountI src)); + + format %{ "popcnt $dst, $src" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src$$Register; + + __ zapnot(src, 0xf, dst); + __ ctpop(dst, dst); + %} + ins_pipe(ialu_regL_regL); +%} + +/* memory operands no use in sw64 +instruct popCountI_mem(rRegI dst, memory mem) %{ + predicate(UsePopCountInstruction); + match(Set dst (PopCountI (LoadI mem))); + + format %{ "popcnt $dst, $mem" %} + ins_encode %{ + Register dst = $dst$$Register; + + __ ldw(rscratch2_AT, $mem$$Address); + __ zapnot(rscratch2_AT, 0xf, dst); + __ ctpop(dst, dst); + %} + ins_pipe(ialu_reg_mem); +%}*/ + +// Note: Long.bitCount(long) returns an int. +instruct popCountL(rRegI dst, rRegL src) %{ + predicate(UsePopCountInstruction); + match(Set dst (PopCountL src)); + + format %{ "CTPOP $dst, $src" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src$$Register; + + __ ctpop(src, dst); + %} + ins_pipe(ialu_regL_regL); +%} + +/* memory operands no use in sw64 +// Note: Long.bitCount(long) returns an int. 
+
+instruct popCountL_mem(rRegI dst, memory mem) %{
+ predicate(UsePopCountInstruction);
+ match(Set dst (PopCountL (LoadL mem)));
+
+ format %{ "popcnt $dst, $mem" %}
+ ins_encode %{
+ Register dst = $dst$$Register;
+
+ __ ldl(rscratch2_AT, $mem$$Address);
+ __ ctpop(rscratch2_AT, dst);
+ %}
+ ins_pipe(ialu_reg_mem);
+%}*/
+
+//----------MemBar Instructions-----------------------------------------------
+// Memory barrier flavors
+
+instruct membar_acquire() %{
+ match(MemBarAcquire);
+ format %{ "MEMBAR-acquire @ membar_acquire" %}
+ ins_cost(400);
+ ins_encode %{
+ __ memb();
+ %}
+ ins_pipe(empty);
+%}
+
+instruct load_fence() %{
+ match(LoadFence);
+ ins_cost(400);
+
+ format %{ "MEMBAR @ load_fence" %}
+ ins_encode %{
+ __ memb();
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct membar_acquire_lock() %{
+ match(MemBarAcquireLock);
+ ins_cost(0);
+
+ size(0);
+ format %{ "MEMBAR-acquire (acquire as part of CAS in prior FastLock so empty encoding) @ membar_acquire_lock" %}
+ ins_encode();
+ ins_pipe(empty);
+%}
+
+instruct unnecessary_membar_release() %{
+ predicate(unnecessary_release(n));
+ match(MemBarRelease);
+ ins_cost(0);
+
+ format %{ "membar_release (elided)" %}
+
+ ins_encode %{
+ __ block_comment("membar_release (elided)");
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct membar_release() %{
+ match(MemBarRelease);
+ match(StoreFence);
+ ins_cost(400);//0
+
+ format %{ "MEMBAR-release StoreFence @ membar_release" %}
+
+ ins_encode %{
+// // Attention: DO NOT DELETE THIS GUY!
+ __ memb();
+ %}
+
+ ins_pipe(pipe_slow);
+%}
+
+instruct membar_release_lock() %{
+ match(MemBarReleaseLock);
+ ins_cost(0);
+ format %{ "MEMBAR-release-lock (release in FastUnlock so empty) @ membar_release_lock" %}
+ ins_encode();
+ ins_pipe(empty);
+%}
+
+instruct membar_volatile() %{
+ match(MemBarVolatile);
+ ins_cost(400);
+
+ format %{ "MEMBAR-volatile" %}
+ ins_encode %{
+ //if( !os::is_MP() ) return; // Not needed on single CPU
+ __ memb();
+
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct membar_storestore() %{
+ match(MemBarStoreStore);
+ match(StoreStoreFence);
+ format %{ "MEMBAR-storestore @ membar_storestore" %}
+ ins_cost(400);
+ ins_encode %{
+ __ memb();
+ %}
+ ins_pipe(empty);
+%}
+
+//----------Move Instructions--------------------------------------------------
+
+instruct castX2P(rRegP dst, rRegL src)
+%{
+ match(Set dst (CastX2P src));
+
+ format %{ "movl $dst, $src\t# long->ptr @castX2P" %}
+ ins_encode %{
+ Register src = $src$$Register;
+ Register dst = $dst$$Register;
+
+ if(src != dst)
+ __ movl(dst, src);
+ %}
+ ins_pipe(ialu_regI_mov);
+%}
+
+instruct castP2X(rRegL dst, rRegP src)
+%{
+ match(Set dst (CastP2X src));
+
+ format %{ "movl $dst, $src\t# ptr -> long @castP2X" %}
+ ins_encode %{
+ Register src = $src$$Register;
+ Register dst = $dst$$Register;
+
+ if(src != dst)
+ __ movl(dst, src);
+ %}
+ ins_pipe(ialu_regI_mov);
+%}
+
+// Convert oop into int for vectors alignment masking
+instruct convP2I(rRegI dst, rRegP src)
+%{
+ match(Set dst (ConvL2I (CastP2X src)));
+
+ format %{ "movwu $dst, $src\t# ptr -> int" %}
+ ins_encode %{
+ __ movwu($dst$$Register, $src$$Register); //LSP CHECK?? OK
+ %}
+ ins_pipe(ialu_regI_regI); // XXX
+%}
+
+// Convert compressed oop into int for vectors alignment masking
+// in case of 32bit oops (heap < 4Gb).
+instruct convN2I(rRegI dst, rRegN src) +%{ + predicate(CompressedOops::shift() == 0); + match(Set dst (ConvL2I (CastP2X (DecodeN src)))); + + format %{ "movwu $dst, $src\t# compressed ptr -> int" %} + ins_encode %{ + __ movwu($dst$$Register, $src$$Register);//LSP CHECK?? OK + %} + ins_pipe(ialu_regI_regI); // XXX +%} + +// Convert oop pointer into compressed form +instruct encodeHeapOop(rRegN dst, rRegP src) %{ + predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull); + match(Set dst (EncodeP src)); +// effect(KILL cr); + format %{ "encode_heap_oop $dst,$src" %} + ins_encode %{ + Register s = $src$$Register; + Register d = $dst$$Register; +// __ movl(d, s); + __ encode_heap_oop(d, s); + %} + ins_pipe(ialu_regL_regL); +%} + +instruct encodeHeapOop_not_null(rRegN dst, rRegP src) %{ + predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull); + match(Set dst (EncodeP src)); + format %{ "encode_heap_oop_not_null $dst,$src" %} + ins_encode %{ + __ encode_heap_oop_not_null($dst$$Register, $src$$Register); + %} + ins_pipe(ialu_regL_regL); +%} + +instruct decodeHeapOop(rRegP dst, rRegN src) %{ + predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull && + n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant); + match(Set dst (DecodeN src)); +// effect(KILL cr); + format %{ "decode_heap_oop $dst,$src" %} + ins_encode %{ + Register s = $src$$Register; + Register d = $dst$$Register; +// __ movl(d, s); + __ decode_heap_oop(d, s); + %} + ins_pipe(ialu_regL_regL); +%} + +instruct decodeHeapOop_not_null(rRegP dst, rRegN src) %{ + predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull || + n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant); + match(Set dst (DecodeN src)); + format %{ "decode_heap_oop_not_null $dst,$src" %} + ins_encode %{ + Register s = $src$$Register; + Register d = $dst$$Register; + if (s != d) { + __ decode_heap_oop_not_null(d, s); + } else { + __ decode_heap_oop_not_null(d); + } + %} + ins_pipe(ialu_regL_regL); +%} + +instruct encodeKlass_not_null(rRegN dst, rRegP src) %{ + match(Set dst (EncodePKlass src)); +// effect(KILL cr); + format %{ "encode_klass_not_null $dst,$src" %} + ins_encode %{ + __ encode_klass_not_null($dst$$Register, $src$$Register); + %} + ins_pipe(ialu_regL_regL); +%} + +instruct decodeKlass_not_null(rRegP dst, rRegN src) %{ + match(Set dst (DecodeNKlass src)); +// effect(KILL cr); + format %{ "decode_klass_not_null $dst,$src" %} + ins_encode %{ + Register s = $src$$Register; + Register d = $dst$$Register; + if (s != d) { + __ decode_klass_not_null(d, s); + } else { + __ decode_klass_not_null(d); + } + %} + ins_pipe(ialu_regL_regL); +%} +/* + +//----------Conditional Move--------------------------------------------------- +// Jump +// dummy instruction for generating temp registers +instruct jumpXtnd_offset(rRegL switch_val, immI2 shift, rRegI dest) %{ + match(Jump (LShiftL switch_val shift)); + ins_cost(350); + predicate(false); + effect(TEMP dest); + + format %{ "leaq $dest, [$constantaddress]\n\t" + "jmp [$dest + $switch_val << $shift]\n\t" %} + ins_encode %{ + // We could use jump(ArrayAddress) except that the macro assembler needs to use r10 + // to do that and the compiler is using that register as one it can allocate. + // So we build it all by hand. 
+ // Address index(noreg, switch_reg, (Address::ScaleFactor)$shift$$constant); + // ArrayAddress dispatch(table, index); + Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant); + __ lea($dest$$Register, $constantaddress); + __ jmp(dispatch); + %} + ins_pipe(pipe_jmp); +%} + +instruct jumpXtnd_addr(rRegL switch_val, immI2 shift, immL32 offset, rRegI dest) %{ + match(Jump (AddL (LShiftL switch_val shift) offset)); + ins_cost(350); + effect(TEMP dest); + + format %{ "leaq $dest, [$constantaddress]\n\t" + "jmp [$dest + $switch_val << $shift + $offset]\n\t" %} + ins_encode %{ + // We could use jump(ArrayAddress) except that the macro assembler needs to use r10 + // to do that and the compiler is using that register as one it can allocate. + // So we build it all by hand. + // Address index(noreg, switch_reg, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant); + // ArrayAddress dispatch(table, index); + Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant); + __ lea($dest$$Register, $constantaddress); + __ jmp(dispatch); + %} + ins_pipe(pipe_jmp); +%} + +instruct jumpXtnd(rRegL switch_val, rRegI dest) %{ + match(Jump switch_val); + ins_cost(350); + effect(TEMP dest); + + format %{ "leaq $dest, [$constantaddress]\n\t" + "jmp [$dest + $switch_val]\n\t" %} + ins_encode %{ + // We could use jump(ArrayAddress) except that the macro assembler needs to use r10 + // to do that and the compiler is using that register as one it can allocate. + // So we build it all by hand. + // Address index(noreg, switch_reg, Address::times_1); + // ArrayAddress dispatch(table, index); + Address dispatch($dest$$Register, $switch_val$$Register, Address::times_1); + __ lea($dest$$Register, $constantaddress); + __ jmp(dispatch); + %} + ins_pipe(pipe_jmp); +%} +*/ + +// Conditional move +instruct cmovI_cmpI_reg_reg(rRegI dst, rRegI src, rRegI tmp1, rRegI tmp2, cmpOp cop) +%{ + match(Set dst (CMoveI (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); + + ins_cost(200); // XXX + format %{ "cmovI$cop $dst, $src\t# signed, int @cmovI_cmpI_reg_reg" %} + ins_encode %{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + __ cmpws(flag, op1, op2); + __ selne(rcc, src, dst, dst); + %} + ins_pipe(pipe_slow); +%} + +instruct cmovI_cmpI_reg_imm(rRegI dst, immU8 src, rRegI tmp1, rRegI tmp2, cmpOp cop ) %{ + match(Set dst (CMoveI (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "cmovI$cop $dst, $src\t# signed, int @cmovI_cmpI_reg_imm" + %} + + ins_encode %{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + Register dst = $dst$$Register; + int src = $src$$constant; + int flag = $cop$$cmpcode; + __ cmpws(flag, op1, op2, rcc); + __ selne(rcc, src, dst, dst); + + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovI_cmpL_reg_reg(rRegI dst, rRegI src, rRegL tmp1, rRegL tmp2, cmpOp cop) +%{ + match(Set dst (CMoveI (Binary cop (CmpL tmp1 tmp2)) (Binary dst src))); + + ins_cost(200); // XXX + format %{ "cmovI$cop $dst, $src\t# signed, int @cmovI_cmpL_reg_reg" %} + ins_encode %{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + __ cmpls(flag, op1, op2); + __ selne(rcc, src, dst, dst); + %} + ins_pipe(pipe_slow); +%} + +instruct cmovI_cmpL_reg_imm(rRegI 
dst, immU8 src, rRegL tmp1, rRegL tmp2, cmpOp cop) +%{ + match(Set dst (CMoveI (Binary cop (CmpL tmp1 tmp2)) (Binary dst src))); + + ins_cost(200); // XXX + format %{ "cmovI$cop $dst, $src\t# signed, int @cmovI_cmpL_reg_imm" %} + ins_encode %{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + Register dst = $dst$$Register; + int src = $src$$constant; + int flag = $cop$$cmpcode; + __ cmpls(flag, op1, op2); + __ selne(rcc, src, dst, dst); + %} + ins_pipe(pipe_slow); +%} + +instruct cmovI_cmpN_reg_reg(rRegI dst, rRegI src, rRegN tmp1, rRegN tmp2, cmpOpU cop) %{ + match(Set dst (CMoveI (Binary cop (CmpN tmp1 tmp2)) (Binary dst src))); + + ins_cost(200); // XXX + format %{ "cmovI$cop $dst, $src\t# unsigned, int @cmovI_cmpN_reg_reg" %} + ins_encode%{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + __ cmpwu(flag, op1, op2); + __ selne(rcc, src, dst, dst); + %} + ins_pipe(pipe_slow); +%} + +instruct cmovI_cmpN_reg_imm(rRegI dst, immU8 src, rRegN tmp1, rRegN tmp2, cmpOpU cop) %{ + match(Set dst (CMoveI (Binary cop (CmpN tmp1 tmp2)) (Binary dst src))); + + ins_cost(200); // XXX + format %{ "cmovI$cop $dst, $src\t# unsigned, int @cmovI_cmpN_reg_imm" %} + ins_encode%{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + Register dst = $dst$$Register; + int src = $src$$constant; + int flag = $cop$$cmpcode; + __ cmpwu(flag, op1, op2); + __ selne(rcc, src, dst, dst); + %} + ins_pipe(pipe_slow); +%} + +instruct cmovI_cmpP_reg_reg(rRegI dst, rRegI src, rRegP tmp1, rRegP tmp2, cmpOpU cop) %{ + match(Set dst (CMoveI (Binary cop (CmpP tmp1 tmp2)) (Binary dst src))); + + ins_cost(200); // XXX + format %{ "cmovI$cop $dst, $src\t# unsigned, int @cmovI_cmpP_reg_reg" %} + ins_encode%{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + __ cmplu(flag, op1, op2, rcc); + __ selne(rcc, src, dst, dst); + %} + ins_pipe(pipe_slow); +%} + +instruct cmovI_cmpP_reg_imm(rRegI dst, immU8 src, rRegP tmp1, rRegP tmp2, cmpOpU cop) %{ + match(Set dst (CMoveI (Binary cop (CmpP tmp1 tmp2)) (Binary dst src))); + + ins_cost(200); // XXX + format %{ "cmovI$cop $dst, $src\t# unsigned, int @cmovI_cmpP_reg_imm" %} + ins_encode%{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + Register dst = $dst$$Register; + int src = $src$$constant; + int flag = $cop$$cmpcode; + __ cmplu(flag, op1, op2, rcc); + __ selne(rcc, src, dst, dst); + %} + ins_pipe(pipe_slow); +%} + +instruct cmovI_cmpU_reg_reg(rRegI dst, rRegI src, rRegI tmp1, rRegI tmp2, cmpOpU cop) +%{ + match(Set dst (CMoveI (Binary cop (CmpU tmp1 tmp2)) (Binary dst src))); + + ins_cost(200); // XXX + format %{ "cmovI$cop $dst, $src\t# signed, int @cmovI_cmpU_reg_reg" %} + ins_encode %{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + __ cmpwu(flag, op1, op2); + __ selne(rcc, src, dst, dst); + %} + ins_pipe(pipe_slow); +%} + +instruct cmovI_cmpU_reg_imm(rRegI dst, immU8 src, rRegI tmp1, rRegI tmp2, cmpOpU cop) +%{ + match(Set dst (CMoveI (Binary cop (CmpU tmp1 tmp2)) (Binary dst src))); + + ins_cost(200); // XXX + format %{ "cmovI$cop $dst, $src\t# signed, int @cmovI_cmpU_reg_imm" %} + ins_encode %{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + Register dst = 
$dst$$Register; + int src = $src$$constant; + int flag = $cop$$cmpcode; + __ cmpwu(flag, op1, op2); + __ selne(rcc, src, dst, dst); + %} + ins_pipe(pipe_slow); +%} + +instruct cmovI_cmpD_reg_reg(rRegI dst, rRegI src, regD tmp1, regD tmp2, cmpOp cop ) %{ + match(Set dst (CMoveI (Binary cop (CmpD tmp1 tmp2)) (Binary dst src))); + + ins_cost(200); // XXX + format %{ "cmovI$cop $dst, $src\t# signed, ptr @cmovI_cmpD_reg_reg" %} + ins_encode%{ + FloatRegister op1 = $tmp1$$FloatRegister; + FloatRegister op2 = $tmp2$$FloatRegister; + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + __ cmpfd(flag, op1, op2); + __ fimovd(FcmpRES, rcc); + __ selne(rcc, src, dst, dst); + %} + ins_pipe(pipe_slow); // XXX +%} + +instruct cmovI_cmpD_reg_imm(rRegI dst, immU8 src, regD tmp1, regD tmp2, cmpOp cop ) %{ + match(Set dst (CMoveI (Binary cop (CmpD tmp1 tmp2)) (Binary dst src))); + + ins_cost(200); // XXX + format %{ "cmovI$cop $dst, $src\t# signed, ptr @cmovI_cmpD_reg_imm" %} + ins_encode%{ + FloatRegister op1 = $tmp1$$FloatRegister; + FloatRegister op2 = $tmp2$$FloatRegister; + Register dst = $dst$$Register; + int src = $src$$constant; + int flag = $cop$$cmpcode; + __ cmpfd(flag, op1, op2); + __ fimovd(FcmpRES, rcc); + __ selne(rcc, src, dst, dst); + %} + ins_pipe(pipe_slow); // XXX +%} + +instruct cmovI_cmpF_reg_reg(rRegI dst, rRegI src, regF tmp1, regF tmp2, cmpOp cop ) %{ + match(Set dst (CMoveI (Binary cop (CmpF tmp1 tmp2)) (Binary dst src))); + + ins_cost(200); // XXX + format %{ "cmovI$cop $dst, $src\t# signed, ptr @cmovI_cmpF_reg_reg" %} + ins_encode%{ + FloatRegister op1 = $tmp1$$FloatRegister; + FloatRegister op2 = $tmp2$$FloatRegister; + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + __ cmpfs(flag, op1, op2); + __ fimovd(FcmpRES, rcc); + __ selne(rcc, src, dst, dst); + %} + ins_pipe(pipe_slow); // XXX +%} + +instruct cmovI_cmpF_reg_imm(rRegI dst, immU8 src, regF tmp1, regF tmp2, cmpOp cop ) %{ + match(Set dst (CMoveI (Binary cop (CmpF tmp1 tmp2)) (Binary dst src))); + + ins_cost(200); // XXX + format %{ "cmovI$cop $dst, $src\t# signed, ptr @cmovI_cmpF_reg_imm" %} + ins_encode%{ + FloatRegister op1 = $tmp1$$FloatRegister; + FloatRegister op2 = $tmp2$$FloatRegister; + Register dst = $dst$$Register; + int src = $src$$constant; + int flag = $cop$$cmpcode; + __ cmpfs(flag, op1, op2); + __ fimovd(FcmpRES, rcc); + __ selne(rcc, src, dst, dst); + %} + ins_pipe(pipe_slow); // XXX +%} +/* +instruct cmovI_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{ + match(Set dst (CMoveI (Binary cop cr) (Binary dst src))); + ins_cost(200); + expand %{ + cmovI_regU(cop, cr, dst, src); + %} +%} +*/ +/* memory operands no need for SW64 +// Conditional move +instruct cmovI_mem1(rRegI dst, memory src, rRegI tmp1, rRegI tmp2, cmpOp cop, rFlagsReg cr) %{ + match(Set dst (CMoveI (Binary cop (CmpI tmp1 tmp2)) (Binary dst (LoadI src)))); + effect(KILL cr); + ins_cost(250); // XXX + format %{ "cmovl$cop $dst, $src\t# signed, int" %} + ins_encode%{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + Register dst = $dst$$Register; + Address src = $src$$Address; + int flag = $cop$$cmpcode; + __ ldw(rscratch3, src); + __ cmpws(flag, op1, op2, cr); + __ selne(cr, rscratch3, dst, dst); + %} + ins_pipe(pipe_slow); +%} + +instruct cmovI_mem2(rRegI dst, memory src, rRegL tmp1, rRegL tmp2, cmpOp cop) %{ + match(Set dst (CMoveI (Binary cop (CmpL tmp1 tmp2)) (Binary dst (LoadI src)))); + + ins_cost(250); // XXX + format %{ 
"cmovl$cop $dst, $src\t# signed, int" %} + ins_encode%{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + Register dst = $dst$$Register; + Address src = $src$$Address; + int flag = $cop$$cmpcode; + __ ldw(rscratch2_AT, src); + __ cmpls(flag, op1, op2, rscratch1_GP); + __ selne(rscratch1_GP, rscratch2_AT, dst, dst); + %} + ins_pipe(pipe_slow); +%} + +// Conditional move +instruct cmovI_memU1(rRegI dst, memory src, rRegN tmp1, rRegN tmp2, cmpOpU cop) +%{ + match(Set dst (CMoveI (Binary cop (CmpN tmp1 tmp2)) (Binary dst (LoadI src)))); + + ins_cost(250); // XXX + format %{ "cmovl$cop $dst, $src\t# unsigned, int" %} + ins_encode%{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + Register dst = $dst$$Register; + Address src = $src$$Address; + int flag = $cop$$cmpcode; + __ ldw(rscratch2_AT, src); + __ cmpwu(flag, op1, op2, rscratch1_GP); + __ selne(rscratch1_GP, rscratch2_AT, dst, dst); + %} + ins_pipe(pipe_slow); +%} + +instruct cmovI_memU2(rRegI dst, memory src, rRegP tmp1, rRegP tmp2, cmpOpU cop) +%{ + match(Set dst (CMoveI (Binary cop (CmpN tmp1 tmp2)) (Binary dst (LoadI src)))); + + ins_cost(250); // XXX + format %{ "cmovl$cop $dst, $src\t# unsigned, int" %} + ins_encode%{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + Register dst = $dst$$Register; + Address src = $src$$Address; + int flag = $cop$$cmpcode; + __ ldw(rscratch2_AT, src); + __ cmplu(flag, op1, op2, rscratch1_GP); + __ selne(rscratch1_GP, rscratch2_AT, dst, dst); + %} + ins_pipe(pipe_slow); +%}*/ +/* +instruct cmovI_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, memory src) %{ + match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src)))); + ins_cost(250); + expand %{ + cmovI_memU(cop, cr, dst, src); + %} +%} +*/ + +// Conditional move +instruct cmovN_cmpI_reg_reg(rRegN dst, rRegN src, rRegI tmp1, rRegI tmp2, cmpOp cop) +%{ + match(Set dst (CMoveN (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); + + ins_cost(200); // XXX + format %{ "cmovN$cop $dst, $src\t# signed, compressed ptr @cmovN_cmpI_reg_reg" %} + ins_encode%{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + __ cmpws(flag, op1, op2); + __ selne(rcc, src, dst, dst); + %} + ins_pipe(pipe_slow); +%} + +instruct cmovN_cmpI_reg_imm(rRegN dst, immU8 src, rRegI tmp1, rRegI tmp2, cmpOp cop) +%{ + match(Set dst (CMoveN (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); + + ins_cost(200); // XXX + format %{ "cmovN$cop $dst, $src\t# signed, compressed ptr @cmovN_cmpI_reg_imm" %} + ins_encode%{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + Register dst = $dst$$Register; + int src = $src$$constant; + int flag = $cop$$cmpcode; + __ cmpws(flag, op1, op2); + __ selne(rcc, src, dst, dst); + %} + ins_pipe(pipe_slow); +%} + +instruct cmovN_cmpL_reg_reg(rRegN dst, rRegN src, rRegL tmp1, rRegL tmp2, cmpOp cop) +%{ + match(Set dst (CMoveN (Binary cop (CmpL tmp1 tmp2)) (Binary dst src))); + + ins_cost(200); // XXX + format %{ "cmovN$cop $dst, $src\t# signed, compressed ptr @cmovN_cmpL_reg_reg" %} + ins_encode%{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + __ cmpls(flag, op1, op2); + __ selne(rcc, src, dst, dst); + %} + ins_pipe(pipe_slow); +%} + +instruct cmovN_cmpU_reg_reg(rRegN dst, rRegN src, rRegI tmp1, rRegI tmp2, cmpOpU cop) +%{ + match(Set dst (CMoveN (Binary cop 
(CmpU tmp1 tmp2)) (Binary dst src))); + + ins_cost(200); // XXX + format %{ "cmovN$cop $dst, $src\t# signed, compressed ptr @cmovN_cmpU_reg_reg" %} + ins_encode%{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + __ cmpwu(flag, op1, op2); + __ selne(rcc, src, dst, dst); + %} + ins_pipe(pipe_slow); +%} + +// Conditional move +instruct cmovN_cmpN_reg_reg(rRegN dst, rRegN src, rRegN tmp1, rRegN tmp2, cmpOpU cop) +%{ + match(Set dst (CMoveN (Binary cop (CmpN tmp1 tmp2)) (Binary dst src))); + + ins_cost(200); // XXX + format %{ "cmovN$cop $dst, $src\t# unsigned, compressed ptr @cmovN_cmpN_reg_reg" %} + ins_encode%{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + __ cmpwu(flag, op1, op2); + __ selne(rcc, src, dst, dst); + %} + ins_pipe(pipe_slow); +%} + +instruct cmovN_cmpN_reg_imm(rRegN dst, immU8 src, rRegN tmp1, rRegN tmp2, cmpOpU cop) +%{ + match(Set dst (CMoveN (Binary cop (CmpN tmp1 tmp2)) (Binary dst src))); + + ins_cost(200); // XXX + format %{ "cmovN$cop $dst, $src\t# unsigned, compressed ptr @cmovN_cmpN_reg_imm" %} + ins_encode%{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + Register dst = $dst$$Register; + int src = $src$$constant; + int flag = $cop$$cmpcode; + __ cmpwu(flag, op1, op2); + __ selne(rcc, src, dst, dst); + %} + ins_pipe(pipe_slow); +%} + +instruct cmovN_cmpP_reg_reg(rRegN dst, rRegN src, rRegP tmp1, rRegP tmp2, cmpOpU cop) +%{ + match(Set dst (CMoveN (Binary cop (CmpP tmp1 tmp2)) (Binary dst src))); + + ins_cost(200); // XXX + format %{ "cmovN$cop $dst, $src\t# unsigned, compressed ptr @cmovN_cmpP_reg_reg" %} + ins_encode%{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + __ cmplu(flag, op1, op2); + __ selne(rcc, src, dst, dst); + %} + ins_pipe(pipe_slow); +%} + +instruct cmovN_cmpP_reg_imm(rRegN dst, immU8 src, rRegP tmp1, rRegP tmp2, cmpOpU cop) +%{ + match(Set dst (CMoveN (Binary cop (CmpP tmp1 tmp2)) (Binary dst src))); + + ins_cost(200); // XXX + format %{ "cmovN$cop $dst, $src\t# unsigned, compressed ptr @cmovN_cmpP_reg_reg" %} + ins_encode%{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + Register dst = $dst$$Register; + int src = $src$$constant & ((1<<8)-1); + int flag = $cop$$cmpcode; + __ cmplu(flag, op1, op2); + __ selne(rcc, src, dst, dst); + %} + ins_pipe(pipe_slow); +%} + +/* +instruct cmovN_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{ + match(Set dst (CMoveN (Binary cop cr) (Binary dst src))); + ins_cost(200); + expand %{ + cmovN_regU(cop, cr, dst, src); + %} +%} +*/ + +// Conditional move +instruct cmovP_cmpI_reg_reg(rRegP dst, rRegP src, rRegI tmp1, rRegI tmp2, cmpOp cop) +%{ + match(Set dst (CMoveP (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); + + ins_cost(200); // XXX + format %{ "cmovP$cop $dst, $src\t# signed, ptr @cmovP_cmpI_reg_reg" %} + ins_encode%{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + __ cmpws(flag, op1, op2); + __ selne(rcc, src, dst, dst); + %} + ins_pipe(pipe_slow); // XXX +%} + +instruct cmovP_cmpU_reg_reg(rRegP dst, rRegP src, rRegI tmp1, rRegI tmp2, cmpOpU cop) +%{ + match(Set dst (CMoveP (Binary cop (CmpU tmp1 
tmp2)) (Binary dst src))); + + ins_cost(200); // XXX + format %{ "cmovP$cop $dst, $src\t# signed, ptr @cmovP_cmpU_reg_reg" %} + ins_encode%{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + __ cmpwu(flag, op1, op2); + __ selne(rcc, src, dst, dst); + %} + ins_pipe(pipe_slow); // XXX +%} + +instruct cmovP_cmpF_reg_reg(rRegP dst, rRegP src, regF tmp1, regF tmp2, cmpOp cop ) %{ + match(Set dst (CMoveP (Binary cop (CmpF tmp1 tmp2)) (Binary dst src))); + + ins_cost(200); // XXX + format %{ "cmovP$cop $dst, $src\t# signed, ptr @cmovP_cmpF_reg_reg" %} + ins_encode%{ + FloatRegister op1 = $tmp1$$FloatRegister; + FloatRegister op2 = $tmp2$$FloatRegister; + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + __ cmpfs(flag, op1, op2); + __ fimovs(FcmpRES, rcc); + __ selne(rcc, src, dst, dst); + %} + ins_pipe(pipe_slow); // XXX +%} + +instruct cmovP_cmpD_reg_reg(rRegP dst, rRegP src, regD tmp1, regD tmp2, cmpOp cop ) %{ + match(Set dst (CMoveP (Binary cop (CmpD tmp1 tmp2)) (Binary dst src))); + + ins_cost(200); // XXX + format %{ "cmovP$cop $dst, $src\t# signed, ptr @cmovP_cmpD_reg_reg" %} + ins_encode%{ + FloatRegister op1 = $tmp1$$FloatRegister; + FloatRegister op2 = $tmp2$$FloatRegister; + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + __ cmpfd(flag, op1, op2); + __ fimovd(FcmpRES, rcc); + __ selne(rcc, src, dst, dst); + %} + ins_pipe(pipe_slow); // XXX +%} + + +instruct cmovP_cmpL_reg_reg(rRegP dst, rRegP src, rRegL tmp1, rRegL tmp2, cmpOp cop) +%{ + match(Set dst (CMoveP (Binary cop (CmpL tmp1 tmp2)) (Binary dst src))); + + ins_cost(200); // XXX + format %{ "cmovP$cop $dst, $src\t# signed, ptr @cmovP_cmpL_reg_reg" %} + ins_encode%{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + __ cmpls(flag, op1, op2); + __ selne(rcc, src, dst, dst); + %} + ins_pipe(pipe_slow); // XXX +%} + +// Conditional move +instruct cmovP_cmpN_reg_reg(rRegP dst, rRegP src, rRegN tmp1, rRegN tmp2, cmpOpU cop) +%{ + match(Set dst (CMoveP (Binary cop (CmpN tmp1 tmp2)) (Binary dst src))); + + ins_cost(200); // XXX + format %{ "cmovP$cop $dst, $src\t# unsigned, ptr @cmovP_cmpN_reg_reg" %} + ins_encode%{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + __ cmpwu(flag, op1, op2); + __ selne(rcc, src, dst, dst); + %} + ins_pipe(pipe_slow); // XXX +%} + +instruct cmovP_cmpP_reg_reg(rRegP dst, rRegP src, rRegP tmp1, rRegP tmp2, cmpOpU cop) +%{ + match(Set dst (CMoveP (Binary cop (CmpP tmp1 tmp2)) (Binary dst src))); + + ins_cost(200); // XXX + format %{ "cmovP$cop $dst, $src\t# unsigned, ptr @cmovP_cmpP_reg_reg" %} + ins_encode%{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + __ cmplu(flag, op1, op2 ); + __ selne(rcc, src, dst, dst); + %} + ins_pipe(pipe_slow); // XXX +%} +/* +instruct cmovP_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{ + match(Set dst (CMoveP (Binary cop cr) (Binary dst src))); + ins_cost(200); + expand %{ + cmovP_regU(cop, cr, dst, src); + %} +%} + +// DISABLED: Requires the ADLC to emit a bottom_type call that +// correctly meets the two pointer arguments; 
one is an incoming +// register but the other is a memory operand. ALSO appears to +// be buggy with implicit null checks. +//*/ + +/* memory operands no need for SW64 +// Conditional move +instruct cmovP_mem1(rRegP dst, memory src, rRegI tmp1, rRegI tmp2, cmpOp cop) +%{ + match(Set dst (CMoveP (Binary cop (CmpI tmp1 tmp2)) (Binary dst (LoadP src)))); + ins_cost(250); + format %{ "CMOV$cop $dst,$src\t# ptr" %} + opcode(0x0F,0x40); + ins_encode%{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + Register dst = $dst$$Register; + Address src = $src$$Address; + int flag = $cop$$cmpcode; + __ ldl(rscratch2_AT, src); + __ cmpws(flag, op1, op2, rscratch1_GP); + __ selne(rscratch1_GP, rscratch2_AT, dst, dst); + %} + ins_pipe( pipe_slow ); +%} + +instruct cmovP_mem2(rRegP dst, memory src, rRegL tmp1, rRegL tmp2, cmpOp cop) +%{ + match(Set dst (CMoveP (Binary cop (CmpL tmp1 tmp2)) (Binary dst (LoadP src)))); + ins_cost(250); + format %{ "CMOV$cop $dst,$src\t# ptr" %} + ins_encode%{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + Register dst = $dst$$Register; + Address src = $src$$Address; + int flag = $cop$$cmpcode; + __ ldl(rscratch2_AT, src); + __ cmpls(flag, op1, op2, rscratch1_GP); + __ selne(rscratch1_GP, rscratch2_AT, dst, dst); + %} + ins_pipe( pipe_slow ); +%} + +// Conditional move +instruct cmovP_memU1(rRegP dst, memory src, rRegN tmp1, rRegN tmp2, cmpOpU cop) +%{ + match(Set dst (CMoveP (Binary cop (CmpN tmp1 tmp2)) (Binary dst (LoadP src)))); + ins_cost(250); + format %{ "CMOV$cop $dst,$src\t# ptr" %} + ins_encode%{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + Register dst = $dst$$Register; + Address src = $src$$Address; + int flag = $cop$$cmpcode; + __ ldl(rscratch2_AT, src); + __ cmpwu(flag, op1, op2, rscratch1_GP); + __ selne(rscratch1_GP, rscratch2_AT, dst, dst); + %} + ins_pipe( pipe_slow ); +%} + +instruct cmovP_memU2(rRegP dst, memory src, rRegP tmp1, rRegP tmp2, cmpOpU cop) +%{ + match(Set dst (CMoveP (Binary cop (CmpP tmp1 tmp2)) (Binary dst (LoadP src)))); + ins_cost(250); + format %{ "CMOV$cop $dst,$src\t# ptr" %} + ins_encode%{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + Register dst = $dst$$Register; + Address src = $src$$Address; + int flag = $cop$$cmpcode; + __ ldl(rscratch2_AT, src); + __ cmplu(flag, op1, op2, rscratch1_GP); + __ selne(rscratch1_GP, rscratch2_AT, dst, dst); + %} + ins_pipe( pipe_slow ); +%} +*/ + +instruct cmovL_cmpI_reg_reg(rRegL dst, rRegL src, rRegI tmp1, rRegI tmp2, cmpOp cop) +%{ + match(Set dst (CMoveL (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); + + ins_cost(200); // XXX + format %{ "cmovL$cop $dst, $src\t# signed, long @cmovL_cmpI_reg_reg" %} + ins_encode%{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + __ cmpws(flag, op1, op2); + __ selne(rcc, src, dst, dst); + %} + ins_pipe(pipe_slow); // XXX +%} + +instruct cmovL_cmpI_reg_imm(rRegL dst, immU8 src, rRegI tmp1, rRegI tmp2, cmpOp cop) +%{ + match(Set dst (CMoveL (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); + + ins_cost(200); // XXX + format %{ "cmovL$cop $dst, $src\t# signed, long @cmovL_cmpI_reg_reg" %} + ins_encode%{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + Register dst = $dst$$Register; + int src = $src$$constant; + int flag = $cop$$cmpcode; + __ cmpws(flag, op1, op2); + __ selne(rcc, src, dst, dst); + %} + ins_pipe(pipe_slow); // XXX +%} + 
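+// Note: SW64 appears to expose no implicit condition-code register to C2, so the
+// CMove* patterns above and below all share the same lowering: a compare helper
+// (cmpws/cmpls/cmpwu/cmplu for integer and pointer operands, cmpfs/cmpfd for
+// float/double) materializes the comparison result in an ordinary register (rcc,
+// or FcmpRES for FP compares), and selne/fselne then either keeps $dst or
+// overwrites it with $src depending on that result; fimovd/ifmovd appear to move
+// the result between the integer and FP register files when the compare and the
+// select sit on different sides.
+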
+instruct cmovL_cmpL_reg_reg(rRegL dst, rRegL src, rRegL tmp1, rRegL tmp2, cmpOp cop) +%{ + match(Set dst (CMoveL (Binary cop (CmpL tmp1 tmp2)) (Binary dst src))); + + ins_cost(200); // XXX + format %{ "cmovL$cop $dst, $src\t# signed, long@cmovL_cmpL_reg_reg" %} + ins_encode%{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + __ cmpls(flag, op1, op2); + __ selne(rcc, src, dst, dst); + %} + ins_pipe(pipe_slow); // XXX +%} + +instruct cmovL_cmpL_reg_imm(rRegL dst, immU8 src, rRegL tmp1, rRegL tmp2, cmpOp cop) +%{ + match(Set dst (CMoveL (Binary cop (CmpL tmp1 tmp2)) (Binary dst src))); + + ins_cost(200); // XXX + format %{ "cmovL$cop $dst, $src\t# signed, long@cmovL_cmpL_reg_imm" %} + ins_encode%{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + Register dst = $dst$$Register; + int src = $src$$constant; + int flag = $cop$$cmpcode; + __ cmpls(flag, op1, op2); + __ selne(rcc, src, dst, dst); + %} + ins_pipe(pipe_slow); // XXX +%} + +instruct cmovL_cmpF_reg_reg(rRegL dst, rRegL src, regF tmp1, regF tmp2, cmpOp cop ) %{ + match(Set dst (CMoveL (Binary cop (CmpF tmp1 tmp2)) (Binary dst src))); + + ins_cost(200); // XXX + format %{ "cmovL$cop $dst, $src\t# signed, ptr @cmovL_cmpF_reg_reg" %} + ins_encode%{ + FloatRegister op1 = $tmp1$$FloatRegister; + FloatRegister op2 = $tmp2$$FloatRegister; + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + __ cmpfs(flag, op1, op2); + __ fimovs(FcmpRES, rcc); + __ selne(rcc, src, dst, dst); + %} + ins_pipe(pipe_slow); // XXX +%} + +instruct cmovL_cmpD_reg_reg(rRegL dst, rRegL src, regD tmp1, regD tmp2, cmpOp cop ) %{ + match(Set dst (CMoveL (Binary cop (CmpD tmp1 tmp2)) (Binary dst src))); + + ins_cost(200); // XXX + format %{ "cmovL$cop $dst, $src\t# signed, ptr @cmovL_cmpD_reg_reg" %} + ins_encode%{ + FloatRegister op1 = $tmp1$$FloatRegister; + FloatRegister op2 = $tmp2$$FloatRegister; + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + __ cmpfd(flag, op1, op2); + __ fimovd(FcmpRES, rcc); + __ selne(rcc, src, dst, dst); + %} + ins_pipe(pipe_slow); // XXX +%} + +instruct cmovL_cmpD_reg_imm(rRegL dst, immU8 src, regD tmp1, regD tmp2, cmpOp cop ) %{ + match(Set dst (CMoveL (Binary cop (CmpD tmp1 tmp2)) (Binary dst src))); + + ins_cost(200); // XXX + format %{ "cmovL$cop $dst, $src\t# signed, ptr @cmovL_cmpD_reg_imm" %} + ins_encode%{ + FloatRegister op1 = $tmp1$$FloatRegister; + FloatRegister op2 = $tmp2$$FloatRegister; + Register dst = $dst$$Register; + int src = $src$$constant; + int flag = $cop$$cmpcode; + __ cmpfd(flag, op1, op2); + __ fimovd(FcmpRES, rcc); + __ selne(rcc, src, dst, dst); + %} + ins_pipe(pipe_slow); // XXX +%} + +instruct cmovL_cmpU_reg_reg(rRegL dst, rRegL src, rRegI tmp1, rRegI tmp2, cmpOpU cop) +%{ + match(Set dst (CMoveL (Binary cop (CmpU tmp1 tmp2)) (Binary dst src))); + + ins_cost(200); // XXX + format %{ "cmovL$cop $dst, $src\t# unsigned, long @cmovL_cmpU_reg_reg" %} + ins_encode%{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + __ cmpwu(flag, op1, op2); + __ selne(rcc, src, dst, dst); + %} + ins_pipe(pipe_slow); // XXX +%} + +instruct cmovL_cmpN_reg_reg(rRegL dst, rRegL src, rRegN tmp1, rRegN tmp2, cmpOpU cop) +%{ + match(Set dst (CMoveL (Binary cop (CmpN tmp1 tmp2)) (Binary dst src))); + + 
ins_cost(200); // XXX + format %{ "cmovL$cop $dst, $src\t# unsigned, long @cmovL_cmpN_reg_reg" %} + ins_encode%{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + __ cmpwu(flag, op1, op2); + __ selne(rcc, src, dst, dst); + %} + ins_pipe(pipe_slow); // XXX +%} + +instruct cmovL_cmpN_reg_imm(rRegL dst, immU8 src, rRegN tmp1, rRegN tmp2, cmpOpU cop) +%{ + match(Set dst (CMoveL (Binary cop (CmpN tmp1 tmp2)) (Binary dst src))); + + ins_cost(200); // XXX + format %{ "cmovL$cop $dst, $src\t# unsigned, long @cmovL_cmpN_reg_imm" %} + ins_encode%{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + Register dst = $dst$$Register; + int src = $src$$constant; + int flag = $cop$$cmpcode; + __ cmpwu(flag, op1, op2); + __ selne(rcc, src, dst, dst); + %} + ins_pipe(pipe_slow); // XXX +%} + +instruct cmovL_cmpP_reg_reg(rRegL dst, rRegL src, rRegP tmp1, rRegP tmp2, cmpOpU cop) +%{ + match(Set dst (CMoveL (Binary cop (CmpP tmp1 tmp2)) (Binary dst src))); + + ins_cost(200); // XXX + format %{ "cmovL$cop $dst, $src\t# unsigned, long @cmovL_cmpP_reg_reg" %} + ins_encode%{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + __ cmplu(flag, op1, op2); + __ selne(rcc, src, dst, dst); + %} + ins_pipe(pipe_slow); // XXX +%} +/* +instruct cmovL_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{ + match(Set dst (CMoveL (Binary cop cr) (Binary dst src))); + ins_cost(200); + expand %{ + cmovL_regU(cop, cr, dst, src); + %} +%} +*/ +/* memory operands no need for SW64 +instruct cmovL_mem1(rRegL dst, memory src, rRegI tmp1, rRegI tmp2, cmpOp cop) +%{ + match(Set dst (CMoveL (Binary cop (CmpI tmp1 tmp2)) (Binary dst (LoadL src)))); + + ins_cost(200); // XXX + format %{ "cmovq$cop $dst, $src\t# signed, long" %} + ins_encode%{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + Register dst = $dst$$Register; + Address src = $src$$Address; + int flag = $cop$$cmpcode; + __ ldl(rscratch2_AT, src); + __ cmpws(flag, op1, op2, rscratch1_GP); + __ selne(rscratch1_GP, rscratch2_AT, dst, dst); + %} + ins_pipe(pipe_slow); // XXX +%} + +instruct cmovL_mem2(rRegL dst, memory src, rRegL tmp1, rRegL tmp2, cmpOp cop) +%{ + match(Set dst (CMoveL (Binary cop (CmpL tmp1 tmp2)) (Binary dst (LoadL src)))); + + ins_cost(200); // XXX + format %{ "cmovq$cop $dst, $src\t# signed, long" %} + ins_encode%{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + Register dst = $dst$$Register; + Address src = $src$$Address; + int flag = $cop$$cmpcode; + __ ldl(rscratch2_AT, src); + __ cmpls(flag, op1, op2, rscratch1_GP); + __ selne(rscratch1_GP, rscratch2_AT, dst, dst); + %} + ins_pipe(pipe_slow); // XXX +%} + +instruct cmovL_memU1(rRegL dst, memory src, rRegN tmp1, rRegN tmp2, cmpOpU cop) +%{ + match(Set dst (CMoveL (Binary cop (CmpN tmp1 tmp2)) (Binary dst (LoadL src)))); + + ins_cost(200); // XXX + format %{ "cmovq$cop $dst, $src\t# unsigned, long" %} + ins_encode%{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + Register dst = $dst$$Register; + Address src = $src$$Address; + int flag = $cop$$cmpcode; + __ ldl(rscratch2_AT, src); + __ cmpwu(flag, op1, op2, rscratch1_GP); + __ selne(rscratch1_GP, rscratch2_AT, dst, dst); + %} + ins_pipe(pipe_slow); // XXX +%} + +instruct cmovL_memU2(rRegL dst, memory src, rRegP tmp1, rRegP tmp2, cmpOpU cop) +%{ + 
match(Set dst (CMoveL (Binary cop (CmpP tmp1 tmp2)) (Binary dst (LoadL src)))); + + ins_cost(200); // XXX + format %{ "cmovq$cop $dst, $src\t# unsigned, long" %} + ins_encode%{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + Register dst = $dst$$Register; + Address src = $src$$Address; + int flag = $cop$$cmpcode; + __ ldl(rscratch2_AT, src); + __ cmplu(flag, op1, op2, rscratch1_GP); + __ selne(rscratch1_GP, rscratch2_AT, dst, dst); + %} + ins_pipe(pipe_slow); // XXX +%}*/ +/* +instruct cmovL_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, memory src) %{ + match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src)))); + ins_cost(200); + expand %{ + cmovL_memU(cop, cr, dst, src); + %} +%} +*/ +instruct cmovF_cmpI_reg_reg(regF dst, regF src, rRegI tmp1, rRegI tmp2, cmpOp cop) +%{ + match(Set dst (CMoveF (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); + + ins_cost(200); // XXX + format %{ "cmovF$cop $dst, $src\t# signed, float @cmovF_cmpI_reg_reg" %} + ins_encode %{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + FloatRegister dst = as_FloatRegister($dst$$reg); + FloatRegister src = as_FloatRegister($src$$reg); + int flag = $cop$$cmpcode; + __ cmpws(flag, op1, op2); + __ ifmovd(rcc, fcc); + __ fselne(fcc, src, dst, dst); + + %} + ins_pipe(pipe_slow); +%} + +instruct cmovF_cmpL_reg_reg(regF dst, regF src, rRegL tmp1, rRegL tmp2, cmpOp cop) +%{ + match(Set dst (CMoveF (Binary cop (CmpL tmp1 tmp2)) (Binary dst src))); + + ins_cost(200); // XXX + format %{ "cmovF$cop $dst, $src\t# @cmovF_cmpL_reg_reg" %} + ins_encode %{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + FloatRegister dst = as_FloatRegister($dst$$reg); + FloatRegister src = as_FloatRegister($src$$reg); + int flag = $cop$$cmpcode; + __ cmpls(flag, op1, op2); + __ ifmovd(rcc, fcc); + __ fselne(fcc, src, dst, dst); + + %} + ins_pipe(pipe_slow); +%} + +instruct cmovF_cmpF_reg_reg(regF dst, regF src, regF tmp1, regF tmp2, cmpOp cop) +%{ + match(Set dst (CMoveF (Binary cop (CmpF tmp1 tmp2)) (Binary dst src))); + + ins_cost(200); // XXX + format %{ "cmovF$cop $dst, $src\t# @cmovF_cmpF_reg_reg" %} + ins_encode %{ + FloatRegister op1 = as_FloatRegister($tmp1$$reg); + FloatRegister op2 = as_FloatRegister($tmp2$$reg); + FloatRegister dst = as_FloatRegister($dst$$reg); + FloatRegister src = as_FloatRegister($src$$reg); + int flag = $cop$$cmpcode; + __ cmpfs(flag, op1, op2); + __ fselne(fcc, src, dst, dst); + + %} + ins_pipe(pipe_slow); +%} + + //no in sw8 ?? TODO djx +instruct cmovF_cmpU_reg_reg(regF dst, regF src, rRegN tmp1, rRegN tmp2, cmpOpU cop) +%{ + match(Set dst (CMoveF (Binary cop (CmpN tmp1 tmp2)) (Binary dst src))); + + ins_cost(200); // XXX + format %{ "cmovF$cop $dst, $src\t# @cmovF_cmpU_reg_reg" %} + ins_encode %{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + FloatRegister dst = as_FloatRegister($dst$$reg); + FloatRegister src = as_FloatRegister($src$$reg); + int flag = $cop$$cmpcode; + __ cmpwu(flag, op1, op2); + __ ifmovd(rcc, fcc); + __ fselne(fcc, src, dst, dst); + %} + ins_pipe(pipe_slow); +%} + + //no in sw8 ?? 
TODO djx +instruct cmovF_cmpP_reg_reg(regF dst, regF src, rRegP tmp1, rRegP tmp2, cmpOpU cop) +%{ + match(Set dst (CMoveF (Binary cop (CmpP tmp1 tmp2)) (Binary dst src))); + + ins_cost(200); // XXX + format %{ "cmovF$cop $dst, $src\t# @cmovF_cmpP_reg_reg" %} + ins_encode %{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + FloatRegister dst = as_FloatRegister($dst$$reg); + FloatRegister src = as_FloatRegister($src$$reg); + int flag = $cop$$cmpcode; + __ cmplu(flag, op1, op2); + __ ifmovd(rcc, fcc); + __ fselne(fcc, src, dst, dst); + %} + ins_pipe(pipe_slow); +%} +/* +instruct cmovF_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regF dst, regF src) %{ + match(Set dst (CMoveF (Binary cop cr) (Binary dst src))); + ins_cost(200); + expand %{ + cmovF_regU(cop, cr, dst, src); + %} +%} +*/ + +instruct cmovD_cmpI_reg_reg(regD dst, regD src, rRegI tmp1, rRegI tmp2, cmpOp cop) +%{ + match(Set dst (CMoveD (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); + + ins_cost(200); // XXX + format %{ "cmovD$cop $dst, $src\t# @cmovD_cmpI_reg_reg" %} + ins_encode %{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + FloatRegister dst = as_FloatRegister($dst$$reg); + FloatRegister src = as_FloatRegister($src$$reg); + int flag = $cop$$cmpcode; + __ cmpws(flag, op1, op2); + __ ifmovd(rcc, fcc); + __ fselne(fcc, src, dst, dst); + %} + ins_pipe(pipe_slow); +%} + +instruct cmovD_cmpD_reg_reg(regD dst, regD src, regD tmp1, regD tmp2, cmpOp cop) +%{ + match(Set dst (CMoveD (Binary cop (CmpD tmp1 tmp2)) (Binary dst src))); + + ins_cost(200); // XXX + format %{ "cmovD$cop $dst, $src\t# @cmovD_cmpD_reg_reg" %} + ins_encode %{ + FloatRegister op1 = as_FloatRegister($tmp1$$reg); + FloatRegister op2 = as_FloatRegister($tmp2$$reg); + FloatRegister dst = as_FloatRegister($dst$$reg); + FloatRegister src = as_FloatRegister($src$$reg); + int flag = $cop$$cmpcode; + __ cmpfd(flag, op1, op2); + __ fselne(fcc, src, dst, dst); + %} + ins_pipe(pipe_slow); +%} + +instruct cmovD_cmpF_reg_reg(regD dst, regD src, regF tmp1, regF tmp2, cmpOp cop) +%{ + match(Set dst (CMoveD (Binary cop (CmpF tmp1 tmp2)) (Binary dst src))); + + ins_cost(200); // XXX + format %{ "cmovD$cop $dst, $src\t# @cmovD_cmpF_reg_reg" %} + ins_encode %{ + FloatRegister op1 = as_FloatRegister($tmp1$$reg); + FloatRegister op2 = as_FloatRegister($tmp2$$reg); + FloatRegister dst = as_FloatRegister($dst$$reg); + FloatRegister src = as_FloatRegister($src$$reg); + int flag = $cop$$cmpcode; + __ cmpfs(flag, op1, op2); + __ fselne(fcc, src, dst, dst); + %} + ins_pipe(pipe_slow); +%} + +instruct cmovD_cmpN_reg_reg(regD dst, regD src, rRegN tmp1, rRegN tmp2, cmpOpU cop) +%{ + match(Set dst (CMoveD (Binary cop (CmpN tmp1 tmp2)) (Binary dst src))); + + ins_cost(200); // XXX + format %{ "cmovD$cop $dst, $src\t# @cmovD_cmpN_reg_reg" %} + ins_encode %{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + FloatRegister dst = as_FloatRegister($dst$$reg); + FloatRegister src = as_FloatRegister($src$$reg); + int flag = $cop$$cmpcode; + __ cmpwu(flag, op1, op2); + __ ifmovd(rcc, FcmpRES); + __ fselne(FcmpRES, src, dst, dst); + %} + ins_pipe(pipe_slow); +%} + +instruct cmovD_cmpP_reg_reg(regD dst, regD src, rRegP tmp1, rRegP tmp2, cmpOpU cop) +%{ + match(Set dst (CMoveD (Binary cop (CmpP tmp1 tmp2)) (Binary dst src))); + + ins_cost(200); // XXX + format %{ "cmovD$cop $dst, $src\t# @cmovD_cmpP_reg_reg" %} + ins_encode %{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + FloatRegister dst = as_FloatRegister($dst$$reg); + 
FloatRegister src = as_FloatRegister($src$$reg); + int flag = $cop$$cmpcode; + __ cmplu(flag, op1, op2); + __ ifmovd(rcc, FcmpRES); + __ fselne(FcmpRES, src, dst, dst); + %} + ins_pipe(pipe_slow); +%} + +/* +instruct cmovD_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regD dst, regD src) %{ + match(Set dst (CMoveD (Binary cop cr) (Binary dst src))); + ins_cost(200); + expand %{ + cmovD_regU(cop, cr, dst, src); + %} +%} +*/ +//----------Arithmetic Instructions-------------------------------------------- +//----------Addition Instructions---------------------------------------------- + +instruct addI_rReg(rRegI dst, rRegI src1, rRegI src2) +%{ + match(Set dst (AddI src1 src2)); + + format %{ "addw $src1, $src2, $dst\t# int @addI_rReg" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src1 = $src1$$Register; + Register src2 = $src2$$Register; + __ addw(src1, src2, dst); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct addI_rReg_imm(rRegI dst, rRegI src1, immU8 src2) +%{ + match(Set dst (AddI src1 src2)); + ins_cost(80); + format %{ "addw $src1, $src2, $dst\t# int @addI_rReg_imm" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src1 = $src1$$Register; + int imm = $src2$$constant; + + __ addw(src1, imm, dst); + %} + ins_pipe( ialu_regI_regI ); +%} + +/*memory operands no need for SW64 +instruct addI_rReg_mem(rRegI dst, rRegI src1, memory src2) +%{ + match(Set dst (AddI src1 (LoadI src2))); + + ins_cost(150); // XXX + format %{ "ldw $dst, $src2\t# int @addI_rReg_mem\n\t" + "addw $src1, $dst, $dst"%} + ins_encode %{ + __ ldw($dst$$Register, $src2$$Address); + __ addw($src1$$Register, rscratch2_AT, $dst$$Register); + %} +// ins_pipe( ialu_reg_mem ); +%} + +instruct addI_mem_rReg(memory dst, rRegI src) +%{ + match(Set dst (StoreI dst (AddI (LoadI dst) src))); + + ins_cost(150); // XXX + format %{ "ldw rscratch1_GP, $dst\t# int @addI_mem_rReg\n\t" + "addw rscratch1_GP, $src, rscratch1_GP\n\t" + "stw rscratch1_GP, $dst"%} + ins_encode %{ + __ ldw(rscratch1_GP, $dst$$Address); + __ addw(rscratch1_GP, $src$$Register, rscratch1_GP); + __ stw(rscratch1_GP, $dst$$Address); + %} +// ins_pipe(ialu_mem_reg); +%} + +instruct addI_mem_imm(memory dst, immI src) +%{ + match(Set dst (StoreI dst (AddI (LoadI dst) src))); + + ins_cost(125); // XXX + format %{ "addw $dst, $src\t# int @addI_mem_imm" %} + ins_encode %{ + int imm = $src$$constant; + + __ ldw(rscratch2_AT, $dst$$Address); + if(MacroAssembler::is_uimm8(imm)) { + __ addw(rscratch2_AT, imm, rscratch2_AT); + } else { + __ mov_immediate32(rscratch1_GP, imm); + __ addw(rscratch2_AT, rscratch1_GP, rscratch2_AT); + } + __ stw(rscratch2_AT, $dst$$Address, rscratch1_GP); + %} +// ins_pipe(ialu_mem_imm); +%}*/ + +instruct incI_rReg(rRegI dst, rRegI src1, immI1 src2) +%{ + match(Set dst (AddI src1 src2)); + ins_cost(60); + format %{ "addw $src1, #1, $dst\t# int @incI_rReg" %} + ins_encode %{ + __ addw($src1$$Register, 1, $dst$$Register); + %} + ins_pipe(ialu_regI_imm16); +%} + +/*memory operands no need for SW64 +instruct incI_mem(memory dst, immI1 src) +%{ + match(Set dst (StoreI dst (AddI (LoadI dst) src))); + + ins_cost(125); // XXX + format %{ "ldw rscratch2_AT, $dst\t# int @incI_mem\n\t" + "addw rscratch2_AT, #1, rscratch2_AT\n\t" + "stw rscratch2_AT, $dst"%} + ins_encode %{ + __ ldw(rscratch2_AT, $dst$$Address); + __ addw(rscratch2_AT, 1, rscratch2_AT); + __ stw(rscratch2_AT, $dst$$Address, rscratch1_GP); + %} +// ins_pipe(ialu_mem_imm); +%}*/ + +// XXX why does that use AddI +instruct decI_rReg(rRegI dst, rRegI src1, immI_M1 src2) +%{ + 
match(Set dst (AddI src1 src2)); + ins_cost(60); + format %{ "subw $src1, #1, $dst\t# int @decI_rReg" %} + ins_encode %{ + __ subw($src1$$Register, 1, $dst$$Register); + %} + ins_pipe(ialu_regI_imm16); +%} + +/*memory operands no need for SW64 +// XXX why does that use AddI +instruct decI_mem(memory dst, immI_M1 src) +%{ + match(Set dst (StoreI dst (AddI (LoadI dst) src))); + + ins_cost(125); // XXX + format %{ "ldw rscratch2_AT, $dst\t# int @decI_mem\n\t" + "subw rscratch2_AT, #1, rscratch2_AT\n\t" + "stw rscratch2_AT, $dst"%} + ins_encode %{ + __ ldw(rscratch2_AT, $dst$$Address); + __ subw(rscratch2_AT, 1, rscratch2_AT); + __ stw(rscratch2_AT, $dst$$Address, rscratch1_GP); + %} +// ins_pipe(ialu_mem_imm); +%}*/ + +// the same as addI_rReg_imm +//instruct leaI_rReg_immI(rRegI dst, rRegI src0, immI src1) +//%{ +// match(Set dst (AddI src0 src1)); +// +// ins_cost(110); +// format %{ "addw $src0, $src1, $dst\t# int @leaI_rReg_immI" %} +// ins_encode %{ +// Register dst = $dst$$Register; +// Register src = $src0$$Register; +// int imm = $src1$$constant; +// +// if(MacroAssembler::is_uimm8(imm)) { +// __ addw(src, imm, dst); +// } else { +// __ mov_immediate32(rscratch2_AT, imm); +// __ addw(src, rscratch2_AT, dst); +// } +// %} +// ins_pipe(ialu_regL_imm16); +//%} + +instruct addL_rReg(rRegL dst, rRegL src1, rRegL src2) +%{ + match(Set dst (AddL src1 src2)); + + format %{ "addl $src1, $src2, $dst\t# long @addL_rReg" %} + ins_encode %{ + __ addl($src1$$Register, $src2$$Register, $dst$$Register); + %} + ins_pipe(ialu_regL_regL); +%} + +instruct addL_rReg_imm(rRegL dst, rRegL src1, immU8 src2) +%{ + match(Set dst (AddL src1 src2)); + ins_cost(80); + format %{ "addptr $src1, $src2, $dst\t# long @addL_rReg_imm" %} + ins_encode %{ + __ addl($src1$$Register, (int)$src2$$constant, $dst$$Register); + %} + ins_pipe( ialu_regL_imm ); +%} + +/*memory operands no need for SW64 +instruct addL_rReg_mem(rRegL dst, rRegL src1, memory src2) +%{ + match(Set dst (AddL src1 (LoadL src2))); + + ins_cost(125); // XXX + format %{ "ldl $dst, $src2\t# long @addL_rReg_mem\n\t" + "addl src1, $dst, $dst"%} + ins_encode %{ + __ ldl($dst$$Register, $src2$$Address); + __ addl($src1$$Register, $dst$$Register, $dst$$Register); + %} + //ins_pipe(ialu_reg_mem); +%} + +instruct addL_mem_rReg(memory dst, rRegL src) +%{ + match(Set dst (StoreL dst (AddL (LoadL dst) src))); + + ins_cost(150); // XXX + format %{ "ldl rscratch2_AT, $dst\t# long @addL_mem_rReg\n\t" + "addl rscratch2_AT, $src, rscratch2_AT\n\t" + "stl rscratch2_AT, $dst"%} + ins_encode %{ + __ ldl(rscratch2_AT, $dst$$Address); + __ addl(rscratch2_AT, $src$$Register, rscratch2_AT); + __ stl(rscratch2_AT, $dst$$Address, rscratch1_GP); + %} +// ins_pipe(ialu_mem_reg); +%} + +instruct addL_mem_imm(memory dst, immL32 src) +%{ + match(Set dst (StoreL dst (AddL (LoadL dst) src))); + + ins_cost(125); // XXX + format %{ "ldl rscratch2_AT, $dst\t# long @addL_mem_imm\n\t" + "addptr rscratch2_AT, $src, rscratch2_AT\n\t" + "stl rscratch2_AT, $dst"%} + ins_encode %{ + __ ldl(rscratch2_AT, $dst$$Address); + __ addptr(rscratch2_AT, (int)$src$$constant, rscratch2_AT); + __ stl(rscratch2_AT, $dst$$Address, rscratch1_GP); + %} + //ins_pipe(ialu_mem_imm); +%}*/ + +instruct incL_rReg(rRegL dst, rRegL src1, immL1 src2) +%{ + match(Set dst (AddL src1 src2)); + ins_cost(40); + format %{ "addl $src1, #1, $dst\t# int @incL_rReg" %} + ins_encode %{ + __ addl($src1$$Register, 1, $dst$$Register); + %} + ins_pipe(ialu_regL_imm); +%} + +/*memory operands no need for SW64 +instruct incL_mem(memory 
dst, immL1 src) +%{ + match(Set dst (StoreL dst (AddL (LoadL dst) src))); + + ins_cost(125); // XXX + format %{ "ldl rscratch2_AT, $dst\t# long @incL_mem\n\t" + "addl rscratch2_AT, #1, rscratch2_AT\n\t" + "stl rscratch2_AT, $dst"%} + ins_encode %{ + __ ldl(rscratch2_AT, $dst$$Address); + __ addl(rscratch2_AT, 1, rscratch2_AT); + __ stl(rscratch2_AT, $dst$$Address, rscratch1_GP); + %} +// ins_pipe(ialu_mem_imm); +%}*/ + +// XXX why does that use AddL +instruct decL_rReg(rRegL dst, rRegL src1, immL_M1 src2) +%{ + match(Set dst (AddL src1 src2)); + ins_cost(60); + format %{ "subl $src1, #1, $dst\t# int @decL_rReg" %} + ins_encode %{ + __ subl($src1$$Register, 1, $dst$$Register); + %} + ins_pipe(ialu_regL_imm); +%} + +/*memory operands no need for SW64 +// XXX why does that use AddL +instruct decL_mem(memory dst, immL_M1 src) +%{ + match(Set dst (StoreL dst (AddL (LoadL dst) src))); + + ins_cost(125); // XXX + format %{ "ldl rscratch2_AT, $dst\t# int @decL_mem\n\t" + "subl rscratch2_AT, #1, rscratch2_AT\n\t" + "stl rscratch2_AT, $dst"%} + ins_encode %{ + __ ldl(rscratch2_AT, $dst$$Address); + __ subl(rscratch2_AT, 1, rscratch2_AT); + __ stl(rscratch2_AT, $dst$$Address, rscratch1_GP); + %} + //ins_pipe(ialu_mem_imm); +%}*/ + +//the same as addL_rReg_imm +//instruct leaL_rReg_immL(rRegL dst, rRegL src0, immL32 src1) +//%{ +// match(Set dst (AddL src0 src1)); +// +// ins_cost(110); +// format %{ "addptr $src0, $src1, $dst\t# long @leaL_rReg_immL" %} +// ins_encode %{ +// __ addptr($src0$$Register, (int)$src1$$constant, $dst$$Register); +// %} +// ins_pipe(ialu_regL_regL); +//%} + +instruct addP_rReg(rRegP dst, rRegP src1, rRegP src2) +%{ + match(Set dst (AddP src1 src2)); + + format %{ "addl $src1, $src2, $dst\t# ptr @addP_rReg" %} + ins_encode %{ + __ addl($src1$$Register, $src2$$Register, $dst$$Register); + %} + ins_pipe(ialu_regL_regL); //in 8 this is ialu_regI_regI?? TODO djx +%} + +instruct addP_reg_reg(rRegP dst, rRegP src1, rRegL src2) %{ + match(Set dst (AddP src1 src2)); + + format %{ "addl $src1, $src2, $dst #@addP_reg_reg" %} + ins_encode %{ + __ addl($src1$$Register, $src2$$Register, $dst$$Register); + %} + ins_pipe( ialu_regL_regL ); +%} + +instruct addP_rReg_imm(rRegP dst, rRegP src1, immUL8 src2) +%{ + match(Set dst (AddP src1 src2)); + ins_cost(40); + format %{ "addptr $src1, $src2, $dst\t# long @addP_rReg_imm" %} + ins_encode %{ + __ addl($src1$$Register, (int)$src2$$constant, $dst$$Register); + %} + ins_pipe( ialu_regL_imm ); +%} + +//the same as addP_rReg_imm +// XXX addP mem ops ???? + +//instruct leaP_rReg_imm(rRegP dst, rRegP src0, immL32 src1) +//%{ +// match(Set dst (AddP src0 src1)); +// +// ins_cost(110); +// format %{ "addptr $src0, $src1, $dst\t# long @leaP_rReg_imm" %} +// ins_encode %{ +// __ addptr($src0$$Register, (int)$src1$$constant, $dst$$Register); +// %} +//// ins_pipe(ialu_reg_reg); +//%} + +instruct checkCastPP(rRegP dst) +%{ + match(Set dst (CheckCastPP dst)); + + size(0); //?? 
TODO djx + format %{ "#checkcastPP of $dst (empty encoding)\t# @chekCastPP" %} + ins_encode( /*empty encoding*/ ); + ins_pipe( empty ); +%} + +instruct castPP(rRegP dst) +%{ + match(Set dst (CastPP dst)); + + size(0); + format %{ "#castPP of $dst (empty encoding)\t# @castPP" %} + ins_encode(/* empty encoding */); + ins_pipe(empty); +%} + +instruct castII(rRegI dst) +%{ + match(Set dst (CastII dst)); + + size(0); + format %{ "#castII of $dst (empty encoding)\t# @castII" %} + ins_encode( /*empty encoding*/ ); + ins_cost(0); + ins_pipe( empty ); +%} + +instruct castLL(rRegL dst) +%{ + match(Set dst (CastLL dst)); + + size(0); + format %{ "# castLL of $dst" %} + ins_encode(/* empty encoding */); + ins_cost(0); + ins_pipe(empty); +%} + +instruct castFF(regF dst) +%{ + match(Set dst (CastFF dst)); + + size(0); + format %{ "# castFF of $dst" %} + ins_encode(/* empty encoding */); + ins_cost(0); + ins_pipe(empty); +%} + +instruct castDD(regD dst) +%{ + match(Set dst (CastDD dst)); + + size(0); + format %{ "# castDD of $dst" %} + ins_encode(/* empty encoding */); + ins_cost(0); + ins_pipe(empty); +%} + +// LoadP-locked same as a regular LoadP when used with compare-swap +instruct loadPLocked(rRegP dst, memory mem) +%{ + match(Set dst (LoadPLocked mem)); + + ins_cost(125); + format %{ "ldptr $dst, $mem #@loadPLocked" %} + ins_encode (load_P_enc(dst, mem)); + ins_pipe( ialu_reg_mem ); +%} +/* +// Conditional-store of the updated heap-top. +// Used during allocation of the shared heap. +// Sets flags (EQ) on success. Implemented with a CMPXCHG on Intel. + +instruct storePConditional(memory heap_top_ptr, + rax_RegP oldval, rRegP newval, + rFlagsReg cr) +%{ + predicate(n->as_LoadStore()->barrier_data() == 0); + match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval))); + + format %{ "cmpxchgq $heap_top_ptr, $newval\t# (ptr) " + "If rax == $heap_top_ptr then store $newval into $heap_top_ptr" %} + opcode(0x0F, 0xB1); + ins_encode(lock_prefix, + REX_reg_mem_wide(newval, heap_top_ptr), + OpcP, OpcS, + reg_mem(newval, heap_top_ptr)); + ins_pipe(pipe_cmpxchg); +%} + +// Conditional-store of an int value. +// ZF flag is set on success, reset otherwise. Implemented with a CMPXCHG. +instruct storeIConditional(memory mem, rax_RegI oldval, rRegI newval, rFlagsReg cr) +%{ + match(Set cr (StoreIConditional mem (Binary oldval newval))); + effect(KILL oldval); + + format %{ "cmpxchgl $mem, $newval\t# If rax == $mem then store $newval into $mem" %} + opcode(0x0F, 0xB1); + ins_encode(lock_prefix, + REX_reg_mem(newval, mem), + OpcP, OpcS, + reg_mem(newval, mem)); + ins_pipe(pipe_cmpxchg); +%} + +// Conditional-store of a long value. +// ZF flag is set on success, reset otherwise. Implemented with a CMPXCHG. 
+instruct storeLConditional(memory mem, rax_RegL oldval, rRegL newval, rFlagsReg cr) +%{ + match(Set cr (StoreLConditional mem (Binary oldval newval))); + effect(KILL oldval); + + format %{ "cmpxchgq $mem, $newval\t# If rax == $mem then store $newval into $mem" %} + opcode(0x0F, 0xB1); + ins_encode(lock_prefix, + REX_reg_mem_wide(newval, mem), + OpcP, OpcS, + reg_mem(newval, mem)); + ins_pipe(pipe_cmpxchg); +%} + + +// XXX No flag versions for CompareAndSwap{P,I,L} because matcher can't match them +instruct compareAndSwapP(rRegI res, + memory mem_ptr, + rax_RegP oldval, rRegP newval, + rFlagsReg cr) +%{ + predicate(VM_Version::supports_cx8() && n->as_LoadStore()->barrier_data() == 0); + match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval))); + match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval))); + effect(KILL cr, KILL oldval); + + format %{ "cmpxchgq $mem_ptr,$newval\t# " + "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" + "sete $res\n\t" + "movzbl $res, $res" %} + opcode(0x0F, 0xB1); + ins_encode(lock_prefix, + REX_reg_mem_wide(newval, mem_ptr), + OpcP, OpcS, + reg_mem(newval, mem_ptr), + REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete + REX_reg_breg(res, res), // movzbl + Opcode(0xF), Opcode(0xB6), reg_reg(res, res)); + ins_pipe( pipe_cmpxchg ); +%} + +instruct compareAndSwapL(rRegI res, + memory mem_ptr, + rax_RegL oldval, rRegL newval, + rFlagsReg cr) +%{ + predicate(VM_Version::supports_cx8()); + match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval))); + match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval))); + effect(KILL cr, KILL oldval); + + format %{ "cmpxchgq $mem_ptr,$newval\t# " + "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" + "sete $res\n\t" + "movzbl $res, $res" %} + opcode(0x0F, 0xB1); + ins_encode(lock_prefix, + REX_reg_mem_wide(newval, mem_ptr), + OpcP, OpcS, + reg_mem(newval, mem_ptr), + REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete + REX_reg_breg(res, res), // movzbl + Opcode(0xF), Opcode(0xB6), reg_reg(res, res)); + ins_pipe( pipe_cmpxchg ); +%} + +instruct compareAndSwapI(rRegI res, + memory mem_ptr, + rax_RegI oldval, rRegI newval, + rFlagsReg cr) +%{ + match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval))); + match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval))); + effect(KILL cr, KILL oldval); + + format %{ "cmpxchgl $mem_ptr,$newval\t# " + "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" + "sete $res\n\t" + "movzbl $res, $res" %} + opcode(0x0F, 0xB1); + ins_encode(lock_prefix, + REX_reg_mem(newval, mem_ptr), + OpcP, OpcS, + reg_mem(newval, mem_ptr), + REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete + REX_reg_breg(res, res), // movzbl + Opcode(0xF), Opcode(0xB6), reg_reg(res, res)); + ins_pipe( pipe_cmpxchg ); +%} + +instruct compareAndSwapB(rRegI res, + memory mem_ptr, + rax_RegI oldval, rRegI newval, + rFlagsReg cr) +%{ + match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval))); + match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval))); + effect(KILL cr, KILL oldval); + + format %{ "cmpxchgb $mem_ptr,$newval\t# " + "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" + "sete $res\n\t" + "movzbl $res, $res" %} + opcode(0x0F, 0xB0); + ins_encode(lock_prefix, + REX_breg_mem(newval, mem_ptr), + OpcP, OpcS, + reg_mem(newval, mem_ptr), + REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete + REX_reg_breg(res, res), // movzbl + Opcode(0xF), Opcode(0xB6), reg_reg(res, res)); + ins_pipe( pipe_cmpxchg 
); +%} + +instruct compareAndSwapS(rRegI res, + memory mem_ptr, + rax_RegI oldval, rRegI newval, + rFlagsReg cr) +%{ + match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval))); + match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval))); + effect(KILL cr, KILL oldval); + + format %{ "cmpxchgw $mem_ptr,$newval\t# " + "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" + "sete $res\n\t" + "movzbl $res, $res" %} + opcode(0x0F, 0xB1); + ins_encode(lock_prefix, + SizePrefix, + REX_reg_mem(newval, mem_ptr), + OpcP, OpcS, + reg_mem(newval, mem_ptr), + REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete + REX_reg_breg(res, res), // movzbl + Opcode(0xF), Opcode(0xB6), reg_reg(res, res)); + ins_pipe( pipe_cmpxchg ); +%} + +instruct compareAndSwapN(rRegI res, + memory mem_ptr, + rax_RegN oldval, rRegN newval, + rFlagsReg cr) %{ + match(Set res (CompareAndSwapN mem_ptr (Binary oldval newval))); + match(Set res (WeakCompareAndSwapN mem_ptr (Binary oldval newval))); + effect(KILL cr, KILL oldval); + + format %{ "cmpxchgl $mem_ptr,$newval\t# " + "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" + "sete $res\n\t" + "movzbl $res, $res" %} + opcode(0x0F, 0xB1); + ins_encode(lock_prefix, + REX_reg_mem(newval, mem_ptr), + OpcP, OpcS, + reg_mem(newval, mem_ptr), + REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete + REX_reg_breg(res, res), // movzbl + Opcode(0xF), Opcode(0xB6), reg_reg(res, res)); + ins_pipe( pipe_cmpxchg ); +%} + +instruct compareAndExchangeB( + memory mem_ptr, + rax_RegI oldval, rRegI newval, + rFlagsReg cr) +%{ + match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval))); + effect(KILL cr); + + format %{ "cmpxchgb $mem_ptr,$newval\t# " + "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %} + opcode(0x0F, 0xB0); + ins_encode(lock_prefix, + REX_breg_mem(newval, mem_ptr), + OpcP, OpcS, + reg_mem(newval, mem_ptr) // lock cmpxchg + ); + ins_pipe( pipe_cmpxchg ); +%} + +instruct compareAndExchangeS( + memory mem_ptr, + rax_RegI oldval, rRegI newval, + rFlagsReg cr) +%{ + match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval))); + effect(KILL cr); + + format %{ "cmpxchgw $mem_ptr,$newval\t# " + "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %} + opcode(0x0F, 0xB1); + ins_encode(lock_prefix, + SizePrefix, + REX_reg_mem(newval, mem_ptr), + OpcP, OpcS, + reg_mem(newval, mem_ptr) // lock cmpxchg + ); + ins_pipe( pipe_cmpxchg ); +%} + +instruct compareAndExchangeI( + memory mem_ptr, + rax_RegI oldval, rRegI newval, + rFlagsReg cr) +%{ + match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval))); + effect(KILL cr); + + format %{ "cmpxchgl $mem_ptr,$newval\t# " + "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %} + opcode(0x0F, 0xB1); + ins_encode(lock_prefix, + REX_reg_mem(newval, mem_ptr), + OpcP, OpcS, + reg_mem(newval, mem_ptr) // lock cmpxchg + ); + ins_pipe( pipe_cmpxchg ); +%} + +instruct compareAndExchangeL( + memory mem_ptr, + rax_RegL oldval, rRegL newval, + rFlagsReg cr) +%{ + predicate(VM_Version::supports_cx8()); + match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval))); + effect(KILL cr); + + format %{ "cmpxchgq $mem_ptr,$newval\t# " + "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %} + opcode(0x0F, 0xB1); + ins_encode(lock_prefix, + REX_reg_mem_wide(newval, mem_ptr), + OpcP, OpcS, + reg_mem(newval, mem_ptr) // lock cmpxchg + ); + ins_pipe( pipe_cmpxchg ); +%} + +instruct compareAndExchangeN( + memory mem_ptr, + rax_RegN oldval, rRegN newval, + 
rFlagsReg cr) %{ + match(Set oldval (CompareAndExchangeN mem_ptr (Binary oldval newval))); + effect(KILL cr); + + format %{ "cmpxchgl $mem_ptr,$newval\t# " + "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %} + opcode(0x0F, 0xB1); + ins_encode(lock_prefix, + REX_reg_mem(newval, mem_ptr), + OpcP, OpcS, + reg_mem(newval, mem_ptr) // lock cmpxchg + ); + ins_pipe( pipe_cmpxchg ); +%} + +instruct compareAndExchangeP( + memory mem_ptr, + rax_RegP oldval, rRegP newval, + rFlagsReg cr) +%{ + predicate(VM_Version::supports_cx8()); + match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval))); + effect(KILL cr); + + format %{ "cmpxchgq $mem_ptr,$newval\t# " + "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %} + opcode(0x0F, 0xB1); + ins_encode(lock_prefix, + REX_reg_mem_wide(newval, mem_ptr), + OpcP, OpcS, + reg_mem(newval, mem_ptr) // lock cmpxchg + ); + ins_pipe( pipe_cmpxchg ); +%} + +instruct xaddB_no_res( memory mem, Universe dummy, immI add, rFlagsReg cr) %{ + predicate(n->as_LoadStore()->result_not_used()); + match(Set dummy (GetAndAddB mem add)); + effect(KILL cr); + format %{ "ADDB [$mem],$add" %} + ins_encode %{ + __ lock(); + __ addb($mem$$Address, $add$$constant); + %} + ins_pipe( pipe_cmpxchg ); +%} + +instruct xaddB( memory mem, rRegI newval, rFlagsReg cr) %{ + match(Set newval (GetAndAddB mem newval)); + effect(KILL cr); + format %{ "XADDB [$mem],$newval" %} + ins_encode %{ + __ lock(); + __ xaddb($mem$$Address, $newval$$Register); + %} + ins_pipe( pipe_cmpxchg ); +%} + +instruct xaddS_no_res( memory mem, Universe dummy, immI add, rFlagsReg cr) %{ + predicate(n->as_LoadStore()->result_not_used()); + match(Set dummy (GetAndAddS mem add)); + effect(KILL cr); + format %{ "ADDW [$mem],$add" %} + ins_encode %{ + __ lock(); + __ addw($mem$$Address, $add$$constant); + %} + ins_pipe( pipe_cmpxchg ); +%} + +instruct xaddS( memory mem, rRegI newval, rFlagsReg cr) %{ + match(Set newval (GetAndAddS mem newval)); + effect(KILL cr); + format %{ "XADDW [$mem],$newval" %} + ins_encode %{ + __ lock(); + __ xaddw($mem$$Address, $newval$$Register); + %} + ins_pipe( pipe_cmpxchg ); +%} + +instruct xaddI_no_res( memory mem, Universe dummy, immI add, rFlagsReg cr) %{ + predicate(n->as_LoadStore()->result_not_used()); + match(Set dummy (GetAndAddI mem add)); + effect(KILL cr); + format %{ "ADDL [$mem],$add" %} + ins_encode %{ + __ lock(); + __ addl($mem$$Address, $add$$constant); + %} + ins_pipe( pipe_cmpxchg ); +%} + +instruct xaddI( memory mem, rRegI newval, rFlagsReg cr) %{ + match(Set newval (GetAndAddI mem newval)); + effect(KILL cr); + format %{ "XADDL [$mem],$newval" %} + ins_encode %{ + __ lock(); + __ xaddl($mem$$Address, $newval$$Register); + %} + ins_pipe( pipe_cmpxchg ); +%} + +instruct xaddL_no_res( memory mem, Universe dummy, immL32 add, rFlagsReg cr) %{ + predicate(n->as_LoadStore()->result_not_used()); + match(Set dummy (GetAndAddL mem add)); + effect(KILL cr); + format %{ "ADDQ [$mem],$add" %} + ins_encode %{ + __ lock(); + __ addq($mem$$Address, $add$$constant); + %} + ins_pipe( pipe_cmpxchg ); +%} + +instruct xaddL( memory mem, rRegL newval, rFlagsReg cr) %{ + match(Set newval (GetAndAddL mem newval)); + effect(KILL cr); + format %{ "XADDQ [$mem],$newval" %} + ins_encode %{ + __ lock(); + __ xaddq($mem$$Address, $newval$$Register); + %} + ins_pipe( pipe_cmpxchg ); +%} + +instruct xchgB( memory mem, rRegI newval) %{ + match(Set newval (GetAndSetB mem newval)); + format %{ "XCHGB $newval,[$mem]" %} + ins_encode %{ + __ xchgb($newval$$Register, $mem$$Address); 
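The GetAndAdd/GetAndSet patterns in this commented-out x86 block are kept only for reference; the SW64 replacements (getAndAddI/getAndAddL further below) rebuild them from lldw/lstw (ll/sc) retry loops. The contract every one of these nodes has to meet is simply an atomic fetch-and-add or atomic exchange that returns the old value, roughly as in this plain C++ sketch (illustrative only, not part of the patch):

```cpp
#include <atomic>

// Contract of GetAndAddI / GetAndSetI: atomically update memory and
// return the value that was there before the update.
int get_and_add_int(std::atomic<int>& mem, int add) {
  return mem.fetch_add(add);
}

int get_and_set_int(std::atomic<int>& mem, int newval) {
  return mem.exchange(newval);
}
```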
+ %} + ins_pipe( pipe_cmpxchg ); +%} + +instruct xchgS( memory mem, rRegI newval) %{ + match(Set newval (GetAndSetS mem newval)); + format %{ "XCHGW $newval,[$mem]" %} + ins_encode %{ + __ xchgw($newval$$Register, $mem$$Address); + %} + ins_pipe( pipe_cmpxchg ); +%} + +instruct xchgI( memory mem, rRegI newval) %{ + match(Set newval (GetAndSetI mem newval)); + format %{ "XCHGL $newval,[$mem]" %} + ins_encode %{ + __ xchgl($newval$$Register, $mem$$Address); + %} + ins_pipe( pipe_cmpxchg ); +%} + +instruct xchgL( memory mem, rRegL newval) %{ + match(Set newval (GetAndSetL mem newval)); + format %{ "XCHGL $newval,[$mem]" %} + ins_encode %{ + __ xchgq($newval$$Register, $mem$$Address); + %} + ins_pipe( pipe_cmpxchg ); +%} + +instruct xchgP( memory mem, rRegP newval) %{ + match(Set newval (GetAndSetP mem newval)); + predicate(n->as_LoadStore()->barrier_data() == 0); + format %{ "XCHGQ $newval,[$mem]" %} + ins_encode %{ + __ xchgq($newval$$Register, $mem$$Address); + %} + ins_pipe( pipe_cmpxchg ); +%} + +instruct xchgN( memory mem, rRegN newval) %{ + match(Set newval (GetAndSetN mem newval)); + format %{ "XCHGL $newval,$mem]" %} + ins_encode %{ + __ xchgl($newval$$Register, $mem$$Address); + %} + ins_pipe( pipe_cmpxchg ); +%} +*/ + +// lsp check the T11 register? replace to t12?? +instruct partialSubtypeCheck( rRegP result, no_T11_rRegP sub, no_T11_rRegP super) %{ + match(Set result (PartialSubtypeCheck sub super)); + //effect(KILL tmp); + ins_cost(1100); // slightly larger than the next version + format %{ "partialSubtypeCheck result=$result, sub=$sub, super=$super, tmp=rscratch3 " %} + + ins_encode( enc_PartialSubtypeCheck(result, sub, super) ); + ins_pipe( pipe_slow ); +%} + +instruct storePConditional(indirect mem, v0_RegP oldval, rRegP newval, rFlagsReg cr) %{ + match(Set cr(StorePConditional mem(Binary oldval newval))); + effect(KILL oldval); + //size(56); + format %{ "StorePConditional cmpxchg $mem, $newval\t# If $oldval == $mem then store $newval into $mem" %} + ins_encode %{ + Register oldval = $oldval$$Register; + Register newval = $newval$$Register; + Register cr = $cr$$Register; + + __ storeLcon(oldval, $mem$$Address, newval); + //__ movl(cr, AT); + + %} + ins_pipe(long_memory_op); +%} + +// Conditional-store of an int value. +// ZF flag is set on success, reset otherwise. Implemented with a CMPXCHG on Intel. +instruct storeIConditional(indirect mem, v0_RegI oldval, rRegI newval, rFlagsReg cr) %{ + match(Set cr(StoreIConditional mem(Binary oldval newval))); + effect(KILL oldval); + format %{ "CMPXCHG32 $newval, $mem, $oldval \t# @storeIConditional" %} + + ins_encode %{ + Register oldval = $oldval$$Register; + Register newval = $newval$$Register; + Register cr = $cr$$Register; + guarantee($mem$$index == sp->encoding() && $mem$$disp == 0, "impossible encoding storeIConditional"); + __ storeIcon(oldval, $mem$$Address, newval); + //__ movl(cr, AT); + %} + ins_pipe(long_memory_op); + %} + + + // Conditional-store of a long value. + // ZF flag is set on success, reset otherwise. Implemented with a CMPXCHG. + +instruct storeLConditional(indirect mem, v0_RegL oldval, rRegL newval, rFlagsReg cr) %{ + match(Set cr(StoreLConditional mem (Binary oldval newval))); + effect(KILL oldval);//TODO:kill oldval? 
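The storePConditional/storeIConditional rules above (and storeLConditional, whose body continues just below) map C2's conditional-store nodes onto storeLcon/storeIcon: the new value is written only if memory still holds the expected old value, and only the success flag is consumed. A rough C++ statement of that contract (a sketch, not part of the port):

```cpp
#include <atomic>
#include <cstdint>

// StoreLConditional contract: store newval iff *mem still equals oldval;
// only the success/failure flag is consumed, and oldval itself is
// clobbered along the way (cf. effect(KILL oldval)).
bool store_long_conditional(std::atomic<int64_t>& mem, int64_t oldval, int64_t newval) {
  return mem.compare_exchange_strong(oldval, newval);
}
```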
jzy + //size(56); + format %{ "StoreLConditional cmpxchg $mem, $newval\t# If $oldval == $mem then store $newval into $mem" %} + ins_encode %{ + Register oldval = $oldval$$Register; + Register newval = $newval$$Register; + Register cr = $cr$$Register; + guarantee($mem$$index == sp->encoding() && $mem$$disp == 0, "impossible encoding storeLConditional"); + + __ storeLcon(oldval, $mem$$Address, newval); + + %} + ins_pipe(long_memory_op); + %} + +//FIXME: +instruct compareAndSwapP( rRegI res, indirect mem, v0_RegP oldval, rRegP newval, rFlagsReg cr) %{ + match(Set res (CompareAndSwapP mem (Binary oldval newval))); + effect(KILL cr, KILL oldval); + //size(60); + format %{ + "CMPXCHG $newval, $mem, $oldval @ compareAndSwapP\n\t" + "If $oldval == $mem then store $newval into $mem\n\t" + "sete $res " + %} + ins_encode %{ + Register newval = $newval$$Register; + Register oldval = $oldval$$Register; + Register res = $res$$Register; +// Address addr($mem_ptr$$Register, 0); + guarantee($mem$$index == sp->encoding() && $mem$$disp == 0, "impossible encoding compareAndSwapP"); + //SizedScope sc(&_masm, 100); + __ cmpxchg(newval, $mem$$Address, oldval); + __ seleq(rcc, 1, R0, res); + %} + ins_pipe( long_memory_op ); +%} + + +instruct compareAndSwapL( rRegI res, indirect mem, v0_RegL oldval, rRegL newval, rFlagsReg cr) %{ + //predicate(VM_Version::supports_cx8()); + match(Set res (CompareAndSwapL mem (Binary oldval newval))); + effect(KILL cr, KILL oldval); +// effect(TEMP tmpt10, USE_KILL oldval); + //size(60); //TODO: ZHJ20180613 + format %{ + "CMPXCHG $newval, $mem, $oldval @ compareAndSwapL\n\t" + "If $oldval == $mem then store $newval into $mem\n\t" + "sete $res " + %} + ins_encode %{ + Register newval = $newval$$Register; + Register oldval = $oldval$$Register; + Register res = $res$$Register; +// Address addr($mem_ptr$$Register, 0); + + //SizedScope sc(&_masm, 100); + guarantee($mem$$index == sp->encoding() && $mem$$disp == 0, "impossible encoding compareAndSwapL"); + __ cmpxchg(newval, $mem$$Address, oldval); + __ seleq(rcc, 1, R0, res); + %} + ins_pipe( long_memory_op ); +%} + +instruct compareAndSwapI( rRegI res, indirect mem, v0_RegI oldval, rRegI newval, rFlagsReg cr) %{ + match(Set res (CompareAndSwapI mem (Binary oldval newval))); + effect(KILL cr, KILL oldval); + //size(60); +// match(CompareAndSwapI mem_ptr (Binary oldval newval)); + format %{ + "CMPXCHG32 $newval, $mem, $oldval @ compareAndSwapI\n\t" + "If $oldval == $mem then store $newval into $mem\n\t" + "sete $res " + %} + ins_encode %{ + Register newval = $newval$$Register; + Register oldval = $oldval$$Register; + Register res = $res$$Register; +// Address addr($mem_ptr$$Register, 0); + guarantee($mem$$index == sp->encoding() && $mem$$disp == 0, "impossible encoding compareAndSwapI"); + + //SizedScope sc(&_masm, 100); + __ cmpxchg32(newval, $mem$$Address, oldval); + __ seleq(rcc, 1, R0, res); + %} + ins_pipe( long_memory_op ); +%} + +instruct compareAndSwapN( rRegI res, indirect mem, v0_RegN oldval, rRegN newval, rFlagsReg cr) %{ + match(Set res (CompareAndSwapN mem (Binary oldval newval))); + effect(KILL cr, KILL oldval); + //effect(KILL cr, USE_KILL oldval); +// effect(TEMP tmpT10, USE_KILL oldval); + //size(64); + format %{ + "CMPXCHG32 $newval, $mem, $oldval @ compareAndSwapI\n\t" + "If $oldval == $mem then store $newval into $mem\n\t" + "sete $res" + %} + ins_encode %{ + Register newval = $newval$$Register; + Register oldval = $oldval$$Register; + Register res = $res$$Register; +// Address addr($mem_ptr$$Register, 0); +// Label L; 
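Unlike the conditional stores, the compareAndSwapP/L/I rules above must also materialize an int result, which the encodings derive from the rcc flag with seleq after cmpxchg/cmpxchg32. In plain C++ terms the required semantics look like this (illustrative sketch only):

```cpp
#include <atomic>
#include <cstdint>

// CompareAndSwapL contract: like the conditional store, but the outcome is
// produced as an int (0 = failed, 1 = swapped); the SW64 encoding derives
// this value from the rcc flag via seleq.
int compare_and_swap_long(std::atomic<int64_t>& mem, int64_t oldval, int64_t newval) {
  return mem.compare_exchange_strong(oldval, newval) ? 1 : 0;
}
```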
+ guarantee($mem$$index == sp->encoding() && $mem$$disp == 0, "impossible encoding compareAndSwapN"); + // cmpxchg32 is implemented with ll/sc, which will do sign extension. + // Thus, we should extend oldval's sign for correct comparision. + + // __ stop("?compareAndSwapN jzy"); + __ addw(oldval, 0, oldval); + __ cmpxchg32(newval, $mem$$Address, oldval); +// __ selne(AT, 1, AT, res); + __ seleq(rcc, 1, R0, res); + %} + ins_pipe( long_memory_op ); +%} + +instruct getAndAddI(indirect mem, rRegI add, rRegI val, rFlagsReg cr) %{ + // predicate( n->get_int() == 1 && n->get_int() == -1); + // val = *mem & *mem = *mem + add + match(Set val (GetAndAddI mem add)); + effect(KILL cr); + format %{ "xaddI [$mem],$add\t@getAndAddI" %} + ins_encode %{ + Register base = as_Register($mem$$base); + int disp = $mem$$disp; + Register value = $val$$Register; + Register add = $add$$Register; + Label again; + guarantee($mem$$index == sp->encoding() && $mem$$disp == 0, "impossible encoding getAndAddI"); + SizedScope sc(&_masm, 40); + __ BIND(again); + __ lldw(AT, disp, base); + __ ldi(GP, 1, R0); + __ wr_f(GP); + __ addw(AT, add, GP); + __ align(8); // must align + __ lstw(GP, disp, base); + __ rd_f(GP); + __ beq_l(GP, again); + __ movl(value, AT); + %} + ins_pipe( long_memory_op ); +%} + +instruct getAndAddL( indirect mem, rRegL add, rRegL val, rFlagsReg cr) %{ + // val = *mem & *mem = *mem + add + match(Set val (GetAndAddL mem add)); + effect(KILL cr); + format %{ "xaddL [$mem],$add\t@ getAndAddL" %} + ins_encode %{ + Register base = as_Register($mem$$base); + int disp = $mem$$disp; + Register value = $val$$Register; + Register add = $add$$Register; + Label again; + guarantee($mem$$index == sp->encoding() && $mem$$disp == 0, "impossible encoding getAndAddL"); + SizedScope sc(&_masm, 40); + __ BIND(again); + __ lldl(AT, disp, base); + __ ldi(GP, 1, R0); + __ wr_f(GP); + __ addl( AT, add, GP); + __ align(8); // must align + __ lstl(GP, disp, base); + __ rd_f(GP); + __ beq_l(GP, again); + __ movl(value, AT); + %} + ins_pipe( long_memory_op ); +%} +//----------Subtraction Instructions------------------------------------------- + +// Integer Subtraction Instructions +instruct subI_rReg(rRegI dst, rRegI src1, rRegI src2) +%{ + match(Set dst (SubI src1 src2)); + + format %{ "subw $src1, $src2, $dst\t# int\t@subI_rReg" %} + + ins_encode%{ + Register dst = $dst$$Register; + Register src1 = $src1$$Register; + Register src2 = $src2$$Register; + + __ subw(src1, src2, dst); + %} + ins_pipe(ialu_regI_regI); +%} + +instruct subI_rReg_imm(rRegI dst, rRegI src1, immU8 src2) +%{ + match(Set dst (SubI src1 src2)); + ins_cost(80); + format %{ "subw $src1, $src2, $dst\t# int\t@subI_rReg_imm" %} + ins_encode%{ + Register dst = $dst$$Register; + Register src1 = $src1$$Register; + int imm = $src2$$constant; + + __ subw(src1, imm, dst); + %} + ins_pipe(ialu_regI_regI); +%} + +/* memory operands no need for SW64 +instruct subI_rReg_mem(rRegI dst, memory src1, rRegI src2) +%{ + match(Set dst (SubI src2 (LoadI src1))); + + ins_cost(125); + format %{ + "ldw $dst, $src1\t# int\t@subI_rReg_mem\n\t" + "subw $src2, $dst, $dst" + %} + ins_encode%{ + Register dst = $dst$$Register; + Address src1 = $src1$$Address; + Register src2 = $src2$$Register; + __ ldw(dst, src1); + __ subw(src2, dst, dst); + %} + ins_pipe(ialu_regI_regI); +%} + +instruct subI_mem_rReg(memory dst, rRegI src) +%{ + match(Set dst (StoreI dst (SubI (LoadI dst) src))); + + ins_cost(150); + format %{ + "ldw rscratch2_AT, $dst\t# int\t@subI_mem_rReg\n\t" + "subw rscratch2_AT, 
$src, $dst\n\t" + "stw rscratch2_AT, $dst, rscratch1_GP" + %} + + ins_encode%{ + Address dst = $dst$$Address; + Register src = $src$$Register; + __ ldw(rscratch2_AT, dst); + __ subw(rscratch2_AT, src, rscratch2_AT); + __ stw(rscratch2_AT, dst, rscratch1_GP); + %} + //ins_pipe(ialu_mem_reg); +%} + +instruct subI_mem_imm(memory dst, immI src) +%{ + match(Set dst (StoreI dst (SubI (LoadI dst) src))); + + ins_cost(125); // XXX + format %{ + "ldw rscratch2_AT, $dst\t# int\t@subI_mem_imm\n\t" + "subw rscratch2_AT, $src, rscratch2_AT\n\t" + "stw rscratch2_AT, $dst" + %} + + ins_encode%{ + Address dst = $dst$$Address; + int src = $src$$constant; + __ ldw(rscratch2_AT, dst); + __ subw(rscratch2_AT, src, rscratch2_AT); + __ stw(rscratch2_AT, dst, rscratch1_GP); + %} + //ins_pipe(ialu_mem_imm); +%}*/ + +instruct subL_rReg(rRegL dst, rRegL src1, rRegL src2) +%{ + match(Set dst (SubL src1 src2)); + + format %{ "subl $src1, $src2, $dst\t# long\t@subL_rReg" %} + + ins_encode%{ + Register dst = $dst$$Register; + Register src1 = $src1$$Register; + Register src2 = $src2$$Register; + __ subl(src1, src2, dst); + %} + ins_pipe(ialu_regL_regL); +%} + +instruct subL_rReg_imm(rRegI dst, rRegI src1, immUL8 src2) +%{ + match(Set dst (SubL src1 src2)); + + format %{"subl $src1, $src2, $dst\t# long\t@subL_rReg_imm" %} + + ins_encode%{ + Register dst = $dst$$Register; + Register src1 = $src1$$Register; + int src2 = $src2$$constant; + + __ subl(src1, src2, dst); + %} + ins_pipe(ialu_regL_imm); +%} + +/* memory operands no need for SW64 +instruct subL_rReg_mem(rRegL dst, rRegL src1, memory src2) +%{ + match(Set dst (SubL src1 (LoadL src2))); + + ins_cost(125); + format %{ + "ldl rscratch2_AT, $src2\t# long\t@subL_rReg_mem\n\t" + "subl $src1, rscratch2_AT, rscratch2_AT\n\t" + "stl rscratch2_AT, $dst" + %} + + ins_encode%{ + Register dst = $dst$$Register; + Register src1 = $src1$$Register; + Address src2 = $src2$$Address; + __ ldl(rscratch2_AT, src2); + __ subl(src1, rscratch2_AT, dst); + %} + //ins_pipe(ialu_reg_mem); +%} + +instruct subL_mem_rReg(memory dst, rRegL src) +%{ + match(Set dst (StoreL dst (SubL (LoadL dst) src))); + + ins_cost(150); + format %{ + "ldl rscratch2_AT, $dst\t# long\t@subL_mem_rReg\n\t" + "subl rscratch2_AT, $src, rscratch2_AT\n\t" + "stl rscratch2_AT, $dst" + %} + + ins_encode%{ + Address dst = $dst$$Address; + Register src = $src$$Register; + __ ldl(rscratch2_AT, dst); + __ subl(rscratch2_AT, src, rscratch2_AT); + __ stl(rscratch2_AT, dst, rscratch1_GP); + %} + //ins_pipe(ialu_mem_reg); +%} + +instruct subL_mem_imm(memory dst, immL32 src) +%{ + match(Set dst (StoreL dst (SubL (LoadL dst) src))); + + ins_cost(125); // XXX + format %{ + "ldptr rscratch2_AT, $dst\t# long\t@subL_mem_imm\n\t" + "subptr rscratch2_AT, $src, rscratch2_AT\n\t" + "stptr rscratch2_AT, $dst" + %} + + ins_encode%{ + Address dst = $dst$$Address; + int src = $src$$constant; + __ ldl(rscratch2_AT, dst); + __ mov_immediate32s(rscratch1_GP, src); //lsp to check sign-extend?? + __ subl(rscratch2_AT, rscratch1_GP, rscratch2_AT); + __ stl(rscratch2_AT, dst, rscratch1_GP); + %} + //ins_pipe(ialu_mem_imm); +%}*/ + +// Subtract from a pointer +// XXX hmpf??? 
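For the rule that follows: C2 has no pointer-subtract node, so a `pointer - int` expression reaches the matcher as AddP of the pointer and a negated index, which is why subP_rReg below negates with subw and then adds with addl. A minimal C++ rendering of that shape (illustrative only, not part of the patch):

```cpp
#include <cstdint>

// Shape matched by subP_rReg: AddP src1 (SubI zero src2),
// i.e. negate the 32-bit index, then add it to the 64-bit pointer.
char* sub_pointer_int(char* base, int32_t idx) {
  int32_t neg = (int32_t)(0u - (uint32_t)idx);  // SubI zero src2 (subw R0, src2, dst)
  return base + (intptr_t)neg;                  // AddP           (addl src1, dst, dst)
}
```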
+instruct subP_rReg(rRegP dst, rRegP src1, rRegI src2, immI0 zero) +%{ + match(Set dst (AddP src1 (SubI zero src2))); + + format %{ "subw R0, $src2, $dst\t# ptr - int\t@subP_rReg\n\t" + "addl $src1, $dst, $dst" + %} + + ins_encode%{ + Register dst = $dst$$Register; + Register src1 = $src1$$Register; + Register src2 = $src2$$Register; + __ subw(R0, src2, dst); + __ addl(src1, dst, dst); + %} + ins_pipe(ialu_regL_regL); +%} + +instruct negI_rReg(rRegI dst, rRegI src, immI0 zero) +%{ + match(Set dst (SubI zero src)); + + format %{ "subw R0, $src,$dst\t# int\t@negI_rReg" %} + + ins_encode%{ + Register dst = $dst$$Register; + Register src = $src$$Register; + __ subw(R0, src, dst); + %} + ins_pipe(ialu_regI_imm16); +%} + +/* +instruct negI_rReg_2(rRegI dst, rFlagsReg cr) +%{ + match(Set dst (NegI dst)); + effect(KILL cr); + + format %{ "negl $dst\t# int" %} + ins_encode %{ + __ negw($dst$$Register); + %} + ins_pipe(ialu_reg); +%} +*/ + +/* memory operands no need for SW64 +instruct negI_mem(memory dst, immI0 zero) +%{ + match(Set dst (StoreI dst (SubI zero (LoadI dst)))); + + format %{ "ldw rscratch2_AT, $dst\t# int\t@negI_mem\n\t" + "subw R0, rscratch2_AT, rscratch2_AT\n\t" + "stw rscratch2_AT, $dst, rscratch1_GP" + %} + + ins_encode%{ + Address dst = $dst$$Address; + __ ldw(rscratch2_AT, dst); + __ subw(R0, rscratch2_AT, rscratch2_AT); + __ stw(rscratch2_AT, dst, rscratch1_GP); + %} + //ins_pipe(ialu_reg); +%}*/ + +instruct negL_rReg(rRegL dst, rRegL src, immL0 zero) +%{ + match(Set dst (SubL zero src)); + + format %{ "subl R0, $src, $dst\t# long \t@negL_rReg" %} + + ins_encode%{ + Register dst = $dst$$Register; + Register src = $src$$Register; + __ subl(R0, src, dst); + %} + ins_pipe(ialu_regL_imm); +%} + +/* +instruct negL_rReg_2(rRegL dst, rFlagsReg cr) +%{ + match(Set dst (NegL dst)); + effect(KILL cr); + + format %{ "negq $dst\t# int" %} + ins_encode %{ + __ negq($dst$$Register); + %} + ins_pipe(ialu_reg); +%} +*/ + +/* memory operands no need for SW64 +instruct negL_mem(memory dst, immL0 zero) +%{ + match(Set dst (StoreL dst (SubL zero (LoadL dst)))); + + format %{ "ldl rscratch2_AT, $dst\t# long\t@negL_mem\n\t" + "subl R0, rscratch2_AT, rscratch2_AT\n\t" + "stl( rscratch2_AT, $dst, rscratch1_GP" + %} + + ins_encode%{ + Address dst = $dst$$Address; + __ ldl(rscratch2_AT, dst); + __ subl(R0, rscratch2_AT, rscratch2_AT); + __ stl(rscratch2_AT, dst, rscratch1_GP); + %} + //ins_pipe(ialu_reg); +%}*/ + + +//----------Multiplication/Division Instructions------------------------------- +// Integer Multiplication Instructions +// Multiply Register + +instruct mulI_rReg(rRegI dst, rRegI src1, rRegI src2) +%{ + match(Set dst (MulI src1 src2)); + + ins_cost(300); + format %{ "mulw $src1, $src2, $dst\t# int @mulI_rReg" %} + ins_encode %{ + Register src1 = $src1$$Register; + Register src2 = $src2$$Register; + Register dst = $dst$$Register; + + __ mulw(src1, src2, dst); + %} + ins_pipe( ialu_mult ); +%} + +instruct mulI_rReg_imm(rRegI dst, rRegI src, immU8 imm) +%{ + match(Set dst (MulI src imm)); + + ins_cost(300); + format %{ "mulw $src, $dst, $dst \t# int @mulI_rReg_imm\n\t" %} + ins_encode %{ + Register src1 = $src$$Register; + int src2 = $imm$$constant; + Register dst = $dst$$Register; + __ mulw(src1, src2, dst); + %} + ins_pipe( ialu_mult_imm ); +%} + +/* memory operands no need for SW64 +instruct mulI_mem(rRegI dst, memory src1, rRegI src2) +%{ + match(Set dst (MulI src2 (LoadI src1))); + + ins_cost(350); + format %{ "ldw $dst, $src1\t# int @mulI_mem \n\t" + "mulw $dst, $src2, $dst" %} + 
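The mulI_rReg/mulI_rReg_imm rules above map MulI onto a single mulw, so what they rely on is Java's int multiplication: the low 32 bits of the product, interpreted as a signed int. That requirement in plain C++ (a sketch, not part of the patch):

```cpp
#include <cstdint>

// Java 'int * int': keep the low 32 bits of the product, viewed as signed.
int32_t java_imul(int32_t a, int32_t b) {
  return (int32_t)((uint32_t)a * (uint32_t)b);  // well-defined wrap-around
}
```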
ins_encode %{ + Register dst = $dst$$Register; + Address src1 = $src1$$Address; + Register src2 = $src2$$Register; + __ ldw(dst, src1); + __ mulw(src2, dst, dst); + %} +// ins_pipe(ialu_reg_mem_alu0); +%} + +instruct mulI_mem_imm(rRegI dst, memory src, immI imm) +%{ + match(Set dst (MulI (LoadI src) imm)); + + ins_cost(300); + format %{ "ldw rscratch2_AT, $src, $imm\t# int @mulI_mem_imm \n\t" + "mov_immediate32 rscratch1_GP, $imm\n\t" + "mulw rscratch2_AT, $imm, $dst"%} + ins_encode %{ + Register dst = $dst$$Register; + Address src = $src$$Address; + int val = $imm$$constant; + __ ldw(rscratch2_AT, src); + __ mov_immediate32(rscratch1_GP, val); + __ mulw(rscratch2_AT, rscratch1_GP, dst); + %} +// ins_pipe(ialu_reg_mem_alu0); +%}*/ + +instruct mulL_rReg(rRegL dst, rRegL src1, rRegL src2) +%{ + match(Set dst (MulL src1 src2)); + + ins_cost(300); + format %{ "mull $src1, $src2, $dst\t# long @mulL_rReg" %} + ins_encode %{ + Register src1 = $src1$$Register; + Register src2 = $src2$$Register; + Register dst = $dst$$Register; + + __ mull(src1, src2, dst); + %} + ins_pipe(pipe_slow); +%} + +instruct mulL_rReg_imm(rRegL dst, rRegL src, immUL8 imm) +%{ + match(Set dst (MulL src imm)); + + ins_cost(300); + format %{ "mull $src, $imm, $dst \t# long\t@mulL_rReg_imm\n\t" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src$$Register; + int imm = $imm$$constant; + + __ mull(src, imm, dst); + %} + ins_pipe( pipe_slow ); +%} + +/* memory operands no need for SW64 +instruct mulL_mem(rRegL dst, memory src1, rRegL src2) +%{ + match(Set dst (MulL src2 (LoadL src1))); + + ins_cost(350); + format %{ "ldptr $dst, $src1 \t# long\t@mulL_mem\n\t" + "mull $src2, $dst, $dst" %} + ins_encode %{ + Register dst = $dst$$Register; + Address src1 = $src1$$Address; + Register src2 = $src2$$Register; + __ ldptr(dst, src1); + __ mull(src2, dst, dst); + %} +// ins_pipe(ialu_reg_mem_alu0); +%} + +instruct mulL_mem_imm(rRegL dst, memory src, immL32 imm) +%{ + match(Set dst (MulL (LoadL src) imm)); + + ins_cost(300); + format %{ "ldptr $dst, $src\t# long\t@mulL_mem_imm\n\t" + "mov_immediate32 rscratch1_GP, $imm\n\t" + "mull $dst, rscratch1_GP, $dst"%} + ins_encode %{ + Register dst = $dst$$Register; + Address src = $src$$Address; + int val = $imm$$constant; + __ ldptr(dst, src); + __ mov_immediate32s(rscratch1_GP, val); + __ mull(dst, rscratch1_GP, dst); + %} +// ins_pipe(ialu_reg_mem_alu0); +%}*/ + +/*sw have no such instruct +instruct mulHiL_rReg(rdx_RegL dst, no_rax_RegL src, rax_RegL rax)//??todo +%{ + match(Set dst (MulHiL src rax)); + + ins_cost(300); + format %{ "imulq RDX:RAX, RAX, $src\t# mulhi" %} + ins_encode %{ +// (REX_reg_wide(src), OpcP, reg_opc(src)); + %} +// ins_pipe(ialu_reg_reg_alu0); +%} +*/ + +instruct divI_rReg(rRegI dst, rRegI src, rRegI div) +%{ + match(Set dst (DivI src div)); + + ins_cost(30*100+10*100); // XXX + format %{ "divI $src, $div $dst @divI_rReg" %}//TODO: How to represent the logic written below?jx + + ins_encode%{ + Register dst = $dst$$Register; + Register src = $src$$Register; + Register div = $div$$Register; + //__ stop("divI_rReg"); + +// if (UseSW6B) { +// __ divw(src1, src2, dst); +// } else + if (FastIntDiv) { + __ idiv_sw(src, div, dst); + } else { + __ saveTRegisters(); + if(src == A0){ + __ movl(rscratch3, src); + __ movl(A0, div); + __ movl(A1, rscratch3); + }else{ + __ movl(A0, div); + __ movl(A1, src); + } + __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::sdiv), 2); + __ movl(pv, V0); + __ restoreTRegisters(); + __ movl(dst, pv); + } + %} + 
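The DivI rule above either uses idiv_sw directly (FastIntDiv) or shuffles the operands into A0/A1 and makes a leaf call to SharedRuntime::sdiv, whose first argument is the divisor and second the dividend (see the register moves in the encoding). Whichever path is taken, the result has to follow Java's int division: truncation toward zero, with MIN_VALUE / -1 defined as MIN_VALUE. A C++ sketch of that requirement (illustrative only):

```cpp
#include <cstdint>

// Java int division, which both the idiv_sw fast path and the
// SharedRuntime::sdiv fallback are expected to provide.
int32_t java_idiv(int32_t dividend, int32_t divisor) {
  if (dividend == INT32_MIN && divisor == -1) return INT32_MIN;  // the one overflow case
  return dividend / divisor;  // division by zero is handled separately (ArithmeticException)
}
```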
ins_pipe(pipe_slow); +%} + +instruct divL_rReg(rRegL dst, rRegL src, rRegL div) +%{ + match(Set dst (DivL src div)); + + ins_cost(30*100+10*100); // XXX + format %{ "divL $src $div $dst @divL_rReg" %}//TODO: How to represent the logic written below?jx + ins_encode%{ + Register dst = $dst$$Register; + Register src = $src$$Register; + Register div = $div$$Register; +// if (UseSW6B) { +// __ divl(dst, div, dst); +// } else + if (FastLongDiv) { + Label ldiv, exit; + //AT does not need to be saved(in pushad function) before calling + //since it has been defined as NS + __ slll(dst, 0xb, rscratch3); //logically left shift 11-bit + __ sral(rscratch3, 0xb, rscratch3); //arithmetically right shift 11-bit + + // when 1 was put in 53 bit-position, + // the result would be different from the original one + + // which means when the value of op1 is [0xFFE0000000000000, 0x20000000000000], + // the result would be different after slll and sral + // why?jx + __ cmpeq(dst, rscratch3, rscratch3); + + __ bne_l(rscratch3, ldiv); + + __ saveTRegisters(); + if(src == A0){ + __ movl(pv, src); + __ movl(A0, div); + __ movl(A1, pv); + }else{ + __ movl(A0, div); + __ movl(A1, src); + } + __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::ldiv), 2); + __ movl(pv, V0); + __ restoreTRegisters(); + __ movl(dst, pv); + __ beq_l(R0, exit); + + __ BIND(ldiv); + __ ldiv_sw(src, div, dst); + + __ BIND(exit); + } else { + __ saveTRegisters(); + if(src == A0){ + __ movl(rscratch3, src); + __ movl(A0, div); + __ movl(A1, rscratch3); + }else{ + __ movl(A0, div); + __ movl(A1, src); + } + __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::ldiv), 2); + __ movl(pv, V0); + __ restoreTRegisters(); + __ movl(dst, pv); + } + %} + ins_pipe(pipe_slow); +%} + +/*No need for SW64 +// Author: jx 1.13.2021 +// Integer DIVMOD with Register, both quotient and mod results +// refer to the definition and implementation of idiv in macroAssembler_sw64.hpp +// and irem_sw in macroAssembler_sw64.hpp +// +// dst: dividend +// divisor: div +// dst: quotient +// rem: reminder +// +// dst = dst/div ----rem = reminder +instruct divModI_rReg_divmod(rRegI dst, rRegI rem, rRegI src, rRegI div) +%{ + match(Set dst (DivModI src div)); + + ins_cost(30*100+10*100); // XXX + format %{ "divModI" %} + ins_encode%{ + Register dst = $dst$$Register; + Register rem = $rem$$Register; + Register src = $src$$Register; + Register div = $div$$Register; + Register scratch1 = rscratch1_GP; + Register scratch2 = rscratch2_AT; + + +// if (UseSW6B) { +// __ remw(dst, div, rem); +// __ idivw(dst, div, dst); +// } else + if (FastIntRem) { + __ irem_sw(src, div, rem); + __ idiv_sw(src, div, dst); + } else { + // In this section, srem and sdiv will be invoked, and their declaration define + // the second parameter is dividend and the first one is divisor + // like this: result = the second one % the first one + // therefore, A0 stores the div and A1 stores the dst, which acts as dividend + + __ pushad(); //save all the values of registers before calculating + + __ movl(A0, div); + if(src==A0){//ps: the judegement here is to find out whether the registers are the same one, not the value + // load the value of A0, which is before the invocation, into A1 + __ ldl(A1, 128, scratch1);//refer to pushad in macroAssembler_sw64.cpp + } + else{ + __ movl(A1, src); + } + + __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::sdiv), 2); + __ movl(scratch1, V0); + + __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::srem), 2); + __ movl(scratch2, V0); + + __ popad(); + __ 
movws(rem, scratch2); + __ movws(dst, scratch1); + } + %} + ins_pipe(pipe_slow); +%} + +// Long DIVMOD with Register, both quotient and mod results +instruct divModL_rReg_divmod(rRegL dst, rRegL rem, rRegL src, rRegL div) +%{ + match(Set dst (DivModL src div)); + + ins_cost(30*100+10*100); // XXX + format %{ "divModL" %} + ins_encode%{ + Register dst = $dst$$Register; + Register rem = $rem$$Register; + Register src = $src$$Register; + Register div = $div$$Register; + Register scratch1 = rscratch1_GP; + Register scratch2 = rscratch2_AT; + + +// if (UseSW6B) { +// __ remw(dst, div, rem); +// __ idivw(dst, div, dst); +// } else + if (FastIntRem) { + __ lrem_sw(src, div, rem); + __ ldiv_sw(src, div, dst); + } else { + // In this section, srem and sdiv will be invoked, and their declaration define + // the second parameter is dividend and the first one is divisor + // like this: result = the second one % the first one + // therefore, A0 stores the div and A1 stores the dst, which acts as dividend + + __ pushad(); //save all the values of registers before calculating + + __ movl(A0, div); + if(src==A0){//ps: the judegement here is to find out whether the registers are the same one, not the value + // load the value of A0, which is before the invocation, into A1 + __ ldl(A1, 128, scratch1);//refer to pushad in macroAssembler_sw64.cpp + } + else{ + __ movl(A1, src); + } + + __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::ldiv), 2); + __ movl(scratch1, V0); + + __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::lrem), 2); + __ movl(scratch2, V0); + + __ popad(); + __ movl(rem, scratch2); + __ movl(dst, scratch1); + } + %} + //ins_pipe(pipe_slow); +%} + +//----------- DivL-By-Constant-Expansions-------------------------------------- +// DivI cases are handled by the compiler + +// Magic constant, reciprocal of 10 + +instruct loadConL_0x6666666666666667(rRegL dst) +%{ + format %{ "movq $dst, #0x666666666666667\t# Used in div-by-10" %} + ins_encode%{ + Register dst = $dst$$Register; + __ mov_immediate64(dst, 0x666666666666667); + %} + //ins_pipe(ialu_reg); +%} + +//instruct mul_hi(rRegL dst, rRegL src, rRegL rax) +//%{ +// +// format %{ "imulq rdx:rax, rax, $src\t# Used in div-by-10" %} +// opcode(0xF7, 0x5); +// ins_encode(REX_reg_wide(src), OpcP, reg_opc(src)); +// ins_pipe(ialu_reg_reg_alu0); +//%} + +//instruct sarL_rReg_63(rRegL dst, rFlagsReg cr) +//%{ +// effect(USE_DEF dst, KILL cr); +// +// format %{ "sarq $dst, #63\t# Used in div-by-10" %} +// opcode(0xC1, 0x7); +// ins_encode(reg_opc_imm_wide(dst, 0x3F)); +// ins_pipe(ialu_reg); +//%} +// +//instruct sarL_rReg_2(rRegL dst, rFlagsReg cr) +//%{ +// effect(USE_DEF dst, KILL cr); +// +// format %{ "sarq $dst, #2\t# Used in div-by-10" %} +// opcode(0xC1, 0x7); +// ins_encode(reg_opc_imm_wide(dst, 0x2)); +// ins_pipe(ialu_reg); +//%} + +instruct divL_10(rRegL dst, rRegL src, immL10 div) +%{ + match(Set dst (DivL src div)); + + ins_cost((5+8)*100); + ins_encode %{ + __ mov_immediate64(rscratch2_AT, 10); + __ ldiv_sw(src, rscratch2_AT, dst); + %} +%} + */ + +//----------------------------------------------------------------------------- + +instruct modI_rReg(rRegI dst, rRegI src1, rRegI src2) +%{ + match(Set dst (ModI src1 src2)); + + ins_cost(300); // XXX + format %{ "modi $src1, $src2, $dst @ modI_rReg" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src1 = $src1$$Register; + Register src2 = $src2$$Register; +// __ stop("modI_rReg"); + +// if (UseSW6B) { +// __ remw(src1, src2, dst); +// } else + if (FastIntRem) { + 
__ irem_sw(src1, src2, dst); + } else { + __ saveTRegisters(); + if(src1 == A0){ + __ movl(pv, src1); + __ movl(A0, src2); + __ movl(A1, pv); + }else{ + __ movl(A0, src2); + __ movl(A1, src1); + } + __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::srem), 2); + __ movl(pv, V0); + __ restoreTRegisters(); + __ movl(dst, pv); + } + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct modL_rReg(rRegL dst, rRegL src1, rRegL src2) +%{ + match(Set dst (ModL src1 src2)); + + ins_cost(300); // XXX + format %{ "modL $src1, $src2, $dst\t@modL_rReg" %} + ins_encode %{ + Register dst = as_Register($dst$$reg); + Register src1 = as_Register($src1$$reg); + Register src2 = as_Register($src2$$reg); + +// if (UseSW6B) { +// __ reml(src1, src2, dst); +// } else + if (FastLongRem) { + Label lrem, exit; +// Register tem = operand; + + __ slll(src1, 0xb, rscratch3); + __ sral(rscratch3, 0xb, rscratch3); + __ cmpeq(src1, rscratch3, rscratch3); + __ bne_l(rscratch3, lrem); + + __ saveTRegisters(); + if(src1 == A0){ + __ movl(rscratch3, src1); + __ movl(A0, src2); + __ movl(A1, rscratch3); + }else{ + __ movl(A0, src2); + __ movl(A1, src1); + } + __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::lrem), 2); + __ movl(pv, V0); + __ restoreTRegisters(); + __ movl(dst, pv); + __ beq_l(R0, exit); + + __ BIND(lrem); + __ lrem_sw(src1, src2, dst); + + __ BIND(exit); + } else { + __ saveTRegisters(); + if(src1 == A0){ + __ movl(rscratch3, src1); + __ movl(A0, src2); + __ movl(A1, rscratch3); + }else{ + __ movl(A0, src2); + __ movl(A1, src1); + } + __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::lrem), 2); + __ movl(pv, V0); + __ restoreTRegisters(); + __ movl(dst, pv); + } + %} + ins_pipe( pipe_slow ); +%} + +/*No need for SW64 +// Integer Shift Instructions +// Shift Left by one +instruct salI_rReg_1(rRegI dst, rRegI src, immI1 shift) +%{ + match(Set dst (LShiftI src shift)); + + format %{ "slll $src, #1, $dst\t# @salI_rReg_1" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src$$Register; + + __ slll(src, 1, dst); + __ addw(dst, R0, dst);//lsp to check ok?? 
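The slll-by-11/sral-by-11 guard in divL_rReg and modL_rReg above (the step the inline comment asks about) is a range check: shifting left and then arithmetically right by 11 bits reproduces the operand exactly when it is a sign-extension of its low 53 bits, i.e. when it lies in [-2^52, 2^52-1]. Presumably that is the range in which the ldiv_sw/lrem_sw fast path is exact (53 bits matches a double's significand); outside it the code falls back to the SharedRuntime call. A C++ restatement of the check (sketch only):

```cpp
#include <cstdint>

// FastLongDiv/FastLongRem guard: true iff x is a sign-extension of its
// low 53 bits, i.e. -2^52 <= x < 2^52 (slll 0xb ; sral 0xb ; cmpeq).
bool fits_in_53_signed_bits(int64_t x) {
  int64_t t = (int64_t)((uint64_t)x << 11) >> 11;
  return t == x;
}
```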
+ + %} + ins_pipe(ialu_regI_imm); +%}*/ + +/* memory operands no need for SW64 +// Shift Left by one +instruct salI_mem_1(memory dst, immI1 shift) +%{ + match(Set dst (StoreI dst (LShiftI (LoadI dst) shift))); + + format %{ "ldw rscratch2_AT, $dst\t# @salI_mem_1\n\t" + "slll rscratch2_AT, #1, rscratch2_AT\n\t" + "stw rscratch2_AT, $dst" %} + + ins_encode %{ + Address dst = $dst$$Address; + __ ldw(rscratch2_AT, dst); + __ slll(rscratch2_AT, 1, rscratch2_AT); + __ stw(rscratch2_AT, dst, rscratch1_GP); + %} + //ins_pipe(ialu_mem_imm); +%}*/ + +// Shift Left by 8-bit immediate +instruct salI_rReg_imm(rRegI dst, rRegI src, immU8 shift) +%{ + match(Set dst (LShiftI src shift)); + ins_cost(80); + format %{ "slll $src, $shift&0x1f, $dst\t# @salI_rReg_imm\n\t" + "addw $dst, #0, $dst" %} + + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src$$Register; + int shamt = $shift$$constant; + + __ slll(src, shamt&0x1f, dst); + __ addw(dst, 0, dst); + %} + ins_pipe(ialu_regI_imm16); +%} + +/* memory operands no need for SW64 +// Shift Left by 8-bit immediate +instruct salI_mem_imm(memory dst, immI8 shift) +%{ + match(Set dst (StoreI dst (LShiftI (LoadI dst) shift))); + + format %{ "ldw rscratch2_AT, $dst\t# @salI_mem_imm\n\t" + "slll rscratch2_AT, $shift&0x1f, rscratch2_AT\n\t" + "stw rscratch2_AT, $dst, rscratch1_GP" %}//?shift + + ins_encode %{ + Address dst = $dst$$Address; + int shamt = $shift$$constant; + __ ldw(rscratch2_AT, dst); + __ slll(rscratch2_AT, shamt&0x1f, rscratch2_AT); + __ stw(rscratch2_AT, dst, rscratch1_GP); + %} + //ins_pipe(ialu_mem_imm); +%} +*/ +// Shift Left by variable//sny reg_reg +instruct salI_rReg_CL(rRegI dst, rRegI src, rRegI shift) +%{ + match(Set dst (LShiftI src shift)); + + format %{ + "and_ins $shift, #0x1f, rscratch3\t #@salI_rReg_CL\n\t" + "slll $src, rscratch3, $dst\n\t" + "movws $dst, $dst" + %}//?shift + + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src$$Register; + Register shamt = $shift$$Register; + if (UseSW6B) { + __ sllw(src, shamt, dst); + } else { + __ and_ins(shamt, 0x1f, rscratch3);//31(0x1f) + __ slll(src, rscratch3, dst); + __ movws(dst, dst);// Do we need this operation?jx lsp??
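The int left-shift rules here (salI_rReg_imm, salI_rReg_CL) mask the shift count to 5 bits and re-sign-extend the 32-bit result (the addw/movws step the comments question), which is exactly what Java's `int` shift semantics require on a machine with 64-bit registers. In plain C++ (illustrative sketch, not part of the patch):

```cpp
#include <cstdint>

// Java 'int << count': only the low 5 bits of the count are used, and the
// 32-bit result is kept sign-extended in the 64-bit register.
int32_t java_ishl(int32_t x, int32_t count) {
  return (int32_t)((uint32_t)x << (count & 0x1f));
}
```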
+ } + %} + ins_pipe(ialu_regI_regI); +%} +/* memory operands no need for SW64 +// Shift Left by variable +instruct salI_mem_CL(memory dst, rRegI shift) +%{ + match(Set dst (StoreI dst (LShiftI (LoadI dst) shift))); + + format %{ + "ldw rscratch2_AT, $dst\t #@salI_mem_CL\n\t" + "and_ins $shift, 0x1f, $shift\n\t" + "slll rscratch2_AT, $shift, rscratch2_AT\n\t" + "stw rscratch2_AT, $dst, rscratch1_GP" + %} + + ins_encode %{ + Register shamt = $shift$$Register; + Address dst = $dst$$Address; + __ ldw(rscratch2_AT, dst); + __ and_ins(shamt, 0x1f, shamt); + __ slll(rscratch2_AT, shamt, rscratch2_AT); + __ stw(rscratch2_AT, dst, rscratch1_GP); + %} + //ins_pipe(ialu_mem_reg); +%} +*/ + +/* no need for SW64 +// Arithmetic shift right by one +instruct sarI_rReg_1(rRegI dst, rRegI src, immI1 shift) +%{ + match(Set dst (RShiftI src shift)); + + format %{ "sral $src, #1, $dst\t #@sarI_rReg_1" %} + + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src$$Register; + + __ sral(src, 1, dst); + %} + ins_pipe(ialu_regI_imm); +%}*/ + +/* memory operands no need for SW64 +// Arithmetic shift right by one +instruct sarI_mem_1(memory dst, immI1 shift) +%{ + match(Set dst (StoreI dst (RShiftI (LoadI dst) shift))); + + format %{ + "ldw rscratch2_AT, $dst\t #@sarI_mem_1\n\t" + "sral rscratch2_AT, #1, rscratch2_AT\n\t" + "stw rscratch2_AT, $dst, rscratch1_GP" + %} + + ins_encode %{ + Address dst = $dst$$Address; + + __ ldw(rscratch2_AT, dst); + __ sral(rscratch2_AT, 1, rscratch2_AT); + __ stw(rscratch2_AT, dst, rscratch1_GP); + %} + //ins_pipe(ialu_mem_imm); +%}*/ + +// Arithmetic Shift Right by 8-bit immediate +instruct sarI_rReg_imm(rRegI dst, rRegI src, immU8 shift) +%{ + match(Set dst (RShiftI src shift)); + + format %{ + "sral $src, $shift&0x1f, $dst\t #@sarI_rReg_imm" + %} + + ins_encode %{ + Register src = $src$$Register; + Register dst = $dst$$Register; + int shamt = $shift$$constant; + __ sral(src, shamt&0x1f, dst); + %} + ins_pipe(ialu_regI_imm16); +%} + +/* memory operands no need for SW64 +// Arithmetic Shift Right by 8-bit immediate +instruct sarI_mem_imm(memory dst, immI8 shift) +%{ + match(Set dst (StoreI dst (RShiftI (LoadI dst) shift))); + + format %{ + "ldw rscratch2_AT, $dst\t #@sarI_mem_imm\n\t" + "sral rscratch2_AT, $shift&0x1f, rscratch2_AT\n\t" + "stw rscratch2_AT, $dst, rscratch1_GP" + %} + + ins_encode %{ + Address dst = $dst$$Address; + int shamt = $shift$$constant; + __ ldw(rscratch2_AT, dst); + __ sral(rscratch2_AT, shamt&0x1f, rscratch2_AT); + __ stw(rscratch2_AT, dst, rscratch1_GP); + %} + //ins_pipe(ialu_mem_imm); +%}*/ + +// Arithmetic Shift Right by variable +instruct sarI_rReg_CL(rRegI dst, rRegI src, rRegI shift) +%{ + match(Set dst (RShiftI src shift)); + + format %{ + "and_ins $shift, #31, rscratch3\t #@sarI_rReg_CL\n\t\t" + "sral $src, rscratch3, $dst" + %} + + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src$$Register; + Register shamt = $shift$$Register; + __ and_ins(shamt, 0x1f, rscratch3); + __ sral(src, rscratch3, dst); + %} + ins_pipe(ialu_regI_regI); +%} + +/* memory operands no need for SW64 +// Arithmetic Shift Right by variable +instruct sarI_mem_CL(memory dst, rRegI shift) +%{ + match(Set dst (StoreI dst (RShiftI (LoadI dst) shift))); + + format %{ + "ldw rscratch2_AT, $dst\t #@sarI_mem_CL\n\t" + "and_ins $shift, #31, $shift\n\t" + "sral rscratch2_AT, $shift, rscratch2_AT\n\t" + "stw rscratch2_AT, $dst, rscratch1_GP" + %} + + ins_encode %{ + Register shamt = $shift$$Register; + Address dst = $dst$$Address; + __ ldw(rscratch2_AT, 
dst); + __ and_ins(shamt, 31, shamt); + __ sral(rscratch2_AT, shamt, rscratch2_AT); + __ stw(rscratch2_AT, dst, rscratch1_GP); + %} + //ins_pipe(ialu_mem_reg); +%}*/ + +/* no need for SW64 +// Logical shift right by one +instruct shrI_rReg_1(rRegI dst, rRegI src, immI1 shift) +%{ + match(Set dst (URShiftI src shift)); + + format %{ "srll $src, #1, $dst\t #@shrI_rReg_1" %} + + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src$$Register; + __ srll(src, 1, dst); + %} + ins_pipe(ialu_regI_imm); +%}*/ + +/* memory operands no need for SW64 +// Logical shift right by one +instruct shrI_mem_1(memory dst, immI1 shift) +%{ + match(Set dst (StoreI dst (URShiftI (LoadI dst) shift))); + + format %{ "ldw rscratch2_AT, $dst\t #@shrI_mem_1\n\t" + "srll rscratch2_AT, #1, rscratch2_AT\n\t" + "stw rscratch2_AT, $dst, rscratch1_GP" %} + + ins_encode %{ + Address dst = $dst$$Address; + __ ldw(rscratch2_AT, dst); + __ srll(rscratch2_AT, 0x1, rscratch2_AT); + __ stw(rscratch2_AT, dst, rscratch1_GP); + %} + //ins_pipe(ialu_mem_imm); +%}*/ + +// Logical Shift Right by 8-bit immediate +instruct shrI_rReg_imm(rRegI dst, rRegI src, immU8 shift) +%{ + match(Set dst (URShiftI src shift)); + + format %{ "srll $src, $shift&0x1f, $dst\t #@shrI_rReg_imm" %} + + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src$$Register; + int shamt = $shift$$constant; + __ zapnot(src, 0xf, dst); + __ srll(dst, shamt&0x1f, dst); + __ addw(dst, 0x0, dst); //need to CHECK lsp + %} + ins_pipe(ialu_regI_imm16); +%} + +/* memory operands no need for SW64 +// Logical Shift Right by 8-bit immediate +instruct shrI_mem_imm(memory dst, immI8 shift) +%{ + match(Set dst (StoreI dst (URShiftI (LoadI dst) shift))); + + format %{ "ldw rscratch2_AT, $dst\t #@shrI_mem_imm\n\t" + "srll rscratch2_AT, $shift&0x1f, rscratch2_AT\n\t" + "stw rscratch2_AT, $dst, rscratch1_GP" %} + + ins_encode %{ + Address dst = $dst$$Address; + int shamt = $shift$$constant; + __ ldw(rscratch2_AT, dst); + __ srll(rscratch2_AT, shamt&0x1f, rscratch2_AT); + __ stw(rscratch2_AT, dst, rscratch1_GP); + %} + //ins_pipe(ialu_mem_imm); +%}*/ + +// Logical Shift Right by variable +instruct shrI_rReg_CL(rRegI dst, rRegI src, rRegI shift) +%{ + match(Set dst (URShiftI src shift)); + + format %{ + "and_ins $shift, 0x1f, rscratch3\t #@shrI_rReg_CL\n\t\t" + "movwu $dst, $src\n\t\t" + "srll $dst, rscratch3, $dst\n\t\t" + "movws $dst, $dst" + %} + + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src$$Register; + Register shamt = $shift$$Register; + __ and_ins(shamt, 0x1f, rscratch3); + __ movwu(dst, src); //need to Check lsp + __ srll(dst, rscratch3, dst); + __ movws(dst, dst); + + %} + ins_pipe(ialu_regI_regI); +%} + +/* memory operands no need for SW64 +// Logical Shift Right by variable +instruct shrI_mem_CL(memory dst, rRegI shift) +%{ + match(Set dst (StoreI dst (URShiftI (LoadI dst) shift))); + + format %{ "shrl #@shrI_mem_CL" %} + + ins_encode %{ + Register shamt = $shift$$Register; + Address dst = $dst$$Address; + __ ldw(rscratch2_AT, dst); + __ and_ins(shamt, 0x1f, shamt); + __ srll(rscratch2_AT, shamt, rscratch2_AT); + __ stw(rscratch2_AT, dst, rscratch1_GP); + %} + //ins_pipe(ialu_mem_reg); +%}*/ + +/* No need for SW64 +// Long Shift Instructions +// Shift Left by one +instruct salL_rReg_1(rRegL dst, rRegL src, immI1 shift) +%{ + match(Set dst (LShiftL src shift)); + + format %{ "slll $src, $shift, $dst\t #@salL_rReg_1 " %} + + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src$$Register; + __ slll(src, 1, dst);
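For the unsigned int right shifts above (shrI_rReg_imm, shrI_rReg_CL), the zapnot/movwu step zero-extends the 32-bit value before the 64-bit shift so that no copies of the sign bit are shifted in; the result is then viewed as a signed int again. The intended Java `>>>` semantics, as a C++ sketch (not part of the patch):

```cpp
#include <cstdint>

// Java 'int >>> count': zero-extend the 32-bit value (zapnot/movwu above),
// shift by count & 0x1f, then view the low 32 bits as a signed int again.
int32_t java_iushr(int32_t x, int32_t count) {
  return (int32_t)((uint32_t)x >> (count & 0x1f));
}
```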
+ %} + ins_pipe(ialu_regL_imm); +%}*/ + +/* memory operands no need for SW64 +// Shift Left by one +instruct salL_mem_1(memory dst, immI1 shift) +%{ + match(Set dst (StoreL dst (LShiftL (LoadL dst) shift))); + + format %{ "salq #@salL_mem_1" %} + + ins_encode %{ + Address dst = $dst$$Address; + __ ldl(rscratch2_AT, dst); + __ slll(rscratch2_AT, 1, rscratch2_AT); + __ stl(rscratch2_AT, dst, rscratch1_GP); + %} + //ins_pipe(ialu_mem_imm); +%}*/ + +// Shift Left by 8-bit immediate +instruct salL_rReg_imm(rRegL dst, rRegL src, immU8 shift) +%{ + match(Set dst (LShiftL src shift)); + ins_cost(80); + format %{ "slll $src, $shift, $dst #@salL_rReg_imm" %} + + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src$$Register; + int shamt = $shift$$constant; + + __ slll(src, shamt&0x3f, dst); + %} + ins_pipe(ialu_regL_imm); +%} + +/* memory operands no need for SW64 +// Shift Left by 8-bit immediate +instruct salL_mem_imm(memory dst, immI8 shift) +%{ + match(Set dst (StoreL dst (LShiftL (LoadL dst) shift))); + + format %{ "salq #@salL_mem_imm" %} + + ins_encode %{ + Address dst = $dst$$Address; + int shamt = $shift$$constant; + __ ldl(rscratch2_AT, dst); + __ slll(rscratch2_AT, shamt&0x3f, rscratch2_AT); + __ stl(rscratch2_AT, dst, rscratch1_GP); + %} + //ins_pipe(ialu_mem_imm); +%}*/ + +// Shift Left by variable +instruct salL_rReg_CL(rRegL dst, rRegL src, rRegI shift) +%{ + match(Set dst (LShiftL src shift)); + ins_cost(80); + format %{ "slll $src $shift, $dst #@salL_rReg_CL" %} + + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src$$Register; + Register shamt = $shift$$Register; + //__ and_ins(shamt, 0x3f, shamt); + __ slll(src, shamt, dst); + %} + ins_pipe(ialu_regI_imm16); +%} + +/* memory operands no need for SW64 +// Shift Left by variable +instruct salL_mem_CL(memory dst, rRegI shift) +%{ + match(Set dst (StoreL dst (LShiftL (LoadL dst) shift))); + + format %{ "salq #@salL_mem_CL" %} + + ins_encode %{ + Register shamt = $shift$$Register; + Address dst = $dst$$Address; + __ ldl(rscratch2_AT, dst); + __ and_ins(shamt, 0x3f, shamt); + __ slll(rscratch2_AT, shamt, rscratch2_AT); + __ stl(rscratch2_AT, dst, rscratch1_GP); + %} + //ins_pipe(ialu_mem_reg); +%} +// No need for SW64 +// Arithmetic shift right by one +instruct sarL_rReg_1(rRegL dst, rRegL src, immI1 shift) +%{ + match(Set dst (RShiftL src shift)); + + format %{ "sral $src, #1, $dst\t# long\t@sarL_rReg_1" %} + + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src$$Register; + __ sral(src, 1, dst); + %} + ins_pipe(ialu_regL_imm); +%} + +// memory operands no need for SW64 +// Arithmetic shift right by one +instruct sarL_mem_1(memory dst, immI1 shift) +%{ + match(Set dst (StoreL dst (RShiftL (LoadL dst) shift))); + + + format %{ + "ldl rscratch2_AT, $dst\t# long\t@sarL_mem_1\n\t" + "sral rscratch2_AT, #1, rscratch2_AT\n\t" + "stl rscratch2_AT, $dst, rscratch1_GP" + %} + + ins_encode %{ + Address dst = $dst$$Address; + + __ ldl(rscratch2_AT, dst); + __ sral(rscratch2_AT, 1, rscratch2_AT); + __ stl(rscratch2_AT, dst, rscratch1_GP); + %} + //ins_pipe(ialu_mem_imm); +%}*/ + +// Arithmetic Shift Right by 8-bit immediate +instruct sarL_rReg_imm(rRegL dst, rRegL src, immU8 shift) +%{ + match(Set dst (RShiftL src shift)); + ins_cost(80); + format %{ "sral $src, $shift, $dst\t# long\t@sarL_rReg_imm" %} + + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src$$Register; + int value = $shift$$constant; + + __ sral(src, value, dst); + %} + ins_pipe(ialu_regL_imm); +%} + +/* memory 
operands no need for SW64 +// Arithmetic Shift Right by 8-bit immediate +instruct sarL_mem_imm(memory dst, immI8 shift) +%{ + match(Set dst (StoreL dst (RShiftL (LoadL dst) shift))); + + + format %{ + "ldl rscratch2_AT, $dst\t# long\t@sarL_mem_imm\n\t" + "sral rscratch2_AT, $shift, rscratch2_AT\n\t" + "stl rscratch2_AT, $dst, rscratch1_GP" + %} + + ins_encode %{ + Address dst = $dst$$Address; + int value = $shift$$constant; + + __ ldl(rscratch2_AT, dst); + __ sral(rscratch2_AT, value, rscratch2_AT); + __ stl(rscratch2_AT, dst, rscratch1_GP); + %} + //ins_pipe(ialu_mem_imm); +%}*/ + +// Arithmetic Shift Right by variable +instruct sarL_rReg_CL(rRegL dst, rRegL src, rRegI shift) +%{ + match(Set dst (RShiftL src shift)); + + format %{ "sral $src, $shift, $dst\t# long\t@sarL_rReg_CL" %} + + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src$$Register; + Register shift = $shift$$Register; + + __ sral(src, shift, dst); + %} + ins_pipe(ialu_regL_regL); +%} + +/* memory operands no need for SW64 +// Arithmetic Shift Right by variable +instruct sarL_mem_CL(memory dst, rRegI shift) +%{ + match(Set dst (StoreL dst (RShiftL (LoadL dst) shift))); + + + format %{ + "ldl rscratch2_AT, $dst\t# long\t@sarL_mem_CL\n\t" + "sral rscratch2_AT, $shift, rscratch2_AT\n\t" + "stl rscratch2_AT, $dst, rscratch1_GP" + %} + + ins_encode %{ + Address dst = $dst$$Address; + Register shift = $shift$$Register; + + __ ldl(rscratch2_AT, dst); + __ sral(rscratch2_AT, shift, rscratch2_AT); + __ stl(rscratch2_AT, dst, rscratch1_GP); + %} + //ins_pipe(ialu_mem_reg); +%} + +// No need for SW64 +// Logical shift right by one +instruct shrL_rReg_1(rRegL dst, rRegL src, immI1 shift) +%{ + match(Set dst (URShiftL src shift)); + + format %{ "srll $src, #1, $dst\t# long\t@shrL_rReg_1\n\t" %} + + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src$$Register; + + __ srll(src, 1, dst); + %} + ins_pipe(ialu_regL_imm); +%} + +// memory operands no need for SW64 +// Logical shift right by one +instruct shrL_mem_1(memory dst, immI1 shift) +%{ + match(Set dst (StoreL dst (URShiftL (LoadL dst) shift))); + + + format %{ + "ldl rscratch2_AT, $dst\t# long\t@shrL_mem_1\n\t" + "srll rscratch2_AT, #1, rscratch2_AT\n\t" + "stl rscratch2_AT, $dst, rscratch1_GP" + %} + + ins_encode %{ + Address dst = $dst$$Address; + + __ ldl(rscratch2_AT, dst); + __ srll(rscratch2_AT, 1, rscratch2_AT); + __ stl(rscratch2_AT, dst, rscratch1_GP); + %} + //ins_pipe(ialu_mem_imm); +%}*/ + +// Logical Shift Right by 8-bit immediate +instruct shrL_rReg_imm(rRegL dst, rRegL src, immU8 shift) +%{ + match(Set dst (URShiftL src shift)); + ins_cost(80); + format %{ "srll $src, $shift&0x3f, $dst\t# long\t@shrL_rReg_imm" %} + + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src$$Register; + int shamt = $shift$$constant; + __ srll(src, shamt&0x3f, dst); + %} + ins_pipe(ialu_regL_imm); +%} + +/* memory operands no need for SW64 +// Logical Shift Right by 8-bit immediate +instruct shrL_mem_imm(memory dst, immI8 shift) +%{ + match(Set dst (StoreL dst (URShiftL (LoadL dst) shift))); + + format %{ + "ldl rscratch2_AT, $dst\t# long\t@shrL_mem_imm\n\t" + "srll rscratch2_AT, $shift&0x3f, rscratch2_AT\n\t" + "stl rscratch2_AT, $dst, rscratch1_GP" + %} + + ins_encode %{ + Address dst = $dst$$Address; + int shamt = $shift$$constant; + __ ldl(rscratch2_AT, dst); + __ srll(rscratch2_AT, shamt&0x3f, rscratch2_AT); + __ stl(rscratch2_AT, dst, rscratch1_GP); + %} + //ins_pipe(ialu_mem_imm); +%}*/ + +// Logical Shift Right by variable +instruct
shrL_rReg_CL(rRegL dst, rRegL src, rRegI shift) +%{ + match(Set dst (URShiftL src shift)); + + format %{ + "srll $src, $shift, $dst\t# long\t@shrL_rReg_CL" + %} + + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src$$Register; + Register shift = $shift$$Register; + + //__ and_ins(shift, 0x3f, shift);TODO: + __ srll(src, shift, dst); + %} + ins_pipe(ialu_regL_regL); +%} + +/* memory operands no need for SW64 +// Logical Shift Right by variable +instruct shrL_mem_CL(memory dst, rRegI shift) +%{ + match(Set dst (StoreL dst (URShiftL (LoadL dst) shift))); + + + format %{ + "ldl rscratch2_AT, $dst\t# long\t@shrL_mem_CL\n\t" + "and_ins $shift, #0x3f, $shift\n\t" + "srll rscratch2_AT, $shift, rscratch2_AT\n\t" + "stl rscratch2_AT, $dst, rscratch1_GP" + %} + + ins_encode %{ + Address dst = $dst$$Address; + Register shift = $shift$$Register; + + __ ldl(rscratch2_AT, dst); + __ and_ins(shift, 0x3f, shift); + __ srll(rscratch2_AT, shift, rscratch2_AT); + __ stl(rscratch2_AT, dst, rscratch1_GP); + %} + //ins_pipe(ialu_mem_reg); +%}*/ + + +// Logical Shift Right by 24, followed by Arithmetic Shift Left by 24. +// This idiom is used by the compiler for the i2b bytecode. +instruct i2b(rRegI dst, rRegI src, immI_24 twentyfour) +%{ + match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour)); + + format %{ "sextb $src, $dst\t#@i2b" %} + + ins_encode %{ + Register src = $src$$Register; + Register dst = $dst$$Register; + + __ sextb(src, dst); + %} + ins_pipe(ialu_regI_regI); +%} + +// Logical Shift Right by 16, followed by Arithmetic Shift Left by 16. +// This idiom is used by the compiler the i2s bytecode. +instruct i2s(rRegI dst, rRegI src, immI_16 sixteen) +%{ + match(Set dst (RShiftI (LShiftI src sixteen) sixteen)); + + format %{ "sexth $src, $dst\t#@i2s" %} + + ins_encode %{ + Register src = $src$$Register; + Register dst = $dst$$Register; + + __ sexth(src, dst); + %} + ins_pipe(ialu_regI_regI); +%} + +/* Rotate shift No need for SW64 ?? 
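+// The rotate rules in this (largely disabled) block match the shift/or idiom Java code
+// uses for rotation: Integer.rotateLeft(x, n) is defined as (x << n) | (x >>> -n), i.e.
+// (x << n) | (x >>> (32 - n)) once the count is reduced mod 32, with 64/0x3f for the
+// long variants; C2 sees that as an OrI/OrL of LShift and URShift nodes, which is what
+// the rolI/rorI/rolL/rorL match rules below recognize.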
+// ROL/ROR instructions + +// ROL expand +instruct rolI_rReg_imm1(rRegI dst, immI1 lshift, immI_M1 rshift) %{ + match(Set dst (OrI (LShiftI dst lshift) (URShiftI dst rshift))); + format %{ "roll #@rolI_rReg_imm1" %} + + ins_encode %{ + Register dst = $dst$$Register; +// int lshift = $lshift$$constant; +// int rshift = $rshift$$constant; + __ slll(dst, 0x1, rscratch2_AT); + __ srll(dst, 0x1F, rscratch1_GP); + __ or_ins(rscratch2_AT, rscratch1_GP, dst); + + %} + //ins_pipe(ialu_reg); +%} + +/*--x86 does not provide any match rule, compiling error---*/ +/* +// Rotate Left by 8-bit immediate +instruct rolI_rReg_i8(rRegI dst, immI8 lshift, immI8 rshift) +%{ + predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f)); + match(Set dst (OrI (LShiftI dst lshift) (URShiftI dst rshift))); + format %{ "roll #@rolI_rReg_i8" %} + + ins_encode %{ + Register dst = $dst$$Register; + int lshift = $lshift$$constant; + int rshift = $rshift$$constant; + __ slll(dst, lshift, rscratch2_AT); + __ srll(dst, rshift, rscratch1_GP); + __ or_ins(rscratch2_AT, rscratch1_GP, dst); + + %} + //ins_pipe(ialu_reg); +%} + +instruct rolI_rReg_CL(rRegI dst, rRegI shift, immI0 zero) +%{ + match(Set dst (OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift)))); + format %{ "roll #@rolI_rReg_CL" %} + +// opcode(0xD3, 0x0); +// ins_encode(REX_reg(dst), OpcP, reg_opc(dst)); + + ins_encode %{ + Register dst = $dst$$Register; + Register shift = $shift$$Register; + __ slll(dst, shift, rscratch2_AT); + __ mov_immediate32(rscratch1_GP, 0x20); + __ subl(rscratch1_GP, shift, shift); + __ srll(dst, shift, rscratch1_GP); + __ or_ins(rscratch2_AT, rscratch1_GP, dst); + %} + //ins_pipe(ialu_reg_reg); +%}*/ +// end of ROL expand + + +// ROR expand +/*---x86 does not provide any match rule, compiling error---*/ +/* + +// Rotate Right by 8-bit immediate +instruct rorI_rReg_i8(rRegI dst, immI8 rshift, immI8 lshift) +%{ + predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f)); + match(Set dst (OrI (URShiftI dst rshift) (LShiftI dst lshift))); + format %{ "rorl #@rorI_rReg_i8" %} + ins_encode %{ + Register dst = $dst$$Register; + int rshift = $rshift$$constant; + int lshift = $lshift$$constant; + __ srll(dst, rshift, rscratch2_AT); + __ slll(dst, lshift, rscratch1_GP); + __ or_ins(rscratch2_AT, rscratch1_GP, dst); + %} + //ins_pipe(ialu_reg); +%} +// +instruct rorI_rReg_Var_C0(rRegI dst, rRegI shift, immI0 zero) +%{ + match(Set dst (OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift)))); + format %{ "rorl $dst, $shift" %} + ins_encode %{ + Register dst = $dst$$Register; + Register shift = $shift$$Register; + __ srll(dst, shift, rscratch2_AT); + __ mov_immediate32(rscratch1_GP, 0x20); + __ subl(rscratch1_GP, shift, shift); + __ slll(dst, shift, rscratch1_GP); + __ or_ins(rscratch2_AT, rscratch1_GP, dst); + %} + //ins_pipe(ialu_reg_reg); +%} +// end of ROR expand + +// Rotate Right by one +//instruct rorI_rReg_i1(rRegI dst, immI1 rshift, immI_M1 lshift) +//%{ +// match(Set dst (OrI (URShiftI dst rshift) (LShiftI dst lshift))); +// +// expand %{ +// //rorI_rReg_imm1(dst, cr); +// %} +//%} + +// Rotate Right by 8-bit immediate +//instruct rorI_rReg_i8(rRegI dst, immI8 rshift, immI8 lshift) +//%{ +// predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f)); +// match(Set dst (OrI (URShiftI dst rshift) (LShiftI dst lshift))); +// +// expand %{ +// //rorI_rReg_imm8(dst, rshift, cr); +// %} +//%} + +// Rotate Right by variable +//instruct rorI_rReg_Var_C0(rRegI dst, rRegI 
shift, immI0 zero) +//%{ +// match(Set dst (OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift)))); +// +// expand %{ +// //rorI_rReg_CL(dst, shift, cr); +// %} +//%} + +// Rotate Right by variable +//instruct rorI_rReg_Var_C32(rRegI dst, rRegI shift, immI_32 c32) +//%{ +// match(Set dst (OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift)))); +// +// expand %{ +// //rorI_rReg_CL(dst, shift, cr); +// %} +//%} + + +// for long rotate +// ROL expand +instruct rolL_rReg_imm1(rRegL dst) %{ + + format %{ + "slll $dst, 1, rscratch2_AT\t#long\t@rolL_rReg_imm1\n\t" + "srll $dst, 63, rscratch1_GP\n\t" + "or_ins rscratch2_AT, rscratch1_GP, $dst" + %} + + ins_encode %{ + Register dst = $dst$$Register; + + __ slll(dst, 1, rscratch2_AT); + __ srll(dst, 63, rscratch1_GP); + __ or_ins(rscratch2_AT, rscratch1_GP, dst); + + %} + //ins_pipe(ialu_reg); +%} +instruct rolL_rReg_i8(rRegL dst, immI8 lshift, immI8 rshift) +%{ + predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f)); + match(Set dst (OrL (LShiftL dst lshift) (URShiftL dst rshift))); +//instruct rolL_rReg_imm8(rRegL dst, immI8 shift) %{ + format %{ + "slll $dst, $lshift, rscratch2_AT\t#long\t@rolL_rReg_imm1\n\t" + "srll $dst, #64-$lshift, rscratch1_GP\n\t" + "or_ins rscratch2_AT, rscratch1_GP, $dst" + %} + + ins_encode %{ + Register dst = $dst$$Register; + int shift = $lshift$$constant; + + __ slll(dst, shift&0x1f, rscratch2_AT); + __ srll(dst, 64-shift, rscratch1_GP); + __ or_ins(rscratch2_AT, rscratch1_GP, dst); + + %} + //ins_pipe(ialu_reg); +%} + +instruct rolL_rReg_CL(rRegL dst, rRegI shift) %{ +// format %{ +// "andw $shift, #0x1f, $shift\t#long\t@rolL_rReg_CL\n\t" +// "mov_immediate32 rscratch2_AT, #64\n\t" +// "subw rscratch2_AT, $shift, rscratch1_GP\n\t" +// "mov_immediate64 rscratch2_AT, 0xffffffffffffffff\n\t" +// "slll rscratch2_AT, rscratch1_GP, rscratch2_AT\n\t" +// "and_ins $src, rscratch2_AT, rscratch2_AT\n\t" +// "srll rscratch2_AT, rscratch1_GP, rscratch2_AT\n\t" +// "slll $src, $shift, $dst\n\t" +// "or_ins $dst, rscratch2_AT, $dst" +// %} + format %{ "rolL_rReg_CL\t#@rolL_rReg_CL\n\t" %} + ins_encode %{ + Register dst = $dst$$Register; + Register shift = $shift$$Register; + + __ andw(shift, 0x1f, shift); + __ slll(dst, shift, rscratch2_AT); + __ mov_immediate32(rscratch1_GP, 64); + __ subw(rscratch1_GP, shift, rscratch1_GP); + __ srll(dst, rscratch1_GP, rscratch1_GP); + + __ or_ins(rscratch2_AT, rscratch1_GP, dst); + %} + //ins_pipe(ialu_reg_reg); +%} +// end of ROL expand + +// Rotate Left by one +instruct rolL_rReg_i1(rRegL dst, immI1 lshift, immI_M1 rshift) +%{ + match(Set dst (OrL (LShiftL dst lshift) (URShiftL dst rshift))); + + expand %{ + rolL_rReg_imm1(dst); + %} +%} + +// Rotate Left by 8-bit immediate +//instruct rolL_rReg_i8(rRegL dst, immI8 lshift, immI8 rshift) +//%{ +// predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f)); +// match(Set dst (OrL (LShiftL dst lshift) (URShiftL dst rshift))); +// +// expand %{ +// rolL_rReg_imm8(dst, lshift); +// %} +//%} + +// Rotate Left by variable +instruct rolL_rReg_Var_C0(rRegL dst, rRegI shift, immI0 zero) +%{ + match(Set dst (OrL (LShiftL dst shift) (URShiftL dst (SubI zero shift)))); + + expand %{ + rolL_rReg_CL(dst, shift); + %} +%} + +// Rotate Left by variable +instruct rolL_rReg_Var_C64(rRegL dst, rRegI shift, immI_64 c64) +%{ + match(Set dst (OrL (LShiftL dst shift) (URShiftL dst (SubI c64 shift)))); + + expand %{ + rolL_rReg_CL(dst, shift); + %} +%} + + +// ROR expand +instruct rorL_rReg_imm1(rRegL dst) 
+%{ + format %{ + "srll $dst, #1, rscratch2_AT\t#@rorL_rReg_imm1\n\t" + "slll $dst, #63, rscratch1_GP\n\t" + "or_ins rscratch2_AT, rscratch1_GP, $dst" + %} + ins_encode %{ + Register dst = $dst$$Register; + + __ srll(dst, 1, rscratch2_AT); + __ slll(dst, 63, rscratch1_GP); + __ or_ins(rscratch2_AT, rscratch1_GP, dst); + %} + //ins_pipe(ialu_reg); +%} +*/ +/* The following two methods cannot be implemented since there are no match rules*/ +/* +instruct rorL_rReg_imm8(rRegL dst, immI8 shift) +%{ + + format %{ "rorq $dst, $shift" %} + + ins_encode%{ + Register dst = $dst$$Register; + int shift = $rshift$$constant; + + __ srll(dst, shift, rscratch2_AT); + __ slll(dst, 64-shift, rscratch1_GP); + __ or_ins(rscratch2_AT, rscratch1_GP, dst); + %} + //ins_pipe(ialu_reg); +%} + +instruct rorL_rReg_CL(rRegL dst, rRegI shift) +%{ + + format %{ "rorq $dst, $shift" %} + + ins_encode %{ + Register dst = $dst$$Register; + Register shift = $shift$$Register; + + __ srll(dst, shift, rscratch2_AT); + __ mov_immediate32(rscratch1_GP, 64); + __ subw(rscratch1_GP, shift, rscratch1_GP); + __ slll(dst, rscratch1_GP, rscratch1_GP); + __ or_ins(rscratch2_AT, rscratch1_GP, dst); + %} + //ins_pipe(ialu_reg_reg); +%} +*/ +/* +// end of ROR expand + +// Rotate Right by one +instruct rorL_rReg_i1(rRegL dst, immI1 rshift, immI_M1 lshift) +%{ + match(Set dst (OrL (URShiftL dst rshift) (LShiftL dst lshift))); + + expand %{ + rorL_rReg_imm1(dst); + %} +%} + +// Rotate Right by 8-bit immediate +instruct rorL_rReg_i8(rRegL dst, rRegL src, immI8 rshift, immI8 lshift) +%{ + predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f)); + match(Set dst (OrL (URShiftL src rshift) (LShiftL src lshift))); +//instruct rorL_rReg_imm8(rRegL dst, rRegL src, immI8 shift) +//%{ + format %{ + "srll $dst, $rshift, rscratch2_AT\t#@rorL_rReg_i8\n\t" + "slll $dst, 64-rshift, rscratch1_GP\n\t" + "or_ins rscratch2_AT, rscratch1_GP, $dst" + %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src$$Register; + int shift = $rshift$$constant; + + __ srll(dst, shift, rscratch2_AT); + __ slll(dst, 64-shift, rscratch1_GP); + __ or_ins(rscratch2_AT, rscratch1_GP, dst); + %} + //ins_pipe(ialu_reg); +%} + + +// Rotate Right by variable +instruct rorL_rReg_Var_C0(rRegL dst, rRegI shift, immI0 zero) +%{ + match(Set dst (OrL (URShiftL dst shift) (LShiftL dst (SubI zero shift)))); +//instruct rorL_rReg_CL(rRegL dst, rRegL src, rRegI shift) +//%{ + + format %{ + "andw $shift, #0x1f, $shift\t#@rorL_rReg_Var_C0\n\t" + "srll $dst, $shift, rscratch2_AT\n\t" + "mov_immediate32 rscratch1_GP, 64\n\t" + "subw rscratch1_GP, $shift, rscratch1_GP\n\t" + "slll $dst, rscratch1_GP, rscratch1_GP\n\t" + "or_ins rscratch2_AT, rscratch1_GP, $dst" + %} + ins_encode %{ + Register dst = $dst$$Register; + Register shift = $shift$$Register; + + __ and_ins(shift, 0x1f, shift); + __ srll(dst, shift, rscratch2_AT); + __ mov_immediate32(rscratch1_GP, 64); + __ subw(rscratch1_GP, shift, rscratch1_GP); + __ slll(dst, rscratch1_GP, rscratch1_GP); + __ or_ins(rscratch2_AT, rscratch1_GP, dst); + + %} + //ins_pipe(ialu_reg_reg); +%} + +instruct rorL_rReg_Var_C64(rRegL dst, rRegI shift, immI_64 c64) +%{ + match(Set dst (OrL (URShiftL dst shift) (LShiftL dst (SubI c64 shift)))); + format %{ + "andw $shift, #0x1f, $shift\t#@rorL_rReg_CL\n\t" + "srll $dst, $shift, rscratch2_AT\n\t" + "mov_immediate32 rscratch1_GP, 64\n\t" + "subw rscratch1_GP, $shift, rscratch1_GP\n\t" + "slll $dst, rscratch1_GP, rscratch1_GP\n\t" + "or_ins rscratch2_AT, rscratch1_GP, $dst" + 
%} + ins_encode %{ + Register dst = $dst$$Register; + Register shift = $shift$$Register; + + __ andw(shift, 0x1f, shift); + __ srll(dst, shift, rscratch2_AT); + __ mov_immediate32(rscratch1_GP, 64); + __ subw(rscratch1_GP, shift, rscratch1_GP); + __ slll(dst, rscratch1_GP, rscratch1_GP); + __ or_ins(rscratch2_AT, rscratch1_GP, dst); + + %} + //ins_pipe(ialu_reg_reg); +%}*/ + + +// Logical Instructions + +// Integer Logical Instructions + +// And Instructions +// And Register with Register +instruct andI_rReg(rRegI dst, rRegI src1, rRegI src2) +%{ + match(Set dst (AndI src1 src2)); + + format %{ + "and_ins $src1, $src2, $dst\t# int @andI_rReg" + %} + ins_encode %{ + Register dst = $dst$$Register; + Register src1 = $src1$$Register; + Register src2 = $src2$$Register; + __ and_ins(src1, src2, dst); + %} + ins_pipe( ialu_regI_regI ); +%} + +/* sw8 +instruct andI_Reg_Reg(rRegI dst, rRegI src1, rRegI src2) %{ + match(Set dst (AndI src1 src2)); + + format %{ "and $dst, $src1, $src2 #@andI_Reg_Reg" %} + ins_encode %{ +// Register dst = $dst$$Register; +// Register src1 = $src1$$Register; +// Register src2 = $src2$$Register; +// __ andr(dst, src1, src2); + %} + ins_pipe( ialu_regI_regI ); +%} +*/ +/* TODO no in jdk8 +// And Register with Immediate 255 +instruct andI_rReg_imm255(rRegI dst, rRegI src1, immI_255 src2) +%{ + match(Set dst (AndI src1 src2)); + + format %{ "and_ins $src1, #255, $dst\t# int & 0xFF @andI_rReg_imm255"%} + ins_encode %{ + Register dst = $dst$$Register; + Register src1 = $src1$$Register; + __ and_ins(src1, 255, dst); + %} + ins_pipe( ialu_regI_regI ); +%} + +// And Register with Immediate 255 and promote to long +instruct andI2L_rReg_imm255(rRegI dst, rRegI src, immI_255 mask) +%{ + match(Set dst (ConvI2L (AndI src mask))); + + format %{ "and_ins $src, #255, $dst\t# int & 0xFF -> long @andI2L_rReg_imm255"%} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src$$Register; + __ and_ins(src, 255, dst); +// __ sextb(src, dst); //TODO CHECK LSP I2L signed extend + %} + ins_pipe( ialu_regI_regI ); +%} +*/ +// And Register with Immediate 65535 +instruct andI_rReg_imm65535(rRegI dst, rRegI src1, immI_65535 src2) +%{ + match(Set dst (AndI src1 src2)); + ins_cost(40); + format %{ "zapnot $src1, #3, $dst\t# int & 0xFFFF @andI_rReg_imm65535"%} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src1$$Register; + __ zapnot(src, 0x3, dst); + %} + ins_pipe( ialu_regI_regI ); +%} +/* TODO no in jdk8 +// And Register with Immediate 65535 and promote to long +instruct andI2L_rReg_imm65535(rRegI dst, rRegI src, immI_65535 mask) +%{ + match(Set dst (ConvI2L (AndI src mask))); + + format %{ "zapnot $src, #3, $dst\t# int & 0xFFFF -> long @andI2L_rReg_imm65535"%} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src$$Register; + __ zapnot(src, 0x3, dst); +// __ sexth(dst, dst); // TODO CHECK lsp I2L signed extend? 
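+// (Note on the TODO above: src & 0xFFFF is always in [0, 65535], so widening it to long
+// gives the same result whether it is sign- or zero-extended; zapnot with mask 0x3
+// already leaves the upper bytes zero per the "int & 0xFFFF" format string, so no extra
+// sexth should be needed.)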
+ %} + ins_pipe( ialu_regI_regI ); +%} +*/ +// And Register with Immediate +instruct andI_rReg_imm(rRegI dst, rRegI src1, immU8 src2) +%{ + match(Set dst (AndI src1 src2)); + ins_cost(60); + format %{ "andw $src1, $src2, $dst\t# int @andI_rReg_imm"%} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src1$$Register; + int val = $src2$$constant; + __ and_ins(src, val, dst); + %} + ins_pipe( ialu_regI_regI ); +%} + +/* +instruct andI_Reg_immI(rRegI dst, rRegI src1, immI src2) %{ + match(Set dst (AndI src1 src2)); + + format %{ "and $dst, $src1, $src2 #@andI_Reg_immI" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src1$$Register; + int val = $src2$$constant; + + __ mov_immediate32(AT, val); + __ and_ins(src, AT, dst); + %} + ins_pipe( ialu_regI_regI ); +%} +*/ + +/* memory operands no need for SW64 +// And Register with Memory +instruct andI_rReg_mem(rRegI dst, memory src1, rRegI src2) +%{ + match(Set dst (AndI src2 (LoadI src1))); + +// ins_cost(125);//todo + format %{ "ldw rscratch3, $src1\t# int @andI_rReg_mem\n\t" + "andw rscratch3, $src2, $dst" %} + ins_encode %{ + Register dst = $dst$$Register; + Address src1 = $src1$$Address; + Register src2 = $src2$$Register; + __ ldw(rscratch3, src1); + __ andw(rscratch3, src2, dst); + %} +// ins_pipe(ialu_reg_mem); +%} + +// And Memory with Register +instruct andI_mem_rReg(memory dst, rRegI src) +%{ + match(Set dst (StoreI dst (AndI (LoadI dst) src))); + + ins_cost(150);//todo + format %{ "ldw rscratch3, $dst\t# int @andI_mem_rReg\n\t" + "and_ins rscratch3, $src, rscratch2_AT\n\t" + "stw rscratch3, $dst" %} + ins_encode %{ + Address dst = $dst$$Address; + Register src = $src$$Register; + __ ldw(rscratch3, dst); + __ and_ins(src, rscratch3, rscratch3); + __ stw(rscratch3, dst); + + %} +// ins_pipe(ialu_mem_reg); +%} + +// And Memory with Immediate +instruct andI_mem_imm(memory dst, immI src) +%{ + match(Set dst (StoreI dst (AndI (LoadI dst) src))); + + ins_cost(125);//todo + format %{ "ldw rscratch2_AT, $dst\t# int @andI_mem_imm\n\t" + "movws rscratch2_AT, $src, rscratch2_AT\n\t" + "and_ins rscratch2_AT, rscratch1_GP, rscratch2_AT\n\t" + "stw rscratch2_AT, $dst" %} + ins_encode %{ + Address dst = $dst$$Address; + int val = $src$$constant; + __ ldw(rscratch1_GP, dst); + __ movws(rscratch2_AT, val); + __ and_ins(rscratch2_AT, rscratch1_GP, rscratch2_AT); + __ stw(rscratch2_AT, dst, rscratch1_GP); + + %} +// ins_pipe(ialu_mem_imm); +%} + +// BMI1 instructions +instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1) %{ + match(Set dst (AndI (XorI src1 minus_1) (LoadI src2))); +// predicate(UseBMI1Instructions); + + ins_cost(125);//todo + format %{ "ldw rscratch2_AT, $src2\t# @andnI_rReg_rReg_mem\n\t" + "ornot R0, $src1, rscratch1_GP\n\t" + "andw rscratch1_GP, rscratch2_AT, $dst" %} + + ins_encode %{ + Register dst = $dst$$Register; + Register src1 = $src1$$Register; + Address src2 = $src2$$Address; + int val = $minus_1$$constant; + __ ldw(rscratch2_AT, src2); + __ ornot(R0, src1, rscratch1_GP); + __ andw(rscratch1_GP, rscratch2_AT, dst); + %} +// ins_pipe(ialu_reg_mem); +%}*/ +/*no need in swjdk8 +instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1) %{ + match(Set dst (AndI (XorI src1 minus_1) src2)); +// predicate(UseBMI1Instructions); + + format %{ "ornot R0, $src1, rscratch3 \t# @andnI_rReg_rReg_rReg\n\t" + "andw rscratch3, $src2, $dst" + %} + + ins_encode %{ + Register dst = $dst$$Register; + Register src1 = $src1$$Register; + Register src2 = 
$src2$$Register; + + __ ornot(R0, src1, rscratch3); + __ andw(rscratch3, src2, dst); + %} + ins_pipe(ialu_regI_regI); +%} + +// TODO CHECK lsp: instruct name blsiI needed to be changed?? +instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI0 imm_zero) %{ + match(Set dst (AndI (SubI imm_zero src) src)); +// predicate(UseBMI1Instructions); + + format %{ "subw R0, $src, rscratch3\t# @blsiI_rReg_rReg\n\t" + "andw rscratch3, $src, $dst"%} + + ins_encode %{ +// __ blsil($dst$$Register, $src$$Register); + __ subw(R0, $src$$Register, rscratch3); + __ andw(rscratch3, $src$$Register, $dst$$Register); + %} +// ins_pipe(ialu_reg); +%}*/ +/* memory operands no need for SW64 +instruct blsiI_rReg_mem(rRegI dst, memory src, immI0 imm_zero) %{ + match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) )); +// predicate(UseBMI1Instructions); + + ins_cost(125);//todo + format %{ "ldw rscratch2_AT, $src\t# @blsiI_rReg_mem\n\t" + "subw R0, rscratch2_AT, rscratch1_GP\n\t" + "andw rscratch1_GP, rscratch2_AT, $dst"%} + + ins_encode %{ +// __ blsil($dst$$Register, $src$$Address); + __ ldw(rscratch2_AT, $src$$Address); + __ subw(R0, rscratch2_AT, rscratch1_GP); + __ andw(rscratch1_GP, rscratch2_AT, $dst$$Register); + %} +// ins_pipe(ialu_reg_mem); +%} + +instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1) +%{ + match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ) ); +// predicate(UseBMI1Instructions); + + ins_cost(125);//todo + format %{ "ldw rscratch2_AT, $src\t# @blsmskI_rReg_mem\n\t" + "subw rscratch2_AT, #1, rscratch1_GP\n\t" + "xorw rscratch1_GP, rscratch2_AT, $dst"%} + + ins_encode %{ +// __ blsmskl($dst$$Register, $src$$Address); + __ ldw(rscratch2_AT, $src$$Address); + __ subw(rscratch2_AT, 1, rscratch1_GP); + __ xorw(rscratch1_GP, rscratch2_AT, $dst$$Register); + %} +// ins_pipe(ialu_reg_mem); +%}*/ + +/* no need in swjdk8 +instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1) +%{ + match(Set dst (XorI (AddI src minus_1) src)); +// predicate(UseBMI1Instructions); + + format %{ "subw $src, #1, rscratch3\t# @blsmskI_rReg_rReg\n\t" + "xorw rscratch3, $src, $dst"%} + + ins_encode %{ +// __ blsmskl($dst$$Register, $src$$Register); + __ subw($src$$Register, 1, rscratch3); + __ xorw(rscratch3, $src$$Register, $dst$$Register); + %} + +// ins_pipe(ialu_reg); +%} + +instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1) +%{ + match(Set dst (AndI (AddI src minus_1) src) ); +// predicate(UseBMI1Instructions); + + format %{ "subw $src, #1, rscratch3\t# @blsrI_rReg_rReg\n\t" + "andw rscratch3, $src, $dst"%} + + ins_encode %{ +// __ blsrl($dst$$Register, $src$$Register); + __ subw($src$$Register, 1, rscratch3); + __ andw(rscratch3, $src$$Register, $dst$$Register); + %} + +// ins_pipe(ialu_reg_mem); +%} +*/ +/* memory operands no need for SW64 +instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1) +%{ + match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ) ); +// predicate(UseBMI1Instructions); + + ins_cost(125);//todo + format %{ "ldw rscratch2_AT, $src\t# @blsmskI_rReg_mem\n\t" + "subw rscratch2_AT, #1, rscratch1_GP\n\t" + "andw rscratch2_AT, rscratch1_GP, $dst"%} + + ins_encode %{ +// __ blsrl($dst$$Register, $src$$Address); + __ ldw(rscratch2_AT, $src$$Address); + __ subw(rscratch2_AT, 1, rscratch1_GP); + __ andw(rscratch2_AT, rscratch1_GP, $dst$$Register); + %} + +// ins_pipe(ialu_reg); +%}*/ + +// Or Instructions +// Or Register with Register +instruct orI_rReg(rRegI dst, rRegI src1, rRegI src2) +%{ + match(Set dst (OrI src1 src2)); + + format %{ + "bis 
$src1, $src2, $dst\t# int @orI_rReg" + %} + ins_encode %{ + Register dst = $dst$$Register; + Register src1 = $src1$$Register; + Register src2 = $src2$$Register; + //__ stop("orI_rReg");//TODO + __ bis(src1, src2, dst); + %} + ins_pipe(ialu_regI_regI); +%} + + +// Or Register with Immediate +instruct orI_rReg_imm(rRegI dst, rRegI src1, immU8 src2) +%{ + match(Set dst (OrI src1 src2)); + ins_cost(60); + format %{ "bis $src1, $src2, $dst\t# int @orI_rReg_imm" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src1 = $src1$$Register; + int src2 = $src2$$constant; + //__ stop("orI_rReg_imm");//TODO + __ bis(src1, src2, dst); + %} + ins_pipe(ialu_regI_regI);//ins_pipe needs to be changed to a proper one +%} + +/* memory operands no need for SW64 +// Or Register with Memory +instruct orI_rReg_mem(rRegI dst, memory src1, rRegI src2) +%{ + match(Set dst (OrI src2 (LoadI src1))); + + ins_cost(125); + format %{ + "ldw rscratch2_AT, $src1\t# int @orI_rReg_mem\n\t" + "orw $src2, rscratch2_AT, $dst" + %} + ins_encode %{ + Register dst = $dst$$Register; + Address src1 = $src1$$Address; + Register src2 = $src2$$Register; + __ ldw(rscratch2_AT, src1); + __ orw(src2, rscratch2_AT, dst); + %} + ins_pipe(ialu_regI_regI);//ins_pipe needs to be changed to a proper one +%} + +// Or Memory with Register +instruct orI_mem_rReg(memory dst, rRegI src) +%{ + match(Set dst (StoreI dst (OrI (LoadI dst) src))); + + ins_cost(150); + format %{ + "ldw rscratch2_AT, $dst\t# int @orI_mem_rReg\n\t" + "orw $src, rscratch2_AT, rscratch2_AT\n\t" + "stw rscratch2_AT, $dst" + %} + ins_encode %{ + Address dst = $dst$$Address; + Register src = $src$$Register; + __ ldw(rscratch2_AT, dst); + __ bis(rscratch2_AT, src, rscratch2_AT); + __ stw(rscratch2_AT, dst, rscratch1_GP); + %} + ins_pipe(ialu_regI_regI);//ins_pipe needs to be changed to a proper one +%} + +// Or Memory with Immediate +instruct orI_mem_imm(memory dst, immI src) +%{ + match(Set dst (StoreI dst (OrI (LoadI dst) src))); + + ins_cost(125); + format %{ + "ldw rscratch2_AT, $dst\t# int @orI_mem_imm\n\t" + "movws rscratch1_GP, $src\n\t" + "bis rscratch2_AT, $src, rscratch2_AT\n\t" + "stw rscratch2_AT, $dst" + %} + ins_encode %{ + Address dst = $dst$$Address; + int src = $src$$constant; + __ ldw(rscratch2_AT, dst); + __ movws(rscratch1_GP, src); + __ bis(rscratch2_AT, rscratch1_GP, rscratch2_AT); + __ stw(rscratch2_AT, dst, rscratch1_GP); + %} + ins_pipe(ialu_regI_regI);//ins_pipe needs to be changed to a proper one +%}*/ + +// Xor Instructions +// Xor Register with Register +instruct xorI_rReg(rRegI dst, rRegI src1, rRegI src2) +%{ + match(Set dst (XorI src1 src2)); +// ins_cost(60); + + format %{ + "xor_ins $src1, $src2, $dst\t# int @xorI_rReg" + %} + ins_encode %{ + //__ stop("xorI_rReg"); TODO:jzy + __ xor_ins($src1$$Register, $src2$$Register, $dst$$Register); + %} + ins_pipe( ialu_regI_regI ); +%} +/* TODO no in jdk8 +// Xor Register with Immediate -1 +instruct xorI_rReg_im1(rRegI dst, rRegI src, immI_M1 imm) %{ + match(Set dst (XorI src imm)); + ins_cost(60); + + format %{ "ornot R0, $src, $dst\t# int @xorI_rReg_im1\n\t" + "zapnot $dst, 0xf, $dst"%} + ins_encode %{ + __ ornot(R0, $src$$Register, $dst$$Register); +// __ zapnot($dst$$Register, 0xf, $dst$$Register);//?? 
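+// (Note: XorI with the constant -1 is just bitwise complement, ~src in Java/C, which is
+// why a single ornot against R0 (used as the zero register elsewhere in this file)
+// suffices; whether the commented-out zapnot is also required depends on how this port
+// keeps 32-bit values in 64-bit registers.)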
+ %} + ins_pipe( ialu_regI_regI ); +%} +*/ +// Xor Register with Immediate +instruct xorI_rReg_imm(rRegI dst, rRegI src1, immU8 src2) +%{ + match(Set dst (XorI src1 src2)); + ins_cost(60); + + format %{ "xor_ins $src1, $src2, $dst\t# int @xorI_rReg_imm\n\t" %} + ins_encode %{ + //__ stop("xorI_rReg_imm"); TODO:jzy + __ xor_ins($src1$$Register, $src2$$constant, $dst$$Register); + %} + ins_pipe( ialu_regI_regI ); +%} + +/* memory operands no need for SW64 +// Xor Register with Memory +instruct xorI_rReg_mem(rRegI dst, rRegI src1, memory src2) +%{ + match(Set dst (XorI src1 (LoadI src2))); + ins_cost(125); + + format %{ "ldw rscratch2_AT, $src2\t# int @xorI_rReg_mem\n\t" + "xorw $src1, rscratch2_AT, $dst"%} + ins_encode %{ + __ ldw(rscratch2_AT, $src2$$Address); + __ xorw($src1$$Register, rscratch2_AT, $dst$$Register); + %} +// ins_pipe( ialu_reg_mem ); +%} + +// Xor Memory with Register +instruct xorI_mem_rReg(memory dst, rRegI src) +%{ + match(Set dst (StoreI dst (XorI (LoadI dst) src))); + ins_cost(150); + + format %{ "ldw rscratch2_AT, $dst\t# int @xorI_rReg_mem\n\t" + "xor_ins rscratch2_AT, $src, rscratch2_AT\n\t" + "stw rscratch2_AT, $dst"%} + ins_encode %{ + __ ldw(rscratch2_AT, $dst$$Address); + __ xor_ins(rscratch2_AT, $src$$Register, rscratch2_AT); + __ stw(rscratch2_AT, $dst$$Address, rscratch1_GP); + %} +// ins_pipe( ialu_reg_mem ); +%} + +// Xor Memory with Immediate +instruct xorI_mem_imm(memory dst, immI src) +%{ + match(Set dst (StoreI dst (XorI (LoadI dst) src))); + ins_cost(125); + + format %{ "ldw rscratch2_AT, $dst\t# int @xorI_mem_imm\n\t" + "movws rscratch1_GP, $src\n\t" + "xor_ins rscratch2_AT, $src, rscratch2_AT\n\t" + "stw rscratch2_AT, $dst"%} + ins_encode %{ + __ ldw(rscratch2_AT, $dst$$Address); + __ movws(rscratch1_GP, (int)$src$$constant); + __ xor_ins(rscratch2_AT, rscratch1_GP, rscratch2_AT); + __ stw(rscratch2_AT, $dst$$Address, rscratch1_GP); + %} +// ins_pipe(ialu_mem_imm); +%} + */ + + +// Long Logical Instructions + +// And Instructions +// And Register with Register +instruct andL_rReg(rRegL dst, rRegL src1, rRegL src2) +%{ + match(Set dst (AndL src1 src2)); + ins_cost(100); + format %{ "and_ins $src1, $src2, $dst\t# long @andL_rReg" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src1 = $src1$$Register; + Register src2 = $src2$$Register; + + __ and_ins(src1, src2, dst); + %} + ins_pipe( ialu_regL_regL ); +%} +/* TODO no in jdk8 +// And Register with Immediate 255 +instruct andL_rReg_imm255(rRegL dst, rRegL src1, immL_255 src2) +%{ + match(Set dst (AndL src1 src2)); + + format %{ "and_ins $src1, #255, $dst\t# long & 0xFF @andL_rReg_imm_255" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src1 = $src1$$Register; + __ and_ins(src1, 255, dst); + %} + ins_pipe( ialu_regI_regI ); +%} + +// And Register with Immediate 65535 +instruct andL_rReg_imm65535(rRegL dst, rRegL src1, immL_65535 src2) +%{ + match(Set dst (AndL src1 src2)); + + format %{ "zapnot $src1, 0x3, $dst\t# long & 0xFFFF @andL_rReg_imm65535"%} + ins_encode %{ + Register dst = $dst$$Register; + Register src1 = $src1$$Register; + __ zapnot(src1, 0x3, dst);//TODO CHECK LSP + %} + ins_pipe(ialu_regI_imm16); +%} + +// And Register with Immediate +instruct andL_rReg_imm(rRegL dst, rRegL src1, immL32 src2) +%{ + match(Set dst (AndL src1 src2)); + + format %{ "movws $dst, $src2\t# long @andL_rReg_imm\n\t" + "and_ins $dst, $src1, $dst"%} + ins_encode %{ + Register dst = $dst$$Register; + Register src1 = $src1$$Register; + int value = $src2$$constant; + __ movws(dst, value); + 
__ and_ins(dst, src1, dst); + %} + ins_pipe(ialu_regI_regI); +%} +*/ +instruct andL_Reg_Reg_convI2L(rRegL dst, rRegL src1, rRegI src2) %{ + match(Set dst (AndL src1 (ConvI2L src2))); + format %{ "AND $dst, $src1, $src2 @ andL_Reg_Reg_convI2L\n\t" %} + ins_encode %{ + Register dst_reg = $dst$$Register; + Register src1_reg = $src1$$Register; + Register src2_reg = $src2$$Register; +// __ stop("andL_Reg_Reg_convI2L copy from jdk8 why src2 do not signed extend lsp"); + + __ and_ins(src1_reg, src2_reg, dst_reg); + %} + ins_pipe( ialu_regL_regL ); +%} + +instruct andL_Reg_imm_0_255(rRegL dst, rRegL src1, immUL8 src2) %{ + match(Set dst (AndL src1 src2)); + ins_cost(60); + + format %{ "and $dst, $src1, $src2 #@andL2I_Reg_imm_0_255" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src1$$Register; + long val = $src2$$constant; + + __ and_ins(src, val, dst); + %} + ins_pipe( ialu_regI_regI ); +%} + +// And Register with Immediate 65535 +instruct andL_rReg_imm65535(rRegL dst, rRegL src1, immL_65535 src2) +%{ + match(Set dst (AndL src1 src2)); + ins_cost(60); + format %{ "zapnot $src1, 0x3, $dst\t# long & 0xFFFF @andL_rReg_imm65535"%} + ins_encode %{ + Register dst = $dst$$Register; + Register src1 = $src1$$Register; + __ zapnot(src1, 0x3, dst);//TODO CHECK LSP + %} + ins_pipe(ialu_regI_imm16); +%} +instruct andL2I_Reg_imm_0_255(rRegI dst, rRegL src1, immUL8 src2) %{ + match(Set dst (ConvL2I (AndL src1 src2))); + ins_cost(60); + + format %{ "and $dst, $src1, $src2 #@andL2I_Reg_imm_0_255" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src1$$Register; + long val = $src2$$constant; + + __ and_ins(src, val, dst); + %} + ins_pipe( ialu_regI_regI ); +%} + +/* memory operands no need for SW64 +// And Register with Memory +instruct andL_rReg_mem(rRegL dst, memory src1, rRegL src2) +%{ + match(Set dst (AndL src2 (LoadL src1))); + + ins_cost(125);//todo + format %{ + "ldl rscratch2_AT, $src1\t# long @andL_rReg_mem\n\t" + "and_ins rscratch2_AT, $src2, rscratch2_AT\n\t" + "stl rscratch2_AT, $dst" + %} + ins_encode %{ + Register dst = $dst$$Register; + Address src1 = $src1$$Address; + Register src2 = $src2$$Register; + __ ldl(rscratch2_AT, src1); + __ and_ins(rscratch2_AT, src2, rscratch2_AT); + __ stl(rscratch2_AT, dst, rscratch1_GP); + %} +// ins_pipe(ialu_reg_mem); +%} + +// And Memory with Immediate +instruct andL_mem_imm(memory dst, immL32 src) +%{ + match(Set dst (StoreL dst (AndL (LoadL dst) src))); + + ins_cost(125);//todo + format %{ "ldl rscratch2_AT, $dst\t# long @andL_mem_imm\n\t" + "movws rscratch1_GP, $src\n\t" + "and_ins rscratch2_AT, $src, rscratch2_AT\n\t" + "stl rscratch2_AT, $dst" %} + ins_encode %{ + Address dst = $dst$$Address; + int val = $src$$constant; + __ ldl(rscratch2_AT, dst); + __ movws(rscratch1_GP, val); + __ and_ins(rscratch1_GP, rscratch2_AT, rscratch2_AT); + __ stl(rscratch2_AT, dst); + %} +// ins_pipe(ialu_mem_imm); +%} + + +// BMI1 instructions +instruct andnL_rReg_rReg_mem(rRegL dst, rRegL src1, memory src2, immL_M1 minus_1) %{ + match(Set dst (AndL (XorL src1 minus_1) (LoadL src2))); +// predicate(UseBMI1Instructions); + + ins_cost(125);//todo + format %{ "ldl rscratch2_AT, $src2\t# @andL_mem_rReg\n\t" + "ornot R0, $src1, rscratch3\n\t" + "and_ins rscratch3, rscratch2_AT, $dst" %} + + ins_encode %{ + Register dst = $dst$$Register; + Register src1 = $src1$$Register; + Address src2 = $src2$$Address; + __ ldl(rscratch2_AT, src2); + __ ornot(R0, src1, rscratch3); + __ and_ins(rscratch3, rscratch2_AT, dst); + %} +// ins_pipe(ialu_reg_mem); 
+%}*/ +/* TODO no in jdk8 +instruct andnL_rReg_rReg_rReg(rRegL dst, rRegL src1, rRegL src2, immL_M1 minus_1) %{ + match(Set dst (AndL (XorL src1 minus_1) src2)); +// predicate(UseBMI1Instructions); + + format %{ + "ornot R0, $src1, rscratch3\t# @andnL_rReg_rReg_rReg\n\t" + "andptr rscratch3, $src2, $dst" + %} + + ins_encode %{ + Register dst = $dst$$Register; + Register src1 = $dst$$Register; + Register src2 = $dst$$Register; + __ ornot(R0, src1, rscratch3); + __ andptr(rscratch3, src2, dst); + %} + ins_pipe(ialu_regL_regL); +%} +*/ +/*no need in swjdk8 +instruct blsiL_rReg_rReg(rRegL dst, rRegL src, immL0 imm_zero) %{ + match(Set dst (AndL (SubL imm_zero src) src)); +// predicate(UseBMI1Instructions); + + format %{ + "subptr R0, $src, rscratch3\t# @blsiL_rReg_rReg\n\t" + "andptr rscratch3, $src, $dst" + %} + + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src$$Register; + __ subptr(R0, src, rscratch3); + __ andptr(rscratch3, src, dst); + %} + ins_pipe(ialu_regI_regI); +%}*/ +/* memory operands no need for SW64 +instruct blsiL_rReg_mem(rRegL dst, memory src, immL0 imm_zero) %{ + match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) )); +// predicate(UseBMI1Instructions); + + ins_cost(125);//todo + format %{ + "ldl rscratch2_AT, $src\t# @blsiL_rReg_rReg\n\t" + "subl R0, rscratch2_AT, rscratch3\n\t" + "and_ins rscratch3, rscratch2_AT, $dst" + %} + + ins_encode %{ + Register dst = $dst$$Register; + Address src = $src$$Address; + __ ldl(rscratch2_AT, src); + __ subl(R0, rscratch2_AT, rscratch3); + __ and_ins(rscratch3, rscratch2_AT, dst); + %} +// ins_pipe(ialu_reg_mem); +%} + +instruct blsmskL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1) +%{ + match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ) ); +// predicate(UseBMI1Instructions); + + ins_cost(125);//todo + format %{ + "ldl rscratch2_AT, $src\t# @blsmskL_Reg_mem\n\t" + "subl rscratch2_AT, #1, rscratch1_GP\n\t" + "xor_ins rscratch2_AT, rscratch1_GP, $dst" + %} + + ins_encode %{ + Register dst = $dst$$Register; + Address src = $src$$Address; + __ ldl(rscratch2_AT, src); + __ subl(rscratch2_AT, 1, rscratch1_GP); + __ xor_ins(rscratch2_AT, rscratch1_GP, dst); + %} +// ins_pipe(ialu_reg_mem); +%}*/ +/*no need in swjdk8 +instruct blsmskL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1) +%{ + match(Set dst (XorL (AddL src minus_1) src)); +// predicate(UseBMI1Instructions); + + format %{ "subl $src, #1, rscratch3\t# @blsmskL_rReg_rReg\n\t" + "xor_ins rscratch3, $src, $dst" %} + + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src$$Register; + __ subl(src, 1, rscratch3); + __ xor_ins(rscratch3, src, dst); + %} + + ins_pipe(ialu_regI_regI); +%} + +instruct blsrL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1) +%{ + match(Set dst (AndL (AddL src minus_1) src) ); +// predicate(UseBMI1Instructions); + + format %{ "subl $src, #1, rscratch3\t# @blsrL_Reg_Reg\n\t" + "and_ins rscratch3, $src, $dst" %} + + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src$$Register; + + __ subl(src, 1, rscratch3); + __ and_ins(rscratch3, src, dst); + %} + + ins_pipe(ialu_regI_regI); +%}*/ +/* memory operands no need for SW64 +instruct blsrL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1) +%{ + match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src)) ); +// predicate(UseBMI1Instructions); + + ins_cost(125);//todo + format %{ "ldl rscratch2_AT, $src\t# @blsrL_rReg_mem\n\t" + "subl rscratch2_AT, #1, rscratch1_GP\n\t" + "and_ins rscratch2_AT, rscratch1_GP, $dst" %} + + ins_encode %{ + Register dst = 
$dst$$Register; + Address src = $src$$Address; + __ ldl(rscratch2_AT, src); + __ subl(rscratch2_AT, 1, rscratch1_GP); + __ and_ins(rscratch2_AT, rscratch1_GP, dst); + %} + +// ins_pipe(ialu_reg); +%} +*/ +// Or Instructions +// Or Register with Register +instruct orL_rReg(rRegL dst, rRegL src1, rRegL src2) +%{ + match(Set dst (OrL src1 src2)); + + format %{ "bis $src1, $src2, $dst\t# @orL_rReg\n\t" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src1 = $src1$$Register; + Register src2 = $src2$$Register; + __ bis(src1, src2, dst); + %} + ins_pipe(ialu_regI_regI);//ins_pipe needs to be changed to a proper one +%} + + +// Use any_RegP to match R15 (TLS register) without spilling. +instruct orL_rReg_castP2X(rRegL dst, any_RegP src1, rRegL src2) %{ + match(Set dst (OrL src2 (CastP2X src1))); + + format %{ "bis $src1, $src2, $dst\t# @orL_rReg_castP2X\n\t" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src1 = $src1$$Register; + Register src2 = $src2$$Register; + __ bis(src1, src2, dst); + %} + ins_pipe(ialu_regL_regL);//ins_pipe needs to be changed to a proper one +%} + + +// Or Register with Immediate +instruct orL_rReg_imm(rRegL dst, rRegL src1, immU8 src2) +%{ + match(Set dst (OrL src1 src2)); + ins_cost(80); + format %{ +"movws $dst, $src2, $dst\t# @orL_rReg_imm\n\t" +"bis $src1, $dst, $dst" + %} + ins_encode %{ + Register dst = $dst$$Register; + Register src1 = $src1$$Register; + int src2 = $src2$$constant; + __ bis(src1, src2, dst); + %} + ins_pipe(ialu_regL_regL);//ins_pipe needs to be changed to a proper one +%} + +/* memory operands no need for SW64 +// Or Register with Memory +instruct orL_rReg_mem(rRegL dst, memory src1, rRegL src2) +%{ + match(Set dst (OrL src2 (LoadL src1))); + + ins_cost(125); + format %{ + "ldptr rscratch2_AT, $src1\t# @orL_rReg_mem\n\t" + "bis $src2, rscratch2_AT, $dst" + %} + ins_encode %{ + Register dst = $dst$$Register; + Address src1 = $src1$$Address; + Register src2 = $src2$$Register; + __ ldptr(rscratch2_AT, src1); + __ bis(src2, rscratch2_AT, dst); + %} +// ins_pipe(ialu_regI_regI);//ins_pipe needs to be changed to a proper one +%} + + +// Or Memory with Register +instruct orL_mem_rReg(memory dst, rRegL src) +%{ + match(Set dst (StoreL dst (OrL (LoadL dst) src))); + ins_cost(150); + format %{ + "ldl rscratch2_AT, $dst\t# @orL_mem_rReg\n\t" + "bis rscratch2_AT, $src, rscratch2_AT\n\t" + "stl rscratch2_AT, $dst" + %} + ins_encode%{ + Address dst = $dst$$Address; + Register src = $src$$Register; + __ ldl(rscratch2_AT, dst); + __ bis(rscratch2_AT, src, rscratch2_AT); + __ stl(rscratch2_AT, dst, rscratch1_GP); + %} +// ins_pipe(ialu_regI_regI);//ins_pipe needs to be changed to a proper one +%} + +// Or Memory with Immediate +instruct orL_mem_imm(memory dst, immL32 src) +%{ + match(Set dst (StoreL dst (OrL (LoadL dst) src))); + + ins_cost(125); + format %{ + "ldl rscratch2_AT, $dst\t# @orL_mem_imm\n\t" + "movws rscratch1_GP, $src\n\t" + "bis rscratch2_AT, rscratch1_GP, rscratch2_AT\n\t" + "stl rscratch2_AT, $dst" + %} + ins_encode %{ + Address dst = $dst$$Address; + int src = $src$$constant; + __ ldl(rscratch2_AT, dst); + __ movws(rscratch1_GP, src); + __ bis(rscratch2_AT, rscratch1_GP, rscratch2_AT); + __ stl(rscratch2_AT, dst, rscratch1_GP); + %} +// ins_pipe(ialu_regI_regI);//ins_pipe needs to be changed to a proper one +%} + */ + + +// Xor Instructions +// Xor Register with Register +instruct xorL_rReg(rRegL dst, rRegL src1, rRegL src2) +%{ + match(Set dst (XorL src1 src2)); + ins_cost(60); + + format %{ "xorptr $src1, $src2, 
$dst\t# long @xorL_rReg" %} + ins_encode %{ + __ xorptr($src1$$Register, $src2$$Register, $dst$$Register); + %} + ins_pipe( ialu_regI_regI ); +%} +/* TODO no in jdk8 +// Xor Register with Immediate -1 +instruct xorL_rReg_im1(rRegL dst, rRegL src, immL_M1 imm) %{ + match(Set dst (XorL src imm)); + ins_cost(60); + + format %{ "ornot R0, $src, $dst\t# long @xorL_rReg_im1" %} + ins_encode %{ + __ ornot(R0, $src$$Register, $dst$$Register); + %} + ins_pipe( ialu_regI_regI ); +%} +*/ +// Xor Register with Immediate +instruct xorL_rReg_imm(rRegL dst, rRegL src1, immU8 src2) +%{ + match(Set dst (XorL src1 src2)); + ins_cost(60); + + format %{ "xor_ins $src1, $src2, $dst\t# long @xorL_rReg_imm\n" %} + ins_encode %{ + __ xor_ins($src1$$Register, $src2$$constant, $dst$$Register); + %} + ins_pipe( ialu_regI_regI ); +%} + +/* memory operands no need for SW64 +// Xor Register with Memory +instruct xorL_rReg_mem(rRegL dst, rRegL src1, memory src2) +%{ + match(Set dst (XorL src1 (LoadL src2))); + ins_cost(125); + + format %{ "ldl rscratch2_AT, $src2\t# long @xorL_rReg_mem\n\t" + "xor_ins $src1, rscratch2_AT, $dst" %} + ins_encode %{ + __ ldl(rscratch2_AT, $src2$$Address); + __ xor_ins($src1$$Register, rscratch2_AT, $dst$$Register); + %} +// ins_pipe( ialu_reg_mem ); +%} + +// Xor Memory with Register +instruct xorL_mem_rReg(memory dst, rRegL src) +%{ + match(Set dst (StoreL dst (XorL (LoadL dst) src))); + ins_cost(150); + + format %{ "ldl rscratch2_AT, $src\t# long @xorL_mem_rReg\n\t" + "xor_ins $src, rscratch2_AT, $dst\n\t" + "stl rscratch2_AT, $dst"%} + ins_encode %{ + __ ldl(rscratch2_AT, $dst$$Address); + __ xor_ins(rscratch2_AT, $src$$Register, rscratch2_AT); + __ stl(rscratch2_AT, $dst$$Address, rscratch1_GP); + %} +// ins_pipe( ialu_reg_mem ); +%} + +// Xor Memory with Immediate +instruct xorL_mem_imm(memory dst, immL32 src) +%{ + match(Set dst (StoreI dst (XorL (LoadI dst) src))); + ins_cost(125); + + format %{ "ldl rscratch2_AT, $dst\t# long @xorL_mem_imm\n\t" + "movws rscratch1_GP, $src\n\t" + "xo_ins rscratch2_AT, $src, rscratch2_AT\n\t" + "stl rscratch2_AT, $dst"%} + ins_encode %{ + __ ldl(rscratch2_AT, $dst$$Address); + __ movws(rscratch1_GP, (int)$src$$constant); + __ xor_ins(rscratch2_AT, rscratch1_GP, rscratch2_AT); + __ stl(rscratch2_AT, $dst$$Address, rscratch1_GP); + %} +// ins_pipe( ialu_mem_imm ); +%} + */ + +// Convert Int to Boolean +instruct convI2B(rRegI dst, rRegI src) +%{ + match(Set dst (Conv2B src)); + + ins_cost(100); + format %{ "selne $src, #1, $src, $dst\t# @ convI2B" %} + ins_encode %{ + Register dst = as_Register($dst$$reg); + Register src = as_Register($src$$reg); + __ selne(src, 1, src, dst); + %} + ins_pipe(ialu_regL_regL ); +%} + + +// Convert Pointer to Boolean +instruct convP2B(rRegI dst, rRegP src) +%{ + match(Set dst (Conv2B src)); + + format %{ "selne $src, #1, $src, $dst\t# @convP2B" %} + ins_encode %{ + Register dst = as_Register($dst$$reg); + Register src = as_Register($src$$reg); + __ selne(src, 1, src, dst); + %} + ins_pipe( ialu_regL_regL ); +%} + +//lsp if p= max_int(0x7fffffff),jump to Skip Label + + //Lable Convert + __ BIND(Convert); + __ fcvtsd(src, temp_float_reg); + __ fcvtdl_z(temp_float_reg, temp_float_reg1); + __ fcvtlw(temp_float_reg1, temp_float_reg); + __ fimovs(temp_float_reg, dst); + __ addw(dst, 0, dst); + __ beq_l(R0, Done); + + //Lable Skip + __ BIND(Overflow) + __ addw(rscratch3, 0, dst); + __ BIND(Done); + %} + + ins_pipe( pipe_slow ); +%} + +instruct convF2I_reg_slow( rRegI dst, regF src ) %{ + match(Set dst (ConvF2I src)); + 
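+  // Slow-path f2i. Java narrowing (JLS 5.1.3) maps NaN to 0 and saturates at
+  // Integer.MIN_VALUE/MAX_VALUE, e.g. (int) Float.NaN == 0 and (int) 1e20f ==
+  // Integer.MAX_VALUE; this variant gets those semantics by calling SharedRuntime::f2i
+  // with the argument in F16 and taking the result back from V0.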
ins_cost(250); + format %{ "convf2i $dst, $src @ convF2I_reg_slow" %} + ins_encode %{ + Register dreg = $dst$$Register; + FloatRegister fval = $src$$FloatRegister; + + __ pushad(rscratch3); + __ mov_s(F16, fval); + __ call(RuntimeAddress((CAST_FROM_FN_PTR(address, SharedRuntime::f2i)))); + __ movl(rscratch3, V0); + __ popad(rscratch3); + __ movl(dreg, rscratch3); + %} + + ins_pipe( pipe_slow ); +%} + + +instruct convF2L_reg_fast( rRegL dst, regF src ) %{ + match(Set dst (ConvF2L src)); + ins_cost(150); + format %{ "convf2l $dst, $src @ convF2L_reg_fast" %} + ins_encode %{ + + Register dst = as_Register($dst$$reg); + FloatRegister src = $src$$FloatRegister; + Label Convert,Overflow,Done; + FloatRegister temp_float_reg = f30; + FloatRegister temp_float_reg1 = f28; + + __ fcmpun(src, src, temp_float_reg); + __ ffbne(temp_float_reg, Convert); + + __ bis (R0, 1, rscratch3); + __ slll(rscratch3, 63, rscratch3); + + __ ifmovd(rscratch3, temp_float_reg); + __ fcvtls(temp_float_reg, temp_float_reg1); + __ fcmple(src, temp_float_reg1, temp_float_reg); + __ ffbne(temp_float_reg, Overflow); //if less than min_long(0x8000000000000000),jump to Skip Lable + + __ subl(rscratch3, 1, rscratch3); + __ ifmovd(rscratch3, temp_float_reg); + __ fcvtls(temp_float_reg, temp_float_reg1); + __ fcmple(temp_float_reg1,src, temp_float_reg); + __ ffbne(temp_float_reg, Overflow); // if >=max_long(0x7fffffffffffffff),jump to Skip Lable + + //Lable Convert + __ BIND(Convert); + __ fcvtsd(src, temp_float_reg); + __ fcvtdl_z(temp_float_reg, temp_float_reg1); + __ fimovd(temp_float_reg1, dst); + __ beq_l(R0, Done); + + //Lable Skip + __ BIND(Overflow); + __ move(dst, rscratch3); + __ BIND(Done); + %} + + ins_pipe( pipe_slow ); +%} + + +instruct convF2L_reg_slow( rRegL dst, regF src ) %{ + match(Set dst (ConvF2L src)); + ins_cost(250); + format %{ "convf2l $dst, $src @ convF2L_reg_slow" %} + ins_encode %{ + Register dst = as_Register($dst$$reg); + FloatRegister fval = $src$$FloatRegister; + //TODO:jzy check usage + __ pushad(rscratch3); + __ mov_s(F16, fval); +// __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::f2l), 1); + __ call(RuntimeAddress((CAST_FROM_FN_PTR(address, SharedRuntime::f2l)))); + __ movl(rscratch3, V0); + __ popad(rscratch3); + __ movl(dst, rscratch3); + %} + + ins_pipe( pipe_slow ); +%} + +instruct convD2L_reg_fast( rRegL dst, regD src ) %{ + match(Set dst (ConvD2L src)); + ins_cost(150); + format %{ "convD2L $dst, $src @ convD2L_reg_fast" %} + ins_encode %{ + Register dst = as_Register($dst$$reg); + FloatRegister src = as_FloatRegister($src$$reg); + FloatRegister temp_float_reg = f30; + FloatRegister temp_float_reg1 = f28; + assert((temp_float_reg1 != $src$$FloatRegister), "can not use F28"); + assert((temp_float_reg1 != $dst$$FloatRegister), "can not use F28"); + + Label Convert,Overflow,Done; + __ fcmpun (src, src, temp_float_reg); + __ ffbne (temp_float_reg, Convert); //If Unorder,Jump to Convert Label + + __ bis (R0, 1, rscratch3); + __ slll (rscratch3, 63, rscratch3); + + __ ifmovd (rscratch3, temp_float_reg); + __ fcvtld (temp_float_reg, temp_float_reg1); + __ fcmple (src, temp_float_reg1, temp_float_reg); + __ ffbne (temp_float_reg, Overflow); //If less than min_long(0x8000000000000000),jump to Skip Label + + __ subl (rscratch3, 0x1, rscratch3); + __ ifmovd (rscratch3, temp_float_reg); + __ fcvtld (temp_float_reg, temp_float_reg1); + __ fcmple (temp_float_reg1, src, temp_float_reg); + __ ffbne (temp_float_reg, Overflow); //If >= max_long(0x7fffffffffffffff),jump to Skip Label + + //Label 
Convert + __ BIND(Convert); + __ fcvtdl_z (src, temp_float_reg);//lx20121018,result is rounded toward zero + __ fimovd(temp_float_reg,dst); + __ beq_l (R0, Done); + //Labe Skip + __ BIND(Overflow); + __ move(dst,rscratch3); + __ BIND(Done); + %} + + ins_pipe( pipe_slow ); +%} + + +instruct convD2L_reg_slow( rRegL dst, regD src ) %{ + match(Set dst (ConvD2L src)); + ins_cost(250); + format %{ "convD2L $dst, $src @ convD2L_reg_slow" %} + ins_encode %{ + Register dst = as_Register($dst$$reg); + FloatRegister src = as_FloatRegister($src$$reg); + + __ pushad(rscratch3); + __ mov_d(F16, src); +// __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::d2l), 1); + __ call(RuntimeAddress((CAST_FROM_FN_PTR(address, SharedRuntime::d2l)))); + __ movl(rscratch3, V0); + __ popad(rscratch3); + __ movl(dst, rscratch3); + %} + + ins_pipe( pipe_slow ); +%} + +// Convert a double to an int. If the double is a NAN, stuff a zero in instead. +instruct convD2I_reg_reg_fast( rRegI dst, regD src ) %{ + match(Set dst (ConvD2I src)); + + ins_cost(150); + format %{ "convD2I $dst, $src\t# @ convD2I_reg_reg_fast" %} + + ins_encode %{ + FloatRegister src = $src$$FloatRegister; + Register dst = $dst$$Register; + FloatRegister temp_float_reg = f30; + FloatRegister temp_float_reg1 = f28; + FloatRegister tmp = f27; + + assert( (temp_float_reg1 != src), "can not use F28"); + assert( (temp_float_reg != src), "can not use F28"); + Label Convert,Overflow,Done; + __ fcmpun (src, src, temp_float_reg); + __ ffbne (temp_float_reg, Convert); //If Unorder,Jump to Convert Label + + __ bis (R0, 1, rscratch3); + __ slll (rscratch3, 31, rscratch3); + __ ifmovs (rscratch3, temp_float_reg); + __ fcvtwl (temp_float_reg, temp_float_reg1); + __ fcvtld (temp_float_reg1, temp_float_reg); + __ fcmple (src, temp_float_reg, temp_float_reg1); + __ ffbne (temp_float_reg1, Overflow); //If less than min_int(0x80000000),jump to Skip Label + + __ subw (rscratch3, 0x1, rscratch3); + __ ifmovs (rscratch3, temp_float_reg); + __ fcvtwl (temp_float_reg, temp_float_reg1); + __ fcvtld (temp_float_reg1, temp_float_reg); + __ fcmple (temp_float_reg, src, temp_float_reg1); + __ ffbne (temp_float_reg1, Overflow); //If >= max_int(0x7fffffff),jump to Skip Label + + //Label Convert + __ BIND(Convert); + __ fcvtdl_z (src, temp_float_reg);//lx20121018,result is rounded toward zero + __ fcvtlw (temp_float_reg, tmp); + __ fimovs(tmp,dst); + __ addw(dst,0,dst); + __ beq_l (R0, Done); + //Labe Overflow + __ BIND(Overflow); + __ addw(rscratch3, 0, dst); + __ BIND(Done); + %} + ins_pipe( pipe_slow ); +%} + + +instruct convD2I_reg_reg_slow( rRegI dst, regD src ) %{ + match(Set dst (ConvD2I src)); + + ins_cost(250); + format %{ "convD2I $dst, $src\t# @ convD2I_reg_reg_slow" %} + + ins_encode %{ + FloatRegister src = $src$$FloatRegister; + Register dst = $dst$$Register; + + __ pushad(rscratch3); + __ mov_d(F16, src); +// __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::d2i), 1); + __ call(RuntimeAddress((CAST_FROM_FN_PTR(address, SharedRuntime::d2i)))); + __ movl(rscratch3, V0); + __ popad(rscratch3); + __ movl(dst, rscratch3); + %} + ins_pipe( pipe_slow ); +%} + +instruct convI2F_reg_reg( regF dst, rRegI src ) %{ + match(Set dst (ConvI2F src)); + format %{ "convi2f $dst, $src @ convI2F_reg" %} + ins_encode %{ + Register src = $src$$Register; + FloatRegister dst = $dst$$FloatRegister; + + __ ifmovs(src, f30); + __ fcvtwl(f30, f28); + __ fcvtls(f28, dst); + + %} + + ins_pipe( fpu_regF_regF ); +%} +/* memory operands no need for SW64 +instruct convI2F_reg_mem(regF dst, 
memory src) +%{ + match(Set dst (ConvI2F (LoadI src))); + + format %{ "ldw rscratch1_GP, $src\t# i2f@convI2F_reg_mem\n\t" + "convi2f $dst, rscratch1_GP " %} + ins_encode %{ + FloatRegister dst = $dst$$FloatRegister; + __ ldw(rscratch1_GP, $src$$Address); + __ ifmovs(rscratch1_GP, f30); + __ fcvtwl(f30, f28); + __ fcvtls(f28, dst); +// __ cvtsi2ssl ($dst$$XMMRegister, $src$$Address); + %} + ins_pipe(pipe_slow); // XXX +%} +*/ + +instruct convI2D_reg_reg(regD dst, rRegI src) %{ + match(Set dst (ConvI2D src)); + format %{ "conI2D $dst, $src @convI2D_reg" %} + ins_encode %{ + Register src = $src$$Register; + FloatRegister dst = $dst$$FloatRegister; + __ ifmovd(src, f30); + __ fcvtld(f30, dst); + %} + ins_pipe( fpu_regF_regF ); +%} + +/* memory operands no need for SW64 +instruct convI2D_reg_mem(regD dst, memory src) +%{ + match(Set dst (ConvI2D (LoadI src))); + format %{ "ldw rscratch1_GP, $src\t# i2d@convI2D_reg_mem\n\t" + "conI2D $dst, rscratch1_GP " %} + ins_encode %{ + Register src = $src$$Register; + FloatRegister dst = $dst$$FloatRegister; + __ ldw(rscratch1_GP, $src$$Address); + __ ifmovd(rscratch1_GP, f30); + __ fcvtld(f30, dst); + %} + ins_pipe(pipe_slow); // XXX +%}*/ +/* +instruct convXI2F_reg(regF dst, rRegI src) +%{ + predicate(UseXmmI2F); + match(Set dst (ConvI2F src)); + + format %{ "movdl $dst, $src\n\t" + "cvtdq2psl $dst, $dst\t# i2f" %} + ins_encode %{ + __ movdl($dst$$XMMRegister, $src$$Register); + __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister); + %} + ins_pipe(pipe_slow); // XXX +%} + +instruct convXI2D_reg(regD dst, rRegI src) +%{ + predicate(UseXmmI2D); + match(Set dst (ConvI2D src)); + + format %{ "movdl $dst, $src\n\t" + "cvtdq2pdl $dst, $dst\t# i2d" %} + ins_encode %{ + __ movdl($dst$$XMMRegister, $src$$Register); + __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister); + %} + ins_pipe(pipe_slow); // XXX +%} +*/ +instruct convL2F_reg_reg(regF dst, rRegL src) +%{ + match(Set dst (ConvL2F src)); + format %{ "convl2f $dst, $src @ convL2F_reg" %} + ins_encode %{ + FloatRegister dst = $dst$$FloatRegister; + Register src = as_Register($src$$reg); + __ ifmovd(src, f30); + __ fcvtls(f30, dst); + %} + ins_pipe(pipe_slow); // XXX +%} + +/* memory operands no need for SW64 +instruct convL2F_reg_mem(regF dst, memory src) +%{ + match(Set dst (ConvL2F (LoadL src))); + format %{ "ldl rscratch1_GP, $src\t# l2f@convL2F_reg_mem\n\t" + "conI2D $dst, rscratch1_GP " %} + ins_encode %{ + FloatRegister dst = $dst$$FloatRegister; + Register src = as_Register($src$$reg); + __ ldl(rscratch1_GP, $src$$Address); + __ ifmovd(rscratch1_GP, f30); + __ fcvtls(f30, dst); +// __ cvtsi2ssq ($dst$$XMMRegister, $src$$Address); + %} + ins_pipe(pipe_slow); // XXX +%} + */ + +instruct convL2D_reg_reg(regD dst, rRegL src) +%{ + match(Set dst (ConvL2D src)); + + format %{ "convL2D $dst, $src @ convL2D_reg" %} + ins_encode %{ + Register src = as_Register($src$$reg); + FloatRegister dst = as_FloatRegister($dst$$reg); + + __ ifmovd(src, f30); + __ fcvtld(f30, dst); + %} + ins_pipe(pipe_slow); // XXX +%} + +/* memory operands no need for SW64 +instruct convL2D_reg_mem(regD dst, memory src) +%{ + match(Set dst (ConvL2D (LoadL src))); + + format %{ "convL2D $dst, $src @ convL2D_reg" %} + ins_encode %{ + Register src = as_Register($src$$reg); + FloatRegister dst = as_FloatRegister($dst$$reg); + __ ldl(rscratch1_GP, $src$$Address); + __ ifmovd(rscratch1_GP, f30); + __ fcvtld(f30, dst); + %} + ins_pipe(pipe_slow); // XXX +%}*/ + +instruct convI2L_reg_reg( rRegL dst, rRegI src) %{ + match(Set dst (ConvI2L src)); + + 
ins_cost(100); + format %{ "movws $dst, $src @ convI2L_reg\t" %} + ins_encode %{ + Register dst = as_Register($dst$$reg); + Register src = as_Register($src$$reg); + + if(dst != src) __ movws(dst, src); + %} + ins_pipe( ialu_regL_regL ); +%} + + +// instruct convI2L_reg_reg_foo(rRegL dst, rRegI src) +// %{ +// match(Set dst (ConvI2L src)); +// // predicate(_kids[0]->_leaf->as_Type()->type()->is_int()->_lo >= 0 && +// // _kids[0]->_leaf->as_Type()->type()->is_int()->_hi >= 0); +// predicate(((const TypeNode*) n)->type()->is_long()->_hi == +// (unsigned int) ((const TypeNode*) n)->type()->is_long()->_hi && +// ((const TypeNode*) n)->type()->is_long()->_lo == +// (unsigned int) ((const TypeNode*) n)->type()->is_long()->_lo); + +// format %{ "movl $dst, $src\t# unsigned i2l" %} +// ins_encode(enc_copy(dst, src)); +// // opcode(0x63); // needs REX.W +// // ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst,src)); +// ins_pipe(ialu_reg_reg); +// %} + + +// Zero-extend convert int to long +instruct convI2L_reg_reg_zex(rRegL dst, rRegI src, immL_32bits mask) +%{ + match(Set dst (AndL (ConvI2L src) mask)); + + format %{ "movwu $dst, $src \t# i2l zero-extend @convI2L_reg_reg_zex" %} + ins_encode %{ + Register dst = as_Register($dst$$reg); + Register src = as_Register($src$$reg); + __ movwu(dst, src); + %} + ins_pipe(ialu_regI_regI); +%} +/* memory operands no need for SW64 +// Zero-extend convert int to long +instruct convI2L_reg_mem_zex(rRegL dst, memory src, immL_32bits mask) +%{ + match(Set dst (AndL (ConvI2L (LoadI src)) mask)); + + format %{ "ldwu $dst, $src\t# i2l zero-extend @convI2L_reg_mem_zex" %} + ins_encode %{ + __ ldwu($dst$$Register, $src$$Address); + %} + ins_pipe(pipe_slow); +%} +*/ + +instruct zerox_long_reg_reg(rRegL dst, rRegL src, immL_32bits mask) +%{ + match(Set dst (AndL src mask)); + format %{ "zapnot $src, 0xf, $dst\t# i2l zero-extend @zerox_long_reg_reg" %} + ins_encode %{ + Register dst = as_Register($dst$$reg); + Register src = as_Register($src$$reg); + __ zapnot(src, 0xf, dst); + + %} + ins_pipe(ialu_regI_regI); +%} + +instruct convL2I2L_reg_reg_zex(rRegL dst, rRegL src, immL_32bits mask) +%{ + match(Set dst (AndL (ConvI2L (ConvL2I src)) mask)); + + format %{ "zapnot $dst, $src\t# L2I2L zero-extend @ convL2I2L_reg_reg_zex" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src$$Register; + + __ zapnot(src, 0xf, dst); + + %} + ins_pipe(ialu_regI_regI); +%} + +instruct convL2I2L_reg_reg( rRegL dst, rRegL src ) %{ + match(Set dst (ConvI2L (ConvL2I src))); + + format %{ "addw $dst, $src, 0 @ convL2I2L_reg" %} + ins_encode %{ + Register dst = as_Register($dst$$reg); + Register src = as_Register($src$$reg); + + __ addw(src, 0, dst); + %} + + ins_pipe( ialu_regI_regI ); +%} + +instruct convL2I_reg_reg( rRegI dst, rRegL src ) %{ + match(Set dst (ConvL2I src)); + + format %{ "addw $src, 0, $dst@ convL2I_reg" %} + ins_encode %{ + Register dst = as_Register($dst$$reg); + Register src = as_Register($src$$reg); + + __ addw(src, 0, dst); + %} + + ins_pipe( ialu_regI_regI ); +%} + + +instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{ + match(Set dst (MoveF2I src)); + effect(DEF dst, USE src); + + ins_cost(125); + format %{ "ldw $dst, $src\t# @MoveF2I_stack_reg" %} + ins_encode %{ + Register dst = as_Register($dst$$reg); + __ ldw(dst, Address(esp, $src$$disp));//LSP CHECK sign extend? 
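+    // Assuming SW64's ldw sign-extends the loaded word (as the ldw/ldwu split
+    // elsewhere in this file suggests), the load above already yields the
+    // canonical sign-extended int form that MoveF2I_reg_reg below produces
+    // with addw(dst, 0, dst), so no extra sign-extension step should be needed.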
+ %} + ins_pipe(pipe_slow); +%} + +instruct MoveI2F_stack_reg(regF dst, stackSlotI src) %{ + match(Set dst (MoveI2F src)); + effect(DEF dst, USE src); + + ins_cost(125); + format %{ "store_float $dst, $src\t# @MoveI2F_stack_reg " %} + ins_encode %{ + FloatRegister dst = as_FloatRegister($dst$$reg); + __ store_float(dst, Address(esp, $src$$disp)); + %} + ins_pipe(pipe_slow); +%} + +instruct MoveD2L_stack_reg(rRegL dst, stackSlotD src) %{ + match(Set dst (MoveD2L src)); + effect(DEF dst, USE src); + + ins_cost(125); + format %{ "movq $dst, $src\t# MoveD2L_stack_reg" %} + ins_encode %{ + __ ldl($dst$$Register, Address(esp, $src$$disp)); + %} + ins_pipe(pipe_slow); +%} + +instruct MoveL2D_stack_reg_partial(regD dst, stackSlotL src) %{ +// predicate(!UseXmmLoadAndClearUpper); + match(Set dst (MoveL2D src)); + effect(DEF dst, USE src); + + ins_cost(125); + format %{ "store_double $dst, $src\t# @MoveI2F_stack_reg"%} + ins_encode %{ + FloatRegister dst = as_FloatRegister($dst$$reg); + __ store_double(dst, Address(esp, $src$$disp)); + %} + ins_pipe(pipe_slow); +%} +/* +instruct MoveL2D_stack_reg(regD dst, stackSlotL src) %{ + predicate(UseXmmLoadAndClearUpper); + match(Set dst (MoveL2D src)); + effect(DEF dst, USE src); + + ins_cost(125); + format %{ "movsd $dst, $src\t# MoveL2D_stack_reg" %} + ins_encode %{ + __ movdbl($dst$$XMMRegister, Address(esp, $src$$disp)); + %} + ins_pipe(pipe_slow); +%} +*/ + +instruct MoveF2I_reg_stack(stackSlotI dst, regF src) %{ + match(Set dst (MoveF2I src)); + effect(DEF dst, USE src); + + ins_cost(95); // XXX + format %{ "store_float $dst, $src\t# MoveF2I_reg_stack" %} + ins_encode %{ + FloatRegister src = as_FloatRegister($src$$reg); + __ store_float(src, Address(esp, $dst$$disp)); + %} + ins_pipe(pipe_slow); +%} + +instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{ + match(Set dst (MoveI2F src)); + effect(DEF dst, USE src); + + ins_cost(100); + format %{ "stw $src, $dst\t# @MoveI2F_reg_stack" %} + ins_encode %{ + __ stw( $src$$Register, Address(esp, $dst$$disp)); + %} + ins_pipe( pipe_slow ); +%} + +instruct MoveD2L_reg_stack(stackSlotL dst, regD src) %{ + match(Set dst (MoveD2L src)); + effect(DEF dst, USE src); + + ins_cost(95); // XXX + format %{ "store_double $src, $dst\t# @MoveL2D_reg_stack" %} + ins_encode %{ + FloatRegister src = as_FloatRegister($src$$reg); + __ store_double(src, Address(esp, $dst$$disp)); + %} + ins_pipe(pipe_slow); +%} + +instruct MoveL2D_reg_stack(stackSlotD dst, rRegL src) %{ + match(Set dst (MoveL2D src)); + effect(DEF dst, USE src); + + ins_cost(100); + format %{ "stl $src, $dst\t# @MoveL2D_reg_stack" %} + ins_encode %{ + __ stl($src$$Register, Address(esp, $dst$$disp)); + %} + ins_pipe(pipe_slow); +%} + +instruct MoveF2I_reg_reg(rRegI dst, regF src) %{ + match(Set dst (MoveF2I src)); + effect(DEF dst, USE src); + ins_cost(85); + format %{ "MoveF2I $dst, $src @ MoveF2I_reg_reg" %} + ins_encode %{ + Register dst = as_Register($dst$$reg); + FloatRegister src = as_FloatRegister($src$$reg); + + __ fimovs(src, dst); + __ addw(dst, 0, dst); + %} + ins_pipe( pipe_slow ); +%} + + +instruct MoveD2L_reg_reg(rRegL dst, regD src) %{ + match(Set dst (MoveD2L src)); + effect(DEF dst, USE src); + ins_cost(85); + format %{ "MoveD2L $dst, $src @ MoveD2L_reg_reg" %} + ins_encode %{ + Register dst = as_Register($dst$$reg); + FloatRegister src = as_FloatRegister($src$$reg); + + __ fimovd(src, dst); + %} + ins_pipe( pipe_slow ); +%} + + instruct MoveI2F_reg_reg(regF dst, rRegI src) %{ + match(Set dst (MoveI2F src)); + effect(DEF dst, USE src); + 
ins_cost(85); + format %{ "MoveI2F $dst, $src @ MoveI2F_reg_reg" %} + ins_encode %{ + Register src = as_Register($src$$reg); + FloatRegister dst = as_FloatRegister($dst$$reg); + + __ ifmovs(src, dst); + %} + ins_pipe( pipe_slow ); +%} + +instruct MoveL2D_reg_reg(regD dst, rRegL src) %{ + match(Set dst (MoveL2D src)); + effect(DEF dst, USE src); + ins_cost(85); + format %{ "MoveL2D $dst, $src @ MoveL2D_reg_reg" %} + ins_encode %{ + FloatRegister dst = as_FloatRegister($dst$$reg); + Register src = as_Register($src$$reg); + + __ ifmovd(src, dst); + %} + ins_pipe( pipe_slow ); +%} + + +// ======================================================================= +// fast clearing of an array +instruct clear_array(rRegL cnt, rRegP base, Universe dummy) %{ + match(Set dummy (ClearArray cnt base)); + format %{ "CLEAR_ARRAY base = $base, cnt = $cnt # Clear doublewords" %} + ins_encode %{ + //Assume cnt is the number of bytes in an array to be cleared, + //and base points to the starting address of the array. + Register base = $base$$Register; + Register num = $cnt$$Register; + Label Loop, done; + + __ move(rscratch3, num); /* rscratch3 = words */ + __ beq_l(rscratch3, done); + __ move(rscratch2_AT, base); + + __ BIND(Loop); + __ stl(R0, Address(rscratch2_AT, 0)); + __ addl(rscratch2_AT, wordSize, rscratch2_AT); + __ subl(rscratch3, 1, rscratch3); + __ bne_l(rscratch3, Loop); + __ BIND(done); + %} + ins_pipe( pipe_slow ); +%} +/*TODO:need implement jzy +instruct string_compareU(a1_RegP str1, a2_RegI cnt1, a3_RegP str2, a4_RegI cnt2, + a0_RegI result, t8_RegP tmp1, t9_RegL tmp2, rFlagsReg cr) +%{ + predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU); + match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); + effect(KILL tmp1, KILL tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); + + format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result # KILL $tmp1" %} + ins_encode %{ + // Count is in 8-bit bytes; non-Compact chars are 16 bits. 
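+    // StrIntrinsicNode encodings: U = UTF-16 (2-byte chars), L = Latin-1
+    // (1-byte chars); the first letter refers to str1 and the second to str2,
+    // so UU compares two UTF-16 strings. Per the TODO above, these patterns
+    // stay commented out until a SW64 MacroAssembler::string_compare exists.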
+ __ string_compare($str1$$Register, $str2$$Register, + $cnt1$$Register, $cnt2$$Register, $result$$Register, + $tmp1$$Register, $tmp2$$Register, + fnoreg, fnoreg, fnoreg, StrIntrinsicNode::UU); + %} + ins_pipe(pipe_slow); +%} + +instruct string_compareL(a1_RegP str1, a2_RegI cnt1, a3_RegP str2, a4_RegI cnt2, + a0_RegI result, t8_RegP tmp1, t9_RegL tmp2, rFlagsReg cr) +%{ + predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL); + match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); + effect(KILL tmp1, KILL tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); + + format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result # KILL $tmp1" %} + ins_encode %{ + __ string_compare($str1$$Register, $str2$$Register, + $cnt1$$Register, $cnt2$$Register, $result$$Register, + $tmp1$$Register, $tmp2$$Register, + fnoreg, fnoreg, fnoreg, StrIntrinsicNode::LL); + %} + ins_pipe(pipe_slow); +%} + +instruct string_compareUL(a1_RegP str1, a2_RegI cnt1, a3_RegP str2, a4_RegI cnt2, + a0_RegI result, t8_RegP tmp1, t9_RegL tmp2, + f28_RegD vtmp1, f29_RegD vtmp2, f30_RegD vtmp3, rFlagsReg cr) +%{ + predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL); + match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); + effect(KILL tmp1, KILL tmp2, KILL vtmp1, KILL vtmp2, KILL vtmp3, + USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); + + format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result # KILL $tmp1, $tmp2, $vtmp1, $vtmp2, $vtmp3" %} + ins_encode %{ + __ string_compare($str1$$Register, $str2$$Register, + $cnt1$$Register, $cnt2$$Register, $result$$Register, + $tmp1$$Register, $tmp2$$Register, + $vtmp1$$FloatRegister, $vtmp2$$FloatRegister, + $vtmp3$$FloatRegister, StrIntrinsicNode::UL); + %} + ins_pipe(pipe_slow); +%} + +instruct string_compareLU(a1_RegP str1, a2_RegI cnt1, a3_RegP str2, a4_RegI cnt2, + a0_RegI result, t8_RegP tmp1, t9_RegL tmp2, + f28_RegD vtmp1, f29_RegD vtmp2, f30_RegD vtmp3, rFlagsReg cr) +%{ + predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU); + match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); + effect(KILL tmp1, KILL tmp2, KILL vtmp1, KILL vtmp2, KILL vtmp3, + USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); + + format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result # KILL $tmp1, $tmp2, $vtmp1, $vtmp2, $vtmp3" %} + ins_encode %{ + __ string_compare($str1$$Register, $str2$$Register, + $cnt1$$Register, $cnt2$$Register, $result$$Register, + $tmp1$$Register, $tmp2$$Register, + $vtmp1$$FloatRegister, $vtmp2$$FloatRegister, + $vtmp3$$FloatRegister,StrIntrinsicNode::LU); + %} + ins_pipe(pipe_slow); +%}*/ +/*TODO:need implement jzy +instruct string_indexofUU(a1_RegP str1, a2_RegI cnt1, a3_RegP str2, a4_RegI cnt2, + a0_RegI result, rRegI tmp1, rRegI tmp2, rRegI tmp3, + rRegI tmp4, rRegI tmp5, rRegI tmp6, rFlagsReg cr) +%{ + predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU); + match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2))); + effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, + TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, TEMP tmp6, KILL cr); + format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (UU)" %} + + ins_encode %{ + __ string_indexof($str1$$Register, $str2$$Register, + $cnt1$$Register, $cnt2$$Register, + $tmp1$$Register, $tmp2$$Register, + $tmp3$$Register, $tmp4$$Register, + $tmp5$$Register, $tmp6$$Register, + -1, $result$$Register, StrIntrinsicNode::UU); + %} + 
ins_pipe(pipe_slow); +%} + +instruct string_indexofLL(a1_RegP str1, a2_RegI cnt1, a3_RegP str2, a4_RegI cnt2, + a0_RegI result, rRegI tmp1, rRegI tmp2, rRegI tmp3, + rRegI tmp4, rRegI tmp5, rRegI tmp6, rFlagsReg cr) +%{ + predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL); + match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2))); + effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, + TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, TEMP tmp6, KILL cr); + format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (LL)" %} + + ins_encode %{ + __ string_indexof($str1$$Register, $str2$$Register, + $cnt1$$Register, $cnt2$$Register, + $tmp1$$Register, $tmp2$$Register, + $tmp3$$Register, $tmp4$$Register, + $tmp5$$Register, $tmp6$$Register, + -1, $result$$Register, StrIntrinsicNode::LL); + %} + ins_pipe(pipe_slow); +%} + +instruct string_indexofUL(a1_RegP str1, a2_RegI cnt1, a3_RegP str2, a4_RegI cnt2, + a0_RegI result, rRegI tmp1, rRegI tmp2, rRegI tmp3, + rRegI tmp4, rRegI tmp5, rRegI tmp6, rFlagsReg cr) +%{ + predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL); + match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2))); + effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, + TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, TEMP tmp6, KILL cr); + format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (UL)" %} + + ins_encode %{ + __ string_indexof($str1$$Register, $str2$$Register, + $cnt1$$Register, $cnt2$$Register, + $tmp1$$Register, $tmp2$$Register, + $tmp3$$Register, $tmp4$$Register, + $tmp5$$Register, $tmp6$$Register, + -1, $result$$Register, StrIntrinsicNode::UL); + %} + ins_pipe(pipe_slow); +%} + +instruct string_indexof_conUU(a1_RegP str1, a4_RegI cnt1, a3_RegP str2, + immI_le_4 int_cnt2, a0_RegI result, rRegI tmp1, rRegI tmp2, rRegI tmp3, + rRegI tmp4, rFlagsReg cr) +%{ + predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU); + match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2))); + effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, + TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr); + format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (UU)" %} + + ins_encode %{ + int icnt2 = (int)$int_cnt2$$constant; + __ string_indexof($str1$$Register, $str2$$Register, + $cnt1$$Register, zr, + $tmp1$$Register, $tmp2$$Register, + $tmp3$$Register, $tmp4$$Register, zr, zr, + icnt2, $result$$Register, StrIntrinsicNode::UU); + %} + ins_pipe(pipe_slow); +%} + +instruct string_indexof_conLL(a1_RegP str1, a4_RegI cnt1, a3_RegP str2, + immI_le_4 int_cnt2, a0_RegI result, rRegI tmp1, rRegI tmp2, rRegI tmp3, + rRegI tmp4, rFlagsReg cr) +%{ + predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL); + match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2))); + effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, + TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr); + format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (LL)" %} + + ins_encode %{ + int icnt2 = (int)$int_cnt2$$constant; + __ string_indexof($str1$$Register, $str2$$Register, + $cnt1$$Register, zr, + $tmp1$$Register, $tmp2$$Register, + $tmp3$$Register, $tmp4$$Register, zr, zr, + icnt2, $result$$Register, StrIntrinsicNode::LL); + %} + ins_pipe(pipe_slow); +%} + +instruct string_indexof_conUL(a1_RegP str1, a4_RegI cnt1, a3_RegP str2, + immI_le_4 int_cnt2, a0_RegI result, rRegI tmp1, rRegI tmp2, rRegI tmp3, + rRegI tmp4, rFlagsReg cr) +%{ + 
predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL); + match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2))); + effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, + TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr); + format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (UL)" %} + + ins_encode %{ + int icnt2 = (int)$int_cnt2$$constant; + __ string_indexof($str1$$Register, $str2$$Register, + $cnt1$$Register, zr, + $tmp1$$Register, $tmp2$$Register, + $tmp3$$Register, $tmp4$$Register, zr, zr, + icnt2, $result$$Register, StrIntrinsicNode::UL); + %} + ins_pipe(pipe_slow); +%} + +instruct string_indexofU_char(a1_RegP str1, a2_RegI cnt1, a3_RegI ch, + a0_RegI result, rRegI tmp1, rRegI tmp2, + rRegI tmp3, rFlagsReg cr) +%{ + match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); + effect(USE_KILL str1, USE_KILL cnt1, USE_KILL ch, + TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr); + + format %{ "String IndexOf char[] $str1,$cnt1,$ch -> $result" %} + + ins_encode %{ + __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, + $result$$Register, $tmp1$$Register, $tmp2$$Register, + $tmp3$$Register); + %} + ins_pipe(pipe_slow); +%}*/ +//TODO:need implement jzy +/* +instruct string_equalsL(a1_RegP str1, a3_RegP str2, a4_RegI cnt, + a0_RegI result, rFlagsReg cr) +%{ + predicate(((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::LL); + match(Set result (StrEquals (Binary str1 str2) cnt)); + effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL cr); + + format %{ "String Equals $str1,$str2,$cnt -> $result" %} + ins_encode %{ + // Count is in 8-bit bytes; non-Compact chars are 16 bits. + __ string_equals($str1$$Register, $str2$$Register, + $result$$Register, $cnt$$Register, 1); + %} + ins_pipe(pipe_slow); +%} + +instruct string_equalsU(a1_RegP str1, a3_RegP str2, a4_RegI cnt, + a0_RegI result, rFlagsReg cr) +%{ + predicate(((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::UU); + match(Set result (StrEquals (Binary str1 str2) cnt)); + effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL cr); + + format %{ "String Equals $str1,$str2,$cnt -> $result" %} + ins_encode %{ + // Count is in 8-bit bytes; non-Compact chars are 16 bits. 
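+    // The trailing constant appears to be the element width in bytes:
+    // string_equalsL above passes 1 for Latin-1 data and this UTF-16 variant
+    // passes 2, while $cnt stays a byte count as the comment above notes.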
+ __ string_equals($str1$$Register, $str2$$Register, + $result$$Register, $cnt$$Register, 2); + %} + ins_pipe(pipe_slow); +%} + +instruct array_equalsB(a1_RegP ary1, a2_RegP ary2, a0_RegI result, + a3_RegP tmp1, a4_RegP tmp2, a5_RegP tmp3, + t10_RegP tmp, rFlagsReg cr) +%{ + predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL); + match(Set result (AryEq ary1 ary2)); + effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr); + + format %{ "Array Equals $ary1,ary2 -> $result // KILL $tmp" %} + ins_encode %{ + __ arrays_equals($ary1$$Register, $ary2$$Register, + $tmp1$$Register, $tmp2$$Register, $tmp3$$Register, + $result$$Register, $tmp$$Register, 1); + %} + ins_pipe(pipe_slow); +%} + +instruct array_equalsC(a1_RegP ary1, a2_RegP ary2, a0_RegI result, + a3_RegP tmp1, a4_RegP tmp2, a5_RegP tmp3, + t10_RegP tmp, rFlagsReg cr) +%{ + predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU); + match(Set result (AryEq ary1 ary2)); + effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr); + + format %{ "Array Equals $ary1,ary2 -> $result // KILL $tmp" %} + ins_encode %{ + __ arrays_equals($ary1$$Register, $ary2$$Register, + $tmp1$$Register, $tmp2$$Register, $tmp3$$Register, + $result$$Register, $tmp$$Register, 2); + %} + ins_pipe(pipe_slow); +%} +*/ +instruct has_negatives(a1_RegP ary1, a2_RegI len, v0_RegI result, rFlagsReg cr) +%{ + match(Set result (HasNegatives ary1 len)); + effect(USE_KILL ary1, USE_KILL len, KILL cr); + format %{ "has negatives byte[] $ary1,$len -> $result" %} + ins_encode %{ + __ has_negatives($ary1$$Register, $len$$Register, $result$$Register); + %} + ins_pipe( pipe_slow ); +%} + +// fast char[] to byte[] compression TODO:jzy +/*instruct string_compress(a2_RegP src, a1_RegP dst, a3_RegI len, + f27_RegD tmp1, f28_RegD tmp2, f29_RegD tmp3, f30_RegD tmp4, + a0_RegI result, rFlagsReg cr) +%{ + match(Set result (StrCompressedCopy src (Binary dst len))); + effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr); + + format %{ "String Compress $src,$dst -> $result // KILL R1, R2, R3, R4" %} + ins_encode %{ + __ char_array_compress($src$$Register, $dst$$Register, $len$$Register, + $tmp1$$FloatRegister, $tmp2$$FloatRegister, + $tmp3$$FloatRegister, $tmp4$$FloatRegister, + $result$$Register); + %} + ins_pipe( pipe_slow ); +%}*/ + +// fast byte[] to char[] inflation TODO:should implement jzy +/*instruct string_inflate(Universe dummy, a0_RegP src, a1_RegP dst, a2_RegI len, + f27_RegD tmp1, f28_RegD tmp2, f29_RegD tmp3, f30_RegD tmp4, rFlagsReg cr) +%{ + match(Set dummy (StrInflatedCopy src (Binary dst len))); + effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr); + + format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %} + ins_encode %{ + __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register, + $tmp1$$FloatRegister, $tmp2$$FloatRegister, $tmp3$$FloatRegister, $tmp4$$Register); + %} + ins_pipe(pipe_slow); +%} + +// encode char[] to byte[] in ISO_8859_1 +instruct encode_iso_array(a2_RegP src, a1_RegP dst, a3_RegI len, + f27_RegD Vtmp1, f28_RegD Vtmp2, f29_RegD Vtmp3, f30_RegD Vtmp4, + a0_RegI result, rFlagsReg cr) +%{ + predicate(!((EncodeISOArrayNode*)n)->is_ascii()); + match(Set result (EncodeISOArray src (Binary dst len))); + effect(USE_KILL src, USE_KILL dst, USE_KILL len, + KILL Vtmp1, KILL Vtmp2, KILL Vtmp3, KILL Vtmp4, KILL cr); + + format %{ "Encode array $src,$dst,$len -> $result" 
%} + ins_encode %{ + __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register, + $result$$Register, $Vtmp1$$FloatRegister, $Vtmp2$$FloatRegister, + $Vtmp3$$FloatRegister, $Vtmp4$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%}*/ + +/* + +//----------Overflow Math Instructions----------------------------------------- + +instruct overflowAddI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2) +%{ + match(Set cr (OverflowAddI op1 op2)); + effect(DEF cr, USE_KILL op1, USE op2); + + format %{ "addl $op1, $op2\t# overflow check int" %} + + ins_encode %{ + __ addl($op1$$Register, $op2$$Register); + %} + ins_pipe(ialu_reg_reg); +%} + +instruct overflowAddI_rReg_imm(rFlagsReg cr, rax_RegI op1, immI op2) +%{ + match(Set cr (OverflowAddI op1 op2)); + effect(DEF cr, USE_KILL op1, USE op2); + + format %{ "addl $op1, $op2\t# overflow check int" %} + + ins_encode %{ + __ addl($op1$$Register, $op2$$constant); + %} + ins_pipe(ialu_reg_reg); +%} + +instruct overflowAddL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2) +%{ + match(Set cr (OverflowAddL op1 op2)); + effect(DEF cr, USE_KILL op1, USE op2); + + format %{ "addq $op1, $op2\t# overflow check long" %} + ins_encode %{ + __ addq($op1$$Register, $op2$$Register); + %} + ins_pipe(ialu_reg_reg); +%} + +instruct overflowAddL_rReg_imm(rFlagsReg cr, rax_RegL op1, immL32 op2) +%{ + match(Set cr (OverflowAddL op1 op2)); + effect(DEF cr, USE_KILL op1, USE op2); + + format %{ "addq $op1, $op2\t# overflow check long" %} + ins_encode %{ + __ addq($op1$$Register, $op2$$constant); + %} + ins_pipe(ialu_reg_reg); +%} + +instruct overflowSubI_rReg(rFlagsReg cr, rRegI op1, rRegI op2) +%{ + match(Set cr (OverflowSubI op1 op2)); + + format %{ "cmpl $op1, $op2\t# overflow check int" %} + ins_encode %{ + __ cmpl($op1$$Register, $op2$$Register); + %} + ins_pipe(ialu_reg_reg); +%} + +instruct overflowSubI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2) +%{ + match(Set cr (OverflowSubI op1 op2)); + + format %{ "cmpl $op1, $op2\t# overflow check int" %} + ins_encode %{ + __ cmpl($op1$$Register, $op2$$constant); + %} + ins_pipe(ialu_reg_reg); +%} + +instruct overflowSubL_rReg(rFlagsReg cr, rRegL op1, rRegL op2) +%{ + match(Set cr (OverflowSubL op1 op2)); + + format %{ "cmpq $op1, $op2\t# overflow check long" %} + ins_encode %{ + __ cmpq($op1$$Register, $op2$$Register); + %} + ins_pipe(ialu_reg_reg); +%} + +instruct overflowSubL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2) +%{ + match(Set cr (OverflowSubL op1 op2)); + + format %{ "cmpq $op1, $op2\t# overflow check long" %} + ins_encode %{ + __ cmpq($op1$$Register, $op2$$constant); + %} + ins_pipe(ialu_reg_reg); +%} + +instruct overflowNegI_rReg(rFlagsReg cr, immI0 zero, rax_RegI op2) +%{ + match(Set cr (OverflowSubI zero op2)); + effect(DEF cr, USE_KILL op2); + + format %{ "negl $op2\t# overflow check int" %} + ins_encode %{ + __ negl($op2$$Register); + %} + ins_pipe(ialu_reg_reg); +%} + +instruct overflowNegL_rReg(rFlagsReg cr, immL0 zero, rax_RegL op2) +%{ + match(Set cr (OverflowSubL zero op2)); + effect(DEF cr, USE_KILL op2); + + format %{ "negq $op2\t# overflow check long" %} + ins_encode %{ + __ negq($op2$$Register); + %} + ins_pipe(ialu_reg_reg); +%} + +instruct overflowMulI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2) +%{ + match(Set cr (OverflowMulI op1 op2)); + effect(DEF cr, USE_KILL op1, USE op2); + + format %{ "imull $op1, $op2\t# overflow check int" %} + ins_encode %{ + __ imull($op1$$Register, $op2$$Register); + %} + ins_pipe(ialu_reg_reg_alu0); +%} + +instruct overflowMulI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2, 
rRegI tmp) +%{ + match(Set cr (OverflowMulI op1 op2)); + effect(DEF cr, TEMP tmp, USE op1, USE op2); + + format %{ "imull $tmp, $op1, $op2\t# overflow check int" %} + ins_encode %{ + __ imull($tmp$$Register, $op1$$Register, $op2$$constant); + %} + ins_pipe(ialu_reg_reg_alu0); +%} + +instruct overflowMulL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2) +%{ + match(Set cr (OverflowMulL op1 op2)); + effect(DEF cr, USE_KILL op1, USE op2); + + format %{ "imulq $op1, $op2\t# overflow check long" %} + ins_encode %{ + __ imulq($op1$$Register, $op2$$Register); + %} + ins_pipe(ialu_reg_reg_alu0); +%} + +instruct overflowMulL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2, rRegL tmp) +%{ + match(Set cr (OverflowMulL op1 op2)); + effect(DEF cr, TEMP tmp, USE op1, USE op2); + + format %{ "imulq $tmp, $op1, $op2\t# overflow check long" %} + ins_encode %{ + __ imulq($tmp$$Register, $op1$$Register, $op2$$constant); + %} + ins_pipe(ialu_reg_reg_alu0); +%} +*/ + +/* Cmpxxx useless in SW64 +//----------Control Flow Instructions------------------------------------------ +// Signed compare Instructions + +// XXX more variants!! +instruct compI_rReg(rFlagsReg cr, rRegI op1, rRegI op2) +%{ + match(Set cr (CmpI op1 op2)); + effect(DEF cr, USE op1, USE op2); + + format %{ "cmpw $op1, $op2\t@compI_rReg" %} + + ins_encode %{ + __ cmpw($op1$$Register, $op2$$Register, $cr$$Register); + %} + //ins_pipe(ialu_cr_reg_reg); +%} + +instruct compI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2) +%{ + match(Set cr (CmpI op1 op2)); + + format %{ "cmpw $op1, $op2\t@compI_rReg_imm" %} + + ins_encode %{ + __ mov_immediate32(rscratch3, $op2$$constant); + __ cmpw($op1$$Register, rscratch3, $cr$$Register); + %} + //ins_pipe(ialu_cr_reg_imm); +%} + +instruct compI_rReg_mem(rFlagsReg cr, rRegI op1, memory op2) +%{ + match(Set cr (CmpI op1 (LoadI op2))); + + ins_cost(500); // XXX + format %{ "cmpw $op1, $op2\t@compI_rReg_mem" %} + + ins_encode %{ + __ ldws(rscratch2_AT, $op2$$Address); + __ cmpw($op1$$Register, rscratch2_AT, $cr$$Register); + %} + //ins_pipe(ialu_cr_reg_mem); +%} + +instruct testI_reg(rFlagsReg cr, rRegI src, immI0 zero) +%{ + match(Set cr (CmpI src zero)); + + format %{ "testw $src, $src\t@testI_reg" %} + ins_encode %{ + __ testw($src$$Register, $src$$Register, $cr$$Register); + %} + //ins_pipe(ialu_cr_reg_imm); +%} + +instruct testI_reg_imm(rFlagsReg cr, rRegI src, immI con, immI0 zero) +%{ + match(Set cr (CmpI (AndI src con) zero)); + + format %{ "testl $src, $con\t@testI_reg_imm" %} + ins_encode %{ + __ testw($src$$Register, $con$$constant, $cr$$Register); + %} + //ins_pipe(ialu_cr_reg_imm); +%} + +instruct testI_reg_mem(rFlagsReg cr, rRegI src, memory mem, immI0 zero) +%{ + match(Set cr (CmpI (AndI src (LoadI mem)) zero)); + + format %{ "testl $src, $mem\t@testI_reg_mem" %} + ins_encode %{ + __ ldws(rscratch2_AT, $mem$$Address); + __ testw($src$$Register, rscratch2_AT, $cr$$Register); + %} + //ins_pipe(ialu_cr_reg_mem); +%} + +// Unsigned compare Instructions; really, same as signed except they +// produce an rFlagsRegU instead of rFlagsReg. 
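+// (These x86-style Cmp*/Test* patterns materialize a flags register; the SW64
+// port instead matches compare-and-branch as fused patterns further below
+// (cmpls/cmplu followed by bne_l on rcc), which is presumably why this whole
+// block sits under the "Cmpxxx useless in SW64" comment.)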
+instruct compU_rReg(rFlagsRegU cr, rRegI op1, rRegI op2) +%{ + match(Set cr (CmpU op1 op2)); + + format %{ "cmpw $op1, $op2\t# unsigned\t@compU_rReg" %} + ins_encode %{ + __ cmpw($op1$$Register, $op2$$Register, $cr$$Register); + __ stop("need add unsigned instruct: jzy?"); + %} + //ins_pipe(ialu_cr_reg_reg); +%} + +instruct compU_rReg_imm(rFlagsRegU cr, rRegI op1, immI op2) +%{ + match(Set cr (CmpU op1 op2)); + + format %{ "cmpl $op1, $op2\t# unsigned\t@compU_rReg_imm" %} + ins_encode %{ + __ mov_immediate32(rscratch2_AT, $op2$$constant); + __ cmpw($op1$$Register, rscratch2_AT, $cr$$Register); + __ stop("need add unsigned instruct: jzy?"); + %} + //ins_pipe(ialu_cr_reg_imm); +%} + +instruct compU_rReg_mem(rFlagsRegU cr, rRegI op1, memory op2) +%{ + match(Set cr (CmpU op1 (LoadI op2))); + + ins_cost(500); // XXX + format %{ "cmpl $op1, $op2\t# unsigned\t@compU_rReg_mem" %} + ins_encode %{ + __ ldws(rscratch2_AT, $op2$$Address); + __ cmpw($op1$$Register, rscratch2_AT, $cr$$Register); + __ stop("need add unsigned instruct: jzy?"); + %} + //ins_pipe(ialu_cr_reg_mem); +%} + +// // // Cisc-spilled version of cmpU_rReg +// //instruct compU_mem_rReg(rFlagsRegU cr, memory op1, rRegI op2) +// //%{ +// // match(Set cr (CmpU (LoadI op1) op2)); +// // +// // format %{ "CMPu $op1,$op2" %} +// // ins_cost(500); +// // opcode(0x39); +// // ins_encode( OpcP, reg_mem( op1, op2) ); +// //%} + +instruct testU_reg(rFlagsRegU cr, rRegI src, immI0 zero) +%{ + match(Set cr (CmpU src zero)); + + format %{ "testw $src, $src\t# unsigned\t@testU_reg" %} + ins_encode %{ + __ testw($src$$Register, $src$$Register, $cr$$Register); + %} + //ins_pipe(ialu_cr_reg_imm); +%} + +instruct compP_rReg(rFlagsRegU cr, rRegP op1, rRegP op2) +%{ + match(Set cr (CmpP op1 op2)); + + format %{ "cmpptr $op1, $op2\t# ptr\t@compP_rReg" %} + ins_encode %{ + __ cmpptr($op1$$Register, $op2$$Register, $cr$$Register); + %} + //ins_pipe(ialu_cr_reg_reg); +%} + +instruct compP_rReg_mem(rFlagsRegU cr, rRegP op1, memory op2) +%{ + match(Set cr (CmpP op1 (LoadP op2))); + + ins_cost(500); // XXX + format %{ "cmpptr $op1, $op2\t# ptr\t@compP_rReg_mem" %} + ins_encode %{ + __ ldws(rscratch2_AT, $op2$$Address); + __ cmpptr($op1$$Register, rscratch2_AT, $cr$$Register); + %} + //ins_pipe(ialu_cr_reg_mem); +%} + +// // // Cisc-spilled version of cmpP_rReg +// //instruct compP_mem_rReg(rFlagsRegU cr, memory op1, rRegP op2) +// //%{ +// // match(Set cr (CmpP (LoadP op1) op2)); +// // +// // format %{ "CMPu $op1,$op2" %} +// // ins_cost(500); +// // opcode(0x39); +// // ins_encode( OpcP, reg_mem( op1, op2) ); +// //%} + +// XXX this is generalized by compP_rReg_mem??? +// Compare raw pointer (used in out-of-heap check). +// Only works because non-oop pointers must be raw pointers +// and raw pointers have no anti-dependencies. +instruct compP_mem_rReg(rFlagsRegU cr, rRegP op1, memory op2) +%{ + predicate(n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none);//TODO:jzy? + match(Set cr (CmpP op1 (LoadP op2))); + + format %{ "cmpptr $op1, $op2\t# raw ptr\t@compP_mem_rReg" %} + ins_encode %{ + __ ldws(rscratch2_AT, $op2$$Address); + __ cmpptr($op1$$Register, rscratch2_AT, $cr$$Register); + %} + //ins_pipe(ialu_cr_reg_mem); +%} + +// This will generate a signed flags result. This should be OK since +// any compare to a zero should be eq/neq. 
+instruct testP_reg(rFlagsReg cr, rRegP src, immP0 zero) +%{ + match(Set cr (CmpP src zero)); + + format %{ "testptr $src, $src\t# ptr\t@testP_reg" %} + ins_encode %{ + __ testptr($src$$Register, $src$$Register, $cr$$Register); + %} + //ins_pipe(ialu_cr_reg_imm); +%} + +// This will generate a signed flags result. This should be OK since +// any compare to a zero should be eq/neq. +instruct testP_mem(rFlagsReg cr, memory op, immP0 zero) +%{ + predicate(!UseCompressedOops || (CompressedOops::base()!= NULL)); + match(Set cr (CmpP (LoadP op) zero)); + + ins_cost(500); // XXX + format %{ "testq $op, 0xffffffffffffffff\t# ptr\t@testP_mem" %} + ins_encode %{ + __ ldptr(rscratch2_AT, $op$$Address); + __ testptr(rscratch2_AT, 0xFFFFFFFF, $cr$$Register); + __ stop("0xFFFFFFFF or 0xffffffffffffffff jzy?"); + %} + //ins_pipe(ialu_cr_reg_imm); +%} + +instruct testP_mem_reg0(rFlagsReg cr, memory mem, immP0 zero) +%{ + predicate(UseCompressedOops && (CompressedOops::base() == NULL)); + match(Set cr (CmpP (LoadP mem) zero)); + + format %{ "cmpq R0, $mem\t# ptr (rheapbase==0)\t@testP_mem_reg0" %} + ins_encode %{ + __ ldptr(rscratch2_AT, $mem$$Address); + __ cmpptr(rscratch2_AT, 0, $cr$$Register); + %} + //ins_pipe(ialu_cr_reg_mem); +%} + +instruct compN_rReg(rFlagsRegU cr, rRegN op1, rRegN op2) +%{ + match(Set cr (CmpN op1 op2)); + + format %{ "cmpw $op1, $op2\t# compressed ptr\t@compN_rReg" %} + ins_encode %{ + __ cmpw($op1$$Register, $op2$$Register, $cr$$Register); + %} + //ins_pipe(ialu_cr_reg_reg); +%} + +instruct compN_rReg_mem(rFlagsRegU cr, rRegN src, memory mem) +%{ + match(Set cr (CmpN src (LoadN mem))); + + format %{ "cmpw $src, $mem\t# compressed ptr\t@compN_rReg_mem" %} + ins_encode %{ + __ ldwu(rscratch2_AT, $mem$$Address); + __ cmpw($src$$Register, rscratch2_AT, $cr$$Register); + %} + //ins_pipe(ialu_cr_reg_mem); +%} + +instruct compN_rReg_imm(rFlagsRegU cr, rRegN op1, immN op2) %{ + match(Set cr (CmpN op1 op2)); + + format %{ "cmpw $op1, $op2\t# compressed ptr\t@compN_rReg_imm" %} + ins_encode %{ + __ cmp_narrow_oop($op1$$Register, (jobject)$op2$$constant, $cr$$Register); + %} + //ins_pipe(ialu_cr_reg_imm); +%} + +instruct compN_mem_imm(rFlagsRegU cr, memory mem, immN src) +%{ + match(Set cr (CmpN src (LoadN mem))); + + format %{ "cmpw $mem, $src\t# compressed ptr\t@compN_mem_imm" %} + ins_encode %{ + __ cmp_narrow_oop($mem$$Address, (jobject)$src$$constant, $cr$$Register); + %} + //ins_pipe(ialu_cr_reg_mem); +%} + +instruct compN_rReg_imm_klass(rFlagsRegU cr, rRegN op1, immNKlass op2) %{ + match(Set cr (CmpN op1 op2)); + + format %{ "cmpw $op1, $op2\t# compressed klass ptr\t@compN_rReg_imm_klass" %} + ins_encode %{ + __ cmp_narrow_klass($op1$$Register, (Klass*)$op2$$constant, $cr$$Register); + %} + //ins_pipe(ialu_cr_reg_imm); +%} + +instruct compN_mem_imm_klass(rFlagsRegU cr, memory mem, immNKlass src) +%{ + match(Set cr (CmpN src (LoadNKlass mem))); + + format %{ "cmpw $mem, $src\t# compressed klass ptr\t@compN_mem_imm_klass" %} + ins_encode %{ + __ cmp_narrow_klass($mem$$Address, (Klass*)$src$$constant, $cr$$Register); + %} + //ins_pipe(ialu_cr_reg_mem); +%} + +instruct testN_reg(rFlagsReg cr, rRegN src, immN0 zero) %{ + match(Set cr (CmpN src zero)); + + format %{ "testw $src, $src\t# compressed ptr\t@testN_reg" %} + ins_encode %{ __ testw($src$$Register, $src$$Register, $cr$$Register); %} + //ins_pipe(ialu_cr_reg_imm); +%} + +instruct testN_mem(rFlagsReg cr, memory mem, immN0 zero) +%{ + predicate(CompressedOops::base() != NULL); + match(Set cr (CmpN (LoadN mem) zero)); + + 
ins_cost(500); // XXX + format %{ "testw $mem, 0xffffffff\t# compressed ptr\t@testN_mem" %} + ins_encode %{ + __ ldwu(rscratch2_AT, $mem$$Address); + __ cmpw(rscratch2_AT, (int)0xFFFFFFFF, $cr$$Register); + %} + //ins_pipe(ialu_cr_reg_mem); +%} + +instruct testN_mem_reg0(rFlagsReg cr, memory mem, immN0 zero) +%{ + predicate(CompressedOops::base() == NULL); + match(Set cr (CmpN (LoadN mem) zero)); + + format %{ "cmpl R12, $mem\t# compressed ptr (R12_heapbase==0)\t@testN_mem_reg0" %} + ins_encode %{ + __ ldwu(rscratch2_AT, $mem$$Address); + __ cmpw(R0, rscratch2_AT, $cr$$Register); + %} + //ins_pipe(ialu_cr_reg_mem); +%} + +// Yanked all unsigned pointer compare operations. +// Pointer compares are done with CmpP which is already unsigned. + +instruct compL_rReg(rFlagsReg cr, rRegL op1, rRegL op2) +%{ + match(Set cr (CmpL op1 op2)); + + format %{ "cmpl $op1, $op2\t@compL_rReg" %} + ins_encode %{ + __ cmpl($op1$$Register, $op2$$Register, $cr$$Register); + %} + //ins_pipe(ialu_cr_reg_reg); +%} + +instruct compL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2) +%{ + match(Set cr (CmpL op1 op2)); + + format %{ "cmpl $op1, $op2\t@compL_rReg_imm" %} + ins_encode %{ + __ cmpl($op1$$Register, (int)$op2$$constant, $cr$$Register); + __ stop("immL32's length is OK? jzy"); + %} + //ins_pipe(ialu_cr_reg_imm); +%} + +instruct compL_rReg_mem(rFlagsReg cr, rRegL op1, memory op2) +%{ + match(Set cr (CmpL op1 (LoadL op2))); + + format %{ "cmpl $op1, $op2\t@compL_rReg_mem" %} + ins_encode %{ + __ ldl(rscratch2_AT, $op2$$Address); + __ cmpl($op1$$Register, rscratch2_AT, $cr$$Register); + %} + //ins_pipe(ialu_cr_reg_mem); +%} + +instruct testL_reg(rFlagsReg cr, rRegL src, immL0 zero) +%{ + match(Set cr (CmpL src zero)); + + format %{ "testl $src, $src\t@testL_reg" %} + ins_encode %{ + __ testl($src$$Register, $src$$Register, $cr$$Register); + %} + //ins_pipe(ialu_cr_reg_imm); +%} + +instruct testL_reg_imm(rFlagsReg cr, rRegL src, immL32 con, immL0 zero) +%{ + match(Set cr (CmpL (AndL src con) zero)); + + format %{ "testl $src, $con\t# long\t@testL_reg_imm" %} + ins_encode %{ + __ testl($src$$Register, (int)$con$$constant, $cr$$Register); + %} + //ins_pipe(ialu_cr_reg_imm); +%} + +instruct testL_reg_mem(rFlagsReg cr, rRegL src, memory mem, immL0 zero) +%{ + match(Set cr (CmpL (AndL src (LoadL mem)) zero)); + + format %{ "testl $src, $mem\t@testL_reg_mem" %} + ins_encode %{ + __ ldl(rscratch2_AT, $mem$$Address); + __ testl($src$$Register, rscratch2_AT, $cr$$Register); + %} + //ins_pipe(ialu_cr_reg_mem); +%} + +instruct testL_reg_mem2(rFlagsReg cr, rRegP src, memory mem, immL0 zero) +%{ + match(Set cr (CmpL (AndL (CastP2X src) (LoadL mem)) zero)); + + format %{ "testl $src, $mem\t@testL_reg_mem2" %} + ins_encode %{ + __ ldl(rscratch2_AT, $mem$$Address); + __ testl($src$$Register, rscratch2_AT, $cr$$Register); + %} + //ins_pipe(ialu_cr_reg_mem); +%} +*/ +// Manifest a CmpL result in an integer register. +// (src1 < src2) ? -1 : ((src1 > src2) ? 
1 : 0) +instruct cmpL3_reg_reg(rRegI dst, rRegL src1, rRegL src2) %{ + match(Set dst (CmpL3 src1 src2)); + ins_cost(1000); + format %{ "cmpL3 $dst, $src1, $src2 @ cmpL3_reg_reg" %} + ins_encode %{ + Register opr1 = $src1$$Register; + Register opr2 = $src2$$Register; + Register dst = $dst$$Register; + + Label done; + __ subl(opr1, opr2, rscratch3); + __ subl(R0, 1, dst); + __ blt_l(rscratch3, done); + + __ selgt(rscratch3, 1, R0, dst); + __ BIND(done); + +// __ cmpl(opr1, opr2, rcc); +// __ ldi(rscratch3, -1, R0); +// __ sellt(rcc, rscratch3, R0, dst); +// __ selgt(rcc, 1, dst, dst); + + +// Label done; +// __ cmplt(opr2, opr1, dst); +// __ jcc(Assembler::neq, done); +// __ cmpeq(opr1, opr2, rcc); +// __ ldi(rscratch3, -1); +// __ seleq(rcc, rscratch3, R0, dst); +// __ bind(done); + %} + ins_pipe( pipe_slow ); +%} +// +// less_rsult = -1 +// greater_result = 1 +// equal_result = 0 +// nan_result = -1 +// +instruct cmpF3_reg_reg(rRegI dst, regF src1, regF src2, rFlagsReg cr) %{ + match(Set dst (CmpF3 src1 src2)); + effect(KILL cr); + + //ins_cost(1000); + format %{ "cmpF3 $dst, $src1, $src2 @ cmpF3_reg_reg" %} + ins_encode %{ + FloatRegister src1 = as_FloatRegister($src1$$reg); + FloatRegister src2 = as_FloatRegister($src2$$reg); + Register dst = as_Register($dst$$reg); + + Label Done; + + __ ldi(dst, -1, R0); + __ c_ult_s(src1, src2); + __ fbne(fcc, 4); + + __ movl(dst, R0); + __ c_eq_s(src1, src2); + __ fbne(fcc, 1); + __ ldi(dst, 1, R0); + + __ bind(Done); + %} + ins_pipe( pipe_slow ); +%} + +instruct cmpD3_reg_reg(rRegI dst, regD src1, regD src2, rFlagsReg cr) %{ + match(Set dst (CmpD3 src1 src2)); + effect(KILL cr); + + //ins_cost(1000); + format %{ "cmpD3 $dst, $src1, $src2 @ cmpD3_reg_reg" %} + ins_encode %{ + FloatRegister src1 = as_FloatRegister($src1$$reg); + FloatRegister src2 = as_FloatRegister($src2$$reg); + Register dst = as_Register($dst$$reg); + + Label Done; + + __ ldi(dst, -1, R0); + __ c_ult_d(src1, src2); + __ fbne(fcc, 4); + + __ movl(dst, R0); + __ c_eq_d(src1, src2); + __ fbne(fcc, 1); + __ ldi(dst, 1, R0); + + __ bind(Done); + %} + ins_pipe( pipe_slow ); +%} + +// Unsigned long compare Instructions; really, same as signed long except they +// produce an rFlagsRegU instead of rFlagsReg. +/*instruct compUL_rReg(rFlagsRegU cr, rRegL op1, rRegL op2) +%{ + match(Set cr (CmpUL op1 op2)); + + format %{ "cmpq $op1, $op2\t# unsigned\t@compUL_rReg" %} + ins_encode %{ + __ cmpUL($op1$$Register, $op2$$Register, $cr$$Register); + %} + ins_pipe(ialu_regL_regL); +%}*/ + +/*Cmpxxx useless in SW64 +instruct compUL_rReg_imm(rFlagsRegU cr, rRegL op1, immL32 op2) +%{ + match(Set cr (CmpUL op1 op2)); + + format %{ "cmpl $op1, $op2\t# unsigned\t@compUL_rReg_imm" %} + ins_encode %{ + __ mov_immediate32(rscratch2_AT, (int)$op2$$constant); + __ cmpl($op1$$Register, rscratch2_AT, $cr$$Register); + __ stop("need unsigned edition of cmpw/cmpl? jzy"); + %} + //ins_pipe(ialu_cr_reg_imm); +%} + +instruct compUL_rReg_mem(rFlagsRegU cr, rRegL op1, memory op2) +%{ + match(Set cr (CmpUL op1 (LoadL op2))); + + format %{ "cmpq $op1, $op2\t# unsigned\t@compUL_rReg_mem" %} + ins_encode %{ + __ ldl(rscratch2_AT, $op2$$Address); + __ cmpl($op1$$Register, rscratch2_AT, $cr$$Register); + __ stop("need unsigned edition of cmpw/cmpl? 
jzy"); + %} + //ins_pipe(ialu_cr_reg_mem); +%} + +instruct testUL_reg(rFlagsRegU cr, rRegL src, immL0 zero) +%{ + match(Set cr (CmpUL src zero)); + + format %{ "testq $src, $src\t# unsigned\t@testUL_reg" %} + ins_encode %{ + __ testl($src$$Register, $src$$Register, $cr$$Register); + %} + //ins_pipe(ialu_cr_reg_imm); +%} + +instruct compB_mem_imm(rFlagsReg cr, memory mem, immI8 imm) +%{ + match(Set cr (CmpI (LoadB mem) imm)); + + ins_cost(125); + format %{ "cmpb $mem, $imm\t@compB_mem_imm" %} + ins_encode %{ + __ cmpb($mem$$Address, $imm$$constant, $cr$$Register); + %} + //ins_pipe(ialu_cr_reg_mem); +%} + +instruct testB_mem_imm(rFlagsReg cr, memory mem, immI8 imm, immI0 zero) +%{ + match(Set cr (CmpI (AndI (LoadB mem) imm) zero)); + + ins_cost(125); + format %{ "testb $mem, $imm\t@testB_mem_imm" %} + ins_encode %{ __ testb($mem$$Address, $imm$$constant, $cr$$Register); %} + //ins_pipe(ialu_cr_reg_mem); +%}*/ + +//----------Max and Min-------------------------------------------------------- +// Min Instructions + +instruct minI_Reg_Reg(rRegI dst, rRegI src) %{ + match(Set dst (MinI dst src)); + //effect(KILL flags); + ins_cost(200); + + format %{ "MIN $dst, $src @minI_Reg_Reg" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src$$Register; + + __ cmplt(src, dst, rscratch3); + __ selne(rscratch3, src, dst, dst); + %} + + ins_pipe( pipe_slow ); +%} + + // Max Register with Register (generic version) +instruct maxI_Reg_Reg(rRegI dst, rRegI src) %{ + match(Set dst (MaxI dst src)); + ins_cost(80); + + format %{ "MAX $dst, $src @maxI_Reg_Reg" %} + + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src$$Register; + + __ cmplt(dst, src, rscratch3); + __ selne(rscratch3, src, dst, dst); + + %} + + ins_pipe( pipe_slow ); +%} + +instruct maxI_Reg_zero(rRegI dst, immI0 zero) %{ + match(Set dst (MaxI dst zero)); + ins_cost(50); + + format %{ "MAX $dst, 0 @maxI_Reg_zero" %} + + ins_encode %{ + Register dst = $dst$$Register; + + __ cmplt(dst, R0, rscratch3); + __ selne(rscratch3, R0, dst, dst); + + %} + + ins_pipe( pipe_slow ); +%} + +// ============================================================================ +// Branch Instructions + +// Jump Direct - Label defines a relative address from JMP+1 +instruct jmpDir(label labl) +%{ + match(Goto); + effect(USE labl); + + ins_cost(300); + format %{ "jmp $labl\t@jmpDir" %} + //size(5); + ins_encode %{ + Label* L = $labl$$label; + __ jmp(*L); // Always long jump + %} + ins_pipe(pipe_jmp); //CHECK TODO djx + ins_pc_relative(1); +%} + +// Jump Direct Conditional - Label defines a relative address from Jcc+1 +instruct jmpLoopEnd(cmpOp cop, rRegI src1, rRegI src2, label labl) %{ + match(CountedLoopEnd cop (CmpI src1 src2)); + effect(USE labl); + + ins_cost(300); + format %{ "J$cop $src1, $src2, $labl\t# Loop end @ jmpLoopEnd" %} + ins_encode %{ + Register op1 = $src1$$Register; + Register op2 = $src2$$Register; + Label *L = $labl$$label; + int flag = $cop$$cmpcode; + +// __ cmpl(op1, op2); +// __ jcc((Assembler::Condition)flag, *L); + __ cmpls(flag, op1, op2); + __ bne_l(rcc, *L); + %} + ins_pipe( pipe_jmp ); + ins_pc_relative(1); +%} + + instruct jmpLoopEnd_reg_imm16(cmpOp cop, rRegI src1, immI16_sub src2, label labl) %{ + match(CountedLoopEnd cop (CmpI src1 src2)); + effect(USE labl); + + ins_cost(150); + format %{ "J$cop $src1, $src2, $labl\t# Loop end @ jmpLoopEnd_reg_imm_16_sub" %} + ins_encode %{ + Register op1 = $src1$$Register; + int val = $src2$$constant; + Label *L = $labl$$label; + int flag = $cop$$cmpcode; + + //__ 
ldi(rscratch1_GP, -1 * val, op1); + //__ jcc((Assembler::Condition)flag, *L, rscratch1_GP); + + __ cmpw(op1, val); + __ jcc((Assembler::Condition)flag, *L); + %} + ins_pipe( pipe_jmp ); + ins_pc_relative(1); +%} + + instruct jmpLoopEnd_reg_immI(cmpOp cop, rRegI src1, immI src2, label labl) %{ + match(CountedLoopEnd cop (CmpI src1 src2)); + effect(USE labl); + + ins_cost(300); + format %{ "J$cop $src1, $src2, $labl\t# Loop end @ jmpLoopEnd_reg_immI" %} + ins_encode %{ + Register op1 = $src1$$Register; + Register op2 = rscratch3; + Label *L = $labl$$label; + int flag = $cop$$cmpcode; + + __ mov_immediate32s(op2, $src2$$constant); + __ cmpls(flag, op1, op2); + __ bne_l(rcc, *L); + %} + ins_pipe( pipe_jmp ); + ins_pc_relative(1); +%} + + instruct jmpLoopEnd_reg_immI0(cmpOp cop, rRegI src1, immI0 src2, label labl) %{ + match( CountedLoopEnd cop (CmpI src1 src2) ); + effect(USE labl); + ins_cost(170); + format %{ "J$cop $src1, $src2, $labl\t# Loop end @ jmpLoopEnd_reg_imm0_short" %} + + ins_encode %{ + Register op1 = $src1$$Register; + Label *L = $labl$$label; + int flag = $cop$$cmpcode; + + __ jcc((Assembler::Condition)flag, *L, op1); + %} + + ins_pipe( pipe_jmp ); + ins_pc_relative(1); +//ZLONG ins_short_branch(1); +%} + +// This match pattern is created for StoreIConditional since I cannot match IfNode without a RegFlags! +instruct jmpCon_flags(cmpOp cop, rFlagsReg cr, label labl) %{ + match(If cop cr); + effect(USE labl); + + ins_cost(300); + format %{ "J$cop $labl #sw64 uses GP as eflag @jmpCon_flags" %} + + ins_encode %{ + //__ stop("jmpCon_flags is for StoreIConditional?? lsp"); + Label *L = $labl$$label; + int flag = $cop$$cmpcode; + __ jcc((Assembler::Condition)flag, *L); + /*switch((Assembler::Condition)flag) + { + case Assembler::equal: //equal + __ bne_l($cr$$Register, *L); + break; + case Assembler::notEqual: //not equal + __ beq_l($cr$$Register, *L); + break; + default: + Unimplemented(); + }*/ + %} + + ins_pipe( pipe_jmp ); + ins_pc_relative(1); +%} + +//SW64:OKOK: +instruct branchConP_zero(cmpOpU cmp, rRegP op1, immP0 zero, label labl) %{ + match(If cmp (CmpP op1 zero)); + effect(USE labl); + + ins_cost(180); + format %{ "b$cmp $op1, R0, $labl #@branchConP_zero_short" %} + + ins_encode %{ + Register op1 = $op1$$Register; + Register op2 = R0; + Label * L = $labl$$label; + int flag = $cmp$$cmpcode; + + __ jcc((Assembler::Condition)flag, *L, op1); + %} + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); +//ZLONG ins_short_branch(1); +%} + +instruct branchConN2P_zero_short(cmpOpU cmp, rRegN op1, immP0 zero, label labl) %{ + match(If cmp (CmpP (DecodeN op1) zero)); + predicate(CompressedOops::base() == NULL && CompressedOops::shift() == 0); + effect(USE labl); + + ins_cost(180); + format %{ "b$cmp $op1, R0, $labl #@branchConN2P_zero_short" %} + + ins_encode %{ + Register op1 = $op1$$Register; + Register op2 = R0; + Label * L = $labl$$label; + int flag = $cmp$$cmpcode; + //__ stop("why only use beq&bne? 
sny"); + switch((Assembler::Condition)flag) + { + case Assembler::equal: //equal + __ beq_l(op1, *L); + break; + case Assembler::notEqual: //not_equal + __ bne_l(op1, *L); + break; + default: + Unimplemented(); + } + %} + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); +//ZLONG ins_short_branch(1); +%} + + +instruct branchConP_short(cmpOpU cmp, rRegP op1, rRegP op2, label labl) %{ + match(If cmp (CmpP op1 op2)); +// predicate(can_branch_register(_kids[0]->_leaf, _kids[1]->_leaf)); + effect(USE labl); + + ins_cost(200); + format %{ "b$cmp $op1, $op2, $labl #@branchConP_short" %} + + ins_encode %{ + Register op1 = $op1$$Register; + Register op2 = $op2$$Register; + Label * L = $labl$$label; + int flag = $cmp$$cmpcode; + + __ cmplu(flag, op1, op2); + __ bne_l(rcc, *L);//TODO: add default rcc jzy + %} + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); +//ZLONG ins_short_branch(1); +%} + +instruct cmpN_null_branch_short(cmpOp cmp, rRegN op1, immN0 null, label labl) %{ + match(If cmp (CmpN op1 null)); + effect(USE labl); + + ins_cost(180); + format %{ "CMP $op1,0\t! compressed ptr\n\t" + "BP$cmp $labl @ cmpN_null_branch_short" %} + ins_encode %{ + Register op1 = $op1$$Register; + Register op2 = R0; + Label * L = $labl$$label; + int flag = $cmp$$cmpcode; + + __ jcc((Assembler::Condition)flag, *L, op1); + %} + +//TODO: pipe_branchP or create pipe_branchN LEE + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); +//ZLONG ins_short_branch(1); +%} + +instruct cmpN_reg_branch_short(cmpOpU cmp, rRegN op1, rRegN op2, label labl) %{ + match(If cmp (CmpN op1 op2)); + effect(USE labl); + + ins_cost(180); + format %{ "CMP $op1,$op2\t! compressed ptr\n\t" + "BP$cmp $labl @ cmpN_reg_branch_short" %} + ins_encode %{ + Register op1 = $op1$$Register; + Register op2 = $op2$$Register; + Label * L = $labl$$label; + int flag = $cmp$$cmpcode; + + __ cmplu(flag, op1, op2); + __ bne_l(rcc, *L); + %} + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); +//ZLONG ins_short_branch(1); +%} + +instruct branchConIU_reg_reg_short(cmpOpU cmp, rRegI src1, rRegI src2, label labl) %{ + match( If cmp (CmpU src1 src2) ); + effect(USE labl); + format %{ "BR$cmp $src1, $src2, $labl #@branchConIU_reg_reg_short" %} + + ins_encode %{ + Register op1 = $src1$$Register; + Register op2 = $src2$$Register; + Label * L = $labl$$label; + int flag = $cmp$$cmpcode; +// __ stop("check if op1 & op2 are unsigned //sny"); + __ cmplu((Assembler::Condition)flag, op1, op2); + __ bne_l(rcc, *L); + %} + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); +//ZLONG ins_short_branch(1); +%} + + +instruct branchConIU_reg_imm_short(cmpOpU cmp, rRegI src1, immI src2, label labl) %{ + match( If cmp (CmpU src1 src2) ); + effect(USE labl); + format %{ "BR$cmp $src1, $src2, $labl #@branchConIU_reg_imm_short" %} + + ins_encode %{ + Register op1 = $src1$$Register; + int val = $src2$$constant; + Label * L = $labl$$label; + int flag = $cmp$$cmpcode; + + __ mov_immediate32s(rscratch3, val); + __ cmplu((Assembler::Condition)flag, op1, rscratch3); + __ bne_l(rcc, *L); + %} + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); +%} + +instruct branchConI_reg_reg_short(cmpOp cmp, rRegI src1, rRegI src2, label labl) %{ + match( If cmp (CmpI src1 src2) ); + effect(USE labl); + format %{ "BR$cmp $src1, $src2, $labl #@branchConI_reg_reg_short" %} + + ins_encode %{ + Register op1 = $src1$$Register; + Register op2 = $src2$$Register; + Label * L = $labl$$label; + int flag = $cmp$$cmpcode; + + __ cmpls(flag, op1, op2); + __ bne_l(rcc, *L); + %} + + ins_pc_relative(1); + 
ins_pipe( pipe_alu_branch ); +%} + +instruct branchConI_reg_imm0_short(cmpOp cmp, rRegI src1, immI0 src2, label labl) %{ + match( If cmp (CmpI src1 src2) ); + effect(USE labl); + ins_cost(20); + format %{ "BR$cmp $src1, $src2, $labl #@branchConI_reg_imm0_short" %} + + ins_encode %{ + Register op1 = $src1$$Register; + Label * L = $labl$$label; + int flag = $cmp$$cmpcode; + + __ jcc((Assembler::Condition)flag, *L, op1); + %} + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); +//ZLONG ins_short_branch(1); +%} + +instruct branchConI_reg_imm_short(cmpOp cmp, rRegI src1, immI src2, label labl) %{ + match( If cmp (CmpI src1 src2) ); + effect(USE labl); +// ins_cost(200); + format %{ "BR$cmp $src1, $src2, $labl #@branchConI_reg_imm_short" %} + ins_encode %{ + Register op1 = $src1$$Register; + int val = $src2$$constant; + Label * L = $labl$$label; + int flag = $cmp$$cmpcode; + + __ mov_immediate32s(rscratch3, val); +// __ cmpl(op1, rscratch3); +// __ jcc((Assembler::Condition)flag, *L); + __ cmpls(flag, op1, rscratch3); + __ bne_l(rcc, *L); + %} + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); +//ZLONG ins_short_branch(1); +%} + +instruct branchConIU_reg_imm0_short(cmpOpU cmp, rRegI src1, immI0 zero, label labl) %{ + match( If cmp (CmpU src1 zero) ); + effect(USE labl); + format %{ "BR$cmp $src1, zero, $labl #@branchConIU_reg_imm0_short" %} + + ins_encode %{ + Register op1 = $src1$$Register; + Label * L = $labl$$label; + int flag = $cmp$$cmpcode; +// __ stop("is below necessary? sny"); + switch((Assembler::Condition)flag) { + case Assembler::equal: //equal + __ beq_l(op1, *L); + break; + case Assembler::notEqual: //not_equal + __ bne_l(op1, *L); + break; + case Assembler::above: //above + __ bne_l(op1, *L); + break; + case Assembler::aboveEqual: //above_equal + __ beq_l(R0, *L); + break; + case Assembler::below: //below + Unimplemented(); + return; + break; + case Assembler::belowEqual: //below_equal + __ beq_l(op1, *L); + break; + default: + Unimplemented(); + } + %} + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); +//ZLONG ins_short_branch(1); +%} + +//instruct branchConIU_reg_immI16_short(cmpOpU cmp, rRegI src1, immI16_sub src2, label labl) %{ +// match( If cmp (CmpU src1 src2) ); +// effect(USE labl); +// ins_cost(180); +// format %{ "BR$cmp $src1, $src2, $labl #@branchConIU_reg_immI16_short" %} +// +// ins_encode %{ +// Register op1 = $src1$$Register; +// int val = $src2$$constant; +// Label &L = *($labl$$label); +// int flag = $cmp$$cmpcode; +// +// __ ldi(AT, -1 * val, op1); +// switch(flag) { +// case 0x04: //equal +// if (&L) +// __ beq_l(AT, L); +// else +// __ beq(AT, (int)0); +// break; +// case 0x05: //not_equal +// if (&L) +// __ bne_l(AT, L); +// else +// __ bne(AT, (int)0); +// break; +// case 0x0F: //above +// if(&L) +// __ bgt_l(AT, L); +// else +// __ bgt(AT, (int)0); +// break; +// case 0x0D: //above_equal +// if(&L) +// __ bge_l(AT, L); +// else +// __ bge(AT, (int)0); +// break; +// case 0x0C: //below +// if(&L) +// __ blt_l(AT, L); +// else +// __ blt(AT, (int)0); +// break; +// case 0x0E: //below_equal +// if(&L) +// __ ble_l(AT, L); +// else +// __ ble(AT, (int)0); +// break; +// default: +// Unimplemented(); +// } +// %} +// +// ins_pc_relative(1); +// ins_pipe( pipe_alu_branch ); +////ZLONG ins_short_branch(1); +//%} + +instruct branchConL_regL_regL_short(cmpOp cmp, rRegL src1, rRegL src2, label labl) %{ + match( If cmp (CmpL src1 src2) ); + effect(USE labl); + format %{ "BR$cmp $src1, $src2, $labl #@branchConL_regL_regL_short" %} +// ins_cost(250); + + 
ins_encode %{ + Register op1 = as_Register($src1$$reg); + Register op2 = as_Register($src2$$reg); + + Label * L = $labl$$label; + int flag = $cmp$$cmpcode; + +// __ cmpl(op1, op2); +// __ jcc((Assembler::Condition)flag, *L); + __ cmpls(flag, op1, op2); + __ bne_l(rcc, *L); + %} + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); +//ZLONG ins_short_branch(1); +%} + +instruct branchConI_reg_imm16_sub(cmpOp cmp, rRegI src1, immI16_sub src2, label labl) %{ + match( If cmp (CmpI src1 src2) ); + effect(USE labl); +// ins_cost(180); + format %{ "BR$cmp $src1, $src2, $labl #@branchConI_reg_imm16_sub" %} + + ins_encode %{ + Register op1 = $src1$$Register; + int val = $src2$$constant; + Label * L = $labl$$label; + int flag = $cmp$$cmpcode; + + //__ ldi(rscratch1_GP, -1 * val, op1); + //__ jcc((Assembler::Condition)flag, *L, rscratch1_GP); + + __ cmpw(op1, val); + __ jcc((Assembler::Condition)flag, *L); + %} + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); +%} + +instruct branchConL_regL_immL0_short(cmpOp cmp, rRegL src1, immL0 zero, label labl) %{ + match( If cmp (CmpL src1 zero) ); + effect(USE labl); + format %{ "BR$cmp $src1, zero, $labl #@branchConL_regL_immL0_short" %} + ins_cost(80); + + ins_encode %{ + Register op1 = as_Register($src1$$reg); + Label * L = $labl$$label; + int flag = $cmp$$cmpcode; + + __ jcc((Assembler::Condition)flag, *L, op1); + %} + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); +//ZLONG ins_short_branch(1); +%} + +instruct branchConL_regL_immL_short(cmpOp cmp, rRegL src1, immL src2, label labl) %{ + match( If cmp (CmpL src1 src2) ); + effect(USE labl); + format %{ "BR$cmp $src1, $src2, $labl #@branchConL_regL_immL_short" %} +// ins_cost(100); + + ins_encode %{ + Register op1 = as_Register($src1$$reg); + Register op2 = rscratch2_AT; + + Label * L = $labl$$label; + int flag = $cmp$$cmpcode; + + __ mov_immediate64(op2, $src2$$constant); +// __ cmpl(op1, op2); +// __ jcc((Assembler::Condition)flag, *L); + __ cmpls(flag, op1, op2); + __ bne_l(rcc, *L); + %} + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); +//ZLONG ins_short_branch(1); +%} + +instruct branchConF_reg_reg_short(cmpOp cmp, regF src1, regF src2, label labl) %{ + match( If cmp (CmpF src1 src2) ); + effect(USE labl); + format %{ "BR$cmp $src1, $src2, $labl #@branchConF_reg_reg_short" %} + + ins_encode %{ + FloatRegister op1 = $src1$$FloatRegister; + FloatRegister op2 = $src2$$FloatRegister; + Label * L = $labl$$label; + int flag = $cmp$$cmpcode; + + __ cmpfs(flag, op1, op2, FcmpRES, false); + __ ffbne(FcmpRES, *L); + %} + + ins_pc_relative(1); + ins_pipe(pipe_slow); +//ZLONG ins_short_branch(1); +%} + +instruct branchConD_reg_reg_short(cmpOp cmp, regD src1, regD src2, label labl) %{ + match( If cmp (CmpD src1 src2) ); + effect(USE labl); + format %{ "BR$cmp $src1, $src2, $labl #@branchConD_reg_reg_short" %} + + ins_encode %{ + FloatRegister op1 = $src1$$FloatRegister; + FloatRegister op2 = $src2$$FloatRegister; + Label * L = $labl$$label; + int flag = $cmp$$cmpcode; + + __ cmpfd(flag, op1, op2, FcmpRES, false); + __ ffbne(FcmpRES, *L); + %} + + ins_pc_relative(1); + ins_pipe(pipe_slow); +//ZLONG ins_short_branch(1); +%} + +// mask version +// Jump Direct Conditional - Label defines a relative address from Jcc+1 +/* TODO:jzy what's n->has_vector_mask_set()? 
+instruct jmpLoopEnd_and_restoreMask(cmpOp cop, rFlagsReg cr, label labl) +%{ + predicate(n->has_vector_mask_set()); + match(CountedLoopEnd cop cr); + effect(USE labl); + + ins_cost(400); + format %{ "j$cop $labl\t# loop end\n\t" + "restorevectmask \t# vector mask restore for loops" %} + size(10); + ins_encode %{ + Label* L = $labl$$label; + __ jcc((Assembler::Condition)($cop$$cmpcode), *L, cr); // Always long jump + __ stop("jmpLoopEnd_and_restoreMask stop"); + //__ restorevectmask(); + %} + //ins_pipe(pipe_jcc); +%} + +// Jump Direct Conditional - Label defines a relative address from Jcc+1 +instruct jmpLoopEndU_and_restoreMask(cmpOpU cop, rFlagsRegU cmp, label labl) %{ + predicate(n->has_vector_mask_set()); + match(CountedLoopEnd cop cmp); + effect(USE labl); + + ins_cost(400); + format %{ "j$cop,u $labl\t# loop end\n\t" + "restorevectmask \t# vector mask restore for loops" %} + size(10); + ins_encode %{ + Label* L = $labl$$label; + __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump + __ restorevectmask(); + %} + ins_pipe(pipe_jcc); +%} + +instruct jmpLoopEndUCF_and_restoreMask(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{ + predicate(n->has_vector_mask_set()); + match(CountedLoopEnd cop cmp); + effect(USE labl); + + ins_cost(300); + format %{ "j$cop,u $labl\t# loop end\n\t" + "restorevectmask \t# vector mask restore for loops" %} + size(10); + ins_encode %{ + Label* L = $labl$$label; + __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump + __ restorevectmask(); + %} + ins_pipe(pipe_jcc); +%}*/ + +// Jump Direct Conditional - using unsigned comparison +instruct jmpConU(cmpOpU cop, rFlagsRegU cmp, label labl) %{ + match(If cop cmp); + effect(USE labl); + + ins_cost(300); + format %{ "j$cop,us $labl\t@jmpConU" %} +// size(20); + ins_encode %{ + Label* L = $labl$$label; + __ jccb((Assembler::Condition)($cop$$cmpcode), *L); // Always long jump + %} + ins_pipe( pipe_jmp ); +// ins_pc_relative(1); +%} +/* +instruct jmpConUCF(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{ + match(If cop cmp); + effect(USE labl); + + ins_cost(200); + format %{ "j$cop,u $labl" %} + size(6); + ins_encode %{ + Label* L = $labl$$label; + __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump + %} + ins_pipe(pipe_jcc); +%} + +instruct jmpConUCF2(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{ + match(If cop cmp); + effect(USE labl); + + ins_cost(200); + format %{ $$template + if ($cop$$cmpcode == Assembler::notEqual) { + $$emit$$"jp,u $labl\n\t" + $$emit$$"j$cop,u $labl" + } else { + $$emit$$"jp,u done\n\t" + $$emit$$"j$cop,u $labl\n\t" + $$emit$$"done:" + } + %} + ins_encode %{ + Label* l = $labl$$label; + if ($cop$$cmpcode == Assembler::notEqual) { + __ jcc(Assembler::parity, *l, false); + __ jcc(Assembler::notEqual, *l, false); + } else if ($cop$$cmpcode == Assembler::equal) { + Label done; + __ jccb(Assembler::parity, done); + __ jcc(Assembler::equal, *l, false); + __ bind(done); + } else { + ins_cost(300); + format %{ "j$cop,us $labl\t@jmpConU" %} + // size(20); + ins_encode %{ + Label* L = $labl$$label; + __ jccb((Assembler::Condition)($cop$$cmpcode), *L); // Always long jump + %} + ins_pipe( pipe_jmp ); + // ins_pc_relative(1);); + } + %} + ins_pipe(pipe_jcc); +%} +*/ +/* +// ============================================================================ +// The 2nd slow-half of a subtype check. Scan the subklass's 2ndary +// superklass array for an instance of the superklass. 
Set a hidden +// internal cache on a hit (cache is checked with exposed code in +// gen_subtype_check()). Return NZ for a miss or zero for a hit. The +// encoding ALSO sets flags. + +instruct partialSubtypeCheck(rdi_RegP result, + rsi_RegP sub, rax_RegP super, rcx_RegI rcx, + rFlagsReg cr) +%{ + match(Set result (PartialSubtypeCheck sub super)); + effect(KILL rcx, KILL cr); + + ins_cost(1100); // slightly larger than the next version + format %{ "movq rdi, [$sub + in_bytes(Klass::secondary_supers_offset())]\n\t" + "movl rcx, [rdi + Array::length_offset_in_bytes()]\t# length to scan\n\t" + "addq rdi, Array::base_offset_in_bytes()\t# Skip to start of data; set NZ in case count is zero\n\t" + "repne scasq\t# Scan *rdi++ for a match with rax while rcx--\n\t" + "jne,s miss\t\t# Missed: rdi not-zero\n\t" + "movq [$sub + in_bytes(Klass::secondary_super_cache_offset())], $super\t# Hit: update cache\n\t" + "xorq $result, $result\t\t Hit: rdi zero\n\t" + "miss:\t" %} + + opcode(0x1); // Force a XOR of RDI + ins_encode(enc_PartialSubtypeCheck()); + ins_pipe(pipe_slow); +%} + +instruct partialSubtypeCheck_vs_Zero(rFlagsReg cr, + rsi_RegP sub, rax_RegP super, rcx_RegI rcx, + immP0 zero, + rdi_RegP result) +%{ + match(Set cr (CmpP (PartialSubtypeCheck sub super) zero)); + effect(KILL rcx, KILL result); + + ins_cost(1000); + format %{ "movq rdi, [$sub + in_bytes(Klass::secondary_supers_offset())]\n\t" + "movl rcx, [rdi + Array::length_offset_in_bytes()]\t# length to scan\n\t" + "addq rdi, Array::base_offset_in_bytes()\t# Skip to start of data; set NZ in case count is zero\n\t" + "repne scasq\t# Scan *rdi++ for a match with rax while cx-- != 0\n\t" + "jne,s miss\t\t# Missed: flags nz\n\t" + "movq [$sub + in_bytes(Klass::secondary_super_cache_offset())], $super\t# Hit: update cache\n\t" + "miss:\t" %} + + opcode(0x0); // No need to XOR RDI + ins_encode(enc_PartialSubtypeCheck()); + ins_pipe(pipe_slow); +%} +*/ +// ============================================================================ +// Branch Instructions -- short offset versions +// +// These instructions are used to replace jumps of a long offset (the default +// match) with jumps of a shorter offset. These instructions are all tagged +// with the ins_short_branch attribute, which causes the ADLC to suppress the +// match rules in general matching. Instead, the ADLC generates a conversion +// method in the MachNode which can be used to do in-place replacement of the +// long variant with the shorter variant. The compiler will determine if a +// branch can be taken by the is_short_branch_offset() predicate in the machine +// specific code section of the file. 
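+//
+// For reference only: a minimal sketch (not code emitted by this port; the
+// operand and encoding names are placeholders) of how a long/short pair is
+// normally wired up. The only structural difference is the ins_short_branch(1)
+// attribute on the short form. The *_short rules in this file currently keep
+// that attribute commented out ("//ZLONG ins_short_branch(1);"), so the ADLC
+// does not yet perform the long-to-short conversion described above for them.
+//
+//   instruct branchCon_long(cmpOp cmp, rRegI src1, rRegI src2, label labl) %{
+//     match(If cmp (CmpI src1 src2));
+//     effect(USE labl);
+//     ins_encode %{ /* emit full-range compare and branch */ %}
+//     ins_pipe(pipe_alu_branch);
+//   %}
+//
+//   instruct branchCon_short(cmpOp cmp, rRegI src1, rRegI src2, label labl) %{
+//     match(If cmp (CmpI src1 src2));
+//     effect(USE labl);
+//     ins_encode %{ /* emit limited-displacement compare and branch */ %}
+//     ins_pipe(pipe_alu_branch);
+//     ins_short_branch(1);  // lets the ADLC swap this form in when
+//                           // is_short_branch_offset() accepts the distance
+//   %}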
+ +// Jump Direct - Label defines a relative address from JMP+1 +instruct jmpDir_short(label labl) %{ + match(Goto); + effect(USE labl); + ins_cost(300); + format %{ "JMP $labl #@jmpDir" %} + ins_encode %{ + Label &L = *($labl$$label); + if(&L) + __ beq_l(R0, L); + else + ShouldNotReachHere(); + %} + ins_pipe( pipe_jmp ); + ins_pc_relative(1); +%} + +/* +// Jump Direct Conditional - Label defines a relative address from Jcc+1 +instruct jmpCon_short(cmpOp cop, rFlagsReg cr, label labl) %{ + match(If cop cr); + effect(USE labl); + + ins_cost(300); + format %{ "j$cop,s $labl" %} + size(2); + ins_encode %{ + Label* L = $labl$$label; + __ jccb((Assembler::Condition)($cop$$cmpcode), *L); + %} + ins_pipe(pipe_jcc); + ins_short_branch(1); +%} + +// Jump Direct Conditional - Label defines a relative address from Jcc+1 +instruct jmpLoopEnd_short(cmpOp cop, rFlagsReg cr, label labl) %{ + match(CountedLoopEnd cop cr); + effect(USE labl); + + ins_cost(300); + format %{ "j$cop,s $labl\t# loop end" %} + size(2); + ins_encode %{ + Label* L = $labl$$label; + __ jccb((Assembler::Condition)($cop$$cmpcode), *L); + %} + ins_pipe(pipe_jcc); + ins_short_branch(1); +%} + +// Jump Direct Conditional - Label defines a relative address from Jcc+1 +instruct jmpLoopEndU_short(cmpOpU cop, rFlagsRegU cmp, label labl) %{ + match(CountedLoopEnd cop cmp); + effect(USE labl); + + ins_cost(300); + format %{ "j$cop,us $labl\t# loop end" %} + size(2); + ins_encode %{ + Label* L = $labl$$label; + __ jccb((Assembler::Condition)($cop$$cmpcode), *L); + %} + ins_pipe(pipe_jcc); + ins_short_branch(1); +%} + +instruct jmpLoopEndUCF_short(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{ + match(CountedLoopEnd cop cmp); + effect(USE labl); + + ins_cost(300); + format %{ "j$cop,us $labl\t# loop end" %} + size(2); + ins_encode %{ + Label* L = $labl$$label; + __ jccb((Assembler::Condition)($cop$$cmpcode), *L); + %} + ins_pipe(pipe_jcc); + ins_short_branch(1); +%} + +// Jump Direct Conditional - using unsigned comparison +instruct jmpConU_short(cmpOpU cop, rFlagsRegU cmp, label labl) %{ + match(If cop cmp); + effect(USE labl); + + ins_cost(300); + format %{ "j$cop,us $labl" %} + size(2); + ins_encode %{ + Label* L = $labl$$label; + __ jccb((Assembler::Condition)($cop$$cmpcode), *L); + %} + ins_pipe(pipe_jcc); + ins_short_branch(1); +%} + +instruct jmpConUCF_short(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{ + match(If cop cmp); + effect(USE labl); + + ins_cost(300); + format %{ "j$cop,us $labl" %} + size(2); + ins_encode %{ + Label* L = $labl$$label; + __ jccb((Assembler::Condition)($cop$$cmpcode), *L); + %} + ins_pipe(pipe_jcc); + ins_short_branch(1); +%} + +instruct jmpConUCF2_short(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{ + match(If cop cmp); + effect(USE labl); + + ins_cost(300); + format %{ $$template + if ($cop$$cmpcode == Assembler::notEqual) { + $$emit$$"jp,u,s $labl\n\t" + $$emit$$"j$cop,u,s $labl" + } else { + $$emit$$"jp,u,s done\n\t" + $$emit$$"j$cop,u,s $labl\n\t" + $$emit$$"done:" + } + %} + size(4); + ins_encode %{ + Label* l = $labl$$label; + if ($cop$$cmpcode == Assembler::notEqual) { + __ jccb(Assembler::parity, *l); + __ jccb(Assembler::notEqual, *l); + } else if ($cop$$cmpcode == Assembler::equal) { + Label done; + __ jccb(Assembler::parity, done); + __ jccb(Assembler::equal, *l); + __ bind(done); + } else { + ShouldNotReachHere(); + } + %} + ins_pipe(pipe_jcc); + ins_short_branch(1); +%} +*/ + +// ============================================================================ +// inlined locking and unlocking +/* 
+instruct cmpFastLockRTM(rFlagsReg cr, rRegP object, rbx_RegP box, rax_RegI tmp, rdx_RegI scr, rRegI cx1, rRegI cx2) %{ + predicate(Compile::current()->use_rtm()); + match(Set cr (FastLock object box)); + effect(TEMP tmp, TEMP scr, TEMP cx1, TEMP cx2, USE_KILL box); + ins_cost(300); + format %{ "fastlock $object,$box\t! kills $box,$tmp,$scr,$cx1,$cx2" %} + ins_encode %{ + __ fast_lock($object$$Register, $box$$Register, $tmp$$Register, + $scr$$Register, $cx1$$Register, $cx2$$Register, + _counters, _rtm_counters, _stack_rtm_counters, + ((Method*)(ra_->C->method()->constant_encoding()))->method_data(), + true, ra_->C->profile_rtm()); + %} + ins_pipe(pipe_slow); +%}*/ +instruct cmpFastLock(rFlagsReg cr, rRegP object, s3_RegP box, v0_RegI tmp, rRegP scr) %{ + //predicate(!Compile::current()->use_rtm()); + match(Set cr (FastLock object box));//TODO:check where use cr? jzy + effect(TEMP tmp, TEMP scr, USE_KILL box); + ins_cost(300); + format %{ "fastlock $object,$box\t! kills $box,$tmp,$scr\t@cmpFastLock" %} + ins_encode %{ + //__ stop("need check parameters and implements. jzy?"); + __ fast_lock($object$$Register, $box$$Register, $tmp$$Register, + $scr$$Register, noreg, noreg, _counters, NULL, false, false); + %} + ins_pipe( pipe_slow ); + ins_pc_relative(1); +%} + +instruct cmpFastUnlock(rFlagsReg cr, rRegP object, v0_RegP box, rRegP tmp) %{ + match(Set cr (FastUnlock object box)); + effect(TEMP tmp, USE_KILL box); + ins_cost(300); + format %{ "fastunlock $object,$box\t! kills $box,$tmp\t@cmpFastUnlock" %} + ins_encode %{ + __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register, false); + %} + ins_pipe( pipe_slow ); + ins_pc_relative(1); +%} + + +// ============================================================================ +// Safepoint Instructions +/* no need in sw8 +instruct safePoint_poll(rFlagsReg cr) +%{ + predicate(!Assembler::is_polling_page_far() && SafepointMechanism::uses_global_page_poll()); + match(SafePoint); + effect(KILL cr); + + format %{ "testl rax, [rip + #offset_to_poll_page]\t" + "# Safepoint: poll for GC" %} + ins_cost(125); + ins_encode %{ + //AddressLiteral addr(os::get_polling_page(), relocInfo::poll_type); + //__ testl(rax, addr); + __ stop("safePoint_poll unimplement jzy?"); + %} + ins_pipe(ialu_reg_mem); +%}*/ + +instruct safePoint_poll_far(rFlagsReg cr, rRegP poll) +%{ +// predicate(Assembler::is_polling_page_far() && SafepointMechanism::uses_global_page_poll()); + match(SafePoint poll); + effect(USE poll); + + ins_cost(125); + format %{ "Safepoint @ [$poll] : poll for GC @ safePoint_poll_reg" %} + + ins_encode %{ + Register poll_reg = $poll$$Register; + + __ block_comment("Safepoint:"); + __ relocate(relocInfo::poll_type); + __ ldw(rscratch3, 0, poll_reg); + %} + ins_pipe(ialu_reg_mem); +%} +/* +instruct safePoint_poll_tls(rFlagsReg cr, t0_RegP poll) +%{ + predicate(SafepointMechanism::uses_thread_local_poll()); + match(SafePoint poll); + effect(KILL cr, USE poll); + + format %{ "testl rax, [$poll]\t" + "# Safepoint: poll for GC\t@safePoint_poll_tls why rax? 
jzy" %} + ins_cost(125); +// size(3); + ins_encode %{ + //__ relocate(relocInfo::poll_type); + //address pre_pc = __ pc(); + //__ testl(rax, Address($poll$$Register, 0)); + //address post_pc = __ pc(); + //guarantee(pre_pc[0] == 0x41 && pre_pc[1] == 0x85, "must emit #rex test-ax [reg]"); + __ stop("safePoint_poll_tls unimplement jzy?"); + %} + //ins_pipe(ialu_reg_mem); +%} +*/ +// ============================================================================ +// Procedure Call/Return Instructions +// Call Java Static Instruction +// Note: If this code changes, the corresponding ret_addr_offset() and +// compute_padding() functions will have to be adjusted. +instruct CallStaticJavaDirect(method meth) %{ + match(CallStaticJava); + effect(USE meth); + + ins_cost(300); + format %{ "call,static\t@CallStaticJavaDirect" %} + + ins_encode(Java_Static_Call(meth), call_epilog); + ins_pipe(pipe_slow); + ins_alignment(4); +%} + +// Call Java Dynamic Instruction +// Note: If this code changes, the corresponding ret_addr_offset() and +// compute_padding() functions will have to be adjusted. +instruct CallDynamicJavaDirect(method meth) +%{ + match(CallDynamicJava); + effect(USE meth); + + ins_cost(300); + format %{ "movq v0, #Universe::non_oop_word()\t@CallDynamicJavaDirect\n\t" + "call,dynamic " %} + ins_encode(Java_Dynamic_Call(meth), call_epilog); + ins_pipe(pipe_slow); + ins_alignment(4); +%} + +// Call Runtime Instruction +instruct CallRuntimeDirect(method meth) +%{ + match(CallRuntime); + effect(USE meth); + + ins_cost(300); + format %{ "call,runtime\t@CallRuntimeDirect" %} + ins_encode(sw64_Java_To_Runtime(meth)); + ins_pipe(pipe_slow); +// ins_alignment(16);//lsp todo check +%} + +// Call runtime without safepoint +instruct CallLeafDirect(method meth) +%{ + match(CallLeaf); + effect(USE meth); + + ins_cost(300); + format %{ "call_leaf,runtime\t@CallLeafDirect" %} + ins_encode(sw64_Java_To_Runtime(meth)); + ins_pipe(pipe_slow); + ins_pc_relative(1);//lsp todo check +// ins_alignment(16);//lsp todo check +%} + +/* +instruct CallNativeDirect(method meth) +%{ + match(CallNative); + effect(USE meth); + + ins_cost(300); + format %{ "call_native " %} + ins_encode(clear_avx, Java_To_Runtime(meth)); + ins_pipe(pipe_slow); +%}*/ + +// Call runtime without safepoint +instruct CallLeafNoFPDirect(method meth) +%{ + match(CallLeafNoFP); + effect(USE meth); + + ins_cost(300); + format %{ "call_leaf_nofp,runtime\t@CallLeafNoFPDirect" %} + ins_encode(sw64_Java_To_Runtime(meth)); + ins_pipe(pipe_slow); + ins_pc_relative(1);//lsp todo check +// ins_alignment(16); +%} + +// Return Instruction +// Remove the return address & jump to it. +// Notice: We always emit a nop after a ret to make sure there is room +// for safepoint patching +instruct Ret() +%{ + match(Return); + + format %{ "ret\t@Ret" %} + + ins_encode %{ + __ ret_sw(); + %} + ins_pipe(pipe_jmp); +%} + +// Tail Call; Jump from runtime stub to Java code. +// Also known as an 'interprocedural jump'. +// Target of jump will eventually return to caller. +// TailJump below removes the return address. 
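+//
+// A rough sketch of what the two encodings below reduce to on this port
+// (see the ins_encode bodies for the authoritative sequences):
+//
+//   TailCall:  movl(rmethod, $method_ptr);  // callee expects Method* in rmethod
+//              jmp($jump_target);           // RA is left alone, so the target
+//                                           // eventually returns to our caller
+//
+//   TailJump:  movl(c_rarg2, RA);           // return address becomes the
+//                                           // exception pc for the handler
+//              jmp($jump_target);           // exception oop stays in V0 ($ex_oop)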
+instruct TailCalljmpInd(rRegP jump_target, rRegP method_ptr) +%{ + match(TailCall jump_target method_ptr); + + ins_cost(300); + format %{ "jmp $jump_target\t# rmethod: holds method\t@TailCalljmpInd" %} + + ins_encode %{ + Register target = $jump_target$$Register; + Register oop = $method_ptr$$Register; + //__ stop("check parameters jzy?"); + // RA will be used in generate_forward_exception() +// __ push(RA); + + __ movl(rmethod, oop); + __ jmp(target); + //__ stop("check parameters jzy?"); + %} + ins_pipe(pipe_jmp); +%} + +// Tail Jump; remove the return address; jump to target. +// TailCall above leaves the return address around. +instruct tailjmpInd(rRegP jump_target, v0_RegP ex_oop) +%{ + match(TailJump jump_target ex_oop); + + ins_cost(300); + format %{ "Jmp $jump_target ; ex_oop = $ex_oop\t@tailjmpInd" %} + ins_encode %{ + // V0, c_rarg2 are indicated in: + // [stubGenerator_sw64.cpp] generate_forward_exception() + // [runtime_sw64.cpp] OptoRuntime::generate_exception_blob() + // + Register target = $jump_target$$Register; + Register oop = $ex_oop$$Register; + //Register exception_oop = V0; + Register exception_pc = c_rarg2; + //__ stop("check tailjmpInd lsp"); + __ block_comment(";;tailjmpInd start"); + __ movl(exception_pc, RA); + //__ movl(rax, oop); // oop is same as exception_oop,both are v0 + __ jmp(target); + %} + ins_pipe(pipe_jmp); +%} + +// Create exception oop: created by stack-crawling runtime code. +// Created exception is now available to this handler, and is setup +// just prior to jumping to this handler. No code emitted. +instruct CreateException(v0_RegP ex_oop) +%{ + match(Set ex_oop (CreateEx)); + + size(0); + // use the following format syntax + format %{ "# exception oop is in v0; no code emitted" %} + ins_encode(); + ins_pipe(empty); +%} + +// Rethrow exception: +// The exception oop will come in the first argument position. +// Then JUMP (not call) to the rethrow stub code. +instruct RethrowException() +%{ + match(Rethrow); + + // use the following format syntax + format %{ "jmp rethrow_stub\t@RethrowException" %} + + ins_encode %{ + //__ stop("check RethrowException lsp"); + __ block_comment("@ RethrowException"); + cbuf.set_insts_mark(); + cbuf.relocate(cbuf.insts_mark(), runtime_call_Relocation::spec()); + + // call OptoRuntime::rethrow_stub to get the exception handler in parent method + __ patchable_jump((address)OptoRuntime::rethrow_stub()); + %} + ins_pipe(pipe_jmp); +%} + +instruct ShouldNotReachHere( ) +%{ + match(Halt); + ins_cost(300); + + // Use the following format syntax + format %{ "ILLTRAP ;#@ShouldNotReachHere" %} + ins_encode %{ + // Here we should emit illtrap ! 
+ __ stop("in ShoudNotReachHere"); + %} + ins_pipe( pipe_jmp ); +%} +/* +// +// Execute ZGC load barrier (strong) slow path +// + +// When running without XMM regs +instruct loadBarrierSlowRegNoVec(rRegP dst, memory mem, rFlagsReg cr) %{ + + match(Set dst (LoadBarrierSlowReg mem)); + predicate(MaxVectorSize < 16); + + effect(DEF dst, KILL cr); + + format %{"LoadBarrierSlowRegNoVec $dst, $mem" %} + ins_encode %{ +#if INCLUDE_ZGC + Register d = $dst$$Register; + ZBarrierSetAssembler* bs = (ZBarrierSetAssembler*)BarrierSet::barrier_set()->barrier_set_assembler(); + + assert(d != r12, "Can't be R12!"); + assert(d != r15, "Can't be R15!"); + assert(d != rsp, "Can't be RSP!"); + + __ lea(d, $mem$$Address); + __ call(RuntimeAddress(bs->load_barrier_slow_stub(d))); +#else + ShouldNotReachHere(); +#endif + %} + ins_pipe(pipe_slow); +%} + +// For XMM and YMM enabled processors +instruct loadBarrierSlowRegXmmAndYmm(rRegP dst, memory mem, rFlagsReg cr, + rxmm0 x0, rxmm1 x1, rxmm2 x2,rxmm3 x3, + rxmm4 x4, rxmm5 x5, rxmm6 x6, rxmm7 x7, + rxmm8 x8, rxmm9 x9, rxmm10 x10, rxmm11 x11, + rxmm12 x12, rxmm13 x13, rxmm14 x14, rxmm15 x15) %{ + + match(Set dst (LoadBarrierSlowReg mem)); + predicate((UseSSE > 0) && (UseAVX <= 2) && (MaxVectorSize >= 16)); + + effect(DEF dst, KILL cr, + KILL x0, KILL x1, KILL x2, KILL x3, + KILL x4, KILL x5, KILL x6, KILL x7, + KILL x8, KILL x9, KILL x10, KILL x11, + KILL x12, KILL x13, KILL x14, KILL x15); + + format %{"LoadBarrierSlowRegXmm $dst, $mem" %} + ins_encode %{ +#if INCLUDE_ZGC + Register d = $dst$$Register; + ZBarrierSetAssembler* bs = (ZBarrierSetAssembler*)BarrierSet::barrier_set()->barrier_set_assembler(); + + assert(d != r12, "Can't be R12!"); + assert(d != r15, "Can't be R15!"); + assert(d != rsp, "Can't be RSP!"); + + __ lea(d, $mem$$Address); + __ call(RuntimeAddress(bs->load_barrier_slow_stub(d))); +#else + ShouldNotReachHere(); +#endif + %} + ins_pipe(pipe_slow); +%} + +// For ZMM enabled processors +instruct loadBarrierSlowRegZmm(rRegP dst, memory mem, rFlagsReg cr, + rxmm0 x0, rxmm1 x1, rxmm2 x2,rxmm3 x3, + rxmm4 x4, rxmm5 x5, rxmm6 x6, rxmm7 x7, + rxmm8 x8, rxmm9 x9, rxmm10 x10, rxmm11 x11, + rxmm12 x12, rxmm13 x13, rxmm14 x14, rxmm15 x15, + rxmm16 x16, rxmm17 x17, rxmm18 x18, rxmm19 x19, + rxmm20 x20, rxmm21 x21, rxmm22 x22, rxmm23 x23, + rxmm24 x24, rxmm25 x25, rxmm26 x26, rxmm27 x27, + rxmm28 x28, rxmm29 x29, rxmm30 x30, rxmm31 x31) %{ + + match(Set dst (LoadBarrierSlowReg mem)); + predicate((UseAVX == 3) && (MaxVectorSize >= 16)); + + effect(DEF dst, KILL cr, + KILL x0, KILL x1, KILL x2, KILL x3, + KILL x4, KILL x5, KILL x6, KILL x7, + KILL x8, KILL x9, KILL x10, KILL x11, + KILL x12, KILL x13, KILL x14, KILL x15, + KILL x16, KILL x17, KILL x18, KILL x19, + KILL x20, KILL x21, KILL x22, KILL x23, + KILL x24, KILL x25, KILL x26, KILL x27, + KILL x28, KILL x29, KILL x30, KILL x31); + + format %{"LoadBarrierSlowRegZmm $dst, $mem" %} + ins_encode %{ +#if INCLUDE_ZGC + Register d = $dst$$Register; + ZBarrierSetAssembler* bs = (ZBarrierSetAssembler*)BarrierSet::barrier_set()->barrier_set_assembler(); + + assert(d != r12, "Can't be R12!"); + assert(d != r15, "Can't be R15!"); + assert(d != rsp, "Can't be RSP!"); + + __ lea(d, $mem$$Address); + __ call(RuntimeAddress(bs->load_barrier_slow_stub(d))); +#else + ShouldNotReachHere(); +#endif + %} + ins_pipe(pipe_slow); +%} + +// +// Execute ZGC load barrier (weak) slow path +// + +// When running without XMM regs +instruct loadBarrierWeakSlowRegNoVec(rRegP dst, memory mem, rFlagsReg cr) %{ + + match(Set dst 
(LoadBarrierSlowReg mem)); + predicate(MaxVectorSize < 16); + + effect(DEF dst, KILL cr); + + format %{"LoadBarrierSlowRegNoVec $dst, $mem" %} + ins_encode %{ +#if INCLUDE_ZGC + Register d = $dst$$Register; + ZBarrierSetAssembler* bs = (ZBarrierSetAssembler*)BarrierSet::barrier_set()->barrier_set_assembler(); + + assert(d != r12, "Can't be R12!"); + assert(d != r15, "Can't be R15!"); + assert(d != rsp, "Can't be RSP!"); + + __ lea(d, $mem$$Address); + __ call(RuntimeAddress(bs->load_barrier_weak_slow_stub(d))); +#else + ShouldNotReachHere(); +#endif + %} + ins_pipe(pipe_slow); +%} + +// For XMM and YMM enabled processors +instruct loadBarrierWeakSlowRegXmmAndYmm(rRegP dst, memory mem, rFlagsReg cr, + rxmm0 x0, rxmm1 x1, rxmm2 x2,rxmm3 x3, + rxmm4 x4, rxmm5 x5, rxmm6 x6, rxmm7 x7, + rxmm8 x8, rxmm9 x9, rxmm10 x10, rxmm11 x11, + rxmm12 x12, rxmm13 x13, rxmm14 x14, rxmm15 x15) %{ + + match(Set dst (LoadBarrierWeakSlowReg mem)); + predicate((UseSSE > 0) && (UseAVX <= 2) && (MaxVectorSize >= 16)); + + effect(DEF dst, KILL cr, + KILL x0, KILL x1, KILL x2, KILL x3, + KILL x4, KILL x5, KILL x6, KILL x7, + KILL x8, KILL x9, KILL x10, KILL x11, + KILL x12, KILL x13, KILL x14, KILL x15); + + format %{"LoadBarrierWeakSlowRegXmm $dst, $mem" %} + ins_encode %{ +#if INCLUDE_ZGC + Register d = $dst$$Register; + ZBarrierSetAssembler* bs = (ZBarrierSetAssembler*)BarrierSet::barrier_set()->barrier_set_assembler(); + + assert(d != r12, "Can't be R12!"); + assert(d != r15, "Can't be R15!"); + assert(d != rsp, "Can't be RSP!"); + + __ lea(d,$mem$$Address); + __ call(RuntimeAddress(bs->load_barrier_weak_slow_stub(d))); +#else + ShouldNotReachHere(); +#endif + %} + ins_pipe(pipe_slow); +%} + +// For ZMM enabled processors +instruct loadBarrierWeakSlowRegZmm(rRegP dst, memory mem, rFlagsReg cr, + rxmm0 x0, rxmm1 x1, rxmm2 x2,rxmm3 x3, + rxmm4 x4, rxmm5 x5, rxmm6 x6, rxmm7 x7, + rxmm8 x8, rxmm9 x9, rxmm10 x10, rxmm11 x11, + rxmm12 x12, rxmm13 x13, rxmm14 x14, rxmm15 x15, + rxmm16 x16, rxmm17 x17, rxmm18 x18, rxmm19 x19, + rxmm20 x20, rxmm21 x21, rxmm22 x22, rxmm23 x23, + rxmm24 x24, rxmm25 x25, rxmm26 x26, rxmm27 x27, + rxmm28 x28, rxmm29 x29, rxmm30 x30, rxmm31 x31) %{ + + match(Set dst (LoadBarrierWeakSlowReg mem)); + predicate((UseAVX == 3) && (MaxVectorSize >= 16)); + + effect(DEF dst, KILL cr, + KILL x0, KILL x1, KILL x2, KILL x3, + KILL x4, KILL x5, KILL x6, KILL x7, + KILL x8, KILL x9, KILL x10, KILL x11, + KILL x12, KILL x13, KILL x14, KILL x15, + KILL x16, KILL x17, KILL x18, KILL x19, + KILL x20, KILL x21, KILL x22, KILL x23, + KILL x24, KILL x25, KILL x26, KILL x27, + KILL x28, KILL x29, KILL x30, KILL x31); + + format %{"LoadBarrierWeakSlowRegZmm $dst, $mem" %} + ins_encode %{ +#if INCLUDE_ZGC + Register d = $dst$$Register; + ZBarrierSetAssembler* bs = (ZBarrierSetAssembler*)BarrierSet::barrier_set()->barrier_set_assembler(); + + assert(d != r12, "Can't be R12!"); + assert(d != r15, "Can't be R15!"); + assert(d != rsp, "Can't be RSP!"); + + __ lea(d,$mem$$Address); + __ call(RuntimeAddress(bs->load_barrier_weak_slow_stub(d))); +#else + ShouldNotReachHere(); +#endif + %} + ins_pipe(pipe_slow); +%} +*/ +// ============================================================================ +// This name is KNOWN by the ADLC and cannot be changed. +// The ADLC forces a 'TypeRawPtr::BOTTOM' output type +// for this guy. 
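+//
+// Note: because TLS (the current JavaThread) lives in S2 on this port, the
+// rule below is effectively a zero-size register rename: matching
+// (Set dst (ThreadLocal)) just binds dst to S2 and emits no instructions.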
+instruct tlsLoadP(s2_RegP dst) %{ + match(Set dst (ThreadLocal)); + effect(DEF dst); + + size(0); + format %{ "# TLS is in S2" %} + ins_encode( /*empty encoding*/ ); + ins_pipe(empty); +%} +/* + +//----------PEEPHOLE RULES----------------------------------------------------- +// These must follow all instruction definitions as they use the names +// defined in the instructions definitions. +// +// peepmatch ( root_instr_name [preceding_instruction]* ); +// +// peepconstraint %{ +// (instruction_number.operand_name relational_op instruction_number.operand_name +// [, ...] ); +// // instruction numbers are zero-based using left to right order in peepmatch +// +// peepreplace ( instr_name ( [instruction_number.operand_name]* ) ); +// // provide an instruction_number.operand_name for each operand that appears +// // in the replacement instruction's match rule +// +// ---------VM FLAGS--------------------------------------------------------- +// +// All peephole optimizations can be turned off using -XX:-OptoPeephole +// +// Each peephole rule is given an identifying number starting with zero and +// increasing by one in the order seen by the parser. An individual peephole +// can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=# +// on the command-line. +// +// ---------CURRENT LIMITATIONS---------------------------------------------- +// +// Only match adjacent instructions in same basic block +// Only equality constraints +// Only constraints between operands, not (0.dest_reg == RAX_enc) +// Only one replacement instruction +// +// ---------EXAMPLE---------------------------------------------------------- +// +// // pertinent parts of existing instructions in architecture description +// instruct movI(rRegI dst, rRegI src) +// %{ +// match(Set dst (CopyI src)); +// %} +// +// instruct incI_rReg(rRegI dst, immI1 src, rFlagsReg cr) +// %{ +// match(Set dst (AddI dst src)); +// effect(KILL cr); +// %} +// +// // Change (inc mov) to lea +// peephole %{ +// // increment preceeded by register-register move +// peepmatch ( incI_rReg movI ); +// // require that the destination register of the increment +// // match the destination register of the move +// peepconstraint ( 0.dst == 1.dst ); +// // construct a replacement instruction that sets +// // the destination to ( move's source register + one ) +// peepreplace ( leaI_rReg_immI( 0.dst 1.src 0.src ) ); +// %} +// + +// Implementation no longer uses movX instructions since +// machine-independent system no longer uses CopyX nodes. 
+// +// peephole +// %{ +// peepmatch (incI_rReg movI); +// peepconstraint (0.dst == 1.dst); +// peepreplace (leaI_rReg_immI(0.dst 1.src 0.src)); +// %} + +// peephole +// %{ +// peepmatch (decI_rReg movI); +// peepconstraint (0.dst == 1.dst); +// peepreplace (leaI_rReg_immI(0.dst 1.src 0.src)); +// %} + +// peephole +// %{ +// peepmatch (addI_rReg_imm movI); +// peepconstraint (0.dst == 1.dst); +// peepreplace (leaI_rReg_immI(0.dst 1.src 0.src)); +// %} + +// peephole +// %{ +// peepmatch (incL_rReg movL); +// peepconstraint (0.dst == 1.dst); +// peepreplace (leaL_rReg_immL(0.dst 1.src 0.src)); +// %} + +// peephole +// %{ +// peepmatch (decL_rReg movL); +// peepconstraint (0.dst == 1.dst); +// peepreplace (leaL_rReg_immL(0.dst 1.src 0.src)); +// %} + +// peephole +// %{ +// peepmatch (addL_rReg_imm movL); +// peepconstraint (0.dst == 1.dst); +// peepreplace (leaL_rReg_immL(0.dst 1.src 0.src)); +// %} + +// peephole +// %{ +// peepmatch (addP_rReg_imm movP); +// peepconstraint (0.dst == 1.dst); +// peepreplace (leaP_rReg_imm(0.dst 1.src 0.src)); +// %} + +// // Change load of spilled value to only a spill +// instruct storeI(memory mem, rRegI src) +// %{ +// match(Set mem (StoreI mem src)); +// %} +// +// instruct loadI(rRegI dst, memory mem) +// %{ +// match(Set dst (LoadI mem)); +// %} +// + +peephole +%{ + peepmatch (loadI storeI); + peepconstraint (1.src == 0.dst, 1.mem == 0.mem); + peepreplace (storeI(1.mem 1.mem 1.src)); +%} + +peephole +%{ + peepmatch (loadL storeL); + peepconstraint (1.src == 0.dst, 1.mem == 0.mem); + peepreplace (storeL(1.mem 1.mem 1.src)); +%} + +//----------SMARTSPILL RULES--------------------------------------------------- +// These must follow all instruction definitions as they use the names +// defined in the instructions definitions. +*/ diff --git a/src/hotspot/cpu/sw64/sw64Test.cpp b/src/hotspot/cpu/sw64/sw64Test.cpp new file mode 100644 index 00000000000..5ff952f91d9 --- /dev/null +++ b/src/hotspot/cpu/sw64/sw64Test.cpp @@ -0,0 +1,104 @@ +/* + * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include + +#include "runtime/java.hpp" +#include "precompiled.hpp" +#include "code/codeBlob.hpp" +#include "asm/macroAssembler.hpp" +#include "runtime/stubCodeGenerator.hpp" + +// hook routine called during JVM bootstrap to test AArch64 assembler + +extern "C" void entry(CodeBuffer*); + + +class X_Generator: public StubCodeGenerator { + public: + X_Generator(CodeBuffer *c, bool print_code = true) : StubCodeGenerator(c, print_code) {} + + + address generate_getGenerateInfo() { + StubCodeMark mark(this, "VM_Version", "getGenerateInfo"); + +# define __ _masm-> + address start = __ pc(); + + __ movl(V0, c_rarg0); // Copy to eax we need a return value anyhow + __ xchgptr(V0, Address(c_rarg1, 0)); // automatic LOCK + __ ret(); + +# undef __ + return start; + } +}; + +void directTestCode() +{ + BufferBlob* b = BufferBlob::create("sw64Test", 500000); + CodeBuffer code(b); + MacroAssembler _masm(&code); + //entry(&code); +#define _masm __ + +#undef __ +} + + +extern "C" { + typedef void (*getGenerateStub_t)(void*); +} +static getGenerateStub_t getGenerateStub = NULL; + +void sw64TestHook() +{ +#ifdef ASSERT + //direct test generatecode + { + directTestCode(); + } + + //test generation code by StubGenerator + { + { + ResourceMark rm; + + BufferBlob* stub_blob = BufferBlob::create("sw64TestHook_stub", 500000); + if (stub_blob == NULL) { + vm_exit_during_initialization("Unable to allocate sw64TestHook_stub"); + } + + CodeBuffer c(stub_blob); + X_Generator g(&c, false); + getGenerateStub = CAST_TO_FN_PTR(getGenerateStub_t, + g.generate_getGenerateInfo()); + } + + address arg0; + getGenerateStub_t((void*)arg0); + } + +#endif +} diff --git a/src/hotspot/cpu/sw64/sw64_ad.m4 b/src/hotspot/cpu/sw64/sw64_ad.m4 new file mode 100644 index 00000000000..e69de29bb2d diff --git a/src/hotspot/cpu/sw64/templateInterpreterGenerator_sw64.cpp b/src/hotspot/cpu/sw64/templateInterpreterGenerator_sw64.cpp new file mode 100644 index 00000000000..78899e83c2e --- /dev/null +++ b/src/hotspot/cpu/sw64/templateInterpreterGenerator_sw64.cpp @@ -0,0 +1,1977 @@ +/* + * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "classfile/javaClasses.hpp" +#include "compiler/compiler_globals.hpp" +#include "compiler/disassembler.hpp" +#include "gc/shared/barrierSetAssembler.hpp" +#include "interpreter/bytecodeHistogram.hpp" +#include "interpreter/interp_masm.hpp" +#include "interpreter/interpreter.hpp" +#include "interpreter/interpreterRuntime.hpp" +#include "interpreter/templateInterpreterGenerator.hpp" +#include "interpreter/templateTable.hpp" +#include "oops/arrayOop.hpp" +#include "oops/methodData.hpp" +#include "oops/method.hpp" +#include "oops/oop.inline.hpp" +#include "prims/jvmtiExport.hpp" +#include "prims/jvmtiThreadState.hpp" +#include "runtime/deoptimization.hpp" +#include "runtime/frame.inline.hpp" +#include "runtime/jniHandles.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" +#include "runtime/synchronizer.hpp" +#include "runtime/timer.hpp" +#include "runtime/vframeArray.hpp" +#include "utilities/debug.hpp" +#include "utilities/macros.hpp" + +#define __ Disassembler::hook(__FILE__, __LINE__, _masm)-> + +// Size of interpreter code. Increase if too small. Interpreter will +// fail with a guarantee ("not enough space for interpreter generation"); +// if too small. +// Run with +PrintInterpreter to get the VM to print out the size. +// Max size with JVMTI +int TemplateInterpreter::InterpreterCodeSize = JVMCI_ONLY(268) NOT_JVMCI(256) * 1024; + + +const int method_offset = frame::interpreter_frame_method_offset * wordSize; +const int bcp_offset = frame::interpreter_frame_bcp_offset * wordSize; +const int locals_offset = frame::interpreter_frame_locals_offset * wordSize; + +#ifdef PRODUCT +#define BLOCK_COMMENT(str) /* nothing */ +#else +#define BLOCK_COMMENT(str) { char line[1024];sprintf(line,"%s:%s:%d",str,__FILE__, __LINE__); __ block_comment(line);} +#endif +#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") + +//----------------------------------------------------------------------------- + +extern "C" void entry(CodeBuffer*); + +//----------------------------------------------------------------------------- + +address TemplateInterpreterGenerator::generate_StackOverflowError_handler() { + BLOCK_COMMENT("generate_StackOverflowError_handler enter"); //__ warn("TODO:check function right generate_StackOverflowError_handler jzy "); + address entry = __ pc(); + Register rax = V0; + +#ifdef ASSERT + { + Label L; + __ lea(rax, Address(rfp, + frame::interpreter_frame_monitor_block_top_offset * + wordSize)); + __ cmpptr(rax, esp); // rax = maximal rsp for current rbp (stack + // grows negative) + __ jcc(Assembler::aboveEqual, L); // check if frame is complete + __ stop ("interpreter frame not set up"); + __ bind(L); + } +#endif // ASSERT + // Restore bcp under the assumption that the current frame is still + // interpreted + __ restore_bcp(); + + // expression stack must be empty before entering the VM if an + // exception happened + __ empty_expression_stack(); + // throw exception + __ call_VM(noreg, + CAST_FROM_FN_PTR(address, + InterpreterRuntime::throw_StackOverflowError)); + return entry; +} + +address TemplateInterpreterGenerator::generate_ArrayIndexOutOfBounds_handler() {SCOPEMARK_NAME(TemplateInterpreterGenerator::generate_ArrayIndexOutOfBounds_handler, _masm) + address entry = __ pc(); + // The expression stack must be empty before entering the VM if an + // exception happened + __ empty_expression_stack(); + + Register rarg = c_rarg1; + __ call_VM(noreg, 
CAST_FROM_FN_PTR(address, + InterpreterRuntime:: + throw_ArrayIndexOutOfBoundsException), + rarg, c_rarg2); + return entry; +} + +address TemplateInterpreterGenerator::generate_ClassCastException_handler() {//__ warn("TODO:check function right generate_ClassCastException_handler jzy "); + address entry = __ pc(); + + // object is at TOS + Register rarg = c_rarg1; + __ pop(rarg); + + // expression stack must be empty before entering the VM if an + // exception happened + __ empty_expression_stack(); + + __ call_VM(noreg, + CAST_FROM_FN_PTR(address, + InterpreterRuntime:: + throw_ClassCastException), + rarg);BLOCK_COMMENT("generate_ClassCastException_handler leave"); + return entry; +} + +address TemplateInterpreterGenerator::generate_exception_handler_common( + const char* name, const char* message, bool pass_oop) {SCOPEMARK_NAME(TemplateInterpreterGenerator::generate_exception_handler_common, _masm) + assert(!pass_oop || message == NULL, "either oop or message but not both"); + address entry = __ pc(); + + Register rarg = c_rarg1; + Register rarg2 = c_rarg2; + Register rax = V0; + + if (pass_oop) { + // object is at TOS + __ pop(rarg2); + } + // expression stack must be empty before entering the VM if an + // exception happened + __ empty_expression_stack(); + // setup parameters + __ lea(rarg, ExternalAddress((address)name)); + if (pass_oop) { + __ call_VM(rax, CAST_FROM_FN_PTR(address, + InterpreterRuntime:: + create_klass_exception), + rarg, rarg2); + } else { + __ lea(rarg2, ExternalAddress((address)message)); + __ call_VM(rax, + CAST_FROM_FN_PTR(address, InterpreterRuntime::create_exception), + rarg, rarg2); + } + // throw exception + __ jump(ExternalAddress(Interpreter::throw_exception_entry())); + return entry; +} + +address TemplateInterpreterGenerator::generate_return_entry_for(TosState state, int step, size_t index_size) {SCOPEMARK_NAME(TemplateInterpreterGenerator::generate_return_entry_for, _masm); + address entry = __ pc(); + Register rax = FSR; + + // Restore stack bottom in case i2c adjusted stack + __ ldptr(esp, Address(rfp, frame::interpreter_frame_last_sp_offset * wordSize)); + // and NULL it as marker that esp is now tos until next java call + __ stptr(R0, Address(rfp, frame::interpreter_frame_last_sp_offset * wordSize)); + + __ restore_bcp(); + __ restore_locals(); + + if (state == atos) { + Register mdp = rscratch1; + Register tmp = rscratch2; + __ profile_return_type(mdp, rax, tmp); + } + + const Register cache = rscratch1; + const Register index = rscratch2; + __ get_cache_and_index_at_bcp(cache, index, 1, index_size); + + const Register flags = cache; + __ ldw(flags, Address(cache, index, Address::times_ptr, ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::flags_offset())); + __ andw(flags, ConstantPoolCacheEntry::parameter_size_mask, flags); + __ lea(esp, Address(esp, flags, Interpreter::stackElementScale())); + + const Register java_thread = rthread; + if (JvmtiExport::can_pop_frame()) { + __ check_and_handle_popframe(java_thread); + } + if (JvmtiExport::can_force_early_return()) { + __ check_and_handle_earlyret(java_thread); + } + + __ dispatch_next(state, step); + + return entry; +} + + +address TemplateInterpreterGenerator::generate_deopt_entry_for(TosState state, int step, address continuation) {BLOCK_COMMENT("generate_deopt_entry_for enter");//__ warn("TODO:check function right generate_deopt_entry_for jzy "); + address entry = __ pc(); + // NULL last_sp until next java call + __ stptr(R0, Address(rfp, frame::interpreter_frame_last_sp_offset * 
wordSize)); + __ restore_bcp(); + __ restore_locals(); + Register rbx = rmethod; +#if INCLUDE_JVMCI + // Check if we need to take lock at entry of synchronized method. This can + // only occur on method entry so emit it only for vtos with step 0. + if ((EnableJVMCI) && state == vtos && step == 0) { + Label L; + __ ldbu(rcc, Address(rthread, JavaThread::pending_monitorenter_offset())); + __ jcc(Assembler::zero, L); + // Clear flag. + __ stb(R0, Address(rthread, JavaThread::pending_monitorenter_offset())); + // Satisfy calling convention for lock_method(). + __ get_method(rbx); + // Take lock. + lock_method(); + __ bind(L); + } else { +#ifdef ASSERT + if (EnableJVMCI) { + Label L; + __ ldbu(rscratch3, Address(rthread, JavaThread::pending_monitorenter_offset())); + __ cmpw(rscratch3, R0); + __ jcc(Assembler::zero, L); + __ stop("unexpected pending monitor in deopt entry"); + __ bind(L); + } +#endif + } +#endif + // handle exceptions + { + Label L; + __ cmpptr(Address(rthread, Thread::pending_exception_offset()), R0); + __ jcc(Assembler::zero, L); + __ call_VM(noreg, + CAST_FROM_FN_PTR(address, + InterpreterRuntime::throw_pending_exception)); + __ should_not_reach_here("260"); + __ bind(L); + } + if (continuation == NULL) { + __ dispatch_next(state, step); + } else { + __ jump_to_entry(continuation); + }BLOCK_COMMENT("generate_deopt_entry_for leave"); + return entry; +} + +address TemplateInterpreterGenerator::generate_result_handler_for( + BasicType type) {BLOCK_COMMENT("generate_result_handler_for enter");//__ warn("TODO:check function right generate_result_handler_for jzy "); + address entry = __ pc(); + switch (type) { + case T_BOOLEAN: __ c2bool(V0); break; + case T_CHAR : __ zapnot(V0, 0x3, V0); break; + case T_BYTE : __ sign_extend_byte (V0); break; + case T_SHORT : __ sign_extend_short(V0); break; + case T_INT : /* nothing to do */ break; + case T_LONG : /* nothing to do */ break; + case T_VOID : /* nothing to do */ break; + case T_FLOAT : /* nothing to do */ break; + case T_DOUBLE : /* nothing to do */ break; + case T_OBJECT : + // retrieve result from frame + __ ldptr(V0, Address(rfp, frame::interpreter_frame_oop_temp_offset*wordSize)); + // and verify it + __ verify_oop(V0); + break; + default : ShouldNotReachHere(); + } + __ ret_sw(); // return from result handler + BLOCK_COMMENT("generate_result_handler_for leave"); + return entry; +} + +address TemplateInterpreterGenerator::generate_safept_entry_for( + TosState state, + address runtime_entry) {BLOCK_COMMENT("generate_safept_entry_for enter");//__ warn("TODO:check function right generate_safept_entry_for jzy "); + address entry = __ pc(); + __ push(state); + __ call_VM(noreg, runtime_entry); + __ memb(); + __ dispatch_via(vtos, Interpreter::_normal_table.table_for(vtos));BLOCK_COMMENT("generate_safept_entry_for leave"); + return entry; +} + + + +// Helpers for commoning out cases in the various type of method entries. +// + + +// increment invocation count & check for overflow +// +// Note: checking for negative value instead of overflow +// so we have a 'sticky' overflow test +// +// rmethod: method +// +void TemplateInterpreterGenerator::generate_counter_incr(Label* overflow) {SCOPEMARK_NAME(generate_counter_incr, _masm); + Label done; + Register rax = FSR; + Register rcx = rscratch1; + + int increment = InvocationCounter::count_increment; + Label no_mdo; + if (ProfileInterpreter) { + // Are we profiling? 
+ __ ldptr(rax, Address(rmethod, Method::method_data_offset())); + __ jcc(Assembler::zero, no_mdo, rax); + // Increment counter in the MDO + const Address mdo_invocation_counter(rax, in_bytes(MethodData::invocation_counter_offset()) + + in_bytes(InvocationCounter::counter_offset())); + const Address mask(rax, in_bytes(MethodData::invoke_mask_offset())); + __ increment_mask_and_jump(mdo_invocation_counter, increment, mask, rcx, false, Assembler::zero, overflow); + __ jmp(done); + } + __ bind(no_mdo); + // Increment counter in MethodCounters + const Address invocation_counter(rax, + MethodCounters::invocation_counter_offset() + + InvocationCounter::counter_offset()); + __ get_method_counters(rmethod, rax, done); + const Address mask(rax, in_bytes(MethodCounters::invoke_mask_offset())); + __ increment_mask_and_jump(invocation_counter, increment, mask, rcx, + false, Assembler::zero, overflow); + __ bind(done); +} + +void TemplateInterpreterGenerator::generate_counter_overflow(Label& do_continue) {BLOCK_COMMENT("generate_counter_overflow enter"); //__ warn("TODO:check function right generate_counter_overflow jzy"); + + // Asm interpreter on entry + // rlocals - locals + // rbcp - bcp + // rmethod - method + // rfp - interpreter frame + + // On return (i.e. jump to entry_point) + // rmethod - method + // RA - return address of interpreter caller + // tos - the last parameter to Java method + // SP - sender_sp + + // InterpreterRuntime::frequency_counter_overflow takes two + // arguments, the first (thread) is passed by call_VM, the second + // indicates if the counter overflow occurs at a backwards branch + // (NULL bcp). We pass zero for it. The call returns the address + // of the verified entry point for the method or NULL if the + // compilation did not complete (either went background or bailed + // out). + Register rarg = c_rarg1; + __ movw(rarg, (u_int32_t)0); + __ call_VM(noreg, + CAST_FROM_FN_PTR(address, + InterpreterRuntime::frequency_counter_overflow), + rarg); + + __ ldptr(rmethod, Address(rfp, method_offset)); // restore Method* + // Preserve invariant that r13/r14 contain bcp/locals of sender frame + // and jump to the interpreted entry. + __ jmp(do_continue);BLOCK_COMMENT("generate_counter_overflow leave"); +} + +// See if we've got enough room on the stack for locals plus overhead +// below JavaThread::stack_overflow_limit(). If not, throw a StackOverflowError +// without going through the signal handler, i.e., reserved and yellow zones +// will not be made usable. The shadow zone must suffice to handle the +// overflow. +// The expression stack grows down incrementally, so the normal guard +// page mechanism will work for that. +// +// NOTE: Since the additional locals are also always pushed (wasn't +// obvious in generate_method_entry) so the guard should work for them +// too. +// +// Args: +// c_rarg5: number of additional locals this frame needs (what we must check) +// rmethod: Method* +// +void TemplateInterpreterGenerator::generate_stack_overflow_check(void) {BLOCK_COMMENT("generate_stack_overflow_check enter");//__ warn("TODO:check function right generate_stack_overflow_check jzy"); + // see if we've got enough room on the stack for locals plus overhead. + // the expression stack grows down incrementally, so the normal guard + // page mechanism will work for that. 
+ // + // Registers live on entry: + // + // rmethod: Method* + // rdx: number of additional locals this frame needs (what we must check) + // register is special, should be same register in generate_normal_entry + + // killed: V0, rscratch2 + // NOTE: since the additional locals are also always pushed (wasn't obvious in + // generate_method_entry) so the guard should work for them too. + // + const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; + + // total overhead size: entry_size + (saved fp thru expr stack bottom). + // be sure to change this if you add/subtract anything to/from the overhead area + const int overhead_size = + -(frame::interpreter_frame_initial_sp_offset * wordSize) + entry_size; + + const int page_size = os::vm_page_size(); + + Label after_frame_check; + //Register rdx = c_rarg5;//TODO:why set this? jzy + //Register rax = V0; + + // see if the frame is greater than one page in size. If so, + // then we need to verify there is enough stack space remaining + // for the additional locals. + __ cmpw(rdx, (page_size - overhead_size) / Interpreter::stackElementSize); + __ jcc(Assembler::belowEqual, after_frame_check); + + // compute sp as if this were going to be the last frame on + // the stack before the red zone + + + Label after_frame_check_pop; + + const Address stack_limit(rthread, JavaThread::stack_overflow_limit_offset()); + + // locals + overhead, in bytes + __ movl(rax, rdx); + __ slll(rax, Interpreter::logStackElementSize, rax); // Convert parameter count to bytes. + __ addptr(rax, overhead_size, rax); + +#ifdef ASSERT + Label limit_okay; + // Verify that thread stack overflow limit is non-zero. + __ cmpptr(stack_limit, R0); + __ jcc(Assembler::notEqual, limit_okay); + __ stop("stack overflow limit is zero"); + __ bind(limit_okay); +#endif + + // Add locals/frame size to stack limit. + __ ldptr(rscratch2, stack_limit); + __ addptr(rax, rscratch2, rax); + + // check against the current stack bottom + __ cmpptr(esp, rax); + __ jcc(Assembler::above, after_frame_check); + + // Restore sender's sp as SP. This is necessary if the sender's + // frame is an extended compiled frame (see gen_c2i_adapter()) + // and safer anyway in case of JSR292 adaptations. + + __ movl(esp, rsender); + + // Note: the restored frame is not necessarily interpreted. + // Use the shared runtime version of the StackOverflowError. 
+ assert(StubRoutines::throw_StackOverflowError_entry() != NULL, "stub not yet generated"); + __ jump(ExternalAddress(StubRoutines::throw_StackOverflowError_entry())); + + // all done with frame size check + __ bind(after_frame_check);BLOCK_COMMENT("generate_stack_overflow_check leave"); +} + +// Allocate monitor and lock method (asm interpreter) +// +// Args: +// rmethod: Method* +// rlocals: locals +// +// Kills: +// rax +// c_rarg0, c_rarg1, c_rarg2, c_rarg3, ...(param regs) +// rscratch1, rscratch2 (scratch regs) +void TemplateInterpreterGenerator::lock_method() {BLOCK_COMMENT("lock_method enter"); + const Register rbx = rmethod; + const Register rax = FSR; + + // synchronize method + const Address access_flags(rbx, Method::access_flags_offset()); + const Address monitor_block_top( + rfp, + frame::interpreter_frame_monitor_block_top_offset * wordSize); + const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; + +#ifdef ASSERT + { + Label L; + __ ldw(rax, access_flags); + __ testw(rax, JVM_ACC_SYNCHRONIZED); + __ jcc(Assembler::notZero, L); + __ stop("method doesn't need synchronization"); + __ bind(L); + } +#endif // ASSERT + + // get synchronization object + { + Label done; + __ ldw(rax, access_flags); + __ testw(rax, JVM_ACC_STATIC); + // get receiver (assume this is frequent case) + __ ldptr(rax, Address(rlocals, Interpreter::local_offset_in_bytes(0))); + __ jcc(Assembler::zero, done); + __ load_mirror(rax, rbx); + +#ifdef ASSERT + { + Label L; + __ jcc(Assembler::notZero, L, rax); + __ stop("synchronization object is NULL"); + __ bind(L); + } +#endif // ASSERT + + __ bind(done); + } + + // add space for monitor & lock + __ subptr(esp, entry_size, esp); // add space for a monitor entry + __ stptr(esp, monitor_block_top); // set new monitor block top + // store object + __ stptr(rax, Address(esp, BasicObjectLock::obj_offset_in_bytes())); + const Register lockreg = c_rarg1; + __ movl(lockreg, esp); // object address + __ lock_object(lockreg);BLOCK_COMMENT("lock_method leave"); +} + +// Generate a fixed interpreter frame. This is identical setup for +// interpreted methods and for native methods hence the shared code. 
+// +// Args: +// RA: return address +// rbx: Method* +// r14: pointer to locals +// r13: sender sp +// rdx: cp cache +void TemplateInterpreterGenerator::generate_fixed_frame(bool native_call) {SCOPEMARK_NAME(generate_fixed_frame, _masm); + const Register rbx = rmethod; + const Register rdx = rscratch1; + + // initialize fixed part of activation frame + __ enter(); // save old & set new rbp + __ push(rsender); // set sender sp + __ push((int)NULL_WORD); // leave last_sp as null + __ ldptr(rbcp, Address(rbx, Method::const_offset())); // get ConstMethod* + __ lea(rbcp, Address(rbcp, ConstMethod::codes_offset())); // get codebase + __ push(rbx); // save Method* + // Get mirror and store it in the frame as GC root for this Method* + __ load_mirror(rdx, rbx, rscratch2); // + __ push(rdx); + if (ProfileInterpreter) { // + Label method_data_continue; + __ ldptr(rdx, Address(rbx, in_bytes(Method::method_data_offset()))); + __ testptr(rdx, rdx); + __ jcc(Assembler::zero, method_data_continue); + __ addptr(rdx, in_bytes(MethodData::data_offset()), rdx); + __ bind(method_data_continue); + __ push(rdx); // set the mdp (method data pointer) + } else { + __ push(0); + } + + __ ldptr(rdx, Address(rbx, Method::const_offset())); + __ ldptr(rdx, Address(rdx, ConstMethod::constants_offset())); + __ ldptr(rdx, Address(rdx, ConstantPool::cache_offset_in_bytes())); + __ push(rdx); // set constant pool cache + __ push(rlocals); // set locals pointer + if (native_call) { + __ push(0); // no bcp + } else { + __ push(rbcp); // set bcp + } + __ push(0); // reserve word for pointer to expression stack bottom + __ stptr(esp, Address(esp, 0)); // set expression stack bottom +} + +// End of helpers + +// Method entry for java.lang.ref.Reference.get. +address TemplateInterpreterGenerator::generate_Reference_get_entry(void) {BLOCK_COMMENT("generate_Reference_get_entry enter"); + // Code: _aload_0, _getfield, _areturn + // parameter size = 1 + // + // The code that gets generated by this routine is split into 2 parts: + // 1. The "intrinsified" code performing an ON_WEAK_OOP_REF load, + // 2. The slow path - which is an expansion of the regular method entry. + // + // Notes:- + // * An intrinsic is always executed, where an ON_WEAK_OOP_REF load is performed. + // * We may jump to the slow path iff the receiver is null. If the + // Reference object is null then we no longer perform an ON_WEAK_OOP_REF load + // Thus we can use the regular method entry code to generate the NPE. + // + // This code is based on generate_accessor_entry. + // + // rmethod: Method* + // rsender: senderSP must preserve for slow path, set SP to it on fast path (rsender) + + address entry = __ pc(); + + const int referent_offset = java_lang_ref_Reference::referent_offset(); + + Label slow_path; + Register rax = V0; + + // Check if local 0 != NULL + // If the receiver is null then it is OK to jump to the slow path. + __ ldptr(rax, Address(esp, 0)); + +// __ testptr(rax, rax); + __ jcc(Assembler::zero, slow_path, rax); + + // Load the value of the referent field. 
+ const Address field_address(rax, referent_offset); + + //__ push(RA); + __ load_heap_oop(rax, field_address, /*tmp1*/ rscratch1, /*tmp_thread*/ rscratch2, ON_WEAK_OOP_REF); + //__ pop(RA); + + __ movl(esp, rsender); // set sp to sender sp + __ ret_sw(); + + // generate a vanilla interpreter entry as the slow path + __ bind(slow_path); + __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::zerolocals)); + BLOCK_COMMENT("generate_Reference_get_entry leave"); + return entry; +} + +void TemplateInterpreterGenerator::bang_stack_shadow_pages(bool native_call) {SCOPEMARK_NAME(bang_stack_shadow_pages, _masm) + // Quick & dirty stack overflow checking: bang the stack & handle trap. + // Note that we do the banging after the frame is setup, since the exception + // handling code expects to find a valid interpreter frame on the stack. + // Doing the banging earlier fails if the caller frame is not an interpreter + // frame. + // (Also, the exception throwing code expects to unlock any synchronized + // method receiever, so do the banging after locking the receiver.) + + // Bang each page in the shadow zone. We can't assume it's been done for + // an interpreter frame with greater than a page of locals, so each page + // needs to be checked. Only true for non-native. + const int page_size = os::vm_page_size(); + const int n_shadow_pages = ((int)StackOverflow::stack_shadow_zone_size()) / page_size; + const int start_page = native_call ? n_shadow_pages : 1; + for (int pages = start_page; pages <= n_shadow_pages; pages++) { + __ bang_stack_with_offset(pages * page_size); + } +} + + +/** + * Method entry for static native methods: + * int java.util.zip.CRC32.update(int crc, int b) + */ +address TemplateInterpreterGenerator::generate_CRC32_update_entry() {BLOCK_COMMENT("generate_CRC32_update_entry enter"); + if (UseCRC32Intrinsics) { + address entry = __ pc(); + + Label slow_path; + // If we need a safepoint check, generate full interpreter entry. +// __ mov(GP, SafepointSynchronize::address_of_state()); +// __ lw(AT, GP, 0); +// __ mov(GP, (SafepointSynchronize::_not_synchronized)); +// __ bne_c(AT, GP, slow_path); + __ get_thread(rthread); + __ safepoint_poll(slow_path, rthread, rscratch1, false /* at_return */, false /* acquire */, false /* in_nmethod */); + + // We don't generate local frame and don't align stack because + // we call stub code and there is no safepoint on this path. 
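
The CRC32 intrinsic entries below load the arguments, complement the incoming crc, perform one table lookup per byte against StubRoutines::crc_table_addr(), and complement again. For reference, a minimal standalone C++ sketch of that byte-wise update (illustrative only, not part of the patch; the table-building helper and its names are hypothetical):

```cpp
#include <cstdint>
#include <cstdio>

static uint32_t crc_table[256];

// Build the reflected-polynomial table the stub reads via StubRoutines::crc_table_addr();
// 0xEDB88320 is the CRC-32 polynomial used by java.util.zip.CRC32.
static void init_crc_table() {
  for (uint32_t i = 0; i < 256; i++) {
    uint32_t c = i;
    for (int k = 0; k < 8; k++) {
      c = (c & 1) ? 0xEDB88320u ^ (c >> 1) : (c >> 1);
    }
    crc_table[i] = c;
  }
}

// Same shape as the generated sequence: ~crc, one table lookup per byte, ~crc.
static uint32_t crc32_update(uint32_t crc, uint8_t b) {
  crc = ~crc;
  crc = crc_table[(crc ^ b) & 0xFFu] ^ (crc >> 8);
  return ~crc;
}

int main() {
  init_crc_table();
  const unsigned char msg[] = "abc";
  uint32_t crc = 0;
  for (int i = 0; i < 3; i++) crc = crc32_update(crc, msg[i]);
  printf("%08x\n", crc);  // prints 352441c2, the CRC-32 of "abc"
  return 0;
}
```
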
+ // Load parameters + const Register crc = V0; // crc + const Register val = A0; // source java byte value + const Register tbl = A1; // scratch + + // Arguments are reversed on java expression stack + __ ldw(val, 0, esp); // byte value + __ ldw(crc, wordSize, esp); // Initial CRC + + __ lea(tbl, ExternalAddress(StubRoutines::crc_table_addr())); + __ notw(crc, crc); // ~crc + __ update_byte_crc32(crc, val, tbl); + __ notw(crc ,crc); // ~crc + + // result in V0 + // _areturn + __ addl(rsender, R0, esp); // set sp to sender sp + __ ret_sw(); + + // generate a vanilla native entry as the slow path + __ bind(slow_path); + __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::native)); + return entry; + }BLOCK_COMMENT("generate_CRC32_update_entry leave"); + return NULL; +} + +/** + * Method entry for static native methods: + * int java.util.zip.CRC32.updateBytes(int crc, byte[] b, int off, int len) + * int java.util.zip.CRC32.updateByteBuffer(int crc, long buf, int off, int len) + */ +address TemplateInterpreterGenerator::generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind) {BLOCK_COMMENT("generate_CRC32_updateBytes_entry enter"); + if (UseCRC32Intrinsics) { + address entry = __ pc(); + + // // rbx,: Method* + // // r13: senderSP must preserved for slow path, set SP to it on fast path + // // If we need a safepoint check, generate full interpreter entry. + // We don't generate local frame and don't align stack because + // // we call stub code and there is no safepoint on this path. + Label slow_path; +// __ mov(GP, SafepointSynchronize::address_of_state()); +// __ lw(AT, GP, 0); +// __ mov(GP, (SafepointSynchronize::_not_synchronized)); +// __ bne_c(AT, GP, slow_path); + __ get_thread(rthread); + __ safepoint_poll(slow_path, rthread, rscratch1, false /* at_return */, false /* acquire */, false /* in_nmethod */); + + // Load parameters + const Register crc = A0; // crc + const Register buf = A1; // source java byte array address + const Register len = A2; // length + const Register off = len; // offset (never overlaps with 'len') +// const Register table = A3; // crc_table address + + // // Arguments are reversed on java expression stack + // // Calculate address of start element + if (kind == Interpreter::java_util_zip_CRC32_updateByteBuffer) { + __ ldl(buf, 2 * wordSize, esp); // long buf + __ ldw(off, 1 * wordSize, esp); // offset + __ zapnot(off, 0xF, off); + __ addl(buf, off, buf); // + offset + __ ldw(crc, 4 * wordSize, esp); // Initial CRC + } else { + __ ldw(off, 1 * wordSize, esp); + __ zapnot(off, 0xF, off); + __ ldl(buf, 2 * wordSize, esp); // byte[] array + __ addl(buf, arrayOopDesc::base_offset_in_bytes(T_BYTE), buf); // + header size + __ addl(buf, off, buf); // offset + __ ldw(crc, 3 * wordSize, esp); // Initial CRC + } + // Can now load 'len' since we're finished with 'off' + __ ldw(len, 0 * wordSize, esp); + __ zapnot(len, 0xF, len); + __ enter(); + //__ stop("use SharedRuntime::updateBytesCRC32? 
jzy"); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::updateBytesCRC32), 3); + // _areturn + __ leave(); + __ daddu(esp, rsender, R0); // set sp to sender sp + __ ret_sw(); + // generate a vanilla native entry as the slow path + __ BIND(slow_path); + __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::native)); + return entry; + }BLOCK_COMMENT("generate_CRC32_updateBytes_entry leave"); + return NULL; +} + +/** + * Method entry for intrinsic-candidate (non-native) methods: + * int java.util.zip.CRC32C.updateBytes(int crc, byte[] b, int off, int end) + * int java.util.zip.CRC32C.updateDirectByteBuffer(int crc, long buf, int off, int end) + * Unlike CRC32, CRC32C does not have any methods marked as native + * CRC32C also uses an "end" variable instead of the length variable CRC32 uses + */ +address TemplateInterpreterGenerator::generate_CRC32C_updateBytes_entry(AbstractInterpreter::MethodKind kind) { + __ should_not_reach_here("generate_CRC32C_updateBytes_entry not implement"); + return NULL; +} + +// Interpreter stub for calling a native method. (asm interpreter) +// This sets up a somewhat different looking stack for calling the +// native method than the typical interpreter frame setup. +address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) {SCOPEMARK_NAME(generate_native_entry, _masm); + // determine code generation flags + bool inc_counter = UseCompiler || CountCompiledCalls || LogTouchedMethods; + // rsender: sender's sp + // rmethod: Method* + const Register rbx = rmethod; + const Register rcx = rscratch1; + const Register rax = V0; + address entry_point = __ pc(); + + const Address constMethod (rbx, Method::const_offset()); + const Address access_flags (rbx, Method::access_flags_offset()); + const Address size_of_parameters(rcx, ConstMethod:: + size_of_parameters_offset()); + + + // get parameter size (always needed) + __ ldptr(rcx, constMethod); + __ load_unsigned_short(rcx, size_of_parameters); + + // native calls don't need the stack size check since they have no + // expression stack and the arguments are already on the stack and + // we only add a handful of words to the stack + + // rmethod: Method* + // rcx: size of parameters + // rbcp: sender sp //? jzy + //__ pop(rax); // different from x86, sw donot need return address + + // for natives the size of locals is zero + + // compute beginning of parameters (rlocals) + __ lea(rlocals, Address(esp, rcx, Interpreter::stackElementScale(), -wordSize)); + + // add 2 zero-initialized slots for native calls + // initialize result_handler slot + __ push((int) NULL_WORD); + // slot for oop temp + // (static native method holder mirror/jni oop result) + __ push((int) NULL_WORD); + + // initialize fixed part of activation frame + generate_fixed_frame(true); + + // make sure method is native & not abstract +#ifdef ASSERT + __ ldw(rax, access_flags); + { + Label L; + __ testw(rax, JVM_ACC_NATIVE); + __ jcc(Assembler::notZero, L); + __ stop("tried to execute non-native method as native"); + __ bind(L); + } + { + Label L; + __ testw(rax, JVM_ACC_ABSTRACT); + __ jcc(Assembler::zero, L); + __ stop("tried to execute abstract method in interpreter"); + __ bind(L); + } +#endif + + // Since at this point in the method invocation the exception handler + // would try to exit the monitor of synchronized methods which hasn't + // been entered yet, we set the thread local variable + // _do_not_unlock_if_synchronized to true. The remove_activation will + // check this flag. 
+ Register thread1 = rthread; + const Address do_not_unlock_if_synchronized(thread1, + in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); + __ stbool(true, do_not_unlock_if_synchronized); + + // increment invocation count & check for overflow + Label invocation_counter_overflow; + if (inc_counter) { + generate_counter_incr(&invocation_counter_overflow); + } + + Label continue_after_compile; + __ bind(continue_after_compile); + + bang_stack_shadow_pages(true); + + // reset the _do_not_unlock_if_synchronized flag + __ stbool(false, do_not_unlock_if_synchronized); + + // check for synchronized methods + // Must happen AFTER invocation_counter check and stack overflow check, + // so method is not locked if overflows. + if (synchronized) { + lock_method(); + } else { + // no synchronization necessary +#ifdef ASSERT + { + Label L; + __ ldw(rax, access_flags); + __ testw(rax, JVM_ACC_SYNCHRONIZED); + __ jcc(Assembler::zero, L); + __ stop("method needs synchronization"); + __ bind(L); + } +#endif + } + + // start execution +#ifdef ASSERT + { + Label L; + const Address monitor_block_top(rfp, + frame::interpreter_frame_monitor_block_top_offset * wordSize); + __ ldptr(rax, monitor_block_top); + __ cmpptr(rax, esp); + __ jcc(Assembler::equal, L); + __ stop("broken stack frame setup in interpreter"); + __ bind(L); + } +#endif + + // jvmti support + __ notify_method_entry(); + + // work registers + const Register method = rmethod; + const Register thread = rthread; + const Register t = T12; //will use in call instruction in sw + + // allocate space for parameters + __ get_method(method); + __ ldptr(t, Address(method, Method::const_offset())); + __ load_unsigned_short(t, Address(t, ConstMethod::size_of_parameters_offset())); + + __ slll(t, Interpreter::logStackElementSize, t); + + __ subptr(esp, t, esp); + //__ subptr(esp, frame::arg_reg_save_area_bytes, esp); // windows + __ andptr(esp, -16, esp); // must be 16 byte boundary (see amd64 ABI) sw need this ? jzy + // get signature handler + __ block_comment(" get signature handler"); + { + Label L; + __ ldptr(t, Address(method, Method::signature_handler_offset())); + //__ testptr(t, t); + __ jcc(Assembler::notZero, L, t); + __ call_VM(noreg, + CAST_FROM_FN_PTR(address, + InterpreterRuntime::prepare_native_call), + method); + __ get_method(method); + __ ldptr(t, Address(method, Method::signature_handler_offset())); + __ bind(L); + } + + // call signature handler + assert(InterpreterRuntime::SignatureHandlerGenerator::from() == rlocals, + "adjust this code"); + assert(InterpreterRuntime::SignatureHandlerGenerator::to() == esp, + "adjust this code"); + assert(InterpreterRuntime::SignatureHandlerGenerator::temp() == rscratch1, + "adjust this code"); + + // The generated handlers do not touch RBX (the method oop). + // However, large signatures cannot be cached and are generated + // each time here. The slow-path generator can do a GC on return, + // so we must reload it after the call. 
+ __ call(t); + __ get_method(method); // slow path can do a GC, reload RBX + + + // result handler is in V0 + // set result handler + __ stptr(rax, + Address(rfp, + (frame::interpreter_frame_result_handler_offset) * wordSize)); + __ memb(); + + // pass mirror handle if static call + { + Label L; + __ ldw(t, Address(method, Method::access_flags_offset())); + __ testw(t, JVM_ACC_STATIC); + __ jcc(Assembler::zero, L); + // get mirror + __ load_mirror(t, method, rax); + // copy mirror into activation frame + __ stptr(t, + Address(rfp, frame::interpreter_frame_oop_temp_offset * wordSize)); + // pass handle to mirror + __ lea(c_rarg1, + Address(rfp, frame::interpreter_frame_oop_temp_offset * wordSize)); + __ bind(L); + } + + // get native function entry point + { + Label L; + __ ldptr(rax, Address(method, Method::native_function_offset())); + ExternalAddress unsatisfied(SharedRuntime::native_method_throw_unsatisfied_link_error_entry()); + __ cmpptr(rax, unsatisfied.addr()); + __ jcc(Assembler::notEqual, L); + __ call_VM(noreg, + CAST_FROM_FN_PTR(address, + InterpreterRuntime::prepare_native_call), + method); + __ get_method(method); + __ ldptr(rax, Address(method, Method::native_function_offset())); + __ bind(L); + } + + // pass JNIEnv + __ lea(c_rarg0, Address(rthread, JavaThread::jni_environment_offset())); + + // Set the last Java PC in the frame anchor to be the return address from + // the call to the native method: this will allow the debugger to + // generate an accurate stack trace. + Label native_return; + __ set_last_Java_frame(esp, rfp, native_return, rscratch1); //TODO:check jzy + + // change thread state +#ifdef ASSERT + { + Label L; + __ ldwu(t, Address(thread, JavaThread::thread_state_offset())); + __ cmpw(t, _thread_in_Java); + __ jcc(Assembler::equal, L); + __ stop("Wrong thread state in native stub"); + __ bind(L); + } +#endif + + // Change state to native + __ mov_immediate32(rscratch1, _thread_in_native); + __ memb(); + __ stw(rscratch1, Address(thread, JavaThread::thread_state_offset())); + + // call native method + __ movl(t, rax); //SW ABI + __ call(t, &native_return);//t == T12 +// __ bind(native_return); + // result potentially in V0 or f0 + + // via _last_native_pc and not via _last_jave_sp + // NOTE: the order of theses push(es) is known to frame::interpreter_frame_result. + // If the order changes or anything else is added to the stack the code in + // interpreter_frame_result will have to be changed. + //FIXME, should modify here + // save return value to keep the value from being destroyed by other calls + __ push_d(FSF); //? jzy + __ push(V0); + + // change thread state + __ memb(); + + __ mov_immediate32(rscratch1, _thread_in_native_trans); + __ stw(rscratch1, Address(thread, JavaThread::thread_state_offset())); + + __ memb(); // Force this write out before the read below + + // check for safepoint operation in progress and/or pending suspend requests + { + Label Continue; + Label slow_path; + __ safepoint_poll(slow_path, rthread, rscratch2, true /* at_return */, true /* acquire */, false /* in_nmethod */); + + __ cmpw(Address(thread, JavaThread::suspend_flags_offset()), R0); + __ jcc(Assembler::equal, Continue); + __ bind(slow_path); + + // Don't use call_VM as it will see a possible pending exception + // and forward it and never return here preventing us from + // clearing _last_native_pc down below. Also can't use + // call_VM_leaf either as it will check to see if r13 & r14 are + // preserved and correspond to the bcp/locals pointers. 
So we do a + // runtime call by hand. + // + __ movl(c_rarg0, rthread); + __ movl(rheapbase, esp); // remember sp (can only use r12 if not using call_VM) +// __ subptr(esp, frame::arg_reg_save_area_bytes); // windows + __ subptr(esp, 16, esp); // align stack as required by ABI + __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans))); + __ movl(esp, rheapbase); // restore sp + __ reinit_heapbase(); + __ bind(Continue); + } + + // change thread state + __ memb(); + + __ mov_immediate32(rscratch1, _thread_in_Java); + __ stw(rscratch1, Address(thread, JavaThread::thread_state_offset())); + + // reset_last_Java_frame + __ reset_last_Java_frame(thread, true); + + if (CheckJNICalls) { + // clear_pending_jni_exception_check + __ stptr(R0, Address(thread, JavaThread::pending_jni_exception_check_fn_offset())); + } + + // reset handle block + __ ldptr(t, Address(thread, JavaThread::active_handles_offset())); + __ stw(R0, Address(t, JNIHandleBlock::top_offset_in_bytes())); + + // If result is an oop unbox and store it in frame where gc will see it + // and result handler will pick it up + + { + Label no_oop; + __ lea(t, ExternalAddress(AbstractInterpreter::result_handler(T_OBJECT))); + __ cmpptr(t, Address(rfp, frame::interpreter_frame_result_handler_offset*wordSize)); + __ jcc(Assembler::notEqual, no_oop); + // retrieve result + __ pop(rax); + // Unbox oop result, e.g. JNIHandles::resolve value. + __ resolve_jobject(rax /* value */, + thread /* thread */, + t /* tmp */); + __ stptr(rax, Address(rfp, frame::interpreter_frame_oop_temp_offset*wordSize)); + // keep stack depth as expected by pushing oop which will eventually be discarded + __ push(rax); + __ BIND(no_oop); + } + { + Label no_reguard; + __ cmpptr(Address(thread, JavaThread::stack_guard_state_offset()), + StackOverflow::stack_guard_yellow_reserved_disabled); + __ jcc(Assembler::notEqual, no_reguard); + + __ pushad(); + __ movl(rheapbase, esp);// + __ andptr(esp, 16, esp); // align stack as required by ABI + __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages))); + __ movl(esp, rheapbase); + __ popad(); + //add for compressedoops + __ reinit_heapbase(); + __ BIND(no_reguard); + } + + + // The method register is junk from after the thread_in_native transition + // until here. Also can't call_VM until the bcp has been + // restored. Need bcp for throwing exception below so get it now. + __ get_method(method); + + __ ldptr(rbcp, Address(method, Method::const_offset())); // get ConstMethod* + __ lea(rbcp, Address(rbcp, in_bytes(ConstMethod::codes_offset()))); + // handle exceptions (exception handling will handle unlocking!) + { + Label L; + __ cmpptr(Address(thread, Thread::pending_exception_offset()),R0); + __ jcc(Assembler::zero, L); + // Note: At some point we may want to unify this with the code + // used in call_VM_base(); i.e., we should use the + // StubRoutines::forward_exception code. For now this doesn't work + // here because the rsp is not correctly set at this point. 
+ __ MacroAssembler::call_VM(noreg, + CAST_FROM_FN_PTR(address, + InterpreterRuntime::throw_pending_exception)); + __ should_not_reach_here("1186"); + __ BIND(L); + } + + // do unlocking if necessary + { + Label L; + __ ldw(t, Address(method, Method::access_flags_offset())); + __ testw(t, JVM_ACC_SYNCHRONIZED); + __ jcc(Assembler::zero, L); + // the code below should be shared with interpreter macro + // assembler implementation + { + Label unlock; + // BasicObjectLock will be first in list, since this is a + // synchronized method. However, need to check that the object + // has not been unlocked by an explicit monitorexit bytecode. + const Address monitor(rfp, + (int)(frame::interpreter_frame_initial_sp_offset * + wordSize - (int)sizeof(BasicObjectLock))); + + const Register regmon = c_rarg1; + + // monitor expect in c_rarg1 for slow unlock path + __ lea(regmon, monitor); // address of first monitor + + __ ldptr(t, Address(regmon, BasicObjectLock::obj_offset_in_bytes())); + __ testptr(t, t); + __ jcc(Assembler::notZero, unlock); + + // Entry already unlocked, need to throw exception + __ MacroAssembler::call_VM(noreg, + CAST_FROM_FN_PTR(address, + InterpreterRuntime::throw_illegal_monitor_state_exception)); + __ should_not_reach_here("1220"); + + __ BIND(unlock); + __ unlock_object(regmon); + } + __ BIND(L); + } + + // jvmti support + // Note: This must happen _after_ handling/throwing any exceptions since + // the exception handler code notifies the runtime of method exits + // too. If this happens before, method entry/exit notifications are + // not properly paired (was bug - gri 11/22/99). + __ notify_method_exit(vtos, InterpreterMacroAssembler::NotifyJVMTI);// + + // restore potential result in edx:eax, call result handler to + // restore potential result in ST0 & handle result + + __ pop(rax); + __ pop_d(); + __ ldptr(t, Address(rfp, + (frame::interpreter_frame_result_handler_offset) * wordSize)); + __ call(t); + + // remove activation + __ ldptr(t, Address(rfp, + frame::interpreter_frame_sender_sp_offset * + wordSize)); // get sender sp + __ leave(); // remove frame anchor + //__ pop(rdi); // get return address TODO:where set? jzy + __ movl(esp, t); // set sp to sender sp + __ jmp(RA); // return address, set RA in leave() + + if (inc_counter) { + // Handle overflow of counter and compile method + __ bind(invocation_counter_overflow); + generate_counter_overflow(continue_after_compile); + } + + return entry_point; +} + +// Abstract method entry +// Attempt to execute abstract method. Throw exception +address TemplateInterpreterGenerator::generate_abstract_entry(void) {BLOCK_COMMENT("generate_abstract_entry enter"); + // rmethod: methodOop + // V0: receiver (unused) + // rsender : sender 's sp + address entry_point = __ pc(); + + // abstract method entry + __ empty_expression_stack(); + __ restore_bcp(); + __ restore_locals(); + // throw exception + __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_AbstractMethodErrorWithMethod), rmethod); + // the call_VM checks for exception, so we should never return here. 
+ __ should_not_reach_here("1277"); +BLOCK_COMMENT("generate_abstract_entry leave"); + return entry_point; +} + +// +// Generic interpreted method entry to (asm) interpreter +// +address TemplateInterpreterGenerator::generate_normal_entry(bool synchronized) {SCOPEMARK_NAME(generate_normal_entry, _masm) + // determine code generation flags + bool inc_counter = UseCompiler || CountCompiledCalls || LogTouchedMethods; + + // rmethod: Method* + // rsender: sender 's sp + //const Register rbx = rmethod; + //const Register rdx = c_rarg5;//special,should be same register in generate_stack_overflow_check + //const Register rcx = c_rarg4; + //const Register rax = V0; + address entry_point = __ pc(); + + const Address constMethod(rbx, Method::const_offset()); + const Address access_flags(rbx, Method::access_flags_offset()); + const Address size_of_parameters(rdx, + ConstMethod::size_of_parameters_offset()); + const Address size_of_locals(rdx, ConstMethod::size_of_locals_offset()); + + + // get parameter size (always needed) + __ ldptr(rdx, constMethod); + __ ldhu(rcx, size_of_parameters); + + // rmethod: Method* + // rcx: size of parameters + // rsender: sender 's sp ,could be different frome sp if we call via c2i + + + __ ldhu(rdx, size_of_locals);// get size of locals in words + __ subl(rdx, rcx, rdx);// rdx = no. of additional locals + + // see if we've got enough room on the stack for locals plus overhead. + generate_stack_overflow_check(); // + +// // get return address +// __ pop(rax);// x86 pushes RA on stack, so pops here, we haven't push + + // compute beginning of parameters (rlocals) + __ lea(rlocals, Address(esp, rcx, Interpreter::stackElementScale(), -wordSize)); + + // rdx - # of additional locals + // allocate space for locals + // explicitly initialize locals + { + Label exit, loop; + __ jcc(Assembler::lessEqual, exit, rdx); // do nothing if rdx <= 0 + __ bind(loop); + __ push((int)NULL_WORD); // initialize local variables + __ decrementl(rdx); // until everything initialized + __ jcc(Assembler::greater, loop, rdx); + __ bind(exit); + } + + // initialize fixed part of activation frame + generate_fixed_frame(false); + + // make sure method is not native & not abstract +#ifdef ASSERT + __ ldw(rax, access_flags); + { + Label L; + __ testw(rax, JVM_ACC_NATIVE); + __ jcc(Assembler::zero, L); + __ stop("tried to execute native method as non-native"); + __ bind(L); + } + { + Label L; + __ testw(rax, JVM_ACC_ABSTRACT); + __ jcc(Assembler::zero, L); + __ stop("tried to execute abstract method in interpreter"); + __ bind(L); + } +#endif + + // Since at this point in the method invocation the exception + // handler would try to exit the monitor of synchronized methods + // which hasn't been entered yet, we set the thread local variable + // _do_not_unlock_if_synchronized to true. The remove_activation + // will check this flag. 
+ + const Register thread = rthread; + const Address do_not_unlock_if_synchronized(thread, + in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); + __ ldi(rscratch1, 1, R0); + __ stb(rscratch1, do_not_unlock_if_synchronized); + + __ profile_parameters_type(rax, rcx, rdx); // + // increment invocation count & check for overflow + Label invocation_counter_overflow; + if (inc_counter) { + generate_counter_incr(&invocation_counter_overflow); + } + + Label continue_after_compile; + __ bind(continue_after_compile); + + // check for synchronized interpreted methods + bang_stack_shadow_pages(false); + + // reset the _do_not_unlock_if_synchronized flag + __ ldi(rscratch1, 0, R0); + __ stb(rscratch1, do_not_unlock_if_synchronized); + + // check for synchronized methods + // Must happen AFTER invocation_counter check and stack overflow check, + // so method is not locked if overflows. + if (synchronized) { + // Allocate monitor and lock method + lock_method(); + } else { + // no synchronization necessary +#ifdef ASSERT + { + Label L; + __ ldw(rax, access_flags); + __ testw(rax, JVM_ACC_SYNCHRONIZED); + __ jcc(Assembler::zero, L); + __ stop("method needs synchronization"); + __ bind(L); + } +#endif + } + + // start execution +#ifdef ASSERT + { + Label L; + const Address monitor_block_top (rfp, + frame::interpreter_frame_monitor_block_top_offset * wordSize); + __ ldptr(rax, monitor_block_top); + __ cmpptr(rax, esp); + __ jcc(Assembler::equal, L); + __ stop("broken stack frame setup in interpreter"); + __ bind(L); + } +#endif + + // jvmti support + __ notify_method_entry(); + __ block_comment("start to execute bytecode"); + __ dispatch_next(vtos); + + // invocation counter overflow + if (inc_counter) { + // Handle overflow of counter and compile method + __ bind(invocation_counter_overflow); + generate_counter_overflow(continue_after_compile); + } + + return entry_point; +} + +//----------------------------------------------------------------------------- +// Exceptions + +void TemplateInterpreterGenerator::generate_throw_exception() {BLOCK_COMMENT("generate_throw_exception enter");//__ warn("TODO:check function right generate_throw_exception jzy "); + // Entry point in previous activation (i.e., if the caller was + // interpreted) + Interpreter::_rethrow_exception_entry = __ pc(); + Register rax = V0; + + // Restore sp to interpreter_frame_last_sp even though we are going + // to empty the expression stack for the exception processing. + __ stptr(R0, Address(rfp, frame::interpreter_frame_last_sp_offset * wordSize)); + // rax: exception + // rdx: return address/pc that threw exception + __ restore_bcp(); // r13/rsi points to call/send + __ restore_locals(); + //add for compressedoops + __ reinit_heapbase(); // restore rheapbase as heapbase. 
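
As a side note on the normal entry above: locals beyond the declared parameters are explicitly zero-filled by the push loop before generate_fixed_frame(false) runs, so every local starts out NULL. A trivial standalone sketch of that set-up, modelling the expression stack as a vector (illustrative, not HotSpot code):

```cpp
#include <cassert>
#include <cstdint>
#include <vector>

// The expression stack is modeled as a vector growing by push_back; the real
// interpreter stack grows downward in memory.
static void allocate_locals(std::vector<intptr_t>& stack,
                            int size_of_parameters, int size_of_locals) {
  int additional = size_of_locals - size_of_parameters;  // "rdx" in the entry code
  for (int i = 0; i < additional; i++) {                  // loop is skipped when <= 0
    stack.push_back(0);                                   // push((int)NULL_WORD)
  }
}

int main() {
  std::vector<intptr_t> stack = {42, 7};              // two incoming parameters
  allocate_locals(stack, /*params=*/2, /*locals=*/5);
  assert(stack.size() == 5 && stack.back() == 0);     // three extra locals, all NULL
  return 0;
}
```
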
+ // Entry point for exceptions thrown within interpreter code + Interpreter::_throw_exception_entry = __ pc(); + // expression stack is undefined here + // V0: exception + // rbcp: exception bcp + __ verify_oop(rax); + Register rarg = c_rarg1; + __ movl(rarg, rax); + + // expression stack must be empty before entering the VM in case of + // an exception + __ empty_expression_stack(); + // find exception handler address and preserve exception oop + __ call_VM(rdx, + CAST_FROM_FN_PTR(address, + InterpreterRuntime::exception_handler_for_exception), + rarg); + // V0: exception handler entry point + // rdx: preserved exception oop + // rbcp: bcp for exception handler + + __ push_ptr(rdx);// push exception which is now the only value on the stack + __ jmp(rax); // jump to exception handler (may be _remove_activation_entry!) + + // If the exception is not handled in the current frame the frame is removed and + // the exception is rethrown (i.e. exception continuation is _rethrow_exception). + // + // Note: At this point the bci is still the bxi for the instruction which caused + // the exception and the expression stack is empty. Thus, for any VM calls + // at this point, GC will find a legal oop map (with empty expression stack). + + // In current activation + // V0: exception + // rbcp: exception bcp + + // + // JVMTI PopFrame support + // + + Interpreter::_remove_activation_preserving_args_entry = __ pc(); + __ empty_expression_stack(); + // Set the popframe_processing bit in pending_popframe_condition + // indicating that we are currently handling popframe, so that + // call_VMs that may happen later do not trigger new popframe + // handling cycles. + const Register thread = rthread; + __ ldw(rdx, Address(thread, JavaThread::popframe_condition_offset())); + __ orw(rdx, JavaThread::popframe_processing_bit, rdx); + __ stw(rdx, Address(thread, JavaThread::popframe_condition_offset())); + + { + // Check to see whether we are returning to a deoptimized frame. + // (The PopFrame call ensures that the caller of the popped frame is + // either interpreted or compiled and deoptimizes it if compiled.) + // In this case, we can't call dispatch_next() after the frame is + // popped, but instead must save the incoming arguments and restore + // them after deoptimization has occurred. + // + // Note that we don't compare the return PC against the + // deoptimization blob's unpack entry because of the presence of + // adapter frames in C2. 
+ Label caller_not_deoptimized; + Register rarg = c_rarg1; // + __ ldptr(rarg, Address(rfp, frame::return_addr_offset * wordSize)); + __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, + InterpreterRuntime::interpreter_contains), rarg); + //__ testl(rax, rax); + __ jcc(Assembler::notZero, caller_not_deoptimized, rax); + + // Compute size of arguments for saving when returning to + // deoptimized caller + __ get_method(rax); + __ ldptr(rax, Address(rax, Method::const_offset())); + __ load_unsigned_short(rax, Address(rax, in_bytes(ConstMethod:: + size_of_parameters_offset()))); + __ slll(rax, Interpreter::logStackElementSize, rax); + __ restore_locals(); + __ subptr(rlocals, rax, rlocals); + __ addptr(rlocals, wordSize, rlocals); + // Save these arguments + __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, + Deoptimization:: + popframe_preserve_args), + thread, rax, rlocals); + + __ remove_activation(vtos, rdx, + /* throw_monitor_exception */ false, + /* install_monitor_exception */ false, + /* notify_jvmdi */ false); + + // Inform deoptimization that it is responsible for restoring these arguments + __ movw(rscratch3, JavaThread::popframe_force_deopt_reexecution_bit); + __ stw(rscratch3, Address(thread, JavaThread::popframe_condition_offset())); + // Continue in deoptimization handler + __ jmp(rdx); + + __ bind(caller_not_deoptimized); + } + + __ remove_activation(vtos, rdx, + /* throw_monitor_exception */ false, + /* install_monitor_exception */ false, + /* notify_jvmdi */ false); + + // Finish with popframe handling + // A previous I2C followed by a deoptimization might have moved the + // outgoing arguments further up the stack. PopFrame expects the + // mutations to those outgoing arguments to be preserved and other + // constraints basically require this frame to look exactly as + // though it had previously invoked an interpreted activation with + // no space between the top of the expression stack (current + // last_sp) and the top of stack. Rather than force deopt to + // maintain this kind of invariant all the time we call a small + // fixup routine to move the mutated arguments onto the top of our + // expression stack if necessary. + + __ movl(c_rarg1, esp); + __ ldptr(c_rarg2, Address(rfp, frame::interpreter_frame_last_sp_offset * wordSize)); + // PC must point into interpreter here + __ set_last_Java_frame(noreg, rfp, __ pc(), rscratch1); + __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::popframe_move_outgoing_args), rthread, c_rarg1, c_rarg2); + __ reset_last_Java_frame(thread, true); + // Restore the last_sp and null it out + __ ldptr(esp, Address(rfp, frame::interpreter_frame_last_sp_offset * wordSize)); + __ stptr(R0, Address(rfp, frame::interpreter_frame_last_sp_offset * wordSize)); + + __ restore_bcp(); + __ restore_locals(); + // The method data pointer was incremented already during + // call profiling. We have to restore the mdp for the current bcp. + if (ProfileInterpreter) { + __ set_method_data_pointer_for_bcp(); + } + // Clear the popframe condition flag + __ movw(rscratch1, JavaThread::popframe_inactive); + __ stw(rscratch1, Address(thread, JavaThread::popframe_condition_offset())); +#if INCLUDE_JVMTI + { + Label L_done; + const Register local0 = rlocals; + + __ cmpb(Address(rbcp, 0), Bytecodes::_invokestatic); + __ jcc(Assembler::notEqual, L_done); + + // The member name argument must be restored if _invokestatic is re-executed after a PopFrame call. 
+ // Detect such a case in the InterpreterRuntime function and return the member name argument, or NULL. + + __ get_method(rdx); + __ ldptr(rax, Address(local0, 0)); + __ call_VM(rax, CAST_FROM_FN_PTR(address, InterpreterRuntime::member_name_arg_or_null), rax, rdx, rbcp); + + //__ testptr(rax, rax); + __ jcc(Assembler::zero, L_done, rax); + + __ stptr(rax, Address(esp, 0)); // store 64bits + __ bind(L_done); + } +#endif // INCLUDE_JVMTI + __ dispatch_next(vtos); + // end of PopFrame support + + Interpreter::_remove_activation_entry = __ pc(); + + // preserve exception over this code sequence + __ pop_ptr(rax); + __ stptr(rax, Address(thread, JavaThread::vm_result_offset())); + // remove the activation (without doing throws on illegalMonitorExceptions) + __ remove_activation(vtos, rdx, false, true, false); + // restore exception + __ get_vm_result(rax, thread); + //__ verify_oop(rax); + + // Inbetween activations - previous activation type unknown yet + // compute continuation point - the continuation point expects + // the following registers set up: + // + // rax: exception + // rscratch1: return address/pc that threw exception + // rsp: expression stack of caller + // rbp: ebp of caller + __ push(rax); // save exception + __ push(rdx); // save return address + __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, + SharedRuntime::exception_handler_for_return_address), + thread, rdx); + __ movl(rbx, rax); // save exception handler + __ pop(rdx); // restore return address TODO:here need check jzy + __ pop(rax); + // Note that an "issuing PC" is actually the next PC after the call + __ jmp(rbx); // jump to exception handler of caller + BLOCK_COMMENT("generate_throw_exception leave"); +} + + +// +// JVMTI ForceEarlyReturn support +// +address TemplateInterpreterGenerator::generate_earlyret_entry_for(TosState state) {BLOCK_COMMENT("generate_earlyret_entry_for enter"); + address entry = __ pc(); + + __ restore_bcp(); + __ restore_locals(); + __ empty_expression_stack(); + __ empty_FPU_stack(); + __ load_earlyret_value(state); + + __ ldptr(rscratch1, Address(rthread, JavaThread::jvmti_thread_state_offset())); + + const Address cond_addr(rscratch1, in_bytes(JvmtiThreadState::earlyret_state_offset())); + // Clear the earlyret state + __ movw(rscratch2, JvmtiThreadState::earlyret_inactive); + __ stw(rscratch2, cond_addr); + __ memb(); + + __ remove_activation(state, rscratch1, + false, /* throw_monitor_exception */ + false, /* install_monitor_exception */ + true); /* notify_jvmdi */ + __ memb(); + __ jmp(rscratch1);BLOCK_COMMENT("generate_earlyret_entry_for leave"); + return entry; +} // end of ForceEarlyReturn support + + +//----------------------------------------------------------------------------- +// Helper for vtos entry point generation + +void TemplateInterpreterGenerator::set_vtos_entry_points(Template* t, + address& bep, + address& cep, + address& sep, + address& aep, + address& iep, + address& lep, + address& fep, + address& dep, + address& vep) {BLOCK_COMMENT("TemplateInterpreterGenerator::set_vtos_entry_points enter"); + assert(t->is_valid() && t->tos_in() == vtos, "illegal template"); + Label L; + fep = __ pc(); __ push(ftos); __ beq_l(R0, L); + dep = __ pc(); __ push(dtos); __ beq_l(R0, L); + lep = __ pc(); __ push(ltos); __ beq_l(R0, L); + aep =__ pc(); __ push(atos); __ beq_l(R0, L); + bep = cep = sep = iep = __ pc(); __ push(itos); + vep = __ pc(); + __ BIND(L); // fall through + generate_and_dispatch(t);BLOCK_COMMENT("TemplateInterpreterGenerator::set_vtos_entry_points leave"); +} + 
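
set_vtos_entry_points above gives a vtos template one entry point per incoming tos state; every non-vtos entry first spills the register-cached top-of-stack value and then falls through to the shared code at label L. A conceptual standalone sketch of that dispatch shape (names and types here are assumptions, not HotSpot code):

```cpp
#include <cstdint>
#include <cstdio>
#include <vector>

enum TosState { btos, ctos, stos, itos, ltos, ftos, dtos, atos, vtos };

struct Machine {
  std::vector<int64_t> expr_stack;
  int64_t tos_cache = 0;            // value currently cached in a register
};

// The common code behind the label L: it expects everything on the stack.
static void run_vtos_template(Machine& m) {
  printf("dispatch with %zu stack slot(s)\n", m.expr_stack.size());
}

// One entry per incoming state; byte/char/short/int all share the itos entry.
static void enter(Machine& m, TosState in_state) {
  if (in_state != vtos) {
    m.expr_stack.push_back(m.tos_cache);  // the push(ftos)/push(itos)/... in the stub
  }
  run_vtos_template(m);                   // fall through to the vtos entry
}

int main() {
  Machine m;
  m.tos_cache = 123;
  enter(m, itos);   // spills 123 first, then dispatches
  enter(m, vtos);   // nothing cached, dispatches directly
  return 0;
}
```
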
+//----------------------------------------------------------------------------- + +// Non-product code +#ifndef PRODUCT +address TemplateInterpreterGenerator::generate_trace_code(TosState state) {//__ warn("TODO:check function right generate_trace_code jzy "); + address entry = __ pc(); + + // prepare expression stack + __ push(state); // save tosca + // tos & tos2 + // trace_bytecode need actually 4 args, the last two is tos&tos2 + // this work fine for x86. but mips o32 call convention will store A2-A3 + // to the stack position it think is the tos&tos2 + // when the expression stack have no more than 2 data, error occur. + __ ldptr(A2, Address(esp, 0));// + __ ldptr(A3, Address(esp, 1 * wordSize)); + // pass arguments & call tracer + __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::trace_bytecode), RA, A2, A3); + __ movl(RA, V0); // make sure return address is not destroyed by pop(state) + + // restore expression stack + __ pop(state); // restore tosca + + // return + __ ret_sw(); + + return entry; +} + +void TemplateInterpreterGenerator::count_bytecode() {//__ warn("TODO:check function right count_bytecode jzy "); + __ incrementw(ExternalAddress((address) &BytecodeCounter::_counter_value)); +} + +void TemplateInterpreterGenerator::histogram_bytecode(Template* t) {//__ warn("TODO:check function right histogram_bytecode jzy "); + __ incrementw(ExternalAddress((address) &BytecodeHistogram::_counters[t->bytecode()])); +} + +void TemplateInterpreterGenerator::histogram_bytecode_pair(Template* t) { + const Register rbx = T11; + + __ movw(rbx, ExternalAddress((address) &BytecodePairHistogram::_index)); + __ srll(rbx, BytecodePairHistogram::log2_number_of_codes, rbx); + __ orw(rbx, + ((int) t->bytecode()) << + BytecodePairHistogram::log2_number_of_codes, rbx); + __ movw(ExternalAddress((address) &BytecodePairHistogram::_index), rbx); + __ lea(rscratch1, ExternalAddress((address) BytecodePairHistogram::_counters)); + __ incrementw(Address(rscratch1, rbx, Address::times_4)); +} + + +void TemplateInterpreterGenerator::trace_bytecode(Template* t) {SCOPEMARK_NAME(trace_bytecode,_masm) + // Call a little run-time stub to avoid blow-up for each bytecode. + // The run-time runtime saves the right registers, depending on + // the tosca in-state for the given template. 
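
histogram_bytecode_pair() above folds the previous and current bytecode into a single index before bumping a counter. A standalone model of that index update (the 8-bit log2_number_of_codes value is an assumption; not HotSpot code):

```cpp
#include <cstdint>
#include <cstdio>
#include <vector>

static const int log2_number_of_codes = 8;               // assumed: 256 code slots
static const int number_of_codes      = 1 << log2_number_of_codes;

static int pair_index = 0;
static std::vector<uint64_t> counters(number_of_codes * number_of_codes, 0);

// Same update as the generated code: the previous bytecode moves to the low
// bits, the current one is or'ed into the high bits, and that pair's counter
// is incremented.
static void record_bytecode(int bytecode) {
  pair_index = (pair_index >> log2_number_of_codes)
             | (bytecode   << log2_number_of_codes);
  counters[pair_index]++;
}

int main() {
  const int trace[] = {0x12, 0x3B, 0x12, 0x3B};           // ldc, istore_0, ldc, istore_0
  for (int bc : trace) record_bytecode(bc);
  const int pair = (0x3B << log2_number_of_codes) | 0x12; // (prev=ldc, current=istore_0)
  printf("ldc->istore_0 pairs: %llu\n", (unsigned long long)counters[pair]);
  return 0;
}
```
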
+ + assert(Interpreter::trace_code(t->tos_in()) != NULL, + "entry must have been generated"); + + __ movl(rheapbase, esp); // remember sp (can only use r12 if not using call_VM) + __ andptr(esp, -16, esp); // align stack as required by ABI + __ call(RuntimeAddress(Interpreter::trace_code(t->tos_in()))); + __ movl(esp, rheapbase); // restore sp + __ reinit_heapbase(); +} + + +void TemplateInterpreterGenerator::stop_interpreter_at() {SCOPEMARK_NAME(stop_interpreter_at,_masm) + Label L; + __ mov_immediate32(rscratch1, (int)StopInterpreterAt);//TODO:current cmpw just compare 16bit jzy + __ cmpw(ExternalAddress((address) &BytecodeCounter::_counter_value), rscratch1); + __ jcc(Assembler::notEqual, L); +// __ mov(GP, 1);// +// __ bne(GP, -1); + __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, os::breakpoint))); + __ bind(L); +} + +#endif // !PRODUCT + +address TemplateInterpreterGenerator::generate_slow_signature_handler() {SCOPEMARK_NAME(generate_slow_signature_handler,_masm) + address entry = __ pc(); + + const Register rbx = rmethod; + const Register r14 = rlocals; + + // rbx: method + // r14: pointer to locals + // c_rarg3: first stack arg - wordSize + __ movl(c_rarg3, esp); + __ push(RA); //position is subtle, you can move it's postion if you know its influence + __ subptr(esp, 6 * wordSize, esp); + __ call_VM(noreg, + CAST_FROM_FN_PTR(address, + InterpreterRuntime::slow_signature_handler), + rbx, r14, c_rarg3); + + // rax: result handler + + // Stack layout: + // -------- 0 <- sp + // -------- 1 + // -------- 2 + // -------- 3 + // -------- 4 + // -------- 1 float/double identifiers + // -------- RA + // -------- c_rarg3 + + // rsp: 5 integer or float args (if static first is unused) + // 1 float/double identifiers + // return address + // stack args + // garbage + // expression stack bottom + // bcp (NULL) + // ... + + // Do FP first so we can use c_rarg3 as temp + Register identifier = T12; + __ ldw(identifier, Address(esp, 5 * wordSize)); // float/double identifiers + int floatreg_start_index = FloatRegisterImpl::float_arg_base + 1; //because a0(16) must be env in JNI + for ( int i= 0; i < Argument::n_int_register_parameters_c-1; i++ ) { + FloatRegister floatreg = as_FloatRegister(i + floatreg_start_index); + Label isfloatordouble, isdouble, next; + + __ testw(identifier, 1 << (i*2)); // Float or Double? + __ jcc(Assembler::notZero, isfloatordouble); + + // Do Int register here + switch ( i ) { + case 0: + __ ldptr(rscratch1, Address(esp, 0)); + __ ldw(rscratch2, Address(rbx, Method::access_flags_offset())); + __ testw(rscratch2, JVM_ACC_STATIC, rcc); + assert_different_registers(rscratch1, rcc); + __ cmove(Assembler::zero, c_rarg1, rscratch1, c_rarg1); + break; + case 1: + __ ldptr(c_rarg2, Address(esp, wordSize)); + break; + case 2: + __ ldptr(c_rarg3, Address(esp, 2 * wordSize)); + break; + case 3: + __ ldptr(c_rarg4, Address(esp, 3 * wordSize)); + break; + case 4: + __ ldptr(c_rarg5, Address(esp, 4 * wordSize)); + break; + default: + break; + } + + __ jmp (next); + + __ bind(isfloatordouble); + __ testw(identifier, 1 << ((i*2)+1)); // Double? 
+ __ jcc(Assembler::notZero, isdouble); + + // Do Float Here + __ load_float(floatreg, Address(esp, i * wordSize)); + __ jmp(next); + + // Do Double here + __ bind(isdouble); + __ load_double(floatreg, Address(esp, i * wordSize)); + + __ bind(next); + } + + + // restore rsp + __ addptr(esp, 6 * wordSize, esp); + + // Restore RA + __ pop(RA); + + __ ret_sw(); + + return entry; +} + +address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::MethodKind kind) {BLOCK_COMMENT("generate_math_entry enter"); + // rmethod: methodOop + // V0: scratrch + // rsender: send 's sp + + if (!InlineIntrinsics) return NULL; // Generate a vanilla entry + + address entry_point = __ pc(); + + // These don't need a safepoint check because they aren't virtually + // callable. We won't enter these intrinsics from compiled code. + // If in the future we added an intrinsic which was virtually callable + // we'd have to worry about how to safepoint so that this code is used. + + // mathematical functions inlined by compiler + // (interpreter must provide identical implementation + // in order to avoid monotonicity bugs when switching + // from interpreter to compiler in the middle of some + // computation) + // + // stack: [ lo(arg) ] <-- sp + // [ hi(arg) ] + // + + __ subl(esp, 2 * wordSize, esp); + __ stptr(RA, Address(esp, wordSize)); + __ stptr(rfp, Address(esp, 0)); + __ ldi(rfp, 2 * wordSize, esp); + + // [ fp ] <-- sp + // [ ra ] + // [ lo ] <-- fp + // [ hi ] + //FIXME, need consider this + + if (kind == Interpreter::java_lang_math_sqrt) { + __ fldd(F16, 2 * wordSize, esp); + __ sqrt_d(f0, F16); + } else if (kind == Interpreter::java_lang_math_exp) { + __ fldd(F16, 2 * wordSize, esp); + __ subl(esp, wordSize * 2, esp); + if (StubRoutines::dexp() != NULL) { + __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dexp()))); + } else { + __ call_VM_leaf0(CAST_FROM_FN_PTR(address, SharedRuntime::dexp)); + } + __ addl(esp, wordSize * 2, esp); + } else if (kind == Interpreter::java_lang_math_log) { + __ fldd(F16, 2 * wordSize, esp); + __ subl(esp, wordSize * 2, esp); + if (StubRoutines::dlog() != NULL) { + __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dlog()))); + } else { + __ call_VM_leaf0(CAST_FROM_FN_PTR(address, SharedRuntime::dlog)); + } + __ addl(esp, wordSize * 2, esp); + } else if (kind == Interpreter::java_lang_math_log10) { + __ fldd(F16, 2 * wordSize, esp); + __ subl(esp, wordSize * 2, esp); + if (StubRoutines::dlog10() != NULL) { + __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dlog10()))); + } else { + __ call_VM_leaf0(CAST_FROM_FN_PTR(address, SharedRuntime::dlog10)); + } + __ addl(esp, wordSize * 2, esp); + } else if (kind == Interpreter::java_lang_math_sin) { + __ fldd(F16, 2 * wordSize, esp); + __ subl(esp, wordSize * 2, esp); + if (StubRoutines::dsin() != NULL) { + __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dsin()))); + } else { + __ call_VM_leaf0(CAST_FROM_FN_PTR(address, SharedRuntime::dsin)); + } + __ addl(esp, wordSize * 2, esp); + } else if (kind == Interpreter::java_lang_math_cos) { + __ fldd(F16, 2 * wordSize, esp); + __ subl(esp, wordSize * 2, esp); + if (StubRoutines::dcos() != NULL) { + __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dcos()))); + } else { + __ call_VM_leaf0(CAST_FROM_FN_PTR(address, SharedRuntime::dcos)); + } + __ addl(esp, wordSize * 2, esp); + } else if (kind == Interpreter::java_lang_math_pow) { + __ fldd(F17, 2 * wordSize, esp); + __ fldd(F16, 4 * wordSize, esp); + __ 
subl(esp, wordSize * 2, esp); + if (StubRoutines::dpow() != NULL) { + __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dpow()))); + } else { + __ call_VM_leaf0(CAST_FROM_FN_PTR(address, SharedRuntime::dpow)); + } + __ addiu(esp, wordSize * 2, esp); + } else if (kind == Interpreter::java_lang_math_tan) { + __ fldd(F16, 2 * wordSize, esp); + __ subl(esp, wordSize * 2, esp); + if (StubRoutines::dtan() != NULL) { + __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dtan()))); + } else { + __ call_VM_leaf0(CAST_FROM_FN_PTR(address, SharedRuntime::dtan)); + } + __ addiu(esp, wordSize * 2, esp); + } else { + __ fldd(F16, 2 * wordSize, esp); + switch (kind) { + case Interpreter::java_lang_math_abs: + __ abs_d(f0, F16); + break; + default: + ShouldNotReachHere(); + } + + } + + // must maintain return value in f0:f1 + __ ldptr(RA, Address(rfp, (-1) * wordSize)); + //FIXME + __ movl(esp, rsender); + __ ldptr(rfp, Address(rfp, (-2) * wordSize)); + __ ret_sw();BLOCK_COMMENT("generate_math_entry leave"); + return entry_point; +} diff --git a/src/hotspot/cpu/sw64/templateTable_sw64.cpp b/src/hotspot/cpu/sw64/templateTable_sw64.cpp new file mode 100644 index 00000000000..a1be0b3f5e2 --- /dev/null +++ b/src/hotspot/cpu/sw64/templateTable_sw64.cpp @@ -0,0 +1,4096 @@ +/* + * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "compiler/disassembler.hpp" +#include "gc/shared/collectedHeap.hpp" +#include "gc/shared/tlab_globals.hpp" +#include "interpreter/interpreter.hpp" +#include "interpreter/interpreterRuntime.hpp" +#include "interpreter/interp_masm.hpp" +#include "interpreter/templateTable.hpp" +#include "memory/universe.hpp" +#include "oops/methodData.hpp" +#include "oops/objArrayKlass.hpp" +#include "oops/oop.inline.hpp" +#include "prims/jvmtiExport.hpp" +#include "prims/methodHandles.hpp" +#include "runtime/frame.inline.hpp" +#include "runtime/safepointMechanism.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" +#include "runtime/synchronizer.hpp" +#include "utilities/macros.hpp" + +#define __ Disassembler::hook(__FILE__, __LINE__, _masm)-> +#ifdef PRODUCT +#define BLOCK_COMMENT(str) /* nothing */ +#else +#define BLOCK_COMMENT(str) { char line[1024];sprintf(line,"%s:%s:%d",str,__FILE__, __LINE__); __ block_comment(line);} +#endif +#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") + +// Address Computation: local variables +static inline Address iaddress(int n) { + return Address(rlocals, Interpreter::local_offset_in_bytes(n)); +} + +static inline Address laddress(int n) { + return iaddress(n + 1); +} + +static inline Address faddress(int n) { + return iaddress(n); +} + +static inline Address daddress(int n) { + return laddress(n); +} + +static inline Address aaddress(int n) { + return iaddress(n); +} + +static inline Address iaddress(Register r) { + return Address(rlocals, r, Address::times_ptr); +} + +static inline Address laddress(Register r) { + return Address(rlocals, r, Address::times_ptr, Interpreter::local_offset_in_bytes(1)); +} + +static inline Address faddress(Register r) { + return iaddress(r); +} + +static inline Address daddress(Register r) { + return laddress(r); +} + +static inline Address aaddress(Register r) { + return iaddress(r); +} + + +// expression stack +// (Note: Must not use symmetric equivalents at_rsp_m1/2 since they store +// data beyond the rsp which is potentially unsafe in an MT environment; +// an interrupt may overwrite that data.) +static inline Address at_rsp() { + return Address(esp, 0); +} + +// At top of Java expression stack which may be different than esp(). It +// isn't for category 1 objects. +static inline Address at_tos () { + return Address(esp, Interpreter::expr_offset_in_bytes(0)); +} + +static inline Address at_tos_p1() { + return Address(esp, Interpreter::expr_offset_in_bytes(1)); +} + +static inline Address at_tos_p2() { + return Address(esp, Interpreter::expr_offset_in_bytes(2)); +} + +// Condition conversion +static Assembler::Condition j_not(TemplateTable::Condition cc) { + switch (cc) { + case TemplateTable::equal : return Assembler::notEqual; + case TemplateTable::not_equal : return Assembler::equal; + case TemplateTable::less : return Assembler::greaterEqual; + case TemplateTable::less_equal : return Assembler::greater; + case TemplateTable::greater : return Assembler::lessEqual; + case TemplateTable::greater_equal: return Assembler::less; + } + ShouldNotReachHere(); + return Assembler::zero; +} + + + +// Miscelaneous helper routines +// Store an oop (or NULL) at the address described by obj. 
+// If val == noreg this means store a NULL + + +static void do_oop_store(InterpreterMacroAssembler* _masm, + Address dst, + Register val, + DecoratorSet decorators = 0) {SCOPEMARK_NAME(do_oop_store, _masm) + assert(val == noreg || val == V0, "parameter is just for looks"); + __ store_heap_oop(dst, val, T9, T11, decorators);//It's OK to use register like this? Can use rscratch* to replace? TODO:check jzy +} + +static void do_oop_load(InterpreterMacroAssembler* _masm, + Address src, + Register dst, + DecoratorSet decorators = 0) { + __ load_heap_oop(dst, src, T9, T11, decorators); +} + +Address TemplateTable::at_bcp(int offset) { + assert(_desc->uses_bcp(), "inconsistent uses_bcp information"); + return Address(rbcp, offset); +} + + +void TemplateTable::patch_bytecode(Bytecodes::Code bc, Register bc_reg, + Register temp_reg, bool load_bc_into_bc_reg/*=true*/, + int byte_no) {SCOPEMARK_NAME(patch_bytecode, _masm) + if (!RewriteBytecodes) return; + Label L_patch_done; + + switch (bc) { + case Bytecodes::_fast_aputfield: + case Bytecodes::_fast_bputfield: + case Bytecodes::_fast_zputfield: + case Bytecodes::_fast_cputfield: + case Bytecodes::_fast_dputfield: + case Bytecodes::_fast_fputfield: + case Bytecodes::_fast_iputfield: + case Bytecodes::_fast_lputfield: + case Bytecodes::_fast_sputfield: + { + // We skip bytecode quickening for putfield instructions when + // the put_code written to the constant pool cache is zero. + // This is required so that every execution of this instruction + // calls out to InterpreterRuntime::resolve_get_put to do + // additional, required work. + assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range"); + assert(load_bc_into_bc_reg, "we use bc_reg as temp"); + __ get_cache_and_index_and_bytecode_at_bcp(temp_reg, bc_reg, temp_reg, byte_no, 1); + __ movw(bc_reg, bc); + __ cmpw(temp_reg, (int) 0); + __ jcc(Assembler::zero, L_patch_done); // don't patch + } + break; + default: + assert(byte_no == -1, "sanity"); + // the pair bytecodes have already done the load. 
+ if (load_bc_into_bc_reg) { + __ movw(bc_reg, bc); + } + } + + if (JvmtiExport::can_post_breakpoint()) { + Label L_fast_patch; + // if a breakpoint is present we can't rewrite the stream directly + __ ldbu(temp_reg, at_bcp(0)); + __ cmpw(temp_reg, Bytecodes::_breakpoint); + __ jcc(Assembler::notEqual, L_fast_patch); + __ get_method(temp_reg); + // Let breakpoint table handling rewrite to quicker bytecode + __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::set_original_bytecode_at), temp_reg, rbcp, bc_reg); + __ jmp(L_patch_done); + __ bind(L_fast_patch); + } + +#ifdef ASSERT + Label L_okay; + __ load_unsigned_byte(temp_reg, at_bcp(0)); + __ cmpw(temp_reg, (int) Bytecodes::java_code(bc)); + __ jcc(Assembler::equal, L_okay); + __ cmpw(temp_reg, bc_reg); + __ jcc(Assembler::equal, L_okay); + __ stop("patching the wrong bytecode"); + __ bind(L_okay); +#endif + + // patch bytecode + __ stb(bc_reg, at_bcp(0)); + __ bind(L_patch_done); +} +// Individual instructions + + +void TemplateTable::nop() { + transition(vtos, vtos); + // nothing to do +} + +void TemplateTable::shouldnotreachhere() { + transition(vtos, vtos); + __ stop("shouldnotreachhere bytecode"); +} + +void TemplateTable::aconst_null() { + transition(vtos, atos); + __ bis(R0, R0, FSR); +} + +void TemplateTable::iconst(int value) { + transition(vtos, itos); + if (value == 0) { + __ bis(R0, R0, FSR); + } else { + __ ldi(FSR, value, R0); + } +} + +void TemplateTable::lconst(int value) { + transition(vtos, ltos); + if (value == 0) { + __ bis(R0, R0, FSR); + } else { + __ ldi(FSR, value, R0); + } +} + + + +void TemplateTable::fconst(int value) { + transition(vtos, ftos); + static float _f1 = 1.0, _f2 = 2.0; + float* p; + switch( value ) { + default: ShouldNotReachHere(); + case 0: __ fcpys(f31, f31, FSF); return; + case 1: p = &_f1; break; + case 2: p = &_f2; break; + } + __ load_float(FSF, ExternalAddress((address)p)); +} + +void TemplateTable::dconst(int value) { + transition(vtos, dtos); + static double _d1 = 1.0; + double* p; + switch( value ) { + default: ShouldNotReachHere(); + case 0: __ fcpys(f31, f31, FSF); return; + case 1: p = &_d1; break; + } + __ load_double(FSF, ExternalAddress((address)p)); +} + +void TemplateTable::bipush() { + transition(vtos, itos); + __ load_signed_byte64(FSR, at_bcp(1)); +} + +void TemplateTable::sipush() {SCOPEMARK + transition(vtos, itos); +// the following code is an optimization on sw64 since +// we dont have unaligned load insn + __ load_signed_byte64(FSR, at_bcp(1)); + __ ldbu(AT, at_bcp(2)); + __ slll(FSR, 8, FSR); + __ bis(FSR, AT, FSR); + __ sexth(FSR, FSR); +} + +void TemplateTable::ldc(bool wide) {SCOPEMARK + transition(vtos, vtos); + Register rarg = c_rarg1; + Label call_ldc, notFloat, notClass, notInt, Done; + + if (wide) { + __ get_unsigned_2_byte_index_at_bcp(T2, 1); + } else { + __ load_unsigned_byte(T2, at_bcp(1)); + } + + __ get_cpool_and_tags(T3, T1); + const int base_offset = ConstantPool::header_size() * wordSize; + const int tags_offset = Array::base_offset_in_bytes(); + + // get type + __ lea(T4, Address(T1, T2, Address::times_1, tags_offset)); + __ memb(); + __ ldbu(T4, Address(T4, 0)); + __ memb(); + + // unresolved class - get the resolved class + __ cmpw(T4, JVM_CONSTANT_UnresolvedClass); + __ jcc(Assembler::equal, call_ldc); + + // unresolved class in error state - call into runtime to throw the error + // from the first resolution attempt + __ cmpw(T4, JVM_CONSTANT_UnresolvedClassInError); + __ jcc(Assembler::equal, call_ldc); + + // resolved class - need to 
call vm to get java mirror of the class + __ cmpw(T4, JVM_CONSTANT_Class); + __ jcc(Assembler::notEqual, notClass); + + __ bind(call_ldc); + + __ movw(rarg, wide); + call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::ldc), rarg); + + __ push(atos); + __ jmp(Done); + + __ bind(notClass); + __ cmpw(T4, JVM_CONSTANT_Float); + __ jcc(Assembler::notEqual, notFloat); + + // ftos + __ load_float(FSF, Address(T3, T2, Address::times_ptr, base_offset)); + __ push(ftos); + __ jmp(Done); + + __ bind(notFloat); + __ cmpw(T4, JVM_CONSTANT_Integer); + __ jcc(Assembler::notEqual, notInt); + + // itos + __ ldws(FSR, Address(T3, T2, Address::times_ptr, base_offset)); + __ push(itos); + __ jmp(Done); + + // assume the tag is for condy; if not, the VM runtime will tell us + __ bind(notInt); + condy_helper(Done); + + __ bind(Done); +} + +// Fast path for caching oop constants. +void TemplateTable::fast_aldc(bool wide) {SCOPEMARK + transition(vtos, atos); + + Register result = FSR; + Register tmp = SSR; + Register rarg = c_rarg1; + int index_size = wide ? sizeof(u2) : sizeof(u1); + + Label resolved; + + // We are resolved if the resolved reference cache entry contains a + // non-null object (String, MethodType, etc.) + assert_different_registers(result, tmp); + __ get_cache_index_at_bcp(tmp, 1, index_size); + __ load_resolved_reference_at_index(result, tmp); + __ testptr(result, result); + __ jcc(Assembler::notZero, resolved); + + address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_ldc); + + // first time invocation - must resolve first + __ movw(rarg, (int)bytecode()); + __ call_VM(result, entry, rarg); + __ bind(resolved); + + { // Check for the null sentinel. + // If we just called the VM, it already did the mapping for us, + // but it's harmless to retry. 
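A rough C++ rendering of the fast_aldc path around this point, assuming a plain vector as the resolved-references cache and a free function standing in for InterpreterRuntime::resolve_ldc: consult the cache slot, fall back to the resolver when it is empty, then map the shared null sentinel back to a real null, which, as the comment above notes, is harmless even right after the VM call.

```c++
#include <cstddef>
#include <vector>

struct Oop {};  // placeholder for an object reference

// Stand-in for InterpreterRuntime::resolve_ldc: resolve, cache, and return the constant.
static Oop* resolve_ldc_in_vm(std::vector<Oop*>& cache, std::size_t index, Oop* null_sentinel) {
  cache[index] = null_sentinel;  // pretend the constant resolved to null
  return null_sentinel;
}

static Oop* fast_aldc(std::vector<Oop*>& resolved_references, std::size_t index, Oop* null_sentinel) {
  Oop* result = resolved_references[index];   // non-null slot => already resolved
  if (result == nullptr) {
    result = resolve_ldc_in_vm(resolved_references, index, null_sentinel);  // first execution
  }
  // A null constant is cached as a sentinel object (an empty slot means "unresolved"),
  // so map the sentinel back to a real null before handing the value to the caller.
  if (result == null_sentinel) {
    result = nullptr;
  }
  return result;
}
```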
+ Label notNull; + ExternalAddress null_sentinel((address)Universe::the_null_sentinel_addr()); + __ ldptr(tmp, null_sentinel); + __ resolve_oop_handle(tmp); + __ cmpoop(tmp, result); + __ jcc(Assembler::notEqual, notNull); + __ bis(R0, R0, result); // NULL object reference + __ bind(notNull); + } + + if (VerifyOops) { + __ verify_oop(result); + } +} + +void TemplateTable::ldc2_w() {SCOPEMARK + transition(vtos, vtos); + Label notDouble, notLong, Done; + __ get_unsigned_2_byte_index_at_bcp(T2, 1); // get index in cpool + + __ get_cpool_and_tags(T3, T1); + const int base_offset = ConstantPool::header_size() * wordSize; + const int tags_offset = Array::base_offset_in_bytes(); + + // get type + __ lea(AT, Address(T1, T2, Address::times_1, tags_offset)); + __ memb(); + __ ldbu(AT, Address(AT, 0)); + __ memb(); + __ cmpw(AT, JVM_CONSTANT_Double); + __ jcc(Assembler::notEqual, notDouble); + + // dtos + __ load_double(FSF, Address(T3, T2, Address::times_ptr, base_offset)); + __ push(dtos); + + __ jmp(Done); + __ bind(notDouble); + __ cmpw(AT, JVM_CONSTANT_Long); + __ jcc(Assembler::notEqual, notLong); + + // ltos + __ ldptr(FSR, Address(T3, T2, Address::times_ptr, base_offset + 0 * wordSize)); + __ push(ltos); + __ jmp(Done); + + __ bind(notLong); + condy_helper(Done); + + __ bind(Done); +} + +void TemplateTable::condy_helper(Label& Done) {SCOPEMARK + const Register obj = T0; + const Register off = T1; + const Register flags = T2; + const Register rarg = A1; + __ movw(rarg, (int)bytecode()); + call_VM(obj, CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_ldc), rarg); + __ get_vm_result_2(flags, rthread); + // VMr = obj = base address to find primitive value to push + // VMr2 = flags = (tos, off) using format of CPCE::_flags + __ andw(flags, ConstantPoolCacheEntry::field_index_mask, off); + const Address field(obj, off, Address::times_1, 0*wordSize); + + // What sort of thing are we loading? 
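The flags word fetched via get_vm_result_2 packs two things the code needs: the value's offset from the returned base object (low bits, masked with field_index_mask above) and its tos state (extracted with tos_state_shift/tos_state_mask just below). A small sketch of that decoding; the bit positions here are placeholders for ConstantPoolCacheEntry's real constants.

```c++
#include <cstdint>

// Placeholder layout constants; the real values come from ConstantPoolCacheEntry.
constexpr uint32_t field_index_mask = (1u << 16) - 1;  // low bits: offset of the value
constexpr uint32_t tos_state_shift  = 28;              // top bits: tos state (itos, ftos, ...)
constexpr uint32_t tos_state_mask   = 0xF;

struct CondyFlags {
  uint32_t offset;     // added to the returned base object to reach the primitive value
  uint32_t tos_state;  // selects which load/push sequence to emit
};

inline CondyFlags decode_condy_flags(uint32_t flags) {
  return CondyFlags{ flags & field_index_mask,
                     (flags >> tos_state_shift) & tos_state_mask };
}
```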
+ __ srll(flags, ConstantPoolCacheEntry::tos_state_shift, flags); + __ andw(flags, ConstantPoolCacheEntry::tos_state_mask, flags); + + switch (bytecode()) { + case Bytecodes::_ldc: + case Bytecodes::_ldc_w: + { + // tos in (itos, ftos, stos, btos, ctos, ztos) + Label notInt, notFloat, notShort, notByte, notChar, notBool; + __ cmpw(flags, itos); + __ jcc(Assembler::notEqual, notInt); + // itos + __ ldws(FSR, field); + __ push(itos); + __ jmp(Done); + + __ bind(notInt); + __ cmpw(flags, ftos); + __ jcc(Assembler::notEqual, notFloat); + // ftos + __ load_float(FSF, field); + __ push(ftos); + __ jmp(Done); + + __ bind(notFloat); + __ cmpw(flags, stos); + __ jcc(Assembler::notEqual, notShort); + // stos + __ load_signed_short(FSR, field); + __ push(stos); + __ jmp(Done); + + __ bind(notShort); + __ cmpw(flags, btos); + __ jcc(Assembler::notEqual, notByte); + // btos + __ load_signed_byte64(FSR, field); + __ push(btos); + __ jmp(Done); + + __ bind(notByte); + __ cmpw(flags, ctos); + __ jcc(Assembler::notEqual, notChar); + // ctos + __ load_unsigned_short(FSR, field); + __ push(ctos); + __ jmp(Done); + + __ bind(notChar); + __ cmpw(flags, ztos); + __ jcc(Assembler::notEqual, notBool); + // ztos + __ load_signed_byte64(FSR, field); + __ push(ztos); + __ jmp(Done); + + __ bind(notBool); + break; + } + + case Bytecodes::_ldc2_w: + { + Label notLong, notDouble; + __ cmpw(flags, ltos); + __ jcc(Assembler::notEqual, notLong); + // ltos + // Loading high word first because movptr clobbers rax + __ ldptr(FSR, field); + __ push(ltos); + __ jmp(Done); + + __ bind(notLong); + __ cmpw(flags, dtos); + __ jcc(Assembler::notEqual, notDouble); + // dtos + __ load_double(FSF, field); + __ push(dtos); + __ jmp(Done); + + __ bind(notDouble); + break; + } + + default: + ShouldNotReachHere(); + } + + __ stop("bad ldc/condy"); +} + +void TemplateTable::locals_index(Register reg, int offset) {SCOPEMARK + __ load_unsigned_byte(reg, at_bcp(offset)); + __ subl(R0, reg, reg); +} + +void TemplateTable::iload() {SCOPEMARK + iload_internal(); +} + +void TemplateTable::nofast_iload() { + iload_internal(may_not_rewrite); +} + +void TemplateTable::iload_internal(RewriteControl rc) {SCOPEMARK_NAME(iload_internal, _masm) + transition(vtos, itos); + if (RewriteFrequentPairs && rc == may_rewrite) { + Label rewrite, done; + const Register bc = c_rarg3; + + // get next byte + __ load_unsigned_byte(T2, + at_bcp(Bytecodes::length_for(Bytecodes::_iload))); + // if _iload, wait to rewrite to iload2. We only want to rewrite the + // last two iloads in a pair. Comparing against fast_iload means that + // the next bytecode is neither an iload or a caload, and therefore + // an iload pair. + __ cmpw(T2, Bytecodes::_iload); + __ jcc(Assembler::equal, done); + + __ cmpw(T2, Bytecodes::_fast_iload); + __ movw(bc, Bytecodes::_fast_iload2); + + __ jcc(Assembler::equal, rewrite); + + // if _caload, rewrite to fast_icaload + __ cmpw(T2, Bytecodes::_caload); + __ movw(bc, Bytecodes::_fast_icaload); + __ jcc(Assembler::equal, rewrite); + + // rewrite so iload doesn't check again. 
+ __ movw(bc, Bytecodes::_fast_iload); + + // rewrite + // bc: fast bytecode + __ bind(rewrite); + patch_bytecode(Bytecodes::_iload, bc, T2, false); + __ bind(done); + } + + // Get the local value into tos + locals_index(T2); + __ ldws(FSR, iaddress(T2)); +} + +void TemplateTable::fast_iload2() {SCOPEMARK + transition(vtos, itos); + locals_index(T2); + __ ldws(FSR, iaddress(T2)); + __ push(itos); + locals_index(T2, 3); + __ ldws(FSR, iaddress(T2)); +} + +void TemplateTable::fast_iload() {SCOPEMARK + transition(vtos, itos); + locals_index(T2); + __ ldws(FSR, iaddress(T2)); +} + +void TemplateTable::lload() {SCOPEMARK + transition(vtos, ltos); + locals_index(T2); + __ ldptr(FSR, laddress(T2)); +} + +void TemplateTable::fload() {SCOPEMARK + transition(vtos, ftos); + locals_index(T2); + __ load_float(FSF, faddress(T2)); +} + +void TemplateTable::dload() {SCOPEMARK + transition(vtos, dtos); + locals_index(T2); + __ load_double(FSF, daddress(T2)); +} + +void TemplateTable::aload() {SCOPEMARK + transition(vtos, atos); + locals_index(T2); + __ ldptr(FSR, aaddress(T2)); +} + +void TemplateTable::locals_index_wide(Register reg) {SCOPEMARK + __ ldhu_unaligned_be(reg, at_bcp(2)); + __ subl(R0, reg, reg); +} + +void TemplateTable::wide_iload() { + transition(vtos, itos); + locals_index_wide(T2); + __ ldws(FSR, iaddress(T2)); +} + +void TemplateTable::wide_lload() { + transition(vtos, ltos); + locals_index_wide(T2); + __ ldptr(FSR, laddress(T2)); +} + +void TemplateTable::wide_fload() { + transition(vtos, ftos); + locals_index_wide(T2); + __ load_float(FSF, faddress(T2)); +} + +void TemplateTable::wide_dload() { + transition(vtos, dtos); + locals_index_wide(T2); + __ load_double(FSF, daddress(T2)); +} + +void TemplateTable::wide_aload() { + transition(vtos, atos); + locals_index_wide(T2); + __ ldptr(FSR, aaddress(T2)); +} + +void TemplateTable::index_check(Register array, Register index) { + // Pop ptr into array + __ pop_ptr(array); + index_check_without_pop(array, index); +} + +void TemplateTable::index_check_without_pop(Register array, Register index) {SCOPEMARK_NAME(index_check_without_pop, _masm) + // destroys A2 + // check array + __ null_check(array, arrayOopDesc::length_offset_in_bytes()); + + // check index + __ cmpwu(index, Address(array, arrayOopDesc::length_offset_in_bytes())); + //throw_ArrayIndexOutOfBoundsException assume abberrant index in c_rarg2, should extend c_rarg2 as valid value because of negativen number + if (c_rarg2 != index) __ movl(c_rarg2, index); + + Label skip; + __ jcc(Assembler::below, skip); + // Pass array to create more detailed exceptions. 
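The bounds check above compresses the usual two tests into one: cmpwu compares the index against the array length as unsigned values, so a negative index wraps to a huge unsigned number and fails the same below-test as an over-large one. The equivalent in plain C++, with an exception standing in for the jump to the ArrayIndexOutOfBounds entry:

```c++
#include <cstdint>
#include <stdexcept>

// One unsigned comparison covers both "index < 0" and "index >= length".
inline void index_check(int32_t index, int32_t length) {
  if (static_cast<uint32_t>(index) >= static_cast<uint32_t>(length)) {
    // The interpreter instead jumps to the shared ArrayIndexOutOfBounds entry,
    // passing the array and the (sign-extended) index for the exception message.
    throw std::out_of_range("ArrayIndexOutOfBoundsException");
  }
}
```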
+ __ movl(c_rarg1, array); + __ jump(ExternalAddress(Interpreter::_throw_ArrayIndexOutOfBoundsException_entry)); + __ bind(skip); +} + +void TemplateTable::iaload() { + transition(itos, itos); + // FSR: index + // SSR: array + index_check(SSR, FSR); + __ access_load_at(T_INT, IN_HEAP | IS_ARRAY, FSR, + Address(SSR, FSR, Address::times_4, + arrayOopDesc::base_offset_in_bytes(T_INT)), + noreg, noreg); +} + +void TemplateTable::laload() { + transition(itos, ltos); + // FSR: index + // SSR: array + index_check(SSR, FSR); + __ access_load_at(T_LONG, IN_HEAP | IS_ARRAY, noreg /* ltos */, + Address(SSR, FSR, Address::times_8, + arrayOopDesc::base_offset_in_bytes(T_LONG)), + noreg, noreg); +} + + + +void TemplateTable::faload() { + transition(itos, ftos); + // FSR: index + // SSR: array + index_check(SSR, FSR); + __ access_load_at(T_FLOAT, IN_HEAP | IS_ARRAY, noreg /* ftos */, + Address(SSR, FSR, + Address::times_4, + arrayOopDesc::base_offset_in_bytes(T_FLOAT)), + noreg, noreg); +} + +void TemplateTable::daload() { + transition(itos, dtos); + // FSR: index + // SSR: array + index_check(SSR, FSR); + __ access_load_at(T_DOUBLE, IN_HEAP | IS_ARRAY, noreg /* dtos */, + Address(SSR, FSR, + Address::times_8, + arrayOopDesc::base_offset_in_bytes(T_DOUBLE)), + noreg, noreg); +} + +void TemplateTable::aaload() { + transition(itos, atos); + // FSR: index + // SSR: array + index_check(SSR, FSR); + do_oop_load(_masm, + Address(SSR, FSR, + UseCompressedOops ? Address::times_4 : Address::times_ptr, + arrayOopDesc::base_offset_in_bytes(T_OBJECT)), + FSR, + IS_ARRAY); +} + +void TemplateTable::baload() { + transition(itos, itos); + // FSR: index + // SSR: array + index_check(SSR, FSR); + __ access_load_at(T_BYTE, IN_HEAP | IS_ARRAY, FSR, + Address(SSR, FSR, Address::times_1, arrayOopDesc::base_offset_in_bytes(T_BYTE)), + noreg, noreg); +} + +void TemplateTable::caload() { + transition(itos, itos); + // FSR: index + // SSR: array + index_check(SSR, FSR); + __ access_load_at(T_CHAR, IN_HEAP | IS_ARRAY, FSR, + Address(SSR, FSR, Address::times_2, arrayOopDesc::base_offset_in_bytes(T_CHAR)), + noreg, noreg); +} + +// iload followed by caload frequent pair +void TemplateTable::fast_icaload() { + transition(vtos, itos); + // load index out of locals + locals_index(T2); + __ ldws(FSR, iaddress(T2)); + + // FSR: index + // SSR: array + index_check(SSR, FSR); + __ access_load_at(T_CHAR, IN_HEAP | IS_ARRAY, FSR, + Address(SSR, FSR, Address::times_2, arrayOopDesc::base_offset_in_bytes(T_CHAR)), + noreg, noreg); +} + + +void TemplateTable::saload() { + transition(itos, itos); + // FSR: index + // SSR: array + index_check(SSR, FSR); + __ access_load_at(T_SHORT, IN_HEAP | IS_ARRAY, FSR, + Address(SSR, FSR, Address::times_2, arrayOopDesc::base_offset_in_bytes(T_SHORT)), + noreg, noreg); +} + +void TemplateTable::iload(int n) { + transition(vtos, itos); + __ ldws(FSR, iaddress(n)); +} + +void TemplateTable::lload(int n) { + transition(vtos, ltos); + __ ldptr(FSR, laddress(n)); +} + +void TemplateTable::fload(int n) { + transition(vtos, ftos); + __ load_float(FSF, faddress(n)); +} + +void TemplateTable::dload(int n) { + transition(vtos, dtos); + __ load_double(FSF, daddress(n)); +} + +void TemplateTable::aload(int n) { + transition(vtos, atos); + __ ldptr(FSR, aaddress(n)); +} + +void TemplateTable::aload_0() { + aload_0_internal(); +} + +void TemplateTable::nofast_aload_0() { + aload_0_internal(may_not_rewrite); +} + +void TemplateTable::aload_0_internal(RewriteControl rc) { + transition(vtos, atos); + // According to bytecode 
histograms, the pairs: + // + // _aload_0, _fast_igetfield + // _aload_0, _fast_agetfield + // _aload_0, _fast_fgetfield + // + // occur frequently. If RewriteFrequentPairs is set, the (slow) + // _aload_0 bytecode checks if the next bytecode is either + // _fast_igetfield, _fast_agetfield or _fast_fgetfield and then + // rewrites the current bytecode into a pair bytecode; otherwise it + // rewrites the current bytecode into _fast_aload_0 that doesn't do + // the pair check anymore. + // + // Note: If the next bytecode is _getfield, the rewrite must be + // delayed, otherwise we may miss an opportunity for a pair. + // + // Also rewrite frequent pairs + // aload_0, aload_1 + // aload_0, iload_1 + // These bytecodes with a small amount of code are most profitable + // to rewrite + if (RewriteFrequentPairs && rc == may_rewrite) { + Label rewrite, done; + + const Register bc = c_rarg3; + + // get next byte + __ load_unsigned_byte(T2, at_bcp(Bytecodes::length_for(Bytecodes::_aload_0))); + + // if _getfield then wait with rewrite + __ cmpw(T2, Bytecodes::_getfield); + __ jcc(Assembler::equal, done); + + // if _igetfield then rewrite to _fast_iaccess_0 + assert(Bytecodes::java_code(Bytecodes::_fast_iaccess_0) == Bytecodes::_aload_0, "fix bytecode definition"); + __ cmpw(T2, Bytecodes::_fast_igetfield); + __ movw(bc, Bytecodes::_fast_iaccess_0); + __ jcc(Assembler::equal, rewrite); + + // if _agetfield then rewrite to _fast_aaccess_0 + assert(Bytecodes::java_code(Bytecodes::_fast_aaccess_0) == Bytecodes::_aload_0, "fix bytecode definition"); + __ cmpw(T2, Bytecodes::_fast_agetfield); + __ movw(bc, Bytecodes::_fast_aaccess_0); + __ jcc(Assembler::equal, rewrite); + + // if _fgetfield then rewrite to _fast_faccess_0 + assert(Bytecodes::java_code(Bytecodes::_fast_faccess_0) == Bytecodes::_aload_0, "fix bytecode definition"); + __ cmpw(T2, Bytecodes::_fast_fgetfield); + __ movw(bc, Bytecodes::_fast_faccess_0); + __ jcc(Assembler::equal, rewrite); + + // else rewrite to _fast_aload0 + assert(Bytecodes::java_code(Bytecodes::_fast_aload_0) == Bytecodes::_aload_0, "fix bytecode definition"); + __ movw(bc, Bytecodes::_fast_aload_0); + + // rewrite + // bc: fast bytecode + __ bind(rewrite); + patch_bytecode(Bytecodes::_aload_0, bc, T2, false); + + __ bind(done); + } + + // Do actual aload_0 (must do this after patch_bytecode which might call VM and GC might change oop). 
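The rewrite policy spelled out in the comment above amounts to a small decision table on the bytecode that follows aload_0. A compact C++ rendering; the enum is a placeholder for Bytecodes, and std::nullopt encodes the wait-for-_getfield case where no rewrite happens yet.

```c++
#include <optional>

// Placeholder names mirroring the comment above; the values are not HotSpot's.
enum class Bc { getfield, fast_igetfield, fast_agetfield, fast_fgetfield,
                fast_iaccess_0, fast_aaccess_0, fast_faccess_0, fast_aload_0 };

// Decide how (or whether) to rewrite aload_0, given the bytecode that follows it.
inline std::optional<Bc> aload_0_rewrite(Bc next) {
  switch (next) {
    case Bc::getfield:       return std::nullopt;        // wait: a pair rewrite may still apply
    case Bc::fast_igetfield: return Bc::fast_iaccess_0;  // aload_0 + igetfield pair
    case Bc::fast_agetfield: return Bc::fast_aaccess_0;  // aload_0 + agetfield pair
    case Bc::fast_fgetfield: return Bc::fast_faccess_0;  // aload_0 + fgetfield pair
    default:                 return Bc::fast_aload_0;    // no pair: just skip the check next time
  }
}
```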
+ aload(0); +} + +void TemplateTable::istore() { + transition(itos, vtos); + locals_index(T2); + __ stw(FSR, iaddress(T2)); +} + +void TemplateTable::lstore() { + transition(ltos, vtos); + locals_index(T2); + __ stptr(FSR, laddress(T2)); +} + +void TemplateTable::fstore() { + transition(ftos, vtos); + locals_index(T2); + __ store_float(FSF, faddress(T2)); +} + +void TemplateTable::dstore() { + transition(dtos, vtos); + locals_index(T2); + __ store_double(FSF, daddress(T2)); +} + +void TemplateTable::astore() { + transition(vtos, vtos); + __ pop_ptr(FSR); + locals_index(T2); + __ stptr(FSR, aaddress(T2)); +} + +void TemplateTable::wide_istore() { + transition(vtos, vtos); + __ pop_i(); + locals_index_wide(T2); + __ stw(FSR, iaddress(T2)); +} + +void TemplateTable::wide_lstore() { + transition(vtos, vtos); + __ pop_l(); + locals_index_wide(T2); + __ stptr(FSR, laddress(T2)); +} + +void TemplateTable::wide_fstore() { + transition(vtos, vtos); + __ pop_f(FSF); + locals_index_wide(T2); + __ fsts(FSF, faddress(T2)); +} + +void TemplateTable::wide_dstore() { + transition(vtos, vtos); + __ pop_d(FSF); + locals_index_wide(T2); + __ fstd(FSF, daddress(T2)); +} + +void TemplateTable::wide_astore() { + transition(vtos, vtos); + __ pop_ptr(FSR); + locals_index_wide(T2); + __ stptr(FSR, aaddress(T2)); +} + +void TemplateTable::iastore() { + transition(itos, vtos); + __ pop_i(SSR); + // FSR: value + // SSR: index + // T2 : array + index_check(T2, SSR); // prefer index in SSR + __ access_store_at(T_INT, IN_HEAP | IS_ARRAY, + Address(T2, SSR, Address::times_4, + arrayOopDesc::base_offset_in_bytes(T_INT)), + FSR, noreg, noreg); +} + +void TemplateTable::lastore() { + transition(ltos, vtos); + __ pop_i(SSR); + // FSR: value + // SSR: index + // T2 : array + index_check(T2, SSR); + __ access_store_at(T_LONG, IN_HEAP | IS_ARRAY, + Address(T2, SSR, Address::times_8, + arrayOopDesc::base_offset_in_bytes(T_LONG)), + noreg /* ltos */, noreg, noreg); +} + + +void TemplateTable::fastore() { + transition(ftos, vtos); + __ pop_i(SSR); + // FSR: value + // SSR: index + // T2 : array + index_check(T2, SSR); + __ access_store_at(T_FLOAT, IN_HEAP | IS_ARRAY, + Address(T2, SSR, Address::times_4, + arrayOopDesc::base_offset_in_bytes(T_FLOAT)), + noreg /* ftos */, noreg, noreg); +} + +void TemplateTable::dastore() { + transition(dtos, vtos); + __ pop_i(SSR); + // FSR: value + // SSR: index + // T2 : array + index_check(T2, SSR); + __ access_store_at(T_DOUBLE, IN_HEAP | IS_ARRAY, + Address(T2, SSR, Address::times_8, + arrayOopDesc::base_offset_in_bytes(T_DOUBLE)), + noreg /* dtos */, noreg, noreg); +} + +void TemplateTable::aastore() { + Label is_null, ok_is_subtype, done; + transition(vtos, vtos); + // stack: ..., array, index, value + __ ldptr(FSR, at_tos()); // value + __ ldws(SSR, at_tos_p1()); // index + __ ldptr(T2, at_tos_p2()); // array + + Address element_address(T2, SSR, + UseCompressedOops? Address::times_4 : Address::times_ptr, + arrayOopDesc::base_offset_in_bytes(T_OBJECT)); + + index_check_without_pop(T2, SSR); + __ testptr(FSR, FSR); + __ jcc(Assembler::zero, is_null); + + // Move subklass into T3 + __ load_klass(T3, FSR); + // Move superklass into FSR + __ load_klass(FSR, T2); + __ ldptr(FSR, Address(FSR, + ObjArrayKlass::element_klass_offset())); + + // Generate subtype check. Blows T0, T1 + // Superklass in FSR. Subklass in T3. 
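Before the gen_subtype_check call that follows, it may help to see the aastore store check in plain terms: null may always be stored, otherwise the value's class must be a subtype of the array's element class, and only then is the oop written through the GC barrier. A self-contained sketch with a toy single-inheritance Klass; none of these types are the VM's.

```c++
#include <stdexcept>

struct Klass {
  const Klass* super = nullptr;  // toy single-inheritance chain, not the VM's Klass
  bool is_subtype_of(const Klass* k) const {
    for (const Klass* c = this; c != nullptr; c = c->super)
      if (c == k) return true;
    return false;
  }
};

struct Oop      { const Klass* klass; };
struct ObjArray { const Klass* element_klass; Oop* data[8]; };

// Bounds are assumed to have been checked already (index_check_without_pop above).
void aastore(ObjArray& array, int index, Oop* value) {
  if (value != nullptr && !value->klass->is_subtype_of(array.element_klass)) {
    throw std::runtime_error("ArrayStoreException");  // the failed gen_subtype_check case
  }
  array.data[index] = value;  // the real code stores through a GC barrier (do_oop_store)
}
```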
+ __ gen_subtype_check(T3, ok_is_subtype); + + // Come here on failure + // object is at TOS + __ jump(ExternalAddress(Interpreter::_throw_ArrayStoreException_entry)); + + // Come here on success + __ bind(ok_is_subtype); + + // Get the value we will store + __ ldptr(FSR, at_tos()); + __ ldws(SSR, at_tos_p1()); // index + // Now store using the appropriate barrier + do_oop_store(_masm, element_address, FSR, IS_ARRAY); + __ jmp(done); + + // Have a NULL in FSR, T2=array, SSR=index. Store NULL at ary[idx] + __ bind(is_null); + __ profile_null_seen(T3); + + // Store a NULL + do_oop_store(_masm, element_address, noreg, IS_ARRAY); + + // Pop stack arguments + __ bind(done); + __ addptr(esp, 3 * Interpreter::stackElementSize, esp); +} + +void TemplateTable::bastore() { + transition(itos, vtos); + Register rbx = SSR; + Register rdx = T2; + Register rcx = T12; + Register rax = FSR; + __ pop_i(rbx); + // rax: value + // rbx: index + // rdx: array + index_check(rdx, rbx); // prefer index in rbx + // Need to check whether array is boolean or byte + // since both types share the bastore bytecode. + __ load_klass(rcx, rdx); + __ ldw(rcx, Address(rcx, Klass::layout_helper_offset())); + int diffbit = Klass::layout_helper_boolean_diffbit(); + __ testw(rcx, diffbit); + Label L_skip; + __ jcc(Assembler::zero, L_skip); + __ andw(rax, 0x1, rax); // if it is a T_BOOLEAN array, mask the stored value to 0/1 + __ bind(L_skip); + __ access_store_at(T_BYTE, IN_HEAP | IS_ARRAY, + Address(rdx, rbx, Address::times_1, + arrayOopDesc::base_offset_in_bytes(T_BYTE)), + rax, noreg, noreg); +} + +void TemplateTable::castore() { + transition(itos, vtos); + __ pop_i(SSR); + // FSR: value + // SSR: index + // T2: array + index_check(T2, SSR); + __ access_store_at(T_CHAR, IN_HEAP | IS_ARRAY, + Address(T2, SSR, Address::times_2, + arrayOopDesc::base_offset_in_bytes(T_CHAR)), + FSR, noreg, noreg); +} + + +void TemplateTable::sastore() { + castore(); +} + +void TemplateTable::istore(int n) { + transition(itos, vtos); + __ stw(FSR, iaddress(n)); +} + +void TemplateTable::lstore(int n) { + transition(ltos, vtos); + __ stptr(FSR, laddress(n)); +} + +void TemplateTable::fstore(int n) { + transition(ftos, vtos); + __ store_float(FSF, faddress(n)); +} + +void TemplateTable::dstore(int n) { + transition(dtos, vtos); + __ store_double(FSF, laddress(n)); +} + +void TemplateTable::astore(int n) { + transition(vtos, vtos); + __ pop_ptr(FSR); + __ stptr(FSR, aaddress(n)); +} + +void TemplateTable::pop() { + transition(vtos, vtos); + __ addptr(esp, Interpreter::stackElementSize, esp); +} + +void TemplateTable::pop2() { + transition(vtos, vtos); + __ addptr(esp, 2 * Interpreter::stackElementSize, esp); +} + +void TemplateTable::dup() { + transition(vtos, vtos); + __ load_ptr(0, FSR); + __ push_ptr(FSR); + // stack: ..., a, a +} + +void TemplateTable::dup_x1() { + transition(vtos, vtos); + // stack: ..., a, b + __ load_ptr( 0, FSR); // load b + __ load_ptr( 1, A5 ); // load a + __ store_ptr(1, FSR); // store b + __ store_ptr(0, A5 ); // store a + __ push_ptr(FSR); // push b + // stack: ..., b, a, b +} + +void TemplateTable::dup_x2() { + transition(vtos, vtos); + // stack: ..., a, b, c + __ load_ptr( 0, FSR); // load c + __ load_ptr( 2, A5 ); // load a + __ store_ptr(2, FSR); // store c in a + __ push_ptr(FSR); // push c + // stack: ..., c, b, c, c + __ load_ptr( 2, FSR); // load b + __ store_ptr(2, A5 ); // store a in b + // stack: ..., c, a, c, c + __ store_ptr(1, FSR); // store b in c + // stack: ..., c, a, b, c +} + +void 
TemplateTable::dup2() { + transition(vtos, vtos); + // stack: ..., a, b + __ load_ptr(1, FSR); // load a + __ push_ptr(FSR); // push a + __ load_ptr(1, FSR); // load b + __ push_ptr(FSR); // push b + // stack: ..., a, b, a, b +} + +void TemplateTable::dup2_x1() { + transition(vtos, vtos); + // stack: ..., a, b, c + __ load_ptr( 0, T2); // load c + __ load_ptr( 1, FSR); // load b + __ push_ptr(FSR); // push b + __ push_ptr(T2); // push c + // stack: ..., a, b, c, b, c + __ store_ptr(3, T2); // store c in b + // stack: ..., a, c, c, b, c + __ load_ptr( 4, T2); // load a + __ store_ptr(2, T2); // store a in 2nd c + // stack: ..., a, c, a, b, c + __ store_ptr(4, FSR); // store b in a + // stack: ..., b, c, a, b, c +} + +void TemplateTable::dup2_x2() { + transition(vtos, vtos); + // stack: ..., a, b, c, d + __ load_ptr(0, T2); // load d + __ load_ptr(1, FSR); // load c + __ push_ptr(FSR); // push c + __ push_ptr(T2); // push d + // stack: ..., a, b, c, d, c, d + __ load_ptr(4, FSR); // load b + __ store_ptr(2, FSR); // store b in d + __ store_ptr(4, T2); // store d in b + // stack: ..., a, d, c, b, c, d + __ load_ptr(5, T2); // load a + __ load_ptr(3, FSR); // load c + __ store_ptr(3, T2); // store a in c + __ store_ptr(5, FSR); // store c in a + // stack: ..., c, d, a, b, c, d +} + +void TemplateTable::swap() { + transition(vtos, vtos); + // stack: ..., a, b + __ load_ptr(1, A5); // load a + __ load_ptr(0, FSR); // load b + __ store_ptr(0, A5); // store a in b + __ store_ptr(1, FSR); // store b in a + // stack: ..., b, a +} + +void TemplateTable::iop2(Operation op) { + transition(itos, itos); + + __ pop_i(SSR); + if (UseSW6B) { + switch (op) { + case add : __ addw(SSR, FSR, FSR); break; + case sub : __ subw(SSR, FSR, FSR); break; + case mul : __ mulw(SSR, FSR, FSR); break; + case _and : __ and_ins(SSR, FSR, FSR); break; + case _or : __ bis(SSR, FSR, FSR); break; + case _xor : __ xor_ins(SSR, FSR, FSR); break; + case shl : __ sllw(SSR, FSR, FSR); break; + case shr : __ sraw(SSR, FSR, FSR); break; + case ushr : __ srlw(SSR, FSR, FSR); break; + default : ShouldNotReachHere(); + } + } else { + switch (op) { + case add : __ addwu(SSR, FSR, FSR); break; + case sub : __ subwu(SSR, FSR, FSR); break; + case mul : __ mulwu(SSR, FSR, FSR); break; + case _and : __ andw(SSR, FSR, FSR); break; + case _or : __ orw(SSR, FSR, FSR); break; + case _xor : __ xorw(SSR, FSR, FSR); break; + case shl : __ and_ins(FSR, 0x1f, FSR); __ slll(SSR, FSR, FSR); break; + case shr : __ and_ins(FSR, 0x1f, FSR); __ addw(SSR, 0, SSR); __ sral(SSR, FSR, FSR); break; + case ushr : __ and_ins(FSR, 0x1f, FSR); __ movwu(SSR, SSR); __ srll(SSR, FSR, FSR); break; + default : ShouldNotReachHere(); + } + __ movws(FSR, FSR); + } +} + +void TemplateTable::lop2(Operation op) { + transition(ltos, ltos); + __ pop_l(T2); + + switch (op) { + case add : __ addptr(T2, FSR, FSR); break; + case sub : __ subptr(T2, FSR, FSR); break; + case _and: __ andptr(T2, FSR, FSR); break; + case _or : __ orptr(T2, FSR, FSR); break; + case _xor: __ xorptr(T2, FSR, FSR); break; + default : ShouldNotReachHere(); + } +} + +void TemplateTable::idiv() { + transition(itos, itos); + Label not_zero; + + __ bne_l(FSR, not_zero); + __ jump(ExternalAddress(Interpreter::_throw_ArithmeticException_entry)); + __ bind(not_zero); + + __ pop_i(SSR); + if (UseSW6B) { + __ divw(SSR, FSR, FSR); + } else if (FastIntDiv) { + __ stop("check idiv_sw"); + __ idiv_sw(SSR, FSR, FSR);//TODO:need check jzy + } else { + __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::sdiv), FSR, 
SSR); + //__ movws(FSR, FSR);//clear high 32bits + } +} + +void TemplateTable::irem() { + transition(itos, itos); + Label not_zero; + __ pop_i(SSR); + + __ bne_l(FSR, not_zero); + __ jump(ExternalAddress(Interpreter::_throw_ArithmeticException_entry)); + + __ bind(not_zero); + if (UseSW6B) { + __ remw(SSR, FSR, FSR); + } else if (FastIntRem) { + __ stop("check irem_sw"); + __ irem_sw(SSR, FSR, FSR);//TODO:need check jzy + } else { + __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::srem), FSR, SSR); + //__ movws(FSR, FSR);//clear high 32bits + } +} + +void TemplateTable::lmul() { + transition(ltos, ltos); + __ pop_l(T2); + __ mull(FSR, T2, FSR); +} + +void TemplateTable::ldiv() { + transition(ltos, ltos); + Label normal; + + __ bne_l(FSR, normal); + + __ jump(ExternalAddress(Interpreter::_throw_ArithmeticException_entry)); + + __ bind(normal); + __ pop_l(A2); + if (UseSW6B) { + __ divl(A2, FSR, FSR); + } else if (FastLongDiv) { + Label ldiv, exit; + __ slll(A2, 0xb, T7); + __ sral(T7, 0xb, T7); + __ cmpeq(A2, T7, T7); + __ bne_l(T7, ldiv); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::ldiv), FSR, A2); + __ jmp(exit); + + __ bind(ldiv); + __ ldiv_sw(A2, FSR, FSR); + + __ bind(exit); + } else { + __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::ldiv), FSR, A2); + } +} + +void TemplateTable::lrem() { + transition(ltos, ltos); + Label normal; + + __ bne_l(FSR, normal); + + __ jump(ExternalAddress(Interpreter::_throw_ArithmeticException_entry)); + + __ bind(normal); + __ pop_l (A2); + if (UseSW6B) { + __ reml(A2, FSR, FSR); + } else if (FastLongRem) { + Label lrem, exit; + __ slll(A2, 0xb, T7); + __ sral(T7, 0xb, T7); + __ cmpeq(A2, T7, T7); + __ bne_l(T7, lrem); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::lrem), FSR, A2); + __ jmp(exit); + + __ bind(lrem); + __ lrem_sw(A2, FSR, FSR); + + __ bind(exit); + } else { + __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::lrem), FSR, A2); + } +} + +void TemplateTable::lshl() { + transition(itos, ltos); + __ pop_l(T0); + __ slll(T0, FSR, FSR); +} + +void TemplateTable::lshr() { + transition(itos, ltos); + __ pop_l(T0); + __ sral(T0, FSR, FSR); +} + +void TemplateTable::lushr() { + transition(itos, ltos); + __ pop_l(T0); + __ srll(T0, FSR, FSR); +} + +void TemplateTable::fop2(Operation op) { + transition(ftos, ftos); + switch (op) { + case add: + __ flds(FTF, 0, esp); + __ add_s(FSF, FTF, FSF); + break; + case sub: + __ flds(FTF, 0, esp); + __ sub_s(FSF, FTF, FSF); + break; + case mul: + __ flds(FTF, 0, esp); + __ mul_s(FSF, FTF, FSF); + break; + case div: + __ flds(FTF, 0, esp); + __ div_s(FSF, FTF, FSF); + break; + case rem: + { + __ flds(f16, 0, esp); //x + __ fcpys(FSF, FSF, f17); + Label nan, cont, end; + + // y = 0.0f + __ ffbeq(f17, nan); + // x = NaN infinity + __ boundary_test(f16, GP); + __ beq_l(GP, nan); + // y = NaN + __ boundary_test(f17, GP); + __ bne_l(GP, cont); + __ fimovd(f17, AT); + __ slll(AT, 12, GP); + __ bne_l(GP, nan); + + __ bind(cont); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::frem), 2); + __ jmp(end); + + __ bind(nan); + __ fdivd(f31, f31, FSF); + __ bind(end); + } + break; + default : ShouldNotReachHere(); + } + + __ addptr(esp, 1 * wordSize, esp); +} + +void TemplateTable::dop2(Operation op) { + transition(dtos, dtos); + switch (op) { + case add: + __ fldd(FTF, 0, esp); + __ add_d(FSF, FTF, FSF); + break; + case sub: + __ fldd(FTF, 0, esp); + __ sub_d(FSF, FTF, FSF); + break; + case mul: + __ fldd(FTF, 0, esp); + __ mul_d(FSF, FTF, FSF); + break; + case div: 
+ __ fldd(FTF, 0, esp); + __ div_d(FSF, FTF, FSF); + break; + case rem: + { + __ fldd(f16, 0, esp); //x + __ fcpys(FSF, FSF, f17); + Label nan, cont, end; + // y = 0.0f + __ ffbeq(f17, nan); + // x = NaN infinity + __ boundary_test(f16, GP); + __ beq_l(GP, nan); + // y = NaN + __ boundary_test(f17, GP); + __ bne_l(GP, cont); + __ fimovd(f17, AT); + __ slll(AT, 12, GP); + __ bne_l(GP, nan); + + __ bind(cont); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::drem), 2); + __ jmp(end); + + __ bind(nan); + __ fdivd(f31, f31, FSF); + __ bind(end); + } + break; + default : ShouldNotReachHere(); + } + + __ addptr(esp, 2 * wordSize, esp); +} + +void TemplateTable::ineg() { + transition(itos, itos); + __ subw(R0, FSR, FSR); +} + +void TemplateTable::lneg() { + transition(ltos, ltos); + __ subl(R0, FSR, FSR); +} + +void TemplateTable::fneg() { + transition(ftos, ftos); + __ fcpysn(FSF, FSF, FSF); +} + +void TemplateTable::dneg() { + transition(dtos, dtos); + __ fcpysn(FSF, FSF, FSF); +} + +void TemplateTable::iinc() { + transition(vtos, vtos); + __ load_signed_byte64(AT, at_bcp(2)); // get constant + locals_index(T2); + __ ldws(FSR, iaddress(T2)); + __ addl(FSR, AT, FSR); + __ stw(FSR, iaddress(T2)); +} + +void TemplateTable::wide_iinc() { + transition(vtos, vtos); + locals_index_wide(T2); + __ get_unsigned_2_byte_index_at_bcp(FSR, 4); + __ sexth(FSR, FSR); + __ ldws(AT, iaddress(T2)); + __ addl(AT, FSR, FSR); + __ stw(FSR, iaddress(T2)); +} + +void TemplateTable::convert() { + // Checking +#ifdef ASSERT + { + TosState tos_in = ilgl; + TosState tos_out = ilgl; + switch (bytecode()) { + case Bytecodes::_i2l: // fall through + case Bytecodes::_i2f: // fall through + case Bytecodes::_i2d: // fall through + case Bytecodes::_i2b: // fall through + case Bytecodes::_i2c: // fall through + case Bytecodes::_i2s: tos_in = itos; break; + case Bytecodes::_l2i: // fall through + case Bytecodes::_l2f: // fall through + case Bytecodes::_l2d: tos_in = ltos; break; + case Bytecodes::_f2i: // fall through + case Bytecodes::_f2l: // fall through + case Bytecodes::_f2d: tos_in = ftos; break; + case Bytecodes::_d2i: // fall through + case Bytecodes::_d2l: // fall through + case Bytecodes::_d2f: tos_in = dtos; break; + default : ShouldNotReachHere(); + } + switch (bytecode()) { + case Bytecodes::_l2i: // fall through + case Bytecodes::_f2i: // fall through + case Bytecodes::_d2i: // fall through + case Bytecodes::_i2b: // fall through + case Bytecodes::_i2c: // fall through + case Bytecodes::_i2s: tos_out = itos; break; + case Bytecodes::_i2l: // fall through + case Bytecodes::_f2l: // fall through + case Bytecodes::_d2l: tos_out = ltos; break; + case Bytecodes::_i2f: // fall through + case Bytecodes::_l2f: // fall through + case Bytecodes::_d2f: tos_out = ftos; break; + case Bytecodes::_i2d: // fall through + case Bytecodes::_l2d: // fall through + case Bytecodes::_f2d: tos_out = dtos; break; + default : ShouldNotReachHere(); + } + transition(tos_in, tos_out); + } +#endif // ASSERT + + // Conversion + switch (bytecode()) { + case Bytecodes::_i2l: + __ movws(FSR, FSR); + break; + case Bytecodes::_i2f: + //__ movws(FSR, FSR); + __ ifmovd(FSR, f30); + __ fcvtls(f30, FSF); + break; + case Bytecodes::_i2d: + //__ movws(FSR, FSR); + __ ifmovd(FSR, f30); + __ fcvtld(f30 , FSF); + break; + case Bytecodes::_i2b: + __ sextb(FSR, FSR); + //__ movw(FSR, FSR); + break; + case Bytecodes::_i2c: + __ zapnot(FSR, 0x3, FSR); + break; + case Bytecodes::_i2s: + __ sexth(FSR, FSR); + //__ movws(FSR, FSR); + break; + case 
Bytecodes::_l2i: + __ movws(FSR, FSR); + break; + case Bytecodes::_l2f: + __ ifmovd(FSR, FSF); + __ cvt_s_l(FSF, FSF); + break; + case Bytecodes::_l2d: + __ ifmovd(FSR, FSF); + __ cvt_d_l(FSF, FSF); + break; + case Bytecodes::_f2i: + { + Label L; + __ fcpys(FSF, FSF, f16); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::f2i), 1); + //__ movws(FSR, FSR); + __ bind(L); + } + break; + case Bytecodes::_f2l: + { + Label L; + __ fcpys(FSF, FSF, f16); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::f2l), 1); + __ bind(L); + } + break; + case Bytecodes::_f2d: + __ cvt_d_s(FSF, FSF); + break; + case Bytecodes::_d2i: + { + Label L; + __ fcpys(FSF, FSF, f16); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::d2i), 1); + //__ movws(FSR, FSR); + __ bind(L); + } + break; + case Bytecodes::_d2l: + { + Label L; + __ fcpys(FSF, FSF, f16); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::d2l), 1); + __ bind(L); + } + break; + case Bytecodes::_d2f: + __ cvt_s_d(FSF, FSF); + break; + default: + ShouldNotReachHere(); + } +} + +void TemplateTable::lcmp() { + transition(ltos, itos); + + Label done; + __ pop_l(SSR); + __ cmpl(SSR, FSR); + __ ldi(FSR, -1, R0); + __ jcc(Assembler::less, done); + __ ldi(FSR, 0, R0); + __ jcc(Assembler::equal, done); + __ ldi(FSR, 1, R0); + __ bind(done); +} + +void TemplateTable::float_cmp(bool is_float, int unordered_result) { + Label less, done; + + __ bis(R0, R0, FSR); + if (is_float) { + __ flds(FTF, 0, esp); + __ fcmpeq(FTF, FSF, FcmpRES); + __ addiu(esp, 1 * wordSize, esp); + __ ffbne(FcmpRES, done); + + if (unordered_result < 0) + __ c_ult_s(FTF, FSF); + else + __ c_olt_s(FTF, FSF); + } else { + __ fldd(FTF, 0, esp); + __ fcmpeq(FTF, FSF, FcmpRES); + __ addiu(esp, 2 * wordSize, esp); + __ ffbne(FcmpRES, done); + + if (unordered_result<0) + __ c_ult_d(FTF, FSF); + else + __ c_olt_d(FTF, FSF); + } + __ ffbne(FcmpRES, less); + __ ldi(FSR, 1, R0); + __ jmp(done); + __ bind(less); + __ ldi(FSR, -1, R0); + __ bind(done); +} + +void TemplateTable::branch(bool is_jsr, bool is_wide) {SCOPEMARK_NAME(TemplateTable_branch, _masm) + Register rcx = rmethod; + Register rax = T5; + Register rbx = T2; + Register rdx = T7; + __ get_method(rcx); + __ profile_taken_branch(rax, rbx); // T5 holds updated MDP, T2 + // holds bumped taken count + + const ByteSize be_offset = MethodCounters::backedge_counter_offset() + + InvocationCounter::counter_offset(); + const ByteSize inv_offset = MethodCounters::invocation_counter_offset() + + InvocationCounter::counter_offset(); + + // Load up T7 with the branch displacement TODO:check jzy + if (is_wide) { + __ ldbu(T7, at_bcp(1)); + __ ldbu(AT, at_bcp(2)); + __ slll(T7, 8, T7); + __ bis(T7, AT, T7); + __ ldbu(AT, at_bcp(3)); + __ slll(T7, 8, T7); + __ bis(T7, AT, T7); + __ ldbu(AT, at_bcp(4)); + __ slll(T7, 8, T7); + __ bis(T7, AT, T7); + __ movws(T7, T7); + } else { + __ load_signed_byte64(T7, at_bcp(1)); + __ ldbu(AT, at_bcp(2)); + __ slll(T7, 8, T7); + __ bis(T7, AT, T7); + } + + // Handle all the JSR stuff here, then exit. + // It's much shorter and cleaner than intermingling with the non-JSR + // normal-branch stuff occurring below. + if (is_jsr) { + // Pre-load the next target bytecode into rnext + __ load_unsigned_byte(rnext, Address(rbcp, T7, Address::times_1, 0)); + + // compute return address as bci in FSR + __ lea(FSR, at_bcp((is_wide ? 
5 : 3) - + in_bytes(ConstMethod::codes_offset()))); + __ ldptr(AT, Address(rmethod, Method::const_offset())); + __ subptr(FSR, AT, FSR); + // Adjust the bcp in rbcp by the displacement in T7 + __ addptr(rbcp, T7, rbcp); + // jsr returns atos that is not an oop + __ push_i(FSR); + __ dispatch_only(vtos, true); + return; + } + + // Normal (non-jsr) branch handling + + // Adjust the bcp in S0 by the displacement in T7 + __ addptr(rbcp, T7, rbcp); + + assert(UseLoopCounter || !UseOnStackReplacement, + "on-stack-replacement requires loop counters"); + Label backedge_counter_overflow; + Label dispatch; + if (UseLoopCounter) { + // increment backedge counter for backward branches + // T5: MDO + // T2: MDO bumped taken-count + // rmethod: method + // T7: target offset + // rbcp: target bcp + // rlocals: locals pointer + // check if forward or backward branch + __ jcc(Assembler::positive, dispatch, T7); // count only if backward branch + + // check if MethodCounters exists + Label has_counters; + __ ldptr(rcc, Address(rmethod, Method::method_counters_offset())); + __ jcc(Assembler::notZero, has_counters); + __ push(T7); + __ push(T2); + __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::build_method_counters), + rmethod); + __ pop(T2); + __ pop(T7); + __ ldptr(T5, Address(rmethod, Method::method_counters_offset())); + __ jcc(Assembler::zero, dispatch, T5); + __ bind(has_counters); + + Label no_mdo; + int increment = InvocationCounter::count_increment; + if (ProfileInterpreter) { + // Are we profiling? + __ ldptr(T2, Address(rmethod, Method::method_data_offset())); //T2 for p1876 used + __ jcc(Assembler::zero, no_mdo, T2); + // Increment the MDO backedge counter + const Address mdo_backedge_counter(T2, in_bytes(MethodData::backedge_counter_offset()) + + in_bytes(InvocationCounter::counter_offset())); + const Address mask(T2, in_bytes(MethodData::backedge_mask_offset())); + __ increment_mask_and_jump(mdo_backedge_counter, increment, mask, T5, false, Assembler::zero, + UseOnStackReplacement ? &backedge_counter_overflow : NULL); + __ jmp(dispatch); + } + __ bind(no_mdo); + // Increment backedge counter in MethodCounters* + __ ldptr(T0, Address(rmethod, Method::method_counters_offset())); + const Address mask(T0, in_bytes(MethodCounters::backedge_mask_offset())); + __ increment_mask_and_jump(Address(T0, be_offset), increment, mask, + T5, false, Assembler::zero, + UseOnStackReplacement ? &backedge_counter_overflow : NULL); + __ bind(dispatch); + } + + // Pre-load the next target bytecode into rnext + __ load_unsigned_byte(rnext, Address(rbcp, 0)); + + // continue with the bytecode @ target + // FSR: return bci for jsr's, unused otherwise + // rnext: target bytecode + // rbcp: target bcp + __ dispatch_only(vtos, true); + + if (UseLoopCounter && UseOnStackReplacement) { + + // invocation counter overflow + __ bind(backedge_counter_overflow); + __ subptr(R0, T7, T7); // yj todo: ?? why neg T7 + __ addptr(T7, rbcp, T7); // branch bcp + // IcoResult frequency_counter_overflow([JavaThread*], address branch_bcp) + __ call_VM(noreg, + CAST_FROM_FN_PTR(address, + InterpreterRuntime::frequency_counter_overflow), + T7); + __ load_unsigned_byte(rnext, Address(rbcp, 0)); //swjdk8 and aarch64 use it lsp?? 
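Condensed into C++, the UseLoopCounter/UseOnStackReplacement machinery above does roughly the following on every backward branch: bump a backedge counter, test it against a mask, and only on overflow ask the runtime whether an OSR nmethod is available. The counter layout and threshold below are invented, and `request_osr` stands in for InterpreterRuntime::frequency_counter_overflow.

```c++
#include <cstdint>

struct MethodCounters {
  uint32_t backedge_counter = 0;
  uint32_t backedge_mask    = 0x3ffu << 1;  // invented threshold; the VM derives it from flags
};

using OsrEntry = void (*)();

// Called only for backward branches (negative displacement).  'request_osr' stands in for
// InterpreterRuntime::frequency_counter_overflow and may hand back an OSR entry point.
inline OsrEntry note_backedge(MethodCounters& mc, const void* branch_bcp,
                              OsrEntry (*request_osr)(const void*)) {
  mc.backedge_counter += 2;                             // count_increment-style bump
  if ((mc.backedge_counter & mc.backedge_mask) == 0) {  // the increment_mask_and_jump test
    return request_osr(branch_bcp);                     // non-null: migrate the frame and jump to it
  }
  return nullptr;                                       // not hot yet: keep interpreting
}
```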
+ // V0: osr nmethod (osr ok) or NULL (osr not possible) return by the call_vm + __ testptr(V0, V0); // test result + __ jcc(Assembler::zero, dispatch); // no osr if null + // nmethod may have been invalidated (VM may block upon call_VM return) + __ cmpb(Address(V0, nmethod::state_offset()), nmethod::in_use); + __ jcc(Assembler::notEqual, dispatch); + + // We have the address of an on stack replacement routine in V0. + // In preparation of invoking it, first we must migrate the locals + // and monitors from off the interpreter frame on the stack. + // Ensure to save the osr nmethod over the migration call, + // it will be preserved in rbcp. + __ movl(rbcp, V0); + + call_VM(noreg, CAST_FROM_FN_PTR(address, SharedRuntime::OSR_migration_begin)); + + // V0 is OSR buffer, move it to expected parameter location + __ movl(j_rarg0, V0); + // We use j_rarg definitions here so that registers don't conflict as parameter + // registers change across platforms as we are in the midst of a calling + // sequence to the OSR nmethod and we don't want collision. These are NOT parameters. + + const Register retaddr = j_rarg2; + const Register sender_sp = j_rarg1; + + // pop the interpreter frame + __ ldptr(sender_sp, Address(rfp, frame::interpreter_frame_sender_sp_offset * wordSize)); // get sender + __ leave(); // remove frame anchor + __ move(retaddr, RA); // get return address + // set sp to sender sp + // Ensure compiled code always sees stack at proper alignment + //__ andptr(sender_sp, -(StackAlignmentInBytes), esp); //TODO: jzy check why need alignment? + __ movl(esp, sender_sp); + + // unlike x86 we need no specialized return from compiled code + // to the interpreter or the call stub. + + // push the return address +// __ push(retaddr); + + // and begin the OSR nmethod + __ jmp(Address(rbcp, nmethod::osr_entry_point_offset())); + } +} + +void TemplateTable::if_0cmp(Condition cc) {SCOPEMARK_NAME(if_0cmp, _masm) + transition(itos, vtos); + // assume branch is more often taken than not (loops use backward branches) + Label not_taken; + __ cmpw(FSR, R0); + __ jcc(j_not(cc), not_taken); + branch(false, false); + __ bind(not_taken); + __ profile_not_taken_branch(FSR); +} + +void TemplateTable::if_icmp(Condition cc) { + transition(itos, vtos); + // assume branch is more often taken than not (loops use backward branches) + Label not_taken; + __ pop_i(SSR); + __ cmpw(SSR, FSR); + __ jcc(j_not(cc), not_taken); + branch(false, false); + __ bind(not_taken); + __ profile_not_taken_branch(FSR); +} + +void TemplateTable::if_nullcmp(Condition cc) { + transition(atos, vtos); + // assume branch is more often taken than not (loops use backward branches) + Label not_taken; + __ testptr(FSR, FSR); + __ jcc(j_not(cc), not_taken); + branch(false, false); + __ bind(not_taken); + __ profile_not_taken_branch(FSR); +} + +void TemplateTable::if_acmp(Condition cc) { + transition(atos, vtos); + // assume branch is more often taken than not (loops use backward branches) + Label not_taken; + __ pop_ptr(SSR); + __ cmpoop(SSR, FSR); + __ jcc(j_not(cc), not_taken); + branch(false, false); + __ bind(not_taken); + __ profile_not_taken_branch(FSR); +} + +void TemplateTable::ret() {SCOPEMARK_NAME(TemplateTable::ret, _masm) + transition(vtos, vtos); + locals_index(T2); + __ ldptr(T2, iaddress(T2)); // get return bci, compute return bcp + __ profile_ret(T2, T3); + __ get_method(T1); + __ ldptr(rbcp, Address(T1, Method::const_offset())); + __ lea(rbcp, Address(rbcp, T2, Address::times_1, + ConstMethod::codes_offset())); + __ 
dispatch_next(vtos, 0, true); +} + +void TemplateTable::wide_ret() { + transition(vtos, vtos); + locals_index_wide(T2); + __ ldptr(T2, aaddress(T2)); // get return bci, compute return bcp + __ profile_ret(T2, T3); + __ get_method(T1); + __ ldptr(rbcp, Address(T1, Method::const_offset())); + __ lea(rbcp, Address(rbcp, T2, Address::times_1, ConstMethod::codes_offset())); + __ dispatch_next(vtos, 0, true); +} + +void TemplateTable::tableswitch() { + Label default_case, continue_execution; + transition(itos, vtos); + Register rbx = T2; + Register rcx = T3; + Register rdx = T7; + Register rax = FSR; + + // align rbcp + __ lea(rbx, at_bcp(BytesPerInt)); + __ andptr(rbx, -BytesPerInt, rbx); + // load lo & hi + __ ldwu(rcx, Address(rbx, BytesPerInt)); + __ ldwu(rdx, Address(rbx, 2 * BytesPerInt)); + __ bswapw(rcx); + __ bswapw(rdx); + // check against lo & hi + __ cmpw(rax, rcx); + __ jcc(Assembler::less, default_case); + __ cmpw(rax, rdx); + __ jcc(Assembler::greater, default_case); + // lookup dispatch offset + __ subwu(rax, rcx, rax); + __ ldwu(rdx, Address(rbx, rax, Address::times_4, 3 * BytesPerInt)); + __ profile_switch_case(rax, rbx, rcx); + // continue execution + __ bind(continue_execution); + __ bswapw(rdx); + __ addw(rdx, R0, rdx);// sign extend T7 + __ load_unsigned_byte(rnext, Address(rbcp, rdx, Address::times_1)); + __ addptr(rbcp, rdx, rbcp); + __ dispatch_only(vtos, true); + // handle default + __ bind(default_case); + __ profile_switch_default(rax); + __ ldw(rdx, Address(rbx, 0)); + __ jmp(continue_execution); +} + +void TemplateTable::lookupswitch() { + transition(itos, itos); + __ stop("lookupswitch bytecode should have been rewritten"); +} + +void TemplateTable::fast_linearswitch() { + transition(itos, vtos); + Label loop_entry, loop, found, continue_execution; + const Register rbx = T2; + const Register rcx = T3; + const Register rdx = T7; + // swap FSR so we can avoid swapping the table entries + __ bswapw(FSR); + // align rbcp + __ lea(rbx, at_bcp(BytesPerInt)); // btw: should be able to get rid of + // this instruction (change offsets + // below) + __ andptr(rbx, -BytesPerInt, rbx); + // set counter + __ ldwu(rcx, Address(rbx, BytesPerInt)); + __ bswapw(rcx); + __ jmp(loop_entry); + // table search + __ bind(loop); + __ cmpw(FSR, Address(rbx, rcx, Address::times_8, 2 * BytesPerInt)); + __ jcc(Assembler::equal, found); + __ bind(loop_entry); + __ decrementl(rcx); + __ jcc(Assembler::greaterEqual, loop, rcx); + // default case + __ profile_switch_default(FSR); + __ ldw(rdx, Address(rbx, 0)); + __ jmp(continue_execution); + // entry found -> get offset + __ bind(found); + __ ldwu(rdx, Address(rbx, rcx, Address::times_8, 3 * BytesPerInt)); + __ profile_switch_case(rcx, FSR, rbx); + // continue execution + __ bind(continue_execution); + __ bswapw(rdx); + __ addw(rdx, R0, rdx);// sign extend rdx + __ load_unsigned_byte(rnext, Address(rbcp, rdx, Address::times_1)); + __ addptr(rbcp, rdx, rbcp); + __ dispatch_only(vtos, true); +} + +void TemplateTable::fast_binaryswitch() { + transition(itos, vtos); + // Implementation using the following core algorithm: + // + // int binary_search(int key, LookupswitchPair* array, int n) { + // // Binary search according to "Methodik des Programmierens" by + // // Edsger W. Dijkstra and W.H.J. Feijen, Addison Wesley Germany 1985. 
+ // int i = 0; + // int j = n; + // while (i+1 < j) { + // // invariant P: 0 <= i < j <= n and (a[i] <= key < a[j] or Q) + // // with Q: for all i: 0 <= i < n: key < a[i] + // // where a stands for the array and assuming that the (inexisting) + // // element a[n] is infinitely big. + // int h = (i + j) >> 1; + // // i < h < j + // if (key < array[h].fast_match()) { + // j = h; + // } else { + // i = h; + // } + // } + // // R: a[i] <= key < a[i+1] or Q + // // (i.e., if key is within array, i is the correct index) + // return i; + // } + + // Register allocation + const Register key = FSR; // already set (tosca) + const Register array = T2; + const Register i = T3; + const Register j = T7; + const Register h = T1; + const Register temp = T0; + + //__ subw(FSR, R0, key);//sign extend + // Find array start + __ lea(array, at_bcp(3 * BytesPerInt)); // btw: should be able to + // get rid of this + // instruction (change + // offsets below) + __ andptr(array, -BytesPerInt, array); + + // initialize i & j + __ movw(i, R0); // i = 0; + __ ldwu(j, Address(array, -BytesPerInt)); // j = length(array); + + // Convert j into native byteordering + __ bswapw(j); + + // And start + Label entry; + __ jmp(entry); + BLOCK_COMMENT("binary search loop"); + // binary search loop + { + Label loop; + __ bind(loop); + // int h = (i + j) >> 1; + __ addw(i, j, h); // h = i + j; + __ srll(h, 1, h); // h = (i + j) >> 1; + // if (key < array[h].fast_match()) { + // j = h; + // } else { + // i = h; + // } + // Convert array[h].match to native byte-ordering before compare + __ ldwu(temp, Address(array, h, Address::times_8)); + __ bswapw(temp); + __ subw(temp, R0, temp); + __ cmpl(key, temp); + // j = h if (key < array[h].fast_match()) + __ cmove(Assembler::less, j, h, j); + // i = h if (key >= array[h].fast_match()) + __ cmove(Assembler::greaterEqual, i, h, i); + // while (i+1 < j) + __ bind(entry); + __ addwu(i, 1, h); // i+1 + __ cmpw(h, j); // i+1 < j + __ jcc(Assembler::less, loop); + } + + // end of binary search, result index is i (must check again!) 
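The commented algorithm above, written out as compilable C++ (LookupswitchPair reduced to a match/offset pair, byte swapping of the on-stream values omitted):

```c++
struct LookupswitchPair {
  int match;   // key from the bytecode stream (already in native byte order here)
  int offset;  // branch displacement taken when the key matches
  int fast_match() const { return match; }
};

// Returns i such that array[i].fast_match() <= key < array[i+1].fast_match(),
// or 0 when key is smaller than every entry.
int binary_search(int key, const LookupswitchPair* array, int n) {
  int i = 0;
  int j = n;
  while (i + 1 < j) {
    int h = (i + j) >> 1;  // i < h < j
    if (key < array[h].fast_match()) {
      j = h;
    } else {
      i = h;
    }
  }
  return i;  // caller must still verify array[i].fast_match() == key
}
```

As the final comment notes, the caller still has to compare array[i].fast_match() against the key, which is exactly what the default_case check below does.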
+ Label default_case; + // Convert array[i].match to native byte-ordering before compare + __ ldwu(temp, Address(array, i, Address::times_8)); + __ bswapw(temp); + __ subw(temp, R0, temp); + __ cmpl(key, temp); + __ jcc(Assembler::notEqual, default_case); + + // entry found -> j = offset + __ ldwu(j , Address(array, i, Address::times_8, BytesPerInt)); + __ profile_switch_case(i, key, array); + __ bswapw(j); + __ addw(j, R0, j);// sign extend j + + __ load_unsigned_byte(rnext, Address(rbcp, j, Address::times_1)); + __ addptr(rbcp, j, rbcp); + __ dispatch_only(vtos, true); + + // default case -> j = default offset + __ bind(default_case); + __ profile_switch_default(i); + __ ldwu(j, Address(array, -2 * BytesPerInt)); + __ bswapw(j); + __ addw(j, R0, j); + + __ movws(key, key);//clear hi-32bit + + __ load_unsigned_byte(rnext, Address(rbcp, j, Address::times_1)); + __ addptr(rbcp, j, rbcp); + __ dispatch_only(vtos, true); +} + +void TemplateTable::_return(TosState state) {SCOPEMARK_NAME(TemplateTable::_return, _masm) + transition(state, state); + + assert(_desc->calls_vm(), + "inconsistent calls_vm information"); // call in remove_activation + + if (_desc->bytecode() == Bytecodes::_return_register_finalizer) { + assert(state == vtos, "only valid state"); + Register robj = c_rarg1; + __ ldptr(robj, aaddress(0)); + __ load_klass(T1, robj); + __ ldw(T1, Address(T1, Klass::access_flags_offset())); + __ testw(T1, JVM_ACC_HAS_FINALIZER); + Label skip_register_finalizer; + __ jcc(Assembler::zero, skip_register_finalizer); + + __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::register_finalizer), robj); + + __ bind(skip_register_finalizer); + } + + if (_desc->bytecode() == Bytecodes::_return) { + __ memb();// storestore maybe wmemb + } + + if (_desc->bytecode() != Bytecodes::_return_register_finalizer) { + Label no_safepoint; + NOT_PRODUCT(__ block_comment("Thread-local Safepoint poll")); + __ ldbu(AT, Address(rthread, JavaThread::polling_word_offset())); + __ and_ins(AT, SafepointMechanism::poll_bit(), rcc); + __ jcc(Assembler::zero, no_safepoint); + __ push(state); + __ call_VM(noreg, CAST_FROM_FN_PTR(address, + InterpreterRuntime::at_safepoint)); + __ pop(state); + __ bind(no_safepoint); + } + + // Narrow result if state is itos but result type is smaller. + // Need to narrow in the return bytecode rather than in generate_return_entry + // since compiled code callers expect the result to already be narrowed. + if (state == itos) { + __ narrow(FSR); + } + + __ remove_activation(state, T12); + __ memb(); + + __ jmp(T12); +} + +// ---------------------------------------------------------------------------- +// Volatile variables demand their effects be made known to all CPU's +// in order. Store buffers on most chips allow reads & writes to +// reorder; the JMM's ReadAfterWrite.java test fails in -Xint mode +// without some kind of memory barrier (i.e., it's not sufficient that +// the interpreter does not reorder volatile references, the hardware +// also must not reorder them). +// +// According to the new Java Memory Model (JMM): +// (1) All volatiles are serialized wrt to each other. ALSO reads & +// writes act as aquire & release, so: +// (2) A read cannot let unrelated NON-volatile memory refs that +// happen after the read float up to before the read. It's OK for +// non-volatile memory refs that happen before the volatile read to +// float down below it. 
+// (3) Similar a volatile write cannot let unrelated NON-volatile +// memory refs that happen BEFORE the write float down to after the +// write. It's OK for non-volatile memory refs that happen after the +// volatile write to float up before it. +// +// We only put in barriers around volatile refs (they are expensive), +// not _between_ memory refs (that would require us to track the +// flavor of the previous memory refs). Requirements (2) and (3) +// require some barriers before volatile stores and after volatile +// loads. These nearly cover requirement (1) but miss the +// volatile-store-volatile-load case. This final case is placed after +// volatile-stores although it could just as well go before +// volatile-loads. + +void TemplateTable::volatile_barrier() { + __ memb(); +} + +void TemplateTable::resolve_cache_and_index(int byte_no, + Register cache, + Register index, + size_t index_size) {SCOPEMARK_NAME(resolve_cache_and_index, _masm) + const Register temp = A1; + assert_different_registers(cache, index, temp); + + Label L_clinit_barrier_slow; + Label resolved, Ldone; + + Bytecodes::Code code = bytecode(); + switch (code) { + case Bytecodes::_nofast_getfield: code = Bytecodes::_getfield; break; + case Bytecodes::_nofast_putfield: code = Bytecodes::_putfield; break; + default: break; + } + + assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range"); + __ get_cache_and_index_and_bytecode_at_bcp(cache, index, temp, byte_no, 1, index_size); + __ cmpw(temp, code); // have we resolved this bytecode? + __ jcc(Assembler::equal, resolved); + + // resolve first time through + // Class initialization barrier slow path lands here as well. + __ bind(L_clinit_barrier_slow); + address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_from_cache); + __ movw(temp, code); + __ call_VM(noreg, entry, temp); + // Update registers with resolved info + __ get_cache_and_index_at_bcp(cache, index, 1, index_size); + + __ bind(resolved); + + // Class initialization barrier for static methods + if (VM_Version::supports_fast_class_init_checks() && bytecode() == Bytecodes::_invokestatic) { + const Register method = temp; + const Register klass = temp; + const Register thread = rthread; + + __ load_resolved_method_at_index(byte_no, method, cache, index); + __ load_method_holder(klass, method); + __ clinit_barrier(klass, thread, NULL /*L_fast_path*/, &L_clinit_barrier_slow); + } + +} + +// The Rcache and index registers must be set before call +// n.b unlike x86 cache already includes the index offset// yj todo: ?? 
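As a point of comparison only (this is not how the interpreter is implemented), the ordering rules in the JMM comment above, which volatile_barrier()/memb() enforces with full barriers, are what C++ expresses with sequentially consistent atomics: the load acts as an acquire, the store as a release, and the volatile-store/volatile-load case is also covered.

```c++
#include <atomic>

// Comparison sketch only, not the interpreter's mechanism: seq_cst atomics give
// volatile reads acquire semantics, volatile writes release semantics, and also
// order a volatile store against a following volatile load.
struct VolatileIntField {
  std::atomic<int> value{0};

  int  load_volatile() const { return value.load(std::memory_order_seq_cst); }
  void store_volatile(int v) { value.store(v, std::memory_order_seq_cst); }
};
// The SW64 templates get the same effect with full barriers (volatile_barrier()
// emitting memb()) around an otherwise plain load or store.
```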
+void TemplateTable::load_field_cp_cache_entry(Register obj, + Register cache, + Register index, + Register off, + Register flags, + bool is_static = false) {SCOPEMARK_NAME(load_field_cp_cache_entry, _masm) + assert_different_registers(cache, index, flags, off); + + ByteSize cp_base_offset = ConstantPoolCache::base_offset(); + // Field offset + __ ldptr(off, Address(cache, index, Address::times_ptr, + in_bytes(cp_base_offset + + ConstantPoolCacheEntry::f2_offset()))); + // Flags + __ ldwu(flags, Address(cache, index, Address::times_ptr, + in_bytes(cp_base_offset + + ConstantPoolCacheEntry::flags_offset()))); + + // klass overwrite register + if (is_static) { + __ ldptr(obj, Address(cache, index, Address::times_ptr, + in_bytes(cp_base_offset + + ConstantPoolCacheEntry::f1_offset()))); + const int mirror_offset = in_bytes(Klass::java_mirror_offset()); + __ ldptr(obj, Address(obj, mirror_offset)); + __ resolve_oop_handle(obj); + } +} + +void TemplateTable::load_invoke_cp_cache_entry(int byte_no, + Register method, + Register itable_index, + Register flags, + bool is_invokevirtual, + bool is_invokevfinal, /*unused*/ + bool is_invokedynamic) {SCOPEMARK_NAME(load_invoke_cp_cache_entry, _masm) + // setup registers + const Register cache = T3; + const Register index = T1; + assert_different_registers(method, flags); + assert_different_registers(method, cache, index); + assert_different_registers(itable_index, flags); + assert_different_registers(itable_index, cache, index); + // determine constant pool cache field offsets + assert(is_invokevirtual == (byte_no == f2_byte), "is_invokevirtual flag redundant"); + + const int flags_offset = in_bytes(ConstantPoolCache::base_offset() + + ConstantPoolCacheEntry::flags_offset()); + // access constant pool cache fields + const int index_offset = in_bytes(ConstantPoolCache::base_offset() + + ConstantPoolCacheEntry::f2_offset()); + + size_t index_size = (is_invokedynamic ? sizeof(u4) : sizeof(u2)); + resolve_cache_and_index(byte_no, cache, index, index_size); + __ load_resolved_method_at_index(byte_no, method, cache, index); + + if (itable_index != noreg) { + // pick up itable or appendix index from f2 also: + __ ldptr(itable_index, Address(cache, index, Address::times_ptr, index_offset)); + } + __ ldwu(flags, Address(cache, index, Address::times_ptr, flags_offset)); +} + +// The registers cache and index expected to be set before call. +// Correct values of the cache and index registers are preserved. +void TemplateTable::jvmti_post_field_access(Register cache, + Register index, + bool is_static, + bool has_tos) { + if (JvmtiExport::can_post_field_access()) { + // Check to see if a field access watch has been set before we take + // the time to call into the VM. 
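load_field_cp_cache_entry above reads three words of a resolved field entry: f2 (the field offset), flags (tos state, volatile bit, and other attributes), and, for statics, f1 (the holder Klass, through which the java mirror is loaded). A struct-level sketch of what getfield_or_static below consumes; the real entry is read by byte offsets from generated code, and the bit positions here are illustrative.

```c++
#include <cstdint>

// Shape only: the real entry is ConstantPoolCacheEntry, read by byte offsets
// (f1_offset, f2_offset, flags_offset) from generated code, not via a struct.
struct FieldCacheEntry {
  void*    f1;     // field holder Klass*; statics load the java mirror through it
  intptr_t f2;     // field offset within the object (or the mirror, for statics)
  uint32_t flags;  // packed attributes: tos state, is_volatile, is_final, ...
};

constexpr uint32_t is_volatile_shift = 21;  // illustrative bit positions
constexpr uint32_t tos_state_shift   = 28;
constexpr uint32_t tos_state_mask    = 0xF;

inline bool     field_is_volatile(const FieldCacheEntry& e) { return (e.flags >> is_volatile_shift) & 1u; }
inline uint32_t field_tos_state  (const FieldCacheEntry& e) { return (e.flags >> tos_state_shift) & tos_state_mask; }
```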
+ Label L1; + assert_different_registers(cache, index, rax); + __ ldws(rax, ExternalAddress((address) JvmtiExport::get_field_access_count_addr())); + __ jcc(Assembler::zero, L1, rax); + + // cache entry pointer + __ addptr(cache, in_bytes(ConstantPoolCache::base_offset()), cache); + __ slll(index, LogBytesPerWord, index); + __ addptr(cache, index, cache); + if (is_static) { + __ movl(rax, R0); // NULL object reference + } else { + __ pop(atos); + __ verify_oop(rax); + __ push(atos); + } + // FSR: object pointer or NULL + // cache: cache entry pointer + __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::post_field_access), + rax, cache); + __ get_cache_and_index_at_bcp(cache, index, 1); + __ bind(L1); + } +} + +void TemplateTable::pop_and_check_object(Register r) {SCOPEMARK_NAME(pop_and_check_object, _masm) + __ pop_ptr(r); + __ null_check(r); // for field access must check obj. + __ verify_oop(r); +} + +void TemplateTable::getfield_or_static(int byte_no, bool is_static, RewriteControl rc) {SCOPEMARK_NAME(getfield_or_static, _masm) + transition(vtos, vtos); + + const Register cache = T3; + const Register index = T0; + const Register obj = c_rarg3; + const Register off = T2; + const Register flags = T1; + const Register bc = c_rarg3; // uses same reg as obj, so don't mix them + + resolve_cache_and_index(byte_no, cache, index, sizeof(u2)); + jvmti_post_field_access(cache, index, is_static, false); + load_field_cp_cache_entry(obj, cache, index, off, flags, is_static); + + const Register bVolatile = T11;// don't clobber it + {// yj todo: x86 seems don't care for the volatile, but aarch64 cares. + __ andw(flags, 1 << ConstantPoolCacheEntry::is_volatile_shift, bVolatile); + + Label notVolatile; + __ jcc(Assembler::zero, notVolatile, bVolatile); + volatile_barrier(); + __ bind(notVolatile); + } + + if (!is_static) pop_and_check_object(obj); + + const Address field(obj, off, Address::times_1, 0*wordSize); + + Label Done, notByte, notBool, notInt, notShort, notChar, notLong, notFloat, notObj; + + __ srll(flags, ConstantPoolCacheEntry::tos_state_shift, flags); + // Make sure we don't need to mask edx after the above shift + assert(btos == 0, "change code, btos != 0"); + + __ andw(flags, ConstantPoolCacheEntry::tos_state_mask, flags); + + __ jcc(Assembler::notZero, notByte, flags); + // btos + __ access_load_at(T_BYTE, IN_HEAP, FSR, field, noreg, noreg); + __ push(btos); + // Rewrite bytecode to be faster + if (!is_static && rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_bgetfield, bc, T2); + } + __ jmp(Done); + + __ bind(notByte); + __ cmpw(flags, ztos); + __ jcc(Assembler::notEqual, notBool); + + // ztos (same code as btos) + __ access_load_at(T_BOOLEAN, IN_HEAP, FSR, field, noreg, noreg); + __ push(ztos); + // Rewrite bytecode to be faster + if (!is_static && rc == may_rewrite) { + // use btos rewriting, no truncating to t/f bit is needed for getfield. 
+ patch_bytecode(Bytecodes::_fast_bgetfield, bc, T2); + } + __ jmp(Done); + + __ bind(notBool); + __ cmpw(flags, atos); + __ jcc(Assembler::notEqual, notObj); + // atos + do_oop_load(_masm, field, FSR); + __ push(atos); + if (!is_static && rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_agetfield, bc, T2); + } + __ jmp(Done); + + __ bind(notObj); + __ cmpw(flags, itos); + __ jcc(Assembler::notEqual, notInt); + // itos + __ access_load_at(T_INT, IN_HEAP, FSR, field, noreg, noreg); + __ push(itos); + // Rewrite bytecode to be faster + if (!is_static && rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_igetfield, bc, T2); + } + __ jmp(Done); + + __ bind(notInt); + __ cmpw(flags, ctos); + __ jcc(Assembler::notEqual, notChar); + // ctos + __ access_load_at(T_CHAR, IN_HEAP, FSR, field, noreg, noreg); + __ push(ctos); + // Rewrite bytecode to be faster + if (!is_static && rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_cgetfield, bc, T2); + } + __ jmp(Done); + + __ bind(notChar); + __ cmpw(flags, stos); + __ jcc(Assembler::notEqual, notShort); + // stos + __ access_load_at(T_SHORT, IN_HEAP, FSR, field, noreg, noreg); + __ push(stos); + // Rewrite bytecode to be faster + if (!is_static && rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_sgetfield, bc, T2); + } + __ jmp(Done); + + __ bind(notShort); + __ cmpw(flags, ltos); + __ jcc(Assembler::notEqual, notLong); + // ltos + // yj todo: ??Generate code as if volatile (x86_32). There just aren't enough registers to + // save that information and this code is faster than the test. + __ access_load_at(T_LONG, IN_HEAP | MO_RELAXED, noreg /* ltos */, field, noreg, noreg); + __ push(ltos); + // Rewrite bytecode to be faster + if (!is_static && rc == may_rewrite) patch_bytecode(Bytecodes::_fast_lgetfield, bc, T2); + __ jmp(Done); + + __ bind(notLong); + __ cmpw(flags, ftos); + __ jcc(Assembler::notEqual, notFloat); + // ftos + + __ access_load_at(T_FLOAT, IN_HEAP, noreg /* ftos */, field, noreg, noreg); + __ push(ftos); + // Rewrite bytecode to be faster + if (!is_static && rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_fgetfield, bc, T2); + } + __ jmp(Done); + + __ bind(notFloat); +#ifdef ASSERT + Label notDouble; + __ cmpw(flags, dtos); + __ jcc(Assembler::notEqual, notDouble); +#endif + // dtos + // MO_RELAXED: for the case of volatile field, in fact it adds no extra work for the underlying implementation + __ access_load_at(T_DOUBLE, IN_HEAP | MO_RELAXED, noreg /* dtos */, field, noreg, noreg); + __ push(dtos); + // Rewrite bytecode to be faster + if (!is_static && rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_dgetfield, bc, T2); + } +#ifdef ASSERT + __ jmp(Done); + + __ bind(notDouble); + __ stop("Bad state"); +#endif + + __ bind(Done); + + { + Label notVolatile; + __ jcc(Assembler::zero, notVolatile, bVolatile); + volatile_barrier(); + __ bind(notVolatile); + } +} + +void TemplateTable::getfield(int byte_no) { + getfield_or_static(byte_no, false); +} + +void TemplateTable::nofast_getfield(int byte_no) { + getfield_or_static(byte_no, false, may_not_rewrite); +} + +void TemplateTable::getstatic(int byte_no) { + getfield_or_static(byte_no, true); +} + + +// The registers cache and index expected to be set before call. +// The function may destroy various registers, just not the cache and index registers. 
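+// Roughly: when a JVMTI field modification watch is active, the value on
+// top of the expression stack is described as a jvalue (one word, or two
+// words for ltos/dtos), the cache entry pointer is computed from
+// cache+index, and InterpreterRuntime::post_field_modification(obj,
+// cache_entry, jvalue*) is called before the store itself is performed.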
+void TemplateTable::jvmti_post_field_mod(Register cache, Register index, bool is_static) { + + const Register robj = c_rarg2; + const Register RBX = c_rarg1; + const Register RCX = c_rarg3; + const Register RDX = rscratch1; + + ByteSize cp_base_offset = ConstantPoolCache::base_offset(); + + if (JvmtiExport::can_post_field_modification()) { + // Check to see if a field modification watch has been set before + // we take the time to call into the VM. + Label L1; + assert_different_registers(cache, index, rcc); + __ ldws(rcc, ExternalAddress((address)JvmtiExport::get_field_modification_count_addr())); + __ jcc(Assembler::zero, L1); + + __ get_cache_and_index_at_bcp(robj, RDX, 1); + + + if (is_static) { + // Life is simple. Null out the object pointer. + __ movw(RBX, R0); + + } else { + // Life is harder. The stack holds the value on top, followed by + // the object. We don't know the size of the value, though; it + // could be one or two words depending on its type. As a result, + // we must find the type to determine where the object is. + __ ldwu(RCX, Address(robj, RDX, + Address::times_ptr, + in_bytes(cp_base_offset + + ConstantPoolCacheEntry::flags_offset()))); + __ srll(RCX, ConstantPoolCacheEntry::tos_state_shift, RCX); + + // Make sure we don't need to mask rcx after the above shift + ConstantPoolCacheEntry::verify_tos_state_shift(); + __ ldptr(c_rarg1, at_tos_p1()); // initially assume a one word jvalue + __ cmpw(c_rarg3, ltos); + __ ldptr(AT, at_tos_p2()); + __ cmove(Assembler::equal, + c_rarg1, AT, c_rarg1); // ltos (two word jvalue) + __ cmpw(c_rarg3, dtos); + __ cmove(Assembler::equal, + c_rarg1, AT, c_rarg1); // dtos (two word jvalue) + } + // cache entry pointer + __ addptr(robj, in_bytes(cp_base_offset), robj); + __ slll(RDX, LogBytesPerWord, RDX); + __ addptr(robj, RDX, robj); + // object (tos) + __ movl(RCX, esp); + // c_rarg1: object pointer set up above (NULL if static) + // c_rarg2: cache entry pointer + // c_rarg3: jvalue object on the stack + __ call_VM(noreg, + CAST_FROM_FN_PTR(address, + InterpreterRuntime::post_field_modification), + RBX, robj, RCX); + __ get_cache_and_index_at_bcp(cache, index, 1); + __ bind(L1); + } +} + +void TemplateTable::putfield_or_static(int byte_no, bool is_static, RewriteControl rc) {SCOPEMARK_NAME(putfield_or_static, _masm) + transition(vtos, vtos); + + const Register cache = T3; + const Register index = T0; + const Register obj = T3; + const Register off = T2; + const Register flags = T1; + const Register bc = c_rarg3; + + resolve_cache_and_index(byte_no, cache, index, sizeof(u2)); + jvmti_post_field_mod(cache, index, is_static); + load_field_cp_cache_entry(obj, cache, index, off, flags, is_static); + + //x64 dont need mb since its mem seq model is strong, but we are weak, we ref aarch64 here. + const Register bVolatile = T11;// yj todo: will T11 be clobber?? 
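+  // Barrier pattern for a volatile store on SW64 (weakly ordered, unlike
+  // x86 TSO): memb() before the store, and memb() again after the Done
+  // label, mirroring the aarch64 placement referenced above.
+  //
+  // Illustrative Java-level example (not generated code):
+  //   int data; volatile boolean ready;
+  //   data = 42;     // plain store, must not sink below the volatile store
+  //   ready = true;  // volatile putfield handled by this template
+  // The leading memb() keeps the plain store above the volatile store;
+  // the trailing memb() orders the volatile store against later volatile
+  // loads (the volatile-store-volatile-load case described in the
+  // volatile_barrier() comment above).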
+ Label notVolatile, Done; + __ andw(flags, 1 << ConstantPoolCacheEntry::is_volatile_shift, bVolatile); + __ jcc(Assembler::zero, notVolatile, bVolatile); + volatile_barrier(); + __ BIND(notVolatile); + + // field addresses + const Address field(obj, off, Address::times_1, 0*wordSize); + + Label notByte, notBool, notInt, notShort, notChar, + notLong, notFloat, notObj, notDouble; + + __ srll(flags, ConstantPoolCacheEntry::tos_state_shift, flags); + + assert(btos == 0, "change code, btos != 0"); + __ andw(flags, ConstantPoolCacheEntry::tos_state_mask, flags); + __ jcc(Assembler::notZero, notByte, flags); + + // btos + { + __ pop(btos); + if (!is_static) pop_and_check_object(obj); + __ access_store_at(T_BYTE, IN_HEAP, field, FSR, noreg, noreg); + if (!is_static && rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_bputfield, bc, T2, true, byte_no); + } + __ jmp(Done); + } + + __ BIND(notByte); + __ cmpw(flags, ztos); + __ jcc(Assembler::notEqual, notBool); + + // ztos + { + __ pop(ztos); + if (!is_static) pop_and_check_object(obj); + __ access_store_at(T_BOOLEAN, IN_HEAP, field, FSR, noreg, noreg); + if (!is_static && rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_zputfield, bc, T2, true, byte_no); + } + __ jmp(Done); + } + + __ BIND(notBool); + __ cmpw(flags, atos); + __ jcc(Assembler::notEqual, notObj); + + // atos + { + __ pop(atos); + if (!is_static) pop_and_check_object(obj); + // Store into the field + do_oop_store(_masm, field, FSR); + if (!is_static && rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_aputfield, bc, T2, true, byte_no); + } + __ jmp(Done); + } + + __ BIND(notObj); + __ cmpw(flags, itos); + __ jcc(Assembler::notEqual, notInt); + + // itos + { + __ pop(itos); + if (!is_static) pop_and_check_object(obj); + __ access_store_at(T_INT, IN_HEAP, field, FSR, noreg, noreg); + if (!is_static && rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_iputfield, bc, T2, true, byte_no); + } + __ jmp(Done); + } + + __ BIND(notInt); + __ cmpw(flags, ctos); + __ jcc(Assembler::notEqual, notChar); + + // ctos + { + __ pop(ctos); + if (!is_static) pop_and_check_object(obj); + __ access_store_at(T_CHAR, IN_HEAP, field, FSR, noreg, noreg); + if (!is_static && rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_cputfield, bc, T2, true, byte_no); + } + __ jmp(Done); + } + + __ BIND(notChar); + __ cmpw(flags, stos); + __ jcc(Assembler::notEqual, notShort); + + // stos + { + __ pop(stos); + if (!is_static) pop_and_check_object(obj); + __ access_store_at(T_SHORT, IN_HEAP, field, FSR, noreg, noreg); + if (!is_static && rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_sputfield, bc, T2, true, byte_no); + } + __ jmp(Done); + } + + __ BIND(notShort); + __ cmpw(flags, ltos); + __ jcc(Assembler::notEqual, notLong); + + // ltos + { + __ pop(ltos); + if (!is_static) pop_and_check_object(obj); + __ access_store_at(T_LONG, IN_HEAP, field, noreg /* ltos*/, noreg, noreg); + if (!is_static && rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_lputfield, bc, T2, true, byte_no); + } + __ jmp(Done); + } + + __ BIND(notLong); + __ cmpw(flags, ftos); + __ jcc(Assembler::notEqual, notFloat); + + // ftos + { + __ pop(ftos); + if (!is_static) pop_and_check_object(obj); + __ access_store_at(T_FLOAT, IN_HEAP, field, noreg /* ftos */, noreg, noreg); + if (!is_static && rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_fputfield, bc, T2, true, byte_no); + } + __ jmp(Done); + } + + __ BIND(notFloat); +#ifdef ASSERT + __ cmpw(flags, dtos); + __ jcc(Assembler::notEqual, notDouble); 
+#endif + + // dtos + { + __ pop(dtos); + if (!is_static) pop_and_check_object(obj); + __ access_store_at(T_DOUBLE, IN_HEAP, field, noreg /* dtos */, noreg, noreg); + if (!is_static && rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_dputfield, bc, T2, true, byte_no); + } + } + +#ifdef ASSERT + __ jmp(Done); + + __ BIND(notDouble); + __ stop("Bad state"); +#endif + + __ BIND(Done); + + // Check for volatile store + { + Label notVolatile; + __ jcc(Assembler::zero, notVolatile, bVolatile); + volatile_barrier(); + __ BIND(notVolatile); + } +} + +void TemplateTable::putfield(int byte_no) { + putfield_or_static(byte_no, false); +} + +void TemplateTable::nofast_putfield(int byte_no) { + putfield_or_static(byte_no, false, may_not_rewrite); +} + +void TemplateTable::putstatic(int byte_no) { + putfield_or_static(byte_no, true); +} + +void TemplateTable::jvmti_post_fast_field_mod() { + + const Register scratch = c_rarg3; + const Register rbx = T2; + + if (JvmtiExport::can_post_field_modification()) { + // Check to see if a field modification watch has been set before + // we take the time to call into the VM. + Label L2; + __ ldws(scratch, ExternalAddress((address)JvmtiExport::get_field_modification_count_addr())); + __ jcc(Assembler::zero, L2, scratch); + __ pop_ptr(rbx); // copy the object pointer from tos + __ verify_oop(rbx); + __ push_ptr(rbx); // put the object pointer back on tos + // Save tos values before call_VM() clobbers them. Since we have + // to do it for every data type, we use the saved values as the + // jvalue object. + switch (bytecode()) { // load values into the jvalue object + case Bytecodes::_fast_aputfield: __ push_ptr(FSR); break; + case Bytecodes::_fast_bputfield: // fall through + case Bytecodes::_fast_zputfield: // fall through + case Bytecodes::_fast_sputfield: // fall through + case Bytecodes::_fast_cputfield: // fall through + case Bytecodes::_fast_iputfield: __ push_i(FSR); break; + case Bytecodes::_fast_dputfield: __ push(dtos); break; + case Bytecodes::_fast_fputfield: __ push(ftos); break; + case Bytecodes::_fast_lputfield: __ push_l(FSR); break; + + default: + ShouldNotReachHere(); + } + __ movl(scratch, esp); + // access constant pool cache entry + __ get_cache_entry_pointer_at_bcp(c_rarg2, FSR, 1); + __ verify_oop(rbx); + // rbx: object pointer copied above + // c_rarg2: cache entry pointer + // c_rarg3: jvalue object on the stack + __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::post_field_modification), rbx, c_rarg2, c_rarg3); + + switch (bytecode()) { // restore tos values + case Bytecodes::_fast_aputfield: __ pop_ptr(FSR); break; + case Bytecodes::_fast_bputfield: // fall through + case Bytecodes::_fast_zputfield: // fall through + case Bytecodes::_fast_sputfield: // fall through + case Bytecodes::_fast_cputfield: // fall through + case Bytecodes::_fast_iputfield: __ pop_i(FSR); break; + case Bytecodes::_fast_dputfield: __ pop(dtos); break; + case Bytecodes::_fast_fputfield: __ pop(ftos); break; + case Bytecodes::_fast_lputfield: __ pop_l(FSR); break; + default: break; + } + __ bind(L2); + } +} + +void TemplateTable::fast_storefield(TosState state) { + transition(state, vtos); + + const Register scratch = T11; + const Register rbx = T2; + const Register rcx = T3; + const Register rdx = T1; + + ByteSize base = ConstantPoolCache::base_offset(); + + jvmti_post_fast_field_mod(); + + // access constant pool cache + __ get_cache_and_index_at_bcp(rcx, rbx, 1); + + // test for volatile with rdx but rdx is tos register for lputfield. 
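+  // fast_storefield is the quickened form installed by patch_bytecode()
+  // in putfield_or_static() above; the cp cache entry is already resolved,
+  // so only the flags word and the field offset (f2) are reloaded here.
+  // The volatile bit is copied into 'scratch' (T11) so that it survives
+  // the per-type store sequence below.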
+ __ ldwu(rdx, Address(rcx, rbx, Address::times_ptr, + in_bytes(base + + ConstantPoolCacheEntry::flags_offset()))); + + // replace index with field offset from cache entry + __ ldptr(rbx, Address(rcx, rbx, Address::times_ptr, + in_bytes(base + ConstantPoolCacheEntry::f2_offset()))); + + //x64 dont need mb since its mem seq model is strong, but we are weak, we ref aarch64 here. + { + __ andw(rdx, 1 << ConstantPoolCacheEntry::is_volatile_shift, scratch); + + Label notVolatile; + __ jcc(Assembler::zero, notVolatile, scratch); + volatile_barrier(); + __ bind(notVolatile); + } + + // Get object from stack + pop_and_check_object(rcx); + + // field address + const Address field(rcx, rbx, Address::times_1); + + // access field + switch (bytecode()) { + case Bytecodes::_fast_aputfield: + do_oop_store(_masm, field, FSR); + break; + case Bytecodes::_fast_lputfield: + __ access_store_at(T_LONG, IN_HEAP, field, noreg /* ltos */, noreg, noreg); + break; + case Bytecodes::_fast_iputfield: + __ access_store_at(T_INT, IN_HEAP, field, FSR, noreg, noreg); + break; + case Bytecodes::_fast_zputfield: + __ access_store_at(T_BOOLEAN, IN_HEAP, field, FSR, noreg, noreg); + break; + case Bytecodes::_fast_bputfield: + __ access_store_at(T_BYTE, IN_HEAP, field, FSR, noreg, noreg); + break; + case Bytecodes::_fast_sputfield: + __ access_store_at(T_SHORT, IN_HEAP, field, FSR, noreg, noreg); + break; + case Bytecodes::_fast_cputfield: + __ access_store_at(T_CHAR, IN_HEAP, field, FSR, noreg, noreg); + break; + case Bytecodes::_fast_fputfield: + __ access_store_at(T_FLOAT, IN_HEAP, field, noreg /* ftos*/, noreg, noreg); + break; + case Bytecodes::_fast_dputfield: + __ access_store_at(T_DOUBLE, IN_HEAP, field, noreg /* dtos*/, noreg, noreg); + break; + default: + ShouldNotReachHere(); + } + + { + Label notVolatile; + __ jcc(Assembler::zero, notVolatile, scratch); + volatile_barrier(); + __ bind(notVolatile); + } +} + +void TemplateTable::fast_accessfield(TosState state) { + transition(atos, state); + + const Register scratch = T11; + const Register rcx = T3; + const Register rbx = T2; + + // Do the JVMTI work here to avoid disturbing the register state below + if (JvmtiExport::can_post_field_access()) { + // Check to see if a field access watch has been set before we + // take the time to call into the VM. + Label L1; + __ ldws(rcx, ExternalAddress((address) JvmtiExport::get_field_access_count_addr())); + __ jcc(Assembler::zero, L1, rcx); + // access constant pool cache entry + __ get_cache_entry_pointer_at_bcp(c_rarg2, rcx, 1); + __ verify_oop(FSR); + __ push(FSR); // save object pointer before call_VM() clobbers it + __ movl(c_rarg1, FSR); + // c_rarg1: object pointer copied above + // c_rarg2: cache entry pointer + __ call_VM(noreg, + CAST_FROM_FN_PTR(address, InterpreterRuntime::post_field_access), + c_rarg1, c_rarg2); + __ pop_ptr(FSR); + __ bind(L1); + } + + // access constant pool cache + __ get_cache_and_index_at_bcp(rcx, rbx, 1); + + // Must prevent reordering of stores for object initialization with stores that publish the new object. 
+ __ memb(); + + // replace index with field offset from cache entry + { + __ ldw(AT, Address(rcx, rbx, Address::times_8, + in_bytes(ConstantPoolCache::base_offset() + + ConstantPoolCacheEntry::flags_offset()))); + __ andw(AT, 1 << ConstantPoolCacheEntry::is_volatile_shift, scratch); + + Label notVolatile; + __ jcc(Assembler::zero, notVolatile, scratch); + volatile_barrier(); + __ bind(notVolatile); + } + __ ldptr(rbx, Address(rcx, rbx, Address::times_ptr, + in_bytes(ConstantPoolCache::base_offset() + + ConstantPoolCacheEntry::f2_offset()))); + + // FSR: object + __ verify_oop(FSR); + __ null_check(FSR); + // field addresses + Address field(FSR, rbx, Address::times_1); + + // access field + switch (bytecode()) { + case Bytecodes::_fast_agetfield: + do_oop_load(_masm, field, FSR); + __ verify_oop(FSR); + break; + case Bytecodes::_fast_lgetfield: + __ access_load_at(T_LONG, IN_HEAP, noreg /* ltos */, field, noreg, noreg); + break; + case Bytecodes::_fast_igetfield: + __ access_load_at(T_INT, IN_HEAP, FSR, field, noreg, noreg); + break; + case Bytecodes::_fast_bgetfield: + __ access_load_at(T_BYTE, IN_HEAP, FSR, field, noreg, noreg); + break; + case Bytecodes::_fast_sgetfield: + __ access_load_at(T_SHORT, IN_HEAP, FSR, field, noreg, noreg); + break; + case Bytecodes::_fast_cgetfield: + __ access_load_at(T_CHAR, IN_HEAP, FSR, field, noreg, noreg); + break; + case Bytecodes::_fast_fgetfield: + __ access_load_at(T_FLOAT, IN_HEAP, noreg /* ftos */, field, noreg, noreg); + break; + case Bytecodes::_fast_dgetfield: + __ access_load_at(T_DOUBLE, IN_HEAP, noreg /* dtos */, field, noreg, noreg); + break; + default: + ShouldNotReachHere(); + } + + { + Label notVolatile; + __ jcc(Assembler::zero, notVolatile, scratch); + volatile_barrier(); + __ bind(notVolatile); + } +} + +void TemplateTable::fast_xaccess(TosState state) { + transition(vtos, state); + + const Register scratch = T11; + const Register rcx = T3; + const Register rdx = T2; + const Register rbx = T1; + + // get receiver + __ ldptr(FSR, aaddress(0)); + // access constant pool cache + __ get_cache_and_index_at_bcp(rcx, rdx, 2); + __ ldptr(rbx, + Address(rcx, rdx, Address::times_ptr, + in_bytes(ConstantPoolCache::base_offset() + + ConstantPoolCacheEntry::f2_offset()))); + + { + __ ldw(AT, Address(rcx, rdx, Address::times_8, + in_bytes(ConstantPoolCache::base_offset() + + ConstantPoolCacheEntry::flags_offset()))); + __ andw(AT, 1 << ConstantPoolCacheEntry::is_volatile_shift, scratch); + + Label notVolatile; + __ jcc(Assembler::zero, notVolatile, scratch); + volatile_barrier(); + __ bind(notVolatile); + } + + // make sure exception is reported in correct bcp range (getfield is + // next instruction) + __ incrementl(rbcp); + __ null_check(FSR); + const Address field = Address(FSR, rbx, Address::times_1, 0*wordSize); + switch (state) { + case itos: + __ access_load_at(T_INT, IN_HEAP, FSR, field, noreg, noreg); + break; + case atos: + do_oop_load(_masm, field, FSR); + __ verify_oop(FSR); + break; + case ftos: + __ access_load_at(T_FLOAT, IN_HEAP, noreg /* ftos */, field, noreg, noreg); + break; + default: + ShouldNotReachHere(); + } + + { + Label notVolatile; + __ jcc(Assembler::zero, notVolatile, scratch); + volatile_barrier(); + __ bind(notVolatile); + } + + __ decrementl(rbcp); +} + +//----------------------------------------------------------------------------- + + +void TemplateTable::prepare_invoke(int byte_no, + Register method, // linked method (or i-klass) + Register index, // itable index, MethodType, etc. 
+ Register recv, // if caller wants to see it + Register flags // if caller wants to test it + ) {SCOPEMARK_NAME(prepare_invoke, _masm) + const Register rdx = T1; + const Register rcx = T3; + + // determine flags + const Bytecodes::Code code = bytecode(); + const bool is_invokeinterface = code == Bytecodes::_invokeinterface; + const bool is_invokedynamic = code == Bytecodes::_invokedynamic; + const bool is_invokehandle = code == Bytecodes::_invokehandle; + const bool is_invokevirtual = code == Bytecodes::_invokevirtual; + const bool is_invokespecial = code == Bytecodes::_invokespecial; + const bool load_receiver = (recv != noreg); + const bool save_flags = (flags != noreg); + assert(load_receiver == (code != Bytecodes::_invokestatic && code != Bytecodes::_invokedynamic), ""); + assert(save_flags == (is_invokeinterface || is_invokevirtual), "need flags for vfinal"); + assert(flags == noreg || flags == rdx, ""); + assert(recv == noreg || recv == rcx, ""); + //assert(method == rmethod, "rmethod is a S reg"); + + // setup registers & access constant pool cache + if (recv == noreg) recv = rcx; + if (flags == noreg) flags = rdx; + assert_different_registers(method, index, recv, flags); + + // save 'interpreter return address' + __ save_bcp(); + + load_invoke_cp_cache_entry(byte_no, method, index, flags, is_invokevirtual, false, is_invokedynamic); + + // maybe push appendix to arguments (just before return address) + if (is_invokedynamic || is_invokehandle) {// + Label L_no_push; + Register rbx = rmethod; + __ testw(flags, (1 << ConstantPoolCacheEntry::has_appendix_shift)); + __ jcc(Assembler::zero, L_no_push); + // Push the appendix as a trailing parameter. + // This must be done before we get the receiver, + // since the parameter_size includes it. + __ push(rbx); + __ movl(rbx, index); + __ load_resolved_reference_at_index(index, rbx); + __ pop(rbx); + __ push(index); // push appendix (MethodType, CallSite, etc.) + __ bind(L_no_push); + } + + // load receiver if needed (after appendix is pushed so parameter size is correct) + // Note: no return address pushed yet + if (load_receiver) { + __ andw(flags, ConstantPoolCacheEntry::parameter_size_mask, recv); + const int no_return_pc_pushed_yet = 0; // argument slot correction before we push return address + // Since we won't push RA on stack, no_return_pc_pushed_yet should be 0. 
+ const int receiver_is_at_end = -1; // back off one slot to get receiver + Address recv_addr = __ argument_address(recv, no_return_pc_pushed_yet + receiver_is_at_end); + __ ldptr(recv, recv_addr); + __ verify_oop(recv); + } + + if (save_flags) { + __ movw(rbcp, flags); + } + + // compute return type + __ srll(flags, ConstantPoolCacheEntry::tos_state_shift, flags); + // Make sure we don't need to mask flags after the above shift + ConstantPoolCacheEntry::verify_tos_state_shift(); + // load return address + { + const address table_addr = (address) Interpreter::invoke_return_entry_table_for(code); + ExternalAddress table(table_addr); + __ lea(rscratch1, table); + __ ldptr(RA, Address(rscratch1, flags, Address::times_ptr)); + } + + // push return address + // __ push(RA);// yj: we dont't push ret addr + + if (save_flags) { + __ movw(flags, rbcp); + __ restore_bcp(); + } +} + +void TemplateTable::invokevirtual_helper(Register index, + Register recv, + Register flags) {SCOPEMARK_NAME(invokevirtual_helper, _masm) + const Register rdx = T2; + const Register rax = FSR; + + // Uses temporary registers FSR, rdx + assert_different_registers(index, recv, rax, rdx); + assert(index == rmethod, ""); + assert(recv == T3, ""); + + // Test for an invoke of a final method + Label notFinal; + __ testw(flags, (1 << ConstantPoolCacheEntry::is_vfinal_shift)); + __ jcc(Assembler::zero, notFinal); + + const Register method = index; // method must be rmethod + assert(method == rmethod, + "method* must be rmethod for interpreter calling convention"); + + // do the call - the index is actually the method to call + // that is, f2 is a vtable index if !is_vfinal, else f2 is a Method* + + // It's final, need a null check here! + __ null_check(recv); + + // profile this call + __ profile_final_call(rax); + __ profile_arguments_type(rax, method, rbcp, true); + + __ jump_from_interpreted(method, rax); + + __ bind(notFinal); + + // get receiver klass + __ null_check(recv, oopDesc::klass_offset_in_bytes()); + __ load_klass(rax, recv); + + // profile this call + __ profile_virtual_call(rax, rlocals, rdx); + // get target Method* & entry point + __ lookup_virtual_method(rax, index, method); + + __ profile_arguments_type(rdx, method, rbcp, true); + __ jump_from_interpreted(method, rdx); +} + +void TemplateTable::invokevirtual(int byte_no) {SCOPEMARK_NAME(invokevirtual, _masm) + transition(vtos, vtos); + assert(byte_no == f2_byte, "use this argument"); + prepare_invoke(byte_no, + rmethod,// method or vtable index + noreg, // unused itable index + T3, T1); // recv, flags + + // rmethod: index + // T3 : receiver + // T1 : flags + + invokevirtual_helper(rmethod, T3, T1); +} + +void TemplateTable::invokespecial(int byte_no) {SCOPEMARK_NAME(invokespecial, _masm) + transition(vtos, vtos); + assert(byte_no == f1_byte, "use this argument"); + Register rcx = T3; // + Register rax = V0; + Register rbx = rmethod; + prepare_invoke(byte_no, rbx, noreg, // get f1 Method* + rcx); // get receiver also for null check + __ verify_oop(rcx); + __ null_check(rcx); + // do the call + __ profile_call(rax); + __ profile_arguments_type(rax, rbx, c_rarg4, false); + __ jump_from_interpreted(rbx, rax); +} + +void TemplateTable::invokestatic(int byte_no) {SCOPEMARK_NAME(invokestatic, _masm) + transition(vtos, vtos); + assert(byte_no == f1_byte, "use this argument"); + Register rax = V0; + prepare_invoke(byte_no, rmethod, noreg); + // do the call + __ profile_call(rax); + __ profile_arguments_type(rax, rmethod, c_rarg3, false); + __ 
jump_from_interpreted(rmethod, rax); +} + + +void TemplateTable::fast_invokevfinal(int byte_no) { + transition(vtos, vtos); + assert(byte_no == f2_byte, "use this argument"); + __ stop("fast_invokevfinal not used on sw64"); +} + + +void TemplateTable::invokeinterface(int byte_no) {SCOPEMARK_NAME(invokeinterface, _masm) + transition(vtos, vtos); + assert(byte_no == f1_byte, "use this argument"); + Register rax = V0; + Register rcx = T3; + Register rdx = T1; + + prepare_invoke(byte_no, rax, rmethod, // get f1 Klass*, f2 Method* + rcx, rdx); // recv, flags + + // rax: reference klass (from f1) if interface method + // rbx: method (from f2) + // rcx: receiver + // rdx: flags + + // First check for Object case, then private interface method, + // then regular interface method. + + // Special case of invokeinterface called for virtual method of + // java.lang.Object. See cpCache.cpp for details. + Label notObjectMethod; + __ testw(rdx, (1 << ConstantPoolCacheEntry::is_forced_virtual_shift)); + __ jcc(Assembler::zero, notObjectMethod); + invokevirtual_helper(rmethod, rcx, rdx); + // no return from above + __ bind(notObjectMethod); + + Label no_such_interface; // for receiver subtype check + Register recvKlass; // used for exception processing + + // Check for private method invocation - indicated by vfinal + Label notVFinal; + __ testw(rdx, (1 << ConstantPoolCacheEntry::is_vfinal_shift)); + __ jcc(Assembler::zero, notVFinal); + + // Get receiver klass into rlocals - also a null check + __ null_check(rcx, oopDesc::klass_offset_in_bytes()); + __ load_klass(rlocals, rcx); + + Label subtype; + __ check_klass_subtype(rlocals, rax, c_rarg4, subtype); + // If we get here the typecheck failed + recvKlass = rdx; + __ movl(recvKlass, rlocals); // shuffle receiver class for exception use + __ jmp(no_such_interface); + + __ bind(subtype); + + // do the call - rbx is actually the method to call + + __ profile_final_call(rdx); + __ profile_arguments_type(rdx, rmethod, c_rarg4, true); + + __ jump_from_interpreted(rmethod, rdx); + // no return from above + __ bind(notVFinal); + + // Get receiver klass into rdx - also a null check + __ restore_locals(); // restore r14 + __ null_check(rcx, oopDesc::klass_offset_in_bytes()); + __ load_klass(rdx, rcx); + + Label no_such_method; + + // Preserve method for throw_AbstractMethodErrorVerbose. + __ movl(rcx, rmethod); + // Receiver subtype check against REFC. + // Superklass in rax. Subklass in rdx. Blows rcx, rdi. + __ lookup_interface_method(// inputs: rec. class, interface, itable index + rdx, rax, noreg, + // outputs: scan temp. reg, scan temp. reg + c_rarg4, rlocals, + no_such_interface, + /*return_method=*/false); + + // profile this call + __ restore_bcp(); // rbcp was destroyed by receiver type check + __ profile_virtual_call(rdx, c_rarg4, rlocals); + + // Get declaring interface class from method, and itable index + __ load_method_holder(rax, rmethod); + __ ldws(rmethod, Address(rmethod, Method::itable_index_offset())); + __ movw(rscratch1, Method::itable_index_max); + __ subw(rmethod, rscratch1, rmethod); + __ subw(R0, rmethod, rmethod); + + // Preserve recvKlass for throw_AbstractMethodErrorVerbose. + __ movl(rlocals, rdx); + __ lookup_interface_method(// inputs: rec. class, interface, itable index + rlocals, rax, rmethod, + // outputs: method, scan temp. 
reg + rmethod, c_rarg4, + no_such_interface); + + // rmethod: Method* to call + // rcx: receiver + // Check for abstract method error + // Note: This should be done more efficiently via a throw_abstract_method_error + // interpreter entry point and a conditional jump to it in case of a null + // method. + __ testptr(rmethod, rmethod); + __ jcc(Assembler::zero, no_such_method); + + __ profile_arguments_type(rdx, rmethod, c_rarg4, true); + + // do the call + // rcx: receiver + // rmethod,: Method* + __ jump_from_interpreted(rmethod, rdx); + __ should_not_reach_here("3501"); + + // exception handling code follows... + // note: must restore interpreter registers to canonical + // state for exception handling to work correctly! + + __ bind(no_such_method); + // throw exception + //__ pop(rbx); // pop return address (pushed by prepare_invoke) + __ restore_bcp(); // rbcp must be correct for exception handler (was destroyed) + __ restore_locals(); // make sure locals pointer is correct as well (was destroyed) + // Pass arguments for generating a verbose error message. + + recvKlass = c_rarg1; + Register method = c_rarg2; + if (recvKlass != rdx) { __ movl(recvKlass, rdx); } + if (method != rcx) { __ movl(method, rcx); } + + __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_AbstractMethodErrorVerbose), + recvKlass, method); + // The call_VM checks for exception, so we should never return here. + __ should_not_reach_here("3522"); + + __ bind(no_such_interface); + // throw exception + //__ pop(rbx); // pop return address (pushed by prepare_invoke) + __ restore_bcp(); // rbcp must be correct for exception handler (was destroyed) + __ restore_locals(); // make sure locals pointer is correct as well (was destroyed) + // Pass arguments for generating a verbose error message. + if (recvKlass != rdx) { __ movl(recvKlass, rdx); } + __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_IncompatibleClassChangeErrorVerbose), + recvKlass, rax); + // the call_VM checks for exception, so we should never return here. 
+ __ should_not_reach_here("3534"); +} + +void TemplateTable::invokehandle(int byte_no) {SCOPEMARK_NAME(invokehandle, _masm) + transition(vtos, vtos); + assert(byte_no == f1_byte, "use this argument"); + const Register rbx_method = rmethod; + const Register rax_mtype = V0; + const Register rcx_recv = T3; + const Register rdx_flags = c_rarg3; + const Register rdx = c_rarg3; + const Register rax = V0; + + prepare_invoke(byte_no, rbx_method, rax_mtype, rcx_recv); + __ verify_method_ptr(rbx_method); + __ verify_oop(rcx_recv); + __ null_check(rcx_recv); + + // rax: MethodType object (from cpool->resolved_references[f1], if necessary) + // rbx: MH.invokeExact_MT method (from f2) + + // Note: rax_mtype is already pushed (if necessary) by prepare_invoke + + // FIXME: profile the LambdaForm also + __ profile_final_call(rax); + __ profile_arguments_type(rdx, rbx_method, c_rarg5, true); + + __ jump_from_interpreted(rbx_method, rdx); +} + +void TemplateTable::invokedynamic(int byte_no) { + transition(vtos, vtos); + assert(byte_no == f1_byte, "use this argument"); + + const Register rbx_method = rmethod; + const Register rax_callsite = V0; + Register rdx = c_rarg2; + + //__ stop("TODO: should check function right:invokedynamic jzy"); + prepare_invoke(byte_no, rbx_method, rax_callsite); + + // rax: CallSite object (from cpool->resolved_references[f1]) + // rbx: MH.linkToCallSite method (from f2) + + // Note: rax_callsite is already pushed by prepare_invoke + + // %%% should make a type profile for any invokedynamic that takes a ref argument + // profile this call + __ profile_call(c_rarg3); + __ profile_arguments_type(rdx, rbx_method, c_rarg3, false); + + __ verify_oop(rax_callsite); + + __ jump_from_interpreted(rbx_method, rdx); +} + +//----------------------------------------------------------------------------- +// Allocation + +void TemplateTable::_new() {SCOPEMARK_NAME(TemplateTable::_new, _masm) + transition(vtos, atos); + Register rax = V0; + Register rbx = c_rarg3; + Register rcx = c_rarg4; + Register rdx = c_rarg5; + __ get_unsigned_2_byte_index_at_bcp(rdx, 1); + Label slow_case; + Label slow_case_no_pop; + Label done; + Label initialize_header; + Label initialize_object; // including clearing the fields + + __ get_cpool_and_tags(rcx, rax); + + // Make sure the class we're about to instantiate has been resolved. + // This is done before loading InstanceKlass to be consistent with the order + // how Constant Pool is updated (see ConstantPool::klass_at_put) + const int tags_offset = Array::base_offset_in_bytes(); + __ cmpab(Address(rax, rdx, Address::times_1, tags_offset), JVM_CONSTANT_Class); + __ jcc(Assembler::notEqual, slow_case_no_pop); + + // get InstanceKlass + __ load_resolved_klass_at_index(rcx, rcx, rdx); + __ push(rcx); // save the contexts of klass for initializing the header + + // make sure klass is initialized & doesn't have finalizer + // make sure klass is fully initialized + __ cmpb(Address(rcx, InstanceKlass::init_state_offset()), InstanceKlass::fully_initialized); + __ jcc(Assembler::notEqual, slow_case); + + // get instance_size in InstanceKlass (scaled to a count of bytes) + __ ldw(rdx, Address(rcx, Klass::layout_helper_offset())); + // test to see if it has a finalizer or is malformed in some way + __ testw(rdx, Klass::_lh_instance_slow_path_bit); + __ jcc(Assembler::notZero, slow_case); + + // Allocate the instance: + // If TLAB is enabled: + // Try to allocate in the TLAB. + // If fails, go to the slow path. 
+ // Else If inline contiguous allocations are enabled: + // Try to allocate in eden. + // If fails due to heap end, go to slow path. + // + // If TLAB is enabled OR inline contiguous is enabled: + // Initialize the allocation. + // Exit. + // + // Go to slow path. + + const bool allow_shared_alloc = + Universe::heap()->supports_inline_contig_alloc(); + + const Register thread = rthread; + if (UseTLAB) { + __ tlab_allocate(thread, rax, rdx, 0, rcx, rbx, slow_case); + if (ZeroTLAB) { + // the fields have been already cleared + __ jmp(initialize_header); + } else { + // initialize both the header and fields + __ jmp(initialize_object); + } + } else { + // Allocation in the shared Eden, if allowed. + // + // rdx: instance size in bytes + __ eden_allocate(thread, rax, rdx, 0, rbx, slow_case); + } + + // If UseTLAB or allow_shared_alloc are true, the object is created above and + // there is an initialize need. Otherwise, skip and go to the slow path. + if (UseTLAB || allow_shared_alloc) { + // The object is initialized before the header. If the object size is + // zero, go directly to the header initialization. + __ BIND(initialize_object); + __ decrementl(rdx, sizeof(oopDesc)); + __ jcc(Assembler::zero, initialize_header, rdx); + + // Initialize topmost object field, divide rdx by 8, check if odd and + // test if zero. + __ movw(rcx, R0); // use zero reg to clear memory (shorter code) +#ifdef ASSERT + __ movl(rscratch1, rdx); +#endif + __ srll(rdx, LogBytesPerLong, rdx); // divide by 2*oopSize and set result flag if odd + + // rdx must have been multiple of 8 +#ifdef ASSERT + // make sure rdx was multiple of 8 + Label L; + // Ignore partial flag stall after shrl() since it is debug VM + __ srll(rscratch1, LogBytesPerLong-1, rscratch1); + __ testptr(rscratch1, 0x1);// the least significant bit is zero? + __ jcc(Assembler::zero, L); + __ stop("object size is not multiple of 2 - adjust this code"); + __ BIND(L); + // rdx must be > 0, no extra check needed here +#endif + + // initialize remaining object fields: rdx was a multiple of 8 + { Label loop; + __ BIND(loop); + __ stptr(rcx, Address(rax, rdx, Address::times_8, sizeof(oopDesc) - 1*oopSize)); + __ decrementl(rdx); + __ jcc(Assembler::notZero, loop, rdx); + } + + // initialize object header only. + __ BIND(initialize_header); + if (UseBiasedLocking) { + __ pop(rcx); // get saved klass back in the register. + __ ldptr(rbx, Address(rcx, Klass::prototype_header_offset())); + __ stptr(rbx, Address(rax, oopDesc::mark_offset_in_bytes ())); + } else { + __ mov_immediate64(rscratch1, (intptr_t)markWord::prototype().value()); + __ stptr(rscratch1, Address(rax, oopDesc::mark_offset_in_bytes ())); // header + __ pop(rcx); // get saved klass back in the register. + } + __ store_klass_gap(rax, R0); + __ store_klass(rax, rcx); // klass + + { + SkipIfEqual skip_if(_masm, &DTraceAllocProbes, 0); + // Trigger dtrace event for fastpath + __ push(atos); + __ call_VM_leaf( + CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_object_alloc), rax); + __ pop(atos); + } + + __ jmp(done); + } + + // slow case + __ BIND(slow_case); + __ pop(rcx); // restore stack pointer to what it was when we came in. 
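+  // Slow path: hand allocation to the runtime. InterpreterRuntime::_new is
+  // called with the constant pool and the cp index; it resolves and
+  // initializes the class if needed and returns the new oop in rax (V0).
+  // The memb() after 'done' keeps the header/field initialization ahead of
+  // any store that publishes the new object (see the SW64 notes at the
+  // memb() sites below).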
+ __ BIND(slow_case_no_pop); + + Register rarg1 = c_rarg1; + Register rarg2 = c_rarg2; + + __ get_constant_pool(rarg1); + __ get_unsigned_2_byte_index_at_bcp(rarg2, 1); + call_VM(rax, CAST_FROM_FN_PTR(address, InterpreterRuntime::_new), rarg1, rarg2); + __ verify_oop(rax); + + // continue + __ BIND(done); + __ memb();// add for sw64 +} + +void TemplateTable::newarray() { + transition(itos, atos); + Register rarg1 = c_rarg1; + Register rax = V0; + __ load_unsigned_byte(rarg1, at_bcp(1)); + //__ movws(rax, rax); + call_VM(rax, CAST_FROM_FN_PTR(address, InterpreterRuntime::newarray), + rarg1, rax); + __ memb();// add for sw64 +} + +void TemplateTable::anewarray() { + transition(itos, atos); + + Register rarg1 = c_rarg1; + Register rarg2 = c_rarg2; + Register rax = V0; + + __ get_unsigned_2_byte_index_at_bcp(rarg2, 1); + __ get_constant_pool(rarg1); + //__ movws(rax, rax); + call_VM(rax, CAST_FROM_FN_PTR(address, InterpreterRuntime::anewarray), + rarg1, rarg2, rax); + // Must prevent reordering of stores for object initialization with stores that publish the new object. + __ memb(); +} + +void TemplateTable::arraylength() { + transition(atos, itos); + Register rax = V0; + __ null_check(rax, arrayOopDesc::length_offset_in_bytes()); + __ ldws(rax, Address(rax, arrayOopDesc::length_offset_in_bytes())); +} + +void TemplateTable::checkcast() { + transition(atos, atos); + Label done, is_null, ok_is_subtype, quicked, resolved; + Register rax = V0; + Register rcx = c_rarg4; + Register rdx = c_rarg3; + Register rbx = rmethod; + __ testptr(rax, rax); // object is in rax + __ jcc(Assembler::zero, is_null); + + // Get cpool & tags index + __ get_cpool_and_tags(rcx, rdx); // rcx=cpool, rdx=tags array + __ get_unsigned_2_byte_index_at_bcp(rbx, 1); // rbx=index + // See if bytecode has already been quicked + __ cmpab(Address(rdx, rbx, + Address::times_1, + Array::base_offset_in_bytes()), + JVM_CONSTANT_Class); + __ jcc(Assembler::equal, quicked); + __ push(atos); // save receiver for result, and for GC + call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc)); + + // vm_result_2 has metadata result + __ get_vm_result_2(rax, rthread); + + __ pop_ptr(rdx); // restore receiver + __ jmp(resolved); + + // Get superklass in rax and subklass in rbx + __ bind(quicked); + __ movl(rdx, rax); // Save object in rdx; rax needed for subtype check + __ load_resolved_klass_at_index(rax, rcx, rbx); + + __ bind(resolved); + __ load_klass(rbx, rdx); + + // Generate subtype check. Blows rcx, rdi. Object in rdx. + // Superklass in rax. Subklass in rbx. + __ gen_subtype_check(rbx, ok_is_subtype); + + // Come here on failure + __ push_ptr(rdx); + // object is at TOS + __ jump(ExternalAddress(Interpreter::_throw_ClassCastException_entry)); + + // Come here on success + __ bind(ok_is_subtype); + __ movl(rax, rdx); // Restore object in rdx + + // Collect counts on whether this check-cast sees NULLs a lot or not. 
+ if (ProfileInterpreter) { + __ jmp(done); + __ bind(is_null); + __ profile_null_seen(rcx); + } else { + __ bind(is_null); // same as 'done' + } + __ bind(done); +} + +void TemplateTable::instanceof() { + transition(atos, itos); + Label done, is_null, ok_is_subtype, quicked, resolved; + Register rax = V0; + Register rcx = c_rarg4; + Register rdx = c_rarg3; + Register rbx = rmethod; + __ testptr(rax, rax); + __ jcc(Assembler::zero, is_null); + + // Get cpool & tags index + __ get_cpool_and_tags(rcx, rdx); // rcx=cpool, rdx=tags array + __ get_unsigned_2_byte_index_at_bcp(rbx, 1); // rbx=index + // See if bytecode has already been quicked + __ cmpab(Address(rdx, rbx, + Address::times_1, + Array::base_offset_in_bytes()), + JVM_CONSTANT_Class); + __ jcc(Assembler::equal, quicked); + + __ push(atos); // save receiver for result, and for GC + call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc)); + // vm_result_2 has metadata result + + __ get_vm_result_2(rax, rthread); + + __ pop_ptr(rdx); // restore receiver + __ verify_oop(rdx); + __ load_klass(rdx, rdx); + __ jmp(resolved); + + // Get superklass in rax and subklass in rdx + __ bind(quicked); + __ load_klass(rdx, rax); + __ load_resolved_klass_at_index(rax, rcx, rbx); + + __ bind(resolved); + + // Generate subtype check. Blows rcx, rdi + // Superklass in rax. Subklass in rdx. + __ gen_subtype_check(rdx, ok_is_subtype); + + // Come here on failure + __ movl(rax, R0); + __ jmp(done); + // Come here on success + __ bind(ok_is_subtype); + __ movw(rax, 1); + + // Collect counts on whether this test sees NULLs a lot or not. + if (ProfileInterpreter) { + __ jmp(done); + __ bind(is_null); + __ profile_null_seen(rcx); + } else { + __ bind(is_null); // same as 'done' + } + __ bind(done); + // rax = 0: obj == NULL or obj is not an instanceof the specified klass + // rax = 1: obj != NULL and obj is an instanceof the specified klass +} + + +//---------------------------------------------------------------------------------------------------- +// Breakpoints +void TemplateTable::_breakpoint() { + // Note: We get here even if we are single stepping.. + // jbug insists on setting breakpoints at every bytecode + // even if we are in single step mode. + + transition(vtos, vtos); + + Register rarg = c_rarg1; + Register rax = V0; + + // get the unpatched byte code + __ get_method(rarg); + __ call_VM(noreg, + CAST_FROM_FN_PTR(address, + InterpreterRuntime::get_original_bytecode_at), + rarg, rbcp); + __ movl(rmethod, rax); // why? + + // post the breakpoint event + __ get_method(rarg); + __ call_VM(noreg, + CAST_FROM_FN_PTR(address, InterpreterRuntime::_breakpoint), + rarg, rbcp); + __ movl(rnext, rmethod); + // complete the execution of original bytecode + __ dispatch_only_normal(vtos); +} + +//----------------------------------------------------------------------------- +// Exceptions + +void TemplateTable::athrow() { + transition(atos, vtos); + const Register rax = FSR; + __ null_check(rax); + __ jump(ExternalAddress(Interpreter::throw_exception_entry())); +} + +//----------------------------------------------------------------------------- +// Synchronization +// +// Note: monitorenter & exit are symmetric routines; which is reflected +// in the assembly code structure as well +// +// Stack layout: +// +// [expressions ] <--- esp = expression stack top +// .. +// [expressions ] +// [monitor entry] <--- monitor block top = expression stack bot +// .. +// [monitor entry] +// [frame data ] <--- monitor block bot +// ... 
+// [saved rbp ] <--- rbp +void TemplateTable::monitorenter() { + transition(atos, vtos); + Register rax = FSR; + + // check for NULL object + __ null_check(rax); + + const Address monitor_block_top( + rfp, frame::interpreter_frame_monitor_block_top_offset * wordSize); + const Address monitor_block_bot( + rfp, frame::interpreter_frame_initial_sp_offset * wordSize); + const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; + + Label allocated; + + Register rtop = c_rarg3; + Register rbot = c_rarg2; + Register rmon = c_rarg1; + + // initialize entry pointer + __ movl(rmon, R0); // points to free slot or NULL + + // find a free slot in the monitor block (result in rmon) + { + Label entry, loop, exit; + __ ldptr(rtop, monitor_block_top); // points to current entry, + // starting with top-most entry + __ lea(rbot, monitor_block_bot); // points to word before bottom + // of monitor block + __ jmp(entry); + + __ bind(loop); + // check if current entry is used + __ cmpptr(Address(rtop, BasicObjectLock::obj_offset_in_bytes()), (int32_t) NULL_WORD); + // if not used then remember entry in rmon + __ cmove(Assembler::equal, rmon, rtop, rmon); // cmov => cmovptr + // check if current entry is for same object + __ cmpptr(rax, Address(rtop, BasicObjectLock::obj_offset_in_bytes())); + // if same object then stop searching + __ jcc(Assembler::equal, exit); + // otherwise advance to next entry + __ addptr(rtop, entry_size, rtop); + __ bind(entry); + // check if bottom reached + __ cmpptr(rtop, rbot); + // if not at bottom then check this entry + __ jcc(Assembler::notEqual, loop); + __ bind(exit); + } + + __ testptr(rmon, rmon); // check if a slot has been found + __ jcc(Assembler::notZero, allocated); // if found, continue with that one + + // allocate one if there's no free slot + { + Label entry, loop; + // 1. compute new pointers // rsp: old expression stack top + __ ldptr(rmon, monitor_block_bot); // rmon: old expression stack bottom + __ subptr(esp, entry_size, esp); // move expression stack top + __ subptr(rmon, entry_size, rmon); // move expression stack bottom + __ movl(rtop, esp); // set start value for copy loop + __ stptr(rmon, monitor_block_bot); // set new monitor block bottom + __ jmp(entry); + + // 2. move expression stack contents + __ bind(loop); + __ ldptr(rbot, Address(rtop, entry_size)); // load expression stack + // word from old location + __ stptr(rbot, Address(rtop, 0)); // and store it at new location + __ addptr(rtop, wordSize, rtop); // advance to next word + __ bind(entry); + __ cmpptr(rtop, rmon); // check if bottom reached + __ jcc(Assembler::notEqual, loop); // if not at bottom then + // copy next word + } + + // call run-time routine + // rmon: points to monitor entry + __ bind(allocated); + + // Increment bcp to point to the next bytecode, so exception + // handling for async. exceptions work correctly. + // The object has already been poped from the stack, so the + // expression stack looks correct. + __ incrementl(rbcp); + + // store object + __ stptr(rax, Address(rmon, BasicObjectLock::obj_offset_in_bytes())); + __ lock_object(rmon); + + // check to make sure this monitor doesn't cause stack overflow after locking + __ save_bcp(); // in case of exception + __ generate_stack_overflow_check(0); + + // The bcp has already been incremented. Just need to dispatch to + // next instruction. 
+ __ dispatch_next(vtos); +} + +void TemplateTable::monitorexit() { + transition(atos, vtos); + Register rax = FSR; + + // check for NULL object + __ null_check(rax); + + const Address monitor_block_top( + rfp, frame::interpreter_frame_monitor_block_top_offset * wordSize); + const Address monitor_block_bot( + rfp, frame::interpreter_frame_initial_sp_offset * wordSize); + const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; + + Register rtop = c_rarg1; + Register rbot = c_rarg2; + + Label found; + + // find matching slot + { + Label entry, loop; + __ ldptr(rtop, monitor_block_top); // points to current entry, + // starting with top-most entry + __ lea(rbot, monitor_block_bot); // points to word before bottom + // of monitor block + __ jmp(entry); + + __ bind(loop); + // check if current entry is for same object + __ cmpptr(rax, Address(rtop, BasicObjectLock::obj_offset_in_bytes())); + // if same object then stop searching + __ jcc(Assembler::equal, found); + // otherwise advance to next entry + __ addptr(rtop, entry_size, rtop); + __ bind(entry); + // check if bottom reached + __ cmpptr(rtop, rbot); + // if not at bottom then check this entry + __ jcc(Assembler::notEqual, loop); + } + + // error handling. Unlocking was not block-structured + __ call_VM(noreg, CAST_FROM_FN_PTR(address, + InterpreterRuntime::throw_illegal_monitor_state_exception)); + __ should_not_reach_here("4101"); + + // call run-time routine + __ bind(found); + __ push_ptr(rax); // make sure object is on stack (contract with oopMaps) + __ unlock_object(rtop); + __ pop_ptr(rax); // discard object +} + +// Wide instructions +void TemplateTable::wide() { //__ stop("TODO:check function right:wide jzy"); + transition(vtos, vtos); + __ load_unsigned_byte(rscratch1, at_bcp(1)); + ExternalAddress wtable((address)Interpreter::_wentry_point); + __ jump(ArrayAddress(wtable, Address(noreg, rscratch1, Address::times_ptr)), rscratch2, rcc); + // Note: the rbcp increment step is part of the individual wide bytecode implementations +} + +// Multi arrays +void TemplateTable::multianewarray() { + transition(vtos, atos); + + Register rax = FSR; + const Register rbx = rscratch1; + Register rarg = c_rarg1; + + __ load_unsigned_byte(rax, at_bcp(3)); // get number of dimensions + // last dim is on top of stack; we want address of first one: + // first_addr = last_addr + (ndims - 1) * stackElementSize - 1*wordsize + // the latter wordSize to point to the beginning of the array. + __ lea(rarg, Address(esp, rax, Interpreter::stackElementScale(), -wordSize)); + call_VM(rax, CAST_FROM_FN_PTR(address, InterpreterRuntime::multianewarray), rarg); + __ load_unsigned_byte(rbx, at_bcp(3)); + __ lea(esp, Address(esp, rbx, Interpreter::stackElementScale())); // get rid of counts + __ memb();// add for sw64 +} diff --git a/src/hotspot/cpu/sw64/templateTable_sw64.hpp b/src/hotspot/cpu/sw64/templateTable_sw64.hpp new file mode 100644 index 00000000000..fe443f8e1bd --- /dev/null +++ b/src/hotspot/cpu/sw64/templateTable_sw64.hpp @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_SW64_VM_TEMPLATETABLE_SW64_64_HPP +#define CPU_SW64_VM_TEMPLATETABLE_SW64_64_HPP + +static void prepare_invoke(int byte_no, + Register method, // linked method (or i-klass) + Register index = noreg, // itable index, MethodType, etc. + Register recv = noreg, // if caller wants to see it + Register flags = noreg // if caller wants to test it + ); + static void invokevirtual_helper(Register index, Register recv, + Register flags); + static void volatile_barrier(); + + // Helpers + static void index_check(Register array, Register index); + static void index_check_without_pop(Register array, Register index); + +#endif // CPU_SW64_VM_TEMPLATETABLE_SW64_64_HPP diff --git a/src/hotspot/cpu/sw64/universalNativeInvoker_sw64.cpp b/src/hotspot/cpu/sw64/universalNativeInvoker_sw64.cpp new file mode 100644 index 00000000000..e28d9510ab8 --- /dev/null +++ b/src/hotspot/cpu/sw64/universalNativeInvoker_sw64.cpp @@ -0,0 +1,150 @@ +/* + * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2019, Arm Limited. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "code/codeBlob.hpp" +#include "memory/resourceArea.hpp" +#include "prims/universalNativeInvoker.hpp" + +#define __ _masm-> + +void ProgrammableInvoker::Generator::generate() { + + __ enter(); + + // Put the context pointer in ebx/rbx - it's going to be heavily used below both before and after the call + Register ctxt_reg = rbx; + Register used_regs[] = { ctxt_reg, rcx, rsi, rdi }; + GrowableArray preserved_regs; + + for (size_t i = 0; i < sizeof(used_regs)/sizeof(Register); i++) { + Register used_reg = used_regs[i]; + if (!_abi->is_volatile_reg(used_reg)) { + preserved_regs.push(used_reg); + } + } + + __ block_comment("init_and_alloc_stack"); + + for (int i = 0; i < preserved_regs.length(); i++) { + __ push(preserved_regs.at(i)); + } + + __ movl(ctxt_reg, c_rarg0); // FIXME c args? or java? + + __ block_comment("allocate_stack"); + __ ldptr(rcx, Address(ctxt_reg, (int) _layout->stack_args_bytes)); + __ subptr(rsp, rcx, rsp); + __ andptr(rsp, -_abi->_stack_alignment_bytes, rsp); + + // Note: rcx is used below! + + __ block_comment("load_arguments"); + + __ srll(rcx, LogBytesPerWord, rcx); // bytes -> words + __ ldptr(rsi, Address(ctxt_reg, (int) _layout->stack_args)); + __ ldptr(rdi, rsp); + // __ rep_mov(); //need check + + +/* for (int i = 0; i < _abi->_vector_argument_registers.length(); i++) { + // [1] -> 64 bit -> xmm + // [2] -> 128 bit -> xmm + // [4] -> 256 bit -> ymm + // [8] -> 512 bit -> zmm + + XMMRegister reg = _abi->_vector_argument_registers.at(i); + size_t offs = _layout->arguments_vector + i * xmm_reg_size; + __ movdqu(reg, Address(ctxt_reg, (int)offs)); + } */ + + for (int i = 0; i < _abi->_integer_argument_registers.length(); i++) { + size_t offs = _layout->arguments_integer + i * sizeof(uintptr_t); + __ ldptr(_abi->_integer_argument_registers.at(i), Address(ctxt_reg, (int)offs)); + } + + if (_abi->_shadow_space_bytes != 0) { + __ block_comment("allocate shadow space for argument register spill"); + __ subptr(rsp, _abi->_shadow_space_bytes, rsp); + } + + // call target function + __ block_comment("call target function"); + __ ldptr(rscratch3, Address(ctxt_reg, (int) _layout->arguments_next_pc));//need check + __ call(rscratch3); + + if (_abi->_shadow_space_bytes != 0) { + __ block_comment("pop shadow space"); + __ addptr(rsp, _abi->_shadow_space_bytes, rsp); + } + + __ block_comment("store_registers"); + for (int i = 0; i < _abi->_integer_return_registers.length(); i++) { + ssize_t offs = _layout->returns_integer + i * sizeof(uintptr_t); + __ stptr(_abi->_integer_return_registers.at(i), Address(ctxt_reg, offs)); + } + +/*for (int i = 0; i < _abi->_vector_return_registers.length(); i++) { + // [1] -> 64 bit -> xmm + // [2] -> 128 bit -> xmm (SSE) + // [4] -> 256 bit -> ymm (AVX) + // [8] -> 512 bit -> zmm (AVX-512, aka AVX3) + + XMMRegister reg = _abi->_vector_return_registers.at(i); + size_t offs = _layout->returns_vector + i * xmm_reg_size; + __ movdqu(Address(ctxt_reg, (int)offs), reg); + } + + for (size_t i = 0; i < _abi->_X87_return_registers_noof; i++) { + size_t offs = _layout->returns_x87 + i * (sizeof(long double)); + __ fstp_x(Address(ctxt_reg, (int)offs)); //pop ST(0) + } */ + + // Restore backed up preserved register + for (int i = 0; i < preserved_regs.length(); i++) { + __ ldptr(preserved_regs.at(i), Address(rbp, -(int)(sizeof(uintptr_t) * (i + 1)))); + } + + __ leave(); + __ ret(); + + __ flush(); + +} + +address ProgrammableInvoker::generate_adapter(jobject jabi, jobject 
jlayout) { + ResourceMark rm; + const ABIDescriptor abi = ForeignGlobals::parse_abi_descriptor(jabi); + const BufferLayout layout = ForeignGlobals::parse_buffer_layout(jlayout); + + BufferBlob* _invoke_native_blob = BufferBlob::create("invoke_native_blob", native_invoker_size); + + CodeBuffer code2(_invoke_native_blob); + ProgrammableInvoker::Generator g2(&code2, &abi, &layout); + g2.generate(); + code2.log_section_sizes("InvokeNativeBlob"); + + return _invoke_native_blob->code_begin(); +} diff --git a/src/hotspot/cpu/sw64/universalUpcallHandler_sw64.cpp b/src/hotspot/cpu/sw64/universalUpcallHandler_sw64.cpp new file mode 100644 index 00000000000..3a34d7dff0c --- /dev/null +++ b/src/hotspot/cpu/sw64/universalUpcallHandler_sw64.cpp @@ -0,0 +1,149 @@ +/* + * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2019, Arm Limited. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "code/codeBlob.hpp" +#include "code/codeBlob.hpp" +#include "code/vmreg.inline.hpp" +#include "compiler/disassembler.hpp" +#include "logging/logStream.hpp" +#include "memory/resourceArea.hpp" +#include "prims/universalUpcallHandler.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/signature.hpp" +#include "runtime/stubRoutines.hpp" +#include "utilities/formatBuffer.hpp" +#include "utilities/globalDefinitions.hpp" + +#define __ _masm-> + +// 1. Create buffer according to layout +// 2. Load registers & stack args into buffer +// 3. Call upcall helper with upcall handler instance & buffer pointer (C++ ABI) +// 4. Load return value from buffer into foreign ABI registers +// 5. 
Return +address ProgrammableUpcallHandler::generate_upcall_stub(jobject rec, jobject jabi, jobject jlayout) { + Unimplemented(); + + ResourceMark rm; + const ABIDescriptor abi = ForeignGlobals::parse_abi_descriptor(jabi); + const BufferLayout layout = ForeignGlobals::parse_buffer_layout(jlayout); + + CodeBuffer buffer("upcall_stub", 1024, upcall_stub_size); + + MacroAssembler* _masm = new MacroAssembler(&buffer); + int stack_alignment_C = 16; // bytes + int register_size = sizeof(uintptr_t); + //int buffer_alignment = xmm_reg_size; + + // stub code + __ enter(); + + // save pointer to JNI receiver handle into constant segment + Address rec_adr = __ as_Address(InternalAddress(__ address_constant((address)rec))); + + assert(abi._stack_alignment_bytes % 16 == 0, "stack must be 16 byte aligned"); + + __ subptr(rsp, (int) align_up(layout.buffer_size, abi._stack_alignment_bytes), rsp); + + Register used[] = { c_rarg0, c_rarg1, rax, rbx, rdi, rsi, r12, r13, r14, r15 }; + GrowableArray preserved; + // TODO need to preserve anything killed by the upcall that is non-volatile, needs XMM regs as well, probably + for (size_t i = 0; i < sizeof(used)/sizeof(Register); i++) { + Register reg = used[i]; + if (!abi.is_volatile_reg(reg)) { + preserved.push(reg); + } + } + + int preserved_size = align_up(preserved.length() * register_size, stack_alignment_C); // includes register alignment + int buffer_offset = preserved_size; // offset from rsp + + __ subptr(rsp, preserved_size, rsp); + for (int i = 0; i < preserved.length(); i++) { + __ stptr(preserved.at(i), Address(rsp, i * register_size)); + } + + for (int i = 0; i < abi._integer_argument_registers.length(); i++) { + size_t offs = buffer_offset + layout.arguments_integer + i * sizeof(uintptr_t); + __ stptr(abi._integer_argument_registers.at(i), Address(rsp, (int)offs)); + } + +// for (int i = 0; i < abi._vector_argument_registers.length(); i++) { +// FloatRegister reg = abi._vector_argument_registers.at(i); +// size_t offs = buffer_offset + layout.arguments_vector + i * xmm_reg_size; +// __ fldd(reg, Address(rsp, (int)offs)); +// } + + // Capture prev stack pointer (stack arguments base) + __ lea(rax, Address(rbp, 16 + 32)); // also skip shadow space + __ stptr(rax, Address(rsp, buffer_offset + (int) layout.stack_args)); + + + // Call upcall helper + __ ldptr(c_rarg0, rec_adr); + __ lea(c_rarg1, Address(rsp, buffer_offset)); + + __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, ProgrammableUpcallHandler::attach_thread_and_do_upcall))); + + for (int i = 0; i < abi._integer_return_registers.length(); i++) { + size_t offs = buffer_offset + layout.returns_integer + i * sizeof(uintptr_t); + __ ldptr(abi._integer_return_registers.at(i), Address(rsp, (int)offs)); + } + +// for (int i = 0; i < abi._vector_return_registers.length(); i++) { +// XMMRegister reg = abi._vector_return_registers.at(i); +// size_t offs = buffer_offset + layout.returns_vector + i * xmm_reg_size; +// __ movdqu(reg, Address(rsp, (int)offs)); +// } + +// for (size_t i = abi._X87_return_registers_noof; i > 0 ; i--) { +// ssize_t offs = buffer_offset + layout.returns_x87 + (i - 1) * (sizeof(long double)); +// __ fld_x (Address(rsp, (int)offs)); +// } + + // Restore preserved registers + for (int i = 0; i < preserved.length(); i++) { + __ ldptr(preserved.at(i), Address(rsp, i * register_size)); + } + + __ leave(); + __ ret(); + + _masm->flush(); + + BufferBlob* blob = BufferBlob::create("upcall_stub", &buffer); + + return blob->code_begin(); +} + +address 
ProgrammableUpcallHandler::generate_optimized_upcall_stub(jobject mh, Method* entry, jobject jabi, jobject jconv) { + ShouldNotCallThis(); + return NULL; +} + +bool ProgrammableUpcallHandler::supports_optimized_upcalls() { + return false; +} diff --git a/src/hotspot/cpu/sw64/vmStructs_sw64.hpp b/src/hotspot/cpu/sw64/vmStructs_sw64.hpp new file mode 100644 index 00000000000..8eb2c2a4e84 --- /dev/null +++ b/src/hotspot/cpu/sw64/vmStructs_sw64.hpp @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_SW64_VM_VMSTRUCTS_SW64_HPP +#define CPU_SW64_VM_VMSTRUCTS_SW64_HPP + +// These are the CPU-specific fields, types and integer +// constants required by the Serviceability Agent. This file is +// referenced by vmStructs.cpp. + +#define VM_STRUCTS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \ + volatile_nonstatic_field(JavaFrameAnchor, _last_Java_fp, intptr_t*) + +#define VM_TYPES_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type) + +#define VM_INT_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) + +#define VM_LONG_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) + +#endif // CPU_SW64_VM_VMSTRUCTS_SW64_HPP diff --git a/src/hotspot/cpu/sw64/vm_version_ext_sw64.cpp b/src/hotspot/cpu/sw64/vm_version_ext_sw64.cpp new file mode 100644 index 00000000000..e274aa47798 --- /dev/null +++ b/src/hotspot/cpu/sw64/vm_version_ext_sw64.cpp @@ -0,0 +1,91 @@ +/* + * Copyright (c) 2016, 2018, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "code/codeBlob.hpp" +#include "memory/allocation.hpp" +#include "memory/allocation.inline.hpp" +#include "runtime/os.inline.hpp" +#include "vm_version_ext_sw64.hpp" + +// VM_Version_Ext statics +int VM_Version_Ext::_no_of_threads = 0; +int VM_Version_Ext::_no_of_cores = 0; +int VM_Version_Ext::_no_of_sockets = 0; +bool VM_Version_Ext::_initialized = false; +char VM_Version_Ext::_cpu_name[CPU_TYPE_DESC_BUF_SIZE] = {0}; +char VM_Version_Ext::_cpu_desc[CPU_DETAILED_DESC_BUF_SIZE] = {0}; + +void VM_Version_Ext::initialize_cpu_information(void) { + // do nothing if cpu info has been initialized + if (_initialized) { + return; + } + + int core_id = -1; + int chip_id = -1; + int len = 0; + char* src_string = NULL; + + _no_of_cores = os::processor_count(); + _no_of_threads = _no_of_cores; + _no_of_sockets = _no_of_cores; + snprintf(_cpu_name, CPU_TYPE_DESC_BUF_SIZE - 1, "Sw64"); + snprintf(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE, "Sw64 %s", _features_string); + _initialized = true; +} + +int VM_Version_Ext::number_of_threads(void) { + initialize_cpu_information(); + return _no_of_threads; +} + +int VM_Version_Ext::number_of_cores(void) { + initialize_cpu_information(); + return _no_of_cores; +} + +int VM_Version_Ext::number_of_sockets(void) { + initialize_cpu_information(); + return _no_of_sockets; +} + +const char* VM_Version_Ext::cpu_name(void) { + initialize_cpu_information(); + char* tmp = NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_TYPE_DESC_BUF_SIZE, mtTracing); + if (NULL == tmp) { + return NULL; + } + strncpy(tmp, _cpu_name, CPU_TYPE_DESC_BUF_SIZE); + return tmp; +} + +const char* VM_Version_Ext::cpu_description(void) { + initialize_cpu_information(); + char* tmp = NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_DETAILED_DESC_BUF_SIZE, mtTracing); + if (NULL == tmp) { + return NULL; + } + strncpy(tmp, _cpu_desc, CPU_DETAILED_DESC_BUF_SIZE); + return tmp; +} diff --git a/src/hotspot/cpu/sw64/vm_version_ext_sw64.hpp b/src/hotspot/cpu/sw64/vm_version_ext_sw64.hpp new file mode 100644 index 00000000000..16ae7063e31 --- /dev/null +++ b/src/hotspot/cpu/sw64/vm_version_ext_sw64.hpp @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2016, 2018, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_SW64_VM_VM_VERSION_EXT_SW64_HPP +#define CPU_SW64_VM_VM_VERSION_EXT_SW64_HPP + +#include "runtime/vm_version.hpp" +#include "utilities/macros.hpp" + +class VM_Version_Ext : public VM_Version { + private: + static const size_t CPU_TYPE_DESC_BUF_SIZE = 256; + static const size_t CPU_DETAILED_DESC_BUF_SIZE = 4096; + + static int _no_of_threads; + static int _no_of_cores; + static int _no_of_sockets; + static bool _initialized; + static char _cpu_name[CPU_TYPE_DESC_BUF_SIZE]; + static char _cpu_desc[CPU_DETAILED_DESC_BUF_SIZE]; + + public: + static int number_of_threads(void); + static int number_of_cores(void); + static int number_of_sockets(void); + + static const char* cpu_name(void); + static const char* cpu_description(void); + static void initialize_cpu_information(void); + +}; + +#endif // CPU_SW64_VM_VM_VERSION_EXT_SW64_HPP diff --git a/src/hotspot/cpu/sw64/vm_version_sw64.cpp b/src/hotspot/cpu/sw64/vm_version_sw64.cpp new file mode 100644 index 00000000000..97cec7b5d2c --- /dev/null +++ b/src/hotspot/cpu/sw64/vm_version_sw64.cpp @@ -0,0 +1,628 @@ +/* + * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, Red Hat Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "code/codeBlob.hpp" +#include "memory/resourceArea.hpp" +#include "runtime/java.hpp" +#include "runtime/stubCodeGenerator.hpp" +#include "runtime/globals_extension.hpp" +#include "runtime/vm_version.hpp" +#include "utilities/macros.hpp" +#include "utilities/powerOfTwo.hpp" +#include "utilities/virtualizationSupport.hpp" + +#include OS_HEADER_INLINE(os) + +//ZHJ #include +//ZHJ #include + +#ifndef HWCAP_AES +#define HWCAP_AES (1<<3) +#endif + +#ifndef HWCAP_PMULL +#define HWCAP_PMULL (1<<4) +#endif + +#ifndef HWCAP_SHA1 +#define HWCAP_SHA1 (1<<5) +#endif + +#ifndef HWCAP_SHA2 +#define HWCAP_SHA2 (1<<6) +#endif + +#ifndef HWCAP_CRC32 +#define HWCAP_CRC32 (1<<7) +#endif + +#ifndef HWCAP_ATOMICS +#define HWCAP_ATOMICS (1<<8) +#endif + +#ifndef HWCAP_ASIMD +#define HWCAP_ASIMD (1<<9) +#endif + +////int VM_Version::_cpu; +////int VM_Version::_model; +////int VM_Version::_model2; +////int VM_Version::_variant; +////int VM_Version::_revision; +////int VM_Version::_stepping; +////VM_Version::PsrInfo VM_Version::_psr_info = { 0, }; +int VM_Version::_features = VM_Version::unknown_m; +const char* VM_Version::_features_str = ""; + +////static BufferBlob* stub_blob; +////static const int stub_size = 550; + +////extern "C" { +//// typedef void (*getPsrInfo_stub_t)(void*); +////} +////static getPsrInfo_stub_t getPsrInfo_stub = NULL; + + +////class VM_Version_StubGenerator: public StubCodeGenerator { +//// public: +//// +//// VM_Version_StubGenerator(CodeBuffer *c) : StubCodeGenerator(c) {} +//// +//// address generate_getPsrInfo() { +//// StubCodeMark mark(this, "VM_Version", "getPsrInfo_stub"); +////# define __ _masm-> +//// address start = __ pc(); +//// +//// // void getPsrInfo(VM_Version::PsrInfo* psr_info); +//// +//// address entry = __ pc(); +//// +//// __ enter(); +//// +//// __ get_dczid_el0(rscratch1); +//// __ strw(rscratch1, Address(c_rarg0, in_bytes(VM_Version::dczid_el0_offset()))); +//// +//// __ get_ctr_el0(rscratch1); +//// __ strw(rscratch1, Address(c_rarg0, in_bytes(VM_Version::ctr_el0_offset()))); +//// +//// __ leave(); +//// __ ret(lr); +//// +////# undef __ +//// +//// return start; +//// } +////}; + + +////void VM_Version::get_processor_features() { +//// _supports_cx8 = true; +//// _supports_atomic_getset4 = true; +//// _supports_atomic_getadd4 = true; +//// _supports_atomic_getset8 = true; +//// _supports_atomic_getadd8 = true; +//// +//// getPsrInfo_stub(&_psr_info); +//// +//// int dcache_line = VM_Version::dcache_line_size(); +//// +//// // Limit AllocatePrefetchDistance so that it does not exceed the +//// // constraint in AllocatePrefetchDistanceConstraintFunc. 
+//// if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) +//// FLAG_SET_DEFAULT(AllocatePrefetchDistance, MIN2(512, 3*dcache_line)); +//// +//// if (FLAG_IS_DEFAULT(AllocatePrefetchStepSize)) +//// FLAG_SET_DEFAULT(AllocatePrefetchStepSize, dcache_line); +//// if (FLAG_IS_DEFAULT(PrefetchScanIntervalInBytes)) +//// FLAG_SET_DEFAULT(PrefetchScanIntervalInBytes, 3*dcache_line); +//// if (FLAG_IS_DEFAULT(PrefetchCopyIntervalInBytes)) +//// FLAG_SET_DEFAULT(PrefetchCopyIntervalInBytes, 3*dcache_line); +//// if (FLAG_IS_DEFAULT(SoftwarePrefetchHintDistance)) +//// FLAG_SET_DEFAULT(SoftwarePrefetchHintDistance, 3*dcache_line); +//// +//// if (PrefetchCopyIntervalInBytes != -1 && +//// ((PrefetchCopyIntervalInBytes & 7) || (PrefetchCopyIntervalInBytes >= 32768))) { +//// warning("PrefetchCopyIntervalInBytes must be -1, or a multiple of 8 and < 32768"); +//// PrefetchCopyIntervalInBytes &= ~7; +//// if (PrefetchCopyIntervalInBytes >= 32768) +//// PrefetchCopyIntervalInBytes = 32760; +//// } +//// +//// if (AllocatePrefetchDistance !=-1 && (AllocatePrefetchDistance & 7)) { +//// warning("AllocatePrefetchDistance must be multiple of 8"); +//// AllocatePrefetchDistance &= ~7; +//// } +//// +//// if (AllocatePrefetchStepSize & 7) { +//// warning("AllocatePrefetchStepSize must be multiple of 8"); +//// AllocatePrefetchStepSize &= ~7; +//// } +//// +//// if (SoftwarePrefetchHintDistance != -1 && +//// (SoftwarePrefetchHintDistance & 7)) { +//// warning("SoftwarePrefetchHintDistance must be -1, or a multiple of 8"); +//// SoftwarePrefetchHintDistance &= ~7; +//// } +//// +//// unsigned long auxv = 0; //ZHJ getauxval(AT_HWCAP); +//// +//// char buf[512]; +//// +//// _features = auxv; +//// +//// int cpu_lines = 0; +//// if (FILE *f = fopen("/proc/cpuinfo", "r")) { +//// char buf[128], *p; +//// while (fgets(buf, sizeof (buf), f) != NULL) { +//// if ((p = strchr(buf, ':')) != NULL) { +//// long v = strtol(p+1, NULL, 0); +//// if (strncmp(buf, "CPU implementer", sizeof "CPU implementer" - 1) == 0) { +//// _cpu = v; +//// cpu_lines++; +//// } else if (strncmp(buf, "CPU variant", sizeof "CPU variant" - 1) == 0) { +//// _variant = v; +//// } else if (strncmp(buf, "CPU part", sizeof "CPU part" - 1) == 0) { +//// if (_model != v) _model2 = _model; +//// _model = v; +//// } else if (strncmp(buf, "CPU revision", sizeof "CPU revision" - 1) == 0) { +//// _revision = v; +//// } +//// } +//// } +//// fclose(f); +//// } +//// +//// // Enable vendor specific features +//// +//// // ThunderX +//// if (_cpu == CPU_CAVIUM && (_model == 0xA1)) { +//// if (_variant == 0) _features |= CPU_DMB_ATOMICS; +//// if (FLAG_IS_DEFAULT(AvoidUnalignedAccesses)) { +//// FLAG_SET_DEFAULT(AvoidUnalignedAccesses, true); +//// } +//// if (FLAG_IS_DEFAULT(UseSIMDForMemoryOps)) { +//// FLAG_SET_DEFAULT(UseSIMDForMemoryOps, (_variant > 0)); +//// } +//// if (FLAG_IS_DEFAULT(UseSIMDForArrayEquals)) { +//// FLAG_SET_DEFAULT(UseSIMDForArrayEquals, false); +//// } +//// } +//// +//// // ThunderX2 +//// if ((_cpu == CPU_CAVIUM && (_model == 0xAF)) || +//// (_cpu == CPU_BROADCOM && (_model == 0x516))) { +//// if (FLAG_IS_DEFAULT(AvoidUnalignedAccesses)) { +//// FLAG_SET_DEFAULT(AvoidUnalignedAccesses, true); +//// } +//// if (FLAG_IS_DEFAULT(UseSIMDForMemoryOps)) { +//// FLAG_SET_DEFAULT(UseSIMDForMemoryOps, true); +//// } +//// if (FLAG_IS_DEFAULT(UseFPUForSpilling)) { +//// FLAG_SET_DEFAULT(UseFPUForSpilling, true); +//// } +//// } +//// +//// // Cortex A53 +//// if (_cpu == CPU_ARM && (_model == 0xd03 || _model2 == 0xd03)) { +//// _features |= 
CPU_A53MAC; +//// if (FLAG_IS_DEFAULT(UseSIMDForArrayEquals)) { +//// FLAG_SET_DEFAULT(UseSIMDForArrayEquals, false); +//// } +//// } +//// +//// // Cortex A73 +//// if (_cpu == CPU_ARM && (_model == 0xd09 || _model2 == 0xd09)) { +//// if (FLAG_IS_DEFAULT(SoftwarePrefetchHintDistance)) { +//// FLAG_SET_DEFAULT(SoftwarePrefetchHintDistance, -1); +//// } +//// // A73 is faster with short-and-easy-for-speculative-execution-loop +//// if (FLAG_IS_DEFAULT(UseSimpleArrayEquals)) { +//// FLAG_SET_DEFAULT(UseSimpleArrayEquals, true); +//// } +//// } +//// +//// if (_cpu == CPU_ARM && (_model == 0xd07 || _model2 == 0xd07)) _features |= CPU_STXR_PREFETCH; +//// // If an olde style /proc/cpuinfo (cpu_lines == 1) then if _model is an A57 (0xd07) +//// // we assume the worst and assume we could be on a big little system and have +//// // undisclosed A53 cores which we could be swapped to at any stage +//// if (_cpu == CPU_ARM && cpu_lines == 1 && _model == 0xd07) _features |= CPU_A53MAC; +//// +//// sprintf(buf, "0x%02x:0x%x:0x%03x:%d", _cpu, _variant, _model, _revision); +//// if (_model2) sprintf(buf+strlen(buf), "(0x%03x)", _model2); +//// if (auxv & HWCAP_ASIMD) strcat(buf, ", simd"); +//// if (auxv & HWCAP_CRC32) strcat(buf, ", crc"); +//// if (auxv & HWCAP_AES) strcat(buf, ", aes"); +//// if (auxv & HWCAP_SHA1) strcat(buf, ", sha1"); +//// if (auxv & HWCAP_SHA2) strcat(buf, ", sha256"); +//// if (auxv & HWCAP_ATOMICS) strcat(buf, ", lse"); +//// +//// _features_string = os::strdup(buf); +//// +//// if (FLAG_IS_DEFAULT(UseCRC32)) { +//// UseCRC32 = (auxv & HWCAP_CRC32) != 0; +//// } +//// +//// if (UseCRC32 && (auxv & HWCAP_CRC32) == 0) { +//// warning("UseCRC32 specified, but not supported on this CPU"); +//// FLAG_SET_DEFAULT(UseCRC32, false); +//// } +//// +//// if (FLAG_IS_DEFAULT(UseAdler32Intrinsics)) { +//// FLAG_SET_DEFAULT(UseAdler32Intrinsics, true); +//// } +//// +//// if (UseVectorizedMismatchIntrinsic) { +//// warning("UseVectorizedMismatchIntrinsic specified, but not available on this CPU."); +//// FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false); +//// } +//// +//// if (auxv & HWCAP_ATOMICS) { +//// if (FLAG_IS_DEFAULT(UseLSE)) +//// FLAG_SET_DEFAULT(UseLSE, true); +//// } else { +//// if (UseLSE) { +//// warning("UseLSE specified, but not supported on this CPU"); +//// FLAG_SET_DEFAULT(UseLSE, false); +//// } +//// } +//// +//// if (auxv & HWCAP_AES) { +//// UseAES = UseAES || FLAG_IS_DEFAULT(UseAES); +//// UseAESIntrinsics = +//// UseAESIntrinsics || (UseAES && FLAG_IS_DEFAULT(UseAESIntrinsics)); +//// if (UseAESIntrinsics && !UseAES) { +//// warning("UseAESIntrinsics enabled, but UseAES not, enabling"); +//// UseAES = true; +//// } +//// } else { +//// if (UseAES) { +//// warning("UseAES specified, but not supported on this CPU"); +//// FLAG_SET_DEFAULT(UseAES, false); +//// } +//// if (UseAESIntrinsics) { +//// warning("UseAESIntrinsics specified, but not supported on this CPU"); +//// FLAG_SET_DEFAULT(UseAESIntrinsics, false); +//// } +//// } +//// +//// if (UseAESCTRIntrinsics) { +//// warning("AES/CTR intrinsics are not available on this CPU"); +//// FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false); +//// } +//// +//// if (FLAG_IS_DEFAULT(UseCRC32Intrinsics)) { +//// UseCRC32Intrinsics = true; +//// } +//// +//// if (auxv & HWCAP_CRC32) { +//// if (FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) { +//// FLAG_SET_DEFAULT(UseCRC32CIntrinsics, true); +//// } +//// } else if (UseCRC32CIntrinsics) { +//// warning("CRC32C is not available on the CPU"); +//// 
FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false); +//// } +//// +//// if (FLAG_IS_DEFAULT(UseFMA)) { +//// FLAG_SET_DEFAULT(UseFMA, true); +//// } +//// +//// if (auxv & (HWCAP_SHA1 | HWCAP_SHA2)) { +//// if (FLAG_IS_DEFAULT(UseSHA)) { +//// FLAG_SET_DEFAULT(UseSHA, true); +//// } +//// } else if (UseSHA) { +//// warning("SHA instructions are not available on this CPU"); +//// FLAG_SET_DEFAULT(UseSHA, false); +//// } +//// +//// if (UseSHA && (auxv & HWCAP_SHA1)) { +//// if (FLAG_IS_DEFAULT(UseSHA1Intrinsics)) { +//// FLAG_SET_DEFAULT(UseSHA1Intrinsics, true); +//// } +//// } else if (UseSHA1Intrinsics) { +//// warning("Intrinsics for SHA-1 crypto hash functions not available on this CPU."); +//// FLAG_SET_DEFAULT(UseSHA1Intrinsics, false); +//// } +//// +//// if (UseSHA && (auxv & HWCAP_SHA2)) { +//// if (FLAG_IS_DEFAULT(UseSHA256Intrinsics)) { +//// FLAG_SET_DEFAULT(UseSHA256Intrinsics, true); +//// } +//// } else if (UseSHA256Intrinsics) { +//// warning("Intrinsics for SHA-224 and SHA-256 crypto hash functions not available on this CPU."); +//// FLAG_SET_DEFAULT(UseSHA256Intrinsics, false); +//// } +//// +//// if (UseSHA512Intrinsics) { +//// warning("Intrinsics for SHA-384 and SHA-512 crypto hash functions not available on this CPU."); +//// FLAG_SET_DEFAULT(UseSHA512Intrinsics, false); +//// } +//// +//// if (!(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics)) { +//// FLAG_SET_DEFAULT(UseSHA, false); +//// } +//// +//// if (auxv & HWCAP_PMULL) { +//// if (FLAG_IS_DEFAULT(UseGHASHIntrinsics)) { +//// FLAG_SET_DEFAULT(UseGHASHIntrinsics, true); +//// } +//// } else if (UseGHASHIntrinsics) { +//// warning("GHASH intrinsics are not available on this CPU"); +//// FLAG_SET_DEFAULT(UseGHASHIntrinsics, false); +//// } +//// +//// if (is_zva_enabled()) { +//// if (FLAG_IS_DEFAULT(UseBlockZeroing)) { +//// FLAG_SET_DEFAULT(UseBlockZeroing, true); +//// } +//// if (FLAG_IS_DEFAULT(BlockZeroingLowLimit)) { +//// FLAG_SET_DEFAULT(BlockZeroingLowLimit, 4 * VM_Version::zva_length()); +//// } +//// } else if (UseBlockZeroing) { +//// warning("DC ZVA is not available on this CPU"); +//// FLAG_SET_DEFAULT(UseBlockZeroing, false); +//// } +//// +//// // This machine allows unaligned memory accesses +//// if (FLAG_IS_DEFAULT(UseUnalignedAccesses)) { +//// FLAG_SET_DEFAULT(UseUnalignedAccesses, true); +//// } +//// +//// if (FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) { +//// UseMultiplyToLenIntrinsic = true; +//// } +//// +//// if (FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) { +//// UseSquareToLenIntrinsic = true; +//// } +//// +//// if (FLAG_IS_DEFAULT(UseMulAddIntrinsic)) { +//// UseMulAddIntrinsic = true; +//// } +//// +//// if (FLAG_IS_DEFAULT(UseBarriersForVolatile)) { +//// UseBarriersForVolatile = (_features & CPU_DMB_ATOMICS) != 0; +//// } +//// +//// if (FLAG_IS_DEFAULT(UsePopCountInstruction)) { +//// UsePopCountInstruction = true; +//// } +//// +//// if (FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) { +//// UseMontgomeryMultiplyIntrinsic = true; +//// } +//// if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) { +//// UseMontgomerySquareIntrinsic = true; +//// } +//// +////#ifdef COMPILER2 +//// if (FLAG_IS_DEFAULT(OptoScheduling)) { +//// OptoScheduling = true; +//// } +////#endif +////} + +int VM_Version::determine_features() { + //////////////////////add some other feature here////////////////// + int features = platform_features(unknown_m); + //spt_16k_page_m; + return features; +} + +void VM_Version::initialize() { + + _features = determine_features(); + //no need, 
Abstract_VM_Version already defines it as false + _supports_cx8 = true; + + //////////////////////add some other feature here////////////////// + + if (UseG1GC && FLAG_IS_DEFAULT(MaxGCPauseMillis)) { + FLAG_SET_DEFAULT(MaxGCPauseMillis, 650*8); + } + + if (UseG1GC && FLAG_IS_DEFAULT(GCPauseIntervalMillis)) { + FLAG_SET_DEFAULT(GCPauseIntervalMillis, MaxGCPauseMillis + 1); + } + +#ifdef COMPILER2 + if (MaxVectorSize > 0) { + if (!is_power_of_2(MaxVectorSize)) { + warning("MaxVectorSize must be a power of 2"); + MaxVectorSize = 8; + } + } + // Vector optimization is disabled by default. + if (FLAG_IS_DEFAULT(MaxVectorSize)) { + MaxVectorSize = 0; + } + + // Use ctlz/cttz/ctpop instructions if available. + if (is_shenwei()) { + if (FLAG_IS_DEFAULT(UseCountLeadingZerosInstruction)) { + FLAG_SET_DEFAULT(UseCountLeadingZerosInstruction, 1); + } + if (FLAG_IS_DEFAULT(UseCountTrailingZerosInstruction)) { + FLAG_SET_DEFAULT(UseCountTrailingZerosInstruction, 1); + } + if (FLAG_IS_DEFAULT(UsePopCountInstruction)) { + FLAG_SET_DEFAULT(UsePopCountInstruction, 1); + } + if (is_sw6b() && FLAG_IS_DEFAULT(UseSW6B)) { + //FLAG_SET_DEFAULT(UseSW6B, 1); + } + if (is_sw8a() && FLAG_IS_DEFAULT(UseSW8A)) { + FLAG_SET_DEFAULT(UseSW8A, 0); + FLAG_SET_DEFAULT(FRegisterConflict, 0); +// FLAG_SET_DEFAULT(UseWmemb, 1); +// FLAG_SET_DEFAULT(UseAddpi, 0); + } + if (UseAES) { + if (!FLAG_IS_DEFAULT(UseAES)) { + warning("AES instructions are not available on this CPU"); + } + FLAG_SET_DEFAULT(UseAES, false); + } + if (UseAESIntrinsics) { + if (!FLAG_IS_DEFAULT(UseAESIntrinsics)) { + warning("AES intrinsics are not available on this CPU"); + } + FLAG_SET_DEFAULT(UseAESIntrinsics, false); + } + } else if (UseCountLeadingZerosInstruction || UseCountTrailingZerosInstruction + || UsePopCountInstruction) { + if (!FLAG_IS_DEFAULT(UseCountTrailingZerosInstruction)) + warning("Only SW CPUs support UseCountTrailingZerosInstruction"); + FLAG_SET_DEFAULT(UseCountLeadingZerosInstruction, 0); + FLAG_SET_DEFAULT(UseCountTrailingZerosInstruction, 0); + FLAG_SET_DEFAULT(UsePopCountInstruction, 0); + } + if (FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) { + UseMontgomeryMultiplyIntrinsic = true; + } + if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) { + UseMontgomerySquareIntrinsic = true; + } +#endif + + if (TieredCompilation) { + if (!FLAG_IS_DEFAULT(TieredCompilation)) + warning("TieredCompilation not supported"); + FLAG_SET_DEFAULT(TieredCompilation, false); + } +// if (UseCRC32Intrinsics) { +// if (!FLAG_IS_DEFAULT(UseCRC32Intrinsics)) +// warning("CRC32 intrinsics are not available on this CPU"); +// FLAG_SET_DEFAULT(UseCRC32Intrinsics, false); +// } + char buf[512]; + jio_snprintf(buf, sizeof(buf), "%s%s%s%s%s%s%s%s%s%s", + (has_l2_cache() ? ", has_l2_cache" : ""), + (has_16k_page() ? ", has_16k_page" : ""), + (is_shenwei() ? ", on_shenwei_platform" : ""), + (is_sw2f() ? ", SW410(2F)" : ""), + (is_sw4a() ? ", SW411(4A)" : "" ), + (is_sw6a() ? ", SW421(6A)" : ""), + (is_sw6b() ? ", SW422(6B)" : ""), + (is_sw1621() ? ", SW1621" : ""), + (is_sw8a() ? ", SW8A" : ""), + (UseCountTrailingZerosInstruction ? ", UseCountTrailingZerosInstruction" : "")); + + // buf starts with ", " or is empty + _features_str = strdup(strlen(buf) > 2 ?
buf + 2 : buf); + + if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) { + FLAG_SET_DEFAULT(AllocatePrefetchStyle, 1); + } + + if (FLAG_IS_DEFAULT(AllocatePrefetchLines)) { + FLAG_SET_DEFAULT(AllocatePrefetchLines, 1); + } + + if (FLAG_IS_DEFAULT(AllocatePrefetchStepSize)) { + FLAG_SET_DEFAULT(AllocatePrefetchStepSize, 64); + } + + if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) { + FLAG_SET_DEFAULT(AllocatePrefetchDistance, 64); + } + + if (FLAG_IS_DEFAULT(AllocateInstancePrefetchLines)) { + FLAG_SET_DEFAULT(AllocateInstancePrefetchLines, 1); + } + + if (UseSHA) { + warning("SHA instructions are not available on this CPU"); + FLAG_SET_DEFAULT(UseSHA, false); + } + + if (UseFMA) { + warning("FMA instructions are not available on this CPU"); + FLAG_SET_DEFAULT(UseFMA, false); + } + + if (UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics) { + warning("SHA intrinsics are not available on this CPU"); + FLAG_SET_DEFAULT(UseSHA1Intrinsics, false); + FLAG_SET_DEFAULT(UseSHA256Intrinsics, false); + FLAG_SET_DEFAULT(UseSHA512Intrinsics, false); + } + + check_virtualizations(); + + NOT_PRODUCT( if (PrintMiscellaneous && Verbose) print_features(); ); +} + +void VM_Version::print_features() { + tty->print_cr("Version:%s", cpu_features()); +} + +static bool check_info_file(const char* fpath, + const char* virt1, VirtualizationType vt1, + const char* virt2, VirtualizationType vt2) { + char line[500]; + FILE* fp = os::fopen(fpath, "r"); + if (fp == nullptr) { + return false; + } + while (fgets(line, sizeof(line), fp) != nullptr) { + if (strcasestr(line, virt1) != 0) { + Abstract_VM_Version::_detected_virtualization = vt1; + fclose(fp); + return true; + } + if (virt2 != NULL && strcasestr(line, virt2) != 0) { + Abstract_VM_Version::_detected_virtualization = vt2; + fclose(fp); + return true; + } + } + fclose(fp); + return false; +} + +void VM_Version::check_virtualizations() { + const char* pname_file = "/sys/devices/virtual/dmi/id/product_name"; + const char* tname_file = "/sys/hypervisor/type"; + if (check_info_file(pname_file, "KVM", KVM, "VMWare", VMWare)) { + return; + } + check_info_file(tname_file, "Xen", XenPVHVM, NULL, NoDetectedVirtualization); +} + +void VM_Version::print_platform_virtualization_info(outputStream* st) { + VirtualizationType vrt = VM_Version::get_detected_virtualization(); + if (vrt == KVM) { + st->print_cr("KVM virtualization detected"); + } else if (vrt == VMWare) { + st->print_cr("VMWare virtualization detected"); + } else if (vrt == XenPVHVM) { + st->print_cr("Xen virtualization detected"); + } +} diff --git a/src/hotspot/cpu/sw64/vm_version_sw64.hpp b/src/hotspot/cpu/sw64/vm_version_sw64.hpp new file mode 100644 index 00000000000..d3c1fab4407 --- /dev/null +++ b/src/hotspot/cpu/sw64/vm_version_sw64.hpp @@ -0,0 +1,178 @@ +/* + * Copyright (c) 1997, 2021, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_SW64_VM_VM_VERSION_SW64_HPP +#define CPU_SW64_VM_VM_VERSION_SW64_HPP + + +#include "runtime/globals_extension.hpp" +#include "runtime/abstract_vm_version.hpp" +#include "utilities/sizes.hpp" +#include "utilities/macros.hpp" + +class VM_Version : public Abstract_VM_Version { +//// friend class JVMCIVMStructs; + +protected: +//// static int _cpu; +//// static int _model; +//// static int _model2; +//// static int _variant; +//// static int _revision; +//// static int _stepping; +//// +//// struct PsrInfo { +//// uint32_t dczid_el0; +//// uint32_t ctr_el0; +//// }; +//// static PsrInfo _psr_info; + static int _features; + static const char* _features_str; +//// static void get_processor_features(); + static void print_features(); + static int determine_features(); + static int platform_features(int features); + +public: + // Initialization + static void initialize(); + + static void check_virtualizations(); + + static void print_platform_virtualization_info(outputStream*); + +//// // Asserts +//// static void assert_is_initialized() { +//// } + + static bool has_l2_cache() { return _features & with_l2_cache_m; } + static bool has_16k_page() { return _features & spt_16k_page_m; } + static bool is_sw2f() { return _features & sw2f_m; } + static bool is_sw4a() { return _features & sw4a_m; } + static bool is_sw6a() { return _features & sw6a_m; } + static bool is_sw6b() { return _features & sw6b_m; } + static bool is_sw8a() { return _features & wx_h8000_m; }//TODO UseSW8A + static bool is_sw1621() { return _features & sw1621_m; } + static bool is_sw3231() { return _features & sw3231_m; } + static bool is_shenwei() { return _features & with_sw_support_m; } +// static bool sw2only() { return is_sw2f() || is_sw4a() || is_sw6a(); } + static bool sw2only() { return true; } + static bool sw3only() { return is_sw6b(); } + static bool sw4only() { return is_sw8a(); } + static const char* cpu_features() { return _features_str; } + +//// static bool expensive_load(int ld_size, int scale) { +//// if (cpu_family() == CPU_ARM) { +//// // Half-word load with index shift by 1 (aka scale is 2) has +//// // extra cycle latency, e.g. ldrsh w0, [x1,w2,sxtw #1]. 
+//// if (ld_size == 2 && scale == 2) { +//// return true; +//// } +//// } +//// return false; +//// } + + enum Family { + CPU_ARM = 'A', + CPU_BROADCOM = 'B', + CPU_CAVIUM = 'C', + CPU_DEC = 'D', + CPU_INFINEON = 'I', + CPU_MOTOROLA = 'M', + CPU_NVIDIA = 'N', + CPU_AMCC = 'P', + CPU_QUALCOM = 'Q', + CPU_MARVELL = 'V', + CPU_INTEL = 'i', + }; + + enum Feature_Flag { + with_l2_cache = 0, + spt_16k_page = 1, + sw2f = 2, + sw4a = 3, + sw6a = 4, + sw6b = 5, + sw1621 = 6, + sw3231 = 7, + wx_h8000 = 8, + with_sw_support = 9, +// CPU_FP = (1<<0), +// CPU_ASIMD = (1<<1), +// CPU_EVTSTRM = (1<<2), +// CPU_AES = (1<<3), +// CPU_PMULL = (1<<4), +// CPU_SHA1 = (1<<5), +// CPU_SHA2 = (1<<6), +// CPU_CRC32 = (1<<7), +// CPU_LSE = (1<<8), +// CPU_STXR_PREFETCH= (1 << 29), +// CPU_A53MAC = (1 << 30), +// CPU_DMB_ATOMICS = (1 << 31), + }; + + enum Feature_Flag_Set { + unknown_m = 0, + all_features_m = -1, + with_l2_cache_m = 1 << with_l2_cache, + spt_16k_page_m = 1 << spt_16k_page, + sw2f_m = 1 << sw2f, + sw4a_m = 1 << sw4a, + sw6a_m = 1 << sw6a, + sw6b_m = 1 << sw6b, + sw1621_m = 1 << sw1621, + sw3231_m = 1 << sw3231, + wx_h8000_m = 1 << wx_h8000, + with_sw_support_m = 1 << with_sw_support, + + //////////////////////add some other feature here////////////////// + }; + +//// static int cpu_family() { return _cpu; } +//// static int cpu_model() { return _model; } +//// static int cpu_model2() { return _model2; } +//// static int cpu_variant() { return _variant; } +//// static int cpu_revision() { return _revision; } +//// static ByteSize dczid_el0_offset() { return byte_offset_of(PsrInfo, dczid_el0); } +//// static ByteSize ctr_el0_offset() { return byte_offset_of(PsrInfo, ctr_el0); } +//// static bool is_zva_enabled() { +//// // Check the DZP bit (bit 4) of dczid_el0 is zero +//// // and block size (bit 0~3) is not zero. +//// return ((_psr_info.dczid_el0 & 0x10) == 0 && +//// (_psr_info.dczid_el0 & 0xf) != 0); +//// } +//// static int icache_line_size() { +//// return (1 << (_psr_info.ctr_el0 & 0x0f)) * 4; +//// } +//// static int dcache_line_size() { +//// return (1 << ((_psr_info.ctr_el0 >> 16) & 0x0f)) * 4; +//// } + + static bool supports_fast_class_init_checks() { + return true; + } + constexpr static bool supports_stack_watermark_barrier() { return true; } +}; + +#endif // CPU_SW64_VM_VM_VERSION_SW64_HPP diff --git a/src/hotspot/cpu/sw64/vmreg_sw64.cpp b/src/hotspot/cpu/sw64/vmreg_sw64.cpp new file mode 100644 index 00000000000..dba590c6d71 --- /dev/null +++ b/src/hotspot/cpu/sw64/vmreg_sw64.cpp @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/assembler.hpp" +#include "code/vmreg.hpp" +#include "vmreg_sw64.inline.hpp" + + +void VMRegImpl::set_regName() { + Register reg = ::as_Register(0); + int i; + for (i = 0; i < ConcreteRegisterImpl::max_gpr ; ) { + regName[i++] = reg->name(); + regName[i++] = reg->name(); + reg = reg->successor(); + } + + FloatRegister freg = ::as_FloatRegister(0); + for ( ; i < ConcreteRegisterImpl::max_fpr ; ) { + regName[i++] = freg->name(); + regName[i++] = freg->name(); + freg = freg->successor(); + } + + for ( ; i < ConcreteRegisterImpl::number_of_registers ; i ++ ) { + regName[i] = "NON-GPR-FPR"; + } +} + +#define INTEGER_TYPE 0 +#define VECTOR_TYPE 1 +#define STACK_TYPE 3 + +VMReg VMRegImpl::vmStorageToVMReg(int type, int index) { + switch(type) { + case INTEGER_TYPE: return ::as_Register(index)->as_VMReg(); + case VECTOR_TYPE: return ::as_FloatRegister(index)->as_VMReg(); + case STACK_TYPE: return VMRegImpl::stack2reg(index LP64_ONLY(* 2)); + } + return VMRegImpl::Bad(); +} diff --git a/src/hotspot/cpu/sw64/vmreg_sw64.hpp b/src/hotspot/cpu/sw64/vmreg_sw64.hpp new file mode 100644 index 00000000000..39e4feb116d --- /dev/null +++ b/src/hotspot/cpu/sw64/vmreg_sw64.hpp @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + + #ifndef CPU_SW64_VM_VMREG_SW64_HPP + #define CPU_SW64_VM_VMREG_SW64_HPP + + inline bool is_Register() { + return (unsigned int) value() < (unsigned int) ConcreteRegisterImpl::max_gpr; + } + + inline bool is_FloatRegister() { + return value() >= ConcreteRegisterImpl::max_gpr && value() < ConcreteRegisterImpl::max_fpr; + } + + inline Register as_Register() { + + assert( is_Register(), "must be"); + // Yuk + return ::as_Register(value() >> 1); + } + + inline FloatRegister as_FloatRegister() { + assert( is_FloatRegister() && is_even(value()), "must be" ); + // Yuk + return ::as_FloatRegister((value() - ConcreteRegisterImpl::max_gpr) >> 1); + } + + inline bool is_concrete() { + assert(is_reg(), "must be"); + if(is_Register()) return true; + if(is_FloatRegister()) return true; + assert(false, "what register?"); + return false; + } + + #endif // CPU_SW64_VM_VMREG_SW64_HPP diff --git a/src/hotspot/cpu/sw64/vmreg_sw64.inline.hpp b/src/hotspot/cpu/sw64/vmreg_sw64.inline.hpp new file mode 100644 index 00000000000..b21409dbd4a --- /dev/null +++ b/src/hotspot/cpu/sw64/vmreg_sw64.inline.hpp @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_SW64_VM_VMREG_SW64_INLINE_HPP +#define CPU_SW64_VM_VMREG_SW64_INLINE_HPP + +inline VMReg RegisterImpl::as_VMReg() { + if( this==noreg ) return VMRegImpl::Bad(); + return VMRegImpl::as_VMReg(encoding() << 1 ); +} + +inline VMReg FloatRegisterImpl::as_VMReg() { + return VMRegImpl::as_VMReg((encoding() << 1) + ConcreteRegisterImpl::max_gpr); +} + +#endif // CPU_SW64_VM_VMREG_SW64_INLINE_HPP diff --git a/src/hotspot/cpu/sw64/vtableStubs_sw64.cpp b/src/hotspot/cpu/sw64/vtableStubs_sw64.cpp new file mode 100644 index 00000000000..f7f9e4233af --- /dev/null +++ b/src/hotspot/cpu/sw64/vtableStubs_sw64.cpp @@ -0,0 +1,268 @@ +/* + * Copyright (c) 2003, 2021, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "code/vtableStubs.hpp" +#include "interp_masm_sw64.hpp" +#include "memory/resourceArea.hpp" +#include "oops/compiledICHolder.hpp" +#include "oops/instanceKlass.hpp" +#include "oops/klassVtable.hpp" +#include "runtime/sharedRuntime.hpp" +#include "vmreg_sw64.inline.hpp" +#ifdef COMPILER2 +#include "opto/runtime.hpp" +#endif + +// machine-dependent part of VtableStubs: create VtableStub of correct size and +// initialize its code + +#define __ masm-> + +#ifndef PRODUCT +extern "C" void bad_compiled_vtable_index(JavaThread* thread, oop receiver, int index); +#endif + +VtableStub* VtableStubs::create_vtable_stub(int vtable_index) { + // Read "A word on VtableStub sizing" in share/code/vtableStubs.hpp for details on stub sizing. + const int stub_code_length = code_size_limit(true); + VtableStub* s = new(stub_code_length) VtableStub(true, vtable_index); + // Can be NULL if there is no free space in the code cache. + if (s == NULL) { + return NULL; + } + + // Count unused bytes in instruction sequences of variable size. + // We add them to the computed buffer size in order to avoid + // overflow in subsequently generated stubs. + address start_pc; + int slop_bytes = 0; + int slop_delta = 0; + // No variance was detected in vtable stub sizes. Setting index_dependent_slop == 0 will unveil any deviation from this observation. + const int index_dependent_slop = 0; + + ResourceMark rm; + CodeBuffer cb(s->entry_point(), stub_code_length); + MacroAssembler* masm = new MacroAssembler(&cb); + +#if (!defined(PRODUCT) && defined(COMPILER2)) + if (CountCompiledCalls) { + __ incrementl(ExternalAddress((address) SharedRuntime::nof_megamorphic_calls_addr())); + } +#endif + + // get receiver (need to skip return address on top of stack) + assert(VtableStub::receiver_location() == j_rarg0->as_VMReg(), "receiver expected in j_rarg0"); + + // get receiver klass + Register rax = V0; +// Register rbx = rscratch1; + address npe_addr = __ pc(); + __ load_klass(rax, j_rarg0); + +#ifndef PRODUCT + if (DebugVtables) { + Label L; + start_pc = __ pc(); + // check offset vs vtable length + __ cmpw(Address(rax, Klass::vtable_length_offset()), vtable_index*vtableEntry::size()); + slop_delta = 12 - (__ pc() - start_pc); // cmpl varies in length, depending on data + slop_bytes += slop_delta; + assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta); + + __ jcc(Assembler::greater, L); + __ movw(c_rarg2, vtable_index); + // VTABLE TODO: find upper bound for call_VM length. 
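+ // start_pc is captured here so the length of the emitted call_VM sequence can be measured; the 550-byte estimate below folds any variance into slop_bytes.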
+ start_pc = __ pc(); + __ call_VM(noreg, CAST_FROM_FN_PTR(address, bad_compiled_vtable_index), j_rarg0, c_rarg2); + slop_delta = 550 - (__ pc() - start_pc); + slop_bytes += slop_delta; + assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta); + __ bind(L); + } +#endif // PRODUCT + + const Register method = rmethod; + + // load Method* and target address + start_pc = __ pc(); + __ lookup_virtual_method(rax, vtable_index, method); + slop_delta = 16 - (int)(__ pc() - start_pc); + slop_bytes += slop_delta; + assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta); + +#ifndef PRODUCT + if (DebugVtables) { + Label L; + __ cmpptr(method, R0); + __ jcc(Assembler::equal, L); + __ cmpptr(Address(method, Method::from_compiled_offset()), R0); + __ jcc(Assembler::notZero, L); + __ stop("Vtable entry is NULL"); + __ bind(L); + } +#endif // PRODUCT + + // rax: receiver klass + // method (rbx): Method* + // rcx: receiver + address ame_addr = __ pc(); + __ jmp( Address(method, Method::from_compiled_offset())); + + masm->flush(); + slop_bytes += index_dependent_slop; // add'l slop for size variance due to large itable offsets + bookkeeping(masm, tty, s, npe_addr, ame_addr, true, vtable_index, slop_bytes, index_dependent_slop); + + return s; +} + + +VtableStub* VtableStubs::create_itable_stub(int itable_index) { + // Read "A word on VtableStub sizing" in share/code/vtableStubs.hpp for details on stub sizing. + const int stub_code_length = code_size_limit(false); + VtableStub* s = new(stub_code_length) VtableStub(false, itable_index); + // Can be NULL if there is no free space in the code cache. + if (s == NULL) { + return NULL; + } + + // Count unused bytes in instruction sequences of variable size. + // We add them to the computed buffer size in order to avoid + // overflow in subsequently generated stubs. + address start_pc; + int slop_bytes = 0; + int slop_delta = 0; + const int index_dependent_slop = (itable_index == 0) ? 4 : // code size change with transition from 8-bit to 32-bit constant (@index == 16). + (itable_index < 16) ? 3 : 0; // index == 0 generates even shorter code. + + ResourceMark rm; + CodeBuffer cb(s->entry_point(), stub_code_length); + MacroAssembler *masm = new MacroAssembler(&cb); + +#if (!defined(PRODUCT) && defined(COMPILER2)) + if (CountCompiledCalls) { + __ incrementl(ExternalAddress((address) SharedRuntime::nof_megamorphic_calls_addr())); + } +#endif // PRODUCT + + // Entry arguments: + // rax: CompiledICHolder + // j_rarg0: Receiver + + // Most registers are in use; we'll use rax, rbx, r10, r11 + // (various calling sequences use r[cd]x, r[sd]i, r[89]; stay away from them) + const Register recv_klass_reg = r10; + const Register holder_klass_reg = rax; // declaring interface klass (DECC) + const Register resolved_klass_reg = rbx; // resolved interface klass (REFC) + const Register temp_reg = r11; + + const Register icholder_reg = rax; + __ ldptr(resolved_klass_reg, Address(icholder_reg, CompiledICHolder::holder_klass_offset())); + __ ldptr(holder_klass_reg, Address(icholder_reg, CompiledICHolder::holder_metadata_offset())); + + Label L_no_such_interface; + + // get receiver klass (also an implicit null-check) + assert(VtableStub::receiver_location() == j_rarg0->as_VMReg(), "receiver expected in j_rarg0"); + address npe_addr = __ pc(); + __ load_klass(recv_klass_reg, j_rarg0); + + start_pc = __ pc(); + + // Receiver subtype check against REFC. + // Destroys recv_klass_reg value. 
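+ // This first lookup only verifies that the receiver implements the resolved interface (return_method is false); recv_klass_reg is clobbered here and reloaded below before the itable index lookup.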
+ __ lookup_interface_method(// inputs: rec. class, interface + recv_klass_reg, resolved_klass_reg, noreg, + // outputs: scan temp. reg1, scan temp. reg2 + recv_klass_reg, temp_reg, + L_no_such_interface, + /*return_method=*/false); + + const ptrdiff_t typecheckSize = __ pc() - start_pc; + start_pc = __ pc(); + + // Get selected method from declaring class and itable index + const Register method = rbx; + __ load_klass(recv_klass_reg, j_rarg0); // restore recv_klass_reg + __ lookup_interface_method(// inputs: rec. class, interface, itable index + recv_klass_reg, holder_klass_reg, itable_index, + // outputs: method, scan temp. reg + method, temp_reg, + L_no_such_interface); + + const ptrdiff_t lookupSize = __ pc() - start_pc; + + // We expect we need index_dependent_slop extra bytes. Reason: + // The emitted code in lookup_interface_method changes when itable_index exceeds 15. + // For linux, a very narrow estimate would be 112, but Solaris requires some more space (130). + const ptrdiff_t estimate = 144; + const ptrdiff_t codesize = typecheckSize + lookupSize + index_dependent_slop; + slop_delta = (int)(estimate - codesize); + slop_bytes += slop_delta; + assert(slop_delta >= 0, "itable #%d: Code size estimate (%d) for lookup_interface_method too small, required: %d", itable_index, (int)estimate, (int)codesize); + + // If we take a trap while this arg is on the stack we will not + // be able to walk the stack properly. This is not an issue except + // when there are mistakes in this assembly code that could generate + // a spurious fault. Ask me how I know... + + // method (rbx): Method* + // j_rarg0: receiver + +#ifdef ASSERT + if (DebugVtables) { + Label L2; + __ cmpptr(method, R0); + __ jcc(Assembler::equal, L2); + __ cmpptr(Address(method, Method::from_compiled_offset()), R0); + __ jcc(Assembler::notZero, L2); + __ stop("compiler entrypoint is null"); + __ bind(L2); + } +#endif // ASSERT + + address ame_addr = __ pc(); + __ jmp(Address(method, Method::from_compiled_offset())); + + __ bind(L_no_such_interface); + // Handle IncompatibleClassChangeError in itable stubs. + // More detailed error message. + // We force resolving of the call site by jumping to the "handle + // wrong method" stub, and so let the interpreter runtime do all the + // dirty work. + __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); + + masm->flush(); + slop_bytes += index_dependent_slop; // add'l slop for size variance due to large itable offsets + bookkeeping(masm, tty, s, npe_addr, ame_addr, false, itable_index, slop_bytes, index_dependent_slop); + + return s; +} + +int VtableStub::pd_code_alignment() { + // cache line size is 64 bytes, but we want to limit alignment loss. 
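+  // Aligning stubs only to wordSize (8 bytes) rather than to a full 64-byte cache line
+  // keeps code-cache waste low, at the cost of some stubs spanning icache lines.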
+ const unsigned int icache_line_size = wordSize; + return icache_line_size; +} diff --git a/src/hotspot/os/linux/os_linux.cpp b/src/hotspot/os/linux/os_linux.cpp index 7c951cee51c..9c1b4fbafe8 100644 --- a/src/hotspot/os/linux/os_linux.cpp +++ b/src/hotspot/os/linux/os_linux.cpp @@ -1048,6 +1048,12 @@ void os::pd_start_thread(Thread* thread) { Monitor* sync_with_child = osthread->startThread_lock(); MutexLocker ml(sync_with_child, Mutex::_no_safepoint_check_flag); sync_with_child->notify(); +//#if defined(SW64) +// // To be accessed in NativeGeneralJump::patch_verified_entry() +// if (thread->is_Java_thread()) { +// ((JavaThread*)thread)->set_handle_wrong_method_stub(SharedRuntime::get_handle_wrong_method_stub()); +// } +//#endif } // Free Linux resources related to the OSThread @@ -1741,6 +1747,11 @@ void * os::dll_load(const char *filename, char *ebuf, int ebuflen) { #else {EM_PPC64, EM_PPC64, ELFCLASS64, ELFDATA2MSB, (char*)"Power PC 64"}, {EM_SH, EM_SH, ELFCLASS32, ELFDATA2MSB, (char*)"SuperH BE"}, +#endif +#if defined(__sw_64) + {EM_SW_64, EM_SW_64, ELFCLASS64, ELFDATA2LSB, (char*)"Sw64"}, +#else + {EM_ALPHA, EM_ALPHA, ELFCLASS64, ELFDATA2LSB, (char*)"Alpha"}, #endif {EM_ARM, EM_ARM, ELFCLASS32, ELFDATA2LSB, (char*)"ARM"}, // we only support 64 bit z architecture @@ -1775,6 +1786,9 @@ void * os::dll_load(const char *filename, char *ebuf, int ebuflen) { static Elf32_Half running_arch_code=EM_ARM; #elif (defined S390) static Elf32_Half running_arch_code=EM_S390; +#elif (defined __sw_64) && (defined SW64) + static Elf32_Half running_arch_code=EM_SW_64; +#elif (defined __alpha) && (defined SW64) #elif (defined ALPHA) static Elf32_Half running_arch_code=EM_ALPHA; #elif (defined MIPSEL) @@ -2851,10 +2865,13 @@ void os::pd_commit_memory_or_exit(char* addr, size_t size, bool exec, } // Define MAP_HUGETLB here so we can build HotSpot on old systems. +#ifdef SW64 //ZHJ20170828 +#define MAP_HUGETLB 0x100000 +#else #ifndef MAP_HUGETLB - #define MAP_HUGETLB 0x40000 +#define MAP_HUGETLB 0x40000 +#endif #endif - // If mmap flags are set with MAP_HUGETLB and the system supports multiple // huge page sizes, flag bits [26:31] can be used to encode the log2 of the // desired huge page size. Otherwise, the system's default huge page size will be used. @@ -2871,6 +2888,22 @@ void os::pd_commit_memory_or_exit(char* addr, size_t size, bool exec, int os::Linux::commit_memory_impl(char* addr, size_t size, size_t alignment_hint, bool exec) { +#ifdef SW64 + if (UseHugeTLBFS && alignment_hint > (size_t)vm_page_size()) { + int prot = exec ? 
PROT_READ|PROT_WRITE|PROT_EXEC : PROT_READ|PROT_WRITE; + uintptr_t res = + (uintptr_t) ::mmap(addr, size, prot, + MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS|MAP_HUGETLB, + -1, 0); + if (res != (uintptr_t) MAP_FAILED) { + if (UseNUMAInterleaving) { + numa_make_global(addr, size); + } + return 0; + } + // Fall through and try to use small pages + } +#endif int err = os::Linux::commit_memory_impl(addr, size, exec); if (err == 0) { realign_memory(addr, size, alignment_hint); diff --git a/src/hotspot/os/posix/os_posix.cpp b/src/hotspot/os/posix/os_posix.cpp index 4307a189edf..6b42aa6d5e6 100644 --- a/src/hotspot/os/posix/os_posix.cpp +++ b/src/hotspot/os/posix/os_posix.cpp @@ -1090,7 +1090,7 @@ size_t os::Posix::get_initial_stack_size(ThreadType thr_type, size_t req_stack_s } #ifndef ZERO -#ifndef ARM +#if !defined(ARM) && !defined(SW64) static bool get_frame_at_stack_banging_point(JavaThread* thread, address pc, const void* ucVoid, frame* fr) { if (Interpreter::contains(pc)) { // interpreter performs stack banging after the fixed frame header has @@ -1126,7 +1126,7 @@ static bool get_frame_at_stack_banging_point(JavaThread* thread, address pc, con assert(fr->is_java_frame(), "Safety check"); return true; } -#endif // ARM +#endif // ARM or SW64 // This return true if the signal handler should just continue, ie. return after calling this bool os::Posix::handle_stack_overflow(JavaThread* thread, address addr, address pc, @@ -1135,8 +1135,8 @@ bool os::Posix::handle_stack_overflow(JavaThread* thread, address addr, address StackOverflow* overflow_state = thread->stack_overflow_state(); if (overflow_state->in_stack_yellow_reserved_zone(addr)) { if (thread->thread_state() == _thread_in_Java) { -#ifndef ARM - // arm32 doesn't have this +#if !defined(ARM) && !defined(SW64) + // arm32 or SW doesn't have this if (overflow_state->in_stack_reserved_zone(addr)) { frame fr; if (get_frame_at_stack_banging_point(thread, pc, ucVoid, &fr)) { diff --git a/src/hotspot/os_cpu/linux_sw64/assembler_linux_sw64.cpp b/src/hotspot/os_cpu/linux_sw64/assembler_linux_sw64.cpp new file mode 100644 index 00000000000..920d94da7f1 --- /dev/null +++ b/src/hotspot/os_cpu/linux_sw64/assembler_linux_sw64.cpp @@ -0,0 +1,28 @@ +/* + * Copyright (c) 1999, 2015, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +// nothing required here + + diff --git a/src/hotspot/os_cpu/linux_sw64/atomic_linux_sw64.hpp b/src/hotspot/os_cpu/linux_sw64/atomic_linux_sw64.hpp new file mode 100644 index 00000000000..cb01a8149b9 --- /dev/null +++ b/src/hotspot/os_cpu/linux_sw64/atomic_linux_sw64.hpp @@ -0,0 +1,280 @@ +/* + * Copyright (c) 1999, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef OS_CPU_LINUX_SW64_VM_ATOMIC_LINUX_SW64_HPP +#define OS_CPU_LINUX_SW64_VM_ATOMIC_LINUX_SW64_HPP + +#include "runtime/os.hpp" +#include "runtime/vm_version.hpp" + +// Implementation of class atomic + +template +struct Atomic::PlatformAdd { + template + D fetch_and_add(D volatile* dest, I add_value, atomic_memory_order order) const; + + template + D add_and_fetch(D volatile* dest, I add_value, atomic_memory_order order) const { + return fetch_and_add(dest, add_value, order) + add_value; + } +}; + +template<> +template +inline D Atomic::PlatformAdd<4>::fetch_and_add(D volatile* dest, I add_value, + atomic_memory_order order) const { + STATIC_ASSERT(4 == sizeof(I)); + STATIC_ASSERT(4 == sizeof(D)); + D __ret; + I __tmp; + D* __addr; + __asm__ __volatile__ ( + "1: ldi %[__addr],%[__dest]\n\t" + " lldw %[__ret],0(%[__addr])\n\t" + " ldi %[__tmp],1\n\t" + " wr_f %[__tmp]\n\t" + " addw %[__ret],%[__val],%[__tmp]\n\t" + " .align 3\n\t" + " lstw %[__tmp],0(%[__addr])\n\t" + " rd_f %[__tmp]\n\t" + " beq %[__tmp],1b\n\t" + " zapnot %[__ret],0xf,%[__ret]\n\t" + " \n\t" + : [__ret]"=&r" (__ret), [__addr]"=&r"(__addr), [__tmp]"=&r"(__tmp) + : [__dest] "m" (*(volatile jint*)dest), [__val] "Ir" (add_value) + : "memory" ); + + return __ret; +} + +template<> +template +inline T Atomic::PlatformXchg<4>::operator()(T volatile* dest, + T exchange_value, + atomic_memory_order order) const { + STATIC_ASSERT(4 == sizeof(T)); + + //warning("Atomic::PlatformXchg<4>"); + T __ret, __tmp; + T* __addr; + __asm__ __volatile__ ( + "1: ldi %[__addr],%[__dest]\n\t" + " lldw %[__ret],0(%[__addr])\n\t" + " ldi %[__tmp],1\n\t" + " wr_f %[__tmp]\n\t" + " mov %[__val],%[__tmp]\n\t" + " .align 3\n\t" + " lstw %[__tmp],0(%[__addr])\n\t" + " rd_f %[__tmp]\n\t" + " beq %[__tmp],1b\n\t" + " zapnot %[__ret],0xf,%[__ret]\n\t" + " \n\t" + : [__ret]"=&r" (__ret), [__addr]"=&r"(__addr), [__tmp]"=&r"(__tmp) + : [__dest] "m" (*(T volatile *)dest), [__val] "Ir"(exchange_value) /* _val can not be constant in stl */ + : "memory" ); + return __ret; +} + + +// No direct support for cmpxchg of bytes; emulate using int. 
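+// The primitives in this file all follow the same load-locked/store-conditional loop:
+// lldw/lldl loads the value and establishes the lock, wr_f appears to arm the
+// conditional store, lstw/lstl attempts it, rd_f reads back whether it succeeded, and
+// beq branches back to retry on failure (the trailing zapnot with mask 0xf simply
+// zero-extends 32-bit results).  Byte-sized cmpxchg is not implemented directly; the
+// shared CmpxchgByteUsingInt helper below emulates it by doing a 4-byte cmpxchg on the
+// aligned word containing the byte and retrying if the neighbouring bytes change.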
+template<> +struct Atomic::PlatformCmpxchg<1> : Atomic::CmpxchgByteUsingInt {}; + +/*template<> +template +inline T Atomic::PlatformCmpxchg<1>::operator()(T exchange_value, + T volatile* dest, + T compare_value, + atomic_memory_order ) const { + STATIC_ASSERT(1 == sizeof(T)); + T __prev, __cmp; + T __tmp; + T* __addr; + __asm__ __volatile__ ( + "1: ldi %[__addr],%[__dest]\n\t" + " lldw %[__prev],0(%[__addr])\n\t" + " zap %[__prev], 0x1, %[__tmp]\n\t" + " bis %[__val], %[__tmp], %[__val]\n\t" + " mov %[__old],%[__tmp]\n\t" + " zapnot %[__prev], 0x1, %[__prev]\n\t" + " cmpeq %[__prev],%[__tmp],%[__cmp]\n\t" + " wr_f %[__cmp]\n\t" + " mov %[__val],%[__tmp]\n\t" + " .align 3\n\t" + " lstw %[__tmp],0(%[__addr])\n\t" + " rd_f %[__tmp]\n\t" + " beq %[__cmp],2f\n\t" + " beq %[__tmp],1b\n\t" + "2: \n\t" + " zapnot %[__prev],0xf,%[__prev]\n\t" + : [__prev]"=&r" (__prev), [__addr]"=&r" (__addr), [__cmp] "=&r" (__cmp), [__tmp] "=&r" (__tmp) + : [__dest] "m" (*(T volatile *)dest), [__old]"Ir" (compare_value), [__val]"Ir" (exchange_value) + : "memory" ); + + return __prev; +}*/ + +template<> +template +inline T Atomic::PlatformCmpxchg<4>::operator()(T volatile* dest, + T compare_value, + T exchange_value, + atomic_memory_order /* order */) const { + STATIC_ASSERT(4 == sizeof(T)); + + //warning("Atomic::PlatformCmpxchg<4_1> exchange_value=%d dest=%d compare_value=%d\n", exchange_value, *dest, compare_value); + T __prev, __cmp; + T __tmp; + T* __addr; + __asm__ __volatile__ ( + "1: ldi %[__addr],%[__dest]\n\t" + " lldw %[__prev],0(%[__addr])\n\t" + " mov %[__old],%[__tmp]\n\t" + " addw %[__tmp], 0x0, %[__tmp]\n\t" + " cmpeq %[__prev],%[__tmp],%[__cmp]\n\t" + " wr_f %[__cmp]\n\t" + " mov %[__val],%[__tmp]\n\t" + " .align 3\n\t" + " lstw %[__tmp],0(%[__addr])\n\t" + " rd_f %[__tmp]\n\t" + " beq %[__cmp],2f\n\t" + " beq %[__tmp],1b\n\t" + "2: \n\t" + " zapnot %[__prev],0xf,%[__prev]\n\t" + : [__prev]"=&r" (__prev), [__addr]"=&r" (__addr), [__cmp] "=&r" (__cmp), [__tmp] "=&r" (__tmp) + : [__dest] "m" (*(T volatile *)dest), [__old]"Ir" (compare_value), [__val]"Ir" (exchange_value) /* _val can not be constant in stl */ + : "memory" ); + //warning("Atomic::PlatformCmpxchg<4_2> exchange_value=%d dest=%d compare_value=%d\n", exchange_value, *dest, compare_value); + return __prev; +} + + +template<> +template +inline D Atomic::PlatformAdd<8>::fetch_and_add(D volatile* dest, I add_value, + atomic_memory_order order) const { + STATIC_ASSERT(8 == sizeof(I)); + STATIC_ASSERT(8 == sizeof(D)); + //warning("Atomic::PlatformAdd<8>::fetch_and_add"); + D __ret; + I __tmp; + D* __addr; + __asm__ __volatile__ ( + "1: ldi %[__addr],%[__dest]\n\t" + " lldl %[__ret],0(%[__addr])\n\t" + " ldi %[__tmp],1\n\t" + " wr_f %[__tmp]\n\t" + " addl %[__ret],%[__val],%[__tmp]\n\t" + " .align 3\n\t" + " lstl %[__tmp],0(%[__addr])\n\t" + " rd_f %[__tmp]\n\t" + " beq %[__tmp],1b\n\t" + " \n\t" + : [__ret]"=&r" (__ret), [__addr]"=&r"(__addr), [__tmp]"=&r"(__tmp) + : [__dest] "m" (*(D volatile *)dest), [__val] "Ir"(add_value) + : "memory" ); + + return __ret; +} + +template<> +template +inline T Atomic::PlatformXchg<8>::operator()(T volatile* dest, T exchange_value, + atomic_memory_order order) const { + STATIC_ASSERT(8 == sizeof(T)); + + //warning("Atomic::PlatformXchg<8>"); + T __ret, __tmp; + T __addr; + __asm__ __volatile__ ( + "1: ldi %[__addr],%[__dest]\n\t" + " lldl %[__ret],0(%[__addr])\n\t" + " ldi %[__tmp],1\n\t" + " wr_f %[__tmp]\n\t" + " mov %[__val],%[__tmp]\n\t" + " .align 3\n\t" + " lstl %[__tmp],0(%[__addr])\n\t" + " rd_f 
%[__tmp]\n\t" + " beq %[__tmp],1b\n\t" + " \n\t" + : [__ret]"=&r" (__ret), [__addr]"=&r"(__addr), [__tmp]"=&r"(__tmp) + : [__dest] "m" (*(T volatile *)dest), [__val] "Ir"(exchange_value) /* _val can not be constant in stl */ + : "memory" ); + + return __ret; +} + +template<> +template +inline T Atomic::PlatformCmpxchg<8>::operator()(T volatile* dest, + T compare_value, + T exchange_value, + atomic_memory_order /* order */) const { + STATIC_ASSERT(8 == sizeof(T)); + //warning("Atomic::PlatformCmpxchg<8>"); + T __prev, __cmp; + T __tmp, __addr; + + __asm__ __volatile__ ( + "1: ldi %[__addr],%[__dest]\n\t" + " lldl %[__prev],0(%[__addr])\n\t" + " cmpeq %[__prev],%[__old],%[__cmp]\n\t" + " wr_f %[__cmp]\n\t" + " mov %[__val],%[__tmp]\n\t" + " .align 3\n\t" + " lstl %[__tmp],0(%[__addr])\n\t" + " rd_f %[__tmp]\n\t" + " beq %[__cmp],2f\n\t" + " beq %[__tmp],1b\n\t" + "2: \n\t" + : [__prev]"=&r" (__prev), [__addr]"=&r" (__addr), [__cmp] "=&r" (__cmp), [__tmp] "=&r" (__tmp) + : [__dest] "m" (*(T volatile *)dest), [__old]"Ir" (compare_value), [__val]"Ir" (exchange_value) /* _val can not be constant in stl */ + : "memory" ); + + return __prev; +} + +template +struct Atomic::PlatformOrderedLoad +{ + template + T operator()(const volatile T* p) const { T data; __atomic_load(const_cast(p), &data, __ATOMIC_ACQUIRE); return data; } +}; + +template +struct Atomic::PlatformOrderedStore +{ + template + void operator()(volatile T* p, T v) const { __atomic_store(const_cast(p), &v, __ATOMIC_RELEASE); } +}; + +template +struct Atomic::PlatformOrderedStore +{ + template + void operator()(volatile T* p, T v) const { release_store(p, v); OrderAccess::fence(); } +}; +#endif // OS_CPU_LINUX_SW64_VM_ATOMIC_LINUX_SW64_HPP diff --git a/src/hotspot/os_cpu/linux_sw64/bytes_linux_sw64.hpp b/src/hotspot/os_cpu/linux_sw64/bytes_linux_sw64.hpp new file mode 100644 index 00000000000..1ada4cec3e0 --- /dev/null +++ b/src/hotspot/os_cpu/linux_sw64/bytes_linux_sw64.hpp @@ -0,0 +1,37 @@ +/* + * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef OS_CPU_LINUX_SW64_VM_BYTES_LINUX_SW64_HPP +#define OS_CPU_LINUX_SW64_VM_BYTES_LINUX_SW64_HPP + +#include + +// Efficient swapping of data bytes from Java byte +// ordering to native byte ordering and vice versa. 
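+// These delegate to the bswap_16/bswap_32/bswap_64 builtins from <byteswap.h>;
+// for example, swap_u2(0x1234) yields 0x3412.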
+inline u2 Bytes::swap_u2(u2 x) { return bswap_16(x); } +inline u4 Bytes::swap_u4(u4 x) { return bswap_32(x); } +inline u8 Bytes::swap_u8(u8 x) { return bswap_64(x); } + +#endif // OS_CPU_LINUX_SW64_VM_BYTES_LINUX_SW64_HPP diff --git a/src/hotspot/os_cpu/linux_sw64/copy_linux_sw64.hpp b/src/hotspot/os_cpu/linux_sw64/copy_linux_sw64.hpp new file mode 100644 index 00000000000..546b44c6d34 --- /dev/null +++ b/src/hotspot/os_cpu/linux_sw64/copy_linux_sw64.hpp @@ -0,0 +1,142 @@ +/* + * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef OS_CPU_LINUX_SW64_VM_COPY_LINUX_SW64_HPP +#define OS_CPU_LINUX_SW64_VM_COPY_LINUX_SW64_HPP + +static void pd_conjoint_words(const HeapWord* from, HeapWord* to, size_t count) { + (void)memmove(to, from, count * HeapWordSize); +} + +static void pd_disjoint_words(const HeapWord* from, HeapWord* to, size_t count) { + switch (count) { + case 8: to[7] = from[7]; + case 7: to[6] = from[6]; + case 6: to[5] = from[5]; + case 5: to[4] = from[4]; + case 4: to[3] = from[3]; + case 3: to[2] = from[2]; + case 2: to[1] = from[1]; + case 1: to[0] = from[0]; + case 0: break; + default: + (void)memcpy(to, from, count * HeapWordSize); + break; + } +} + +static void pd_disjoint_words_atomic(const HeapWord* from, HeapWord* to, size_t count) { + switch (count) { + case 8: to[7] = from[7]; + case 7: to[6] = from[6]; + case 6: to[5] = from[5]; + case 5: to[4] = from[4]; + case 4: to[3] = from[3]; + case 3: to[2] = from[2]; + case 2: to[1] = from[1]; + case 1: to[0] = from[0]; + case 0: break; + default: + while (count-- > 0) { + *to++ = *from++; + } + break; + } +} + +static void pd_aligned_conjoint_words(const HeapWord* from, HeapWord* to, size_t count) { + pd_conjoint_words(from, to, count); +} + +static void pd_aligned_disjoint_words(const HeapWord* from, HeapWord* to, size_t count) { + pd_disjoint_words(from, to, count); +} + +static void pd_conjoint_bytes(const void* from, void* to, size_t count) { + (void)memmove(to, from, count); +} + +static void pd_conjoint_bytes_atomic(const void* from, void* to, size_t count) { + pd_conjoint_bytes(from, to, count); +} + +template +static void copy_conjoint_atomic(const T* from, T* to, size_t count) { + if (from > to) { + while (count-- > 0) { + // Copy forwards + *to++ = *from++; + } + } else { + from += count - 1; + to += count - 1; + while (count-- > 0) { + // Copy backwards + *to-- = *from--; + } + } +} + +static void pd_conjoint_jshorts_atomic(const jshort* from, 
jshort* to, size_t count) { + copy_conjoint_atomic(from, to, count); +} + +static void pd_conjoint_jints_atomic(const jint* from, jint* to, size_t count) { + copy_conjoint_atomic(from, to, count); +} + +static void pd_conjoint_jlongs_atomic(const jlong* from, jlong* to, size_t count) { + copy_conjoint_atomic(from, to, count); +} + +static void pd_conjoint_oops_atomic(const oop* from, oop* to, size_t count) { + //assert(!UseCompressedOops, "foo!"); + assert(HeapWordSize == BytesPerOop, "heapwords and oops must be the same size"); + copy_conjoint_atomic(from, to, count); +} + +static void pd_arrayof_conjoint_bytes(const HeapWord* from, HeapWord* to, size_t count) { + pd_conjoint_bytes_atomic(from, to, count); +} + +static void pd_arrayof_conjoint_jshorts(const HeapWord* from, HeapWord* to, size_t count) { + pd_conjoint_jshorts_atomic((jshort*)from, (jshort*)to, count); +} + +static void pd_arrayof_conjoint_jints(const HeapWord* from, HeapWord* to, size_t count) { + pd_conjoint_jints_atomic((jint*)from, (jint*)to, count); +} + +static void pd_arrayof_conjoint_jlongs(const HeapWord* from, HeapWord* to, size_t count) { + pd_conjoint_jlongs_atomic((jlong*)from, (jlong*)to, count); +} + +static void pd_arrayof_conjoint_oops(const HeapWord* from, HeapWord* to, size_t count) { + //assert(!UseCompressedOops, "foo!"); + assert(BytesPerLong == BytesPerOop, "jlongs and oops must be the same size"); + pd_conjoint_oops_atomic((oop*)from, (oop*)to, count); +} + +#endif // OS_CPU_LINUX_SW64_VM_COPY_LINUX_SW64_HPP diff --git a/src/hotspot/os_cpu/linux_sw64/globals_linux_sw64.hpp b/src/hotspot/os_cpu/linux_sw64/globals_linux_sw64.hpp new file mode 100644 index 00000000000..6d834b71697 --- /dev/null +++ b/src/hotspot/os_cpu/linux_sw64/globals_linux_sw64.hpp @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2000, 2016, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef OS_CPU_LINUX_SW64_VM_GLOBALS_LINUX_SW64_HPP +#define OS_CPU_LINUX_SW64_VM_GLOBALS_LINUX_SW64_HPP + +// Sets the default values for platform dependent flags used by the runtime system. 
+// (see globals.hpp) + +define_pd_global(bool, DontYieldALot, false); +define_pd_global(intx, ThreadStackSize, 1024); // 0 => use system default +define_pd_global(intx, VMThreadStackSize, 1024); + +define_pd_global(intx, CompilerThreadStackSize, 0); + +define_pd_global(uintx,JVMInvokeMethodSlack, 8192); + +// Used on 64 bit platforms for UseCompressedOops base address +define_pd_global(uintx,HeapBaseMinAddress, 2*G); + +#endif // OS_CPU_LINUX_SW64_VM_GLOBALS_LINUX_SW64_HPP diff --git a/src/hotspot/os_cpu/linux_sw64/linux_sw64.ad b/src/hotspot/os_cpu/linux_sw64/linux_sw64.ad new file mode 100644 index 00000000000..c3b8cd2c456 --- /dev/null +++ b/src/hotspot/os_cpu/linux_sw64/linux_sw64.ad @@ -0,0 +1,69 @@ +// +// Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved. +// Copyright (c) 2014, Red Hat Inc. All rights reserved. +// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +// +// This code is free software; you can redistribute it and/or modify it +// under the terms of the GNU General Public License version 2 only, as +// published by the Free Software Foundation. +// +// This code is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// version 2 for more details (a copy is included in the LICENSE file that +// accompanied this code). +// +// You should have received a copy of the GNU General Public License version +// 2 along with this work; if not, write to the Free Software Foundation, +// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +// +// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +// or visit www.oracle.com if you need additional information or have any +// questions. +// +// + +// AArch64 Linux Architecture Description File + +//----------OS-DEPENDENT ENCODING BLOCK---------------------------------------- +// This block specifies the encoding classes used by the compiler to +// output byte streams. Encoding classes generate functions which are +// called by Machine Instruction Nodes in order to generate the bit +// encoding of the instruction. Operands specify their base encoding +// interface with the interface keyword. There are currently +// supported four interfaces, REG_INTER, CONST_INTER, MEMORY_INTER, & +// COND_INTER. REG_INTER causes an operand to generate a function +// which returns its register number when queried. CONST_INTER causes +// an operand to generate a function which returns the value of the +// constant when queried. MEMORY_INTER causes an operand to generate +// four functions which return the Base Register, the Index Register, +// the Scale Value, and the Offset Value of the operand when queried. +// COND_INTER causes an operand to generate six functions which return +// the encoding code (ie - encoding bits for the instruction) +// associated with each basic boolean condition for a conditional +// instruction. Instructions specify two basic values for encoding. +// They use the ins_encode keyword to specify their encoding class +// (which must be one of the class names specified in the encoding +// block), and they use the opcode keyword to specify, in order, their +// primary, secondary, and tertiary opcode. Only the opcode sections +// which a particular instruction needs for encoding need to be +// specified. 
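+// As a purely illustrative sketch (hypothetical, not part of this port), an encoding
+// class in other Linux architecture description files typically emits instructions
+// through the macro assembler, roughly:
+//
+//   enc_class Java_To_Runtime(method meth) %{
+//     // construct a C2_MacroAssembler over &cbuf and emit a runtime call
+//     // to the entry point held in $meth$$method
+//   %}
+//
+// Here the Java_To_Runtime class below is left empty, so any such emission is
+// presumably handled elsewhere in the port.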
+encode %{ + // Build emit functions for each basic byte or larger field in the intel + // encoding scheme (opcode, rm, sib, immediate), and call them from C++ + // code in the enc_class source block. Emit functions will live in the + // main source block for now. In future, we can generalize this by + // adding a syntax that specifies the sizes of fields in an order, + // so that the adlc can build the emit functions automagically + + enc_class Java_To_Runtime(method meth) %{ + %} + +%} + + +// Platform dependent source + +source %{ + +%} diff --git a/src/hotspot/os_cpu/linux_sw64/linux_sw64.s b/src/hotspot/os_cpu/linux_sw64/linux_sw64.s new file mode 100644 index 00000000000..dd28925d197 --- /dev/null +++ b/src/hotspot/os_cpu/linux_sw64/linux_sw64.s @@ -0,0 +1,380 @@ +# +# Copyright (c) 2004, 2013, Oracle and/or its affiliates. All rights reserved. +# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +# +# This code is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License version 2 only, as +# published by the Free Software Foundation. +# +# This code is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +# version 2 for more details (a copy is included in the LICENSE file that +# accompanied this code). +# +# You should have received a copy of the GNU General Public License version +# 2 along with this work; if not, write to the Free Software Foundation, +# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +# +# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +# or visit www.oracle.com if you need additional information or have any +# questions. +# + + +# # NOTE WELL! The _Copy functions are called directly + # from server-compiler-generated code via CallLeafNoFP, + # which means that they *must* either not use floating + # point or use it in the same manner as does the server + # compiler. 
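+# Note: in this port the x86_64 instruction sequences are retained only as
+# commented-out reference material and the entry points below are bare labels.
+# The actual copy routines are the plain C++ loops in copy_linux_sw64.hpp, so these
+# assembly stubs appear to be unused placeholders.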
+ +# .globl _Copy_arrayof_conjoint_bytes + .globl _Copy_arrayof_conjoint_jshorts +# .globl _Copy_conjoint_jshorts_atomic +# .globl _Copy_arrayof_conjoint_jints +# .globl _Copy_conjoint_jints_atomic +# .globl _Copy_arrayof_conjoint_jlongs +# .globl _Copy_conjoint_jlongs_atomic + + .text + +# .globl SpinPause +# .align 16 +# .type SpinPause,@function +SpinPause: +# rep +# nop +# movq $1, %rax +# ret + +# # Support for void Copy::arrayof_conjoint_bytes(void* from, +# # void* to, +# # size_t count) +# # rdi - from +# # rsi - to +# # rdx - count, treated as ssize_t +# # +# .p2align 4,,15 + .type _Copy_arrayof_conjoint_bytes,@function +_Copy_arrayof_conjoint_bytes: +# movq %rdx,%r8 # byte count +# shrq $3,%rdx # qword count +# cmpq %rdi,%rsi +# leaq -1(%rdi,%r8,1),%rax # from + bcount*1 - 1 +# jbe acb_CopyRight +# cmpq %rax,%rsi +# jbe acb_CopyLeft +acb_CopyRight: +# leaq -8(%rdi,%rdx,8),%rax # from + qcount*8 - 8 +# leaq -8(%rsi,%rdx,8),%rcx # to + qcount*8 - 8 +# negq %rdx +# jmp 7f +# .p2align 4,,15 +#1: movq 8(%rax,%rdx,8),%rsi +# movq %rsi,8(%rcx,%rdx,8) +# addq $1,%rdx +# jnz 1b +#2: testq $4,%r8 # check for trailing dword +# jz 3f +# movl 8(%rax),%esi # copy trailing dword +# movl %esi,8(%rcx) +# addq $4,%rax +# addq $4,%rcx # original %rsi is trashed, so we +# # can't use it as a base register +#3: testq $2,%r8 # check for trailing word +# jz 4f +# movw 8(%rax),%si # copy trailing word +# movw %si,8(%rcx) +# addq $2,%rcx +#4: testq $1,%r8 # check for trailing byte +# jz 5f +# movb -1(%rdi,%r8,1),%al # copy trailing byte +# movb %al,8(%rcx) +#5: ret +# .p2align 4,,15 +#6: movq -24(%rax,%rdx,8),%rsi +# movq %rsi,-24(%rcx,%rdx,8) +# movq -16(%rax,%rdx,8),%rsi +# movq %rsi,-16(%rcx,%rdx,8) +# movq -8(%rax,%rdx,8),%rsi +# movq %rsi,-8(%rcx,%rdx,8) +# movq (%rax,%rdx,8),%rsi +# movq %rsi,(%rcx,%rdx,8) +#7: addq $4,%rdx +# jle 6b +# subq $4,%rdx +# jl 1b +# jmp 2b +acb_CopyLeft: +# testq $1,%r8 # check for trailing byte +# jz 1f +# movb -1(%rdi,%r8,1),%cl # copy trailing byte +# movb %cl,-1(%rsi,%r8,1) +# subq $1,%r8 # adjust for possible trailing word +#1: testq $2,%r8 # check for trailing word +# jz 2f +# movw -2(%rdi,%r8,1),%cx # copy trailing word +# movw %cx,-2(%rsi,%r8,1) +#2: testq $4,%r8 # check for trailing dword +# jz 5f +# movl (%rdi,%rdx,8),%ecx # copy trailing dword +# movl %ecx,(%rsi,%rdx,8) +# jmp 5f +# .p2align 4,,15 +#3: movq -8(%rdi,%rdx,8),%rcx +# movq %rcx,-8(%rsi,%rdx,8) +# subq $1,%rdx +# jnz 3b +# ret +# .p2align 4,,15 +#4: movq 24(%rdi,%rdx,8),%rcx +# movq %rcx,24(%rsi,%rdx,8) +# movq 16(%rdi,%rdx,8),%rcx +# movq %rcx,16(%rsi,%rdx,8) +# movq 8(%rdi,%rdx,8),%rcx +# movq %rcx,8(%rsi,%rdx,8) +# movq (%rdi,%rdx,8),%rcx +# movq %rcx,(%rsi,%rdx,8) +#5: subq $4,%rdx +# jge 4b +# addq $4,%rdx +# jg 3b +# ret + +# # Support for void Copy::arrayof_conjoint_jshorts(void* from, +# # void* to, +# # size_t count) +# # Equivalent to +# # conjoint_jshorts_atomic +# # +# # If 'from' and/or 'to' are aligned on 4- or 2-byte boundaries, we +# # let the hardware handle it. The tow or four words within dwords +# # or qwords that span cache line boundaries will still be loaded +# # and stored atomically. 
+# # +# # rdi - from +# # rsi - to +# # rdx - count, treated as ssize_t +# # +# .p2align 4,,15 + .type _Copy_arrayof_conjoint_jshorts,@function + .type _Copy_conjoint_jshorts_atomic,@function +_Copy_arrayof_conjoint_jshorts: +_Copy_conjoint_jshorts_atomic: +# movq %rdx,%r8 # word count +# shrq $2,%rdx # qword count +# cmpq %rdi,%rsi +# leaq -2(%rdi,%r8,2),%rax # from + wcount*2 - 2 +# jbe acs_CopyRight +# cmpq %rax,%rsi +# jbe acs_CopyLeft +acs_CopyRight: +# leaq -8(%rdi,%rdx,8),%rax # from + qcount*8 - 8 +# leaq -8(%rsi,%rdx,8),%rcx # to + qcount*8 - 8 +# negq %rdx +# jmp 6f +#1: movq 8(%rax,%rdx,8),%rsi +# movq %rsi,8(%rcx,%rdx,8) +# addq $1,%rdx +# jnz 1b +#2: testq $2,%r8 # check for trailing dword +# jz 3f +# movl 8(%rax),%esi # copy trailing dword +# movl %esi,8(%rcx) +# addq $4,%rcx # original %rsi is trashed, so we +# # can't use it as a base register +#3: testq $1,%r8 # check for trailing word +# jz 4f +# movw -2(%rdi,%r8,2),%si # copy trailing word +# movw %si,8(%rcx) +#4: ret +# .p2align 4,,15 +#5: movq -24(%rax,%rdx,8),%rsi +# movq %rsi,-24(%rcx,%rdx,8) +# movq -16(%rax,%rdx,8),%rsi +# movq %rsi,-16(%rcx,%rdx,8) +# movq -8(%rax,%rdx,8),%rsi +# movq %rsi,-8(%rcx,%rdx,8) +# movq (%rax,%rdx,8),%rsi +# movq %rsi,(%rcx,%rdx,8) +#6: addq $4,%rdx +# jle 5b +# subq $4,%rdx +# jl 1b +# jmp 2b +acs_CopyLeft: +# testq $1,%r8 # check for trailing word +# jz 1f +# movw -2(%rdi,%r8,2),%cx # copy trailing word +# movw %cx,-2(%rsi,%r8,2) +#1: testq $2,%r8 # check for trailing dword +# jz 4f +# movl (%rdi,%rdx,8),%ecx # copy trailing dword +# movl %ecx,(%rsi,%rdx,8) +# jmp 4f +#2: movq -8(%rdi,%rdx,8),%rcx +# movq %rcx,-8(%rsi,%rdx,8) +# subq $1,%rdx +# jnz 2b +# ret +# .p2align 4,,15 +#3: movq 24(%rdi,%rdx,8),%rcx +# movq %rcx,24(%rsi,%rdx,8) +# movq 16(%rdi,%rdx,8),%rcx +# movq %rcx,16(%rsi,%rdx,8) +# movq 8(%rdi,%rdx,8),%rcx +# movq %rcx,8(%rsi,%rdx,8) +# movq (%rdi,%rdx,8),%rcx +# movq %rcx,(%rsi,%rdx,8) +#4: subq $4,%rdx +# jge 3b +# addq $4,%rdx +# jg 2b +# ret + +# # Support for void Copy::arrayof_conjoint_jints(jint* from, +# # jint* to, +# # size_t count) +# # Equivalent to +# # conjoint_jints_atomic +# # +# # If 'from' and/or 'to' are aligned on 4-byte boundaries, we let +# # the hardware handle it. The two dwords within qwords that span +# # cache line boundaries will still be loaded and stored atomically. 
+# # +# # rdi - from +# # rsi - to +# # rdx - count, treated as ssize_t +# # +# .p2align 4,,15 + .type _Copy_arrayof_conjoint_jints,@function + .type _Copy_conjoint_jints_atomic,@function +_Copy_arrayof_conjoint_jints: +_Copy_conjoint_jints_atomic: +# movq %rdx,%r8 # dword count +# shrq %rdx # qword count +# cmpq %rdi,%rsi +# leaq -4(%rdi,%r8,4),%rax # from + dcount*4 - 4 +# jbe aci_CopyRight +# cmpq %rax,%rsi +# jbe aci_CopyLeft +aci_CopyRight: +# leaq -8(%rdi,%rdx,8),%rax # from + qcount*8 - 8 +# leaq -8(%rsi,%rdx,8),%rcx # to + qcount*8 - 8 +# negq %rdx +# jmp 5f +# .p2align 4,,15 +#1: movq 8(%rax,%rdx,8),%rsi +# movq %rsi,8(%rcx,%rdx,8) +# addq $1,%rdx +# jnz 1b +#2: testq $1,%r8 # check for trailing dword +# jz 3f +# movl 8(%rax),%esi # copy trailing dword +# movl %esi,8(%rcx) +#3: ret +# .p2align 4,,15 +#4: movq -24(%rax,%rdx,8),%rsi +# movq %rsi,-24(%rcx,%rdx,8) +# movq -16(%rax,%rdx,8),%rsi +# movq %rsi,-16(%rcx,%rdx,8) +# movq -8(%rax,%rdx,8),%rsi +# movq %rsi,-8(%rcx,%rdx,8) +# movq (%rax,%rdx,8),%rsi +# movq %rsi,(%rcx,%rdx,8) +#5: addq $4,%rdx +# jle 4b +# subq $4,%rdx +# jl 1b +# jmp 2b +aci_CopyLeft: +# testq $1,%r8 # check for trailing dword +# jz 3f +# movl -4(%rdi,%r8,4),%ecx # copy trailing dword +# movl %ecx,-4(%rsi,%r8,4) +# jmp 3f +#1: movq -8(%rdi,%rdx,8),%rcx +# movq %rcx,-8(%rsi,%rdx,8) +# subq $1,%rdx +# jnz 1b +# ret +# .p2align 4,,15 +#2: movq 24(%rdi,%rdx,8),%rcx +# movq %rcx,24(%rsi,%rdx,8) +# movq 16(%rdi,%rdx,8),%rcx +# movq %rcx,16(%rsi,%rdx,8) +# movq 8(%rdi,%rdx,8),%rcx +# movq %rcx,8(%rsi,%rdx,8) +# movq (%rdi,%rdx,8),%rcx +# movq %rcx,(%rsi,%rdx,8) +#3: subq $4,%rdx +# jge 2b +# addq $4,%rdx +# jg 1b +# ret + +# # Support for void Copy::arrayof_conjoint_jlongs(jlong* from, +# # jlong* to, +# # size_t count) +# # Equivalent to +# # conjoint_jlongs_atomic +# # arrayof_conjoint_oops +# # conjoint_oops_atomic +# # +# # rdi - from +# # rsi - to +# # rdx - count, treated as ssize_t +# # +# .p2align 4,,15 + .type _Copy_arrayof_conjoint_jlongs,@function + .type _Copy_conjoint_jlongs_atomic,@function +_Copy_arrayof_conjoint_jlongs: +_Copy_conjoint_jlongs_atomic: +# cmpq %rdi,%rsi +# leaq -8(%rdi,%rdx,8),%rax # from + count*8 - 8 +# jbe acl_CopyRight +# cmpq %rax,%rsi +# jbe acl_CopyLeft +acl_CopyRight: +# leaq -8(%rsi,%rdx,8),%rcx # to + count*8 - 8 +# negq %rdx +# jmp 3f +#1: movq 8(%rax,%rdx,8),%rsi +# movq %rsi,8(%rcx,%rdx,8) +# addq $1,%rdx +# jnz 1b +# ret +# .p2align 4,,15 +#2: movq -24(%rax,%rdx,8),%rsi +# movq %rsi,-24(%rcx,%rdx,8) +# movq -16(%rax,%rdx,8),%rsi +# movq %rsi,-16(%rcx,%rdx,8) +# movq -8(%rax,%rdx,8),%rsi +# movq %rsi,-8(%rcx,%rdx,8) +# movq (%rax,%rdx,8),%rsi +# movq %rsi,(%rcx,%rdx,8) +#3: addq $4,%rdx +# jle 2b +# subq $4,%rdx +# jl 1b +# ret +#4: movq -8(%rdi,%rdx,8),%rcx +# movq %rcx,-8(%rsi,%rdx,8) +# subq $1,%rdx +# jnz 4b +# ret +# .p2align 4,,15 +#5: movq 24(%rdi,%rdx,8),%rcx +# movq %rcx,24(%rsi,%rdx,8) +# movq 16(%rdi,%rdx,8),%rcx +# movq %rcx,16(%rsi,%rdx,8) +# movq 8(%rdi,%rdx,8),%rcx +# movq %rcx,8(%rsi,%rdx,8) +# movq (%rdi,%rdx,8),%rcx +# movq %rcx,(%rsi,%rdx,8) +acl_CopyLeft: +# subq $4,%rdx +# jge 5b +# addq $4,%rdx +# jg 4b +# ret diff --git a/src/hotspot/os_cpu/linux_sw64/orderAccess_linux_sw64.hpp b/src/hotspot/os_cpu/linux_sw64/orderAccess_linux_sw64.hpp new file mode 100644 index 00000000000..a228207349a --- /dev/null +++ b/src/hotspot/os_cpu/linux_sw64/orderAccess_linux_sw64.hpp @@ -0,0 +1,136 @@ +/* + * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. 
All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef OS_CPU_LINUX_SW64_VM_ORDERACCESS_LINUX_SW64_HPP +#define OS_CPU_LINUX_SW64_VM_ORDERACCESS_LINUX_SW64_HPP + +// Included in orderAccess.hpp header file. + +#include "runtime/os.hpp" + +// Implementation of class OrderAccess. +#include "vm_version_sw64.hpp" + +// Implementation of class OrderAccess. +#define inlasm_sync() __asm__ __volatile__ ("memb" : : : "memory"); + +inline void OrderAccess::loadload() { acquire(); } +inline void OrderAccess::storestore() { release(); } +inline void OrderAccess::loadstore() { acquire(); } +inline void OrderAccess::storeload() { fence(); } + +#define FULL_MEM_BARRIER __sync_synchronize() +#define READ_MEM_BARRIER __atomic_thread_fence(__ATOMIC_ACQUIRE); +#define WRITE_MEM_BARRIER __atomic_thread_fence(__ATOMIC_RELEASE); + +inline void OrderAccess::acquire() { + inlasm_sync(); +} + +inline void OrderAccess::release() { + inlasm_sync(); +} + +inline void OrderAccess::fence() { + inlasm_sync(); +} + +inline void OrderAccess::cross_modify_fence_impl() { + inlasm_sync(); +} + +// +//template<> +//struct OrderAccess::PlatformOrderedStore<1, RELEASE_X_FENCE> +//{ +// template +// void operator()(T v, volatile T* p) const { +// __asm__ volatile ( "xchgb (%2),%0" +// : "=q" (v) +// : "0" (v), "r" (p) +// : "memory"); +// } +//}; +// +//template<> +//struct OrderAccess::PlatformOrderedStore<2, RELEASE_X_FENCE> +//{ +// template +// void operator()(T v, volatile T* p) const { +// __asm__ volatile ( "xchgw (%2),%0" +// : "=r" (v) +// : "0" (v), "r" (p) +// : "memory"); +// } +//}; +// +//template<> +//struct OrderAccess::PlatformOrderedStore<4, RELEASE_X_FENCE> +//{ +// template +// void operator()(T v, volatile T* p) const { +// T __ret, __tmp; +// T* __addr; +// __asm__ __volatile__ ( +// "1: memb\n\t" +// " ldi %[__addr],%[__dest]\n\t" +// " lldw %[__ret],0(%[__addr])\n\t" +// " ldi %[__tmp],1\n\t" +// " wr_f %[__tmp]\n\t" +// " mov %[__val],%[__tmp]\n\t" +// " .align 3\n\t" +// " lstw %[__tmp],0(%[__addr])\n\t" +// " rd_f %[__tmp]\n\t" +// " beq %[__tmp],1b\n\t" +// " zapnot %[__ret],0xf,%[__ret]\n\t" +// " \n\t" +// : [__ret]"=&r" (__ret), [__addr]"=&r"(__addr), [__tmp]"=&r"(__tmp) +// : [__dest] "m" (*(T volatile *)p), [__val] "Ir"(v) +// : "memory" ); +// } +//}; + +//template +//struct OrderAccess::PlatformOrderedLoad +//{ +// template +// T operator()(const volatile T* p) const { T data; __atomic_load(p, &data, __ATOMIC_ACQUIRE); return data; } +//}; +// +//template +//struct OrderAccess::PlatformOrderedStore +//{ +// template +// void operator()(T 
v, volatile T* p) const { __atomic_store(p, &v, __ATOMIC_RELEASE); } +//}; +// +//template +//struct OrderAccess::PlatformOrderedStore +//{ +// template +// void operator()(T v, volatile T* p) const { release_store(p, v); fence(); } +//}; + +#endif // OS_CPU_LINUX_SW64_VM_ORDERACCESS_LINUX_SW64_HPP diff --git a/src/hotspot/os_cpu/linux_sw64/os_linux_sw64.cpp b/src/hotspot/os_cpu/linux_sw64/os_linux_sw64.cpp new file mode 100644 index 00000000000..a19cd293e7f --- /dev/null +++ b/src/hotspot/os_cpu/linux_sw64/os_linux_sw64.cpp @@ -0,0 +1,599 @@ +/* + * Copyright (c) 1999, 2018, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +// no precompiled headers +#include "jvm.h" +#include "asm/macroAssembler.hpp" +#include "classfile/vmSymbols.hpp" +#include "compiler/disassembler.hpp" +#include "code/codeCache.hpp" +#include "code/icBuffer.hpp" +#include "code/vtableStubs.hpp" +#include "code/nativeInst.hpp" +#include "interpreter/interpreter.hpp" +#include "logging/log.hpp" +#include "memory/allocation.inline.hpp" +#include "os_share_linux.hpp" +#include "prims/jniFastGetField.hpp" +#include "prims/jvm_misc.hpp" +#include "runtime/frame.inline.hpp" +#include "runtime/interfaceSupport.inline.hpp" +#include "runtime/java.hpp" +#include "runtime/javaCalls.hpp" +#include "runtime/mutexLocker.hpp" +#include "runtime/osThread.hpp" +#include "runtime/safepointMechanism.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" +#include "runtime/thread.inline.hpp" +#include "runtime/timer.hpp" +#include "signals_posix.hpp" +#include "services/memTracker.hpp" +#include "utilities/align.hpp" +#include "utilities/debug.hpp" +#include "utilities/events.hpp" +#include "utilities/vmError.hpp" + +// put OS-includes here +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include + +//not sure +#define REG_SP 30 //// #define REG_SP 29 +#define REG_FP 15 //// #define REG_FP 30 +#define REG_RA 26 //// #define REG_FP 30 +//#define PRINT_SIGNAL_HANDLE + +address __attribute__((always_inline)) os::current_stack_pointer() { +//ZHJ return (address)__builtin_frame_address(0); +// register void *ssp; +// __asm__ (" mov $sp,%0\n":"=r"(ssp)); +// +// return (address) (char *)ssp; + return (address)__builtin_frame_address(0); +} + +address os::ucontext_get_ra(const ucontext_t * uc) { + return (address)uc->uc_mcontext.sc_regs[REG_RA]; +} + +char* 
os::non_memory_address_word() { + // Must never look like an address returned by reserve_memory, + // even in its subfields (as defined by the CPU immediate fields, + // if the CPU splits constants across multiple instructions). + + return (char*) -1; +} + +address os::Posix::ucontext_get_pc(const ucontext_t * uc) { + //return (address)uc->uc_mcontext.gregs[REG_PC]; + return (address)uc->uc_mcontext.sc_pc; +} + +void os::Posix::ucontext_set_pc(ucontext_t * uc, address pc) { +//ZHJ uc->uc_mcontext.pc = (intptr_t)pc; + uc->uc_mcontext.sc_pc = (intptr_t)pc; +} + +intptr_t* os::Linux::ucontext_get_sp(const ucontext_t * uc) { +//ZHJ return (intptr_t*)uc->uc_mcontext.sp; + return (intptr_t*)uc->uc_mcontext.sc_regs[REG_SP]; +} + +intptr_t* os::Linux::ucontext_get_fp(const ucontext_t * uc) { +//ZHJ return (intptr_t*)uc->uc_mcontext.regs[REG_FP]; + return (intptr_t*)uc->uc_mcontext.sc_regs[REG_FP]; +} + +address os::fetch_frame_from_context(const void* ucVoid, + intptr_t** ret_sp, intptr_t** ret_fp) { + + address epc; + const ucontext_t* uc = (const ucontext_t*)ucVoid; + + if (uc != NULL) { + epc = os::Posix::ucontext_get_pc(uc); + if (ret_sp) *ret_sp = os::Linux::ucontext_get_sp(uc); + if (ret_fp) *ret_fp = os::Linux::ucontext_get_fp(uc); + } else { + // construct empty ExtendedPC for return value checking + epc = NULL; + if (ret_sp) *ret_sp = (intptr_t *)NULL; + if (ret_fp) *ret_fp = (intptr_t *)NULL; + } + + return epc; +} + +frame os::fetch_frame_from_context(const void* ucVoid) { + intptr_t* sp; + intptr_t* fp; + address epc = fetch_frame_from_context(ucVoid, &sp, &fp); + if (!is_readable_pointer(epc)) { + // Try to recover from calling into bad memory + // Assume new frame has not been set up, the same as + // compiled frame stack bang + return fetch_compiled_frame_from_context(ucVoid); + } + frame ret_frame(sp, fp, epc); + // ret_frame.fixRa(ucVoid); + return ret_frame; +} + +frame os::fetch_compiled_frame_from_context(const void* ucVoid) { + const ucontext_t* uc = (const ucontext_t*)ucVoid; + intptr_t* fp = os::Linux::ucontext_get_fp(uc); + intptr_t* sp = os::Linux::ucontext_get_sp(uc); + return frame(sp + 1, fp, (address)*sp); +} + +// By default, gcc always saves frame pointer rfp on this stack. This +// may get turned off by -fomit-frame-pointer. +frame os::get_sender_for_C_frame(frame* fr) { + return frame(NULL, fr->link(), fr->sender_pc()); +} + +intptr_t* __attribute__((always_inline)) os::get_previous_fp() { + register void *sfp; + __asm__ (" mov $fp,%0\n":"=r"(sfp)); + + return (intptr_t *)sfp; +} + +frame os::current_frame() { + intptr_t* fp = (intptr_t*)get_previous_fp(); + frame myframe((intptr_t*)os::current_stack_pointer(), + (intptr_t*)fp, + CAST_FROM_FN_PTR(address, os::current_frame)); + //myframe.init_sender_for_c_frame(CAST_FROM_FN_PTR(address, os::current_frame)); + if (os::is_first_C_frame(&myframe)) { + // stack is not walkable + return frame(); + } else { + return os::get_sender_for_C_frame(&myframe); + } +} + +// Utility functions +bool PosixSignals::pd_hotspot_signal_handler(int sig, siginfo_t* info, + ucontext_t* uc, JavaThread* thread) { + + + + /* + NOTE: does not seem to work on linux. 
+ if (info == NULL || info->si_code <= 0 || info->si_code == SI_NOINFO) { + // can't decode this kind of signal + info = NULL; + } else { + assert(sig == info->si_signo, "bad siginfo"); + } + */ + + // decide if this trap can be handled by a stub + address stub = NULL; + + address pc = NULL; + + //%note os_trap_1 + if (info != NULL && uc != NULL && thread != NULL) { + pc = (address) os::Posix::ucontext_get_pc(uc); + +// if (StubRoutines::is_safefetch_fault(pc)) { +// os::Posix::ucontext_set_pc(uc, StubRoutines::continuation_for_safefetch_fault(pc)); +// return true; +// } + + address addr = (address) info->si_addr; + + // Halt if SI_KERNEL before more crashes get misdiagnosed as Java bugs + // This can happen in any running code (currently more frequently in + // interpreter code but has been seen in compiled code) + if (sig == SIGSEGV && info->si_addr == 0 && info->si_code == SI_KERNEL) { + fatal("An irrecoverable SI_KERNEL SIGSEGV has occurred due " + "to unstable signal handling in this distribution."); + } + + // Handle ALL stack overflow variations here + if (sig == SIGSEGV) { + // check if fault address is within thread stack + if (thread->is_in_full_stack(addr)) { + // stack overflow + if (os::Posix::handle_stack_overflow(thread, addr, pc, uc, &stub)) { + return true; // continue + } + } + } + + if (thread->thread_state() == _thread_in_Java) { + // Java thread running in Java code => find exception handler if any + // a fault inside compiled code, the interpreter, or a stub + if (TraceSignalHandling) tty->print("java thread running in java code\n"); + if (sig == SIGILL && (nativeInstruction_at(pc)->is_sigill_zombie_not_entrant() || nativeInstruction_at(pc - 4)->is_sigill_zombie_not_entrant())) { +#ifdef PRINT_SIGNAL_HANDLE + tty->print_cr("verified entry = %lx, sig=%d", nativeInstruction_at(pc), sig); +#endif + stub = SharedRuntime::get_handle_wrong_method_stub(); + // Handle signal from NativeJump::patch_verified_entry(). + }else if (sig == SIGSEGV && SafepointMechanism::is_poll_address((address)info->si_addr)) { + stub = SharedRuntime::get_poll_stub(pc); + if (TraceSignalHandling) tty->print_cr("polling address = %lx, sig=%d, stub = %lx", (unsigned long)SafepointMechanism::get_polling_page(), sig, (unsigned long)stub); + } else if (sig == SIGBUS /* && info->si_code == BUS_OBJERR */) { + // BugId 4454115: A read from a MappedByteBuffer can fault + // here if the underlying file has been truncated. + // Do not crash the VM in such a case. + CodeBlob* cb = CodeCache::find_blob_unsafe(pc); + CompiledMethod* nm = (cb != NULL) ? 
cb->as_compiled_method_or_null() : NULL; + bool is_unsafe_arraycopy = thread->doing_unsafe_access() && UnsafeCopyMemory::contains_pc(pc); + if (TraceSignalHandling) tty->print("cb = %lx, nm = %lx\n", (unsigned long)cb, (unsigned long)nm); + if ((nm != NULL && nm->has_unsafe_access()) || is_unsafe_arraycopy) { + address next_pc = pc + BytesPerInstWord; + if (is_unsafe_arraycopy) { + next_pc = UnsafeCopyMemory::page_error_continue_pc(pc); + } + stub = SharedRuntime::handle_unsafe_access(thread, next_pc); + } + } else if (sig == SIGFPE && + (info->si_code == FPE_INTDIV || info->si_code == FPE_FLTDIV)) { + stub = SharedRuntime::continuation_for_implicit_exception(thread, + pc, + SharedRuntime::IMPLICIT_DIVIDE_BY_ZERO); + } else if (sig == SIGSEGV && + MacroAssembler::uses_implicit_null_check(info->si_addr)) { + if (TraceSignalHandling) tty->print("continuation for implicit exception\n"); + // Determination of interpreter/vtable stub/compiled code null exception + stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::IMPLICIT_NULL); + if (TraceSignalHandling) tty->print_cr("continuation_for_implicit_exception stub: %lx", (unsigned long)stub); + } + } else if ((thread->thread_state() == _thread_in_vm || + thread->thread_state() == _thread_in_native) && + (sig == SIGBUS && /* info->si_code == BUS_OBJERR && */ + thread->doing_unsafe_access())) { + if (TraceSignalHandling) tty->print_cr("SIGBUS in vm thread \n"); + address next_pc = pc + BytesPerInstWord; + if (UnsafeCopyMemory::contains_pc(pc)) { + next_pc = UnsafeCopyMemory::page_error_continue_pc(pc); + } + stub = SharedRuntime::handle_unsafe_access(thread, next_pc); + } + + // jni_fast_GetField can trap at certain pc's if a GC kicks in + // and the heap gets shrunk before the field access. + if ((sig == SIGSEGV) || (sig == SIGBUS)) { + if (TraceSignalHandling) tty->print("jni fast get trap: "); + address addr = JNI_FastGetField::find_slowcase_pc(pc); + if (addr != (address)-1) { + stub = addr; + } + if (TraceSignalHandling) tty->print_cr("addr = %lx, stub = %lx", (unsigned long)addr, (unsigned long)stub); + } + +// // Check to see if we caught the safepoint code in the +// // process of write protecting the memory serialization page. +// // It write enables the page immediately after protecting it +// // so we can just return to retry the write. +// if ((sig == SIGSEGV) && +// os::is_memory_serialize_page(thread, (address) info->si_addr)) { +// if (TraceSignalHandling) tty->print("writes protecting the memory serialiazation page\n"); +// // Block current thread until the memory serialize page permission restored. +// os::block_on_serialize_page_trap(); +// return true; +// } + } + + // Execution protection violation + // + // This should be kept as the last step in the triage. We don't + // have a dedicated trap number for a no-execute fault, so be + // conservative and allow other handlers the first shot. + // + // Note: We don't test that info->si_code == SEGV_ACCERR here. + // this si_code is so generic that it is almost meaningless; and + // the si_code for this condition may change in the future. + // Furthermore, a false-positive should be harmless. 
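+  // On this port the branch below begins with ShouldNotReachHere(), so if it is ever
+  // entered the VM aborts rather than unguarding the page; the unguard-and-retry code
+  // that follows is retained for reference only.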
+ if (UnguardOnExecutionViolation > 0 && + stub == NULL && + (sig == SIGSEGV || sig == SIGBUS) /*&& + uc->uc_mcontext.sc_regs[REG_TRAPNO] == trap_page_fault*/) { + ShouldNotReachHere(); + int page_size = os::vm_page_size(); + address addr = (address) info->si_addr; + address pc = os::Posix::ucontext_get_pc(uc); + // Make sure the pc and the faulting address are sane. + // + // If an instruction spans a page boundary, and the page containing + // the beginning of the instruction is executable but the following + // page is not, the pc and the faulting address might be slightly + // different - we still want to unguard the 2nd page in this case. + // + // 15 bytes seems to be a (very) safe value for max instruction size. + bool pc_is_near_addr = + (pointer_delta((void*) addr, (void*) pc, sizeof(char)) < 15); + bool instr_spans_page_boundary = + (align_down((intptr_t) pc ^ (intptr_t) addr, + (intptr_t) page_size) > 0); + + if (pc == addr || (pc_is_near_addr && instr_spans_page_boundary)) { + static volatile address last_addr = + (address) os::non_memory_address_word(); + + // In conservative mode, don't unguard unless the address is in the VM + if (addr != last_addr && + (UnguardOnExecutionViolation > 1 || os::address_is_in_vm(addr))) { + + // Set memory to RWX and retry + address page_start = align_down(addr, page_size); + bool res = os::protect_memory((char*) page_start, page_size, + os::MEM_PROT_RWX); + + log_debug(os)("Execution protection violation " + "at " INTPTR_FORMAT + ", unguarding " INTPTR_FORMAT ": %s, errno=%d", p2i(addr), + p2i(page_start), (res ? "success" : "failed"), errno); + stub = pc; + + // Set last_addr so if we fault again at the same address, we don't end + // up in an endless loop. + // + // There are two potential complications here. Two threads trapping at + // the same address at the same time could cause one of the threads to + // think it already unguarded, and abort the VM. Likely very rare. + // + // The other race involves two threads alternately trapping at + // different addresses and failing to unguard the page, resulting in + // an endless loop. This condition is probably even more unlikely than + // the first. + // + // Although both cases could be avoided by using locks or thread local + // last_addr, these solutions are unnecessary complication: this + // handler is a best-effort safety net, not a complete solution. It is + // disabled by default and should only be used as a workaround in case + // we missed any no-execute-unsafe VM code. + + last_addr = addr; + } + } + } + + if (stub != NULL) { + //if (TraceSignalHandling) tty->print_cr("resolved stub=%lx\n",(unsigned long)stub);//0404 + // save all thread context in case we need to restore it + if (thread != NULL) thread->set_saved_exception_pc(pc); + + os::Posix::ucontext_set_pc(uc, stub); + return true; + } + + return false; +} + +// FCSR:...|24| 23 |22|21|... +// ...|FS|FCC0|FO|FN|... 
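+// The sketch above documents floating-point control/status register fields
+// (FS, FCC0, FO, FN around bits 21-24).  Nothing is configured here: the helper
+// below is a no-op and the control-word accessors that follow are unreachable.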
+void os::Linux::init_thread_fpu_state(void) { + // Nothing to do +} + +int os::Linux::get_fpu_control_word(void) { + ShouldNotReachHere(); + return 0; +} + +void os::Linux::set_fpu_control_word(int fpu_control) { + ShouldNotReachHere(); +} + +//////////////////////////////////////////////////////////////////////////////// +// thread stack + +size_t os::Posix::_compiler_thread_min_stack_allowed = 48 * K; +size_t os::Posix::_java_thread_min_stack_allowed = 40 * K; +size_t os::Posix::_vm_internal_thread_min_stack_allowed = 96 * K; + +// return default stack size for thr_type +size_t os::Posix::default_stack_size(os::ThreadType thr_type) { + // default stack size (compiler thread needs larger stack) + size_t s = (thr_type == os::compiler_thread ? 2 * M : 512 * K); + return s; +} + +///////////////////////////////////////////////////////////////////////////// +// helper functions for fatal error handler + +void os::print_context(outputStream *st, const void *context) { + if (context == NULL) return; + + const ucontext_t *uc = (const ucontext_t*)context; + st->print_cr("Registers:"); + st->print( "V0=" INTPTR_FORMAT, uc->uc_mcontext.sc_regs[0]); + st->print(", T0=" INTPTR_FORMAT, uc->uc_mcontext.sc_regs[1]); + st->print(", T1=" INTPTR_FORMAT, uc->uc_mcontext.sc_regs[2]); + st->print(", T2=" INTPTR_FORMAT, uc->uc_mcontext.sc_regs[3]); + st->cr(); + st->print( "T3=" INTPTR_FORMAT, uc->uc_mcontext.sc_regs[4]); + st->print(", T4=" INTPTR_FORMAT, uc->uc_mcontext.sc_regs[5]); + st->print(", T5=" INTPTR_FORMAT, uc->uc_mcontext.sc_regs[6]); + st->print(", T6=" INTPTR_FORMAT, uc->uc_mcontext.sc_regs[7]); + st->cr(); + st->print( "T7=" INTPTR_FORMAT, uc->uc_mcontext.sc_regs[8]); + st->print(", S0=" INTPTR_FORMAT, uc->uc_mcontext.sc_regs[9]); + st->print(", S1=" INTPTR_FORMAT, uc->uc_mcontext.sc_regs[10]); + st->print(", S2=" INTPTR_FORMAT, uc->uc_mcontext.sc_regs[11]); + st->cr(); + st->print( "S3=" INTPTR_FORMAT, uc->uc_mcontext.sc_regs[12]); + st->print(", S4=" INTPTR_FORMAT, uc->uc_mcontext.sc_regs[13]); + st->print(", S5=" INTPTR_FORMAT, uc->uc_mcontext.sc_regs[14]); + st->print(", FP=" INTPTR_FORMAT, uc->uc_mcontext.sc_regs[15]); + st->cr(); + st->print( "A0=" INTPTR_FORMAT, uc->uc_mcontext.sc_regs[16]); + st->print(", A1=" INTPTR_FORMAT, uc->uc_mcontext.sc_regs[17]); + st->print(", A2=" INTPTR_FORMAT, uc->uc_mcontext.sc_regs[18]); + st->print(", A3=" INTPTR_FORMAT, uc->uc_mcontext.sc_regs[19]); + st->cr(); + st->print( "A4=" INTPTR_FORMAT, uc->uc_mcontext.sc_regs[20]); + st->print(", A5=" INTPTR_FORMAT, uc->uc_mcontext.sc_regs[21]); + st->print(", T8=" INTPTR_FORMAT, uc->uc_mcontext.sc_regs[22]); + st->print(", T9=" INTPTR_FORMAT, uc->uc_mcontext.sc_regs[23]); + st->cr(); + st->print( "T10=" INTPTR_FORMAT, uc->uc_mcontext.sc_regs[24]); + st->print(", T11=" INTPTR_FORMAT, uc->uc_mcontext.sc_regs[25]); + st->print(", RA=" INTPTR_FORMAT, uc->uc_mcontext.sc_regs[26]); + st->print(", T12=" INTPTR_FORMAT, uc->uc_mcontext.sc_regs[27]); + st->cr(); + st->print( "AT=" INTPTR_FORMAT, uc->uc_mcontext.sc_regs[28]); + st->print(", GP=" INTPTR_FORMAT, uc->uc_mcontext.sc_regs[29]); + st->print(", SP=" INTPTR_FORMAT, uc->uc_mcontext.sc_regs[30]); + st->print(", R0=" INTPTR_FORMAT, uc->uc_mcontext.sc_regs[31]); + st->cr(); + st->cr(); + + intptr_t *sp = (intptr_t *)os::Linux::ucontext_get_sp(uc); + st->print_cr("Top of Stack: (sp=" PTR_FORMAT ")", p2i(sp)); + //print_hex_dump(st, (address)sp, (address)(sp + 8), sizeof(intptr_t)); + print_hex_dump(st, (address)sp-32, (address)(sp + 32), sizeof(intptr_t)); + 
st->cr(); + + // Note: it may be unsafe to inspect memory near pc. For example, pc may + // point to garbage if entry point in an nmethod is corrupted. Leave + // this at the end, and hope for the best. + address pc = os::fetch_frame_from_context(uc).pc(); + st->print_cr("Instructions: (pc=" PTR_FORMAT ")", p2i(pc)); + print_hex_dump(st, pc - 32, pc + 32, sizeof(char)); +} + +void os::print_tos_pc(outputStream *st, const void *context) { + if (context == NULL) return; + + const ucontext_t* uc = (const ucontext_t*)context; + // Note: it may be unsafe to inspect memory near pc. For example, pc may + // point to garbage if entry point in an nmethod is corrupted. Leave + // this at the end, and hope for the best. + address pc = os::fetch_frame_from_context(uc).pc(); + st->print_cr("Instructions: (pc=" PTR_FORMAT ")", p2i(pc)); + print_hex_dump(st, pc - 32, pc + 32, sizeof(char)); +} + +void os::print_register_info(outputStream *st, const void *context) { + if (context == NULL) return; + + const ucontext_t *uc = (const ucontext_t*)context; + + st->print_cr("Register to memory mapping:"); + st->cr(); + + // this is horrendously verbose but the layout of the registers in the + // // context does not match how we defined our abstract Register set, so + // // we can't just iterate through the gregs area + // + // // this is only for the "general purpose" registers + st->print("V0=" ); print_location(st, uc->uc_mcontext.sc_regs[0]); + st->print("T0=" ); print_location(st, uc->uc_mcontext.sc_regs[1]); + st->print("T1=" ); print_location(st, uc->uc_mcontext.sc_regs[2]); + st->print("T2=" ); print_location(st, uc->uc_mcontext.sc_regs[3]); + st->cr(); + st->print("T3=" ); print_location(st, uc->uc_mcontext.sc_regs[4]); + st->print("T4=" ); print_location(st, uc->uc_mcontext.sc_regs[5]); + st->print("T5=" ); print_location(st, uc->uc_mcontext.sc_regs[6]); + st->print("T6=" ); print_location(st, uc->uc_mcontext.sc_regs[7]); + st->cr(); + st->print("T7=" ); print_location(st, uc->uc_mcontext.sc_regs[8]); + st->print("S0=" ); print_location(st, uc->uc_mcontext.sc_regs[9]); + st->print("S1=" ); print_location(st, uc->uc_mcontext.sc_regs[10]); + st->print("S2=" ); print_location(st, uc->uc_mcontext.sc_regs[11]); + st->cr(); + st->print("S3=" ); print_location(st, uc->uc_mcontext.sc_regs[12]); + st->print("S4=" ); print_location(st, uc->uc_mcontext.sc_regs[13]); + st->print("S5=" ); print_location(st, uc->uc_mcontext.sc_regs[14]); + st->print("FP=" ); print_location(st, uc->uc_mcontext.sc_regs[15]); + st->cr(); + st->print("A0=" ); print_location(st, uc->uc_mcontext.sc_regs[16]); + st->print("A1=" ); print_location(st, uc->uc_mcontext.sc_regs[17]); + st->print("A2=" ); print_location(st, uc->uc_mcontext.sc_regs[18]); + st->print("A3=" ); print_location(st, uc->uc_mcontext.sc_regs[19]); + st->cr(); + st->print("A4=" ); print_location(st, uc->uc_mcontext.sc_regs[20]); + st->print("A5=" ); print_location(st, uc->uc_mcontext.sc_regs[21]); + st->print("T8=" ); print_location(st, uc->uc_mcontext.sc_regs[22]); + st->print("T9=" ); print_location(st, uc->uc_mcontext.sc_regs[23]); + st->cr(); + st->print("T10=" ); print_location(st, uc->uc_mcontext.sc_regs[24]); + st->print("T11=" ); print_location(st, uc->uc_mcontext.sc_regs[25]); + st->print("RA=" ); print_location(st, uc->uc_mcontext.sc_regs[26]); + st->print("T12=" ); print_location(st, uc->uc_mcontext.sc_regs[27]); + st->cr(); + st->print("AT=" ); print_location(st, uc->uc_mcontext.sc_regs[28]); + st->print("GP=" ); print_location(st, uc->uc_mcontext.sc_regs[29]); 
+ st->print("SP=" ); print_location(st, uc->uc_mcontext.sc_regs[30]); + st->print("R0=" ); print_location(st, uc->uc_mcontext.sc_regs[31]); + st->cr(); +} + +void os::setup_fpu() { + /* + //no use for MIPS + int fcsr; + address fpu_cntrl = StubRoutines::addr_fpu_cntrl_wrd_std(); + __asm__ __volatile__ ( + ".set noat;" + "cfc1 %0, $31;" + "sw %0, 0(%1);" + : "=r" (fcsr) + : "r" (fpu_cntrl) + : "memory" + ); + printf("fpu_cntrl: %lx\n", fpu_cntrl); + */ +} + +#ifndef PRODUCT +void os::verify_stack_alignment() { + //warning("TODO:os::verify_stack_alignment, check jzy"); + //assert(((intptr_t)os::current_stack_pointer() & (StackAlignmentInBytes-1)) == 0, "incorrect stack alignment"); +} +#endif + +int os::extra_bang_size_in_bytes() { + // sw64 does not require the additional stack bang. + //warning("TODO:os::extra_bang_size_in_bytes, check lsp"); + return 0; +} + +extern "C" int SpinPause() {return 0;} diff --git a/src/hotspot/os_cpu/linux_sw64/os_linux_sw64.hpp b/src/hotspot/os_cpu/linux_sw64/os_linux_sw64.hpp new file mode 100644 index 00000000000..9441b274692 --- /dev/null +++ b/src/hotspot/os_cpu/linux_sw64/os_linux_sw64.hpp @@ -0,0 +1,44 @@ +/* + * Copyright (c) 1999, 2017, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef OS_CPU_LINUX_SW64_VM_OS_LINUX_SW64_HPP +#define OS_CPU_LINUX_SW64_VM_OS_LINUX_SW64_HPP + + static void setup_fpu(); + + static intptr_t *get_previous_fp(); + + static address ucontext_get_ra(const ucontext_t* uc); + + // Used to register dynamic code cache area with the OS + // Note: Currently only used in 64 bit Windows implementations + static bool register_code_area(char *low, char *high) { return true; } + +//// // Atomically copy 64 bits of data +//// static void atomic_copy64(const volatile void *src, volatile void *dst) { +//// *(jlong *) dst = *(const jlong *) src; +//// } + +#endif // OS_CPU_LINUX_SW64_VM_OS_LINUX_SW64_HPP diff --git a/src/hotspot/os_cpu/linux_sw64/os_linux_sw64.inline.hpp b/src/hotspot/os_cpu/linux_sw64/os_linux_sw64.inline.hpp new file mode 100644 index 00000000000..9ca12eca3b7 --- /dev/null +++ b/src/hotspot/os_cpu/linux_sw64/os_linux_sw64.inline.hpp @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef OS_CPU_LINUX_SW64_VM_OS_LINUX_SW64_INLINE_HPP +#define OS_CPU_LINUX_SW64_VM_OS_LINUX_SW64_INLINE_HPP + +#include "runtime/os.hpp" + +////// See http://www.technovelty.org/code/c/reading-rdtsc.htl for details +////inline jlong os::rdtsc() { +////#if 0 +//// uint64_t res; +//// uint32_t ts1, ts2; +//// __asm__ __volatile__ ("rdtsc" : "=a" (ts1), "=d" (ts2)); +//// res = ((uint64_t)ts1 | (uint64_t)ts2 << 32); +//// return (jlong)res; +////#else +//// return (jlong)0; +////#endif +////} + +#endif // OS_CPU_LINUX_SW64_VM_OS_LINUX_SW64_INLINE_HPP diff --git a/src/hotspot/os_cpu/linux_sw64/prefetch_linux_sw64.inline.hpp b/src/hotspot/os_cpu/linux_sw64/prefetch_linux_sw64.inline.hpp new file mode 100644 index 00000000000..87426c4fca8 --- /dev/null +++ b/src/hotspot/os_cpu/linux_sw64/prefetch_linux_sw64.inline.hpp @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ *
+ */
+
+#ifndef OS_CPU_LINUX_SW64_VM_PREFETCH_LINUX_SW64_INLINE_HPP
+#define OS_CPU_LINUX_SW64_VM_PREFETCH_LINUX_SW64_INLINE_HPP
+
+#include "runtime/prefetch.hpp"
+
+
+inline void Prefetch::read (void *loc, intx interval) {
+ if (interval >= 0)
+ __asm__ __volatile__ (
+ " fillcs 0(%0) \n"
+ :
+ : "r" ( ((address)loc) +((long)interval) )
+ : "memory"
+ );
+}
+
+inline void Prefetch::write(void *loc, intx interval) {
+ if (interval >= 0)
+ __asm__ __volatile__ (
+ " fillde 0(%0) \n"
+ :
+ : "r" ( ((address)loc) +((long)interval) )
+ : "memory"
+ );
+}
+
+#endif // OS_CPU_LINUX_SW64_VM_PREFETCH_LINUX_SW64_INLINE_HPP
diff --git a/src/hotspot/os_cpu/linux_sw64/safefetch_linux_sw64.S b/src/hotspot/os_cpu/linux_sw64/safefetch_linux_sw64.S
new file mode 100644
index 00000000000..90f2b00c18b
--- /dev/null
+++ b/src/hotspot/os_cpu/linux_sw64/safefetch_linux_sw64.S
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+ .globl SafeFetchN_impl
+ .globl _SafeFetchN_fault
+ .globl _SafeFetchN_continuation
+ .globl SafeFetch32_impl
+ .globl _SafeFetch32_fault
+ .globl _SafeFetch32_continuation
+
+ # Support for int SafeFetch32(int* address, int defaultval);
+ #
+ # A0($16) : address
+ # A1($17) : defaultval
+SafeFetch32_impl:
+_SafeFetch32_fault:
+ ldw $0,0($16)
+ ret
+_SafeFetch32_continuation:
+ mov $0,$17
+ ret
+
+ # Support for intptr_t SafeFetchN(intptr_t* address, intptr_t defaultval);
+ #
+ # A0($16) : address
+ # A1($17) : defaultval
+SafeFetchN_impl:
+_SafeFetchN_fault:
+ ldl $0,0($16)
+ ret
+_SafeFetchN_continuation:
+ mov $0,$17
+ ret
diff --git a/src/hotspot/os_cpu/linux_sw64/thread_linux_sw64.cpp b/src/hotspot/os_cpu/linux_sw64/thread_linux_sw64.cpp
new file mode 100644
index 00000000000..006426e74cf
--- /dev/null
+++ b/src/hotspot/os_cpu/linux_sw64/thread_linux_sw64.cpp
@@ -0,0 +1,106 @@
+/*
+ * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "runtime/frame.inline.hpp" +#include "runtime/thread.inline.hpp" + +void JavaThread::pd_initialize() +{ + _anchor.clear(); + + // A non-existing address as error detector +// if (CompileBroker::get_compilation_id() > 0) +// _handle_wrong_method_stub = (address)SharedRuntime::get_handle_wrong_method_stub(); +// else +// _handle_wrong_method_stub = (address)0x2B2B2B; +} + +frame JavaThread::pd_last_frame() { + assert(has_last_Java_frame(), "must have last_Java_sp() when suspended"); + vmassert(_anchor.last_Java_pc() != NULL, "not walkable"); + return frame(_anchor.last_Java_sp(), _anchor.last_Java_fp(), _anchor.last_Java_pc()); +} + +// For Forte Analyzer AsyncGetCallTrace profiling support - thread is +// currently interrupted by SIGPROF +bool JavaThread::pd_get_top_frame_for_signal_handler(frame* fr_addr, + void* ucontext, bool isInJava) { + + assert(Thread::current() == this, "caller must be current thread"); + return pd_get_top_frame(fr_addr, ucontext, isInJava); +} + +bool JavaThread::pd_get_top_frame_for_profiling(frame* fr_addr, void* ucontext, bool isInJava) { + return pd_get_top_frame(fr_addr, ucontext, isInJava); +} + +bool JavaThread::pd_get_top_frame(frame* fr_addr, void* ucontext, bool isInJava) { + // If we have a last_Java_frame, then we should use it even if + // isInJava == true. It should be more reliable than ucontext info. + if (has_last_Java_frame() && frame_anchor()->walkable()) { + *fr_addr = pd_last_frame(); + return true; + } + + // At this point, we don't have a last_Java_frame, so + // we try to glean some information out of the ucontext + // if we were running Java code when SIGPROF came in. + if (isInJava) { + ucontext_t* uc = (ucontext_t*) ucontext; + + intptr_t* ret_fp; + intptr_t* ret_sp; + address addr = os::fetch_frame_from_context(uc, &ret_sp, &ret_fp); + if (addr == NULL || ret_sp == NULL ) { + // ucontext wasn't useful + return false; + } + + frame ret_frame(ret_sp, ret_fp, addr); + if (!ret_frame.safe_for_sender(this)) { +#if COMPILER2_OR_JVMCI + // C2 and JVMCI use ebp as a general register see if NULL fp helps + frame ret_frame2(ret_sp, NULL, addr); + if (!ret_frame2.safe_for_sender(this)) { + // nothing else to try if the frame isn't good + return false; + } + ret_frame = ret_frame2; +#else + // nothing else to try if the frame isn't good + return false; +#endif // COMPILER2_OR_JVMCI + } + *fr_addr = ret_frame; + return true; + } + + // nothing else to try + return false; +} + +void JavaThread::cache_global_variables() { } + diff --git a/src/hotspot/os_cpu/linux_sw64/thread_linux_sw64.hpp b/src/hotspot/os_cpu/linux_sw64/thread_linux_sw64.hpp new file mode 100644 index 00000000000..d6573191784 --- /dev/null +++ b/src/hotspot/os_cpu/linux_sw64/thread_linux_sw64.hpp @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2000, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. 
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef OS_CPU_LINUX_SW64_VM_THREAD_LINUX_SW64_HPP +#define OS_CPU_LINUX_SW64_VM_THREAD_LINUX_SW64_HPP + + private: + void pd_initialize(); + + frame pd_last_frame(); + + public: + + static ByteSize last_Java_fp_offset() { + return byte_offset_of(JavaThread, _anchor) + JavaFrameAnchor::last_Java_fp_offset(); + } + + bool pd_get_top_frame_for_signal_handler(frame* fr_addr, void* ucontext, + bool isInJava); + + bool pd_get_top_frame_for_profiling(frame* fr_addr, void* ucontext, bool isInJava); +private: + bool pd_get_top_frame(frame* fr_addr, void* ucontext, bool isInJava); +public: + + // For convenient implementation of NativeGeneralJump::replace_mt_safe() + volatile address _handle_wrong_method_stub; + static ByteSize handle_wrong_method_stub_offset() { return byte_offset_of(JavaThread, _handle_wrong_method_stub); } + void set_handle_wrong_method_stub(address stub) { _handle_wrong_method_stub = stub; } + +#endif // OS_CPU_LINUX_SW64_VM_THREAD_LINUX_SW64_HPP diff --git a/src/hotspot/os_cpu/linux_sw64/vmStructs_linux_sw64.hpp b/src/hotspot/os_cpu/linux_sw64/vmStructs_linux_sw64.hpp new file mode 100644 index 00000000000..310c09e2162 --- /dev/null +++ b/src/hotspot/os_cpu/linux_sw64/vmStructs_linux_sw64.hpp @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef OS_CPU_LINUX_SW64_VM_VMSTRUCTS_LINUX_SW64_HPP +#define OS_CPU_LINUX_SW64_VM_VMSTRUCTS_LINUX_SW64_HPP + +// These are the OS and CPU-specific fields, types and integer +// constants required by the Serviceability Agent. This file is +// referenced by vmStructs.cpp. + +#define VM_STRUCTS_OS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \ + \ + /******************************/ \ + /* Threads (NOTE: incomplete) */ \ + /******************************/ \ + nonstatic_field(OSThread, _thread_id, OSThread::thread_id_t) \ + nonstatic_field(OSThread, _pthread_id, pthread_t) + + +#define VM_TYPES_OS_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type) \ + \ + /**********************/ \ + /* Posix Thread IDs */ \ + /**********************/ \ + \ + declare_integer_type(OSThread::thread_id_t) \ + declare_unsigned_integer_type(pthread_t) + +#define VM_INT_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) + +#define VM_LONG_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) + +#endif // OS_CPU_LINUX_SW64_VM_VMSTRUCTS_LINUX_SW64_HPP diff --git a/src/hotspot/os_cpu/linux_sw64/vm_version_linux_sw64.cpp b/src/hotspot/os_cpu/linux_sw64/vm_version_linux_sw64.cpp new file mode 100644 index 00000000000..80ee00d56f3 --- /dev/null +++ b/src/hotspot/os_cpu/linux_sw64/vm_version_linux_sw64.cpp @@ -0,0 +1,118 @@ +/* + * Copyright (c) 2006, 2019, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "runtime/os.hpp" +#include "runtime/vm_version.hpp" + +#define CPU_FAMILY_AMOUNT 9 + +const char cpuinfo[CPU_FAMILY_AMOUNT][30] = { + "not-sw", // 0 + "sw410", // 1 + "sw4a", // 2 + "sw6a", // 3 + "sw6b", // 4 + "sw1621", // 5 + "sw421", // 6 + "sw3231", // 7 + "h8000", // 8 WX-H8000 for 8A +}; + +void read_cpu_info(const char *path, char *result) { + FILE *ptr; + char buf[1024]; + int i = 0; + if((ptr=fopen(path, "r")) != NULL) { + while(fgets(buf, 1024, ptr)!=NULL) { + strcat(result,buf); + i++; + if (i == 10) break; + } + fclose(ptr); + } else { + tty->print_cr("fopen %s error\n", path); + } +} + +void strlwr(char *str){ + for (; *str!='\0'; str++) + *str = tolower(*str); +} + +int VM_Version::platform_features(int features) { + char res[10240]; + int i; + features = spt_16k_page_m; //default support + memset(res, '\0', 10240 * sizeof(char)); + read_cpu_info("/proc/cpuinfo", res); + // res is converted to lower case + strlwr(res); + for (i = 1; i < CPU_FAMILY_AMOUNT; i++) { + if (strstr(res, cpuinfo[i])) { + break; + } + } + //add some other support when detected on shenwei + if (i != CPU_FAMILY_AMOUNT) { + features |= with_sw_support_m; + } + switch (i % CPU_FAMILY_AMOUNT) { + case 1 : + features |= sw2f_m; + //tty->print_cr("sw2f platform"); + break; + case 2 : + features |= sw4a_m; + //tty->print_cr("sw4a platform"); + break; + case 3 : + features |= sw6a_m; + //tty->print_cr("sw6a platform"); + break; + case 4 : + features |= sw6b_m; + //tty->print_cr("sw6b platform"); + break; + case 5 : + features |= sw1621_m; + //tty->print_cr("sw6b platform"); + break; + case 6 : + features |= sw4a_m; + //tty->print_cr("sw6b platform"); + break; + case 7 : + features |= sw3231_m; + break; + case 8 : + features |= wx_h8000_m; + break; + default: + ; + //tty->print_cr("cpu not support, the cpuinfo is: %s", res); + //ShouldNotReachHere(); + } + return features; +} diff --git a/src/hotspot/share/asm/assembler.hpp b/src/hotspot/share/asm/assembler.hpp index 202cba10c5d..ad117c7a9e8 100644 --- a/src/hotspot/share/asm/assembler.hpp +++ b/src/hotspot/share/asm/assembler.hpp @@ -311,6 +311,9 @@ class AbstractAssembler : public ResourceObj { static bool is_simm9(int64_t x) { return is_simm(x, 9); } static bool is_simm10(int64_t x) { return is_simm(x, 10); } static bool is_simm16(int64_t x) { return is_simm(x, 16); } +#ifdef SW64 + static bool is_simm21(int64_t x) { return is_simm(x, 21); } +#endif static bool is_simm32(int64_t x) { return is_simm(x, 32); } // Test if x is within unsigned immediate range for width. 
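The assembler.hpp hunk above only adds an SW64-guarded is_simm21 predicate next to the existing widths. What such a predicate computes is the usual two's-complement range check: a value fits a width-bit signed immediate when it lies in [-2^(width-1), 2^(width-1) - 1]. A minimal standalone sketch of that check follows (not the HotSpot AbstractAssembler implementation, which may add assertions on the width); reading the 21-bit width as an SW64 branch-displacement field is an assumption, not something the patch states:

```c++
#include <cstdint>
#include <cstdio>

// A width-bit two's-complement field holds values in [-2^(width-1), 2^(width-1) - 1].
static bool is_simm(int64_t x, unsigned width) {
  const int64_t limit = INT64_C(1) << (width - 1);
  return -limit <= x && x < limit;
}

static bool is_simm21(int64_t x) { return is_simm(x, 21); }

int main() {
  printf("%d\n", is_simm21(1048575));   // 1: 2^20 - 1, largest encodable value
  printf("%d\n", is_simm21(1048576));   // 0: 2^20 is one too large
  printf("%d\n", is_simm21(-1048576));  // 1: -2^20, smallest encodable value
  return 0;
}
```

Widths outside 1..63 would need extra guarding, which this sketch omits.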
diff --git a/src/hotspot/share/asm/codeBuffer.cpp b/src/hotspot/share/asm/codeBuffer.cpp index 0012152d48d..a71a9023f67 100644 --- a/src/hotspot/share/asm/codeBuffer.cpp +++ b/src/hotspot/share/asm/codeBuffer.cpp @@ -1165,7 +1165,11 @@ void CodeStrings::free() { const char* CodeStrings::add_string(const char * string) { check_valid(); +#ifdef SW64 + CodeString* s = new CodeString(string, 0); +#else CodeString* s = new CodeString(string); +#endif s->set_next(_strings); if (_strings == NULL) { _strings_last = s; diff --git a/src/hotspot/share/gc/g1/g1BlockOffsetTable.inline.hpp b/src/hotspot/share/gc/g1/g1BlockOffsetTable.inline.hpp index 74b0cb2818e..b2afceb5a0d 100644 --- a/src/hotspot/share/gc/g1/g1BlockOffsetTable.inline.hpp +++ b/src/hotspot/share/gc/g1/g1BlockOffsetTable.inline.hpp @@ -76,8 +76,15 @@ void G1BlockOffsetTable::set_offset_array(size_t left, size_t right, u_char offs check_index(right, "right index out of range"); assert(left <= right, "indexes out of order"); size_t num_cards = right - left + 1; - memset_with_concurrent_readers - (const_cast (&_offset_array[left]), offset, num_cards); +#ifndef SW64 + memset_with_concurrent_readers(const_cast (&_offset_array[left]), offset, num_cards); +#else + size_t i = left; + const size_t end = i + num_cards; + for (; i < end; i++) { + _offset_array[i] = offset; + } +#endif } // Variant of index_for that does not check the index for validity. diff --git a/src/hotspot/share/gc/g1/g1CardTable.cpp b/src/hotspot/share/gc/g1/g1CardTable.cpp index 9e565c1ba64..f55e0416d9c 100644 --- a/src/hotspot/share/gc/g1/g1CardTable.cpp +++ b/src/hotspot/share/gc/g1/g1CardTable.cpp @@ -32,7 +32,13 @@ void G1CardTable::g1_mark_as_young(const MemRegion& mr) { CardValue *const first = byte_for(mr.start()); CardValue *const last = byte_after(mr.last()); +#ifdef SW64 + for (CardValue* i = first; i < last; i++) { + *i = g1_young_gen; + } +#else memset_with_concurrent_readers(first, g1_young_gen, last - first); +#endif } #ifndef PRODUCT diff --git a/src/hotspot/share/interpreter/abstractInterpreter.cpp b/src/hotspot/share/interpreter/abstractInterpreter.cpp index 46ffb3705a9..beb6285932e 100644 --- a/src/hotspot/share/interpreter/abstractInterpreter.cpp +++ b/src/hotspot/share/interpreter/abstractInterpreter.cpp @@ -143,8 +143,8 @@ AbstractInterpreter::MethodKind AbstractInterpreter::method_kind(const methodHan case vmIntrinsics::_dlog10: return java_lang_math_log10; case vmIntrinsics::_dpow: return java_lang_math_pow; case vmIntrinsics::_dexp: return java_lang_math_exp; - case vmIntrinsics::_fmaD: return java_lang_math_fmaD; - case vmIntrinsics::_fmaF: return java_lang_math_fmaF; +// case vmIntrinsics::_fmaD: return java_lang_math_fmaD; +// case vmIntrinsics::_fmaF: return java_lang_math_fmaF; case vmIntrinsics::_Reference_get: return java_lang_ref_reference_get; case vmIntrinsics::_dsqrt: // _dsqrt will be selected for both Math::sqrt and StrictMath::sqrt, but the latter @@ -268,8 +268,8 @@ void AbstractInterpreter::print_method_kind(MethodKind kind) { case java_lang_math_sqrt : tty->print("java_lang_math_sqrt" ); break; case java_lang_math_log : tty->print("java_lang_math_log" ); break; case java_lang_math_log10 : tty->print("java_lang_math_log10" ); break; - case java_lang_math_fmaD : tty->print("java_lang_math_fmaD" ); break; - case java_lang_math_fmaF : tty->print("java_lang_math_fmaF" ); break; +// case java_lang_math_fmaD : tty->print("java_lang_math_fmaD" ); break; +// case java_lang_math_fmaF : tty->print("java_lang_math_fmaF" ); break; case 
java_util_zip_CRC32_update : tty->print("java_util_zip_CRC32_update"); break; case java_util_zip_CRC32_updateBytes : tty->print("java_util_zip_CRC32_updateBytes"); break; case java_util_zip_CRC32_updateByteBuffer : tty->print("java_util_zip_CRC32_updateByteBuffer"); break; diff --git a/src/hotspot/share/interpreter/abstractInterpreter.hpp b/src/hotspot/share/interpreter/abstractInterpreter.hpp index 2fb5b97e808..d240a6ed456 100644 --- a/src/hotspot/share/interpreter/abstractInterpreter.hpp +++ b/src/hotspot/share/interpreter/abstractInterpreter.hpp @@ -242,7 +242,7 @@ class AbstractInterpreter: AllStatic { return stackElementWords * i; } -#if !defined(ZERO) && (defined(IA32) || defined(AMD64)) +#if !defined(ZERO) && (defined(IA32) || defined(AMD64)) || defined(SW64) static Address::ScaleFactor stackElementScale() { return NOT_LP64(Address::times_4) LP64_ONLY(Address::times_8); } diff --git a/src/hotspot/share/interpreter/interpreterRuntime.cpp b/src/hotspot/share/interpreter/interpreterRuntime.cpp index d66ed24d862..13a29da670b 100644 --- a/src/hotspot/share/interpreter/interpreterRuntime.cpp +++ b/src/hotspot/share/interpreter/interpreterRuntime.cpp @@ -1459,7 +1459,7 @@ JRT_ENTRY(void, InterpreterRuntime::prepare_native_call(JavaThread* current, Met // preparing the same method will be sure to see non-null entry & mirror. JRT_END -#if defined(IA32) || defined(AMD64) || defined(ARM) +#if defined(IA32) || defined(AMD64) || defined(ARM) || defined(SW64) JRT_LEAF(void, InterpreterRuntime::popframe_move_outgoing_args(JavaThread* current, void* src_address, void* dest_address)) if (src_address == dest_address) { return; diff --git a/src/hotspot/share/interpreter/interpreterRuntime.hpp b/src/hotspot/share/interpreter/interpreterRuntime.hpp index c32431784aa..42eb87f4038 100644 --- a/src/hotspot/share/interpreter/interpreterRuntime.hpp +++ b/src/hotspot/share/interpreter/interpreterRuntime.hpp @@ -135,7 +135,7 @@ class InterpreterRuntime: AllStatic { Method* method, intptr_t* from, intptr_t* to); -#if defined(IA32) || defined(AMD64) || defined(ARM) +#if defined(IA32) || defined(AMD64) || defined(ARM) || defined(SW64) // Popframe support (only needed on x86, AMD64 and ARM) static void popframe_move_outgoing_args(JavaThread* current, void* src_address, void* dest_address); #endif diff --git a/src/hotspot/share/interpreter/templateInterpreterGenerator.cpp b/src/hotspot/share/interpreter/templateInterpreterGenerator.cpp index 8637da2e32d..931b24391a7 100644 --- a/src/hotspot/share/interpreter/templateInterpreterGenerator.cpp +++ b/src/hotspot/share/interpreter/templateInterpreterGenerator.cpp @@ -198,8 +198,8 @@ void TemplateInterpreterGenerator::generate_all() { method_entry(java_lang_math_log10) method_entry(java_lang_math_exp ) method_entry(java_lang_math_pow ) - method_entry(java_lang_math_fmaF ) - method_entry(java_lang_math_fmaD ) +// method_entry(java_lang_math_fmaF ) +// method_entry(java_lang_math_fmaD ) method_entry(java_lang_ref_reference_get) AbstractInterpreter::initialize_method_handle_entries(); diff --git a/src/hotspot/share/interpreter/templateInterpreterGenerator.hpp b/src/hotspot/share/interpreter/templateInterpreterGenerator.hpp index 4e167ff451a..7486225ecfb 100644 --- a/src/hotspot/share/interpreter/templateInterpreterGenerator.hpp +++ b/src/hotspot/share/interpreter/templateInterpreterGenerator.hpp @@ -122,6 +122,9 @@ class TemplateInterpreterGenerator: public AbstractInterpreterGenerator { public: TemplateInterpreterGenerator(StubQueue* _code); }; +#ifdef SW64 +void 
generate_transcendental_entry(AbstractInterpreter::MethodKind kind, int fpargs); +#endif // SW64 #endif // !ZERO diff --git a/src/hotspot/share/jfr/utilities/jfrBigEndian.hpp b/src/hotspot/share/jfr/utilities/jfrBigEndian.hpp index 597ddb3800f..249fccfb37d 100644 --- a/src/hotspot/share/jfr/utilities/jfrBigEndian.hpp +++ b/src/hotspot/share/jfr/utilities/jfrBigEndian.hpp @@ -102,7 +102,7 @@ inline T JfrBigEndian::read_unaligned(const address location) { inline bool JfrBigEndian::platform_supports_unaligned_reads(void) { #if defined(IA32) || defined(AMD64) || defined(PPC) || defined(S390) return true; -#elif defined(ARM) || defined(AARCH64) || defined(RISCV) +#elif defined(ARM) || defined(AARCH64) || defined(RISCV) || defined(SW64) return false; #else #warning "Unconfigured platform" diff --git a/src/hotspot/share/memory/metaspace.cpp b/src/hotspot/share/memory/metaspace.cpp index d1882c70e2c..a671438d23a 100644 --- a/src/hotspot/share/memory/metaspace.cpp +++ b/src/hotspot/share/memory/metaspace.cpp @@ -581,7 +581,7 @@ bool Metaspace::class_space_is_initialized() { // On error, returns an unreserved space. ReservedSpace Metaspace::reserve_address_space_for_compressed_classes(size_t size) { -#if defined(AARCH64) || defined(PPC64) +#if defined(AARCH64) || defined(PPC64) || defined(SW64) const size_t alignment = Metaspace::reserve_alignment(); // AArch64: Try to align metaspace class space so that we can decode a @@ -645,14 +645,14 @@ ReservedSpace Metaspace::reserve_address_space_for_compressed_classes(size_t siz } #endif // defined(AARCH64) || defined(PPC64) -#ifdef AARCH64 +#if defined(AARCH64) || defined(SW64) // Note: on AARCH64, if the code above does not find any good placement, we // have no recourse. We return an empty space and the VM will exit. return ReservedSpace(); #else // Default implementation: Just reserve anywhere. 
return ReservedSpace(size, Metaspace::reserve_alignment(), os::vm_page_size(), (char*)NULL); -#endif // AARCH64 +#endif // AARCH64 or SW64 } #endif // _LP64 diff --git a/src/hotspot/share/oops/method.cpp b/src/hotspot/share/oops/method.cpp index cce34ff7409..765c95db813 100644 --- a/src/hotspot/share/oops/method.cpp +++ b/src/hotspot/share/oops/method.cpp @@ -1582,6 +1582,11 @@ vmSymbolID Method::klass_id_for_intrinsics(const Klass* holder) { // exception: the AES intrinsics come from lib/ext/sunjce_provider.jar // which does not use the class default class loader so we check for its loader here const InstanceKlass* ik = InstanceKlass::cast(holder); +#ifndef SW64 + if ((ik->class_loader() != NULL) && !SystemDictionary::is_platform_class_loader(ik->class_loader())) { + return vmSymbolID::NO_SID; // regardless of name, no intrinsics here + } +#endif if ((ik->class_loader() != NULL) && !SystemDictionary::is_platform_class_loader(ik->class_loader())) { return vmSymbolID::NO_SID; // regardless of name, no intrinsics here } diff --git a/src/hotspot/share/runtime/abstract_vm_version.cpp b/src/hotspot/share/runtime/abstract_vm_version.cpp index cd8e239f023..6f3ed9b1e5a 100644 --- a/src/hotspot/share/runtime/abstract_vm_version.cpp +++ b/src/hotspot/share/runtime/abstract_vm_version.cpp @@ -180,6 +180,7 @@ const char* Abstract_VM_Version::jre_release_version() { #endif // PPC64 #else #define CPU AARCH64_ONLY("aarch64") \ + SW64_ONLY("sw64") \ AMD64_ONLY("amd64") \ IA32_ONLY("x86") \ IA64_ONLY("ia64") \ diff --git a/src/hotspot/share/runtime/globals.hpp b/src/hotspot/share/runtime/globals.hpp index aac0dc88482..c78a510dcf6 100644 --- a/src/hotspot/share/runtime/globals.hpp +++ b/src/hotspot/share/runtime/globals.hpp @@ -1237,7 +1237,7 @@ const intx ObjectAlignmentInBytes = 8; develop(bool, VerifyOops, false, \ "Do plausibility checks for oops") \ \ - develop(bool, CheckUnhandledOops, false, \ + product(bool, CheckUnhandledOops, false, \ "Check for unhandled oops in VM code") \ \ develop(bool, VerifyJNIFields, trueInDebug, \ diff --git a/src/hotspot/share/runtime/safepointMechanism.cpp b/src/hotspot/share/runtime/safepointMechanism.cpp index 20e163a7f6c..fe65acffed6 100644 --- a/src/hotspot/share/runtime/safepointMechanism.cpp +++ b/src/hotspot/share/runtime/safepointMechanism.cpp @@ -32,6 +32,7 @@ #include "runtime/stackWatermarkSet.hpp" #include "services/memTracker.hpp" #include "utilities/globalDefinitions.hpp" +#include uintptr_t SafepointMechanism::_poll_word_armed_value; uintptr_t SafepointMechanism::_poll_word_disarmed_value; diff --git a/src/hotspot/share/runtime/sharedRuntime.cpp b/src/hotspot/share/runtime/sharedRuntime.cpp index 9af4b513a99..fdae33dd8df 100644 --- a/src/hotspot/share/runtime/sharedRuntime.cpp +++ b/src/hotspot/share/runtime/sharedRuntime.cpp @@ -214,6 +214,25 @@ JRT_LEAF(jlong, SharedRuntime::lmul(jlong y, jlong x)) return x * y; JRT_END +#ifdef SW64 +JRT_LEAF(jint, SharedRuntime::sdiv(jint y, jint x)) + if (x == min_jint && y == CONST64(-1)) { + return x; + } else { + return x / y; + } +JRT_END + + +JRT_LEAF(jint, SharedRuntime::srem(jint y, jint x)) + if (x == min_jint && y == CONST64(-1)) { + return 0; + } else { + return x % y; + } +JRT_END +#endif + JRT_LEAF(jlong, SharedRuntime::ldiv(jlong y, jlong x)) if (x == min_jlong && y == CONST64(-1)) { @@ -2662,7 +2681,7 @@ AdapterHandlerEntry* AdapterHandlerLibrary::_int_arg_handler = NULL; AdapterHandlerEntry* AdapterHandlerLibrary::_obj_arg_handler = NULL; AdapterHandlerEntry* AdapterHandlerLibrary::_obj_int_arg_handler = 
NULL; AdapterHandlerEntry* AdapterHandlerLibrary::_obj_obj_arg_handler = NULL; -const int AdapterHandlerLibrary_size = 16*K; +const int AdapterHandlerLibrary_size = NOT_SW64(16*K)SW64_ONLY(46*K); BufferBlob* AdapterHandlerLibrary::_buffer = NULL; BufferBlob* AdapterHandlerLibrary::buffer_blob() { diff --git a/src/hotspot/share/runtime/sharedRuntime.hpp b/src/hotspot/share/runtime/sharedRuntime.hpp index 8912a7e8bd0..fa10336ff57 100644 --- a/src/hotspot/share/runtime/sharedRuntime.hpp +++ b/src/hotspot/share/runtime/sharedRuntime.hpp @@ -94,6 +94,12 @@ class SharedRuntime: AllStatic { // not have machine instructions to implement their functionality. // Do not remove these. +#ifdef SW64 + static jint sdiv(jint y, jint x); + static jint srem(jint y, jint x); + static unsigned int updateBytesCRC32(unsigned long crc, const unsigned char *buf_bytes, unsigned int len_ints); +#endif + // long arithmetics static jlong lmul(jlong y, jlong x); static jlong ldiv(jlong y, jlong x); diff --git a/src/hotspot/share/runtime/sharedRuntimeTrig.cpp b/src/hotspot/share/runtime/sharedRuntimeTrig.cpp index 6e3aa30b0b9..497b9f05051 100644 --- a/src/hotspot/share/runtime/sharedRuntimeTrig.cpp +++ b/src/hotspot/share/runtime/sharedRuntimeTrig.cpp @@ -508,25 +508,25 @@ static int __ieee754_rem_pio2(double x, double *y) { */ static const double -S1 = -1.66666666666666324348e-01, /* 0xBFC55555, 0x55555549 */ -S2 = 8.33333333332248946124e-03, /* 0x3F811111, 0x1110F8A6 */ -S3 = -1.98412698298579493134e-04, /* 0xBF2A01A0, 0x19C161D5 */ -S4 = 2.75573137070700676789e-06, /* 0x3EC71DE3, 0x57B1FE7D */ -S5 = -2.50507602534068634195e-08, /* 0xBE5AE5E6, 0x8A2B9CEB */ -S6 = 1.58969099521155010221e-10; /* 0x3DE5D93A, 0x5ACFD57C */ + SS1 = -1.66666666666666324348e-01, /* 0xBFC55555, 0x55555549 */ +SS2 = 8.33333333332248946124e-03, /* 0x3F811111, 0x1110F8A6 */ +SS3 = -1.98412698298579493134e-04, /* 0xBF2A01A0, 0x19C161D5 */ +SS4 = 2.75573137070700676789e-06, /* 0x3EC71DE3, 0x57B1FE7D */ +SS5 = -2.50507602534068634195e-08, /* 0xBE5AE5E6, 0x8A2B9CEB */ +SS6 = 1.58969099521155010221e-10; /* 0x3DE5D93A, 0x5ACFD57C */ static double __kernel_sin(double x, double y, int iy) { - double z,r,v; - int ix; - ix = high(x)&0x7fffffff; /* high word of x */ - if(ix<0x3e400000) /* |x| < 2**-27 */ - {if((int)x==0) return x;} /* generate inexact */ - z = x*x; - v = z*x; - r = S2+z*(S3+z*(S4+z*(S5+z*S6))); - if(iy==0) return x+v*(S1+z*r); - else return x-((z*(half*y-v*r)-y)-v*S1); + double z,r,v; + int ix; + ix = high(x)&0x7fffffff; /* high word of x */ + if(ix<0x3e400000) /* |x| < 2**-27 */ + {if((int)x==0) return x;} /* generate inexact */ + z = x*x; + v = z*x; + r = SS2+z*(SS3+z*(SS4+z*(SS5+z*SS6))); + if(iy==0) return x+v*(SS1+z*r); + else return x-((z*(half*y-v*r)-y)-v*SS1); } /* diff --git a/src/hotspot/share/runtime/stubRoutines.cpp b/src/hotspot/share/runtime/stubRoutines.cpp index e1b8a4e6099..13df836127b 100644 --- a/src/hotspot/share/runtime/stubRoutines.cpp +++ b/src/hotspot/share/runtime/stubRoutines.cpp @@ -324,9 +324,9 @@ void StubRoutines::initialize2() { } \ } \ - TEST_FILL(jbyte); - TEST_FILL(jshort); - TEST_FILL(jint); +// TEST_FILL(jbyte); +// TEST_FILL(jshort); +// TEST_FILL(jint); #undef TEST_FILL diff --git a/src/hotspot/share/runtime/thread.cpp b/src/hotspot/share/runtime/thread.cpp index 59f1475844a..3b3316d0843 100644 --- a/src/hotspot/share/runtime/thread.cpp +++ b/src/hotspot/share/runtime/thread.cpp @@ -2909,6 +2909,7 @@ jint Threads::create_vm(JavaVMInitArgs* args, bool* canTryAgain) { return status; } + 
JFR_ONLY(Jfr::on_create_vm_1();) // Should be done after the heap is fully created diff --git a/src/hotspot/share/utilities/macros.hpp b/src/hotspot/share/utilities/macros.hpp index 33ecfe089f8..0e785115538 100644 --- a/src/hotspot/share/utilities/macros.hpp +++ b/src/hotspot/share/utilities/macros.hpp @@ -579,6 +579,14 @@ #define NOT_RISCV64(code) code #endif +#ifdef SW64 +#define SW64_ONLY(code) code +#define NOT_SW64(code) +#else +#define SW64_ONLY(code) +#define NOT_SW64(code) code +#endif + #ifdef VM_LITTLE_ENDIAN #define LITTLE_ENDIAN_ONLY(code) code #define BIG_ENDIAN_ONLY(code) diff --git a/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.cpp b/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.cpp index 9accba375a2..e5145646026 100644 --- a/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.cpp +++ b/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.cpp @@ -64,6 +64,10 @@ #include "sun_jvm_hotspot_debugger_riscv64_RISCV64ThreadContext.h" #endif +#ifdef sw64 +#include "sun_jvm_hotspot_debugger_sw64_SW64ThreadContext.h" +#endif + class AutoJavaString { JNIEnv* m_env; jstring m_str; @@ -412,7 +416,7 @@ JNIEXPORT jbyteArray JNICALL Java_sun_jvm_hotspot_debugger_linux_LinuxDebuggerLo return (err == PS_OK)? array : 0; } -#if defined(i586) || defined(amd64) || defined(ppc64) || defined(ppc64le) || defined(aarch64) || defined(riscv64) +#if defined(i586) || defined(amd64) || defined(ppc64) || defined(ppc64le) || defined(aarch64) || defined(riscv64) || defined(sw64) extern "C" JNIEXPORT jlongArray JNICALL Java_sun_jvm_hotspot_debugger_linux_LinuxDebuggerLocal_getThreadIntegerRegisterSet0 (JNIEnv *env, jobject this_obj, jint lwp_id) { @@ -449,6 +453,9 @@ JNIEXPORT jlongArray JNICALL Java_sun_jvm_hotspot_debugger_linux_LinuxDebuggerLo #endif #if defined(ppc64) || defined(ppc64le) #define NPRGREG sun_jvm_hotspot_debugger_ppc64_PPC64ThreadContext_NPRGREG +#endif +#ifdef sw64 +#define NPRGREG sun_jvm_hotspot_debugger_sw64_SW64ThreadContext_NPRGREG #endif @@ -601,6 +608,20 @@ JNIEXPORT jlongArray JNICALL Java_sun_jvm_hotspot_debugger_linux_LinuxDebuggerLo #endif +#ifdef sw64 + +#define REG_INDEX(reg) sun_jvm_hotspot_debugger_sw64_SW64ThreadContext_##reg + + { + int i; + for (i = 0; i < 31; i++) + regs[i] = gregs.regs[i]; + regs[REG_INDEX(PC)] = gregs.pc; + regs[REG_INDEX(PSTATE)] = gregs.pstate; + } + +#endif /* sw64 */ + env->ReleaseLongArrayElements(array, regs, JNI_COMMIT); return array; } diff --git a/src/jdk.hotspot.agent/linux/native/libsaproc/libproc.h b/src/jdk.hotspot.agent/linux/native/libsaproc/libproc.h index a69496e77a4..e29f5796509 100644 --- a/src/jdk.hotspot.agent/linux/native/libsaproc/libproc.h +++ b/src/jdk.hotspot.agent/linux/native/libsaproc/libproc.h @@ -46,6 +46,10 @@ #elif defined(riscv64) #include #endif +#if defined(sw64) + #include + #define user_regs_struct user_pt_regs +#endif // This C bool type must be int for compatibility with Linux calls and // it would be a mistake to equivalence it to C++ bool on many platforms diff --git a/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c b/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c index 3068f475626..05b7ac68267 100644 --- a/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c +++ b/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c @@ -133,6 +133,9 @@ static bool process_get_lwp_regs(struct ps_prochandle* ph, pid_t pid, struct use #elif defined(PT_GETREGS) #define PTRACE_GETREGS_REQ PT_GETREGS #endif +#if defined(sw64) + #undef PTRACE_GETREGS_REQ 
+#endif #if defined(PTRACE_GETREGSET) struct iovec iov; diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/HSDB.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/HSDB.java index 02b66512353..14e5379ede1 100644 --- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/HSDB.java +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/HSDB.java @@ -998,7 +998,7 @@ public class HSDB implements ObjectHistogramPanel.Listener, SAListener { curFrame.getFP(), anno)); } else { - // For C2, which has null frame pointers on x86/amd64/aarch64 + // For C2, which has null frame pointers on x86/amd64/aarch64/sw64 CodeBlob cb = VM.getVM().getCodeCache().findBlob(curFrame.getPC()); Address sp = curFrame.getSP(); if (Assert.ASSERTS_ENABLED) { diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/HotSpotAgent.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/HotSpotAgent.java index e0e9b4b6727..e0ef8e263ed 100644 --- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/HotSpotAgent.java +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/HotSpotAgent.java @@ -37,6 +37,7 @@ import sun.jvm.hotspot.debugger.MachineDescriptionAMD64; import sun.jvm.hotspot.debugger.MachineDescriptionPPC64; import sun.jvm.hotspot.debugger.MachineDescriptionAArch64; import sun.jvm.hotspot.debugger.MachineDescriptionRISCV64; +import sun.jvm.hotspot.debugger.MachineDescriptionSW64; import sun.jvm.hotspot.debugger.MachineDescriptionIntelX86; import sun.jvm.hotspot.debugger.NoSuchSymbolException; import sun.jvm.hotspot.debugger.bsd.BsdDebuggerLocal; @@ -536,7 +537,10 @@ public class HotSpotAgent { machDesc = new MachineDescriptionAMD64(); } else if (cpu.equals("aarch64")) { machDesc = new MachineDescriptionAArch64(); - } else { + } else if (cpu.equals("sw64")) { + machDesc = new MachineDescriptionSW64(); + } + else { throw new DebuggerException("Win32 supported under x86, amd64 and aarch64 only"); } @@ -572,6 +576,8 @@ public class HotSpotAgent { machDesc = new MachineDescriptionAArch64(); } else if (cpu.equals("riscv64")) { machDesc = new MachineDescriptionRISCV64(); + } else if (cpu.equals("sw64") || cpu.equals("sw_64")) { + machDesc = new MachineDescriptionSW64(); } else { try { machDesc = (MachineDescription) diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionSW64.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionSW64.java new file mode 100644 index 00000000000..cc50412e89b --- /dev/null +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionSW64.java @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, Red Hat Inc. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +package sun.jvm.hotspot.debugger; + +public class MachineDescriptionSW64 extends MachineDescriptionTwosComplement implements MachineDescription { + public long getAddressSize() { + return 8; + } + + public boolean isLP64() { + return true; + } + + public boolean isBigEndian() { + return false; + } +} \ No newline at end of file diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxCDebugger.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxCDebugger.java index 469bb6e0665..d1cbfafc234 100644 --- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxCDebugger.java +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxCDebugger.java @@ -35,11 +35,13 @@ import sun.jvm.hotspot.debugger.amd64.*; import sun.jvm.hotspot.debugger.aarch64.*; import sun.jvm.hotspot.debugger.riscv64.*; import sun.jvm.hotspot.debugger.ppc64.*; +import sun.jvm.hotspot.debugger.sw64.*; import sun.jvm.hotspot.debugger.linux.x86.*; import sun.jvm.hotspot.debugger.linux.amd64.*; import sun.jvm.hotspot.debugger.linux.ppc64.*; import sun.jvm.hotspot.debugger.linux.aarch64.*; import sun.jvm.hotspot.debugger.linux.riscv64.*; +import sun.jvm.hotspot.debugger.linux.sw64.*; import sun.jvm.hotspot.utilities.*; class LinuxCDebugger implements CDebugger { @@ -114,6 +116,13 @@ class LinuxCDebugger implements CDebugger { Address pc = context.getRegisterAsAddress(RISCV64ThreadContext.PC); if (pc == null) return null; return new LinuxRISCV64CFrame(dbg, fp, pc); + } else if (cpu.equals("sw64")) { + SW64ThreadContext context = (SW64ThreadContext) thread.getContext(); + Address fp = context.getRegisterAsAddress(SW64ThreadContext.FP); + if (fp == null) return null; + Address pc = context.getRegisterAsAddress(SW64ThreadContext.PC); + if (pc == null) return null; + return new LinuxSW64CFrame(dbg, fp, pc); } else { // Runtime exception thrown by LinuxThreadContextFactory if unknown cpu ThreadContext context = (ThreadContext) thread.getContext(); diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxThreadContextFactory.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxThreadContextFactory.java index 69a34fe2afa..d09e5de5479 100644 --- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxThreadContextFactory.java +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxThreadContextFactory.java @@ -29,16 +29,19 @@ import sun.jvm.hotspot.debugger.*; import sun.jvm.hotspot.debugger.linux.amd64.*; import sun.jvm.hotspot.debugger.linux.x86.*; import sun.jvm.hotspot.debugger.linux.ppc64.*; +import sun.jvm.hotspot.debugger.linux.sw64.*; class LinuxThreadContextFactory { static ThreadContext createThreadContext(LinuxDebugger dbg) { String cpu = dbg.getCPU(); if (cpu.equals("x86")) { - return new LinuxX86ThreadContext(dbg); + return new LinuxX86ThreadContext(dbg); } else if (cpu.equals("amd64")) { - return new LinuxAMD64ThreadContext(dbg); - } else if (cpu.equals("ppc64")) { + return new LinuxAMD64ThreadContext(dbg); + } else if (cpu.equals("ppc64")) { 
return new LinuxPPC64ThreadContext(dbg); + } else if (cpu.equals("sw64") || cpu.equals("sw_64")) { + return new LinuxSW64ThreadContext(dbg); } else { try { Class tcc = Class.forName("sun.jvm.hotspot.debugger.linux." + diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/sw64/LinuxSW64CFrame.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/sw64/LinuxSW64CFrame.java new file mode 100644 index 00000000000..c629e4207cd --- /dev/null +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/sw64/LinuxSW64CFrame.java @@ -0,0 +1,86 @@ +/* + * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, Red Hat Inc. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +package sun.jvm.hotspot.debugger.linux.sw64; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.sw64.*; +import sun.jvm.hotspot.debugger.linux.*; +import sun.jvm.hotspot.debugger.cdbg.*; +import sun.jvm.hotspot.debugger.cdbg.basic.*; + +final public class LinuxSW64CFrame extends BasicCFrame { + public LinuxSW64CFrame(LinuxDebugger dbg, Address fp, Address pc) { + super(dbg.getCDebugger()); + this.fp = fp; + this.pc = pc; + this.dbg = dbg; + } + + // override base class impl to avoid ELF parsing + public ClosestSymbol closestSymbolToPC() { + // try native lookup in debugger. 
+ return dbg.lookup(dbg.getAddressValue(pc())); + } + + public Address pc() { + return pc; + } + + public Address localVariableBase() { + return fp; + } + + public CFrame sender(ThreadProxy thread) { + SW64ThreadContext context = (SW64ThreadContext) thread.getContext(); + Address rsp = context.getRegisterAsAddress(SW64ThreadContext.SP); + + if ((fp == null) || fp.lessThan(rsp)) { + return null; + } + + // Check alignment of fp + if (dbg.getAddressValue(fp) % (2 * ADDRESS_SIZE) != 0) { + return null; + } + + Address nextFP = fp.getAddressAt(0 * ADDRESS_SIZE); + if (nextFP == null || nextFP.lessThanOrEqual(fp)) { + return null; + } + Address nextPC = fp.getAddressAt(1 * ADDRESS_SIZE); + if (nextPC == null) { + return null; + } + return new LinuxSW64CFrame(dbg, nextFP, nextPC); + } + + // package/class internals only + private static final int ADDRESS_SIZE = 8; + private Address pc; + private Address sp; + private Address fp; + private LinuxDebugger dbg; +} diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/sw64/LinuxSW64ThreadContext.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/sw64/LinuxSW64ThreadContext.java new file mode 100644 index 00000000000..842e294cacb --- /dev/null +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/sw64/LinuxSW64ThreadContext.java @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, Red Hat Inc. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +package sun.jvm.hotspot.debugger.linux.sw64; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.sw64.*; +import sun.jvm.hotspot.debugger.linux.*; + +public class LinuxSW64ThreadContext extends SW64ThreadContext { + private LinuxDebugger debugger; + + public LinuxSW64ThreadContext(LinuxDebugger debugger) { + super(); + this.debugger = debugger; + } + + public void setRegisterAsAddress(int index, Address value) { + setRegister(index, debugger.getAddressValue(value)); + } + + public Address getRegisterAsAddress(int index) { + return debugger.newAddress(getRegister(index)); + } +} diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/sw64/ProcSW64Thread.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/sw64/ProcSW64Thread.java new file mode 100644 index 00000000000..a5d6ddfeba4 --- /dev/null +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/sw64/ProcSW64Thread.java @@ -0,0 +1,87 @@ +/* + * Copyright (c) 2004, Oracle and/or its affiliates. All rights reserved. 
+ * Copyright (c) 2015, Red Hat Inc. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +package sun.jvm.hotspot.debugger.proc.sw64; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.sw64.*; +import sun.jvm.hotspot.debugger.proc.*; +import sun.jvm.hotspot.utilities.*; + +public class ProcSW64Thread implements ThreadProxy { + private ProcDebugger debugger; + private int id; + + public ProcSW64Thread(ProcDebugger debugger, Address addr) { + this.debugger = debugger; + + // FIXME: the size here should be configurable. However, making it + // so would produce a dependency on the "types" package from the + // debugger package, which is not desired. + this.id = (int) addr.getCIntegerAt(0, 4, true); + } + + public ProcSW64Thread(ProcDebugger debugger, long id) { + this.debugger = debugger; + this.id = (int) id; + } + + public ThreadContext getContext() throws IllegalThreadStateException { + ProcSW64ThreadContext context = new ProcSW64ThreadContext(debugger); + long[] regs = debugger.getThreadIntegerRegisterSet(id); + if (Assert.ASSERTS_ENABLED) { + Assert.that(regs.length == SW64ThreadContext.NPRGREG, "size mismatch"); + } + for (int i = 0; i < regs.length; i++) { + context.setRegister(i, regs[i]); + } + return context; + } + + public boolean canSetContext() throws DebuggerException { + return false; + } + + public void setContext(ThreadContext context) + throws IllegalThreadStateException, DebuggerException { + throw new DebuggerException("Unimplemented"); + } + + public String toString() { + return "t@" + id; + } + + public boolean equals(Object obj) { + if ((obj == null) || !(obj instanceof ProcSW64Thread)) { + return false; + } + + return (((ProcSW64Thread) obj).id == id); + } + + public int hashCode() { + return id; + } +} diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/sw64/ProcSW64ThreadContext.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/sw64/ProcSW64ThreadContext.java new file mode 100644 index 00000000000..f8def11beb7 --- /dev/null +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/sw64/ProcSW64ThreadContext.java @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2004, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, Red Hat Inc. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +package sun.jvm.hotspot.debugger.proc.sw64; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.sw64.*; +import sun.jvm.hotspot.debugger.proc.*; + +public class ProcSW64ThreadContext extends SW64ThreadContext { + private ProcDebugger debugger; + + public ProcSW64ThreadContext(ProcDebugger debugger) { + super(); + this.debugger = debugger; + } + + public void setRegisterAsAddress(int index, Address value) { + setRegister(index, debugger.getAddressValue(value)); + } + + public Address getRegisterAsAddress(int index) { + return debugger.newAddress(getRegister(index)); + } +} diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/sw64/ProcSW64ThreadFactory.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/sw64/ProcSW64ThreadFactory.java new file mode 100644 index 00000000000..9908d21dbec --- /dev/null +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/sw64/ProcSW64ThreadFactory.java @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2004, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, Red Hat Inc. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +package sun.jvm.hotspot.debugger.proc.sw64; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.proc.*; + +public class ProcSW64ThreadFactory implements ProcThreadFactory { + private ProcDebugger debugger; + + public ProcSW64ThreadFactory(ProcDebugger debugger) { + this.debugger = debugger; + } + + public ThreadProxy createThreadWrapper(Address threadIdentifierAddr) { + return new ProcSW64Thread(debugger, threadIdentifierAddr); + } + + public ThreadProxy createThreadWrapper(long id) { + return new ProcSW64Thread(debugger, id); + } +} diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/RemoteDebuggerClient.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/RemoteDebuggerClient.java index 2bd396c8f4f..5585affc385 100644 --- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/RemoteDebuggerClient.java +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/RemoteDebuggerClient.java @@ -33,6 +33,7 @@ import sun.jvm.hotspot.debugger.cdbg.*; import sun.jvm.hotspot.debugger.remote.x86.*; import sun.jvm.hotspot.debugger.remote.amd64.*; import sun.jvm.hotspot.debugger.remote.ppc64.*; +import sun.jvm.hotspot.debugger.remote.sw64.*; /** An implementation of Debugger which wraps a RemoteDebugger, providing remote debugging via RMI. diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/sw64/RemoteSW64Thread.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/sw64/RemoteSW64Thread.java new file mode 100644 index 00000000000..a0f1e6fe72e --- /dev/null +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/sw64/RemoteSW64Thread.java @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2004, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, Red Hat Inc. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +package sun.jvm.hotspot.debugger.remote.sw64; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.sw64.*; +import sun.jvm.hotspot.debugger.remote.*; +import sun.jvm.hotspot.utilities.*; + +public class RemoteSW64Thread extends RemoteThread { + public RemoteSW64Thread(RemoteDebuggerClient debugger, Address addr) { + super(debugger, addr); + } + + public RemoteSW64Thread(RemoteDebuggerClient debugger, long id) { + super(debugger, id); + } + + public ThreadContext getContext() throws IllegalThreadStateException { + RemoteSW64ThreadContext context = new RemoteSW64ThreadContext(debugger); + long[] regs = (addr != null)? 
debugger.getThreadIntegerRegisterSet(addr) : + debugger.getThreadIntegerRegisterSet(id); + if (Assert.ASSERTS_ENABLED) { + Assert.that(regs.length == SW64ThreadContext.NPRGREG, "size of register set must match"); + } + for (int i = 0; i < regs.length; i++) { + context.setRegister(i, regs[i]); + } + return context; + } +} diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/sw64/RemoteSW64ThreadContext.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/sw64/RemoteSW64ThreadContext.java new file mode 100644 index 00000000000..fcaf8d2597a --- /dev/null +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/sw64/RemoteSW64ThreadContext.java @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2004, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, Red Hat Inc. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +package sun.jvm.hotspot.debugger.remote.sw64; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.sw64.*; +import sun.jvm.hotspot.debugger.remote.*; + +public class RemoteSW64ThreadContext extends SW64ThreadContext { + private RemoteDebuggerClient debugger; + + public RemoteSW64ThreadContext(RemoteDebuggerClient debugger) { + super(); + this.debugger = debugger; + } + + public void setRegisterAsAddress(int index, Address value) { + setRegister(index, debugger.getAddressValue(value)); + } + + public Address getRegisterAsAddress(int index) { + return debugger.newAddress(getRegister(index)); + } +} diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/sw64/RemoteSW64ThreadFactory.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/sw64/RemoteSW64ThreadFactory.java new file mode 100644 index 00000000000..618764c882a --- /dev/null +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/sw64/RemoteSW64ThreadFactory.java @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2004, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, Red Hat Inc. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +package sun.jvm.hotspot.debugger.remote.sw64; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.remote.*; + +public class RemoteSW64ThreadFactory implements RemoteThreadFactory { + private RemoteDebuggerClient debugger; + + public RemoteSW64ThreadFactory(RemoteDebuggerClient debugger) { + this.debugger = debugger; + } + + public ThreadProxy createThreadWrapper(Address threadIdentifierAddr) { + return new RemoteSW64Thread(debugger, threadIdentifierAddr); + } + + public ThreadProxy createThreadWrapper(long id) { + return new RemoteSW64Thread(debugger, id); + } +} diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/sw64/SW64ThreadContext.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/sw64/SW64ThreadContext.java new file mode 100644 index 00000000000..58c0f8447a0 --- /dev/null +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/sw64/SW64ThreadContext.java @@ -0,0 +1,134 @@ +/* + * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, Red Hat Inc. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +package sun.jvm.hotspot.debugger.sw64; + +import java.lang.annotation.Native; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.cdbg.*; + +/** Specifies the thread context on sw64 platforms; only a sub-portion + * of the context is guaranteed to be present on all operating + * systems. */ + +public abstract class SW64ThreadContext implements ThreadContext { + // Taken from /usr/include/asm/sigcontext.h on Linux/SW64. + + // /* + // * Signal context structure - contains all info to do with the state + // * before the signal handler was invoked. + // */ + + // NOTE: the indices for the various registers must be maintained as + // listed across various operating systems. 
However, only a small + // subset of the registers' values are guaranteed to be present (and + // must be present for the SA's stack walking to work) + + // One instance of the Native annotation is enough to trigger header generation + // for this file. + @Native + public static final int V0 = 0; + public static final int T0 = 1; + public static final int T1 = 2; + public static final int T2 = 3; + public static final int T3 = 4; + public static final int T4 = 5; + public static final int T5 = 6; + public static final int T6 = 7; + public static final int T7 = 8; + public static final int S0 = 9; + public static final int S1 = 10; + public static final int S2 = 11; + public static final int S3 = 12; + public static final int S4 = 13; + public static final int S5 = 14; + public static final int FP = 15; + public static final int A0 = 16; + public static final int A1 = 17; + public static final int A2 = 18; + public static final int A3 = 19; + public static final int A4 = 20; + public static final int A5 = 21; + public static final int T8 = 22; + public static final int T9 = 23; + public static final int T10 = 24; + public static final int T11 = 25; + public static final int RA = 26; + public static final int T12 = 27; + public static final int AT = 28; + public static final int GP = 29; + public static final int SP = 30; + public static final int PC = 31; + public static final int PSTATE = 32; + + public static final int NPRGREG = 33; + + private static final String[] regNames = { + "V0", "T0", "T1", "T2", + "T3", "T4", "T5", "T6", + "T7", "S0", "S1", "S2", + "S3", "S4", "S5", "FP", + "A0", "A1", "A2", "A3", + "A4", "A5", "T8", "T9", + "T10", "T11", "RA", "T12", + "AT", "GP", "SP", "PC", + "PSTATE", + }; + + private long[] data; + + public SW64ThreadContext() { + data = new long[NPRGREG]; + } + + public int getNumRegisters() { + return NPRGREG; + } + + public String getRegisterName(int index) { + return regNames[index]; + } + + public void setRegister(int index, long value) { + data[index] = value; + } + + public long getRegister(int index) { + return data[index]; + } + + public CFrame getTopFrame(Debugger dbg) { + return null; + } + + /** This can't be implemented in this class since we would have to + * tie the implementation to, for example, the debugging system */ + public abstract void setRegisterAsAddress(int index, Address value); + + /** This can't be implemented in this class since we would have to + * tie the implementation to, for example, the debugging system */ + public abstract Address getRegisterAsAddress(int index); +} diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/windbg/WindbgDebuggerLocal.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/windbg/WindbgDebuggerLocal.java index 55de9fc57ef..ad2d88d27d5 100644 --- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/windbg/WindbgDebuggerLocal.java +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/windbg/WindbgDebuggerLocal.java @@ -30,9 +30,11 @@ import java.util.*; import sun.jvm.hotspot.debugger.*; import sun.jvm.hotspot.debugger.aarch64.*; import sun.jvm.hotspot.debugger.amd64.*; +import sun.jvm.hotspot.debugger.sw64.*; import sun.jvm.hotspot.debugger.x86.*; import sun.jvm.hotspot.debugger.windbg.aarch64.*; import sun.jvm.hotspot.debugger.windbg.amd64.*; +import sun.jvm.hotspot.debugger.windbg.sw64.*; import sun.jvm.hotspot.debugger.windbg.x86.*; import sun.jvm.hotspot.debugger.win32.coff.*; import sun.jvm.hotspot.debugger.cdbg.*; @@ -117,6 +119,8 @@ 
public class WindbgDebuggerLocal extends DebuggerBase implements WindbgDebugger threadFactory = new WindbgAMD64ThreadFactory(this); } else if (cpu.equals("aarch64")) { threadFactory = new WindbgAARCH64ThreadFactory(this); + } else if (cpu.equals("sw64")) { + threadFactory = new WindbgSW64ThreadFactory(this); } if (useCache) { diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/windbg/sw64/WindbgSW64Thread.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/windbg/sw64/WindbgSW64Thread.java new file mode 100644 index 00000000000..019234b88e8 --- /dev/null +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/windbg/sw64/WindbgSW64Thread.java @@ -0,0 +1,94 @@ +/* + * Copyright (c) 2020, 2021, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, Microsoft Corporation. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +package sun.jvm.hotspot.debugger.windbg.sw64; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.sw64.*; +import sun.jvm.hotspot.debugger.windbg.*; + +class WindbgSW64Thread implements ThreadProxy { + private WindbgDebugger debugger; + private long sysId; + private boolean gotID; + private long id; + + // The address argument must be the address of the OSThread::_thread_id + WindbgSW64Thread(WindbgDebugger debugger, Address addr) { + this.debugger = debugger; + this.sysId = (long)addr.getCIntegerAt(0, 4, true); + gotID = false; + } + + WindbgSW64Thread(WindbgDebugger debugger, long sysId) { + this.debugger = debugger; + this.sysId = sysId; + gotID = false; + } + + public ThreadContext getContext() throws IllegalThreadStateException { + long[] data = debugger.getThreadIntegerRegisterSet(getThreadID()); + WindbgSW64ThreadContext context = new WindbgSW64ThreadContext(debugger); + for (int i = 0; i < data.length; i++) { + context.setRegister(i, data[i]); + } + return context; + } + + public boolean canSetContext() throws DebuggerException { + return false; + } + + public void setContext(ThreadContext thrCtx) + throws IllegalThreadStateException, DebuggerException { + throw new DebuggerException("Unimplemented"); + } + + public boolean equals(Object obj) { + if ((obj == null) || !(obj instanceof WindbgSW64Thread)) { + return false; + } + + return (((WindbgSW64Thread) obj).getThreadID() == getThreadID()); + } + + public int hashCode() { + return Long.hashCode(getThreadID()); + } + + public String toString() { + return Long.toString(getThreadID()); + } + + /** Retrieves the thread ID of this thread by examining the Thread + Information Block. 
*/ + private long getThreadID() { + if (!gotID) { + id = debugger.getThreadIdFromSysId(sysId); + } + + return id; + } +} diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/windbg/sw64/WindbgSW64ThreadContext.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/windbg/sw64/WindbgSW64ThreadContext.java new file mode 100644 index 00000000000..f6fb9ca3b0b --- /dev/null +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/windbg/sw64/WindbgSW64ThreadContext.java @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, Microsoft Corporation. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +package sun.jvm.hotspot.debugger.windbg.sw64; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.sw64.*; +import sun.jvm.hotspot.debugger.windbg.*; + +class WindbgSW64ThreadContext extends SW64ThreadContext { + private WindbgDebugger debugger; + + public WindbgSW64ThreadContext(WindbgDebugger debugger) { + super(); + this.debugger = debugger; + } + + public void setRegisterAsAddress(int index, Address value) { + setRegister(index, debugger.getAddressValue(value)); + } + + public Address getRegisterAsAddress(int index) { + return debugger.newAddress(getRegister(index)); + } +} diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/windbg/sw64/WindbgSW64ThreadFactory.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/windbg/sw64/WindbgSW64ThreadFactory.java new file mode 100644 index 00000000000..c7dee2a7d2c --- /dev/null +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/windbg/sw64/WindbgSW64ThreadFactory.java @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, Microsoft Corporation. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +package sun.jvm.hotspot.debugger.windbg.sw64; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.windbg.*; + +public class WindbgSW64ThreadFactory implements WindbgThreadFactory { + private WindbgDebugger debugger; + + public WindbgSW64ThreadFactory(WindbgDebugger debugger) { + this.debugger = debugger; + } + + public ThreadProxy createThreadWrapper(Address threadIdentifierAddr) { + return new WindbgSW64Thread(debugger, threadIdentifierAddr); + } + + public ThreadProxy createThreadWrapper(long id) { + return new WindbgSW64Thread(debugger, id); + } +} diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/Threads.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/Threads.java index d16ac8aae51..3b31b319303 100644 --- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/Threads.java +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/Threads.java @@ -35,6 +35,7 @@ import sun.jvm.hotspot.runtime.linux_x86.LinuxX86JavaThreadPDAccess; import sun.jvm.hotspot.runtime.linux_amd64.LinuxAMD64JavaThreadPDAccess; import sun.jvm.hotspot.runtime.linux_aarch64.LinuxAARCH64JavaThreadPDAccess; import sun.jvm.hotspot.runtime.linux_riscv64.LinuxRISCV64JavaThreadPDAccess; +import sun.jvm.hotspot.runtime.linux_sw64.LinuxSW64JavaThreadPDAccess; import sun.jvm.hotspot.runtime.linux_ppc64.LinuxPPC64JavaThreadPDAccess; import sun.jvm.hotspot.runtime.bsd_x86.BsdX86JavaThreadPDAccess; import sun.jvm.hotspot.runtime.bsd_amd64.BsdAMD64JavaThreadPDAccess; @@ -116,6 +117,8 @@ public class Threads { access = new LinuxAARCH64JavaThreadPDAccess(); } else if (cpu.equals("riscv64")) { access = new LinuxRISCV64JavaThreadPDAccess(); + } else if (cpu.equals("sw64")) { + access = new LinuxSW64JavaThreadPDAccess(); } else { try { access = (JavaThreadPDAccess) diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/linux_sw64/LinuxSW64JavaThreadPDAccess.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/linux_sw64/LinuxSW64JavaThreadPDAccess.java new file mode 100644 index 00000000000..cfe6ec60c93 --- /dev/null +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/linux_sw64/LinuxSW64JavaThreadPDAccess.java @@ -0,0 +1,134 @@ +/* + * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, Red Hat Inc. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +package sun.jvm.hotspot.runtime.linux_sw64; + +import java.io.*; +import java.util.*; +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.sw64.*; +import sun.jvm.hotspot.runtime.*; +import sun.jvm.hotspot.runtime.sw64.*; +import sun.jvm.hotspot.types.*; +import sun.jvm.hotspot.utilities.*; +import sun.jvm.hotspot.utilities.Observable; +import sun.jvm.hotspot.utilities.Observer; + +public class LinuxSW64JavaThreadPDAccess implements JavaThreadPDAccess { + private static AddressField lastJavaFPField; + private static AddressField osThreadField; + + // Field from OSThread + private static CIntegerField osThreadThreadIDField; + + // This is currently unneeded but is being kept in case we change + // the currentFrameGuess algorithm + private static final long GUESS_SCAN_RANGE = 128 * 1024; + + static { + VM.registerVMInitializedObserver(new Observer() { + public void update(Observable o, Object data) { + initialize(VM.getVM().getTypeDataBase()); + } + }); + } + + private static synchronized void initialize(TypeDataBase db) { + Type type = db.lookupType("JavaThread"); + osThreadField = type.getAddressField("_osthread"); + + Type anchorType = db.lookupType("JavaFrameAnchor"); + lastJavaFPField = anchorType.getAddressField("_last_Java_fp"); + + Type osThreadType = db.lookupType("OSThread"); + osThreadThreadIDField = osThreadType.getCIntegerField("_thread_id"); + } + + public Address getLastJavaFP(Address addr) { + return lastJavaFPField.getValue(addr.addOffsetTo(sun.jvm.hotspot.runtime.JavaThread.getAnchorField().getOffset())); + } + + public Address getLastJavaPC(Address addr) { + return null; + } + + public Address getBaseOfStackPointer(Address addr) { + return null; + } + + public Frame getLastFramePD(JavaThread thread, Address addr) { + Address fp = thread.getLastJavaFP(); + if (fp == null) { + return null; // no information + } + return new SW64Frame(thread.getLastJavaSP(), fp); + } + + public RegisterMap newRegisterMap(JavaThread thread, boolean updateMap) { + return new SW64RegisterMap(thread, updateMap); + } + + public Frame getCurrentFrameGuess(JavaThread thread, Address addr) { + ThreadProxy t = getThreadProxy(addr); + SW64ThreadContext context = (SW64ThreadContext) t.getContext(); + SW64CurrentFrameGuess guesser = new SW64CurrentFrameGuess(context, thread); + if (!guesser.run(GUESS_SCAN_RANGE)) { + return null; + } + if (guesser.getPC() == null) { + return new SW64Frame(guesser.getSP(), guesser.getFP()); + } else { + return new SW64Frame(guesser.getSP(), guesser.getFP(), guesser.getPC()); + } + } + + public void printThreadIDOn(Address addr, PrintStream tty) { + tty.print(getThreadProxy(addr)); + } + + public void printInfoOn(Address threadAddr, PrintStream tty) { + tty.print("Thread id: "); + printThreadIDOn(threadAddr, tty); +// tty.println("\nPostJavaState: " + getPostJavaState(threadAddr)); + } + + public Address getLastSP(Address addr) { + ThreadProxy t = getThreadProxy(addr); + SW64ThreadContext context = (SW64ThreadContext) t.getContext(); + return context.getRegisterAsAddress(SW64ThreadContext.SP); + } + + public ThreadProxy getThreadProxy(Address addr) { + // Addr 
is the address of the JavaThread. + // Fetch the OSThread (for now and for simplicity, not making a + // separate "OSThread" class in this package) + Address osThreadAddr = osThreadField.getValue(addr); + // Get the address of the _thread_id from the OSThread + Address threadIdAddr = osThreadAddr.addOffsetTo(osThreadThreadIDField.getOffset()); + + JVMDebugger debugger = VM.getVM().getDebugger(); + return debugger.getThreadForIdentifierAddress(threadIdAddr); + } +} diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/sw64/SW64CurrentFrameGuess.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/sw64/SW64CurrentFrameGuess.java new file mode 100644 index 00000000000..c340a41e828 --- /dev/null +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/sw64/SW64CurrentFrameGuess.java @@ -0,0 +1,250 @@ +/* + * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2019, Red Hat Inc. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +package sun.jvm.hotspot.runtime.sw64; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.sw64.*; +import sun.jvm.hotspot.code.*; +import sun.jvm.hotspot.interpreter.*; +import sun.jvm.hotspot.runtime.*; +import sun.jvm.hotspot.runtime.sw64.*; + +/**

Should be able to be used on all sw64 platforms we support + (Linux/sw64) to implement JavaThread's "currentFrameGuess()" + functionality. Input is an SW64ThreadContext; output is SP, FP, + and PC for an SW64Frame. Instantiation of the SW64Frame is + left to the caller, since we may need to subclass SW64Frame to + support signal handler frames on Unix platforms. + + Algorithm is to walk up the stack within a given range (say, + 512K at most) looking for a plausible PC and SP for a Java frame, + also considering those coming in from the context. If we find a PC + that belongs to the VM (i.e., in generated code like the + interpreter or CodeCache) then we try to find an associated FP. + We repeat this until we either find a complete frame or run out of + stack to look at.
*/ + +public class SW64CurrentFrameGuess { + private SW64ThreadContext context; + private JavaThread thread; + private Address spFound; + private Address fpFound; + private Address pcFound; + + private static final boolean DEBUG = System.getProperty("sun.jvm.hotspot.runtime.sw64.SW64Frame.DEBUG") + != null; + + public SW64CurrentFrameGuess(SW64ThreadContext context, + JavaThread thread) { + this.context = context; + this.thread = thread; + } + + /** Returns false if not able to find a frame within a reasonable range. */ + public boolean run(long regionInBytesToSearch) { + Address sp = context.getRegisterAsAddress(SW64ThreadContext.SP); + Address pc = context.getRegisterAsAddress(SW64ThreadContext.PC); + Address fp = context.getRegisterAsAddress(SW64ThreadContext.FP); + if (sp == null) { + // Bail out if no last java frame either + if (thread.getLastJavaSP() != null) { + setValues(thread.getLastJavaSP(), thread.getLastJavaFP(), null); + return true; + } + return false; + } + Address end = sp.addOffsetTo(regionInBytesToSearch); + VM vm = VM.getVM(); + + setValues(null, null, null); // Assume we're not going to find anything + + if (vm.isJavaPCDbg(pc)) { + if (vm.isClientCompiler()) { + // If the topmost frame is a Java frame, we are (pretty much) + // guaranteed to have a viable FP. We should be more robust + // than this (we have the potential for losing entire threads' + // stack traces) but need to see how much work we really have + // to do here. Searching the stack for an (SP, FP) pair is + // hard since it's easy to misinterpret inter-frame stack + // pointers as base-of-frame pointers; we also don't know the + // sizes of C1 frames (not registered in the nmethod) so can't + // derive them from SP. + + setValues(sp, fp, pc); + return true; + } else { + if (vm.getInterpreter().contains(pc)) { + if (DEBUG) { + System.out.println("CurrentFrameGuess: choosing interpreter frame: sp = " + + sp + ", fp = " + fp + ", pc = " + pc); + } + setValues(sp, fp, pc); + return true; + } + + // For the server compiler, FP is not guaranteed to be valid + // for compiled code. In addition, an earlier attempt at a + // non-searching algorithm (see below) failed because the + // stack pointer from the thread context was pointing + // (considerably) beyond the ostensible end of the stack, into + // garbage; walking from the topmost frame back caused a crash. + // + // This algorithm takes the current PC as a given and tries to + // find the correct corresponding SP by walking up the stack + // and repeatedly performing stackwalks (very inefficient). + // + // FIXME: there is something wrong with stackwalking across + // adapter frames...this is likely to be the root cause of the + // failure with the simpler algorithm below. + + for (long offset = 0; + offset < regionInBytesToSearch; + offset += vm.getAddressSize()) { + try { + Address curSP = sp.addOffsetTo(offset); + Frame frame = new SW64Frame(curSP, null, pc); + RegisterMap map = thread.newRegisterMap(false); + while (frame != null) { + if (frame.isEntryFrame() && frame.entryFrameIsFirst()) { + // We were able to traverse all the way to the + // bottommost Java frame. + // This sp looks good. Keep it. + if (DEBUG) { + System.out.println("CurrentFrameGuess: Choosing sp = " + curSP + ", pc = " + pc); + } + setValues(curSP, null, pc); + return true; + } + frame = frame.sender(map); + } + } catch (Exception e) { + if (DEBUG) { + System.out.println("CurrentFrameGuess: Exception " + e + " at offset " + offset); + } + // Bad SP. Try another. 
+ } + } + + // Were not able to find a plausible SP to go with this PC. + // Bail out. + return false; + + /* + // Original algorithm which does not work because SP was + // pointing beyond where it should have: + + // For the server compiler, FP is not guaranteed to be valid + // for compiled code. We see whether the PC is in the + // interpreter and take care of that, otherwise we run code + // (unfortunately) duplicated from SW64Frame.senderForCompiledFrame. + + CodeCache cc = vm.getCodeCache(); + if (cc.contains(pc)) { + CodeBlob cb = cc.findBlob(pc); + + // See if we can derive a frame pointer from SP and PC + // NOTE: This is the code duplicated from SW64Frame + Address saved_fp = null; + int llink_offset = cb.getLinkOffset(); + if (llink_offset >= 0) { + // Restore base-pointer, since next frame might be an interpreter frame. + Address fp_addr = sp.addOffsetTo(VM.getVM().getAddressSize() * llink_offset); + saved_fp = fp_addr.getAddressAt(0); + } + + setValues(sp, saved_fp, pc); + return true; + } + */ + } + } else { + // If the current program counter was not known to us as a Java + // PC, we currently assume that we are in the run-time system + // and attempt to look to thread-local storage for saved SP and + // FP. Note that if these are null (because we were, in fact, + // in Java code, i.e., vtable stubs or similar, and the SA + // didn't have enough insight into the target VM to understand + // that) then we are going to lose the entire stack trace for + // the thread, which is sub-optimal. FIXME. + + if (DEBUG) { + System.out.println("CurrentFrameGuess: choosing last Java frame: sp = " + + thread.getLastJavaSP() + ", fp = " + thread.getLastJavaFP()); + } + if (thread.getLastJavaSP() == null) { + return false; // No known Java frames on stack + } + + // The runtime has a nasty habit of not saving fp in the frame + // anchor, leaving us to grovel about in the stack to find a + // plausible address. Fortunately, this only happens in + // compiled code; there we always have a valid PC, and we always + // push LR and FP onto the stack as a pair, with FP at the lower + // address. + pc = thread.getLastJavaPC(); + fp = thread.getLastJavaFP(); + sp = thread.getLastJavaSP(); + + if (fp == null) { + CodeCache cc = vm.getCodeCache(); + if (cc.contains(pc)) { + CodeBlob cb = cc.findBlob(pc); + if (DEBUG) { + System.out.println("FP is null. Found blob frame size " + cb.getFrameSize()); + } + // See if we can derive a frame pointer from SP and PC + long link_offset = cb.getFrameSize() - 2 * VM.getVM().getAddressSize(); + if (link_offset >= 0) { + fp = sp.addOffsetTo(link_offset); + } + } + } + + // We found a PC in the frame anchor. Check that it's plausible, and + // if it is, use it. 
+ if (vm.isJavaPCDbg(pc)) { + setValues(sp, fp, pc); + } else { + setValues(sp, fp, null); + } + + return true; + } + } + + public Address getSP() { return spFound; } + public Address getFP() { return fpFound; } + /** May be null if getting values from thread-local storage; take + care to call the correct SW64Frame constructor to recover this if + necessary */ + public Address getPC() { return pcFound; } + + private void setValues(Address sp, Address fp, Address pc) { + spFound = sp; + fpFound = fp; + pcFound = pc; + } +} diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/sw64/SW64Frame.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/sw64/SW64Frame.java new file mode 100644 index 00000000000..23c4eb85571 --- /dev/null +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/sw64/SW64Frame.java @@ -0,0 +1,566 @@ +/* + * Copyright (c) 2001, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2019, Red Hat Inc. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +package sun.jvm.hotspot.runtime.sw64; + +import java.util.*; +import sun.jvm.hotspot.code.*; +import sun.jvm.hotspot.compiler.*; +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.oops.*; +import sun.jvm.hotspot.runtime.*; +import sun.jvm.hotspot.types.*; +import sun.jvm.hotspot.utilities.*; +import sun.jvm.hotspot.utilities.Observable; +import sun.jvm.hotspot.utilities.Observer; + +/** Specialization of and implementation of abstract methods of the + Frame class for the sw64 family of CPUs. 
*/ + +public class SW64Frame extends Frame { + private static final boolean DEBUG; + static { + DEBUG = System.getProperty("sun.jvm.hotspot.runtime.sw64.SW64Frame.DEBUG") != null; + } + + // All frames + private static final int LINK_OFFSET = 0; + private static final int RETURN_ADDR_OFFSET = 1; + private static final int SENDER_SP_OFFSET = 2; + + // Interpreter frames + private static final int INTERPRETER_FRAME_SENDER_SP_OFFSET = -1; + private static final int INTERPRETER_FRAME_LAST_SP_OFFSET = INTERPRETER_FRAME_SENDER_SP_OFFSET - 1; + private static final int INTERPRETER_FRAME_METHOD_OFFSET = INTERPRETER_FRAME_LAST_SP_OFFSET - 1; // -3 + private static int INTERPRETER_FRAME_MIRROR_OFFSET; + private static int INTERPRETER_FRAME_MDX_OFFSET; // Non-core builds only + private static int INTERPRETER_FRAME_CACHE_OFFSET; + private static int INTERPRETER_FRAME_LOCALS_OFFSET; + private static int INTERPRETER_FRAME_BCX_OFFSET; + private static int INTERPRETER_FRAME_INITIAL_SP_OFFSET; + private static int INTERPRETER_FRAME_MONITOR_BLOCK_TOP_OFFSET; + private static int INTERPRETER_FRAME_MONITOR_BLOCK_BOTTOM_OFFSET; + + // Entry frames + private static int ENTRY_FRAME_CALL_WRAPPER_OFFSET = -6; + + // Native frames + private static final int NATIVE_FRAME_INITIAL_PARAM_OFFSET = 2; + + private static VMReg fp = new VMReg(15); + + static { + VM.registerVMInitializedObserver(new Observer() { + public void update(Observable o, Object data) { + initialize(VM.getVM().getTypeDataBase()); + } + }); + } + + private static synchronized void initialize(TypeDataBase db) { + INTERPRETER_FRAME_MIRROR_OFFSET = INTERPRETER_FRAME_METHOD_OFFSET - 1; // -4 + INTERPRETER_FRAME_MDX_OFFSET = INTERPRETER_FRAME_MIRROR_OFFSET - 1; // -5 // Non-core builds only + INTERPRETER_FRAME_CACHE_OFFSET = INTERPRETER_FRAME_MDX_OFFSET - 1; // -6 + INTERPRETER_FRAME_LOCALS_OFFSET = INTERPRETER_FRAME_CACHE_OFFSET - 1; // -7 + INTERPRETER_FRAME_BCX_OFFSET = INTERPRETER_FRAME_LOCALS_OFFSET - 1; // -8 + INTERPRETER_FRAME_INITIAL_SP_OFFSET = INTERPRETER_FRAME_BCX_OFFSET - 1; // -9 + INTERPRETER_FRAME_MONITOR_BLOCK_TOP_OFFSET = INTERPRETER_FRAME_INITIAL_SP_OFFSET ; // -9 + INTERPRETER_FRAME_MONITOR_BLOCK_BOTTOM_OFFSET = INTERPRETER_FRAME_INITIAL_SP_OFFSET; // -9 + } + + + // an additional field beyond sp and pc: + Address raw_fp; // frame pointer + private Address raw_unextendedSP; + + private SW64Frame() { + } + + private void adjustForDeopt() { + if ( pc != null) { + // Look for a deopt pc and if it is deopted convert to original pc + CodeBlob cb = VM.getVM().getCodeCache().findBlob(pc); + if (cb != null && cb.isJavaMethod()) { + NMethod nm = (NMethod) cb; + if (pc.equals(nm.deoptHandlerBegin())) { + if (Assert.ASSERTS_ENABLED) { + Assert.that(this.getUnextendedSP() != null, "null SP in Java frame"); + } + // adjust pc if frame is deoptimized. + pc = this.getUnextendedSP().getAddressAt(nm.origPCOffset()); + deoptimized = true; + } + } + } + } + + public SW64Frame(Address raw_sp, Address raw_fp, Address pc) { + this.raw_sp = raw_sp; + this.raw_unextendedSP = raw_sp; + this.raw_fp = raw_fp; + this.pc = pc; + adjustUnextendedSP(); + + // Frame must be fully constructed before this call + adjustForDeopt(); + + if (DEBUG) { + System.out.println("SW64Frame(sp, fp, pc): " + this); + dumpStack(); + } + } + + public SW64Frame(Address raw_sp, Address raw_fp) { + this.raw_sp = raw_sp; + this.raw_unextendedSP = raw_sp; + this.raw_fp = raw_fp; + + // We cannot assume SP[-1] always contains a valid return PC (e.g. 
if + // the callee is a C/C++ compiled frame). If the PC is not known to + // Java then this.pc is null. + Address savedPC = raw_sp.getAddressAt(-1 * VM.getVM().getAddressSize()); + if (VM.getVM().isJavaPCDbg(savedPC)) { + this.pc = savedPC; + } + + adjustUnextendedSP(); + + // Frame must be fully constructed before this call + adjustForDeopt(); + + if (DEBUG) { + System.out.println("SW64Frame(sp, fp): " + this); + dumpStack(); + } + } + + public SW64Frame(Address raw_sp, Address raw_unextendedSp, Address raw_fp, Address pc) { + this.raw_sp = raw_sp; + this.raw_unextendedSP = raw_unextendedSp; + this.raw_fp = raw_fp; + this.pc = pc; + adjustUnextendedSP(); + + // Frame must be fully constructed before this call + adjustForDeopt(); + + if (DEBUG) { + System.out.println("SW64Frame(sp, unextendedSP, fp, pc): " + this); + dumpStack(); + } + + } + + public Object clone() { + SW64Frame frame = new SW64Frame(); + frame.raw_sp = raw_sp; + frame.raw_unextendedSP = raw_unextendedSP; + frame.raw_fp = raw_fp; + frame.pc = pc; + frame.deoptimized = deoptimized; + return frame; + } + + public boolean equals(Object arg) { + if (arg == null) { + return false; + } + + if (!(arg instanceof SW64Frame)) { + return false; + } + + SW64Frame other = (SW64Frame) arg; + + return (AddressOps.equal(getSP(), other.getSP()) && + AddressOps.equal(getUnextendedSP(), other.getUnextendedSP()) && + AddressOps.equal(getFP(), other.getFP()) && + AddressOps.equal(getPC(), other.getPC())); + } + + public int hashCode() { + if (raw_sp == null) { + return 0; + } + + return raw_sp.hashCode(); + } + + public String toString() { + return "sp: " + (getSP() == null? "null" : getSP().toString()) + + ", unextendedSP: " + (getUnextendedSP() == null? "null" : getUnextendedSP().toString()) + + ", fp: " + (getFP() == null? "null" : getFP().toString()) + + ", pc: " + (pc == null? "null" : pc.toString()); + } + + // accessors for the instance variables + public Address getFP() { return raw_fp; } + public Address getSP() { return raw_sp; } + public Address getID() { return raw_sp; } + + // FIXME: not implemented yet + public boolean isSignalHandlerFrameDbg() { return false; } + public int getSignalNumberDbg() { return 0; } + public String getSignalNameDbg() { return null; } + + public boolean isInterpretedFrameValid() { + if (Assert.ASSERTS_ENABLED) { + Assert.that(isInterpretedFrame(), "Not an interpreted frame"); + } + + // These are reasonable sanity checks + if (getFP() == null || getFP().andWithMask(0x3) != null) { + return false; + } + + if (getSP() == null || getSP().andWithMask(0x3) != null) { + return false; + } + + if (getFP().addOffsetTo(INTERPRETER_FRAME_INITIAL_SP_OFFSET * VM.getVM().getAddressSize()).lessThan(getSP())) { + return false; + } + + // These are hacks to keep us out of trouble. + // The problem with these is that they mask other problems + if (getFP().lessThanOrEqual(getSP())) { + // this attempts to deal with unsigned comparison above + return false; + } + + if (getFP().minus(getSP()) > 4096 * VM.getVM().getAddressSize()) { + // stack frames shouldn't be large. + return false; + } + + return true; + } + + // FIXME: not applicable in current system + // void patch_pc(Thread* thread, address pc); + + public Frame sender(RegisterMap regMap, CodeBlob cb) { + SW64RegisterMap map = (SW64RegisterMap) regMap; + + if (Assert.ASSERTS_ENABLED) { + Assert.that(map != null, "map must be set"); + } + + // Default is we done have to follow them. 
The sender_for_xxx will + // update it accordingly + map.setIncludeArgumentOops(false); + + if (isEntryFrame()) return senderForEntryFrame(map); + if (isInterpretedFrame()) return senderForInterpreterFrame(map); + + if(cb == null) { + cb = VM.getVM().getCodeCache().findBlob(getPC()); + } else { + if (Assert.ASSERTS_ENABLED) { + Assert.that(cb.equals(VM.getVM().getCodeCache().findBlob(getPC())), "Must be the same"); + } + } + + if (cb != null) { + return senderForCompiledFrame(map, cb); + } + + // Must be native-compiled frame, i.e. the marshaling code for native + // methods that exists in the core system. + return new SW64Frame(getSenderSP(), getLink(), getSenderPC()); + } + + private Frame senderForEntryFrame(SW64RegisterMap map) { + if (DEBUG) { + System.out.println("senderForEntryFrame"); + } + if (Assert.ASSERTS_ENABLED) { + Assert.that(map != null, "map must be set"); + } + // Java frame called from C; skip all C frames and return top C + // frame of that chunk as the sender + SW64JavaCallWrapper jcw = (SW64JavaCallWrapper) getEntryFrameCallWrapper(); + if (Assert.ASSERTS_ENABLED) { + Assert.that(!entryFrameIsFirst(), "next Java fp must be non zero"); + Assert.that(jcw.getLastJavaSP().greaterThan(getSP()), "must be above this frame on stack"); + } + SW64Frame fr; + if (jcw.getLastJavaPC() != null) { + fr = new SW64Frame(jcw.getLastJavaSP(), jcw.getLastJavaFP(), jcw.getLastJavaPC()); + } else { + fr = new SW64Frame(jcw.getLastJavaSP(), jcw.getLastJavaFP()); + } + map.clear(); + if (Assert.ASSERTS_ENABLED) { + Assert.that(map.getIncludeArgumentOops(), "should be set by clear"); + } + return fr; + } + + //------------------------------------------------------------------------------ + // frame::adjust_unextended_sp + private void adjustUnextendedSP() { + // If we are returning to a compiled MethodHandle call site, the + // saved_fp will in fact be a saved value of the unextended SP. The + // simplest way to tell whether we are returning to such a call site + // is as follows: + + CodeBlob cb = cb(); + NMethod senderNm = (cb == null) ? null : cb.asNMethodOrNull(); + if (senderNm != null) { + // If the sender PC is a deoptimization point, get the original + // PC. For MethodHandle call site the unextended_sp is stored in + // saved_fp. + if (senderNm.isDeoptMhEntry(getPC())) { + // DEBUG_ONLY(verifyDeoptMhOriginalPc(senderNm, getFP())); + raw_unextendedSP = getFP(); + } + else if (senderNm.isDeoptEntry(getPC())) { + // DEBUG_ONLY(verifyDeoptOriginalPc(senderNm, raw_unextendedSp)); + } + else if (senderNm.isMethodHandleReturn(getPC())) { + raw_unextendedSP = getFP(); + } + } + } + + private Frame senderForInterpreterFrame(SW64RegisterMap map) { + if (DEBUG) { + System.out.println("senderForInterpreterFrame"); + } + Address unextendedSP = addressOfStackSlot(INTERPRETER_FRAME_SENDER_SP_OFFSET).getAddressAt(0); + Address sp = addressOfStackSlot(SENDER_SP_OFFSET); + // We do not need to update the callee-save register mapping because above + // us is either another interpreter frame or a converter-frame, but never + // directly a compiled frame. + // 11/24/04 SFG. With the removal of adapter frames this is no longer true. + // However c2 no longer uses callee save register for java calls so there + // are no callee register to find. 
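
With sender() and its three sender-for-* helpers in place, generic SA stack walking should work on linux-sw64 without caller-side changes. A hedged sketch, assuming a JavaThread obtained from an attached VM (the wrapper class is illustrative; the loop mirrors the one used by SW64CurrentFrameGuess above):

```java
import sun.jvm.hotspot.runtime.Frame;
import sun.jvm.hotspot.runtime.JavaThread;
import sun.jvm.hotspot.runtime.RegisterMap;

final class RawStackWalkSketch {
    static void dumpRawFrames(JavaThread thread) {
        RegisterMap map = thread.newRegisterMap(false);  // same call SW64CurrentFrameGuess makes
        Frame frame = thread.getCurrentFrameGuess();     // resolved via LinuxSW64JavaThreadPDAccess on sw64
        while (frame != null) {
            System.out.println(frame);                   // SW64Frame.toString(): sp/unextendedSP/fp/pc
            if (frame.isEntryFrame() && frame.entryFrameIsFirst()) {
                break;                                   // bottom-most Java frame, as in the guesser's loop
            }
            frame = frame.sender(map);                   // dispatches to senderForEntry/Interpreter/CompiledFrame
        }
    }
}
```
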
+ + if (map.getUpdateMap()) + updateMapWithSavedLink(map, addressOfStackSlot(LINK_OFFSET)); + + return new SW64Frame(sp, unextendedSP, getLink(), getSenderPC()); + } + + private void updateMapWithSavedLink(RegisterMap map, Address savedFPAddr) { + map.setLocation(fp, savedFPAddr); + } + + private Frame senderForCompiledFrame(SW64RegisterMap map, CodeBlob cb) { + if (DEBUG) { + System.out.println("senderForCompiledFrame"); + } + + // + // NOTE: some of this code is (unfortunately) duplicated SW64CurrentFrameGuess + // + + if (Assert.ASSERTS_ENABLED) { + Assert.that(map != null, "map must be set"); + } + + // frame owned by optimizing compiler + if (Assert.ASSERTS_ENABLED) { + Assert.that(cb.getFrameSize() >= 0, "must have non-zero frame size"); + } + Address senderSP = getUnextendedSP().addOffsetTo(cb.getFrameSize()); + + // The return_address is always the word on the stack + Address senderPC = senderSP.getAddressAt(-1 * VM.getVM().getAddressSize()); + + // This is the saved value of FP which may or may not really be an FP. + // It is only an FP if the sender is an interpreter frame. + Address savedFPAddr = senderSP.addOffsetTo(- SENDER_SP_OFFSET * VM.getVM().getAddressSize()); + + if (map.getUpdateMap()) { + // Tell GC to use argument oopmaps for some runtime stubs that need it. + // For C1, the runtime stub might not have oop maps, so set this flag + // outside of update_register_map. + map.setIncludeArgumentOops(cb.callerMustGCArguments()); + + if (cb.getOopMaps() != null) { + ImmutableOopMapSet.updateRegisterMap(this, cb, map, true); + } + + // Since the prolog does the save and restore of FP there is no oopmap + // for it so we must fill in its location as if there was an oopmap entry + // since if our caller was compiled code there could be live jvm state in it. + updateMapWithSavedLink(map, savedFPAddr); + } + + return new SW64Frame(senderSP, savedFPAddr.getAddressAt(0), senderPC); + } + + protected boolean hasSenderPD() { + return true; + } + + public long frameSize() { + return (getSenderSP().minus(getSP()) / VM.getVM().getAddressSize()); + } + + public Address getLink() { + try { + if (DEBUG) { + System.out.println("Reading link at " + addressOfStackSlot(LINK_OFFSET) + + " = " + addressOfStackSlot(LINK_OFFSET).getAddressAt(0)); + } + return addressOfStackSlot(LINK_OFFSET).getAddressAt(0); + } catch (Exception e) { + if (DEBUG) + System.out.println("Returning null"); + return null; + } + } + + // FIXME: not implementable yet + //inline void frame::set_link(intptr_t* addr) { *(intptr_t **)addr_at(link_offset) = addr; } + + public Address getUnextendedSP() { return raw_unextendedSP; } + + // Return address: + public Address getSenderPCAddr() { return addressOfStackSlot(RETURN_ADDR_OFFSET); } + public Address getSenderPC() { return getSenderPCAddr().getAddressAt(0); } + + // return address of param, zero origin index. + public Address getNativeParamAddr(int idx) { + return addressOfStackSlot(NATIVE_FRAME_INITIAL_PARAM_OFFSET + idx); + } + + public Address getSenderSP() { return addressOfStackSlot(SENDER_SP_OFFSET); } + + public Address addressOfInterpreterFrameLocals() { + return addressOfStackSlot(INTERPRETER_FRAME_LOCALS_OFFSET); + } + + private Address addressOfInterpreterFrameBCP() { + return addressOfStackSlot(INTERPRETER_FRAME_BCX_OFFSET); + } + + public int getInterpreterFrameBCI() { + // FIXME: this is not atomic with respect to GC and is unsuitable + // for use in a non-debugging, or reflective, system. Need to + // figure out how to express this. 
+ Address bcp = addressOfInterpreterFrameBCP().getAddressAt(0); + Address methodHandle = addressOfInterpreterFrameMethod().getAddressAt(0); + Method method = (Method)Metadata.instantiateWrapperFor(methodHandle); + return bcpToBci(bcp, method); + } + + public Address addressOfInterpreterFrameMDP() { + return addressOfStackSlot(INTERPRETER_FRAME_MDX_OFFSET); + } + + // FIXME + //inline int frame::interpreter_frame_monitor_size() { + // return BasicObjectLock::size(); + //} + + // expression stack + // (the max_stack arguments are used by the GC; see class FrameClosure) + + public Address addressOfInterpreterFrameExpressionStack() { + Address monitorEnd = interpreterFrameMonitorEnd().address(); + return monitorEnd.addOffsetTo(-1 * VM.getVM().getAddressSize()); + } + + public int getInterpreterFrameExpressionStackDirection() { return -1; } + + // top of expression stack + public Address addressOfInterpreterFrameTOS() { + return getSP(); + } + + /** Expression stack from top down */ + public Address addressOfInterpreterFrameTOSAt(int slot) { + return addressOfInterpreterFrameTOS().addOffsetTo(slot * VM.getVM().getAddressSize()); + } + + public Address getInterpreterFrameSenderSP() { + if (Assert.ASSERTS_ENABLED) { + Assert.that(isInterpretedFrame(), "interpreted frame expected"); + } + return addressOfStackSlot(INTERPRETER_FRAME_SENDER_SP_OFFSET).getAddressAt(0); + } + + // Monitors + public BasicObjectLock interpreterFrameMonitorBegin() { + return new BasicObjectLock(addressOfStackSlot(INTERPRETER_FRAME_MONITOR_BLOCK_BOTTOM_OFFSET)); + } + + public BasicObjectLock interpreterFrameMonitorEnd() { + Address result = addressOfStackSlot(INTERPRETER_FRAME_MONITOR_BLOCK_TOP_OFFSET).getAddressAt(0); + if (Assert.ASSERTS_ENABLED) { + // make sure the pointer points inside the frame + Assert.that(AddressOps.gt(getFP(), result), "result must < than frame pointer"); + Assert.that(AddressOps.lte(getSP(), result), "result must >= than stack pointer"); + } + return new BasicObjectLock(result); + } + + public int interpreterFrameMonitorSize() { + return BasicObjectLock.size(); + } + + // Method + public Address addressOfInterpreterFrameMethod() { + return addressOfStackSlot(INTERPRETER_FRAME_METHOD_OFFSET); + } + + // Constant pool cache + public Address addressOfInterpreterFrameCPCache() { + return addressOfStackSlot(INTERPRETER_FRAME_CACHE_OFFSET); + } + + // Entry frames + public JavaCallWrapper getEntryFrameCallWrapper() { + return new SW64JavaCallWrapper(addressOfStackSlot(ENTRY_FRAME_CALL_WRAPPER_OFFSET).getAddressAt(0)); + } + + protected Address addressOfSavedOopResult() { + // offset is 2 for compiler2 and 3 for compiler1 + return getSP().addOffsetTo((VM.getVM().isClientCompiler() ? 
2 : 3) * + VM.getVM().getAddressSize()); + } + + protected Address addressOfSavedReceiver() { + return getSP().addOffsetTo(-4 * VM.getVM().getAddressSize()); + } + + private void dumpStack() { + for (Address addr = getSP().addOffsetTo(-4 * VM.getVM().getAddressSize()); + AddressOps.lt(addr, getSP()); + addr = addr.addOffsetTo(VM.getVM().getAddressSize())) { + System.out.println(addr + ": " + addr.getAddressAt(0)); + } + System.out.println("-----------------------"); + for (Address addr = getSP(); + AddressOps.lte(addr, getSP().addOffsetTo(20 * VM.getVM().getAddressSize())); + addr = addr.addOffsetTo(VM.getVM().getAddressSize())) { + System.out.println(addr + ": " + addr.getAddressAt(0)); + } + } +} diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/sw64/SW64JavaCallWrapper.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/sw64/SW64JavaCallWrapper.java new file mode 100644 index 00000000000..7413dd84c41 --- /dev/null +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/sw64/SW64JavaCallWrapper.java @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, Red Hat Inc. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +package sun.jvm.hotspot.runtime.sw64; + +import java.util.*; +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.types.*; +import sun.jvm.hotspot.runtime.*; +import sun.jvm.hotspot.utilities.Observable; +import sun.jvm.hotspot.utilities.Observer; + +public class SW64JavaCallWrapper extends JavaCallWrapper { + private static AddressField lastJavaFPField; + + static { + VM.registerVMInitializedObserver(new Observer() { + public void update(Observable o, Object data) { + initialize(VM.getVM().getTypeDataBase()); + } + }); + } + + private static synchronized void initialize(TypeDataBase db) { + Type type = db.lookupType("JavaFrameAnchor"); + + lastJavaFPField = type.getAddressField("_last_Java_fp"); + } + + public SW64JavaCallWrapper(Address addr) { + super(addr); + } + + public Address getLastJavaFP() { + return lastJavaFPField.getValue(addr.addOffsetTo(anchorField.getOffset())); + } +} diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/sw64/SW64RegisterMap.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/sw64/SW64RegisterMap.java new file mode 100644 index 00000000000..ded1591a6b7 --- /dev/null +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/sw64/SW64RegisterMap.java @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved. 
+ * Copyright (c) 2015, Red Hat Inc. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +package sun.jvm.hotspot.runtime.sw64; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.runtime.*; + +public class SW64RegisterMap extends RegisterMap { + + /** This is the only public constructor */ + public SW64RegisterMap(JavaThread thread, boolean updateMap) { + super(thread, updateMap); + } + + protected SW64RegisterMap(RegisterMap map) { + super(map); + } + + public Object clone() { + SW64RegisterMap retval = new SW64RegisterMap(this); + return retval; + } + + // no PD state to clear or copy: + protected void clearPD() {} + protected void initializePD() {} + protected void initializeFromPD(RegisterMap map) {} + protected Address getLocationPD(VMReg reg) { return null; } +} diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java index f4cd4873207..57b5707f9ed 100644 --- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java @@ -50,7 +50,7 @@ public class PlatformInfo { public static boolean knownCPU(String cpu) { final String[] KNOWN = - new String[] {"i386", "x86", "x86_64", "amd64", "ppc64", "ppc64le", "aarch64", "riscv64"}; + new String[] {"i386", "x86", "x86_64", "amd64", "ppc64", "ppc64le", "aarch64", "riscv64", "sw64", "sw_64"}; for(String s : KNOWN) { if(s.equals(cpu)) @@ -83,6 +83,8 @@ public class PlatformInfo { if (cpu.equals("ppc64le")) return "ppc64"; + if (cpu.equals("sw_64") || cpu.equals("sw64")) + return "sw64"; return cpu; } diff --git a/test/hotspot/jtreg/runtime/StackGuardPages/exeinvoke.c b/test/hotspot/jtreg/runtime/StackGuardPages/exeinvoke.c index abef2ea050a..c8b8c8949fd 100644 --- a/test/hotspot/jtreg/runtime/StackGuardPages/exeinvoke.c +++ b/test/hotspot/jtreg/runtime/StackGuardPages/exeinvoke.c @@ -69,7 +69,8 @@ static void handler(int sig, siginfo_t *si, void *unused) { longjmp(context, 1); } -static char* altstack = NULL; +// static char* altstack = NULL; +char* altstack = NULL; void set_signal_handler() { if (altstack == NULL) { diff --git a/test/lib-test/jdk/test/lib/TestMutuallyExclusivePlatformPredicates.java b/test/lib-test/jdk/test/lib/TestMutuallyExclusivePlatformPredicates.java index e78e200ac24..6dc8c0fa934 100644 --- a/test/lib-test/jdk/test/lib/TestMutuallyExclusivePlatformPredicates.java +++ b/test/lib-test/jdk/test/lib/TestMutuallyExclusivePlatformPredicates.java @@ -45,7 +45,7 @@ import 
java.util.Set; */ public class TestMutuallyExclusivePlatformPredicates { private static enum MethodGroup { - ARCH("isAArch64", "isARM", "isRISCV64", "isPPC", "isS390x", "isX64", "isX86"), + ARCH("isAArch64", "isARM", "isRISCV64", "isPPC", "isS390x", "isX64", "isX86", "isSW64"), BITNESS("is32bit", "is64bit"), OS("isAix", "isLinux", "isOSX", "isWindows"), VM_TYPE("isClient", "isServer", "isMinimal", "isZero", "isEmbedded"), diff --git a/test/lib/jdk/test/lib/Platform.java b/test/lib/jdk/test/lib/Platform.java index 2e2e16e6593..8226da5dc20 100644 --- a/test/lib/jdk/test/lib/Platform.java +++ b/test/lib/jdk/test/lib/Platform.java @@ -234,6 +234,11 @@ public class Platform { return isArch("(i386)|(x86(?!_64))"); } + public static boolean isSW64() { + // On Linux it's 'sw_64' or 'sw64'. + return isArch("(sw_64)|(sw64)"); + } + public static String getOsArch() { return osArch; }
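
The `Platform.isSW64()` predicate added above matches both spellings seen on Linux: `sw_64` (the raw architecture string) and `sw64` (the normalized form used elsewhere in the JDK). As a minimal sketch only — not part of this patch, with the class name and messages invented for illustration — a jtreg test could combine it with the existing test-library predicates like this:

```java
import jdk.test.lib.Platform;

// Hypothetical example: gate SW64-specific checks on the new predicate.
public class Sw64PlatformCheck {
    public static void main(String[] args) {
        if (Platform.isSW64()) {
            // sw64 is a 64-bit architecture, so the bitness predicate is expected to agree.
            if (!Platform.is64bit()) {
                throw new RuntimeException("sw64 reported as 32-bit, os.arch=" + Platform.getOsArch());
            }
            System.out.println("Running on sw64, os.arch=" + Platform.getOsArch());
        } else {
            System.out.println("Not an sw64 machine, os.arch=" + Platform.getOsArch());
        }
    }
}
```

Because `isArch` takes a regular expression, the single pattern `"(sw_64)|(sw64)"` covers both variants without callers having to know which spelling the current JDK reports.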